LCOV - code coverage report
Current view: top level - gcc - omp-grid.c (source / functions) Hit Total Coverage
Test: gcc.info Lines: 0 636 0.0 %
Date: 2020-04-04 11:58:09 Functions: 0 25 0.0 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* Lowering and expansion of OpenMP directives for HSA GPU agents.
       2                 :            : 
       3                 :            :    Copyright (C) 2013-2020 Free Software Foundation, Inc.
       4                 :            : 
       5                 :            : This file is part of GCC.
       6                 :            : 
       7                 :            : GCC is free software; you can redistribute it and/or modify it under
       8                 :            : the terms of the GNU General Public License as published by the Free
       9                 :            : Software Foundation; either version 3, or (at your option) any later
      10                 :            : version.
      11                 :            : 
      12                 :            : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :            : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :            : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :            : for more details.
      16                 :            : 
      17                 :            : You should have received a copy of the GNU General Public License
      18                 :            : along with GCC; see the file COPYING3.  If not see
      19                 :            : <http://www.gnu.org/licenses/>.  */
      20                 :            : 
      21                 :            : #include "config.h"
      22                 :            : #include "system.h"
      23                 :            : #include "coretypes.h"
      24                 :            : #include "backend.h"
      25                 :            : #include "tree.h"
      26                 :            : #include "gimple.h"
      27                 :            : #include "tree-pass.h"
      28                 :            : #include "ssa.h"
      29                 :            : #include "cgraph.h"
      30                 :            : #include "pretty-print.h"
      31                 :            : #include "fold-const.h"
      32                 :            : #include "gimplify.h"
      33                 :            : #include "gimple-iterator.h"
      34                 :            : #include "gimple-walk.h"
      35                 :            : #include "tree-inline.h"
      36                 :            : #include "langhooks.h"
      37                 :            : #include "omp-general.h"
      38                 :            : #include "omp-low.h"
      39                 :            : #include "omp-grid.h"
      40                 :            : #include "gimple-pretty-print.h"
      41                 :            : 
      42                 :            : /* Return the lastprivate predicate for a given gridified loop described by
      43                 :            :    FD).  */
      44                 :            : 
      45                 :            : tree
      46                 :          0 : omp_grid_lastprivate_predicate (struct omp_for_data *fd)
      47                 :            : {
      48                 :            :   /* When dealing with a gridified loop, we need to check up to three collapsed
      49                 :            :      iteration variables but they are not actually captured in this fd.
      50                 :            :      Fortunately, we can easily rely on HSA builtins to get this
      51                 :            :      information.  */
      52                 :            : 
      53                 :          0 :   tree id, size;
      54                 :          0 :   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      55                 :          0 :       && gimple_omp_for_grid_intra_group (fd->for_stmt))
      56                 :            :     {
      57                 :          0 :       id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      58                 :          0 :       size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
      59                 :            :     }
      60                 :            :   else
      61                 :            :     {
      62                 :          0 :       id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      63                 :          0 :       size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
      64                 :            :     }
      65                 :          0 :   tree cond = NULL;
      66                 :          0 :   for (int dim = 0; dim < fd->collapse; dim++)
      67                 :            :     {
      68                 :          0 :       tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      69                 :          0 :       tree u1 = build_int_cstu (unsigned_type_node, 1);
      70                 :          0 :       tree c2
      71                 :          0 :         = build2 (EQ_EXPR, boolean_type_node,
      72                 :            :                   build2 (PLUS_EXPR, unsigned_type_node,
      73                 :            :                           build_call_expr (id, 1, dim_tree), u1),
      74                 :            :                   build_call_expr (size, 1, dim_tree));
      75                 :          0 :       if (cond)
      76                 :          0 :         cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      77                 :            :       else
      78                 :            :         cond = c2;
      79                 :            :     }
      80                 :          0 :   return cond;
      81                 :            : }
      82                 :            : 
      83                 :            : /* Structure describing the basic properties of the loop we ara analyzing
      84                 :            :    whether it can be gridified and when it is gridified.  */
      85                 :            : 
      86                 :            : class grid_prop
      87                 :            : {
      88                 :            : public:
      89                 :            :   /* True when we are doing tiling gridification, i.e. when there is a distinct
      90                 :            :      distribute loop over groups and a loop construct over work-items.  False
      91                 :            :      when distribute and parallel for loops form a combined construct.  */
      92                 :            :   bool tiling;
      93                 :            :   /* Location of the target construct for optimization information
      94                 :            :      messages.  */
      95                 :            :   dump_user_location_t target_loc;
      96                 :            :   /* The collapse clause of the involved loops.  Collapse value of all of them
      97                 :            :      must be the same for gridification to take place.  */
      98                 :            :   size_t collapse;
      99                 :            :   /* Group sizes, if requested by the user or NULL if not requested.  */
     100                 :            :   tree group_sizes[3];
     101                 :            : };
     102                 :            : 
     103                 :            : #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
     104                 :            :   "gridified HSA kernel because "
     105                 :            : 
     106                 :            : /* Return true if STMT is an assignment of a register-type into a local
     107                 :            :    VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
     108                 :            :    any of the trees specifying group sizes there.  */
     109                 :            : 
     110                 :            : static bool
     111                 :          0 : grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
     112                 :            : {
     113                 :          0 :   gassign *assign = dyn_cast <gassign *> (stmt);
     114                 :          0 :   if (!assign)
     115                 :            :     return false;
     116                 :          0 :   if (gimple_clobber_p (assign))
     117                 :            :     return true;
     118                 :          0 :   tree lhs = gimple_assign_lhs (assign);
     119                 :          0 :   if (!VAR_P (lhs)
     120                 :          0 :       || !is_gimple_reg_type (TREE_TYPE (lhs))
     121                 :          0 :       || is_global_var (lhs))
     122                 :            :     return false;
     123                 :          0 :   if (grid)
     124                 :          0 :     for (unsigned i = 0; i < grid->collapse; i++)
     125                 :          0 :       if (lhs == grid->group_sizes[i])
     126                 :            :         return false;
     127                 :            :   return true;
     128                 :            : }
     129                 :            : 
     130                 :            : /* Return true if all statements in SEQ are assignments to local register-type
     131                 :            :    variables that do not hold group size information.  */
     132                 :            : 
     133                 :            : static bool
     134                 :          0 : grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
     135                 :            : {
     136                 :          0 :   if (!seq)
     137                 :            :     return true;
     138                 :            : 
     139                 :            :   gimple_stmt_iterator gsi;
     140                 :          0 :   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
     141                 :          0 :     if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
     142                 :            :       return false;
     143                 :            :   return true;
     144                 :            : }
     145                 :            : 
     146                 :            : /* Scan statements in SEQ and call itself recursively on any bind.  GRID
     147                 :            :    describes hitherto discovered properties of the loop that is evaluated for
     148                 :            :    possible gridification.  If during whole search only assignments to
     149                 :            :    register-type local variables (that do not overwrite group size information)
     150                 :            :    and one single OMP statement is encountered, return true, otherwise return
     151                 :            :    false.  RET is where we store any OMP statement encountered.  */
     152                 :            : 
     153                 :            : static bool
     154                 :          0 : grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
     155                 :            :                                           const char *name, gimple **ret)
     156                 :            : {
     157                 :          0 :   gimple_stmt_iterator gsi;
     158                 :          0 :   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
     159                 :            :     {
     160                 :          0 :       gimple *stmt = gsi_stmt (gsi);
     161                 :            : 
     162                 :          0 :       if (grid_safe_assignment_p (stmt, grid))
     163                 :          0 :         continue;
     164                 :          0 :       if (gbind *bind = dyn_cast <gbind *> (stmt))
     165                 :            :         {
     166                 :          0 :           gimple_seq bind_body = gimple_bind_body (bind);
     167                 :          0 :           if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
     168                 :            :                                                          ret))
     169                 :            :               return false;
     170                 :            :         }
     171                 :          0 :       else if (is_gimple_omp (stmt))
     172                 :            :         {
     173                 :          0 :           if (*ret)
     174                 :            :             {
     175                 :          0 :               if (dump_enabled_p ())
     176                 :            :                 {
     177                 :          0 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     178                 :            :                                    GRID_MISSED_MSG_PREFIX "%s construct "
     179                 :            :                                    "contains multiple OpenMP constructs\n",
     180                 :            :                                    name);
     181                 :          0 :                   dump_printf_loc (MSG_NOTE, *ret,
     182                 :            :                                    "The first OpenMP construct within "
     183                 :            :                                    "a parallel\n");
     184                 :          0 :                   dump_printf_loc (MSG_NOTE, stmt,
     185                 :            :                                    "The second OpenMP construct within "
     186                 :            :                                    "a parallel\n");
     187                 :            :                 }
     188                 :          0 :               return false;
     189                 :            :             }
     190                 :          0 :           *ret = stmt;
     191                 :            :         }
     192                 :            :       else
     193                 :            :         {
     194                 :          0 :           if (dump_enabled_p ())
     195                 :            :             {
     196                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     197                 :            :                                GRID_MISSED_MSG_PREFIX "%s construct contains "
     198                 :            :                                "a complex statement\n", name);
     199                 :          0 :               dump_printf_loc (MSG_NOTE, stmt,
     200                 :            :                                "This statement cannot be analyzed for "
     201                 :            :                                "gridification\n");
     202                 :            :             }
     203                 :          0 :           return false;
     204                 :            :         }
     205                 :            :     }
     206                 :            :   return true;
     207                 :            : }
     208                 :            : 
     209                 :            : /* Scan statements in SEQ and make sure that it and any binds in it contain
     210                 :            :    only assignments to local register-type variables (that do not overwrite
     211                 :            :    group size information) and one OMP construct.  If so, return that
     212                 :            :    construct, otherwise return NULL.  GRID describes hitherto discovered
     213                 :            :    properties of the loop that is evaluated for possible gridification.  If
     214                 :            :    dumping is enabled and function fails, use NAME to dump a note with the
     215                 :            :    reason for failure.  */
     216                 :            : 
     217                 :            : static gimple *
     218                 :          0 : grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
     219                 :            :                                         const char *name)
     220                 :            : {
     221                 :          0 :   if (!seq)
     222                 :            :     {
     223                 :          0 :       if (dump_enabled_p ())
     224                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     225                 :            :                          GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
     226                 :            :                          name);
     227                 :          0 :       return NULL;
     228                 :            :     }
     229                 :            : 
     230                 :          0 :   gimple *ret = NULL;
     231                 :          0 :   if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
     232                 :            :     {
     233                 :          0 :       if (!ret && dump_enabled_p ())
     234                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     235                 :            :                          GRID_MISSED_MSG_PREFIX "%s construct does not contain"
     236                 :            :                          " any other OpenMP construct\n", name);
     237                 :          0 :       return ret;
     238                 :            :     }
     239                 :            :   else
     240                 :            :     return NULL;
     241                 :            : }
     242                 :            : 
     243                 :            : /* Walker function looking for statements there is no point gridifying (and for
     244                 :            :    noreturn function calls which we cannot do).  Return non-NULL if such a
     245                 :            :    function is found.  */
     246                 :            : 
     247                 :            : static tree
     248                 :          0 : grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
     249                 :            :                                    bool *handled_ops_p,
     250                 :            :                                    struct walk_stmt_info *wi)
     251                 :            : {
     252                 :          0 :   *handled_ops_p = false;
     253                 :          0 :   gimple *stmt = gsi_stmt (*gsi);
     254                 :          0 :   switch (gimple_code (stmt))
     255                 :            :     {
     256                 :          0 :     case GIMPLE_CALL:
     257                 :          0 :       if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
     258                 :            :         {
     259                 :          0 :           *handled_ops_p = true;
     260                 :          0 :           wi->info = stmt;
     261                 :          0 :           return error_mark_node;
     262                 :            :         }
     263                 :            :       break;
     264                 :            : 
     265                 :            :     /* We may reduce the following list if we find a way to implement the
     266                 :            :        clauses, but now there is no point trying further.  */
     267                 :          0 :     case GIMPLE_OMP_CRITICAL:
     268                 :          0 :     case GIMPLE_OMP_TASKGROUP:
     269                 :          0 :     case GIMPLE_OMP_TASK:
     270                 :          0 :     case GIMPLE_OMP_SECTION:
     271                 :          0 :     case GIMPLE_OMP_SECTIONS:
     272                 :          0 :     case GIMPLE_OMP_SECTIONS_SWITCH:
     273                 :          0 :     case GIMPLE_OMP_TARGET:
     274                 :          0 :     case GIMPLE_OMP_ORDERED:
     275                 :          0 :       *handled_ops_p = true;
     276                 :          0 :       wi->info = stmt;
     277                 :          0 :       return error_mark_node;
     278                 :            :     default:
     279                 :            :       break;
     280                 :            :     }
     281                 :            :   return NULL;
     282                 :            : }
     283                 :            : 
     284                 :            : /* Examine clauses of omp parallel statement PAR and if any prevents
     285                 :            :    gridification, issue a missed-optimization diagnostics and return false,
     286                 :            :    otherwise return true.  GRID describes hitherto discovered properties of the
     287                 :            :    loop that is evaluated for possible gridification.  */
     288                 :            : 
     289                 :            : static bool
     290                 :          0 : grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
     291                 :            : {
     292                 :          0 :   tree clauses = gimple_omp_parallel_clauses (par);
     293                 :          0 :   while (clauses)
     294                 :            :     {
     295                 :          0 :       switch (OMP_CLAUSE_CODE (clauses))
     296                 :            :         {
     297                 :          0 :         case OMP_CLAUSE_NUM_THREADS:
     298                 :          0 :           if (dump_enabled_p ())
     299                 :            :             {
     300                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     301                 :            :                                GRID_MISSED_MSG_PREFIX "because there is "
     302                 :            :                                "a num_threads clause of the parallel "
     303                 :            :                                "construct\n");
     304                 :          0 :               dump_printf_loc (MSG_NOTE, par,
     305                 :            :                                "Parallel construct has a num_threads clause\n");
     306                 :            :             }
     307                 :            :           return false;
     308                 :            : 
     309                 :          0 :         case OMP_CLAUSE_REDUCTION:
     310                 :          0 :           if (dump_enabled_p ())
     311                 :            :             {
     312                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     313                 :            :                                GRID_MISSED_MSG_PREFIX "a reduction clause "
     314                 :            :                                "is present\n ");
     315                 :          0 :               dump_printf_loc (MSG_NOTE, par,
     316                 :            :                                "Parallel construct has a reduction clause\n");
     317                 :            :             }
     318                 :            :           return false;
     319                 :            : 
     320                 :          0 :         default:
     321                 :          0 :           break;
     322                 :            :         }
     323                 :          0 :       clauses = OMP_CLAUSE_CHAIN (clauses);
     324                 :            :     }
     325                 :            :   return true;
     326                 :            : }
     327                 :            : 
     328                 :            : /* Examine clauses and the body of omp loop statement GFOR and if something
     329                 :            :    prevents gridification, issue a missed-optimization diagnostics and return
     330                 :            :    false, otherwise return true.  GRID describes hitherto discovered properties
     331                 :            :    of the loop that is evaluated for possible gridification.  */
     332                 :            : 
     333                 :            : static bool
     334                 :          0 : grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
     335                 :            : {
     336                 :          0 :   if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
     337                 :            :                                                  grid))
     338                 :            :     {
     339                 :          0 :       if (dump_enabled_p ())
     340                 :            :         {
     341                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     342                 :            :                            GRID_MISSED_MSG_PREFIX "the inner loop "
     343                 :            :                            "loop bounds computation contains a complex "
     344                 :            :                            "statement\n");
     345                 :          0 :           dump_printf_loc (MSG_NOTE, gfor,
     346                 :            :                            "Loop construct cannot be analyzed for "
     347                 :            :                            "gridification\n");
     348                 :            :         }
     349                 :          0 :       return false;
     350                 :            :     }
     351                 :            : 
     352                 :          0 :   tree clauses = gimple_omp_for_clauses (gfor);
     353                 :          0 :   while (clauses)
     354                 :            :     {
     355                 :          0 :       switch (OMP_CLAUSE_CODE (clauses))
     356                 :            :         {
     357                 :          0 :         case OMP_CLAUSE_SCHEDULE:
     358                 :          0 :           if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
     359                 :            :             {
     360                 :          0 :               if (dump_enabled_p ())
     361                 :            :                 {
     362                 :          0 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     363                 :            :                                    GRID_MISSED_MSG_PREFIX "the inner loop "
     364                 :            :                                    "has a non-automatic schedule clause\n");
     365                 :          0 :                   dump_printf_loc (MSG_NOTE, gfor,
     366                 :            :                                    "Loop construct has a non automatic "
     367                 :            :                                    "schedule clause\n");
     368                 :            :                 }
     369                 :          0 :               return false;
     370                 :            :             }
     371                 :            :           break;
     372                 :            : 
     373                 :          0 :         case OMP_CLAUSE_REDUCTION:
     374                 :          0 :           if (dump_enabled_p ())
     375                 :            :             {
     376                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     377                 :            :                                GRID_MISSED_MSG_PREFIX "a reduction "
     378                 :            :                                "clause is present\n ");
     379                 :          0 :               dump_printf_loc (MSG_NOTE, gfor,
     380                 :            :                                "Loop construct has a reduction schedule "
     381                 :            :                                "clause\n");
     382                 :            :             }
     383                 :            :           return false;
     384                 :            : 
     385                 :            :         default:
     386                 :            :           break;
     387                 :            :         }
     388                 :          0 :       clauses = OMP_CLAUSE_CHAIN (clauses);
     389                 :            :     }
     390                 :          0 :   struct walk_stmt_info wi;
     391                 :          0 :   memset (&wi, 0, sizeof (wi));
     392                 :          0 :   if (walk_gimple_seq (gimple_omp_body (gfor),
     393                 :            :                        grid_find_ungridifiable_statement,
     394                 :            :                        NULL, &wi))
     395                 :            :     {
     396                 :          0 :       gimple *bad = (gimple *) wi.info;
     397                 :          0 :       if (dump_enabled_p ())
     398                 :            :         {
     399                 :          0 :           if (is_gimple_call (bad))
     400                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     401                 :            :                                GRID_MISSED_MSG_PREFIX "the inner loop contains "
     402                 :            :                                "call to a noreturn function\n");
     403                 :            :           else
     404                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     405                 :            :                              GRID_MISSED_MSG_PREFIX "the inner loop contains "
     406                 :            :                              "statement %s which cannot be transformed\n",
     407                 :          0 :                              gimple_code_name[(int) gimple_code (bad)]);
     408                 :          0 :           dump_printf_loc (MSG_NOTE, bad,
     409                 :            :                            "This statement cannot be analyzed for "
     410                 :            :                            "gridification\n");
     411                 :            :         }
     412                 :          0 :       return false;
     413                 :            :     }
     414                 :            :   return true;
     415                 :            : }
     416                 :            : 
     417                 :            : /* Given distribute omp construct represented by DIST, which in the original
     418                 :            :    source forms a compound construct with a looping construct, return true if it
     419                 :            :    can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
     420                 :            :    describes hitherto discovered properties of the loop that is evaluated for
     421                 :            :    possible gridification.  */
     422                 :            : 
     423                 :            : static bool
     424                 :          0 : grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
     425                 :            : {
     426                 :          0 :   dump_user_location_t tloc = grid->target_loc;
     427                 :          0 :   gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
     428                 :            :                                                          grid, "distribute");
     429                 :          0 :   gomp_parallel *par;
     430                 :          0 :   if (!stmt
     431                 :          0 :       || !(par = dyn_cast <gomp_parallel *> (stmt))
     432                 :          0 :       || !grid_parallel_clauses_gridifiable (par, tloc))
     433                 :          0 :     return false;
     434                 :            : 
     435                 :          0 :   stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
     436                 :            :                                                  "parallel");
     437                 :          0 :   gomp_for *gfor;
     438                 :          0 :   if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
     439                 :            :     return false;
     440                 :            : 
     441                 :          0 :   if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
     442                 :            :     {
     443                 :          0 :       if (dump_enabled_p ())
     444                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     445                 :            :                          GRID_MISSED_MSG_PREFIX "the inner loop is not "
     446                 :            :                          "a simple for loop\n");
     447                 :          0 :       return false;
     448                 :            :     }
     449                 :          0 :   gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
     450                 :            : 
     451                 :          0 :   if (!grid_inner_loop_gridifiable_p (gfor, grid))
     452                 :          0 :     return false;
     453                 :            : 
     454                 :            :   return true;
     455                 :            : }
     456                 :            : 
     457                 :            : /* Given an omp loop statement GFOR, return true if it can participate in
     458                 :            :    tiling gridification, i.e. in one where the distribute and parallel for
     459                 :            :    loops do not form a compound statement.  GRID describes hitherto discovered
     460                 :            :    properties of the loop that is evaluated for possible gridification.  */
     461                 :            : 
     462                 :            : static bool
     463                 :          0 : grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
     464                 :            : {
     465                 :          0 :   if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
     466                 :            :     {
     467                 :          0 :       if (dump_enabled_p ())
     468                 :            :         {
     469                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     470                 :            :                            GRID_MISSED_MSG_PREFIX "an inner loop is not "
     471                 :            :                            "a simple for loop\n");
     472                 :          0 :           dump_printf_loc (MSG_NOTE, gfor,
     473                 :            :                            "This statement is not a simple for loop\n");
     474                 :            :         }
     475                 :          0 :       return false;
     476                 :            :     }
     477                 :            : 
     478                 :          0 :   if (!grid_inner_loop_gridifiable_p (gfor, grid))
     479                 :            :     return false;
     480                 :            : 
     481                 :          0 :   if (gimple_omp_for_collapse (gfor) != grid->collapse)
     482                 :            :     {
     483                 :          0 :       if (dump_enabled_p ())
     484                 :            :         {
     485                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     486                 :            :                            GRID_MISSED_MSG_PREFIX "an inner loop does not "
     487                 :            :                            "have use the same collapse clause\n");
     488                 :          0 :           dump_printf_loc (MSG_NOTE, gfor,
     489                 :            :                            "Loop construct uses a different collapse clause\n");
     490                 :            :         }
     491                 :          0 :       return false;
     492                 :            :     }
     493                 :            : 
     494                 :          0 :   struct omp_for_data fd;
     495                 :          0 :   struct omp_for_data_loop *loops
     496                 :          0 :     = (struct omp_for_data_loop *)alloca (grid->collapse
     497                 :            :                                           * sizeof (struct omp_for_data_loop));
     498                 :          0 :   omp_extract_for_data (gfor, &fd, loops);
     499                 :          0 :   for (unsigned i = 0; i < grid->collapse; i++)
     500                 :            :     {
     501                 :          0 :       tree itype, type = TREE_TYPE (fd.loops[i].v);
     502                 :          0 :       if (POINTER_TYPE_P (type))
     503                 :          0 :         itype = signed_type_for (type);
     504                 :            :       else
     505                 :            :         itype = type;
     506                 :            : 
     507                 :          0 :       tree n1 = fold_convert (itype, fd.loops[i].n1);
     508                 :          0 :       tree n2 = fold_convert (itype, fd.loops[i].n2);
     509                 :          0 :       tree t = build_int_cst (itype,
     510                 :          0 :                               (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
     511                 :          0 :       t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
     512                 :          0 :       t = fold_build2 (PLUS_EXPR, itype, t, n2);
     513                 :          0 :       t = fold_build2 (MINUS_EXPR, itype, t, n1);
     514                 :          0 :       if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
     515                 :          0 :         t = fold_build2 (TRUNC_DIV_EXPR, itype,
     516                 :            :                          fold_build1 (NEGATE_EXPR, itype, t),
     517                 :            :                          fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
     518                 :            :       else
     519                 :          0 :         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
     520                 :            : 
     521                 :          0 :       if (!operand_equal_p (grid->group_sizes[i], t, 0))
     522                 :            :         {
     523                 :          0 :           if (dump_enabled_p ())
     524                 :            :             {
     525                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     526                 :            :                                GRID_MISSED_MSG_PREFIX "the distribute and "
     527                 :            :                                "an internal loop do not agree on tile size\n");
     528                 :          0 :               dump_printf_loc (MSG_NOTE, gfor,
     529                 :            :                                "Loop construct does not seem to loop over "
     530                 :            :                                "a tile size\n");
     531                 :            :             }
     532                 :          0 :           return false;
     533                 :            :         }
     534                 :            :     }
     535                 :            :   return true;
     536                 :            : }
     537                 :            : 
     538                 :            : /* Facing a call to FNDECL in the body of a distribute construct, return true
     539                 :            :    if we can handle it or false if it precludes gridification.  */
     540                 :            : 
     541                 :            : static bool
     542                 :          0 : grid_call_permissible_in_distribute_p (tree fndecl)
     543                 :            : {
     544                 :          0 :   if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
     545                 :            :     return true;
     546                 :            : 
     547                 :          0 :   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
     548                 :          0 :   if (strstr (name, "omp_") != name)
     549                 :            :     return false;
     550                 :            : 
     551                 :          0 :   if ((strcmp (name, "omp_get_thread_num") == 0)
     552                 :          0 :       || (strcmp (name, "omp_get_num_threads") == 0)
     553                 :          0 :       || (strcmp (name, "omp_get_num_teams") == 0)
     554                 :          0 :       || (strcmp (name, "omp_get_team_num") == 0)
     555                 :          0 :       || (strcmp (name, "omp_get_level") == 0)
     556                 :          0 :       || (strcmp (name, "omp_get_active_level") == 0)
     557                 :          0 :       || (strcmp (name, "omp_in_parallel") == 0))
     558                 :          0 :     return true;
     559                 :            : 
     560                 :            :   return false;
     561                 :            : }
     562                 :            : 
     563                 :            : /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
     564                 :            :    of a distribute construct that is pointed at by GSI, modify it as necessary
     565                 :            :    for gridification.  If the statement itself got removed, return true.  */
     566                 :            : 
     567                 :            : static bool
     568                 :          0 : grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
     569                 :            : {
     570                 :          0 :   gimple *stmt = gsi_stmt (*gsi);
     571                 :          0 :   tree fndecl = gimple_call_fndecl (stmt);
     572                 :          0 :   gcc_checking_assert (stmt);
     573                 :          0 :   if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
     574                 :            :     return false;
     575                 :            : 
     576                 :          0 :   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
     577                 :          0 :   if ((strcmp (name, "omp_get_thread_num") == 0)
     578                 :          0 :       || (strcmp (name, "omp_get_level") == 0)
     579                 :          0 :       || (strcmp (name, "omp_get_active_level") == 0)
     580                 :          0 :       || (strcmp (name, "omp_in_parallel") == 0))
     581                 :            :     {
     582                 :          0 :       tree lhs = gimple_call_lhs (stmt);
     583                 :          0 :       if (lhs)
     584                 :            :         {
     585                 :          0 :           gassign *assign
     586                 :          0 :             = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
     587                 :          0 :           gsi_insert_before (gsi, assign, GSI_SAME_STMT);
     588                 :            :         }
     589                 :          0 :       gsi_remove (gsi, true);
     590                 :          0 :       return true;
     591                 :            :     }
     592                 :            : 
     593                 :            :   /* The rest of the omp functions can stay as they are, HSA back-end will
     594                 :            :      handle them correctly.  */
     595                 :          0 :   gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
     596                 :            :                        || (strcmp (name, "omp_get_num_teams") == 0)
     597                 :            :                        || (strcmp (name, "omp_get_team_num") == 0));
     598                 :            :   return false;
     599                 :            : }
     600                 :            : 
     601                 :            : /* Given a sequence of statements within a distribute omp construct or a
     602                 :            :    parallel construct, which in the original source does not form a compound
     603                 :            :    construct with a looping construct, return true if it does not prevent us
     604                 :            :    from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
     605                 :            :    describes hitherto discovered properties of the loop that is evaluated for
     606                 :            :    possible gridification.  IN_PARALLEL must be true if seq is within a
     607                 :            :    parallel construct and flase if it is only within a distribute
     608                 :            :    construct.  */
     609                 :            : 
     610                 :            : static bool
     611                 :          0 : grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
     612                 :            :                                   bool in_parallel)
     613                 :            : {
     614                 :          0 :   gimple_stmt_iterator gsi;
     615                 :          0 :   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
     616                 :            :     {
     617                 :          0 :       gimple *stmt = gsi_stmt (gsi);
     618                 :            : 
     619                 :          0 :       if (grid_safe_assignment_p (stmt, grid)
     620                 :          0 :           || gimple_code (stmt) == GIMPLE_GOTO
     621                 :          0 :           || gimple_code (stmt) == GIMPLE_LABEL
     622                 :          0 :           || gimple_code (stmt) == GIMPLE_COND)
     623                 :          0 :         continue;
     624                 :          0 :       else if (gbind *bind = dyn_cast <gbind *> (stmt))
     625                 :            :         {
     626                 :          0 :           if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
     627                 :            :                                                  grid, in_parallel))
     628                 :            :             return false;
     629                 :          0 :           continue;
     630                 :            :         }
     631                 :          0 :       else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
     632                 :            :         {
     633                 :          0 :           if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
     634                 :            :             {
     635                 :          0 :               if (dump_enabled_p ())
     636                 :            :                 {
     637                 :          0 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     638                 :            :                                    GRID_MISSED_MSG_PREFIX "the distribute "
     639                 :            :                                    "construct contains a try..catch region\n");
     640                 :          0 :                   dump_printf_loc (MSG_NOTE, try_stmt,
     641                 :            :                                    "This statement cannot be analyzed for "
     642                 :            :                                    "tiled gridification\n");
     643                 :            :                 }
     644                 :          0 :               return false;
     645                 :            :             }
     646                 :          0 :           if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
     647                 :            :                                                  grid, in_parallel))
     648                 :            :             return false;
     649                 :          0 :           if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
     650                 :            :                                                  grid, in_parallel))
     651                 :            :             return false;
     652                 :          0 :           continue;
     653                 :            :         }
     654                 :          0 :       else if (is_gimple_call (stmt))
     655                 :            :         {
     656                 :          0 :           tree fndecl = gimple_call_fndecl (stmt);
     657                 :          0 :           if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
     658                 :          0 :             continue;
     659                 :            : 
     660                 :          0 :           if (dump_enabled_p ())
     661                 :            :             {
     662                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     663                 :            :                                GRID_MISSED_MSG_PREFIX "the distribute "
     664                 :            :                                "construct contains a call\n");
     665                 :          0 :               dump_printf_loc (MSG_NOTE, stmt,
     666                 :            :                                "This statement cannot be analyzed for "
     667                 :            :                                "tiled gridification\n");
     668                 :            :             }
     669                 :          0 :           return false;
     670                 :            :         }
     671                 :          0 :       else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
     672                 :            :         {
     673                 :          0 :           if (in_parallel)
     674                 :            :             {
     675                 :          0 :               if (dump_enabled_p ())
     676                 :            :                 {
     677                 :          0 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     678                 :            :                                    GRID_MISSED_MSG_PREFIX "a parallel "
     679                 :            :                                    "construct contains another parallel "
     680                 :            :                                    "construct\n");
     681                 :          0 :                   dump_printf_loc (MSG_NOTE, stmt,
     682                 :            :                                    "This parallel construct is nested in "
     683                 :            :                                    "another one\n");
     684                 :            :                 }
     685                 :          0 :               return false;
     686                 :            :             }
     687                 :          0 :           if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
     688                 :          0 :               || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
     689                 :            :                                                     grid, true))
     690                 :          0 :             return false;
     691                 :            :         }
     692                 :          0 :       else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
     693                 :            :         {
     694                 :          0 :           if (!in_parallel)
     695                 :            :             {
     696                 :          0 :               if (dump_enabled_p ())
     697                 :            :                 {
     698                 :          0 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     699                 :            :                                    GRID_MISSED_MSG_PREFIX "a loop "
     700                 :            :                                    "construct is not nested within a parallel "
     701                 :            :                                    "construct\n");
     702                 :          0 :                   dump_printf_loc (MSG_NOTE, stmt,
     703                 :            :                                    "This loop construct is not nested in "
     704                 :            :                                    "a parallel construct\n");
     705                 :            :                 }
     706                 :          0 :               return false;
     707                 :            :             }
     708                 :          0 :           if (!grid_gfor_follows_tiling_pattern (gfor, grid))
     709                 :            :             return false;
     710                 :            :         }
     711                 :            :       else
     712                 :            :         {
     713                 :          0 :           if (dump_enabled_p ())
     714                 :            :             {
     715                 :          0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
     716                 :            :                                GRID_MISSED_MSG_PREFIX "the distribute "
     717                 :            :                                "construct contains a complex statement\n");
     718                 :          0 :               dump_printf_loc (MSG_NOTE, stmt,
     719                 :            :                                "This statement cannot be analyzed for "
     720                 :            :                                "tiled gridification\n");
     721                 :            :             }
     722                 :          0 :           return false;
     723                 :            :         }
     724                 :            :     }
     725                 :            :     return true;
     726                 :            : }
     727                 :            : 
     728                 :            : /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
     729                 :            :    return true, otherwise return false.  In the case of success, also fill in
     730                 :            :    GRID with information describing the kernel grid.  */
     731                 :            : 
     732                 :            : static bool
     733                 :          0 : grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
     734                 :            : {
     735                 :          0 :   if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
     736                 :            :     return false;
     737                 :            : 
     738                 :          0 :   dump_user_location_t tloc = target;
     739                 :          0 :   grid->target_loc = tloc;
     740                 :          0 :   gimple *stmt
     741                 :          0 :     = grid_find_single_omp_among_assignments (gimple_omp_body (target),
     742                 :            :                                               grid, "target");
     743                 :          0 :   if (!stmt)
     744                 :            :     return false;
     745                 :          0 :   gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
     746                 :          0 :   tree group_size = NULL;
     747                 :          0 :   if (!teams)
     748                 :            :     {
     749                 :          0 :       if (dump_enabled_p ())
     750                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     751                 :            :                          GRID_MISSED_MSG_PREFIX "it does not have a sole "
     752                 :            :                          "teams construct in it.\n");
     753                 :          0 :       return false;
     754                 :            :     }
     755                 :            : 
     756                 :          0 :   tree clauses = gimple_omp_teams_clauses (teams);
     757                 :          0 :   while (clauses)
     758                 :            :     {
     759                 :          0 :       switch (OMP_CLAUSE_CODE (clauses))
     760                 :            :         {
     761                 :          0 :         case OMP_CLAUSE_NUM_TEAMS:
     762                 :          0 :           if (dump_enabled_p ())
     763                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     764                 :            :                              GRID_MISSED_MSG_PREFIX "the teams construct "
     765                 :            :                              "contains a num_teams clause\n ");
     766                 :            :           return false;
     767                 :            : 
     768                 :          0 :         case OMP_CLAUSE_REDUCTION:
     769                 :          0 :           if (dump_enabled_p ())
     770                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     771                 :            :                              GRID_MISSED_MSG_PREFIX "a reduction "
     772                 :            :                              "clause is present\n ");
     773                 :            :           return false;
     774                 :            : 
     775                 :          0 :         case OMP_CLAUSE_THREAD_LIMIT:
     776                 :          0 :           if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
     777                 :          0 :             group_size = OMP_CLAUSE_OPERAND (clauses, 0);
     778                 :            :           break;
     779                 :            : 
     780                 :            :         default:
     781                 :            :           break;
     782                 :            :         }
     783                 :          0 :       clauses = OMP_CLAUSE_CHAIN (clauses);
     784                 :            :     }
     785                 :            : 
     786                 :          0 :   stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
     787                 :            :                                                  "teams");
     788                 :          0 :   if (!stmt)
     789                 :            :     return false;
     790                 :          0 :   gomp_for *dist = dyn_cast <gomp_for *> (stmt);
     791                 :          0 :   if (!dist)
     792                 :            :     {
     793                 :          0 :       if (dump_enabled_p ())
     794                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     795                 :            :                          GRID_MISSED_MSG_PREFIX "the teams construct does not "
     796                 :            :                          "have a single distribute construct in it.\n");
     797                 :          0 :       return false;
     798                 :            :     }
     799                 :            : 
     800                 :          0 :   gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
     801                 :            : 
     802                 :          0 :   grid->collapse = gimple_omp_for_collapse (dist);
     803                 :          0 :   if (grid->collapse > 3)
     804                 :            :     {
     805                 :          0 :       if (dump_enabled_p ())
     806                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     807                 :            :                          GRID_MISSED_MSG_PREFIX "the distribute construct "
     808                 :            :                          "contains collapse clause with parameter greater "
     809                 :            :                          "than 3\n");
     810                 :          0 :       return false;
     811                 :            :     }
     812                 :            : 
     813                 :          0 :   struct omp_for_data fd;
     814                 :          0 :   struct omp_for_data_loop *dist_loops
     815                 :          0 :     = (struct omp_for_data_loop *)alloca (grid->collapse
     816                 :            :                                           * sizeof (struct omp_for_data_loop));
     817                 :          0 :   omp_extract_for_data (dist, &fd, dist_loops);
     818                 :          0 :   if (fd.chunk_size)
     819                 :            :     {
     820                 :          0 :       if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
     821                 :            :         {
     822                 :          0 :           if (dump_enabled_p ())
     823                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     824                 :            :                              GRID_MISSED_MSG_PREFIX "the teams "
     825                 :            :                              "thread limit is different from distribute "
     826                 :            :                              "schedule chunk\n");
     827                 :          0 :           return false;
     828                 :            :         }
     829                 :          0 :       group_size = fd.chunk_size;
     830                 :            :     }
     831                 :          0 :   if (group_size && grid->collapse > 1)
     832                 :            :     {
     833                 :          0 :       if (dump_enabled_p ())
     834                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     835                 :            :                          GRID_MISSED_MSG_PREFIX "group size cannot be "
     836                 :            :                          "set using thread_limit or schedule clauses "
     837                 :            :                          "when also using a collapse clause greater than 1\n");
     838                 :          0 :       return false;
     839                 :            :     }
     840                 :            : 
     841                 :          0 :   if (gimple_omp_for_combined_p (dist))
     842                 :            :     {
     843                 :          0 :       grid->tiling = false;
     844                 :          0 :       grid->group_sizes[0] = group_size;
     845                 :          0 :       for (unsigned i = 1; i < grid->collapse; i++)
     846                 :          0 :         grid->group_sizes[i] = NULL;
     847                 :          0 :       return grid_dist_follows_simple_pattern (dist, grid);
     848                 :            :     }
     849                 :            :   else
     850                 :            :     {
     851                 :          0 :       grid->tiling = true;
     852                 :          0 :       if (group_size)
     853                 :            :         {
     854                 :          0 :           if (dump_enabled_p ())
     855                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
     856                 :            :                              GRID_MISSED_MSG_PREFIX "group size cannot be set "
     857                 :            :                              "using thread_limit or schedule clauses when "
     858                 :            :                              "distribute and loop constructs do not form "
     859                 :            :                              "one combined construct\n");
     860                 :          0 :           return false;
     861                 :            :         }
     862                 :          0 :       for (unsigned i = 0; i < grid->collapse; i++)
     863                 :            :         {
     864                 :          0 :           if (fd.loops[i].cond_code == GT_EXPR)
     865                 :          0 :             grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
     866                 :            :                                                 TREE_TYPE (fd.loops[i].step),
     867                 :            :                                                 fd.loops[i].step);
     868                 :            :           else
     869                 :          0 :             grid->group_sizes[i] = fd.loops[i].step;
     870                 :            :         }
     871                 :          0 :       return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
     872                 :          0 :                                                false);
     873                 :            :     }
     874                 :            : }
     875                 :            : 
     876                 :            : /* Operand walker, used to remap pre-body declarations according to a hash map
     877                 :            :    provided in DATA.  */
     878                 :            : 
     879                 :            : static tree
     880                 :          0 : grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
     881                 :            : {
     882                 :          0 :   tree t = *tp;
     883                 :            : 
     884                 :          0 :   if (DECL_P (t) || TYPE_P (t))
     885                 :          0 :     *walk_subtrees = 0;
     886                 :            :   else
     887                 :          0 :     *walk_subtrees = 1;
     888                 :            : 
     889                 :          0 :   if (VAR_P (t))
     890                 :            :     {
     891                 :          0 :       struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
     892                 :          0 :       hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
     893                 :          0 :       tree *repl = declmap->get (t);
     894                 :          0 :       if (repl)
     895                 :          0 :         *tp = *repl;
     896                 :            :     }
     897                 :          0 :   return NULL_TREE;
     898                 :            : }
     899                 :            : 
     900                 :            : /* Identifiers of segments into which a particular variable should be places
     901                 :            :    when gridifying.  */
     902                 :            : 
     903                 :            : enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
     904                 :            :                        GRID_SEGMENT_GLOBAL};
     905                 :            : 
     906                 :            : /* Mark VAR so that it is eventually placed into SEGMENT.  Place an artificial
     907                 :            :    builtin call into SEQ that will make sure the variable is always considered
     908                 :            :    address taken.  */
     909                 :            : 
     910                 :            : static void
     911                 :          0 : grid_mark_variable_segment (tree var, enum grid_var_segment segment)
     912                 :            : {
     913                 :            :   /* Making a non-addressable variables would require that we re-gimplify all
     914                 :            :      their uses.  Fortunately, we do not have to do this because if they are
     915                 :            :      not addressable, it means they are not used in atomic or parallel
     916                 :            :      statements and so relaxed GPU consistency rules mean we can just keep them
     917                 :            :      private.  */
     918                 :          0 :   if (!TREE_ADDRESSABLE (var))
     919                 :            :     return;
     920                 :            : 
     921                 :          0 :   switch (segment)
     922                 :            :     {
     923                 :          0 :     case GRID_SEGMENT_GROUP:
     924                 :          0 :       DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
     925                 :          0 :                                          NULL, DECL_ATTRIBUTES (var));
     926                 :          0 :       break;
     927                 :          0 :     case GRID_SEGMENT_GLOBAL:
     928                 :          0 :       DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
     929                 :          0 :                                          NULL, DECL_ATTRIBUTES (var));
     930                 :          0 :       break;
     931                 :          0 :     default:
     932                 :          0 :       gcc_unreachable ();
     933                 :            :     }
     934                 :            : 
     935                 :          0 :   if (!TREE_STATIC (var))
     936                 :            :     {
     937                 :          0 :       TREE_STATIC (var) = 1;
     938                 :          0 :       const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
     939                 :          0 :       SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
     940                 :          0 :       varpool_node::finalize_decl (var);
     941                 :            :     }
     942                 :            : 
     943                 :            : }
     944                 :            : 
     945                 :            : /* Copy leading register-type assignments to local variables in SRC to just
     946                 :            :    before DST, Creating temporaries, adjusting mapping of operands in WI and
     947                 :            :    remapping operands as necessary.  Add any new temporaries to TGT_BIND.
     948                 :            :    Return the first statement that does not conform to grid_safe_assignment_p
     949                 :            :    or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
     950                 :            :    variables in traversed bind statements so that they are put into the
     951                 :            :    appropriate segment.  */
     952                 :            : 
     953                 :            : static gimple *
     954                 :          0 : grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
     955                 :            :                                      gbind *tgt_bind,
     956                 :            :                                      enum grid_var_segment var_segment,
     957                 :            :                                      struct walk_stmt_info *wi)
     958                 :            : {
     959                 :          0 :   hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
     960                 :          0 :   gimple_stmt_iterator gsi;
     961                 :          0 :   for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
     962                 :            :     {
     963                 :          0 :       gimple *stmt = gsi_stmt (gsi);
     964                 :          0 :       if (gbind *bind = dyn_cast <gbind *> (stmt))
     965                 :            :         {
     966                 :          0 :           gimple *r = grid_copy_leading_local_assignments
     967                 :          0 :             (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
     968                 :            : 
     969                 :          0 :           if (var_segment != GRID_SEGMENT_PRIVATE)
     970                 :          0 :             for (tree var = gimple_bind_vars (bind);
     971                 :          0 :                  var;
     972                 :          0 :                  var = DECL_CHAIN (var))
     973                 :          0 :               grid_mark_variable_segment (var, var_segment);
     974                 :          0 :           if (r)
     975                 :          0 :             return r;
     976                 :            :           else
     977                 :          0 :             continue;
     978                 :            :         }
     979                 :          0 :       if (!grid_safe_assignment_p (stmt, NULL))
     980                 :          0 :         return stmt;
     981                 :          0 :       tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
     982                 :          0 :       tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
     983                 :          0 :                                  TREE_TYPE (lhs));
     984                 :          0 :       DECL_CONTEXT (repl) = current_function_decl;
     985                 :          0 :       gimple_bind_append_vars (tgt_bind, repl);
     986                 :            : 
     987                 :          0 :       declmap->put (lhs, repl);
     988                 :          0 :       gassign *copy = as_a <gassign *> (gimple_copy (stmt));
     989                 :          0 :       walk_gimple_op (copy, grid_remap_prebody_decls, wi);
     990                 :          0 :       gsi_insert_before (dst, copy, GSI_SAME_STMT);
     991                 :            :     }
     992                 :            :   return NULL;
     993                 :            : }
     994                 :            : 
     995                 :            : /* Statement walker function to make adjustments to statements within the
     996                 :            :    gridifed kernel copy.  */
     997                 :            : 
     998                 :            : static tree
     999                 :          0 : grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
    1000                 :            :                         struct walk_stmt_info *)
    1001                 :            : {
    1002                 :          0 :   *handled_ops_p = false;
    1003                 :          0 :   gimple *stmt = gsi_stmt (*gsi);
    1004                 :          0 :   if (gimple_code (stmt) == GIMPLE_OMP_FOR
    1005                 :          0 :       && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
    1006                 :            :   {
    1007                 :          0 :     gomp_for *loop = as_a <gomp_for *> (stmt);
    1008                 :          0 :     tree clauses = gimple_omp_for_clauses (loop);
    1009                 :          0 :     tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
    1010                 :          0 :     if (cl)
    1011                 :          0 :       OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
    1012                 :            :     else
    1013                 :            :       {
    1014                 :          0 :         tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
    1015                 :          0 :         OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
    1016                 :          0 :         OMP_CLAUSE_CHAIN (c) = clauses;
    1017                 :          0 :         gimple_omp_for_set_clauses (loop, c);
    1018                 :            :       }
    1019                 :            :   }
    1020                 :          0 :   return NULL_TREE;
    1021                 :            : }
    1022                 :            : 
    1023                 :            : /* Given a PARLOOP that is a normal for looping construct but also a part of a
    1024                 :            :    combined construct with a simd loop, eliminate the simd loop.  */
    1025                 :            : 
    1026                 :            : static void
    1027                 :          0 : grid_eliminate_combined_simd_part (gomp_for *parloop)
    1028                 :            : {
    1029                 :          0 :   struct walk_stmt_info wi;
    1030                 :            : 
    1031                 :          0 :   memset (&wi, 0, sizeof (wi));
    1032                 :          0 :   wi.val_only = true;
    1033                 :          0 :   enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
    1034                 :          0 :   wi.info = (void *) &msk;
    1035                 :          0 :   walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
    1036                 :          0 :   gimple *stmt = (gimple *) wi.info;
    1037                 :            :   /* We expect that the SIMD id the only statement in the parallel loop.  */
    1038                 :          0 :   gcc_assert (stmt
    1039                 :            :               && gimple_code (stmt) == GIMPLE_OMP_FOR
    1040                 :            :               && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
    1041                 :            :               && gimple_omp_for_combined_into_p (stmt)
    1042                 :            :               && !gimple_omp_for_combined_p (stmt));
    1043                 :          0 :   gomp_for *simd = as_a <gomp_for *> (stmt);
    1044                 :            : 
    1045                 :            :   /* Copy over the iteration properties because the body refers to the index in
    1046                 :            :      the bottmom-most loop.  */
    1047                 :          0 :   unsigned i, collapse = gimple_omp_for_collapse (parloop);
    1048                 :          0 :   gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
    1049                 :          0 :   for (i = 0; i < collapse; i++)
    1050                 :            :     {
    1051                 :          0 :       gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
    1052                 :          0 :       gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
    1053                 :          0 :       gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
    1054                 :          0 :       gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    1055                 :            :     }
    1056                 :            : 
    1057                 :          0 :   tree *tgt= gimple_omp_for_clauses_ptr (parloop);
    1058                 :          0 :   while (*tgt)
    1059                 :          0 :     tgt = &OMP_CLAUSE_CHAIN (*tgt);
    1060                 :            : 
    1061                 :            :   /* Copy over all clauses, except for linear clauses, which are turned into
    1062                 :            :      private clauses, and all other simd-specific clauses, which are
    1063                 :            :      ignored.  */
    1064                 :          0 :   tree *pc = gimple_omp_for_clauses_ptr (simd);
    1065                 :          0 :   while (*pc)
    1066                 :            :     {
    1067                 :          0 :       tree c = *pc;
    1068                 :          0 :       switch (TREE_CODE (c))
    1069                 :            :         {
    1070                 :          0 :         case OMP_CLAUSE_LINEAR:
    1071                 :          0 :           {
    1072                 :          0 :             tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
    1073                 :          0 :             OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
    1074                 :          0 :             OMP_CLAUSE_CHAIN (priv) = NULL;
    1075                 :          0 :             *tgt = priv;
    1076                 :          0 :             tgt = &OMP_CLAUSE_CHAIN (priv);
    1077                 :          0 :             pc = &OMP_CLAUSE_CHAIN (c);
    1078                 :          0 :             break;
    1079                 :            :           }
    1080                 :            : 
    1081                 :          0 :         case OMP_CLAUSE_SAFELEN:
    1082                 :          0 :         case OMP_CLAUSE_SIMDLEN:
    1083                 :          0 :         case OMP_CLAUSE_ALIGNED:
    1084                 :          0 :           pc = &OMP_CLAUSE_CHAIN (c);
    1085                 :          0 :           break;
    1086                 :            : 
    1087                 :          0 :         default:
    1088                 :          0 :           *pc = OMP_CLAUSE_CHAIN (c);
    1089                 :          0 :           OMP_CLAUSE_CHAIN (c) = NULL;
    1090                 :          0 :           *tgt = c;
    1091                 :          0 :           tgt = &OMP_CLAUSE_CHAIN (c);
    1092                 :          0 :           break;
    1093                 :            :         }
    1094                 :            :     }
    1095                 :            : 
    1096                 :            :   /* Finally, throw away the simd and mark the parallel loop as not
    1097                 :            :      combined.  */
    1098                 :          0 :   gimple_omp_set_body (parloop, gimple_omp_body (simd));
    1099                 :          0 :   gimple_omp_for_set_combined_p (parloop, false);
    1100                 :          0 : }
    1101                 :            : 
    1102                 :            : /* Statement walker function marking all parallels as grid_phony and loops as
    1103                 :            :    grid ones representing threads of a particular thread group.  */
    1104                 :            : 
    1105                 :            : static tree
    1106                 :          0 : grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
    1107                 :            :                         struct walk_stmt_info *wi_in)
    1108                 :            : {
    1109                 :          0 :   *handled_ops_p = false;
    1110                 :          0 :   if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    1111                 :            :     {
    1112                 :          0 :       *handled_ops_p = true;
    1113                 :          0 :       gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
    1114                 :          0 :       gimple_omp_for_set_grid_intra_group (loop, true);
    1115                 :          0 :       if (gimple_omp_for_combined_p (loop))
    1116                 :          0 :         grid_eliminate_combined_simd_part (loop);
    1117                 :            : 
    1118                 :          0 :       struct walk_stmt_info body_wi;
    1119                 :          0 :       memset (&body_wi, 0, sizeof (body_wi));
    1120                 :          0 :       walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
    1121                 :            :                            grid_process_grid_body, NULL, &body_wi);
    1122                 :            : 
    1123                 :          0 :       gbind *bind = (gbind *) wi_in->info;
    1124                 :          0 :       tree c;
    1125                 :          0 :       for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
    1126                 :          0 :         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
    1127                 :            :           {
    1128                 :          0 :             push_gimplify_context ();
    1129                 :          0 :             tree ov = OMP_CLAUSE_DECL (c);
    1130                 :          0 :             tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
    1131                 :          0 :                                     TREE_TYPE (ov));
    1132                 :            : 
    1133                 :          0 :             grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
    1134                 :          0 :             DECL_CONTEXT (gv) = current_function_decl;
    1135                 :          0 :             gimple_bind_append_vars (bind, gv);
    1136                 :          0 :             tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
    1137                 :          0 :             gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
    1138                 :          0 :             x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
    1139                 :          0 :             gimple_seq l = NULL;
    1140                 :          0 :             gimplify_and_add (x, &l);
    1141                 :          0 :             gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
    1142                 :          0 :             pop_gimplify_context (bind);
    1143                 :            :           }
    1144                 :            :     }
    1145                 :          0 :   return NULL_TREE;
    1146                 :            : }
    1147                 :            : 
    1148                 :            : /* Statement walker function marking all parallels as grid_phony and loops as
    1149                 :            :    grid ones representing threads of a particular thread group.  */
    1150                 :            : 
    1151                 :            : static tree
    1152                 :          0 : grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
    1153                 :            :                                       bool *handled_ops_p,
    1154                 :            :                                       struct walk_stmt_info *wi_in)
    1155                 :            : {
    1156                 :          0 :   *handled_ops_p = false;
    1157                 :          0 :   wi_in->removed_stmt = false;
    1158                 :          0 :   gimple *stmt = gsi_stmt (*gsi);
    1159                 :          0 :   if (gbind *bind = dyn_cast <gbind *> (stmt))
    1160                 :            :     {
    1161                 :          0 :       for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
    1162                 :          0 :         grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    1163                 :            :     }
    1164                 :          0 :   else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    1165                 :            :     {
    1166                 :          0 :       *handled_ops_p = true;
    1167                 :          0 :       gimple_omp_parallel_set_grid_phony (parallel, true);
    1168                 :            : 
    1169                 :          0 :       gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
    1170                 :          0 :       gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
    1171                 :          0 :       gimple_seq s = NULL;
    1172                 :          0 :       gimple_seq_add_stmt (&s, new_bind);
    1173                 :          0 :       gimple_omp_set_body (parallel, s);
    1174                 :            : 
    1175                 :          0 :       struct walk_stmt_info wi_par;
    1176                 :          0 :       memset (&wi_par, 0, sizeof (wi_par));
    1177                 :          0 :       wi_par.info = new_bind;
    1178                 :          0 :       walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
    1179                 :            :                            grid_mark_tiling_loops, NULL, &wi_par);
    1180                 :            :     }
    1181                 :          0 :   else if (is_a <gcall *> (stmt))
    1182                 :          0 :     wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
    1183                 :          0 :   return NULL_TREE;
    1184                 :            : }
    1185                 :            : 
    1186                 :            : /* Given freshly copied top level kernel SEQ, identify the individual OMP
    1187                 :            :    components, mark them as part of kernel, copy assignment leading to them
    1188                 :            :    just before DST, remapping them using WI and adding new temporaries to
    1189                 :            :    TGT_BIND, and return the loop that will be used for kernel dispatch.  */
    1190                 :            : 
    1191                 :            : static gomp_for *
    1192                 :          0 : grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
    1193                 :            :                                gimple_stmt_iterator *dst,
    1194                 :            :                                gbind *tgt_bind, struct walk_stmt_info *wi)
    1195                 :            : {
    1196                 :          0 :   gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
    1197                 :            :                                                       GRID_SEGMENT_GLOBAL, wi);
    1198                 :          0 :   gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
    1199                 :          0 :   gcc_assert (teams);
    1200                 :          0 :   gimple_omp_teams_set_grid_phony (teams, true);
    1201                 :          0 :   stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
    1202                 :            :                                               tgt_bind, GRID_SEGMENT_GLOBAL,
    1203                 :            :                                               wi);
    1204                 :          0 :   gcc_checking_assert (stmt);
    1205                 :          0 :   gomp_for *dist = dyn_cast <gomp_for *> (stmt);
    1206                 :          0 :   gcc_assert (dist);
    1207                 :          0 :   gimple_seq prebody = gimple_omp_for_pre_body (dist);
    1208                 :          0 :   if (prebody)
    1209                 :          0 :     grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
    1210                 :            :                                          GRID_SEGMENT_GROUP, wi);
    1211                 :            : 
    1212                 :          0 :   if (grid->tiling)
    1213                 :            :     {
    1214                 :          0 :       gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
    1215                 :          0 :       gimple_omp_for_set_grid_group_iter (dist, true);
    1216                 :            : 
    1217                 :          0 :       struct walk_stmt_info wi_tiled;
    1218                 :          0 :       memset (&wi_tiled, 0, sizeof (wi_tiled));
    1219                 :          0 :       walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
    1220                 :            :                            grid_mark_tiling_parallels_and_loops, NULL,
    1221                 :            :                            &wi_tiled);
    1222                 :          0 :       return dist;
    1223                 :            :     }
    1224                 :            :   else
    1225                 :            :     {
    1226                 :          0 :       gimple_omp_for_set_grid_phony (dist, true);
    1227                 :          0 :       stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
    1228                 :            :                                                   tgt_bind,
    1229                 :            :                                                   GRID_SEGMENT_PRIVATE, wi);
    1230                 :          0 :       gcc_checking_assert (stmt);
    1231                 :          0 :       gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
    1232                 :          0 :       gimple_omp_parallel_set_grid_phony (parallel, true);
    1233                 :          0 :       stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
    1234                 :            :                                                   dst, tgt_bind,
    1235                 :            :                                                   GRID_SEGMENT_PRIVATE, wi);
    1236                 :          0 :       gomp_for *inner_loop = as_a <gomp_for *> (stmt);
    1237                 :          0 :       gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
    1238                 :          0 :       prebody = gimple_omp_for_pre_body (inner_loop);
    1239                 :          0 :       if (prebody)
    1240                 :          0 :         grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
    1241                 :            :                                              GRID_SEGMENT_PRIVATE, wi);
    1242                 :            : 
    1243                 :          0 :       if (gimple_omp_for_combined_p (inner_loop))
    1244                 :          0 :         grid_eliminate_combined_simd_part (inner_loop);
    1245                 :          0 :       struct walk_stmt_info body_wi;
    1246                 :          0 :       memset (&body_wi, 0, sizeof (body_wi));
    1247                 :          0 :       walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
    1248                 :            :                            grid_process_grid_body, NULL, &body_wi);
    1249                 :            : 
    1250                 :          0 :       return inner_loop;
    1251                 :            :     }
    1252                 :            : }
    1253                 :            : 
    1254                 :            : /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
    1255                 :            :    create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
    1256                 :            :    is the bind into which temporaries inserted before TARGET should be
    1257                 :            :    added.  */
    1258                 :            : 
    1259                 :            : static void
    1260                 :          0 : grid_attempt_target_gridification (gomp_target *target,
    1261                 :            :                                    gimple_stmt_iterator *gsi,
    1262                 :            :                                    gbind *tgt_bind)
    1263                 :            : {
    1264                 :            :   /* removed group_size */
    1265                 :          0 :   grid_prop grid = {};
    1266                 :          0 :   if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    1267                 :          0 :     return;
    1268                 :            : 
    1269                 :          0 :   location_t loc = gimple_location (target);
    1270                 :          0 :   if (dump_enabled_p ())
    1271                 :          0 :     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
    1272                 :            :                      "Target construct will be turned into a gridified HSA "
    1273                 :            :                      "kernel\n");
    1274                 :            : 
    1275                 :            :   /* Copy target body to a GPUKERNEL construct:  */
    1276                 :          0 :   gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    1277                 :          0 :     (gimple_omp_body (target));
    1278                 :            : 
    1279                 :          0 :   hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
    1280                 :          0 :   struct walk_stmt_info wi;
    1281                 :          0 :   memset (&wi, 0, sizeof (struct walk_stmt_info));
    1282                 :          0 :   wi.info = declmap;
    1283                 :            : 
    1284                 :            :   /* Copy assignments in between OMP statements before target, mark OMP
    1285                 :            :      statements within copy appropriately.  */
    1286                 :          0 :   gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
    1287                 :            :                                                         tgt_bind, &wi);
    1288                 :            : 
    1289                 :          0 :   gbind *old_bind
    1290                 :          0 :     = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
    1291                 :          0 :   gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
    1292                 :          0 :   tree new_block = gimple_bind_block (new_bind);
    1293                 :          0 :   tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
    1294                 :          0 :   BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
    1295                 :          0 :   BLOCK_SUBBLOCKS (enc_block) = new_block;
    1296                 :          0 :   BLOCK_SUPERCONTEXT (new_block) = enc_block;
    1297                 :          0 :   gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
    1298                 :          0 :   gimple_seq_add_stmt
    1299                 :          0 :     (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
    1300                 :            :      gpukernel);
    1301                 :            : 
    1302                 :          0 :   for (size_t i = 0; i < grid.collapse; i++)
    1303                 :          0 :     walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
    1304                 :          0 :   push_gimplify_context ();
    1305                 :          0 :   for (size_t i = 0; i < grid.collapse; i++)
    1306                 :            :     {
    1307                 :          0 :       tree index_var = gimple_omp_for_index (inner_loop, i);
    1308                 :          0 :       tree itype, type = TREE_TYPE (index_var);
    1309                 :          0 :       if (POINTER_TYPE_P (type))
    1310                 :          0 :         itype = signed_type_for (type);
    1311                 :            :       else
    1312                 :            :         itype = type;
    1313                 :            : 
    1314                 :          0 :       enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
    1315                 :          0 :       tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
    1316                 :          0 :       walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
    1317                 :          0 :       tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
    1318                 :          0 :       walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
    1319                 :          0 :       tree step
    1320                 :          0 :         = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
    1321                 :          0 :       omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
    1322                 :          0 :       n1 = fold_convert (itype, n1);
    1323                 :          0 :       n2 = fold_convert (itype, n2);
    1324                 :            : 
    1325                 :          0 :       tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
    1326                 :            : 
    1327                 :          0 :       tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
    1328                 :          0 :       t = fold_build2 (PLUS_EXPR, itype, step, t);
    1329                 :          0 :       t = fold_build2 (PLUS_EXPR, itype, t, n2);
    1330                 :          0 :       t = fold_build2 (MINUS_EXPR, itype, t, n1);
    1331                 :          0 :       if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
    1332                 :          0 :         t = fold_build2 (TRUNC_DIV_EXPR, itype,
    1333                 :            :                          fold_build1 (NEGATE_EXPR, itype, t),
    1334                 :            :                          fold_build1 (NEGATE_EXPR, itype, step));
    1335                 :            :       else
    1336                 :          0 :         t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
    1337                 :          0 :       t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
    1338                 :          0 :       if (grid.tiling)
    1339                 :            :         {
    1340                 :          0 :           if (cond_code == GT_EXPR)
    1341                 :          0 :             step = fold_build1 (NEGATE_EXPR, itype, step);
    1342                 :          0 :           t = fold_build2 (MULT_EXPR, itype, t, step);
    1343                 :            :         }
    1344                 :            : 
    1345                 :          0 :       tree gs = fold_convert (uint32_type_node, t);
    1346                 :          0 :       gimple_seq tmpseq = NULL;
    1347                 :          0 :       gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
    1348                 :          0 :       if (!gimple_seq_empty_p (tmpseq))
    1349                 :          0 :         gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
    1350                 :            : 
    1351                 :          0 :       tree ws;
    1352                 :          0 :       if (grid.group_sizes[i])
    1353                 :            :         {
    1354                 :          0 :           ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
    1355                 :          0 :           tmpseq = NULL;
    1356                 :          0 :           gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
    1357                 :          0 :           if (!gimple_seq_empty_p (tmpseq))
    1358                 :          0 :             gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
    1359                 :            :         }
    1360                 :            :       else
    1361                 :          0 :         ws = build_zero_cst (uint32_type_node);
    1362                 :            : 
    1363                 :          0 :       tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
    1364                 :          0 :       OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
    1365                 :          0 :       OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
    1366                 :          0 :       OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
    1367                 :          0 :       OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
    1368                 :          0 :       gimple_omp_target_set_clauses (target, c);
    1369                 :            :     }
    1370                 :          0 :   pop_gimplify_context (tgt_bind);
    1371                 :          0 :   delete declmap;
    1372                 :            :   return;
    1373                 :            : }
    1374                 :            : 
    1375                 :            : /* Walker function doing all the work for create_target_kernels.  */
    1376                 :            : 
    1377                 :            : static tree
    1378                 :          0 : grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
    1379                 :            :                                    bool *handled_ops_p,
    1380                 :            :                                    struct walk_stmt_info *incoming)
    1381                 :            : {
    1382                 :          0 :   *handled_ops_p = false;
    1383                 :            : 
    1384                 :          0 :   gimple *stmt = gsi_stmt (*gsi);
    1385                 :          0 :   gomp_target *target = dyn_cast <gomp_target *> (stmt);
    1386                 :          0 :   if (target)
    1387                 :            :     {
    1388                 :          0 :       gbind *tgt_bind = (gbind *) incoming->info;
    1389                 :          0 :       gcc_checking_assert (tgt_bind);
    1390                 :          0 :       grid_attempt_target_gridification (target, gsi, tgt_bind);
    1391                 :          0 :       return NULL_TREE;
    1392                 :            :     }
    1393                 :          0 :   gbind *bind = dyn_cast <gbind *> (stmt);
    1394                 :          0 :   if (bind)
    1395                 :            :     {
    1396                 :          0 :       *handled_ops_p = true;
    1397                 :          0 :       struct walk_stmt_info wi;
    1398                 :          0 :       memset (&wi, 0, sizeof (wi));
    1399                 :          0 :       wi.info = bind;
    1400                 :          0 :       walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
    1401                 :            :                            grid_gridify_all_targets_stmt, NULL, &wi);
    1402                 :            :     }
    1403                 :            :   return NULL_TREE;
    1404                 :            : }
    1405                 :            : 
    1406                 :            : /* Attempt to gridify all target constructs in BODY_P.  All such targets will
    1407                 :            :    have their bodies duplicated, with the new copy being put into a
    1408                 :            :    gimple_omp_grid_body statement.  All kernel-related construct within the
    1409                 :            :    grid_body will be marked with phony flags or kernel kinds.  Moreover, some
    1410                 :            :    re-structuring is often needed, such as copying pre-bodies before the target
    1411                 :            :    construct so that kernel grid sizes can be computed.  */
    1412                 :            : 
    1413                 :            : void
    1414                 :          0 : omp_grid_gridify_all_targets (gimple_seq *body_p)
    1415                 :            : {
    1416                 :          0 :   struct walk_stmt_info wi;
    1417                 :          0 :   memset (&wi, 0, sizeof (wi));
    1418                 :          0 :   walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
    1419                 :          0 : }

Generated by: LCOV version 1.0

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto --enable-host-shared. GCC test suite is run with the built compiler.