LCOV - code coverage report
Current view: top level - gcc - tree-vect-slp.c (source / functions) Hit Total Coverage
Test: gcc.info Lines: 1913 2109 90.7 %
Date: 2020-04-04 11:58:09 Functions: 62 65 95.4 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* SLP - Basic Block Vectorization
       2                 :            :    Copyright (C) 2007-2020 Free Software Foundation, Inc.
       3                 :            :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4                 :            :    and Ira Rosen <irar@il.ibm.com>
       5                 :            : 
       6                 :            : This file is part of GCC.
       7                 :            : 
       8                 :            : GCC is free software; you can redistribute it and/or modify it under
       9                 :            : the terms of the GNU General Public License as published by the Free
      10                 :            : Software Foundation; either version 3, or (at your option) any later
      11                 :            : version.
      12                 :            : 
      13                 :            : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14                 :            : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15                 :            : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16                 :            : for more details.
      17                 :            : 
      18                 :            : You should have received a copy of the GNU General Public License
      19                 :            : along with GCC; see the file COPYING3.  If not see
      20                 :            : <http://www.gnu.org/licenses/>.  */
      21                 :            : 
      22                 :            : #include "config.h"
      23                 :            : #include "system.h"
      24                 :            : #include "coretypes.h"
      25                 :            : #include "backend.h"
      26                 :            : #include "target.h"
      27                 :            : #include "rtl.h"
      28                 :            : #include "tree.h"
      29                 :            : #include "gimple.h"
      30                 :            : #include "tree-pass.h"
      31                 :            : #include "ssa.h"
      32                 :            : #include "optabs-tree.h"
      33                 :            : #include "insn-config.h"
      34                 :            : #include "recog.h"            /* FIXME: for insn_data */
      35                 :            : #include "fold-const.h"
      36                 :            : #include "stor-layout.h"
      37                 :            : #include "gimple-iterator.h"
      38                 :            : #include "cfgloop.h"
      39                 :            : #include "tree-vectorizer.h"
      40                 :            : #include "langhooks.h"
      41                 :            : #include "gimple-walk.h"
      42                 :            : #include "dbgcnt.h"
      43                 :            : #include "tree-vector-builder.h"
      44                 :            : #include "vec-perm-indices.h"
      45                 :            : #include "gimple-fold.h"
      46                 :            : #include "internal-fn.h"
      47                 :            : 
      48                 :            : 
      49                 :            : /* Recursively free the memory allocated for the SLP tree rooted at NODE.
      50                 :            :    FINAL_P is true if we have vectorized the instance or if we have
      51                 :            :    made a final decision not to vectorize the statements in any way.  */
      52                 :            : 
      53                 :            : static void
      54                 :     317111 : vect_free_slp_tree (slp_tree node, bool final_p)
      55                 :            : {
      56                 :     317111 :   int i;
      57                 :     317111 :   slp_tree child;
      58                 :            : 
      59                 :     317111 :   if (--node->refcnt != 0)
      60                 :     317111 :     return;
      61                 :            : 
      62                 :     318667 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      63                 :     117921 :     vect_free_slp_tree (child, final_p);
      64                 :            : 
      65                 :            :   /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
      66                 :            :      Some statements might no longer exist, after having been
      67                 :            :      removed by vect_transform_stmt.  Updating the remaining
      68                 :            :      statements would be redundant.  */
      69                 :     200746 :   if (!final_p)
      70                 :            :     {
      71                 :            :       stmt_vec_info stmt_info;
      72                 :     227388 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      73                 :            :         {
      74                 :     170260 :           gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
      75                 :     170260 :           STMT_VINFO_NUM_SLP_USES (stmt_info)--;
      76                 :            :         }
      77                 :            :     }
      78                 :            : 
      79                 :     200746 :   SLP_TREE_CHILDREN (node).release ();
      80                 :     200746 :   SLP_TREE_SCALAR_STMTS (node).release ();
      81                 :     200746 :   SLP_TREE_SCALAR_OPS (node).release ();
      82                 :     200746 :   SLP_TREE_VEC_STMTS (node).release ();
      83                 :     200746 :   SLP_TREE_LOAD_PERMUTATION (node).release ();
      84                 :            : 
      85                 :     200746 :   free (node);
      86                 :            : }
      87                 :            : 
      88                 :            : /* Free the memory allocated for the SLP instance.  FINAL_P is true if we
      89                 :            :    have vectorized the instance or if we have made a final decision not
      90                 :            :    to vectorize the statements in any way.  */
      91                 :            : 
      92                 :            : void
      93                 :      74167 : vect_free_slp_instance (slp_instance instance, bool final_p)
      94                 :            : {
      95                 :      74167 :   vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
      96                 :      74167 :   SLP_INSTANCE_LOADS (instance).release ();
      97                 :      74167 :   free (instance);
      98                 :      74167 : }
      99                 :            : 
     100                 :            : 
     101                 :            : /* Create an SLP node for SCALAR_STMTS.  */
     102                 :            : 
     103                 :            : static slp_tree
     104                 :     125382 : vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
     105                 :            : {
     106                 :     125382 :   slp_tree node;
     107                 :     125382 :   stmt_vec_info stmt_info = scalar_stmts[0];
     108                 :     125382 :   unsigned int nops;
     109                 :            : 
     110                 :     125382 :   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
     111                 :         86 :     nops = gimple_call_num_args (stmt);
     112                 :     125296 :   else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
     113                 :            :     {
     114                 :     124141 :       nops = gimple_num_ops (stmt) - 1;
     115                 :     227340 :       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
     116                 :        221 :         nops++;
     117                 :            :     }
     118                 :       1155 :   else if (is_a <gphi *> (stmt_info->stmt))
     119                 :            :     nops = 0;
     120                 :            :   else
     121                 :            :     return NULL;
     122                 :            : 
     123                 :     125382 :   node = XNEW (struct _slp_tree);
     124                 :     125382 :   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
     125                 :     125382 :   SLP_TREE_SCALAR_OPS (node) = vNULL;
     126                 :     125382 :   SLP_TREE_VEC_STMTS (node).create (0);
     127                 :     125382 :   SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
     128                 :     125382 :   SLP_TREE_CHILDREN (node).create (nops);
     129                 :     125382 :   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
     130                 :     125382 :   SLP_TREE_TWO_OPERATORS (node) = false;
     131                 :     125382 :   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
     132                 :     125382 :   node->refcnt = 1;
     133                 :     125382 :   node->max_nunits = 1;
     134                 :            : 
     135                 :     125382 :   unsigned i;
     136                 :     557000 :   FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
     137                 :     431618 :     STMT_VINFO_NUM_SLP_USES (stmt_info)++;
     138                 :            : 
     139                 :            :   return node;
     140                 :            : }
     141                 :            : 
     142                 :            : /* Create an SLP node for OPS.  */
     143                 :            : 
     144                 :            : static slp_tree
     145                 :      75123 : vect_create_new_slp_node (vec<tree> ops)
     146                 :            : {
     147                 :      75123 :   slp_tree node;
     148                 :            : 
     149                 :      75123 :   node = XNEW (struct _slp_tree);
     150                 :      75123 :   SLP_TREE_SCALAR_STMTS (node) = vNULL;
     151                 :      75123 :   SLP_TREE_SCALAR_OPS (node) = ops;
     152                 :      75123 :   SLP_TREE_VEC_STMTS (node).create (0);
     153                 :      75123 :   SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
     154                 :      75123 :   SLP_TREE_CHILDREN (node) = vNULL;
     155                 :      75123 :   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
     156                 :      75123 :   SLP_TREE_TWO_OPERATORS (node) = false;
     157                 :      75123 :   SLP_TREE_DEF_TYPE (node) = vect_external_def;
     158                 :      75123 :   node->refcnt = 1;
     159                 :      75123 :   node->max_nunits = 1;
     160                 :            : 
     161                 :      75123 :   return node;
     162                 :            : }
     163                 :            : 
     164                 :            : 
     165                 :            : /* This structure is used in creation of an SLP tree.  Each instance
     166                 :            :    corresponds to the same operand in a group of scalar stmts in an SLP
     167                 :            :    node.  */
     168                 :            : typedef struct _slp_oprnd_info
     169                 :            : {
     170                 :            :   /* Def-stmts for the operands.  */
     171                 :            :   vec<stmt_vec_info> def_stmts;
     172                 :            :   /* Operands.  */
     173                 :            :   vec<tree> ops;
     174                 :            :   /* Information about the first statement, its vector def-type, type, the
     175                 :            :      operand itself in case it's constant, and an indication if it's a pattern
     176                 :            :      stmt.  */
     177                 :            :   tree first_op_type;
     178                 :            :   enum vect_def_type first_dt;
     179                 :            :   bool any_pattern;
     180                 :            : } *slp_oprnd_info;
     181                 :            : 
     182                 :            : 
     183                 :            : /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
     184                 :            :    operand.  */
     185                 :            : static vec<slp_oprnd_info> 
     186                 :     116068 : vect_create_oprnd_info (int nops, int group_size)
     187                 :            : {
     188                 :     116068 :   int i;
     189                 :     116068 :   slp_oprnd_info oprnd_info;
     190                 :     116068 :   vec<slp_oprnd_info> oprnds_info;
     191                 :            : 
     192                 :     116068 :   oprnds_info.create (nops);
     193                 :     255114 :   for (i = 0; i < nops; i++)
     194                 :            :     {
     195                 :     139046 :       oprnd_info = XNEW (struct _slp_oprnd_info);
     196                 :     139046 :       oprnd_info->def_stmts.create (group_size);
     197                 :     139046 :       oprnd_info->ops.create (group_size);
     198                 :     139046 :       oprnd_info->first_dt = vect_uninitialized_def;
     199                 :     139046 :       oprnd_info->first_op_type = NULL_TREE;
     200                 :     139046 :       oprnd_info->any_pattern = false;
     201                 :     139046 :       oprnds_info.quick_push (oprnd_info);
     202                 :            :     }
     203                 :            : 
     204                 :     116068 :   return oprnds_info;
     205                 :            : }
     206                 :            : 
     207                 :            : 
     208                 :            : /* Free operands info.  */
     209                 :            : 
     210                 :            : static void
     211                 :     116068 : vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
     212                 :            : {
     213                 :     116068 :   int i;
     214                 :     116068 :   slp_oprnd_info oprnd_info;
     215                 :            : 
     216                 :     255114 :   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
     217                 :            :     {
     218                 :     139046 :       oprnd_info->def_stmts.release ();
     219                 :     139046 :       oprnd_info->ops.release ();
     220                 :     139046 :       XDELETE (oprnd_info);
     221                 :            :     }
     222                 :            : 
     223                 :     116068 :   oprnds_info.release ();
     224                 :     116068 : }
     225                 :            : 
     226                 :            : 
     227                 :            : /* Return true if STMTS contains a pattern statement.  */
     228                 :            : 
     229                 :            : static bool
     230                 :       7397 : vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
     231                 :            : {
     232                 :       7397 :   stmt_vec_info stmt_info;
     233                 :       7397 :   unsigned int i;
     234                 :      16369 :   FOR_EACH_VEC_ELT (stmts, i, stmt_info)
     235                 :       9041 :     if (is_pattern_stmt_p (stmt_info))
     236                 :            :       return true;
     237                 :            :   return false;
     238                 :            : }
     239                 :            : 
     240                 :            : /* Find the place of the data-ref in STMT_INFO in the interleaving chain
     241                 :            :    that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
     242                 :            :    of the chain.  */
     243                 :            : 
     244                 :            : int
     245                 :      42602 : vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
     246                 :            :                                       stmt_vec_info first_stmt_info)
     247                 :            : {
     248                 :      42602 :   stmt_vec_info next_stmt_info = first_stmt_info;
     249                 :      42602 :   int result = 0;
     250                 :            : 
     251                 :      42602 :   if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
     252                 :            :     return -1;
     253                 :            : 
     254                 :     240903 :   do
     255                 :            :     {
     256                 :     240903 :       if (next_stmt_info == stmt_info)
     257                 :      42602 :         return result;
     258                 :     198301 :       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
     259                 :     198301 :       if (next_stmt_info)
     260                 :     198301 :         result += DR_GROUP_GAP (next_stmt_info);
     261                 :            :     }
     262                 :     198301 :   while (next_stmt_info);
     263                 :            : 
     264                 :            :   return -1;
     265                 :            : }
     266                 :            : 
     267                 :            : /* Check whether it is possible to load COUNT elements of type ELT_TYPE
     268                 :            :    using the method implemented by duplicate_and_interleave.  Return true
     269                 :            :    if so, returning the number of intermediate vectors in *NVECTORS_OUT
     270                 :            :    (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
     271                 :            :    (if nonnull).  */
     272                 :            : 
     273                 :            : bool
     274                 :          0 : can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
     275                 :            :                                 tree elt_type, unsigned int *nvectors_out,
     276                 :            :                                 tree *vector_type_out,
     277                 :            :                                 tree *permutes)
     278                 :            : {
     279                 :          0 :   tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
     280                 :          0 :   if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
     281                 :          0 :     return false;
     282                 :            : 
     283                 :          0 :   machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
     284                 :          0 :   poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
     285                 :          0 :   unsigned int nvectors = 1;
     286                 :          0 :   for (;;)
     287                 :            :     {
     288                 :          0 :       scalar_int_mode int_mode;
     289                 :          0 :       poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
     290                 :          0 :       if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
     291                 :            :         {
     292                 :            :           /* Get the natural vector type for this SLP group size.  */
     293                 :          0 :           tree int_type = build_nonstandard_integer_type
     294                 :          0 :             (GET_MODE_BITSIZE (int_mode), 1);
     295                 :          0 :           tree vector_type
     296                 :          0 :             = get_vectype_for_scalar_type (vinfo, int_type, count);
     297                 :          0 :           if (vector_type
     298                 :          0 :               && VECTOR_MODE_P (TYPE_MODE (vector_type))
     299                 :          0 :               && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
     300                 :            :                            GET_MODE_SIZE (base_vector_mode)))
     301                 :            :             {
     302                 :            :               /* Try fusing consecutive sequences of COUNT / NVECTORS elements
     303                 :            :                  together into elements of type INT_TYPE and using the result
     304                 :            :                  to build NVECTORS vectors.  */
     305                 :          0 :               poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
     306                 :          0 :               vec_perm_builder sel1 (nelts, 2, 3);
     307                 :          0 :               vec_perm_builder sel2 (nelts, 2, 3);
     308                 :          0 :               poly_int64 half_nelts = exact_div (nelts, 2);
     309                 :          0 :               for (unsigned int i = 0; i < 3; ++i)
     310                 :            :                 {
     311                 :          0 :                   sel1.quick_push (i);
     312                 :          0 :                   sel1.quick_push (i + nelts);
     313                 :          0 :                   sel2.quick_push (half_nelts + i);
     314                 :          0 :                   sel2.quick_push (half_nelts + i + nelts);
     315                 :            :                 }
     316                 :          0 :               vec_perm_indices indices1 (sel1, 2, nelts);
     317                 :          0 :               vec_perm_indices indices2 (sel2, 2, nelts);
     318                 :          0 :               if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
     319                 :          0 :                   && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
     320                 :            :                 {
     321                 :          0 :                   if (nvectors_out)
     322                 :          0 :                     *nvectors_out = nvectors;
     323                 :          0 :                   if (vector_type_out)
     324                 :          0 :                     *vector_type_out = vector_type;
     325                 :          0 :                   if (permutes)
     326                 :            :                     {
     327                 :          0 :                       permutes[0] = vect_gen_perm_mask_checked (vector_type,
     328                 :            :                                                                 indices1);
     329                 :          0 :                       permutes[1] = vect_gen_perm_mask_checked (vector_type,
     330                 :            :                                                                 indices2);
     331                 :            :                     }
     332                 :          0 :                   return true;
     333                 :            :                 }
     334                 :            :             }
     335                 :            :         }
     336                 :          0 :       if (!multiple_p (elt_bytes, 2, &elt_bytes))
     337                 :            :         return false;
     338                 :          0 :       nvectors *= 2;
     339                 :          0 :     }
     340                 :            : }
     341                 :            : 
     342                 :            : /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
     343                 :            :    they are of a valid type and that they match the defs of the first stmt of
     344                 :            :    the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
     345                 :            :    by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
     346                 :            :    indicates swap is required for cond_expr stmts.  Specifically, *SWAP
     347                 :            :    is 1 if STMT is cond and operands of comparison need to be swapped;
     348                 :            :    *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
     349                 :            :    If there is any operand swap in this function, *SWAP is set to non-zero
     350                 :            :    value.
     351                 :            :    If there was a fatal error return -1; if the error could be corrected by
     352                 :            :    swapping operands of father node of this one, return 1; if everything is
     353                 :            :    ok return 0.  */
     354                 :            : static int
     355                 :     436838 : vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
     356                 :            :                              vec<stmt_vec_info> stmts, unsigned stmt_num,
     357                 :            :                              vec<slp_oprnd_info> *oprnds_info)
     358                 :            : {
     359                 :     436838 :   stmt_vec_info stmt_info = stmts[stmt_num];
     360                 :     436838 :   tree oprnd;
     361                 :     436838 :   unsigned int i, number_of_oprnds;
     362                 :     436838 :   enum vect_def_type dt = vect_uninitialized_def;
     363                 :     436838 :   slp_oprnd_info oprnd_info;
     364                 :     436838 :   int first_op_idx = 1;
     365                 :     436838 :   unsigned int commutative_op = -1U;
     366                 :     436838 :   bool first_op_cond = false;
     367                 :     436838 :   bool first = stmt_num == 0;
     368                 :            : 
     369                 :     436838 :   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
     370                 :            :     {
     371                 :        482 :       number_of_oprnds = gimple_call_num_args (stmt);
     372                 :        482 :       first_op_idx = 3;
     373                 :        482 :       if (gimple_call_internal_p (stmt))
     374                 :            :         {
     375                 :          4 :           internal_fn ifn = gimple_call_internal_fn (stmt);
     376                 :          4 :           commutative_op = first_commutative_argument (ifn);
     377                 :            : 
     378                 :            :           /* Masked load, only look at mask.  */
     379                 :          4 :           if (ifn == IFN_MASK_LOAD)
     380                 :            :             {
     381                 :          2 :               number_of_oprnds = 1;
     382                 :            :               /* Mask operand index.  */
     383                 :          2 :               first_op_idx = 5;
     384                 :            :             }
     385                 :            :         }
     386                 :            :     }
     387                 :     436356 :   else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
     388                 :            :     {
     389                 :     436356 :       enum tree_code code = gimple_assign_rhs_code (stmt);
     390                 :     436356 :       number_of_oprnds = gimple_num_ops (stmt) - 1;
     391                 :            :       /* Swap can only be done for cond_expr if asked to, otherwise we
     392                 :            :          could result in different comparison code to the first stmt.  */
     393                 :     436356 :       if (code == COND_EXPR
     394                 :     436356 :           && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
     395                 :            :         {
     396                 :            :           first_op_cond = true;
     397                 :            :           number_of_oprnds++;
     398                 :            :         }
     399                 :            :       else
     400                 :     435854 :         commutative_op = commutative_tree_code (code) ? 0U : -1U;
     401                 :            :     }
     402                 :            :   else
     403                 :            :     return -1;
     404                 :            : 
     405                 :     436838 :   bool swapped = (*swap != 0);
     406                 :     436838 :   gcc_assert (!swapped || first_op_cond);
     407                 :     973127 :   for (i = 0; i < number_of_oprnds; i++)
     408                 :            :     {
     409                 :     538120 : again:
     410                 :     539443 :       if (first_op_cond)
     411                 :            :         {
     412                 :            :           /* Map indicating how operands of cond_expr should be swapped.  */
     413                 :       2008 :           int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
     414                 :       2008 :           int *map = maps[*swap];
     415                 :            : 
     416                 :       2008 :           if (i < 2)
     417                 :       1004 :             oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
     418                 :            :                                              first_op_idx), map[i]);
     419                 :            :           else
     420                 :       1004 :             oprnd = gimple_op (stmt_info->stmt, map[i]);
     421                 :            :         }
     422                 :            :       else
     423                 :     537435 :         oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
     424                 :     539443 :       if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
     425                 :          0 :         oprnd = TREE_OPERAND (oprnd, 0);
     426                 :            : 
     427                 :     539443 :       oprnd_info = (*oprnds_info)[i];
     428                 :            : 
     429                 :     539443 :       stmt_vec_info def_stmt_info;
     430                 :     539443 :       if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
     431                 :            :         {
     432                 :        133 :           if (dump_enabled_p ())
     433                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     434                 :            :                              "Build SLP failed: can't analyze def for %T\n",
     435                 :            :                              oprnd);
     436                 :            : 
     437                 :       1831 :           return -1;
     438                 :            :         }
     439                 :            : 
     440                 :     539310 :       if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
     441                 :      33611 :         oprnd_info->any_pattern = true;
     442                 :            : 
     443                 :     539310 :       if (first)
     444                 :            :         {
     445                 :            :           /* For the swapping logic below force vect_reduction_def
     446                 :            :              for the reduction op in a SLP reduction group.  */
     447                 :     138743 :           if (!STMT_VINFO_DATA_REF (stmt_info)
     448                 :      50863 :               && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
     449                 :        391 :               && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
     450                 :     138933 :               && def_stmt_info)
     451                 :        190 :             dt = vect_reduction_def;
     452                 :     138743 :           oprnd_info->first_dt = dt;
     453                 :     138743 :           oprnd_info->first_op_type = TREE_TYPE (oprnd);
     454                 :            :         }
     455                 :            :       else
     456                 :            :         {
     457                 :            :           /* Not first stmt of the group, check that the def-stmt/s match
     458                 :            :              the def-stmt/s of the first stmt.  Allow different definition
     459                 :            :              types for reduction chains: the first stmt must be a
     460                 :            :              vect_reduction_def (a phi node), and the rest
     461                 :            :              end in the reduction chain.  */
     462                 :     400567 :           tree type = TREE_TYPE (oprnd);
     463                 :     400567 :           if ((oprnd_info->first_dt != dt
     464                 :       3549 :                && !(oprnd_info->first_dt == vect_reduction_def
     465                 :        603 :                     && !STMT_VINFO_DATA_REF (stmt_info)
     466                 :        603 :                     && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
     467                 :        558 :                     && def_stmt_info
     468                 :        558 :                     && !STMT_VINFO_DATA_REF (def_stmt_info)
     469                 :        552 :                     && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
     470                 :            :                         == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
     471                 :       5291 :                && !((oprnd_info->first_dt == vect_external_def
     472                 :       3035 :                      || oprnd_info->first_dt == vect_constant_def)
     473                 :            :                     && (dt == vect_external_def
     474                 :       2256 :                         || dt == vect_constant_def)))
     475                 :     397822 :               || !types_compatible_p (oprnd_info->first_op_type, type)
     476                 :     798338 :               || (!STMT_VINFO_DATA_REF (stmt_info)
     477                 :     180992 :                   && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
     478                 :       3388 :                   && ((!def_stmt_info
     479                 :       1657 :                        || STMT_VINFO_DATA_REF (def_stmt_info)
     480                 :       2865 :                        || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
     481                 :            :                            != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
     482                 :       1694 :                       != (oprnd_info->first_dt != vect_reduction_def))))
     483                 :            :             {
     484                 :            :               /* Try swapping operands if we got a mismatch.  */
     485                 :       3021 :               if (i == commutative_op && !swapped)
     486                 :            :                 {
     487                 :       1323 :                   if (dump_enabled_p ())
     488                 :        121 :                     dump_printf_loc (MSG_NOTE, vect_location,
     489                 :            :                                      "trying swapped operands\n");
     490                 :       1323 :                   swapped = true;
     491                 :       1323 :                   goto again;
     492                 :            :                 }
     493                 :            : 
     494                 :       1698 :               if (dump_enabled_p ())
     495                 :        161 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     496                 :            :                                  "Build SLP failed: different types\n");
     497                 :            : 
     498                 :       1698 :               return 1;
     499                 :            :             }
     500                 :     397546 :           if ((dt == vect_constant_def
     501                 :     397546 :                || dt == vect_external_def)
     502                 :     204010 :               && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
     503                 :     397546 :               && (TREE_CODE (type) == BOOLEAN_TYPE
     504                 :            :                   || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
     505                 :            :                                                       type)))
     506                 :            :             {
     507                 :            :               if (dump_enabled_p ())
     508                 :            :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     509                 :            :                                  "Build SLP failed: invalid type of def "
     510                 :            :                                  "for variable-length SLP %T\n", oprnd);
     511                 :            :               return -1;
     512                 :            :             }
     513                 :            :         }
     514                 :            : 
     515                 :            :       /* Check the types of the definitions.  */
     516                 :     536289 :       switch (dt)
     517                 :            :         {
     518                 :      45354 :         case vect_external_def:
     519                 :            :           /* Make sure to demote the overall operand to external.  */
     520                 :      45354 :           oprnd_info->first_dt = vect_external_def;
     521                 :            :           /* Fallthru.  */
     522                 :     281363 :         case vect_constant_def:
     523                 :     281363 :           oprnd_info->def_stmts.quick_push (NULL);
     524                 :     281363 :           oprnd_info->ops.quick_push (oprnd);
     525                 :            :           break;
     526                 :            : 
     527                 :     252935 :         case vect_internal_def:
     528                 :     252935 :         case vect_reduction_def:
     529                 :     252935 :           if (oprnd_info->first_dt == vect_reduction_def
     530                 :       9745 :               && !STMT_VINFO_DATA_REF (stmt_info)
     531                 :       9745 :               && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
     532                 :        866 :               && !STMT_VINFO_DATA_REF (def_stmt_info)
     533                 :     253801 :               && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
     534                 :            :                   == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
     535                 :            :             {
     536                 :            :               /* For a SLP reduction chain we want to duplicate the
     537                 :            :                  reduction to each of the chain members.  That gets
     538                 :            :                  us a sane SLP graph (still the stmts are not 100%
     539                 :            :                  correct wrt the initial values).  */
     540                 :        676 :               gcc_assert (!first);
     541                 :        676 :               oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
     542                 :        676 :               oprnd_info->ops.quick_push (oprnd_info->ops[0]);
     543                 :            :               break;
     544                 :            :             }
     545                 :            :           /* Fallthru.  */
     546                 :     254250 :         case vect_induction_def:
     547                 :     254250 :           oprnd_info->def_stmts.quick_push (def_stmt_info);
     548                 :     790539 :           oprnd_info->ops.quick_push (oprnd);
     549                 :            :           break;
     550                 :            : 
     551                 :          0 :         default:
     552                 :            :           /* FORNOW: Not supported.  */
     553                 :          0 :           if (dump_enabled_p ())
     554                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     555                 :            :                              "Build SLP failed: illegal type of def %T\n",
     556                 :            :                              oprnd);
     557                 :            : 
     558                 :            :           return -1;
     559                 :            :         }
     560                 :            :     }
     561                 :            : 
     562                 :            :   /* Swap operands.  */
     563                 :     435007 :   if (swapped)
     564                 :            :     {
     565                 :       1093 :       if (dump_enabled_p ())
     566                 :        143 :         dump_printf_loc (MSG_NOTE, vect_location,
     567                 :            :                          "swapped operands to match def types in %G",
     568                 :            :                          stmt_info->stmt);
     569                 :            :     }
     570                 :            : 
     571                 :     435007 :   *swap = swapped;
     572                 :     435007 :   return 0;
     573                 :            : }
     574                 :            : 
     575                 :            : /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
     576                 :            :    Return true if we can, meaning that this choice doesn't conflict with
     577                 :            :    existing SLP nodes that use STMT_INFO.  */
     578                 :            : 
     579                 :            : static bool
     580                 :     514438 : vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
     581                 :            : {
     582                 :     514438 :   tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
     583                 :     514438 :   if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
     584                 :            :     return true;
     585                 :            : 
     586                 :     287231 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
     587                 :     630706 :       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     588                 :            :     {
     589                 :            :       /* We maintain the invariant that if any statement in the group is
     590                 :            :          used, all other members of the group have the same vector type.  */
     591                 :      36827 :       stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
     592                 :            :       stmt_vec_info member_info = first_info;
     593                 :      36827 :       for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
     594                 :      29198 :         if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
     595                 :      29198 :             || is_pattern_stmt_p (member_info))
     596                 :            :           break;
     597                 :            : 
     598                 :       7944 :       if (!member_info)
     599                 :            :         {
     600                 :      35493 :           for (member_info = first_info; member_info;
     601                 :      27864 :                member_info = DR_GROUP_NEXT_ELEMENT (member_info))
     602                 :      27864 :             STMT_VINFO_VECTYPE (member_info) = vectype;
     603                 :            :           return true;
     604                 :            :         }
     605                 :            :     }
     606                 :     339347 :   else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
     607                 :     339347 :            && !is_pattern_stmt_p (stmt_info))
     608                 :            :     {
     609                 :     324197 :       STMT_VINFO_VECTYPE (stmt_info) = vectype;
     610                 :     324197 :       return true;
     611                 :            :     }
     612                 :            : 
     613                 :      15465 :   if (dump_enabled_p ())
     614                 :            :     {
     615                 :         53 :       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     616                 :            :                        "Build SLP failed: incompatible vector"
     617                 :            :                        " types for: %G", stmt_info->stmt);
     618                 :         53 :       dump_printf_loc (MSG_NOTE, vect_location,
     619                 :            :                        "    old vector type: %T\n", old_vectype);
     620                 :         53 :       dump_printf_loc (MSG_NOTE, vect_location,
     621                 :            :                        "    new vector type: %T\n", vectype);
     622                 :            :     }
     623                 :            :   return false;
     624                 :            : }
     625                 :            : 
     626                 :            : /* Try to infer and assign a vector type to all the statements in STMTS.
     627                 :            :    Used only for BB vectorization.  */
     628                 :            : 
     629                 :            : static bool
     630                 :      11117 : vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
     631                 :            : {
     632                 :      11117 :   tree vectype, nunits_vectype;
     633                 :      22234 :   if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
     634                 :      22234 :                                        &nunits_vectype, stmts.length ()))
     635                 :            :     return false;
     636                 :            : 
     637                 :            :   stmt_vec_info stmt_info;
     638                 :            :   unsigned int i;
     639                 :      38446 :   FOR_EACH_VEC_ELT (stmts, i, stmt_info)
     640                 :      27591 :     if (!vect_update_shared_vectype (stmt_info, vectype))
     641                 :            :       return false;
     642                 :            : 
     643                 :            :   return true;
     644                 :            : }
     645                 :            : 
     646                 :            : /* Return true if call statements CALL1 and CALL2 are similar enough
     647                 :            :    to be combined into the same SLP group.  */
     648                 :            : 
     649                 :            : static bool
     650                 :        382 : compatible_calls_p (gcall *call1, gcall *call2)
     651                 :            : {
     652                 :        382 :   unsigned int nargs = gimple_call_num_args (call1);
     653                 :        382 :   if (nargs != gimple_call_num_args (call2))
     654                 :            :     return false;
     655                 :            : 
     656                 :        382 :   if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
     657                 :            :     return false;
     658                 :            : 
     659                 :        382 :   if (gimple_call_internal_p (call1))
     660                 :            :     {
     661                 :          1 :       if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
     662                 :          1 :                                TREE_TYPE (gimple_call_lhs (call2))))
     663                 :            :         return false;
     664                 :          2 :       for (unsigned int i = 0; i < nargs; ++i)
     665                 :          2 :         if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
     666                 :          1 :                                  TREE_TYPE (gimple_call_arg (call2, i))))
     667                 :            :           return false;
     668                 :            :     }
     669                 :            :   else
     670                 :            :     {
     671                 :        381 :       if (!operand_equal_p (gimple_call_fn (call1),
     672                 :        381 :                             gimple_call_fn (call2), 0))
     673                 :            :         return false;
     674                 :            : 
     675                 :       1143 :       if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
     676                 :          0 :         return false;
     677                 :            :     }
     678                 :            :   return true;
     679                 :            : }
     680                 :            : 
     681                 :            : /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
     682                 :            :    caller's attempt to find the vector type in STMT_INFO with the narrowest
     683                 :            :    element type.  Return true if VECTYPE is nonnull and if it is valid
     684                 :            :    for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
     685                 :            :    number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
     686                 :            :    vect_build_slp_tree.  */
     687                 :            : 
     688                 :            : static bool
     689                 :     589820 : vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
     690                 :            :                         tree vectype, poly_uint64 *max_nunits)
     691                 :            : {
     692                 :     589820 :   if (!vectype)
     693                 :            :     {
     694                 :          0 :       if (dump_enabled_p ())
     695                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     696                 :            :                          "Build SLP failed: unsupported data-type in %G\n",
     697                 :            :                          stmt_info->stmt);
     698                 :            :       /* Fatal mismatch.  */
     699                 :          0 :       return false;
     700                 :            :     }
     701                 :            : 
     702                 :            :   /* If populating the vector type requires unrolling then fail
     703                 :            :      before adjusting *max_nunits for basic-block vectorization.  */
     704                 :     589820 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     705                 :     589820 :   unsigned HOST_WIDE_INT const_nunits;
     706                 :     589820 :   if (STMT_VINFO_BB_VINFO (stmt_info)
     707                 :     486847 :       && (!nunits.is_constant (&const_nunits)
     708                 :     486847 :           || const_nunits > group_size))
     709                 :            :     {
     710                 :          0 :       if (dump_enabled_p ())
     711                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     712                 :            :                          "Build SLP failed: unrolling required "
     713                 :            :                          "in basic block SLP\n");
     714                 :            :       /* Fatal mismatch.  */
     715                 :          0 :       return false;
     716                 :            :     }
     717                 :            : 
     718                 :            :   /* In case of multiple types we need to detect the smallest type.  */
     719                 :     589820 :   vect_update_max_nunits (max_nunits, vectype);
     720                 :     589820 :   return true;
     721                 :            : }
     722                 :            : 
     723                 :            : /* STMTS is a group of GROUP_SIZE SLP statements in which some
     724                 :            :    statements do the same operation as the first statement and in which
     725                 :            :    the others do ALT_STMT_CODE.  Return true if we can take one vector
     726                 :            :    of the first operation and one vector of the second and permute them
     727                 :            :    to get the required result.  VECTYPE is the type of the vector that
     728                 :            :    would be permuted.  */
     729                 :            : 
     730                 :            : static bool
     731                 :        994 : vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
     732                 :            :                                unsigned int group_size, tree vectype,
     733                 :            :                                tree_code alt_stmt_code)
     734                 :            : {
     735                 :        994 :   unsigned HOST_WIDE_INT count;
     736                 :        994 :   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
     737                 :            :     return false;
     738                 :            : 
     739                 :        994 :   vec_perm_builder sel (count, count, 1);
     740                 :       5626 :   for (unsigned int i = 0; i < count; ++i)
     741                 :            :     {
     742                 :       4632 :       unsigned int elt = i;
     743                 :       4632 :       gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
     744                 :       4632 :       if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
     745                 :       2253 :         elt += count;
     746                 :       4632 :       sel.quick_push (elt);
     747                 :            :     }
     748                 :       1988 :   vec_perm_indices indices (sel, 2, count);
     749                 :        994 :   return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
     750                 :            : }
     751                 :            : 
     752                 :            : /* Verify if the scalar stmts STMTS are isomorphic, require data
     753                 :            :    permutation or are of unsupported types of operation.  Return
     754                 :            :    true if they are, otherwise return false and indicate in *MATCHES
     755                 :            :    which stmts are not isomorphic to the first one.  If MATCHES[0]
     756                 :            :    is false then this indicates the comparison could not be
     757                 :            :    carried out or the stmts will never be vectorized by SLP.
     758                 :            : 
     759                 :            :    Note COND_EXPR is possibly isomorphic to another one after swapping its
     760                 :            :    operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
     761                 :            :    the first stmt by swapping the two operands of comparison; set SWAP[i]
     762                 :            :    to 2 if stmt I is isormorphic to the first stmt by inverting the code
     763                 :            :    of comparison.  Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
     764                 :            :    to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
     765                 :            : 
     766                 :            : static bool
     767                 :     169322 : vect_build_slp_tree_1 (unsigned char *swap,
     768                 :            :                        vec<stmt_vec_info> stmts, unsigned int group_size,
     769                 :            :                        poly_uint64 *max_nunits, bool *matches,
     770                 :            :                        bool *two_operators)
     771                 :            : {
     772                 :     169322 :   unsigned int i;
     773                 :     169322 :   stmt_vec_info first_stmt_info = stmts[0];
     774                 :     169322 :   enum tree_code first_stmt_code = ERROR_MARK;
     775                 :     169322 :   enum tree_code alt_stmt_code = ERROR_MARK;
     776                 :     169322 :   enum tree_code rhs_code = ERROR_MARK;
     777                 :     169322 :   enum tree_code first_cond_code = ERROR_MARK;
     778                 :     169322 :   tree lhs;
     779                 :     169322 :   bool need_same_oprnds = false;
     780                 :     169322 :   tree vectype = NULL_TREE, first_op1 = NULL_TREE;
     781                 :     169322 :   optab optab;
     782                 :     169322 :   int icode;
     783                 :     169322 :   machine_mode optab_op2_mode;
     784                 :     169322 :   machine_mode vec_mode;
     785                 :     169322 :   stmt_vec_info first_load = NULL, prev_first_load = NULL;
     786                 :     169322 :   bool load_p = false;
     787                 :            : 
     788                 :            :   /* For every stmt in NODE find its def stmt/s.  */
     789                 :     169322 :   stmt_vec_info stmt_info;
     790                 :     749316 :   FOR_EACH_VEC_ELT (stmts, i, stmt_info)
     791                 :            :     {
     792                 :     592727 :       vec_info *vinfo = stmt_info->vinfo;
     793                 :     592727 :       gimple *stmt = stmt_info->stmt;
     794                 :     592727 :       swap[i] = 0;
     795                 :     592727 :       matches[i] = false;
     796                 :            : 
     797                 :     592727 :       if (dump_enabled_p ())
     798                 :      37198 :         dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
     799                 :            : 
     800                 :            :       /* Fail to vectorize statements marked as unvectorizable.  */
     801                 :     592727 :       if (!STMT_VINFO_VECTORIZABLE (stmt_info))
     802                 :            :         {
     803                 :       3908 :           if (dump_enabled_p ())
     804                 :         18 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     805                 :            :                              "Build SLP failed: unvectorizable statement %G",
     806                 :            :                              stmt);
     807                 :            :           /* Fatal mismatch.  */
     808                 :       3908 :           matches[0] = false;
     809                 :      12733 :           return false;
     810                 :            :         }
     811                 :            : 
     812                 :     588819 :       lhs = gimple_get_lhs (stmt);
     813                 :     588819 :       if (lhs == NULL_TREE)
     814                 :            :         {
     815                 :          4 :           if (dump_enabled_p ())
     816                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     817                 :            :                              "Build SLP failed: not GIMPLE_ASSIGN nor "
     818                 :            :                              "GIMPLE_CALL %G", stmt);
     819                 :            :           /* Fatal mismatch.  */
     820                 :          4 :           matches[0] = false;
     821                 :          4 :           return false;
     822                 :            :         }
     823                 :            : 
     824                 :     588815 :       tree nunits_vectype;
     825                 :    1177630 :       if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
     826                 :     588815 :                                            &nunits_vectype, group_size)
     827                 :     588815 :           || (nunits_vectype
     828                 :     588661 :               && !vect_record_max_nunits (stmt_info, group_size,
     829                 :     588661 :                                           nunits_vectype, max_nunits)))
     830                 :            :         {
     831                 :            :           /* Fatal mismatch.  */
     832                 :        154 :           matches[0] = false;
     833                 :        154 :           return false;
     834                 :            :         }
     835                 :            : 
     836                 :     588661 :       gcc_assert (vectype);
     837                 :            : 
     838                 :     588661 :       if (is_a <bb_vec_info> (vinfo)
     839                 :     588661 :           && !vect_update_shared_vectype (stmt_info, vectype))
     840                 :      57594 :         continue;
     841                 :            : 
     842                 :     573319 :       gcall *call_stmt = dyn_cast <gcall *> (stmt);
     843                 :        484 :       if (call_stmt)
     844                 :            :         {
     845                 :        484 :           rhs_code = CALL_EXPR;
     846                 :            : 
     847                 :        484 :           if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
     848                 :            :             load_p = true;
     849                 :        480 :           else if ((gimple_call_internal_p (call_stmt)
     850                 :          2 :                     && (!vectorizable_internal_fn_p
     851                 :          2 :                         (gimple_call_internal_fn (call_stmt))))
     852                 :        480 :                    || gimple_call_tail_p (call_stmt)
     853                 :        480 :                    || gimple_call_noreturn_p (call_stmt)
     854                 :        480 :                    || !gimple_call_nothrow_p (call_stmt)
     855                 :        960 :                    || gimple_call_chain (call_stmt))
     856                 :            :             {
     857                 :          0 :               if (dump_enabled_p ())
     858                 :          0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     859                 :            :                                  "Build SLP failed: unsupported call type %G",
     860                 :            :                                  call_stmt);
     861                 :            :               /* Fatal mismatch.  */
     862                 :          0 :               matches[0] = false;
     863                 :          0 :               return false;
     864                 :            :             }
     865                 :            :         }
     866                 :            :       else
     867                 :            :         {
     868                 :     572835 :           rhs_code = gimple_assign_rhs_code (stmt);
     869                 :     572835 :           load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
     870                 :            :         }
     871                 :            : 
     872                 :            :       /* Check the operation.  */
     873                 :     573319 :       if (i == 0)
     874                 :            :         {
     875                 :     156324 :           first_stmt_code = rhs_code;
     876                 :            : 
     877                 :            :           /* Shift arguments should be equal in all the packed stmts for a
     878                 :            :              vector shift with scalar shift operand.  */
     879                 :     156324 :           if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
     880                 :            :               || rhs_code == LROTATE_EXPR
     881                 :     156324 :               || rhs_code == RROTATE_EXPR)
     882                 :            :             {
     883                 :       1469 :               vec_mode = TYPE_MODE (vectype);
     884                 :            : 
     885                 :            :               /* First see if we have a vector/vector shift.  */
     886                 :       1469 :               optab = optab_for_tree_code (rhs_code, vectype,
     887                 :            :                                            optab_vector);
     888                 :            : 
     889                 :       1469 :               if (!optab
     890                 :       1469 :                   || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
     891                 :            :                 {
     892                 :            :                   /* No vector/vector shift, try for a vector/scalar shift.  */
     893                 :       1392 :                   optab = optab_for_tree_code (rhs_code, vectype,
     894                 :            :                                                optab_scalar);
     895                 :            : 
     896                 :       1392 :                   if (!optab)
     897                 :            :                     {
     898                 :          0 :                       if (dump_enabled_p ())
     899                 :          0 :                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     900                 :            :                                          "Build SLP failed: no optab.\n");
     901                 :            :                       /* Fatal mismatch.  */
     902                 :          0 :                       matches[0] = false;
     903                 :          0 :                       return false;
     904                 :            :                     }
     905                 :       1392 :                   icode = (int) optab_handler (optab, vec_mode);
     906                 :       1392 :                   if (icode == CODE_FOR_nothing)
     907                 :            :                     {
     908                 :          4 :                       if (dump_enabled_p ())
     909                 :          0 :                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     910                 :            :                                          "Build SLP failed: "
     911                 :            :                                          "op not supported by target.\n");
     912                 :            :                       /* Fatal mismatch.  */
     913                 :          4 :                       matches[0] = false;
     914                 :          4 :                       return false;
     915                 :            :                     }
     916                 :       1388 :                   optab_op2_mode = insn_data[icode].operand[2].mode;
     917                 :       1388 :                   if (!VECTOR_MODE_P (optab_op2_mode))
     918                 :            :                     {
     919                 :       1388 :                       need_same_oprnds = true;
     920                 :       1388 :                       first_op1 = gimple_assign_rhs2 (stmt);
     921                 :            :                     }
     922                 :            :                 }
     923                 :            :             }
     924                 :     154855 :           else if (rhs_code == WIDEN_LSHIFT_EXPR)
     925                 :            :             {
     926                 :          0 :               need_same_oprnds = true;
     927                 :          0 :               first_op1 = gimple_assign_rhs2 (stmt);
     928                 :            :             }
     929                 :     154855 :           else if (call_stmt
     930                 :     154855 :                    && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
     931                 :            :             {
     932                 :          0 :               need_same_oprnds = true;
     933                 :          0 :               first_op1 = gimple_call_arg (call_stmt, 1);
     934                 :            :             }
     935                 :            :         }
     936                 :            :       else
     937                 :            :         {
     938                 :     416995 :           if (first_stmt_code != rhs_code
     939                 :     416995 :               && alt_stmt_code == ERROR_MARK)
     940                 :      19057 :             alt_stmt_code = rhs_code;
     941                 :     448869 :           if (first_stmt_code != rhs_code
     942                 :      39091 :               && (first_stmt_code != IMAGPART_EXPR
     943                 :      39091 :                   || rhs_code != REALPART_EXPR)
     944                 :      37997 :               && (first_stmt_code != REALPART_EXPR
     945                 :      37997 :                   || rhs_code != IMAGPART_EXPR)
     946                 :            :               /* Handle mismatches in plus/minus by computing both
     947                 :            :                  and merging the results.  */
     948                 :       2922 :               && !((first_stmt_code == PLUS_EXPR
     949                 :      34833 :                     || first_stmt_code == MINUS_EXPR)
     950                 :       5350 :                    && (alt_stmt_code == PLUS_EXPR
     951                 :       5350 :                        || alt_stmt_code == MINUS_EXPR)
     952                 :            :                    && rhs_code == alt_stmt_code)
     953                 :     448911 :               && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
     954                 :            :                    && (first_stmt_code == ARRAY_REF
     955                 :      27829 :                        || first_stmt_code == BIT_FIELD_REF
     956                 :      27829 :                        || first_stmt_code == INDIRECT_REF
     957                 :      27829 :                        || first_stmt_code == COMPONENT_REF
     958                 :      27829 :                        || first_stmt_code == MEM_REF)))
     959                 :            :             {
     960                 :      31874 :               if (dump_enabled_p ())
     961                 :            :                 {
     962                 :        726 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
     963                 :            :                                    "Build SLP failed: different operation "
     964                 :            :                                    "in stmt %G", stmt);
     965                 :        726 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     966                 :            :                                    "original stmt %G", first_stmt_info->stmt);
     967                 :            :                 }
     968                 :            :               /* Mismatch.  */
     969                 :      31874 :               continue;
     970                 :            :             }
     971                 :            : 
     972                 :     385121 :           if (need_same_oprnds)
     973                 :            :             {
     974                 :       7043 :               tree other_op1 = (call_stmt
     975                 :       7043 :                                 ? gimple_call_arg (call_stmt, 1)
     976                 :       7043 :                                 : gimple_assign_rhs2 (stmt));
     977                 :       7043 :               if (!operand_equal_p (first_op1, other_op1, 0))
     978                 :            :                 {
     979                 :        530 :                   if (dump_enabled_p ())
     980                 :         33 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     981                 :            :                                      "Build SLP failed: different shift "
     982                 :            :                                      "arguments in %G", stmt);
     983                 :            :                   /* Mismatch.  */
     984                 :        530 :                   continue;
     985                 :            :                 }
     986                 :            :             }
     987                 :            : 
     988                 :     384591 :           if (!load_p && rhs_code == CALL_EXPR)
     989                 :            :             {
     990                 :        382 :               if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
     991                 :            :                                        as_a <gcall *> (stmt)))
     992                 :            :                 {
     993                 :          0 :                   if (dump_enabled_p ())
     994                 :          0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     995                 :            :                                      "Build SLP failed: different calls in %G",
     996                 :            :                                      stmt);
     997                 :            :                   /* Mismatch.  */
     998                 :          0 :                   continue;
     999                 :            :                 }
    1000                 :            :             }
    1001                 :            :         }
    1002                 :            : 
    1003                 :            :       /* Grouped store or load.  */
    1004                 :     540911 :       if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1005                 :            :         {
    1006                 :     396102 :           if (REFERENCE_CLASS_P (lhs))
    1007                 :            :             {
    1008                 :            :               /* Store.  */
    1009                 :            :               ;
    1010                 :            :             }
    1011                 :            :           else
    1012                 :            :             {
    1013                 :            :               /* Load.  */
    1014                 :      59269 :               first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
    1015                 :      59269 :               if (prev_first_load)
    1016                 :            :                 {
    1017                 :            :                   /* Check that there are no loads from different interleaving
    1018                 :            :                      chains in the same node.  */
    1019                 :      42201 :                   if (prev_first_load != first_load)
    1020                 :            :                     {
    1021                 :       9844 :                       if (dump_enabled_p ())
    1022                 :         51 :                         dump_printf_loc (MSG_MISSED_OPTIMIZATION,
    1023                 :            :                                          vect_location,
    1024                 :            :                                          "Build SLP failed: different "
    1025                 :            :                                          "interleaving chains in one node %G",
    1026                 :            :                                          stmt);
    1027                 :            :                       /* Mismatch.  */
    1028                 :       9844 :                       continue;
    1029                 :            :                     }
    1030                 :            :                 }
    1031                 :            :               else
    1032                 :            :                 prev_first_load = first_load;
    1033                 :            :            }
    1034                 :            :         } /* Grouped access.  */
    1035                 :            :       else
    1036                 :            :         {
    1037                 :     144809 :           if (load_p)
    1038                 :            :             {
    1039                 :            :               /* Not grouped load.  */
    1040                 :       7969 :               if (dump_enabled_p ())
    1041                 :         62 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1042                 :            :                                  "Build SLP failed: not grouped load %G", stmt);
    1043                 :            : 
    1044                 :            :               /* FORNOW: Not grouped loads are not supported.  */
    1045                 :            :               /* Fatal mismatch.  */
    1046                 :       7969 :               matches[0] = false;
    1047                 :       7969 :               return false;
    1048                 :            :             }
    1049                 :            : 
    1050                 :            :           /* Not memory operation.  */
    1051                 :     136840 :           if (TREE_CODE_CLASS (rhs_code) != tcc_binary
    1052                 :     136840 :               && TREE_CODE_CLASS (rhs_code) != tcc_unary
    1053                 :       5712 :               && TREE_CODE_CLASS (rhs_code) != tcc_expression
    1054                 :       4210 :               && TREE_CODE_CLASS (rhs_code) != tcc_comparison
    1055                 :       1174 :               && rhs_code != VIEW_CONVERT_EXPR
    1056                 :       1174 :               && rhs_code != CALL_EXPR)
    1057                 :            :             {
    1058                 :        694 :               if (dump_enabled_p ())
    1059                 :          0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1060                 :            :                                  "Build SLP failed: operation unsupported %G",
    1061                 :            :                                  stmt);
    1062                 :            :               /* Fatal mismatch.  */
    1063                 :        694 :               matches[0] = false;
    1064                 :        694 :               return false;
    1065                 :            :             }
    1066                 :            : 
    1067                 :     136146 :           if (rhs_code == COND_EXPR)
    1068                 :            :             {
    1069                 :       1209 :               tree cond_expr = gimple_assign_rhs1 (stmt);
    1070                 :       1209 :               enum tree_code cond_code = TREE_CODE (cond_expr);
    1071                 :       1209 :               enum tree_code swap_code = ERROR_MARK;
    1072                 :       1209 :               enum tree_code invert_code = ERROR_MARK;
    1073                 :            : 
    1074                 :       1209 :               if (i == 0)
    1075                 :            :                 first_cond_code = TREE_CODE (cond_expr);
    1076                 :        887 :               else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
    1077                 :            :                 {
    1078                 :        393 :                   bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
    1079                 :        393 :                   swap_code = swap_tree_comparison (cond_code);
    1080                 :        393 :                   invert_code = invert_tree_comparison (cond_code, honor_nans);
    1081                 :            :                 }
    1082                 :            : 
    1083                 :       1209 :               if (first_cond_code == cond_code)
    1084                 :            :                 ;
    1085                 :            :               /* Isomorphic can be achieved by swapping.  */
    1086                 :         80 :               else if (first_cond_code == swap_code)
    1087                 :          9 :                 swap[i] = 1;
    1088                 :            :               /* Isomorphic can be achieved by inverting.  */
    1089                 :         71 :               else if (first_cond_code == invert_code)
    1090                 :         67 :                 swap[i] = 2;
    1091                 :            :               else
    1092                 :            :                 {
    1093                 :          4 :                   if (dump_enabled_p ())
    1094                 :          0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1095                 :            :                                      "Build SLP failed: different"
    1096                 :            :                                      " operation %G", stmt);
    1097                 :            :                   /* Mismatch.  */
    1098                 :          4 :                   continue;
    1099                 :            :                 }
    1100                 :            :             }
    1101                 :            :         }
    1102                 :            : 
    1103                 :     522400 :       matches[i] = true;
    1104                 :            :     }
    1105                 :            : 
    1106                 :     665979 :   for (i = 0; i < group_size; ++i)
    1107                 :     537573 :     if (!matches[i])
    1108                 :            :       return false;
    1109                 :            : 
    1110                 :            :   /* If we allowed a two-operation SLP node verify the target can cope
    1111                 :            :      with the permute we are going to use.  */
    1112                 :     128406 :   if (alt_stmt_code != ERROR_MARK
    1113                 :       2579 :       && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    1114                 :            :     {
    1115                 :        994 :       if (!vect_two_operations_perm_ok_p (stmts, group_size,
    1116                 :            :                                           vectype, alt_stmt_code))
    1117                 :            :         {
    1118                 :        885 :           for (i = 0; i < group_size; ++i)
    1119                 :        710 :             if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
    1120                 :            :               {
    1121                 :        373 :                 matches[i] = false;
    1122                 :        373 :                 if (dump_enabled_p ())
    1123                 :            :                   {
    1124                 :        204 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1125                 :            :                                      "Build SLP failed: different operation "
    1126                 :        102 :                                      "in stmt %G", stmts[i]->stmt);
    1127                 :        102 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1128                 :            :                                      "original stmt %G", first_stmt_info->stmt);
    1129                 :            :                   }
    1130                 :            :               }
    1131                 :            :           return false;
    1132                 :            :         }
    1133                 :        819 :       *two_operators = true;
    1134                 :            :     }
    1135                 :            : 
    1136                 :            :   return true;
    1137                 :            : }
    1138                 :            : 
    1139                 :            : /* Traits for the hash_set to record failed SLP builds for a stmt set.
    1140                 :            :    Note we never remove apart from at destruction time so we do not
    1141                 :            :    need a special value for deleted that differs from empty.  */
    1142                 :            : struct bst_traits
    1143                 :            : {
    1144                 :            :   typedef vec <stmt_vec_info> value_type;
    1145                 :            :   typedef vec <stmt_vec_info> compare_type;
    1146                 :            :   static inline hashval_t hash (value_type);
    1147                 :            :   static inline bool equal (value_type existing, value_type candidate);
    1148                 :    5139650 :   static inline bool is_empty (value_type x) { return !x.exists (); }
    1149                 :     915648 :   static inline bool is_deleted (value_type x) { return !x.exists (); }
    1150                 :            :   static const bool empty_zero_p = true;
    1151                 :            :   static inline void mark_empty (value_type &x) { x.release (); }
    1152                 :            :   static inline void mark_deleted (value_type &x) { x.release (); }
    1153                 :    1237100 :   static inline void remove (value_type &x) { x.release (); }
    1154                 :            : };
    1155                 :            : inline hashval_t
    1156                 :     941433 : bst_traits::hash (value_type x)
    1157                 :            : {
    1158                 :     941433 :   inchash::hash h;
    1159                 :    8945190 :   for (unsigned i = 0; i < x.length (); ++i)
    1160                 :    3531160 :     h.add_int (gimple_uid (x[i]->stmt));
    1161                 :     941433 :   return h.end ();
    1162                 :            : }
    1163                 :            : inline bool
    1164                 :     683157 : bst_traits::equal (value_type existing, value_type candidate)
    1165                 :            : {
    1166                 :    2049470 :   if (existing.length () != candidate.length ())
    1167                 :            :     return false;
    1168                 :     418525 :   for (unsigned i = 0; i < existing.length (); ++i)
    1169                 :     416348 :     if (existing[i] != candidate[i])
    1170                 :            :       return false;
    1171                 :            :   return true;
    1172                 :            : }
    1173                 :            : 
    1174                 :            : typedef hash_map <vec <gimple *>, slp_tree,
    1175                 :            :                   simple_hashmap_traits <bst_traits, slp_tree> >
    1176                 :            :   scalar_stmts_to_slp_tree_map_t;
    1177                 :            : 
    1178                 :            : static slp_tree
    1179                 :            : vect_build_slp_tree_2 (vec_info *vinfo,
    1180                 :            :                        vec<stmt_vec_info> stmts, unsigned int group_size,
    1181                 :            :                        poly_uint64 *max_nunits,
    1182                 :            :                        bool *matches, unsigned *npermutes, unsigned *tree_size,
    1183                 :            :                        scalar_stmts_to_slp_tree_map_t *bst_map);
    1184                 :            : 
    1185                 :            : static slp_tree
    1186                 :     172658 : vect_build_slp_tree (vec_info *vinfo,
    1187                 :            :                      vec<stmt_vec_info> stmts, unsigned int group_size,
    1188                 :            :                      poly_uint64 *max_nunits,
    1189                 :            :                      bool *matches, unsigned *npermutes, unsigned *tree_size,
    1190                 :            :                      scalar_stmts_to_slp_tree_map_t *bst_map)
    1191                 :            : {
    1192                 :     172658 :   if (slp_tree *leader = bst_map->get (stmts))
    1193                 :            :     {
    1194                 :       2177 :       if (dump_enabled_p ())
    1195                 :         87 :         dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
    1196                 :            :                          *leader ? "" : "failed ", *leader);
    1197                 :       2177 :       if (*leader)
    1198                 :            :         {
    1199                 :       1573 :           (*leader)->refcnt++;
    1200                 :       1573 :           vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
    1201                 :            :         }
    1202                 :       2177 :       return *leader;
    1203                 :            :     }
    1204                 :     170481 :   poly_uint64 this_max_nunits = 1;
    1205                 :     170481 :   slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
    1206                 :            :                                         &this_max_nunits,
    1207                 :     170481 :                                         matches, npermutes, tree_size, bst_map);
    1208                 :     170481 :   if (res)
    1209                 :            :     {
    1210                 :     114788 :       res->max_nunits = this_max_nunits;
    1211                 :     114788 :       vect_update_max_nunits (max_nunits, this_max_nunits);
    1212                 :            :       /* Keep a reference for the bst_map use.  */
    1213                 :     114788 :       res->refcnt++;
    1214                 :            :     }
    1215                 :     340962 :   bst_map->put (stmts.copy (), res);
    1216                 :     170481 :   return res;
    1217                 :            : }
    1218                 :            : 
    1219                 :            : /* Recursively build an SLP tree starting from NODE.
    1220                 :            :    Fail (and return a value not equal to zero) if def-stmts are not
    1221                 :            :    isomorphic, require data permutation or are of unsupported types of
    1222                 :            :    operation.  Otherwise, return 0.
    1223                 :            :    The value returned is the depth in the SLP tree where a mismatch
    1224                 :            :    was found.  */
    1225                 :            : 
    1226                 :            : static slp_tree
    1227                 :     170481 : vect_build_slp_tree_2 (vec_info *vinfo,
    1228                 :            :                        vec<stmt_vec_info> stmts, unsigned int group_size,
    1229                 :            :                        poly_uint64 *max_nunits,
    1230                 :            :                        bool *matches, unsigned *npermutes, unsigned *tree_size,
    1231                 :            :                        scalar_stmts_to_slp_tree_map_t *bst_map)
    1232                 :            : {
    1233                 :     170481 :   unsigned nops, i, this_tree_size = 0;
    1234                 :     170481 :   poly_uint64 this_max_nunits = *max_nunits;
    1235                 :     170481 :   slp_tree node;
    1236                 :            : 
    1237                 :     170481 :   matches[0] = false;
    1238                 :            : 
    1239                 :     170481 :   stmt_vec_info stmt_info = stmts[0];
    1240                 :     170481 :   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    1241                 :        105 :     nops = gimple_call_num_args (stmt);
    1242                 :     170376 :   else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    1243                 :            :     {
    1244                 :     169217 :       nops = gimple_num_ops (stmt) - 1;
    1245                 :     306652 :       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
    1246                 :        441 :         nops++;
    1247                 :            :     }
    1248                 :       1159 :   else if (is_a <gphi *> (stmt_info->stmt))
    1249                 :            :     nops = 0;
    1250                 :            :   else
    1251                 :            :     return NULL;
    1252                 :            : 
    1253                 :            :   /* If the SLP node is a PHI (induction or reduction), terminate
    1254                 :            :      the recursion.  */
    1255                 :     170481 :   if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    1256                 :            :     {
    1257                 :       1159 :       tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
    1258                 :       1159 :       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    1259                 :       1159 :       if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
    1260                 :            :         return NULL;
    1261                 :            : 
    1262                 :       1159 :       vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
    1263                 :            :       /* Induction from different IVs is not supported.  */
    1264                 :       1159 :       if (def_type == vect_induction_def)
    1265                 :            :         {
    1266                 :            :           stmt_vec_info other_info;
    1267                 :       2173 :           FOR_EACH_VEC_ELT (stmts, i, other_info)
    1268                 :       1925 :             if (stmt_info != other_info)
    1269                 :     170481 :               return NULL;
    1270                 :            :         }
    1271                 :        911 :       else if (def_type == vect_reduction_def
    1272                 :            :                || def_type == vect_double_reduction_def
    1273                 :        911 :                || def_type == vect_nested_cycle)
    1274                 :            :         {
    1275                 :            :           /* Else def types have to match.  */
    1276                 :            :           stmt_vec_info other_info;
    1277                 :       6409 :           FOR_EACH_VEC_ELT (stmts, i, other_info)
    1278                 :       5502 :             if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
    1279                 :     170481 :               return NULL;
    1280                 :            :         }
    1281                 :            :       else
    1282                 :            :         return NULL;
    1283                 :       1155 :       (*tree_size)++;
    1284                 :       1155 :       node = vect_create_new_slp_node (stmts);
    1285                 :       1155 :       return node;
    1286                 :            :     }
    1287                 :            : 
    1288                 :            : 
    1289                 :     169322 :   bool two_operators = false;
    1290                 :     169322 :   unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
    1291                 :     169322 :   if (!vect_build_slp_tree_1 (swap, stmts, group_size,
    1292                 :            :                               &this_max_nunits, matches, &two_operators))
    1293                 :            :     return NULL;
    1294                 :            : 
    1295                 :            :   /* If the SLP node is a load, terminate the recursion unless masked.  */
    1296                 :     100051 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
    1297                 :     228282 :       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    1298                 :            :     {
    1299                 :      12165 :       if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    1300                 :            :         {
    1301                 :            :           /* Masked load.  */
    1302                 :          2 :           gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
    1303                 :            :           nops = 1;
    1304                 :            :         }
    1305                 :            :       else
    1306                 :            :         {
    1307                 :      12163 :           *max_nunits = this_max_nunits;
    1308                 :      12163 :           (*tree_size)++;
    1309                 :      12163 :           node = vect_create_new_slp_node (stmts);
    1310                 :            :           /* And compute the load permutation.  Whether it is actually
    1311                 :            :              a permutation depends on the unrolling factor which is
    1312                 :            :              decided later.  */
    1313                 :      12163 :           vec<unsigned> load_permutation;
    1314                 :      12163 :           int j;
    1315                 :      12163 :           stmt_vec_info load_info;
    1316                 :      12163 :           load_permutation.create (group_size);
    1317                 :      12163 :           stmt_vec_info first_stmt_info
    1318                 :      12163 :             = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
    1319                 :      54326 :           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
    1320                 :            :             {
    1321                 :      42163 :               int load_place = vect_get_place_in_interleaving_chain
    1322                 :      42163 :                   (load_info, first_stmt_info);
    1323                 :      42163 :               gcc_assert (load_place != -1);
    1324                 :      42163 :               load_permutation.safe_push (load_place);
    1325                 :            :             }
    1326                 :      12163 :           SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
    1327                 :      12163 :           return node;
    1328                 :            :         }
    1329                 :            :     }
    1330                 :            : 
    1331                 :            :   /* Get at the operands, verifying they are compatible.  */
    1332                 :     116068 :   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
    1333                 :     116068 :   slp_oprnd_info oprnd_info;
    1334                 :     552773 :   FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    1335                 :            :     {
    1336                 :     436838 :       int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
    1337                 :            :                                              stmts, i, &oprnds_info);
    1338                 :     436838 :       if (res != 0)
    1339                 :       3529 :         matches[(res == -1) ? 0 : i] = false;
    1340                 :     436838 :       if (!matches[0])
    1341                 :            :         break;
    1342                 :            :     }
    1343                 :     550867 :   for (i = 0; i < group_size; ++i)
    1344                 :     435753 :     if (!matches[i])
    1345                 :            :       {
    1346                 :        954 :         vect_free_oprnd_info (oprnds_info);
    1347                 :        954 :         return NULL;
    1348                 :            :       }
    1349                 :            : 
    1350                 :     115114 :   auto_vec<slp_tree, 4> children;
    1351                 :            : 
    1352                 :     115114 :   stmt_info = stmts[0];
    1353                 :            : 
    1354                 :            :   /* Create SLP_TREE nodes for the definition node/s.  */
    1355                 :     234554 :   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    1356                 :            :     {
    1357                 :     133084 :       slp_tree child;
    1358                 :     133084 :       unsigned old_tree_size = this_tree_size;
    1359                 :     133084 :       unsigned int j;
    1360                 :            : 
    1361                 :     133084 :       if (oprnd_info->first_dt == vect_uninitialized_def)
    1362                 :            :         {
    1363                 :            :           /* COND_EXPR have one too many eventually if the condition
    1364                 :            :              is a SSA name.  */
    1365                 :        119 :           gcc_assert (i == 3 && nops == 4);
    1366                 :     119440 :           continue;
    1367                 :            :         }
    1368                 :            : 
    1369                 :     132965 :       if (oprnd_info->first_dt != vect_internal_def
    1370                 :      76303 :           && oprnd_info->first_dt != vect_reduction_def
    1371                 :      75376 :           && oprnd_info->first_dt != vect_induction_def)
    1372                 :            :         {
    1373                 :      75123 :           slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
    1374                 :      75123 :           SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
    1375                 :      75123 :           oprnd_info->ops = vNULL;
    1376                 :      75123 :           children.safe_push (invnode);
    1377                 :      75123 :           continue;
    1378                 :            :         }
    1379                 :            : 
    1380                 :      57842 :       if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
    1381                 :            :                                         group_size, &this_max_nunits,
    1382                 :            :                                         matches, npermutes,
    1383                 :            :                                         &this_tree_size, bst_map)) != NULL)
    1384                 :            :         {
    1385                 :            :           /* If we have all children of a non-unary child built up from
    1386                 :            :              scalars then just throw that away and build it up this node
    1387                 :            :              from scalars.  */
    1388                 :      32922 :           if (is_a <bb_vec_info> (vinfo)
    1389                 :      22761 :               && SLP_TREE_CHILDREN (child).length () > 1
    1390                 :            :               /* ???  Rejecting patterns this way doesn't work.  We'd have to
    1391                 :            :                  do extra work to cancel the pattern so the uses see the
    1392                 :            :                  scalar version.  */
    1393                 :      45667 :               && !oprnd_info->any_pattern)
    1394                 :            :             {
    1395                 :            :               slp_tree grandchild;
    1396                 :            : 
    1397                 :      14384 :               FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
    1398                 :      14108 :                 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
    1399                 :            :                   break;
    1400                 :      12568 :               if (!grandchild
    1401                 :      12292 :                   && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
    1402                 :            :                 {
    1403                 :            :                   /* Roll back.  */
    1404                 :        276 :                   this_tree_size = old_tree_size;
    1405                 :        828 :                   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
    1406                 :        552 :                     vect_free_slp_tree (grandchild, false);
    1407                 :        276 :                   SLP_TREE_CHILDREN (child).truncate (0);
    1408                 :            : 
    1409                 :        276 :                   if (dump_enabled_p ())
    1410                 :          7 :                     dump_printf_loc (MSG_NOTE, vect_location,
    1411                 :            :                                      "Building parent vector operands from "
    1412                 :            :                                      "scalars instead\n");
    1413                 :        276 :                   oprnd_info->def_stmts = vNULL;
    1414                 :        276 :                   SLP_TREE_DEF_TYPE (child) = vect_external_def;
    1415                 :        276 :                   SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
    1416                 :        276 :                   oprnd_info->ops = vNULL;
    1417                 :        276 :                   ++this_tree_size;
    1418                 :        276 :                   children.safe_push (child);
    1419                 :        276 :                   continue;
    1420                 :            :                 }
    1421                 :            :             }
    1422                 :            : 
    1423                 :      32646 :           oprnd_info->def_stmts = vNULL;
    1424                 :      32646 :           children.safe_push (child);
    1425                 :      32646 :           continue;
    1426                 :            :         }
    1427                 :            : 
    1428                 :            :       /* If the SLP build failed fatally and we analyze a basic-block
    1429                 :            :          simply treat nodes we fail to build as externally defined
    1430                 :            :          (and thus build vectors from the scalar defs).
    1431                 :            :          The cost model will reject outright expensive cases.
    1432                 :            :          ???  This doesn't treat cases where permutation ultimatively
    1433                 :            :          fails (or we don't try permutation below).  Ideally we'd
    1434                 :            :          even compute a permutation that will end up with the maximum
    1435                 :            :          SLP tree size...  */
    1436                 :      24920 :       if (is_a <bb_vec_info> (vinfo)
    1437                 :      19762 :           && !matches[0]
    1438                 :            :           /* ???  Rejecting patterns this way doesn't work.  We'd have to
    1439                 :            :              do extra work to cancel the pattern so the uses see the
    1440                 :            :              scalar version.  */
    1441                 :      13916 :           && !is_pattern_stmt_p (stmt_info)
    1442                 :      12515 :           && !oprnd_info->any_pattern
    1443                 :      35761 :           && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
    1444                 :            :         {
    1445                 :      10579 :           if (dump_enabled_p ())
    1446                 :         18 :             dump_printf_loc (MSG_NOTE, vect_location,
    1447                 :            :                              "Building vector operands from scalars\n");
    1448                 :      10579 :           this_tree_size++;
    1449                 :      10579 :           child = vect_create_new_slp_node (oprnd_info->def_stmts);
    1450                 :      10579 :           SLP_TREE_DEF_TYPE (child) = vect_external_def;
    1451                 :      10579 :           SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
    1452                 :      10579 :           children.safe_push (child);
    1453                 :      10579 :           oprnd_info->ops = vNULL;
    1454                 :      10579 :           oprnd_info->def_stmts = vNULL;
    1455                 :      10579 :           continue;
    1456                 :            :         }
    1457                 :            : 
    1458                 :            :       /* If the SLP build for operand zero failed and operand zero
    1459                 :            :          and one can be commutated try that for the scalar stmts
    1460                 :            :          that failed the match.  */
    1461                 :      14341 :       if (i == 0
    1462                 :            :           /* A first scalar stmt mismatch signals a fatal mismatch.  */
    1463                 :      13324 :           && matches[0]
    1464                 :            :           /* ???  For COND_EXPRs we can swap the comparison operands
    1465                 :            :              as well as the arms under some constraints.  */
    1466                 :       7383 :           && nops == 2
    1467                 :       2263 :           && oprnds_info[1]->first_dt == vect_internal_def
    1468                 :       1509 :           && is_gimple_assign (stmt_info->stmt)
    1469                 :            :           /* Swapping operands for reductions breaks assumptions later on.  */
    1470                 :       1505 :           && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
    1471                 :       1501 :           && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
    1472                 :            :           /* Do so only if the number of not successful permutes was nor more
    1473                 :            :              than a cut-ff as re-trying the recursive match on
    1474                 :            :              possibly each level of the tree would expose exponential
    1475                 :            :              behavior.  */
    1476                 :      15842 :           && *npermutes < 4)
    1477                 :            :         {
    1478                 :            :           /* See whether we can swap the matching or the non-matching
    1479                 :            :              stmt operands.  */
    1480                 :            :           bool swap_not_matching = true;
    1481                 :       1829 :           do
    1482                 :            :             {
    1483                 :      10297 :               for (j = 0; j < group_size; ++j)
    1484                 :            :                 {
    1485                 :       9194 :                   if (matches[j] != !swap_not_matching)
    1486                 :       3054 :                     continue;
    1487                 :       6140 :                   stmt_vec_info stmt_info = stmts[j];
    1488                 :            :                   /* Verify if we can swap operands of this stmt.  */
    1489                 :       6140 :                   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
    1490                 :       6140 :                   if (!stmt
    1491                 :       6140 :                       || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
    1492                 :            :                     {
    1493                 :        726 :                       if (!swap_not_matching)
    1494                 :        362 :                         goto fail;
    1495                 :            :                       swap_not_matching = false;
    1496                 :            :                       break;
    1497                 :            :                     }
    1498                 :            :                 }
    1499                 :            :             }
    1500                 :       1467 :           while (j != group_size);
    1501                 :            : 
    1502                 :            :           /* Swap mismatched definition stmts.  */
    1503                 :       1103 :           if (dump_enabled_p ())
    1504                 :        105 :             dump_printf_loc (MSG_NOTE, vect_location,
    1505                 :            :                              "Re-trying with swapped operands of stmts ");
    1506                 :       8205 :           for (j = 0; j < group_size; ++j)
    1507                 :       7102 :             if (matches[j] == !swap_not_matching)
    1508                 :            :               {
    1509                 :       4982 :                 std::swap (oprnds_info[0]->def_stmts[j],
    1510                 :       4982 :                            oprnds_info[1]->def_stmts[j]);
    1511                 :       4982 :                 std::swap (oprnds_info[0]->ops[j],
    1512                 :       4982 :                            oprnds_info[1]->ops[j]);
    1513                 :       4982 :                 if (dump_enabled_p ())
    1514                 :        360 :                   dump_printf (MSG_NOTE, "%d ", j);
    1515                 :            :               }
    1516                 :       1103 :           if (dump_enabled_p ())
    1517                 :        105 :             dump_printf (MSG_NOTE, "\n");
    1518                 :            :           /* And try again with scratch 'matches' ... */
    1519                 :       1103 :           bool *tem = XALLOCAVEC (bool, group_size);
    1520                 :       1103 :           if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
    1521                 :            :                                             group_size, &this_max_nunits,
    1522                 :            :                                             tem, npermutes,
    1523                 :            :                                             &this_tree_size, bst_map)) != NULL)
    1524                 :            :             {
    1525                 :            :               /* If we have all children of a non-unary child built up from
    1526                 :            :                  scalars then just throw that away and build it up this node
    1527                 :            :                  from scalars.  */
    1528                 :        697 :               if (is_a <bb_vec_info> (vinfo)
    1529                 :        331 :                   && SLP_TREE_CHILDREN (child).length () > 1
    1530                 :            :                   /* ???  Rejecting patterns this way doesn't work.  We'd have
    1531                 :            :                      to do extra work to cancel the pattern so the uses see the
    1532                 :            :                      scalar version.  */
    1533                 :        830 :                   && !oprnd_info->any_pattern)
    1534                 :            :                 {
    1535                 :            :                   unsigned int j;
    1536                 :            :                   slp_tree grandchild;
    1537                 :            : 
    1538                 :        130 :                   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
    1539                 :        130 :                     if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
    1540                 :            :                       break;
    1541                 :        130 :                   if (!grandchild
    1542                 :        130 :                       && (vect_update_all_shared_vectypes
    1543                 :          0 :                           (oprnd_info->def_stmts)))
    1544                 :            :                     {
    1545                 :            :                       /* Roll back.  */
    1546                 :          0 :                       this_tree_size = old_tree_size;
    1547                 :          0 :                       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
    1548                 :          0 :                         vect_free_slp_tree (grandchild, false);
    1549                 :          0 :                       SLP_TREE_CHILDREN (child).truncate (0);
    1550                 :            : 
    1551                 :          0 :                       if (dump_enabled_p ())
    1552                 :          0 :                         dump_printf_loc (MSG_NOTE, vect_location,
    1553                 :            :                                          "Building parent vector operands from "
    1554                 :            :                                          "scalars instead\n");
    1555                 :          0 :                       oprnd_info->def_stmts = vNULL;
    1556                 :          0 :                       SLP_TREE_DEF_TYPE (child) = vect_external_def;
    1557                 :          0 :                       SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
    1558                 :          0 :                       oprnd_info->ops = vNULL;
    1559                 :          0 :                       ++this_tree_size;
    1560                 :          0 :                       children.safe_push (child);
    1561                 :          0 :                       continue;
    1562                 :            :                     }
    1563                 :            :                 }
    1564                 :            : 
    1565                 :        697 :               oprnd_info->def_stmts = vNULL;
    1566                 :        697 :               children.safe_push (child);
    1567                 :        697 :               continue;
    1568                 :            :             }
    1569                 :            : 
    1570                 :        406 :           ++*npermutes;
    1571                 :            :         }
    1572                 :            : 
    1573                 :      13644 : fail:
    1574                 :      13644 :       gcc_assert (child == NULL);
    1575                 :      28334 :       FOR_EACH_VEC_ELT (children, j, child)
    1576                 :       1046 :         vect_free_slp_tree (child, false);
    1577                 :      13644 :       vect_free_oprnd_info (oprnds_info);
    1578                 :      13644 :       return NULL;
    1579                 :            :     }
    1580                 :            : 
    1581                 :     101470 :   vect_free_oprnd_info (oprnds_info);
    1582                 :            : 
    1583                 :     101470 :   *tree_size += this_tree_size + 1;
    1584                 :     101470 :   *max_nunits = this_max_nunits;
    1585                 :            : 
    1586                 :     101470 :   node = vect_create_new_slp_node (stmts);
    1587                 :     101470 :   SLP_TREE_TWO_OPERATORS (node) = two_operators;
    1588                 :     216584 :   SLP_TREE_CHILDREN (node).splice (children);
    1589                 :            :   return node;
    1590                 :            : }
    1591                 :            : 
    1592                 :            : /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
    1593                 :            : 
    1594                 :            : static void
    1595                 :       6609 : vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
    1596                 :            :                      slp_tree node, hash_set<slp_tree> &visited)
    1597                 :            : {
    1598                 :       6609 :   unsigned i, j;
    1599                 :       6609 :   stmt_vec_info stmt_info;
    1600                 :       6609 :   slp_tree child;
    1601                 :       6609 :   tree op;
    1602                 :            : 
    1603                 :       6609 :   if (visited.add (node))
    1604                 :       3118 :     return;
    1605                 :            : 
    1606                 :       6575 :   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
    1607                 :       6575 :   dump_user_location_t user_loc = loc.get_user_location ();
    1608                 :       6575 :   dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
    1609                 :       6575 :                    SLP_TREE_DEF_TYPE (node) == vect_external_def
    1610                 :            :                    ? " (external)"
    1611                 :            :                    : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
    1612                 :       6368 :                       ? " (constant)"
    1613                 :            :                       : ""), node,
    1614                 :       6575 :                    estimated_poly_value (node->max_nunits), node->refcnt);
    1615                 :       6575 :   if (SLP_TREE_SCALAR_STMTS (node).exists ())
    1616                 :      32482 :     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    1617                 :      27141 :       dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
    1618                 :            :   else
    1619                 :            :     {
    1620                 :       1234 :       dump_printf_loc (metadata, user_loc, "\t{ ");
    1621                 :       7127 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
    1622                 :       5893 :         dump_printf (metadata, "%T%s ", op,
    1623                 :       5893 :                      i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
    1624                 :       1234 :       dump_printf (metadata, "}\n");
    1625                 :            :     }
    1626                 :       6575 :   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    1627                 :            :     {
    1628                 :        359 :       dump_printf_loc (metadata, user_loc, "\tload permutation {");
    1629                 :       1975 :       FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
    1630                 :       1616 :         dump_printf (dump_kind, " %u", j);
    1631                 :        359 :       dump_printf (dump_kind, " }\n");
    1632                 :            :     }
    1633                 :       6575 :   if (SLP_TREE_CHILDREN (node).is_empty ())
    1634                 :            :     return;
    1635                 :       3491 :   dump_printf_loc (metadata, user_loc, "\tchildren");
    1636                 :       8678 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    1637                 :       5187 :     dump_printf (dump_kind, " %p", (void *)child);
    1638                 :       3491 :   dump_printf (dump_kind, "\n");
    1639                 :      12169 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    1640                 :       5187 :     vect_print_slp_tree (dump_kind, loc, child, visited);
    1641                 :            : }
    1642                 :            : 
    1643                 :            : static void
    1644                 :       1422 : vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
    1645                 :            :                      slp_tree node)
    1646                 :            : {
    1647                 :       1422 :   hash_set<slp_tree> visited;
    1648                 :       1422 :   vect_print_slp_tree (dump_kind, loc, node, visited);
    1649                 :       1422 : }
    1650                 :            : 
    1651                 :            : /* Mark the tree rooted at NODE with PURE_SLP.  */
    1652                 :            : 
    1653                 :            : static void
    1654                 :     163118 : vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
    1655                 :            : {
    1656                 :     163118 :   int i;
    1657                 :     163118 :   stmt_vec_info stmt_info;
    1658                 :     163118 :   slp_tree child;
    1659                 :            : 
    1660                 :     163118 :   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    1661                 :            :     return;
    1662                 :            : 
    1663                 :      92399 :   if (visited.add (node))
    1664                 :            :     return;
    1665                 :            : 
    1666                 :     367435 :   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    1667                 :     275269 :     STMT_SLP_TYPE (stmt_info) = pure_slp;
    1668                 :            : 
    1669                 :     191296 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    1670                 :      99130 :     vect_mark_slp_stmts (child, visited);
    1671                 :            : }
    1672                 :            : 
    1673                 :            : static void
    1674                 :      63988 : vect_mark_slp_stmts (slp_tree node)
    1675                 :            : {
    1676                 :      63988 :   hash_set<slp_tree> visited;
    1677                 :      63988 :   vect_mark_slp_stmts (node, visited);
    1678                 :      63988 : }
    1679                 :            : 
    1680                 :            : /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
    1681                 :            : 
    1682                 :            : static void
    1683                 :     147772 : vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
    1684                 :            : {
    1685                 :     147772 :   int i;
    1686                 :     147772 :   stmt_vec_info stmt_info;
    1687                 :     147772 :   slp_tree child;
    1688                 :            : 
    1689                 :     147772 :   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    1690                 :            :     return;
    1691                 :            : 
    1692                 :      79776 :   if (visited.add (node))
    1693                 :            :     return;
    1694                 :            : 
    1695                 :     302985 :   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    1696                 :            :     {
    1697                 :     223349 :       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
    1698                 :            :                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
    1699                 :     223349 :       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    1700                 :            :     }
    1701                 :            : 
    1702                 :     166628 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    1703                 :      86992 :     vect_mark_slp_stmts_relevant (child, visited);
    1704                 :            : }
    1705                 :            : 
    1706                 :            : static void
    1707                 :      60780 : vect_mark_slp_stmts_relevant (slp_tree node)
    1708                 :            : {
    1709                 :      60780 :   hash_set<slp_tree> visited;
    1710                 :      60780 :   vect_mark_slp_stmts_relevant (node, visited);
    1711                 :      60780 : }
    1712                 :            : 
    1713                 :            : /* Copy the SLP subtree rooted at NODE.  */
    1714                 :            : 
    1715                 :            : static slp_tree
    1716                 :        245 : slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
    1717                 :            : {
    1718                 :        245 :   unsigned i;
    1719                 :            : 
    1720                 :        245 :   bool existed_p;
    1721                 :        245 :   slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
    1722                 :        245 :   if (existed_p)
    1723                 :          4 :     return copy_ref;
    1724                 :            : 
    1725                 :        241 :   copy_ref = XNEW (_slp_tree);
    1726                 :        241 :   slp_tree copy = copy_ref;
    1727                 :        241 :   memcpy (copy, node, sizeof (_slp_tree));
    1728                 :        241 :   if (SLP_TREE_SCALAR_STMTS (node).exists ())
    1729                 :            :     {
    1730                 :        236 :       SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
    1731                 :        236 :       stmt_vec_info stmt_info;
    1732                 :        874 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    1733                 :        638 :         STMT_VINFO_NUM_SLP_USES (stmt_info)++;
    1734                 :            :     }
    1735                 :        241 :   if (SLP_TREE_SCALAR_OPS (node).exists ())
    1736                 :         10 :     SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
    1737                 :        241 :   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    1738                 :        148 :     SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
    1739                 :        241 :   if (SLP_TREE_CHILDREN (node).exists ())
    1740                 :        272 :     SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
    1741                 :        241 :   gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
    1742                 :        241 :   copy->refcnt = 0;
    1743                 :            : 
    1744                 :        241 :   slp_tree child;
    1745                 :        424 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
    1746                 :            :     {
    1747                 :        183 :       SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
    1748                 :        183 :       SLP_TREE_CHILDREN (copy)[i]->refcnt++;
    1749                 :            :     }
    1750                 :            :   return copy;
    1751                 :            : }
    1752                 :            : 
    1753                 :            : /* Rearrange the statements of NODE according to PERMUTATION.  */
    1754                 :            : 
    1755                 :            : static void
    1756                 :        245 : vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
    1757                 :            :                           vec<unsigned> permutation,
    1758                 :            :                           hash_set<slp_tree> &visited)
    1759                 :            : {
    1760                 :        245 :   unsigned int i;
    1761                 :        245 :   slp_tree child;
    1762                 :            : 
    1763                 :        245 :   if (visited.add (node))
    1764                 :        245 :     return;
    1765                 :            : 
    1766                 :        424 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    1767                 :        183 :     vect_slp_rearrange_stmts (child, group_size, permutation, visited);
    1768                 :            : 
    1769                 :        241 :   if (SLP_TREE_SCALAR_STMTS (node).exists ())
    1770                 :            :     {
    1771                 :        236 :       gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
    1772                 :            :       /* ???  Computation nodes are isomorphic and need no rearrangement.
    1773                 :            :          This is a quick hack to cover those where rearrangement breaks
    1774                 :            :          semantics because only the first stmt is guaranteed to have the
    1775                 :            :          correct operation code due to others being swapped or inverted.  */
    1776                 :        236 :       stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
    1777                 :        236 :       if (is_gimple_assign (first->stmt)
    1778                 :        236 :           && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
    1779                 :          1 :         return;
    1780                 :        235 :       vec<stmt_vec_info> tmp_stmts;
    1781                 :        235 :       tmp_stmts.create (group_size);
    1782                 :        235 :       tmp_stmts.quick_grow (group_size);
    1783                 :        235 :       stmt_vec_info stmt_info;
    1784                 :        871 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    1785                 :        636 :         tmp_stmts[permutation[i]] = stmt_info;
    1786                 :        235 :       SLP_TREE_SCALAR_STMTS (node).release ();
    1787                 :        235 :       SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
    1788                 :            :     }
    1789                 :        240 :   if (SLP_TREE_SCALAR_OPS (node).exists ())
    1790                 :            :     {
    1791                 :          5 :       gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
    1792                 :          5 :       vec<tree> tmp_ops;
    1793                 :          5 :       tmp_ops.create (group_size);
    1794                 :          5 :       tmp_ops.quick_grow (group_size);
    1795                 :          5 :       tree op;
    1796                 :         17 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
    1797                 :         12 :         tmp_ops[permutation[i]] = op;
    1798                 :          5 :       SLP_TREE_SCALAR_OPS (node).release ();
    1799                 :          5 :       SLP_TREE_SCALAR_OPS (node) = tmp_ops;
    1800                 :            :     }
    1801                 :            : }
    1802                 :            : 
    1803                 :            : 
    1804                 :            : /* Attempt to reorder stmts in a reduction chain so that we don't
    1805                 :            :    require any load permutation.  Return true if that was possible,
    1806                 :            :    otherwise return false.  */
    1807                 :            : 
    1808                 :            : static bool
    1809                 :        465 : vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
    1810                 :            : {
    1811                 :        465 :   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
    1812                 :        465 :   unsigned int i, j;
    1813                 :        465 :   unsigned int lidx;
    1814                 :        465 :   slp_tree node, load;
    1815                 :            : 
    1816                 :            :   /* Compare all the permutation sequences to the first one.  We know
    1817                 :            :      that at least one load is permuted.  */
    1818                 :        465 :   node = SLP_INSTANCE_LOADS (slp_instn)[0];
    1819                 :        465 :   if (!node->load_permutation.exists ())
    1820                 :            :     return false;
    1821                 :        477 :   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    1822                 :            :     {
    1823                 :        375 :       if (!load->load_permutation.exists ())
    1824                 :            :         return false;
    1825                 :        417 :       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
    1826                 :        405 :         if (lidx != node->load_permutation[j])
    1827                 :            :           return false;
    1828                 :            :     }
    1829                 :            : 
    1830                 :            :   /* Check that the loads in the first sequence are different and there
    1831                 :            :      are no gaps between them.  */
    1832                 :        102 :   auto_sbitmap load_index (group_size);
    1833                 :        102 :   bitmap_clear (load_index);
    1834                 :        348 :   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    1835                 :            :     {
    1836                 :        286 :       if (lidx >= group_size)
    1837                 :            :         return false;
    1838                 :        267 :       if (bitmap_bit_p (load_index, lidx))
    1839                 :            :         return false;
    1840                 :            : 
    1841                 :        246 :       bitmap_set_bit (load_index, lidx);
    1842                 :            :     }
    1843                 :        230 :   for (i = 0; i < group_size; i++)
    1844                 :        168 :     if (!bitmap_bit_p (load_index, i))
    1845                 :            :       return false;
    1846                 :            : 
    1847                 :            :   /* This permutation is valid for reduction.  Since the order of the
    1848                 :            :      statements in the nodes is not important unless they are memory
    1849                 :            :      accesses, we can rearrange the statements in all the nodes
    1850                 :            :      according to the order of the loads.  */
    1851                 :            : 
    1852                 :            :   /* We have to unshare the SLP tree we modify.  */
    1853                 :        164 :   hash_map<slp_tree, slp_tree> map;
    1854                 :         62 :   slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
    1855                 :         62 :   vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
    1856                 :         62 :   unshared->refcnt++;
    1857                 :         62 :   SLP_INSTANCE_TREE (slp_instn) = unshared;
    1858                 :        198 :   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    1859                 :        148 :     SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
    1860                 :         62 :   node = SLP_INSTANCE_LOADS (slp_instn)[0];
    1861                 :            : 
    1862                 :            :   /* Do the actual re-arrangement.  */
    1863                 :        124 :   hash_set<slp_tree> visited;
    1864                 :         62 :   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
    1865                 :            :                             node->load_permutation, visited);
    1866                 :            : 
    1867                 :            :   /* We are done, no actual permutations need to be generated.  */
    1868                 :         62 :   poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
    1869                 :        198 :   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    1870                 :            :     {
    1871                 :         74 :       stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    1872                 :         74 :       first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
    1873                 :            :       /* But we have to keep those permutations that are required because
    1874                 :            :          of handling of gaps.  */
    1875                 :         74 :       if (known_eq (unrolling_factor, 1U)
    1876                 :         74 :           || (group_size == DR_GROUP_SIZE (first_stmt_info)
    1877                 :         42 :               && DR_GROUP_GAP (first_stmt_info) == 0))
    1878                 :        145 :         SLP_TREE_LOAD_PERMUTATION (node).release ();
    1879                 :            :       else
    1880                 :         20 :         for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
    1881                 :          7 :           SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    1882                 :            :     }
    1883                 :            : 
    1884                 :         62 :   return true;
    1885                 :            : }
    1886                 :            : 
    1887                 :            : /* Gather loads in the SLP graph NODE and populate the INST loads array.  */
    1888                 :            : 
    1889                 :            : static void
    1890                 :     187972 : vect_gather_slp_loads (slp_instance inst, slp_tree node,
    1891                 :            :                        hash_set<slp_tree> &visited)
    1892                 :            : {
    1893                 :     187972 :   if (visited.add (node))
    1894                 :            :     return;
    1895                 :            : 
    1896                 :     187674 :   if (SLP_TREE_CHILDREN (node).length () == 0)
    1897                 :            :     {
    1898                 :      92600 :       if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    1899                 :            :         return;
    1900                 :      12236 :       stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    1901                 :      11327 :       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
    1902                 :      23563 :           && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    1903                 :      11327 :         SLP_INSTANCE_LOADS (inst).safe_push (node);
    1904                 :            :     }
    1905                 :            :   else
    1906                 :            :     {
    1907                 :            :       unsigned i;
    1908                 :            :       slp_tree child;
    1909                 :     208879 :       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    1910                 :     113805 :         vect_gather_slp_loads (inst, child, visited);
    1911                 :            :     }
    1912                 :            : }
    1913                 :            : 
    1914                 :            : static void
    1915                 :      74167 : vect_gather_slp_loads (slp_instance inst, slp_tree node)
    1916                 :            : {
    1917                 :      74167 :   hash_set<slp_tree> visited;
    1918                 :      74167 :   vect_gather_slp_loads (inst, node, visited);
    1919                 :      74167 : }
    1920                 :            : 
    1921                 :            : /* Check if the required load permutations in the SLP instance
    1922                 :            :    SLP_INSTN are supported.  */
    1923                 :            : 
    1924                 :            : static bool
    1925                 :       3351 : vect_supported_load_permutation_p (slp_instance slp_instn)
    1926                 :            : {
    1927                 :       3351 :   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
    1928                 :       3351 :   unsigned int i, j, k, next;
    1929                 :       3351 :   slp_tree node;
    1930                 :            : 
    1931                 :       3351 :   if (dump_enabled_p ())
    1932                 :            :     {
    1933                 :        312 :       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
    1934                 :        924 :       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    1935                 :        612 :         if (node->load_permutation.exists ())
    1936                 :       3429 :           FOR_EACH_VEC_ELT (node->load_permutation, j, next)
    1937                 :       2258 :             dump_printf (MSG_NOTE, "%d ", next);
    1938                 :            :         else
    1939                 :        223 :           for (k = 0; k < group_size; ++k)
    1940                 :        170 :             dump_printf (MSG_NOTE, "%d ", k);
    1941                 :        312 :       dump_printf (MSG_NOTE, "\n");
    1942                 :            :     }
    1943                 :            : 
    1944                 :            :   /* In case of reduction every load permutation is allowed, since the order
    1945                 :            :      of the reduction statements is not important (as opposed to the case of
    1946                 :            :      grouped stores).  The only condition we need to check is that all the
    1947                 :            :      load nodes are of the same size and have the same permutation (and then
    1948                 :            :      rearrange all the nodes of the SLP instance according to this 
    1949                 :            :      permutation).  */
    1950                 :            : 
    1951                 :            :   /* Check that all the load nodes are of the same size.  */
    1952                 :            :   /* ???  Can't we assert this? */
    1953                 :       9893 :   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    1954                 :      13084 :     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
    1955                 :            :       return false;
    1956                 :            : 
    1957                 :       3351 :   node = SLP_INSTANCE_TREE (slp_instn);
    1958                 :       3351 :   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    1959                 :            : 
    1960                 :            :   /* Reduction (there are no data-refs in the root).
    1961                 :            :      In reduction chain the order of the loads is not important.  */
    1962                 :       3351 :   if (!STMT_VINFO_DATA_REF (stmt_info)
    1963                 :       3351 :       && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    1964                 :        465 :     vect_attempt_slp_rearrange_stmts (slp_instn);
    1965                 :            : 
    1966                 :            :   /* In basic block vectorization we allow any subchain of an interleaving
    1967                 :            :      chain.
    1968                 :            :      FORNOW: not supported in loop SLP because of realignment compications.  */
    1969                 :       3351 :   if (STMT_VINFO_BB_VINFO (stmt_info))
    1970                 :            :     {
    1971                 :            :       /* Check whether the loads in an instance form a subchain and thus
    1972                 :            :          no permutation is necessary.  */
    1973                 :       5340 :       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    1974                 :            :         {
    1975                 :       3725 :           if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
    1976                 :        929 :             continue;
    1977                 :       6469 :           bool subchain_p = true;
    1978                 :            :           stmt_vec_info next_load_info = NULL;
    1979                 :            :           stmt_vec_info load_info;
    1980                 :       6469 :           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
    1981                 :            :             {
    1982                 :       4997 :               if (j != 0
    1983                 :       4997 :                   && (next_load_info != load_info
    1984                 :       1182 :                       || DR_GROUP_GAP (load_info) != 1))
    1985                 :            :                 {
    1986                 :            :                   subchain_p = false;
    1987                 :            :                   break;
    1988                 :            :                 }
    1989                 :       3673 :               next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
    1990                 :            :             }
    1991                 :       2796 :           if (subchain_p)
    1992                 :       4422 :             SLP_TREE_LOAD_PERMUTATION (node).release ();
    1993                 :            :           else
    1994                 :            :             {
    1995                 :       1324 :               stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
    1996                 :       1324 :               group_info = DR_GROUP_FIRST_ELEMENT (group_info);
    1997                 :       1324 :               unsigned HOST_WIDE_INT nunits;
    1998                 :       1324 :               unsigned k, maxk = 0;
    1999                 :       7772 :               FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
    2000                 :       6448 :                 if (k > maxk)
    2001                 :            :                   maxk = k;
    2002                 :            :               /* In BB vectorization we may not actually use a loaded vector
    2003                 :            :                  accessing elements in excess of DR_GROUP_SIZE.  */
    2004                 :       1324 :               tree vectype = STMT_VINFO_VECTYPE (group_info);
    2005                 :       1324 :               if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
    2006                 :       1324 :                   || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
    2007                 :            :                 {
    2008                 :        326 :                   if (dump_enabled_p ())
    2009                 :         20 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2010                 :            :                                      "BB vectorization with gaps at the end of "
    2011                 :            :                                      "a load is not supported\n");
    2012                 :        775 :                   return false;
    2013                 :            :                 }
    2014                 :            : 
    2015                 :            :               /* Verify the permutation can be generated.  */
    2016                 :        998 :               vec<tree> tem;
    2017                 :        998 :               unsigned n_perms;
    2018                 :        998 :               if (!vect_transform_slp_perm_load (node, tem, NULL,
    2019                 :        998 :                                                  1, slp_instn, true, &n_perms))
    2020                 :            :                 {
    2021                 :        449 :                   if (dump_enabled_p ())
    2022                 :          3 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION,
    2023                 :            :                                      vect_location,
    2024                 :            :                                      "unsupported load permutation\n");
    2025                 :        449 :                   return false;
    2026                 :            :                 }
    2027                 :            :             }
    2028                 :            :         }
    2029                 :            :       return true;
    2030                 :            :     }
    2031                 :            : 
    2032                 :            :   /* For loop vectorization verify we can generate the permutation.  Be
    2033                 :            :      conservative about the vectorization factor, there are permutations
    2034                 :            :      that will use three vector inputs only starting from a specific factor
    2035                 :            :      and the vectorization factor is not yet final.
    2036                 :            :      ???  The SLP instance unrolling factor might not be the maximum one.  */
    2037                 :        961 :   unsigned n_perms;
    2038                 :        961 :   poly_uint64 test_vf
    2039                 :            :     = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
    2040                 :        961 :                              LOOP_VINFO_VECT_FACTOR
    2041                 :        961 :                              (STMT_VINFO_LOOP_VINFO (stmt_info)));
    2042                 :       3282 :   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    2043                 :       2489 :     if (node->load_permutation.exists ()
    2044                 :       2489 :         && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
    2045                 :            :                                           slp_instn, true, &n_perms))
    2046                 :            :       return false;
    2047                 :            : 
    2048                 :            :   return true;
    2049                 :            : }
    2050                 :            : 
    2051                 :            : 
    2052                 :            : /* Find the last store in SLP INSTANCE.  */
    2053                 :            : 
    2054                 :            : stmt_vec_info
    2055                 :     194048 : vect_find_last_scalar_stmt_in_slp (slp_tree node)
    2056                 :            : {
    2057                 :     194048 :   stmt_vec_info last = NULL;
    2058                 :     194048 :   stmt_vec_info stmt_vinfo;
    2059                 :            : 
    2060                 :     814761 :   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    2061                 :            :     {
    2062                 :     620713 :       stmt_vinfo = vect_orig_stmt (stmt_vinfo);
    2063                 :    1047380 :       last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    2064                 :            :     }
    2065                 :            : 
    2066                 :     194048 :   return last;
    2067                 :            : }
    2068                 :            : 
    2069                 :            : /* Splits a group of stores, currently beginning at FIRST_VINFO, into
    2070                 :            :    two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
    2071                 :            :    (also containing the first GROUP1_SIZE stmts, since stores are
    2072                 :            :    consecutive), the second containing the remainder.
    2073                 :            :    Return the first stmt in the second group.  */
    2074                 :            : 
    2075                 :            : static stmt_vec_info
    2076                 :      16048 : vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
    2077                 :            : {
    2078                 :      16048 :   gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
    2079                 :      16048 :   gcc_assert (group1_size > 0);
    2080                 :      16048 :   int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
    2081                 :      16048 :   gcc_assert (group2_size > 0);
    2082                 :      16048 :   DR_GROUP_SIZE (first_vinfo) = group1_size;
    2083                 :            : 
    2084                 :      16048 :   stmt_vec_info stmt_info = first_vinfo;
    2085                 :      62705 :   for (unsigned i = group1_size; i > 1; i--)
    2086                 :            :     {
    2087                 :      46657 :       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
    2088                 :      46657 :       gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    2089                 :            :     }
    2090                 :            :   /* STMT is now the last element of the first group.  */
    2091                 :      16048 :   stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
    2092                 :      16048 :   DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
    2093                 :            : 
    2094                 :      16048 :   DR_GROUP_SIZE (group2) = group2_size;
    2095                 :      51542 :   for (stmt_info = group2; stmt_info;
    2096                 :      35494 :        stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    2097                 :            :     {
    2098                 :      35494 :       DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
    2099                 :      35494 :       gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    2100                 :            :     }
    2101                 :            : 
    2102                 :            :   /* For the second group, the DR_GROUP_GAP is that before the original group,
    2103                 :            :      plus skipping over the first vector.  */
    2104                 :      16048 :   DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
    2105                 :            : 
    2106                 :            :   /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
    2107                 :      16048 :   DR_GROUP_GAP (first_vinfo) += group2_size;
    2108                 :            : 
    2109                 :      16048 :   if (dump_enabled_p ())
    2110                 :         29 :     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
    2111                 :            :                      group1_size, group2_size);
    2112                 :            : 
    2113                 :      16048 :   return group2;
    2114                 :            : }
    2115                 :            : 
    2116                 :            : /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
    2117                 :            :    statements and a vector of NUNITS elements.  */
    2118                 :            : 
    2119                 :            : static poly_uint64
    2120                 :      82742 : calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
    2121                 :            : {
    2122                 :      82742 :   return exact_div (common_multiple (nunits, group_size), group_size);
    2123                 :            : }
    2124                 :            : 
    2125                 :            : /* Analyze an SLP instance starting from a group of grouped stores.  Call
    2126                 :            :    vect_build_slp_tree to build a tree of packed stmts if possible.
    2127                 :            :    Return FALSE if it's impossible to SLP any stmt in the loop.  */
    2128                 :            : 
    2129                 :            : static bool
    2130                 :     118319 : vect_analyze_slp_instance (vec_info *vinfo,
    2131                 :            :                            scalar_stmts_to_slp_tree_map_t *bst_map,
    2132                 :            :                            stmt_vec_info stmt_info, unsigned max_tree_size)
    2133                 :            : {
    2134                 :     118319 :   slp_instance new_instance;
    2135                 :     118319 :   slp_tree node;
    2136                 :     118319 :   unsigned int group_size;
    2137                 :     118319 :   tree vectype, scalar_type = NULL_TREE;
    2138                 :     118319 :   unsigned int i;
    2139                 :     118319 :   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    2140                 :     118319 :   vec<stmt_vec_info> scalar_stmts;
    2141                 :     118319 :   bool constructor = false;
    2142                 :            : 
    2143                 :     118319 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2144                 :            :     {
    2145                 :     112472 :       scalar_type = TREE_TYPE (DR_REF (dr));
    2146                 :     112472 :       group_size = DR_GROUP_SIZE (stmt_info);
    2147                 :     112472 :       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
    2148                 :            :     }
    2149                 :       5847 :   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    2150                 :            :     {
    2151                 :        203 :       gcc_assert (is_a <loop_vec_info> (vinfo));
    2152                 :        203 :       vectype = STMT_VINFO_VECTYPE (stmt_info);
    2153                 :        203 :       group_size = REDUC_GROUP_SIZE (stmt_info);
    2154                 :            :     }
    2155                 :       5644 :   else if (is_gimple_assign (stmt_info->stmt)
    2156                 :       5644 :             && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
    2157                 :            :     {
    2158                 :       3979 :       vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
    2159                 :       3979 :       group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
    2160                 :            :       constructor = true;
    2161                 :            :     }
    2162                 :            :   else
    2163                 :            :     {
    2164                 :       1665 :       gcc_assert (is_a <loop_vec_info> (vinfo));
    2165                 :       1665 :       vectype = STMT_VINFO_VECTYPE (stmt_info);
    2166                 :       1665 :       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    2167                 :            :     }
    2168                 :            : 
    2169                 :     118319 :   if (!vectype)
    2170                 :            :     {
    2171                 :       3313 :       if (dump_enabled_p ())
    2172                 :          9 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2173                 :            :                          "Build SLP failed: unsupported data-type %T\n",
    2174                 :            :                          scalar_type);
    2175                 :            : 
    2176                 :       3313 :       return false;
    2177                 :            :     }
    2178                 :     115006 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    2179                 :            : 
    2180                 :            :   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
    2181                 :     115006 :   scalar_stmts.create (group_size);
    2182                 :     115006 :   stmt_vec_info next_info = stmt_info;
    2183                 :     115006 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2184                 :            :     {
    2185                 :            :       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
    2186                 :     486431 :       while (next_info)
    2187                 :            :         {
    2188                 :     379336 :           scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
    2189                 :     377260 :           next_info = DR_GROUP_NEXT_ELEMENT (next_info);
    2190                 :            :         }
    2191                 :            :     }
    2192                 :       5835 :   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    2193                 :            :     {
    2194                 :            :       /* Collect the reduction stmts and store them in
    2195                 :            :          SLP_TREE_SCALAR_STMTS.  */
    2196                 :       1067 :       while (next_info)
    2197                 :            :         {
    2198                 :        877 :           scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
    2199                 :        876 :           next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
    2200                 :            :         }
    2201                 :            :       /* Mark the first element of the reduction chain as reduction to properly
    2202                 :            :          transform the node.  In the reduction analysis phase only the last
    2203                 :            :          element of the chain is marked as reduction.  */
    2204                 :        191 :       STMT_VINFO_DEF_TYPE (stmt_info)
    2205                 :        191 :         = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
    2206                 :        382 :       STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
    2207                 :        224 :         = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
    2208                 :            :     }
    2209                 :       5644 :   else if (constructor)
    2210                 :            :     {
    2211                 :       3979 :       tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
    2212                 :       3979 :       tree val;
    2213                 :      32480 :       FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
    2214                 :            :         {
    2215                 :      13554 :           if (TREE_CODE (val) == SSA_NAME)
    2216                 :            :             {
    2217                 :      12960 :               gimple* def = SSA_NAME_DEF_STMT (val);
    2218                 :      12960 :               stmt_vec_info def_info = vinfo->lookup_stmt (def);
    2219                 :            :               /* Value is defined in another basic block.  */
    2220                 :      12960 :               if (!def_info)
    2221                 :        699 :                 return false;
    2222                 :      12261 :               def_info = vect_stmt_to_vectorize (def_info);
    2223                 :      12261 :               scalar_stmts.safe_push (def_info);
    2224                 :            :             }
    2225                 :            :           else
    2226                 :            :             return false;
    2227                 :            :         }
    2228                 :       2686 :       if (dump_enabled_p ())
    2229                 :          6 :         dump_printf_loc (MSG_NOTE, vect_location,
    2230                 :            :                          "Analyzing vectorizable constructor: %G\n",
    2231                 :            :                          stmt_info->stmt);
    2232                 :            :     }
    2233                 :            :   else
    2234                 :            :     {
    2235                 :            :       /* Collect reduction statements.  */
    2236                 :       1665 :       vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
    2237                 :      14573 :       for (i = 0; reductions.iterate (i, &next_info); i++)
    2238                 :      11243 :         scalar_stmts.safe_push (next_info);
    2239                 :            :     }
    2240                 :            : 
    2241                 :            :   /* Build the tree for the SLP instance.  */
    2242                 :     113713 :   bool *matches = XALLOCAVEC (bool, group_size);
    2243                 :     113713 :   unsigned npermutes = 0;
    2244                 :     113713 :   poly_uint64 max_nunits = nunits;
    2245                 :     113713 :   unsigned tree_size = 0;
    2246                 :     113713 :   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
    2247                 :            :                               &max_nunits, matches, &npermutes,
    2248                 :            :                               &tree_size, bst_map);
    2249                 :     113713 :   if (node != NULL)
    2250                 :            :     {
    2251                 :            :       /* Calculate the unrolling factor based on the smallest type.  */
    2252                 :      82742 :       poly_uint64 unrolling_factor
    2253                 :      82742 :         = calculate_unrolling_factor (max_nunits, group_size);
    2254                 :            : 
    2255                 :      82742 :       if (maybe_ne (unrolling_factor, 1U)
    2256                 :      82742 :           && is_a <bb_vec_info> (vinfo))
    2257                 :            :         {
    2258                 :       8575 :           unsigned HOST_WIDE_INT const_max_nunits;
    2259                 :       8575 :           if (!max_nunits.is_constant (&const_max_nunits)
    2260                 :       8575 :               || const_max_nunits > group_size)
    2261                 :            :             {
    2262                 :          0 :               if (dump_enabled_p ())
    2263                 :          0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2264                 :            :                                  "Build SLP failed: store group "
    2265                 :            :                                  "size not a multiple of the vector size "
    2266                 :            :                                  "in basic block SLP\n");
    2267                 :          0 :               vect_free_slp_tree (node, false);
    2268                 :          0 :               return false;
    2269                 :            :             }
    2270                 :            :           /* Fatal mismatch.  */
    2271                 :       8575 :           matches[group_size / const_max_nunits * const_max_nunits] = false;
    2272                 :       8575 :           vect_free_slp_tree (node, false);
    2273                 :            :         }
    2274                 :            :       else
    2275                 :            :         {
    2276                 :            :           /* Create a new SLP instance.  */
    2277                 :      74167 :           new_instance = XNEW (class _slp_instance);
    2278                 :      74167 :           SLP_INSTANCE_TREE (new_instance) = node;
    2279                 :      74167 :           SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
    2280                 :      74167 :           SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
    2281                 :      74167 :           SLP_INSTANCE_LOADS (new_instance) = vNULL;
    2282                 :      74167 :           SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
    2283                 :            : 
    2284                 :      74167 :           vect_gather_slp_loads (new_instance, node);
    2285                 :      74167 :           if (dump_enabled_p ())
    2286                 :       1515 :             dump_printf_loc (MSG_NOTE, vect_location,
    2287                 :            :                              "SLP size %u vs. limit %u.\n",
    2288                 :            :                              tree_size, max_tree_size);
    2289                 :            : 
    2290                 :            :           /* Compute the load permutation.  */
    2291                 :            :           slp_tree load_node;
    2292                 :            :           bool loads_permuted = false;
    2293                 :      85494 :           FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
    2294                 :            :             {
    2295                 :      11327 :               if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
    2296                 :        343 :                 continue;
    2297                 :      10984 :               unsigned j;
    2298                 :      10984 :               stmt_vec_info load_info;
    2299                 :      10984 :               bool this_load_permuted = false;
    2300                 :      10984 :               stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
    2301                 :            :                   (SLP_TREE_SCALAR_STMTS (load_node)[0]);
    2302                 :      32762 :               FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
    2303                 :      26680 :                 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
    2304                 :            :                   {
    2305                 :            :                     this_load_permuted = true;
    2306                 :            :                     break;
    2307                 :            :                   }
    2308                 :      16859 :               if (!this_load_permuted
    2309                 :            :                   /* The load requires permutation when unrolling exposes
    2310                 :            :                      a gap either because the group is larger than the SLP
    2311                 :            :                      group-size or because there is a gap between the groups.  */
    2312                 :      10984 :                   && (known_eq (unrolling_factor, 1U)
    2313                 :       1495 :                       || (group_size == DR_GROUP_SIZE (first_stmt_info)
    2314                 :       1288 :                           && DR_GROUP_GAP (first_stmt_info) == 0)))
    2315                 :            :                 {
    2316                 :       5875 :                   SLP_TREE_LOAD_PERMUTATION (load_node).release ();
    2317                 :       5875 :                   continue;
    2318                 :            :                 }
    2319                 :            :               loads_permuted = true;
    2320                 :            :             }
    2321                 :            : 
    2322                 :      74167 :           if (loads_permuted)
    2323                 :            :             {
    2324                 :       3351 :               if (!vect_supported_load_permutation_p (new_instance))
    2325                 :            :                 {
    2326                 :        943 :                   if (dump_enabled_p ())
    2327                 :         93 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2328                 :            :                                      "Build SLP failed: unsupported load "
    2329                 :            :                                      "permutation %G", stmt_info->stmt);
    2330                 :        943 :                   vect_free_slp_instance (new_instance, false);
    2331                 :        943 :                   return false;
    2332                 :            :                 }
    2333                 :            :             }
    2334                 :            : 
    2335                 :            :           /* If the loads and stores can be handled with load/store-lan
    2336                 :            :              instructions do not generate this SLP instance.  */
    2337                 :      73224 :           if (is_a <loop_vec_info> (vinfo)
    2338                 :            :               && loads_permuted
    2339                 :      73224 :               && dr && vect_store_lanes_supported (vectype, group_size, false))
    2340                 :            :             {
    2341                 :            :               slp_tree load_node;
    2342                 :          0 :               FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
    2343                 :            :                 {
    2344                 :          0 :                   stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
    2345                 :            :                       (SLP_TREE_SCALAR_STMTS (load_node)[0]);
    2346                 :            :                   /* Use SLP for strided accesses (or if we can't load-lanes).  */
    2347                 :          0 :                   if (STMT_VINFO_STRIDED_P (stmt_vinfo)
    2348                 :          0 :                       || ! vect_load_lanes_supported
    2349                 :          0 :                       (STMT_VINFO_VECTYPE (stmt_vinfo),
    2350                 :          0 :                        DR_GROUP_SIZE (stmt_vinfo), false))
    2351                 :            :                     break;
    2352                 :            :                 }
    2353                 :          0 :               if (i == SLP_INSTANCE_LOADS (new_instance).length ())
    2354                 :            :                 {
    2355                 :          0 :                   if (dump_enabled_p ())
    2356                 :          0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2357                 :            :                                      "Built SLP cancelled: can use "
    2358                 :            :                                      "load/store-lanes\n");
    2359                 :          0 :                   vect_free_slp_instance (new_instance, false);
    2360                 :          0 :                   return false;
    2361                 :            :                 }
    2362                 :            :             }
    2363                 :            : 
    2364                 :            :           /* If this is a reduction chain with a conversion in front
    2365                 :            :              amend the SLP tree with a node for that.  */
    2366                 :      73224 :           if (!dr
    2367                 :        972 :               && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
    2368                 :      73367 :               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    2369                 :            :             {
    2370                 :            :               /* Get at the conversion stmt - we know it's the single use
    2371                 :            :                  of the last stmt of the reduction chain.  */
    2372                 :         15 :               gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
    2373                 :         15 :               use_operand_p use_p;
    2374                 :         15 :               gimple *use_stmt;
    2375                 :         15 :               bool r = single_imm_use (gimple_assign_lhs (tem),
    2376                 :            :                                        &use_p, &use_stmt);
    2377                 :         15 :               gcc_assert (r);
    2378                 :         15 :               next_info = vinfo->lookup_stmt (use_stmt);
    2379                 :         15 :               next_info = vect_stmt_to_vectorize (next_info);
    2380                 :         15 :               scalar_stmts = vNULL;
    2381                 :         15 :               scalar_stmts.create (group_size);
    2382                 :         51 :               for (unsigned i = 0; i < group_size; ++i)
    2383                 :         36 :                 scalar_stmts.quick_push (next_info);
    2384                 :         15 :               slp_tree conv = vect_create_new_slp_node (scalar_stmts);
    2385                 :         15 :               SLP_TREE_CHILDREN (conv).quick_push (node);
    2386                 :         15 :               SLP_INSTANCE_TREE (new_instance) = conv;
    2387                 :            :               /* We also have to fake this conversion stmt as SLP reduction
    2388                 :            :                  group so we don't have to mess with too much code
    2389                 :            :                  elsewhere.  */
    2390                 :         15 :               REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
    2391                 :         15 :               REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
    2392                 :            :             }
    2393                 :            : 
    2394                 :      73224 :           vinfo->slp_instances.safe_push (new_instance);
    2395                 :            : 
    2396                 :      73224 :           if (dump_enabled_p ())
    2397                 :            :             {
    2398                 :       1422 :               dump_printf_loc (MSG_NOTE, vect_location,
    2399                 :            :                                "Final SLP tree for instance:\n");
    2400                 :       1422 :               vect_print_slp_tree (MSG_NOTE, vect_location,
    2401                 :            :                                    SLP_INSTANCE_TREE (new_instance));
    2402                 :            :             }
    2403                 :            : 
    2404                 :      73224 :           return true;
    2405                 :            :         }
    2406                 :            :     }
    2407                 :            :   else
    2408                 :            :     {
    2409                 :            :       /* Failed to SLP.  */
    2410                 :            :       /* Free the allocated memory.  */
    2411                 :      30971 :       scalar_stmts.release ();
    2412                 :            :     }
    2413                 :            : 
    2414                 :            :   /* For basic block SLP, try to break the group up into multiples of the
    2415                 :            :      vector size.  */
    2416                 :      39546 :   unsigned HOST_WIDE_INT const_nunits;
    2417                 :      39546 :   if (is_a <bb_vec_info> (vinfo)
    2418                 :      37045 :       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
    2419                 :            :       && DR_GROUP_FIRST_ELEMENT (stmt_info)
    2420                 :      74415 :       && nunits.is_constant (&const_nunits))
    2421                 :            :     {
    2422                 :            :       /* We consider breaking the group only on VF boundaries from the existing
    2423                 :            :          start.  */
    2424                 :     110087 :       for (i = 0; i < group_size; i++)
    2425                 :     109694 :         if (!matches[i]) break;
    2426                 :            : 
    2427                 :      34869 :       if (i >= const_nunits && i < group_size)
    2428                 :            :         {
    2429                 :            :           /* Split into two groups at the first vector boundary before i.  */
    2430                 :      15578 :           gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
    2431                 :      15578 :           unsigned group1_size = i & ~(const_nunits - 1);
    2432                 :            : 
    2433                 :      15578 :           stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
    2434                 :            :                                                            group1_size);
    2435                 :      15578 :           bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
    2436                 :            :                                                 max_tree_size);
    2437                 :            :           /* If the first non-match was in the middle of a vector,
    2438                 :            :              skip the rest of that vector.  */
    2439                 :      15578 :           if (group1_size < i)
    2440                 :            :             {
    2441                 :       1070 :               i = group1_size + const_nunits;
    2442                 :       1070 :               if (i < group_size)
    2443                 :        470 :                 rest = vect_split_slp_store_group (rest, const_nunits);
    2444                 :            :             }
    2445                 :      15578 :           if (i < group_size)
    2446                 :      14978 :             res |= vect_analyze_slp_instance (vinfo, bst_map,
    2447                 :            :                                               rest, max_tree_size);
    2448                 :      15578 :           return res;
    2449                 :            :         }
    2450                 :            :       /* Even though the first vector did not all match, we might be able to SLP
    2451                 :            :          (some) of the remainder.  FORNOW ignore this possibility.  */
    2452                 :            :     }
    2453                 :            : 
    2454                 :            :   return false;
    2455                 :            : }
    2456                 :            : 
    2457                 :            : 
    2458                 :            : /* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
    2459                 :            :    trees of packed scalar stmts if SLP is possible.  */
    2460                 :            : 
    2461                 :            : opt_result
    2462                 :      75917 : vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
    2463                 :            : {
    2464                 :      75917 :   unsigned int i;
    2465                 :      75917 :   stmt_vec_info first_element;
    2466                 :            : 
    2467                 :      75917 :   DUMP_VECT_SCOPE ("vect_analyze_slp");
    2468                 :            : 
    2469                 :      75917 :   scalar_stmts_to_slp_tree_map_t *bst_map
    2470                 :      75917 :     = new scalar_stmts_to_slp_tree_map_t ();
    2471                 :            : 
    2472                 :            :   /* Find SLP sequences starting from groups of grouped stores.  */
    2473                 :     161812 :   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    2474                 :      85895 :     vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
    2475                 :            : 
    2476                 :      75917 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    2477                 :            :     {
    2478                 :      33616 :       if (loop_vinfo->reduction_chains.length () > 0)
    2479                 :            :         {
    2480                 :            :           /* Find SLP sequences starting from reduction chains.  */
    2481                 :        402 :           FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
    2482                 :        203 :             if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
    2483                 :            :                                              max_tree_size))
    2484                 :            :               {
    2485                 :            :                 /* Dissolve reduction chain group.  */
    2486                 :         60 :                 stmt_vec_info vinfo = first_element;
    2487                 :         60 :                 stmt_vec_info last = NULL;
    2488                 :        254 :                 while (vinfo)
    2489                 :            :                   {
    2490                 :        194 :                     stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
    2491                 :        194 :                     REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
    2492                 :        194 :                     REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
    2493                 :        194 :                     last = vinfo;
    2494                 :        194 :                     vinfo = next;
    2495                 :            :                   }
    2496                 :         60 :                 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
    2497                 :            :                 /* It can be still vectorized as part of an SLP reduction.  */
    2498                 :         60 :                 loop_vinfo->reductions.safe_push (last);
    2499                 :            :               }
    2500                 :            :         }
    2501                 :            : 
    2502                 :            :       /* Find SLP sequences starting from groups of reductions.  */
    2503                 :      33616 :       if (loop_vinfo->reductions.length () > 1)
    2504                 :       1665 :         vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
    2505                 :            :                                    max_tree_size);
    2506                 :            :     }
    2507                 :            : 
    2508                 :            :   /* The map keeps a reference on SLP nodes built, release that.  */
    2509                 :     492796 :   for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
    2510                 :     246398 :        it != bst_map->end (); ++it)
    2511                 :     170481 :     if ((*it).second)
    2512                 :     114788 :       vect_free_slp_tree ((*it).second, false);
    2513                 :      75917 :   delete bst_map;
    2514                 :            : 
    2515                 :      75917 :   return opt_result::success ();
    2516                 :            : }
    2517                 :            : 
    2518                 :            : 
    2519                 :            : /* For each possible SLP instance decide whether to SLP it and calculate overall
    2520                 :            :    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
    2521                 :            :    least one instance.  */
    2522                 :            : 
    2523                 :            : bool
    2524                 :      33616 : vect_make_slp_decision (loop_vec_info loop_vinfo)
    2525                 :            : {
    2526                 :      33616 :   unsigned int i;
    2527                 :      33616 :   poly_uint64 unrolling_factor = 1;
    2528                 :      33616 :   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
    2529                 :      33616 :   slp_instance instance;
    2530                 :      33616 :   int decided_to_slp = 0;
    2531                 :            : 
    2532                 :      33616 :   DUMP_VECT_SCOPE ("vect_make_slp_decision");
    2533                 :            : 
    2534                 :      36824 :   FOR_EACH_VEC_ELT (slp_instances, i, instance)
    2535                 :            :     {
    2536                 :            :       /* FORNOW: SLP if you can.  */
    2537                 :            :       /* All unroll factors have the form:
    2538                 :            : 
    2539                 :            :            GET_MODE_SIZE (vinfo->vector_mode) * X
    2540                 :            : 
    2541                 :            :          for some rational X, so they must have a common multiple.  */
    2542                 :       3208 :       unrolling_factor
    2543                 :            :         = force_common_multiple (unrolling_factor,
    2544                 :       3208 :                                  SLP_INSTANCE_UNROLLING_FACTOR (instance));
    2545                 :            : 
    2546                 :            :       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
    2547                 :            :          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
    2548                 :            :          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
    2549                 :       3208 :       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
    2550                 :       3208 :       decided_to_slp++;
    2551                 :            :     }
    2552                 :            : 
    2553                 :      33616 :   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
    2554                 :            : 
    2555                 :      33616 :   if (decided_to_slp && dump_enabled_p ())
    2556                 :            :     {
    2557                 :       1022 :       dump_printf_loc (MSG_NOTE, vect_location,
    2558                 :            :                        "Decided to SLP %d instances. Unrolling factor ",
    2559                 :            :                        decided_to_slp);
    2560                 :       1022 :       dump_dec (MSG_NOTE, unrolling_factor);
    2561                 :       1022 :       dump_printf (MSG_NOTE, "\n");
    2562                 :            :     }
    2563                 :            : 
    2564                 :      33616 :   return (decided_to_slp > 0);
    2565                 :            : }
    2566                 :            : 
    2567                 :            : 
    2568                 :            : /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
    2569                 :            :    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
    2570                 :            : 
    2571                 :            : static void
    2572                 :      52141 : vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
    2573                 :            :                               hash_map<slp_tree, unsigned> &visited)
    2574                 :            : {
    2575                 :      52141 :   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
    2576                 :      52141 :   imm_use_iterator imm_iter;
    2577                 :      52141 :   gimple *use_stmt;
    2578                 :      52141 :   stmt_vec_info use_vinfo;
    2579                 :      52141 :   slp_tree child;
    2580                 :      52141 :   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
    2581                 :      52141 :   int j;
    2582                 :            : 
    2583                 :            :   /* We need to union stype over the incoming graph edges but we still
    2584                 :            :      want to limit recursion to stay O(N+E).  */
    2585                 :      52141 :   unsigned visited_cnt = ++visited.get_or_insert (node);
    2586                 :      52141 :   gcc_assert (visited_cnt <= node->refcnt);
    2587                 :      52141 :   bool only_edge = (visited_cnt != node->refcnt);
    2588                 :            : 
    2589                 :            :   /* Propagate hybrid down the SLP tree.  */
    2590                 :      52141 :   if (stype == hybrid)
    2591                 :            :     ;
    2592                 :      51869 :   else if (HYBRID_SLP_STMT (stmt_vinfo))
    2593                 :            :     stype = hybrid;
    2594                 :      51790 :   else if (!only_edge)
    2595                 :            :     {
    2596                 :            :       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
    2597                 :      51378 :       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
    2598                 :            :       /* If we get a pattern stmt here we have to use the LHS of the
    2599                 :            :          original stmt for immediate uses.  */
    2600                 :      51378 :       gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
    2601                 :      51378 :       tree def;
    2602                 :      51378 :       if (gimple_code (stmt) == GIMPLE_PHI)
    2603                 :       3710 :         def = gimple_phi_result (stmt);
    2604                 :            :       else
    2605                 :      47668 :         def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
    2606                 :      51378 :       if (def)
    2607                 :     107049 :         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
    2608                 :            :           {
    2609                 :      70224 :             use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
    2610                 :      70224 :             if (!use_vinfo)
    2611                 :       1381 :               continue;
    2612                 :      68843 :             use_vinfo = vect_stmt_to_vectorize (use_vinfo);
    2613                 :      68843 :             if (!STMT_SLP_TYPE (use_vinfo)
    2614                 :       7325 :                 && (STMT_VINFO_RELEVANT (use_vinfo)
    2615                 :       7126 :                     || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
    2616                 :      69042 :                 && !(gimple_code (use_stmt) == GIMPLE_PHI
    2617                 :          0 :                      && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    2618                 :            :               {
    2619                 :        199 :                 if (dump_enabled_p ())
    2620                 :        173 :                   dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
    2621                 :            :                                    "def in non-SLP stmt: %G", use_stmt);
    2622                 :            :                 stype = hybrid;
    2623                 :            :               }
    2624                 :            :           }
    2625                 :            :     }
    2626                 :            : 
    2627                 :      52062 :   if (stype == hybrid
    2628                 :        535 :       && !HYBRID_SLP_STMT (stmt_vinfo))
    2629                 :            :     {
    2630                 :        370 :       if (dump_enabled_p ())
    2631                 :        311 :         dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
    2632                 :            :                          stmt_vinfo->stmt);
    2633                 :        370 :       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    2634                 :            :     }
    2635                 :            : 
    2636                 :      52141 :   if (!only_edge)
    2637                 :     102649 :     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    2638                 :      50961 :       if (SLP_TREE_DEF_TYPE (child) != vect_external_def
    2639                 :      50961 :           && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
    2640                 :      36168 :         vect_detect_hybrid_slp_stmts (child, i, stype, visited);
    2641                 :      52141 : }
    2642                 :            : 
    2643                 :            : /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
    2644                 :            : 
    2645                 :            : static tree
    2646                 :        187 : vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
    2647                 :            : {
    2648                 :        187 :   walk_stmt_info *wi = (walk_stmt_info *)data;
    2649                 :        187 :   loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
    2650                 :            : 
    2651                 :        187 :   if (wi->is_lhs)
    2652                 :            :     return NULL_TREE;
    2653                 :            : 
    2654                 :        124 :   stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
    2655                 :        124 :   if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
    2656                 :            :     {
    2657                 :          3 :       if (dump_enabled_p ())
    2658                 :          2 :         dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
    2659                 :            :                          def_stmt_info->stmt);
    2660                 :          3 :       STMT_SLP_TYPE (def_stmt_info) = hybrid;
    2661                 :            :     }
    2662                 :            : 
    2663                 :            :   return NULL_TREE;
    2664                 :            : }
    2665                 :            : 
    2666                 :            : static tree
    2667                 :       9167 : vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
    2668                 :            :                           walk_stmt_info *wi)
    2669                 :            : {
    2670                 :       9167 :   loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
    2671                 :       9167 :   stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
    2672                 :            :   /* If the stmt is in a SLP instance then this isn't a reason
    2673                 :            :      to mark use definitions in other SLP instances as hybrid.  */
    2674                 :       9167 :   if (! STMT_SLP_TYPE (use_vinfo)
    2675                 :       5328 :       && (STMT_VINFO_RELEVANT (use_vinfo)
    2676                 :       5265 :           || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
    2677                 :       9230 :       && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
    2678                 :          0 :             && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    2679                 :            :     ;
    2680                 :            :   else
    2681                 :       9104 :     *handled = true;
    2682                 :       9167 :   return NULL_TREE;
    2683                 :            : }
    2684                 :            : 
    2685                 :            : /* Find stmts that must be both vectorized and SLPed.  */
    2686                 :            : 
    2687                 :            : void
    2688                 :       2698 : vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
    2689                 :            : {
    2690                 :       2698 :   unsigned int i;
    2691                 :       2698 :   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
    2692                 :       2698 :   slp_instance instance;
    2693                 :            : 
    2694                 :       2698 :   DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
    2695                 :            : 
    2696                 :            :   /* First walk all pattern stmt in the loop and mark defs of uses as
    2697                 :            :      hybrid because immediate uses in them are not recorded.  */
    2698                 :       8106 :   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    2699                 :            :     {
    2700                 :       5408 :       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
    2701                 :      90470 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    2702                 :      79654 :            gsi_next (&gsi))
    2703                 :            :         {
    2704                 :      79654 :           gimple *stmt = gsi_stmt (gsi);
    2705                 :      79654 :           stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
    2706                 :      79654 :           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    2707                 :            :             {
    2708                 :       4010 :               walk_stmt_info wi;
    2709                 :       4010 :               memset (&wi, 0, sizeof (wi));
    2710                 :       4010 :               wi.info = loop_vinfo;
    2711                 :       4010 :               gimple_stmt_iterator gsi2
    2712                 :       4010 :                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
    2713                 :       4010 :               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
    2714                 :            :                                 vect_detect_hybrid_slp_1, &wi);
    2715                 :       4010 :               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
    2716                 :            :                                vect_detect_hybrid_slp_2,
    2717                 :            :                                vect_detect_hybrid_slp_1, &wi);
    2718                 :            :             }
    2719                 :            :         }
    2720                 :            :     }
    2721                 :            : 
    2722                 :            :   /* Then walk the SLP instance trees marking stmts with uses in
    2723                 :            :      non-SLP stmts as hybrid, also propagating hybrid down the
    2724                 :            :      SLP tree, collecting the above info on-the-fly.  */
    2725                 :      14362 :   for (unsigned j = 0;; ++j)
    2726                 :            :     {
    2727                 :      17060 :       hash_map<slp_tree, unsigned> visited;
    2728                 :      17060 :       bool any = false;
    2729                 :      36679 :       FOR_EACH_VEC_ELT (slp_instances, i, instance)
    2730                 :      19619 :         if (j < SLP_INSTANCE_GROUP_SIZE (instance))
    2731                 :            :           {
    2732                 :      15973 :             any = true;
    2733                 :      15973 :             vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
    2734                 :            :                                           j, pure_slp, visited);
    2735                 :            :           }
    2736                 :      17060 :       if (!any)
    2737                 :            :         break;
    2738                 :      14362 :     }
    2739                 :       2698 : }
    2740                 :            : 
    2741                 :            : 
    2742                 :            : /* Initialize a bb_vec_info struct for the statements between
    2743                 :            :    REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */
    2744                 :            : 
    2745                 :     679909 : _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
    2746                 :            :                             gimple_stmt_iterator region_end_in,
    2747                 :     679909 :                             vec_info_shared *shared)
    2748                 :            :   : vec_info (vec_info::bb, init_cost (NULL), shared),
    2749                 :     679909 :     bb (gsi_bb (region_begin_in)),
    2750                 :            :     region_begin (region_begin_in),
    2751                 :     679909 :     region_end (region_end_in)
    2752                 :            : {
    2753                 :     679909 :   gimple_stmt_iterator gsi;
    2754                 :            : 
    2755                 :    5370700 :   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
    2756                 :    4690800 :        gsi_next (&gsi))
    2757                 :            :     {
    2758                 :    4690800 :       gimple *stmt = gsi_stmt (gsi);
    2759                 :    4690800 :       gimple_set_uid (stmt, 0);
    2760                 :    4690800 :       add_stmt (stmt);
    2761                 :            :     }
    2762                 :            : 
    2763                 :     679909 :   bb->aux = this;
    2764                 :     679909 : }
    2765                 :            : 
    2766                 :            : 
    2767                 :            : /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
    2768                 :            :    stmts in the basic block.  */
    2769                 :            : 
    2770                 :    2039730 : _bb_vec_info::~_bb_vec_info ()
    2771                 :            : {
    2772                 :     679909 :   for (gimple_stmt_iterator si = region_begin;
    2773                 :    5346630 :        gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    2774                 :            :     /* Reset region marker.  */
    2775                 :    4666720 :     gimple_set_uid (gsi_stmt (si), -1);
    2776                 :            : 
    2777                 :     679909 :   bb->aux = NULL;
    2778                 :     679909 : }
    2779                 :            : 
    2780                 :            : /* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
    2781                 :            :    given then that child nodes have already been processed, and that
    2782                 :            :    their def types currently match their SLP node's def type.  */
    2783                 :            : 
    2784                 :            : static bool
    2785                 :      87195 : vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
    2786                 :            :                                     slp_instance node_instance,
    2787                 :            :                                     stmt_vector_for_cost *cost_vec)
    2788                 :            : {
    2789                 :      87195 :   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    2790                 :      87195 :   gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
    2791                 :            : 
    2792                 :            :   /* Calculate the number of vector statements to be created for the
    2793                 :            :      scalar stmts in this node.  For SLP reductions it is equal to the
    2794                 :            :      number of vector statements in the children (which has already been
    2795                 :            :      calculated by the recursive call).  Otherwise it is the number of
    2796                 :            :      scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
    2797                 :            :      VF divided by the number of elements in a vector.  */
    2798                 :      71265 :   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
    2799                 :      87195 :       && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    2800                 :            :     {
    2801                 :        308 :       for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
    2802                 :        154 :         if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
    2803                 :            :           {
    2804                 :        147 :             SLP_TREE_NUMBER_OF_VEC_STMTS (node)
    2805                 :        147 :               = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
    2806                 :        147 :             break;
    2807                 :            :           }
    2808                 :            :     }
    2809                 :            :   else
    2810                 :            :     {
    2811                 :      87048 :       poly_uint64 vf;
    2812                 :      87048 :       if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    2813                 :      11873 :         vf = loop_vinfo->vectorization_factor;
    2814                 :            :       else
    2815                 :      87048 :         vf = 1;
    2816                 :      87048 :       unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
    2817                 :      87048 :       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2818                 :      87048 :       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
    2819                 :      87048 :         = vect_get_num_vectors (vf * group_size, vectype);
    2820                 :            :     }
    2821                 :            : 
    2822                 :      87195 :   bool dummy;
    2823                 :      87195 :   return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
    2824                 :            : }
    2825                 :            : 
    2826                 :            : /* Try to build NODE from scalars, returning true on success.
    2827                 :            :    NODE_INSTANCE is the SLP instance that contains NODE.  */
    2828                 :            : 
    2829                 :            : static bool
    2830                 :       8911 : vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
    2831                 :            :                               slp_instance node_instance)
    2832                 :            : {
    2833                 :       8911 :   stmt_vec_info stmt_info;
    2834                 :       8911 :   unsigned int i;
    2835                 :            : 
    2836                 :       8911 :   if (!is_a <bb_vec_info> (vinfo)
    2837                 :       8721 :       || node == SLP_INSTANCE_TREE (node_instance)
    2838                 :      16308 :       || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
    2839                 :            :     return false;
    2840                 :            : 
    2841                 :       7328 :   if (dump_enabled_p ())
    2842                 :         29 :     dump_printf_loc (MSG_NOTE, vect_location,
    2843                 :            :                      "Building vector operands from scalars instead\n");
    2844                 :            : 
    2845                 :            :   /* Don't remove and free the child nodes here, since they could be
    2846                 :            :      referenced by other structures.  The analysis and scheduling phases
    2847                 :            :      (need to) ignore child nodes of anything that isn't vect_internal_def.  */
    2848                 :       7328 :   unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
    2849                 :       7328 :   SLP_TREE_DEF_TYPE (node) = vect_external_def;
    2850                 :       7328 :   SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
    2851                 :      16300 :   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    2852                 :            :     {
    2853                 :       8972 :       tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
    2854                 :       8972 :       SLP_TREE_SCALAR_OPS (node)[i] = lhs;
    2855                 :            :     }
    2856                 :            :   return true;
    2857                 :            : }
    2858                 :            : 
    2859                 :            : /* Analyze statements contained in SLP tree NODE after recursively analyzing
    2860                 :            :    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
    2861                 :            : 
    2862                 :            :    Return true if the operations are supported.  */
    2863                 :            : 
    2864                 :            : static bool
    2865                 :     155134 : vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
    2866                 :            :                                   slp_instance node_instance,
    2867                 :            :                                   hash_set<slp_tree> &visited,
    2868                 :            :                                   hash_set<slp_tree> &lvisited,
    2869                 :            :                                   stmt_vector_for_cost *cost_vec)
    2870                 :            : {
    2871                 :     155134 :   int i, j;
    2872                 :     155134 :   slp_tree child;
    2873                 :            : 
    2874                 :     155134 :   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    2875                 :            :     return true;
    2876                 :            : 
    2877                 :            :   /* If we already analyzed the exact same set of scalar stmts we're done.
    2878                 :            :      We share the generated vector stmts for those.
    2879                 :            :      The SLP graph is acyclic so not caching whether we failed or succeeded
    2880                 :            :      doesn't result in any issue since we throw away the lvisited set
    2881                 :            :      when we fail.  */
    2882                 :      88213 :   if (visited.contains (node)
    2883                 :      88213 :       || lvisited.add (node))
    2884                 :        529 :     return true;
    2885                 :            : 
    2886                 :     178407 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    2887                 :      91212 :     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
    2888                 :            :                                            visited, lvisited, cost_vec))
    2889                 :            :       return false;
    2890                 :            : 
    2891                 :            :   /* ???  We have to catch the case late where two first scalar stmts appear
    2892                 :            :      in multiple SLP children with different def type and fail.  Remember
    2893                 :            :      original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
    2894                 :            :      match it when that is vect_internal_def.  */
    2895                 :      87195 :   auto_vec<vect_def_type, 4> dt;
    2896                 :      87195 :   dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
    2897                 :     264105 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    2898                 :      90675 :     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
    2899                 :      27363 :       dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
    2900                 :            : 
    2901                 :            :   /* Push SLP node def-type to stmt operands.  */
    2902                 :     177870 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    2903                 :      90675 :     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
    2904                 :      90675 :         && SLP_TREE_SCALAR_STMTS (child).length () != 0)
    2905                 :      10919 :       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
    2906                 :      10919 :         = SLP_TREE_DEF_TYPE (child);
    2907                 :            : 
    2908                 :            :   /* Check everything worked out.  */
    2909                 :            :   bool res = true;
    2910                 :     177870 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    2911                 :      90675 :       if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
    2912                 :            :         {
    2913                 :      27363 :           if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
    2914                 :            :             {
    2915                 :      10919 :               if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
    2916                 :            :                   != SLP_TREE_DEF_TYPE (child))
    2917                 :          0 :                 res = false;
    2918                 :            :             }
    2919                 :      16444 :           else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
    2920                 :      16444 :                    != dt[j])
    2921                 :          0 :             res = false;
    2922                 :            :         }
    2923                 :      87195 :   if (!res && dump_enabled_p ())
    2924                 :          0 :     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2925                 :            :                      "not vectorized: same operand with different "
    2926                 :            :                      "def type in stmt.\n");
    2927                 :            : 
    2928                 :      87195 :   if (res)
    2929                 :      87195 :     res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
    2930                 :            :                                               cost_vec);
    2931                 :            : 
    2932                 :            :   /* Restore def-types.  */
    2933                 :     177870 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    2934                 :      90675 :     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
    2935                 :      27363 :       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
    2936                 :            : 
    2937                 :            :   /* If this node can't be vectorized, try pruning the tree here rather
    2938                 :            :      than felling the whole thing.  */
    2939                 :      87195 :   if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
    2940                 :            :     res = true;
    2941                 :            : 
    2942                 :      87195 :   return res;
    2943                 :            : }
    2944                 :            : 
    2945                 :            : 
    2946                 :            : /* Analyze statements in SLP instances of VINFO.  Return true if the
    2947                 :            :    operations are supported. */
    2948                 :            : 
    2949                 :            : bool
    2950                 :      34637 : vect_slp_analyze_operations (vec_info *vinfo)
    2951                 :            : {
    2952                 :      34637 :   slp_instance instance;
    2953                 :      34637 :   int i;
    2954                 :            : 
    2955                 :      34637 :   DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
    2956                 :            : 
    2957                 :      69274 :   hash_set<slp_tree> visited;
    2958                 :      98559 :   for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    2959                 :            :     {
    2960                 :     127844 :       hash_set<slp_tree> lvisited;
    2961                 :      63922 :       stmt_vector_for_cost cost_vec;
    2962                 :      63922 :       cost_vec.create (2);
    2963                 :      63922 :       if (!vect_slp_analyze_node_operations (vinfo,
    2964                 :            :                                              SLP_INSTANCE_TREE (instance),
    2965                 :            :                                              instance, visited, lvisited,
    2966                 :            :                                              &cost_vec)
    2967                 :            :           /* Instances with a root stmt require vectorized defs for the
    2968                 :            :              SLP tree root.  */
    2969                 :      63922 :           || (SLP_INSTANCE_ROOT_STMT (instance)
    2970                 :        321 :               && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
    2971                 :            :                   != vect_internal_def)))
    2972                 :            :         {
    2973                 :       1583 :           slp_tree node = SLP_INSTANCE_TREE (instance);
    2974                 :       1583 :           stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    2975                 :       1583 :           if (dump_enabled_p ())
    2976                 :         60 :             dump_printf_loc (MSG_NOTE, vect_location,
    2977                 :            :                              "removing SLP instance operations starting from: %G",
    2978                 :            :                              stmt_info->stmt);
    2979                 :       1583 :           vect_free_slp_instance (instance, false);
    2980                 :       1583 :           vinfo->slp_instances.ordered_remove (i);
    2981                 :       1583 :           cost_vec.release ();
    2982                 :            :         }
    2983                 :            :       else
    2984                 :            :         {
    2985                 :     206705 :           for (hash_set<slp_tree>::iterator x = lvisited.begin();
    2986                 :     144366 :                x != lvisited.end(); ++x)
    2987                 :      82027 :             visited.add (*x);
    2988                 :      62339 :           i++;
    2989                 :            : 
    2990                 :      62339 :           add_stmt_costs (vinfo->target_cost_data, &cost_vec);
    2991                 :     126261 :           cost_vec.release ();
    2992                 :            :         }
    2993                 :            :     }
    2994                 :            : 
    2995                 :      69274 :   return !vinfo->slp_instances.is_empty ();
    2996                 :            : }
    2997                 :            : 
    2998                 :            : 
    2999                 :            : /* Compute the scalar cost of the SLP node NODE and its children
    3000                 :            :    and return it.  Do not account defs that are marked in LIFE and
    3001                 :            :    update LIFE according to uses of NODE.  */
    3002                 :            : 
    3003                 :            : static void 
    3004                 :      61690 : vect_bb_slp_scalar_cost (basic_block bb,
    3005                 :            :                          slp_tree node, vec<bool, va_heap> *life,
    3006                 :            :                          stmt_vector_for_cost *cost_vec,
    3007                 :            :                          hash_set<slp_tree> &visited)
    3008                 :            : {
    3009                 :      61690 :   unsigned i;
    3010                 :      61690 :   stmt_vec_info stmt_info;
    3011                 :      61690 :   slp_tree child;
    3012                 :            : 
    3013                 :      61690 :   if (visited.add (node))
    3014                 :        231 :     return; 
    3015                 :            : 
    3016                 :     240196 :   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    3017                 :            :     {
    3018                 :     178737 :       gimple *stmt = stmt_info->stmt;
    3019                 :     178737 :       vec_info *vinfo = stmt_info->vinfo;
    3020                 :     178737 :       ssa_op_iter op_iter;
    3021                 :     178737 :       def_operand_p def_p;
    3022                 :            : 
    3023                 :     178737 :       if ((*life)[i])
    3024                 :       7416 :         continue;
    3025                 :            : 
    3026                 :            :       /* If there is a non-vectorized use of the defs then the scalar
    3027                 :            :          stmt is kept live in which case we do not account it or any
    3028                 :            :          required defs in the SLP children in the scalar cost.  This
    3029                 :            :          way we make the vectorization more costly when compared to
    3030                 :            :          the scalar cost.  */
    3031                 :     191673 :       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    3032                 :            :         {
    3033                 :      14262 :           imm_use_iterator use_iter;
    3034                 :      14262 :           gimple *use_stmt;
    3035                 :      33362 :           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
    3036                 :      21350 :             if (!is_gimple_debug (use_stmt))
    3037                 :            :               {
    3038                 :      21177 :                 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    3039                 :      21177 :                 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
    3040                 :            :                   {
    3041                 :       2250 :                     (*life)[i] = true;
    3042                 :      16512 :                     BREAK_FROM_IMM_USE_STMT (use_iter);
    3043                 :            :                   }
    3044                 :            :               }
    3045                 :            :         }
    3046                 :     177411 :       if ((*life)[i])
    3047                 :       2250 :         continue;
    3048                 :            : 
    3049                 :            :       /* Count scalar stmts only once.  */
    3050                 :     175161 :       if (gimple_visited_p (stmt))
    3051                 :       3632 :         continue;
    3052                 :     171529 :       gimple_set_visited (stmt, true);
    3053                 :            : 
    3054                 :     171529 :       vect_cost_for_stmt kind;
    3055                 :     171529 :       if (STMT_VINFO_DATA_REF (stmt_info))
    3056                 :            :         {
    3057                 :     167144 :           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    3058                 :            :             kind = scalar_load;
    3059                 :            :           else
    3060                 :     163149 :             kind = scalar_store;
    3061                 :            :         }
    3062                 :       4385 :       else if (vect_nop_conversion_p (stmt_info))
    3063                 :        208 :         continue;
    3064                 :            :       else
    3065                 :            :         kind = scalar_stmt;
    3066                 :     171321 :       record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
    3067                 :            :     }
    3068                 :            : 
    3069                 :     122918 :   auto_vec<bool, 20> subtree_life;
    3070                 :     184436 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    3071                 :            :     {
    3072                 :      61518 :       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
    3073                 :            :         {
    3074                 :            :           /* Do not directly pass LIFE to the recursive call, copy it to
    3075                 :            :              confine changes in the callee to the current child/subtree.  */
    3076                 :       3811 :           subtree_life.safe_splice (*life);
    3077                 :       3811 :           vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
    3078                 :            :                                    visited);
    3079                 :       3811 :           subtree_life.truncate (0);
    3080                 :            :         }
    3081                 :            :     }
    3082                 :            : }
    3083                 :            : 
    3084                 :            : /* Check if vectorization of the basic block is profitable.  */
    3085                 :            : 
    3086                 :            : static bool
    3087                 :      30425 : vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
    3088                 :            : {
    3089                 :      30425 :   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
    3090                 :      30425 :   slp_instance instance;
    3091                 :      30425 :   int i;
    3092                 :      30425 :   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
    3093                 :      30425 :   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
    3094                 :            : 
    3095                 :            :   /* Calculate scalar cost.  */
    3096                 :      30425 :   stmt_vector_for_cost scalar_costs;
    3097                 :      30425 :   scalar_costs.create (0);
    3098                 :      30425 :   hash_set<slp_tree> visited;
    3099                 :      88304 :   FOR_EACH_VEC_ELT (slp_instances, i, instance)
    3100                 :            :     {
    3101                 :     115758 :       auto_vec<bool, 20> life;
    3102                 :      57879 :       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
    3103                 :      57879 :       vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
    3104                 :            :                                SLP_INSTANCE_TREE (instance),
    3105                 :            :                                &life, &scalar_costs, visited);
    3106                 :            :     }
    3107                 :      30425 :   void *target_cost_data = init_cost (NULL);
    3108                 :      30425 :   add_stmt_costs (target_cost_data, &scalar_costs);
    3109                 :      30425 :   scalar_costs.release ();
    3110                 :      30425 :   unsigned dummy;
    3111                 :      30425 :   finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
    3112                 :      30425 :   destroy_cost_data (target_cost_data);
    3113                 :            : 
    3114                 :            :   /* Unset visited flag.  */
    3115                 :      30425 :   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
    3116                 :     544390 :        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    3117                 :     513965 :     gimple_set_visited  (gsi_stmt (gsi), false);
    3118                 :            : 
    3119                 :            :   /* Complete the target-specific cost calculation.  */
    3120                 :      30425 :   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
    3121                 :            :                &vec_inside_cost, &vec_epilogue_cost);
    3122                 :            : 
    3123                 :      30425 :   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
    3124                 :            : 
    3125                 :      30425 :   if (dump_enabled_p ())
    3126                 :            :     {
    3127                 :         10 :       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
    3128                 :         10 :       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
    3129                 :            :                    vec_inside_cost);
    3130                 :         10 :       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
    3131                 :         10 :       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
    3132                 :         10 :       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    3133                 :            :     }
    3134                 :            : 
    3135                 :            :   /* Vectorization is profitable if its cost is more than the cost of scalar
    3136                 :            :      version.  Note that we err on the vector side for equal cost because
    3137                 :            :      the cost estimate is otherwise quite pessimistic (constant uses are
    3138                 :            :      free on the scalar side but cost a load on the vector side for
    3139                 :            :      example).  */
    3140                 :      30425 :   if (vec_outside_cost + vec_inside_cost > scalar_cost)
    3141                 :       3832 :     return false;
    3142                 :            : 
    3143                 :            :   return true;
    3144                 :            : }
    3145                 :            : 
    3146                 :            : /* Find any vectorizable constructors and add them to the grouped_store
    3147                 :            :    array.  */
    3148                 :            : 
    3149                 :            : static void
    3150                 :     160233 : vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
    3151                 :            : {
    3152                 :     160233 :   gimple_stmt_iterator gsi;
    3153                 :            : 
    3154                 :     160233 :   for (gsi = bb_vinfo->region_begin;
    3155                 :    2933740 :        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    3156                 :            :     {
    3157                 :    2773510 :       gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
    3158                 :    3104440 :       if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
    3159                 :    2769530 :         continue;
    3160                 :            : 
    3161                 :      41083 :       tree rhs = gimple_assign_rhs1 (stmt);
    3162                 :      41083 :       if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
    3163                 :       6372 :           || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
    3164                 :      12239 :                        CONSTRUCTOR_NELTS (rhs))
    3165                 :       5557 :           || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
    3166                 :      46640 :           || uniform_vector_p (rhs))
    3167                 :      37104 :         continue;
    3168                 :            : 
    3169                 :       3979 :       stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
    3170                 :       3979 :       BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    3171                 :            :     }
    3172                 :     160233 : }
    3173                 :            : 
    3174                 :            : /* Check if the region described by BB_VINFO can be vectorized, returning
    3175                 :            :    true if so.  When returning false, set FATAL to true if the same failure
    3176                 :            :    would prevent vectorization at other vector sizes, false if it is still
    3177                 :            :    worth trying other sizes.  N_STMTS is the number of statements in the
    3178                 :            :    region.  */
    3179                 :            : 
    3180                 :            : static bool
    3181                 :     679909 : vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
    3182                 :            : {
    3183                 :     679909 :   DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
    3184                 :            : 
    3185                 :     679909 :   slp_instance instance;
    3186                 :     679909 :   int i;
    3187                 :     679909 :   poly_uint64 min_vf = 2;
    3188                 :            : 
    3189                 :            :   /* The first group of checks is independent of the vector size.  */
    3190                 :     679909 :   fatal = true;
    3191                 :            : 
    3192                 :            :   /* Analyze the data references.  */
    3193                 :            : 
    3194                 :     679909 :   if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
    3195                 :            :     {
    3196                 :          0 :       if (dump_enabled_p ())
    3197                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3198                 :            :                          "not vectorized: unhandled data-ref in basic "
    3199                 :            :                          "block.\n");
    3200                 :          0 :       return false;
    3201                 :            :     }
    3202                 :            : 
    3203                 :     679909 :   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    3204                 :            :     {
    3205                 :     519676 :       if (dump_enabled_p ())
    3206                 :       2218 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3207                 :            :                          "not vectorized: not enough data-refs in "
    3208                 :            :                          "basic block.\n");
    3209                 :     519676 :       return false;
    3210                 :            :     }
    3211                 :            : 
    3212                 :     160233 :   if (!vect_analyze_data_ref_accesses (bb_vinfo))
    3213                 :            :     {
    3214                 :          0 :      if (dump_enabled_p ())
    3215                 :          0 :        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3216                 :            :                         "not vectorized: unhandled data access in "
    3217                 :            :                         "basic block.\n");
    3218                 :          0 :       return false;
    3219                 :            :     }
    3220                 :            : 
    3221                 :     160233 :   vect_slp_check_for_constructors (bb_vinfo);
    3222                 :            : 
    3223                 :            :   /* If there are no grouped stores in the region there is no need
    3224                 :            :      to continue with pattern recog as vect_analyze_slp will fail
    3225                 :            :      anyway.  */
    3226                 :     160233 :   if (bb_vinfo->grouped_stores.is_empty ())
    3227                 :            :     {
    3228                 :     117932 :       if (dump_enabled_p ())
    3229                 :        550 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3230                 :            :                          "not vectorized: no grouped stores in "
    3231                 :            :                          "basic block.\n");
    3232                 :     117932 :       return false;
    3233                 :            :     }
    3234                 :            : 
    3235                 :            :   /* While the rest of the analysis below depends on it in some way.  */
    3236                 :      42301 :   fatal = false;
    3237                 :            : 
    3238                 :      42301 :   vect_pattern_recog (bb_vinfo);
    3239                 :            : 
    3240                 :            :   /* Check the SLP opportunities in the basic block, analyze and build SLP
    3241                 :            :      trees.  */
    3242                 :      42301 :   if (!vect_analyze_slp (bb_vinfo, n_stmts))
    3243                 :            :     {
    3244                 :          0 :       if (dump_enabled_p ())
    3245                 :            :         {
    3246                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3247                 :            :                            "Failed to SLP the basic block.\n");
    3248                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
    3249                 :            :                            "not vectorized: failed to find SLP opportunities "
    3250                 :            :                            "in basic block.\n");
    3251                 :            :         }
    3252                 :          0 :       return false;
    3253                 :            :     }
    3254                 :            : 
    3255                 :      42301 :   vect_record_base_alignments (bb_vinfo);
    3256                 :            : 
    3257                 :            :   /* Analyze and verify the alignment of data references and the
    3258                 :            :      dependence in the SLP instances.  */
    3259                 :     112317 :   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    3260                 :            :     {
    3261                 :      70016 :       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
    3262                 :      70016 :           || ! vect_slp_analyze_instance_dependence (instance))
    3263                 :            :         {
    3264                 :       9236 :           slp_tree node = SLP_INSTANCE_TREE (instance);
    3265                 :       9236 :           stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    3266                 :       9236 :           if (dump_enabled_p ())
    3267                 :          7 :             dump_printf_loc (MSG_NOTE, vect_location,
    3268                 :            :                              "removing SLP instance operations starting from: %G",
    3269                 :            :                              stmt_info->stmt);
    3270                 :       9236 :           vect_free_slp_instance (instance, false);
    3271                 :       9236 :           BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
    3272                 :       9236 :           continue;
    3273                 :            :         }
    3274                 :            : 
    3275                 :            :       /* Mark all the statements that we want to vectorize as pure SLP and
    3276                 :            :          relevant.  */
    3277                 :      60780 :       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
    3278                 :      60780 :       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    3279                 :      60780 :       if (SLP_INSTANCE_ROOT_STMT (instance))
    3280                 :        323 :         STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
    3281                 :            : 
    3282                 :      60780 :       i++;
    3283                 :            :     }
    3284                 :      42301 :   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    3285                 :            :     return false;
    3286                 :            : 
    3287                 :      31975 :   if (!vect_slp_analyze_operations (bb_vinfo))
    3288                 :            :     {
    3289                 :        453 :       if (dump_enabled_p ())
    3290                 :          0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3291                 :            :                          "not vectorized: bad operation in basic block.\n");
    3292                 :        453 :       return false;
    3293                 :            :     }
    3294                 :            : 
    3295                 :            :   /* Cost model: check if the vectorization is worthwhile.  */
    3296                 :      31522 :   if (!unlimited_cost_model (NULL)
    3297                 :      31522 :       && !vect_bb_vectorization_profitable_p (bb_vinfo))
    3298                 :            :     {
    3299                 :       3832 :       if (dump_enabled_p ())
    3300                 :          7 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3301                 :            :                          "not vectorized: vectorization is not "
    3302                 :            :                          "profitable.\n");
    3303                 :       3832 :       return false;
    3304                 :            :     }
    3305                 :            : 
    3306                 :      27690 :   if (dump_enabled_p ())
    3307                 :        235 :     dump_printf_loc (MSG_NOTE, vect_location,
    3308                 :            :                      "Basic block will be vectorized using SLP\n");
    3309                 :            :   return true;
    3310                 :            : }
    3311                 :            : 
    3312                 :            : /* Subroutine of vect_slp_bb.  Try to vectorize the statements between
    3313                 :            :    REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
    3314                 :            :    on success.  The region has N_STMTS statements and has the datarefs
    3315                 :            :    given by DATAREFS.  */
    3316                 :            : 
    3317                 :            : static bool
    3318                 :     671637 : vect_slp_bb_region (gimple_stmt_iterator region_begin,
    3319                 :            :                     gimple_stmt_iterator region_end,
    3320                 :            :                     vec<data_reference_p> datarefs,
    3321                 :            :                     unsigned int n_stmts)
    3322                 :            : {
    3323                 :     671637 :   bb_vec_info bb_vinfo;
    3324                 :     671637 :   auto_vector_modes vector_modes;
    3325                 :            : 
    3326                 :            :   /* Autodetect first vector size we try.  */
    3327                 :     671637 :   machine_mode next_vector_mode = VOIDmode;
    3328                 :     671637 :   targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
    3329                 :     671637 :   unsigned int mode_i = 0;
    3330                 :            : 
    3331                 :     671637 :   vec_info_shared shared;
    3332                 :            : 
    3333                 :     671637 :   machine_mode autodetected_vector_mode = VOIDmode;
    3334                 :     688181 :   while (1)
    3335                 :            :     {
    3336                 :     679909 :       bool vectorized = false;
    3337                 :     679909 :       bool fatal = false;
    3338                 :     679909 :       bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
    3339                 :            : 
    3340                 :     679909 :       bool first_time_p = shared.datarefs.is_empty ();
    3341                 :     679909 :       BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
    3342                 :     679909 :       if (first_time_p)
    3343                 :     671637 :         bb_vinfo->shared->save_datarefs ();
    3344                 :            :       else
    3345                 :       8272 :         bb_vinfo->shared->check_datarefs ();
    3346                 :     679909 :       bb_vinfo->vector_mode = next_vector_mode;
    3347                 :            : 
    3348                 :     679909 :       if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
    3349                 :     679909 :           && dbg_cnt (vect_slp))
    3350                 :            :         {
    3351                 :      27690 :           if (dump_enabled_p ())
    3352                 :            :             {
    3353                 :        470 :               dump_printf_loc (MSG_NOTE, vect_location,
    3354                 :            :                                "***** Analysis succeeded with vector mode"
    3355                 :        235 :                                " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
    3356                 :        235 :               dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
    3357                 :            :             }
    3358                 :            : 
    3359                 :      27690 :           bb_vinfo->shared->check_datarefs ();
    3360                 :      27690 :           vect_schedule_slp (bb_vinfo);
    3361                 :            : 
    3362                 :      27690 :           unsigned HOST_WIDE_INT bytes;
    3363                 :      27690 :           if (dump_enabled_p ())
    3364                 :            :             {
    3365                 :        470 :               if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
    3366                 :        235 :                 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    3367                 :            :                                  "basic block part vectorized using %wu byte "
    3368                 :            :                                  "vectors\n", bytes);
    3369                 :            :               else
    3370                 :            :                 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    3371                 :            :                                  "basic block part vectorized using variable "
    3372                 :            :                                  "length vectors\n");
    3373                 :            :             }
    3374                 :            : 
    3375                 :            :           vectorized = true;
    3376                 :            :         }
    3377                 :            :       else
    3378                 :            :         {
    3379                 :     652219 :           if (dump_enabled_p ())
    3380                 :       2835 :             dump_printf_loc (MSG_NOTE, vect_location,
    3381                 :            :                              "***** Analysis failed with vector mode %s\n",
    3382                 :       2835 :                              GET_MODE_NAME (bb_vinfo->vector_mode));
    3383                 :            :         }
    3384                 :            : 
    3385                 :     679909 :       if (mode_i == 0)
    3386                 :     671637 :         autodetected_vector_mode = bb_vinfo->vector_mode;
    3387                 :            : 
    3388                 :     679909 :       if (!fatal)
    3389                 :      77368 :         while (mode_i < vector_modes.length ()
    3390                 :     154736 :                && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
    3391                 :            :           {
    3392                 :      35067 :             if (dump_enabled_p ())
    3393                 :        265 :               dump_printf_loc (MSG_NOTE, vect_location,
    3394                 :            :                                "***** The result for vector mode %s would"
    3395                 :            :                                " be the same\n",
    3396                 :        265 :                                GET_MODE_NAME (vector_modes[mode_i]));
    3397                 :      35067 :             mode_i += 1;
    3398                 :            :           }
    3399                 :            : 
    3400                 :     679909 :       delete bb_vinfo;
    3401                 :            : 
    3402                 :     679909 :       if (mode_i < vector_modes.length ()
    3403                 :     659089 :           && VECTOR_MODE_P (autodetected_vector_mode)
    3404                 :     256043 :           && (related_vector_mode (vector_modes[mode_i],
    3405                 :     512086 :                                    GET_MODE_INNER (autodetected_vector_mode))
    3406                 :     256043 :               == autodetected_vector_mode)
    3407                 :     899472 :           && (related_vector_mode (autodetected_vector_mode,
    3408                 :     679909 :                                    GET_MODE_INNER (vector_modes[mode_i]))
    3409                 :     439126 :               == vector_modes[mode_i]))
    3410                 :            :         {
    3411                 :     219563 :           if (dump_enabled_p ())
    3412                 :       1037 :             dump_printf_loc (MSG_NOTE, vect_location,
    3413                 :            :                              "***** Skipping vector mode %s, which would"
    3414                 :            :                              " repeat the analysis for %s\n",
    3415                 :       1037 :                              GET_MODE_NAME (vector_modes[mode_i]),
    3416                 :       1037 :                              GET_MODE_NAME (autodetected_vector_mode));
    3417                 :     219563 :           mode_i += 1;
    3418                 :            :         }
    3419                 :            : 
    3420                 :     679909 :       if (vectorized
    3421                 :    1304440 :           || mode_i == vector_modes.length ()
    3422                 :     631707 :           || autodetected_vector_mode == VOIDmode
    3423                 :            :           /* If vect_slp_analyze_bb_1 signaled that analysis for all
    3424                 :            :              vector sizes will fail do not bother iterating.  */
    3425                 :     908570 :           || fatal)
    3426                 :    1343270 :         return vectorized;
    3427                 :            : 
    3428                 :            :       /* Try the next biggest vector size.  */
    3429                 :       8272 :       next_vector_mode = vector_modes[mode_i++];
    3430                 :       8272 :       if (dump_enabled_p ())
    3431                 :         39 :         dump_printf_loc (MSG_NOTE, vect_location,
    3432                 :            :                          "***** Re-trying analysis with vector mode %s\n",
    3433                 :         39 :                          GET_MODE_NAME (next_vector_mode));
    3434                 :       8272 :     }
    3435                 :            : }
    3436                 :            : 
    3437                 :            : /* Main entry for the BB vectorizer.  Analyze and transform BB, returns
    3438                 :            :    true if anything in the basic-block was vectorized.  */
    3439                 :            : 
    3440                 :            : bool
    3441                 :     778595 : vect_slp_bb (basic_block bb)
    3442                 :            : {
    3443                 :     778595 :   gimple_stmt_iterator gsi;
    3444                 :     778595 :   bool any_vectorized = false;
    3445                 :            : 
    3446                 :    1557190 :   gsi = gsi_start_bb (bb);
    3447                 :    1110420 :   while (!gsi_end_p (gsi))
    3448                 :            :     {
    3449                 :     844366 :       gimple_stmt_iterator region_begin = gsi;
    3450                 :     844366 :       vec<data_reference_p> datarefs = vNULL;
    3451                 :     844366 :       int insns = 0;
    3452                 :            : 
    3453                 :    5359930 :       for (; !gsi_end_p (gsi); gsi_next (&gsi))
    3454                 :            :         {
    3455                 :    4847390 :           gimple *stmt = gsi_stmt (gsi);
    3456                 :    4847390 :           if (is_gimple_debug (stmt))
    3457                 :    1171160 :             continue;
    3458                 :    3676230 :           insns++;
    3459                 :            : 
    3460                 :    3676230 :           if (gimple_location (stmt) != UNKNOWN_LOCATION)
    3461                 :    3149770 :             vect_location = stmt;
    3462                 :            : 
    3463                 :    3676230 :           if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
    3464                 :            :             break;
    3465                 :            :         }
    3466                 :            : 
    3467                 :            :       /* Skip leading unhandled stmts.  */
    3468                 :     844366 :       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
    3469                 :            :         {
    3470                 :     172724 :           gsi_next (&gsi);
    3471                 :     172724 :           continue;
    3472                 :            :         }
    3473                 :            : 
    3474                 :     671642 :       gimple_stmt_iterator region_end = gsi;
    3475                 :            : 
    3476                 :     671642 :       if (insns > param_slp_max_insns_in_bb)
    3477                 :            :         {
    3478                 :          5 :           if (dump_enabled_p ())
    3479                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3480                 :            :                              "not vectorized: too many instructions in "
    3481                 :            :                              "basic block.\n");
    3482                 :            :         }
    3483                 :     671637 :       else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
    3484                 :      27690 :         any_vectorized = true;
    3485                 :            : 
    3486                 :     671642 :       if (gsi_end_p (region_end))
    3487                 :            :         break;
    3488                 :            : 
    3489                 :            :       /* Skip the unhandled stmt.  */
    3490                 :     159101 :       gsi_next (&gsi);
    3491                 :            :     }
    3492                 :            : 
    3493                 :     778595 :   return any_vectorized;
    3494                 :            : }
    3495                 :            : 
    3496                 :            : 
    3497                 :            : /* Return 1 if vector type STMT_VINFO is a boolean vector.  */
    3498                 :            : 
    3499                 :            : static bool
    3500                 :        523 : vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
    3501                 :            : {
    3502                 :        523 :   enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
    3503                 :        523 :   tree op, vectype;
    3504                 :        523 :   enum vect_def_type dt;
    3505                 :            : 
    3506                 :            :   /* For comparison and COND_EXPR type is chosen depending
    3507                 :            :      on the non-constant other comparison operand.  */
    3508                 :        523 :   if (TREE_CODE_CLASS (code) == tcc_comparison)
    3509                 :            :     {
    3510                 :          1 :       gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
    3511                 :          1 :       op = gimple_assign_rhs1 (stmt);
    3512                 :            : 
    3513                 :          1 :       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
    3514                 :          0 :         gcc_unreachable ();
    3515                 :            : 
    3516                 :          1 :       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    3517                 :            :     }
    3518                 :            : 
    3519                 :        522 :   if (code == COND_EXPR)
    3520                 :            :     {
    3521                 :          2 :       gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
    3522                 :          2 :       tree cond = gimple_assign_rhs1 (stmt);
    3523                 :            : 
    3524                 :          2 :       if (TREE_CODE (cond) == SSA_NAME)
    3525                 :            :         {
    3526                 :          2 :           if (op_num > 0)
    3527                 :          2 :             return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
    3528                 :            :           op = cond;
    3529                 :            :         }
    3530                 :            :       else
    3531                 :            :         {
    3532                 :          0 :           if (op_num > 1)
    3533                 :          0 :             return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
    3534                 :          0 :           op = TREE_OPERAND (cond, 0);
    3535                 :            :         }
    3536                 :            : 
    3537                 :          0 :       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
    3538                 :          0 :         gcc_unreachable ();
    3539                 :            : 
    3540                 :          0 :       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    3541                 :            :     }
    3542                 :            : 
    3543                 :        520 :   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
    3544                 :            : }
    3545                 :            : 
    3546                 :            : /* Build a variable-length vector in which the elements in ELTS are repeated
    3547                 :            :    to a fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
    3548                 :            :    RESULTS and add any new instructions to SEQ.
    3549                 :            : 
    3550                 :            :    The approach we use is:
    3551                 :            : 
    3552                 :            :    (1) Find a vector mode VM with integer elements of mode IM.
    3553                 :            : 
    3554                 :            :    (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
    3555                 :            :        ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
    3556                 :            :        from small vectors to IM.
    3557                 :            : 
    3558                 :            :    (3) Duplicate each ELTS'[I] into a vector of mode VM.
    3559                 :            : 
    3560                 :            :    (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
    3561                 :            :        correct byte contents.
    3562                 :            : 
    3563                 :            :    (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
    3564                 :            : 
    3565                 :            :    We try to find the largest IM for which this sequence works, in order
    3566                 :            :    to cut down on the number of interleaves.  */
    3567                 :            : 
    3568                 :            : void
    3569                 :          0 : duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
    3570                 :            :                           vec<tree> elts, unsigned int nresults,
    3571                 :            :                           vec<tree> &results)
    3572                 :            : {
    3573                 :          0 :   unsigned int nelts = elts.length ();
    3574                 :          0 :   tree element_type = TREE_TYPE (vector_type);
    3575                 :            : 
    3576                 :            :   /* (1) Find a vector mode VM with integer elements of mode IM.  */
    3577                 :          0 :   unsigned int nvectors = 1;
    3578                 :          0 :   tree new_vector_type;
    3579                 :          0 :   tree permutes[2];
    3580                 :          0 :   if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
    3581                 :            :                                        &nvectors, &new_vector_type,
    3582                 :            :                                        permutes))
    3583                 :          0 :     gcc_unreachable ();
    3584                 :            : 
    3585                 :            :   /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
    3586                 :          0 :   unsigned int partial_nelts = nelts / nvectors;
    3587                 :          0 :   tree partial_vector_type = build_vector_type (element_type, partial_nelts);
    3588                 :            : 
    3589                 :          0 :   tree_vector_builder partial_elts;
    3590                 :          0 :   auto_vec<tree, 32> pieces (nvectors * 2);
    3591                 :          0 :   pieces.quick_grow (nvectors * 2);
    3592                 :          0 :   for (unsigned int i = 0; i < nvectors; ++i)
    3593                 :            :     {
    3594                 :            :       /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
    3595                 :            :              ELTS' has mode IM.  */
    3596                 :          0 :       partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
    3597                 :          0 :       for (unsigned int j = 0; j < partial_nelts; ++j)
    3598                 :          0 :         partial_elts.quick_push (elts[i * partial_nelts + j]);
    3599                 :          0 :       tree t = gimple_build_vector (seq, &partial_elts);
    3600                 :          0 :       t = gimple_build (seq, VIEW_CONVERT_EXPR,
    3601                 :          0 :                         TREE_TYPE (new_vector_type), t);
    3602                 :            : 
    3603                 :            :       /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
    3604                 :          0 :       pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    3605                 :            :     }
    3606                 :            : 
    3607                 :            :   /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
    3608                 :            :          correct byte contents.
    3609                 :            : 
    3610                 :            :      We need to repeat the following operation log2(nvectors) times:
    3611                 :            : 
    3612                 :            :         out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
    3613                 :            :         out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
    3614                 :            : 
    3615                 :            :      However, if each input repeats every N elements and the VF is
    3616                 :            :      a multiple of N * 2, the HI result is the same as the LO.  */
    3617                 :          0 :   unsigned int in_start = 0;
    3618                 :          0 :   unsigned int out_start = nvectors;
    3619                 :          0 :   unsigned int hi_start = nvectors / 2;
    3620                 :            :   /* A bound on the number of outputs needed to produce NRESULTS results
    3621                 :            :      in the final iteration.  */
    3622                 :          0 :   unsigned int noutputs_bound = nvectors * nresults;
    3623                 :          0 :   for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    3624                 :            :     {
    3625                 :          0 :       noutputs_bound /= 2;
    3626                 :          0 :       unsigned int limit = MIN (noutputs_bound, nvectors);
    3627                 :          0 :       for (unsigned int i = 0; i < limit; ++i)
    3628                 :            :         {
    3629                 :          0 :           if ((i & 1) != 0
    3630                 :          0 :               && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
    3631                 :            :                              2 * in_repeat))
    3632                 :            :             {
    3633                 :          0 :               pieces[out_start + i] = pieces[out_start + i - 1];
    3634                 :          0 :               continue;
    3635                 :            :             }
    3636                 :            : 
    3637                 :          0 :           tree output = make_ssa_name (new_vector_type);
    3638                 :          0 :           tree input1 = pieces[in_start + (i / 2)];
    3639                 :          0 :           tree input2 = pieces[in_start + (i / 2) + hi_start];
    3640                 :          0 :           gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
    3641                 :            :                                                input1, input2,
    3642                 :            :                                                permutes[i & 1]);
    3643                 :          0 :           gimple_seq_add_stmt (seq, stmt);
    3644                 :          0 :           pieces[out_start + i] = output;
    3645                 :            :         }
    3646                 :          0 :       std::swap (in_start, out_start);
    3647                 :            :     }
    3648                 :            : 
    3649                 :            :   /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
    3650                 :          0 :   results.reserve (nresults);
    3651                 :          0 :   for (unsigned int i = 0; i < nresults; ++i)
    3652                 :          0 :     if (i < nvectors)
    3653                 :          0 :       results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
    3654                 :          0 :                                         pieces[in_start + i]));
    3655                 :            :     else
    3656                 :          0 :       results.quick_push (results[i - nvectors]);
    3657                 :          0 : }
    3658                 :            : 
    3659                 :            : 
    3660                 :            : /* For constant and loop invariant defs of SLP_NODE this function returns
    3661                 :            :    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
    3662                 :            :    OP_NODE determines the node for the operand containing the scalar
    3663                 :            :    operands.  */
    3664                 :            : 
    3665                 :            : static void
    3666                 :      48528 : vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
    3667                 :            :                            vec<tree> *vec_oprnds)
    3668                 :            : {
    3669                 :      48528 :   slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
    3670                 :      48528 :   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
    3671                 :      48528 :   vec_info *vinfo = stmt_vinfo->vinfo;
    3672                 :      48528 :   unsigned HOST_WIDE_INT nunits;
    3673                 :      48528 :   tree vec_cst;
    3674                 :      48528 :   unsigned j, number_of_places_left_in_vector;
    3675                 :      48528 :   tree vector_type;
    3676                 :      48528 :   tree vop;
    3677                 :      48528 :   int group_size = op_node->ops.length ();
    3678                 :      48528 :   unsigned int vec_num, i;
    3679                 :      48528 :   unsigned number_of_copies = 1;
    3680                 :      48528 :   bool constant_p;
    3681                 :      48528 :   tree neutral_op = NULL;
    3682                 :      48528 :   gimple_seq ctor_seq = NULL;
    3683                 :      48528 :   auto_vec<tree, 16> permute_results;
    3684                 :            : 
    3685                 :            :   /* ???  SLP analysis should compute the vector type for the
    3686                 :            :      constant / invariant and store it in the SLP node.  */
    3687                 :      48528 :   tree op = op_node->ops[0];
    3688                 :            :   /* Check if vector type is a boolean vector.  */
    3689                 :      48528 :   tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
    3690                 :      96533 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
    3691                 :      48528 :       && vect_mask_constant_operand_p (stmt_vinfo, op_num))
    3692                 :          3 :     vector_type = truth_type_for (stmt_vectype);
    3693                 :            :   else
    3694                 :      48525 :     vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
    3695                 :            : 
    3696                 :            :   /* ???  For lane-reducing ops we should also have the required number
    3697                 :            :      of vector stmts initialized rather than second-guessing here.  */
    3698                 :      48528 :   unsigned int number_of_vectors;
    3699                 :      48528 :   if (is_gimple_assign (stmt_vinfo->stmt)
    3700                 :      48528 :       && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
    3701                 :      48522 :           || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
    3702                 :      48522 :           || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
    3703                 :          2 :     number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    3704                 :            :   else
    3705                 :      48526 :     number_of_vectors
    3706                 :      48526 :       = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
    3707                 :      97052 :                               * TYPE_VECTOR_SUBPARTS (stmt_vectype),
    3708                 :            :                               vector_type);
    3709                 :      48528 :   vec_oprnds->create (number_of_vectors);
    3710                 :      97056 :   auto_vec<tree> voprnds (number_of_vectors);
    3711                 :            : 
    3712                 :            :   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
    3713                 :            :      created vectors. It is greater than 1 if unrolling is performed.
    3714                 :            : 
    3715                 :            :      For example, we have two scalar operands, s1 and s2 (e.g., group of
    3716                 :            :      strided accesses of size two), while NUNITS is four (i.e., four scalars
    3717                 :            :      of this type can be packed in a vector).  The output vector will contain
    3718                 :            :      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
    3719                 :            :      will be 2).
    3720                 :            : 
    3721                 :            :      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
    3722                 :            :      containing the operands.
    3723                 :            : 
    3724                 :            :      For example, NUNITS is four as before, and the group size is 8
    3725                 :            :      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
    3726                 :            :      {s5, s6, s7, s8}.  */
    3727                 :            : 
    3728                 :            :   /* When using duplicate_and_interleave, we just need one element for
    3729                 :            :      each scalar statement.  */
    3730                 :      48528 :   if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    3731                 :            :     nunits = group_size;
    3732                 :            : 
    3733                 :      48528 :   number_of_copies = nunits * number_of_vectors / group_size;
    3734                 :            : 
    3735                 :      48528 :   number_of_places_left_in_vector = nunits;
    3736                 :      48528 :   constant_p = true;
    3737                 :      97056 :   tree_vector_builder elts (vector_type, nunits, 1);
    3738                 :      48528 :   elts.quick_grow (nunits);
    3739                 :      48528 :   bool place_after_defs = false;
    3740                 :     100432 :   for (j = 0; j < number_of_copies; j++)
    3741                 :            :     {
    3742                 :     284940 :       for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
    3743                 :            :         {
    3744                 :            :           /* Create 'vect_ = {op0,op1,...,opn}'.  */
    3745                 :     181132 :           number_of_places_left_in_vector--;
    3746                 :     181132 :           tree orig_op = op;
    3747                 :     181132 :           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
    3748                 :            :             {
    3749                 :      13637 :               if (CONSTANT_CLASS_P (op))
    3750                 :            :                 {
    3751                 :      10308 :                   if (VECTOR_BOOLEAN_TYPE_P (vector_type))
    3752                 :            :                     {
    3753                 :            :                       /* Can't use VIEW_CONVERT_EXPR for booleans because
    3754                 :            :                          of possibly different sizes of scalar value and
    3755                 :            :                          vector element.  */
    3756                 :          0 :                       if (integer_zerop (op))
    3757                 :          0 :                         op = build_int_cst (TREE_TYPE (vector_type), 0);
    3758                 :          0 :                       else if (integer_onep (op))
    3759                 :          0 :                         op = build_all_ones_cst (TREE_TYPE (vector_type));
    3760                 :            :                       else
    3761                 :          0 :                         gcc_unreachable ();
    3762                 :            :                     }
    3763                 :            :                   else
    3764                 :       5154 :                     op = fold_unary (VIEW_CONVERT_EXPR,
    3765                 :            :                                      TREE_TYPE (vector_type), op);
    3766                 :       5154 :                   gcc_assert (op && CONSTANT_CLASS_P (op));
    3767                 :            :                 }
    3768                 :            :               else
    3769                 :            :                 {
    3770                 :       8483 :                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
    3771                 :       8483 :                   gimple *init_stmt;
    3772                 :      16966 :                   if (VECTOR_BOOLEAN_TYPE_P (vector_type))
    3773                 :            :                     {
    3774                 :          8 :                       tree true_val
    3775                 :          8 :                         = build_all_ones_cst (TREE_TYPE (vector_type));
    3776                 :          8 :                       tree false_val
    3777                 :          8 :                         = build_zero_cst (TREE_TYPE (vector_type));
    3778                 :          8 :                       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
    3779                 :          8 :                       init_stmt = gimple_build_assign (new_temp, COND_EXPR,
    3780                 :            :                                                        op, true_val,
    3781                 :            :                                                        false_val);
    3782                 :            :                     }
    3783                 :            :                   else
    3784                 :            :                     {
    3785                 :       8475 :                       op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
    3786                 :            :                                    op);
    3787                 :       8475 :                       init_stmt
    3788                 :       8475 :                         = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
    3789                 :            :                                                op);
    3790                 :            :                     }
    3791                 :       8483 :                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
    3792                 :       8483 :                   op = new_temp;
    3793                 :            :                 }
    3794                 :            :             }
    3795                 :     181132 :           elts[number_of_places_left_in_vector] = op;
    3796                 :     181132 :           if (!CONSTANT_CLASS_P (op))
    3797                 :      26342 :             constant_p = false;
    3798                 :     181132 :           if (TREE_CODE (orig_op) == SSA_NAME
    3799                 :      19535 :               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
    3800                 :      15389 :               && STMT_VINFO_BB_VINFO (stmt_vinfo)
    3801                 :     192825 :               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
    3802                 :      11693 :                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
    3803                 :            :             place_after_defs = true;
    3804                 :            : 
    3805                 :     181132 :           if (number_of_places_left_in_vector == 0)
    3806                 :            :             {
    3807                 :     149728 :               if (constant_p
    3808                 :     149728 :                   ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
    3809                 :      74864 :                   : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
    3810                 :      74864 :                 vec_cst = gimple_build_vector (&ctor_seq, &elts);
    3811                 :            :               else
    3812                 :            :                 {
    3813                 :          0 :                   if (permute_results.is_empty ())
    3814                 :          0 :                     duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
    3815                 :            :                                               elts, number_of_vectors,
    3816                 :            :                                               permute_results);
    3817                 :          0 :                   vec_cst = permute_results[number_of_vectors - j - 1];
    3818                 :            :                 }
    3819                 :      74864 :               tree init;
    3820                 :      74864 :               gimple_stmt_iterator gsi;
    3821                 :      74864 :               if (place_after_defs)
    3822                 :            :                 {
    3823                 :       3347 :                   stmt_vec_info last_stmt_info
    3824                 :       3347 :                     = vect_find_last_scalar_stmt_in_slp (slp_node);
    3825                 :       3347 :                   gsi = gsi_for_stmt (last_stmt_info->stmt);
    3826                 :       3347 :                   init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
    3827                 :            :                                            &gsi);
    3828                 :            :                 }
    3829                 :            :               else
    3830                 :      71517 :                 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
    3831                 :            :                                          NULL);
    3832                 :      74864 :               if (ctor_seq != NULL)
    3833                 :            :                 {
    3834                 :      10785 :                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
    3835                 :      10785 :                   gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
    3836                 :      10785 :                   ctor_seq = NULL;
    3837                 :            :                 }
    3838                 :      74864 :               voprnds.quick_push (init);
    3839                 :      74864 :               place_after_defs = false;
    3840                 :      74864 :               number_of_places_left_in_vector = nunits;
    3841                 :      74864 :               constant_p = true;
    3842                 :      74864 :               elts.new_vector (vector_type, nunits, 1);
    3843                 :      74864 :               elts.quick_grow (nunits);
    3844                 :            :             }
    3845                 :            :         }
    3846                 :            :     }
    3847                 :            : 
    3848                 :            :   /* Since the vectors are created in the reverse order, we should invert
    3849                 :            :      them.  */
    3850                 :      48528 :   vec_num = voprnds.length ();
    3851                 :     123392 :   for (j = vec_num; j != 0; j--)
    3852                 :            :     {
    3853                 :      74864 :       vop = voprnds[j - 1];
    3854                 :      74864 :       vec_oprnds->quick_push (vop);
    3855                 :            :     }
    3856                 :            : 
    3857                 :            :   /* In case that VF is greater than the unrolling factor needed for the SLP
    3858                 :            :      group of stmts, NUMBER_OF_VECTORS to be created is greater than
    3859                 :            :      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
    3860                 :            :      to replicate the vectors.  */
    3861                 :      48528 :   while (number_of_vectors > vec_oprnds->length ())
    3862                 :            :     {
    3863                 :          0 :       tree neutral_vec = NULL;
    3864                 :            : 
    3865                 :            :       if (neutral_op)
    3866                 :            :         {
    3867                 :            :           if (!neutral_vec)
    3868                 :            :             neutral_vec = build_vector_from_val (vector_type, neutral_op);
    3869                 :            : 
    3870                 :      48528 :           vec_oprnds->quick_push (neutral_vec);
    3871                 :            :         }
    3872                 :            :       else
    3873                 :            :         {
    3874                 :      48528 :           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
    3875                 :          0 :             vec_oprnds->quick_push (vop);
    3876                 :            :         }
    3877                 :            :     }
    3878                 :      48528 : }
    3879                 :            : 
    3880                 :            : 
    3881                 :            : /* Get vectorized definitions from SLP_NODE that contains corresponding
    3882                 :            :    vectorized def-stmts.  */
    3883                 :            : 
    3884                 :            : static void
    3885                 :      10961 : vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
    3886                 :            : {
    3887                 :      10961 :   stmt_vec_info vec_def_stmt_info;
    3888                 :      10961 :   unsigned int i;
    3889                 :            : 
    3890                 :      10961 :   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
    3891                 :            : 
    3892                 :      31362 :   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
    3893                 :      20401 :     vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
    3894                 :      10961 : }
    3895                 :            : 
    3896                 :            : 
    3897                 :            : /* Get N vectorized definitions for SLP_NODE.
    3898                 :            :    If the scalar definitions are loop invariants or constants, collect them and
    3899                 :            :    call vect_get_constant_vectors() to create vector stmts.
    3900                 :            :    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
    3901                 :            :    must be stored in the corresponding child of SLP_NODE, and we call
    3902                 :            :    vect_get_slp_vect_defs () to retrieve them.  */
    3903                 :            : 
    3904                 :            : void
    3905                 :      54885 : vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
    3906                 :            : {
    3907                 :      54885 :   if (n == -1U)
    3908                 :        175 :     n = SLP_TREE_CHILDREN (slp_node).length ();
    3909                 :            : 
    3910                 :     114374 :   for (unsigned i = 0; i < n; ++i)
    3911                 :            :     {
    3912                 :      59489 :       slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
    3913                 :            : 
    3914                 :      59489 :       vec<tree> vec_defs = vNULL;
    3915                 :            : 
    3916                 :            :       /* For each operand we check if it has vectorized definitions in a child
    3917                 :            :          node or we need to create them (for invariants and constants).  */
    3918                 :      59489 :       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
    3919                 :            :         {
    3920                 :      10961 :           vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
    3921                 :      10961 :           vect_get_slp_vect_defs (child, &vec_defs);
    3922                 :            :         }
    3923                 :            :       else
    3924                 :      48528 :         vect_get_constant_vectors (slp_node, i, &vec_defs);
    3925                 :            : 
    3926                 :      59489 :       vec_oprnds->quick_push (vec_defs);
    3927                 :            :     }
    3928                 :      54885 : }
    3929                 :            : 
    3930                 :            : /* Generate vector permute statements from a list of loads in DR_CHAIN.
    3931                 :            :    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
    3932                 :            :    permute statements for the SLP node NODE of the SLP instance
    3933                 :            :    SLP_NODE_INSTANCE.  */
    3934                 :            : 
    3935                 :            : bool
    3936                 :       6845 : vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
    3937                 :            :                               gimple_stmt_iterator *gsi, poly_uint64 vf,
    3938                 :            :                               slp_instance slp_node_instance, bool analyze_only,
    3939                 :            :                               unsigned *n_perms)
    3940                 :            : {
    3941                 :       6845 :   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    3942                 :       6845 :   vec_info *vinfo = stmt_info->vinfo;
    3943                 :       6845 :   int vec_index = 0;
    3944                 :       6845 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    3945                 :       6845 :   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
    3946                 :       6845 :   unsigned int mask_element;
    3947                 :       6845 :   machine_mode mode;
    3948                 :            : 
    3949                 :       6845 :   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    3950                 :            :     return false;
    3951                 :            : 
    3952                 :       6845 :   stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    3953                 :            : 
    3954                 :       6845 :   mode = TYPE_MODE (vectype);
    3955                 :       6845 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    3956                 :            : 
    3957                 :            :   /* Initialize the vect stmts of NODE to properly insert the generated
    3958                 :            :      stmts later.  */
    3959                 :       6845 :   if (! analyze_only)
    3960                 :       1711 :     for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
    3961                 :       4239 :          i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    3962                 :       2528 :       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
    3963                 :            : 
    3964                 :            :   /* Generate permutation masks for every NODE. Number of masks for each NODE
    3965                 :            :      is equal to GROUP_SIZE.
    3966                 :            :      E.g., we have a group of three nodes with three loads from the same
    3967                 :            :      location in each node, and the vector size is 4. I.e., we have a
    3968                 :            :      a0b0c0a1b1c1... sequence and we need to create the following vectors:
    3969                 :            :      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
    3970                 :            :      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
    3971                 :            :      ...
    3972                 :            : 
    3973                 :            :      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
    3974                 :            :      The last mask is illegal since we assume two operands for permute
    3975                 :            :      operation, and the mask element values can't be outside that range.
    3976                 :            :      Hence, the last mask must be converted into {2,5,5,5}.
    3977                 :            :      For the first two permutations we need the first and the second input
    3978                 :            :      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
    3979                 :            :      we need the second and the third vectors: {b1,c1,a2,b2} and
    3980                 :            :      {c2,a3,b3,c3}.  */
    3981                 :            : 
    3982                 :       6845 :   int vect_stmts_counter = 0;
    3983                 :       6845 :   unsigned int index = 0;
    3984                 :       6845 :   int first_vec_index = -1;
    3985                 :       6845 :   int second_vec_index = -1;
    3986                 :       6845 :   bool noop_p = true;
    3987                 :       6845 :   *n_perms = 0;
    3988                 :            : 
    3989                 :       6845 :   vec_perm_builder mask;
    3990                 :       6845 :   unsigned int nelts_to_build;
    3991                 :       6845 :   unsigned int nvectors_per_build;
    3992                 :       6845 :   bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
    3993                 :       6845 :                       && multiple_p (nunits, group_size));
    3994                 :       6845 :   if (repeating_p)
    3995                 :            :     {
    3996                 :            :       /* A single vector contains a whole number of copies of the node, so:
    3997                 :            :          (a) all permutes can use the same mask; and
    3998                 :            :          (b) the permutes only need a single vector input.  */
    3999                 :       4275 :       mask.new_vector (nunits, group_size, 3);
    4000                 :       4275 :       nelts_to_build = mask.encoded_nelts ();
    4001                 :       4275 :       nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    4002                 :            :     }
    4003                 :            :   else
    4004                 :            :     {
    4005                 :            :       /* We need to construct a separate mask for each vector statement.  */
    4006                 :       2570 :       unsigned HOST_WIDE_INT const_nunits, const_vf;
    4007                 :       2570 :       if (!nunits.is_constant (&const_nunits)
    4008                 :       2570 :           || !vf.is_constant (&const_vf))
    4009                 :            :         return false;
    4010                 :       2570 :       mask.new_vector (const_nunits, const_nunits, 1);
    4011                 :       2570 :       nelts_to_build = const_vf * group_size;
    4012                 :       2570 :       nvectors_per_build = 1;
    4013                 :            :     }
    4014                 :            : 
    4015                 :       6845 :   unsigned int count = mask.encoded_nelts ();
    4016                 :       6845 :   mask.quick_grow (count);
    4017                 :      13690 :   vec_perm_indices indices;
    4018                 :            : 
    4019                 :      60546 :   for (unsigned int j = 0; j < nelts_to_build; j++)
    4020                 :            :     {
    4021                 :      54318 :       unsigned int iter_num = j / group_size;
    4022                 :      54318 :       unsigned int stmt_num = j % group_size;
    4023                 :      54318 :       unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
    4024                 :      54318 :                         + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
    4025                 :      54318 :       if (repeating_p)
    4026                 :            :         {
    4027                 :            :           first_vec_index = 0;
    4028                 :            :           mask_element = i;
    4029                 :            :         }
    4030                 :            :       else
    4031                 :            :         {
    4032                 :            :           /* Enforced before the loop when !repeating_p.  */
    4033                 :      27168 :           unsigned int const_nunits = nunits.to_constant ();
    4034                 :      27168 :           vec_index = i / const_nunits;
    4035                 :      27168 :           mask_element = i % const_nunits;
    4036                 :      27168 :           if (vec_index == first_vec_index
    4037                 :      27168 :               || first_vec_index == -1)
    4038                 :            :             {
    4039                 :            :               first_vec_index = vec_index;
    4040                 :            :             }
    4041                 :       7677 :           else if (vec_index == second_vec_index
    4042                 :       7677 :                    || second_vec_index == -1)
    4043                 :            :             {
    4044                 :       7577 :               second_vec_index = vec_index;
    4045                 :       7577 :               mask_element += const_nunits;
    4046                 :            :             }
    4047                 :            :           else
    4048                 :            :             {
    4049                 :        100 :               if (dump_enabled_p ())
    4050                 :         12 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    4051                 :            :                                  "permutation requires at "
    4052                 :            :                                  "least three vectors %G",
    4053                 :            :                                  stmt_info->stmt);
    4054                 :        100 :               gcc_assert (analyze_only);
    4055                 :            :               return false;
    4056                 :            :             }
    4057                 :            : 
    4058                 :      27068 :           gcc_assert (mask_element < 2 * const_nunits);
    4059                 :            :         }
    4060                 :            : 
    4061                 :      54218 :       if (mask_element != index)
    4062                 :      36783 :         noop_p = false;
    4063                 :      54218 :       mask[index++] = mask_element;
    4064                 :            : 
    4065                 :      54218 :       if (index == count && !noop_p)
    4066                 :            :         {
    4067                 :      11818 :           indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
    4068                 :       9211 :           if (!can_vec_perm_const_p (mode, indices))
    4069                 :            :             {
    4070                 :        517 :               if (dump_enabled_p ())
    4071                 :            :                 {
    4072                 :         61 :                   dump_printf_loc (MSG_MISSED_OPTIMIZATION,
    4073                 :            :                                    vect_location,
    4074                 :            :                                    "unsupported vect permute { ");
    4075                 :        419 :                   for (i = 0; i < count; ++i)
    4076                 :            :                     {
    4077                 :        358 :                       dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
    4078                 :        358 :                       dump_printf (MSG_MISSED_OPTIMIZATION, " ");
    4079                 :            :                     }
    4080                 :         61 :                   dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
    4081                 :            :                 }
    4082                 :        517 :               gcc_assert (analyze_only);
    4083                 :            :               return false;
    4084                 :            :             }
    4085                 :            : 
    4086                 :       8694 :           ++*n_perms;
    4087                 :            :         }
    4088                 :            : 
    4089                 :      53701 :       if (index == count)
    4090                 :            :         {
    4091                 :       9771 :           if (!analyze_only)
    4092                 :            :             {
    4093                 :       2455 :               tree mask_vec = NULL_TREE;
    4094                 :            :                   
    4095                 :       2455 :               if (! noop_p)
    4096                 :       2202 :                 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
    4097                 :            : 
    4098                 :       2455 :               if (second_vec_index == -1)
    4099                 :       1882 :                 second_vec_index = first_vec_index;
    4100                 :            : 
    4101                 :       4983 :               for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
    4102                 :            :                 {
    4103                 :            :                   /* Generate the permute statement if necessary.  */
    4104                 :       2528 :                   tree first_vec = dr_chain[first_vec_index + ri];
    4105                 :       2528 :                   tree second_vec = dr_chain[second_vec_index + ri];
    4106                 :       2528 :                   stmt_vec_info perm_stmt_info;
    4107                 :       2528 :                   if (! noop_p)
    4108                 :            :                     {
    4109                 :       2275 :                       gassign *stmt = as_a <gassign *> (stmt_info->stmt);
    4110                 :       2275 :                       tree perm_dest
    4111                 :       2275 :                         = vect_create_destination_var (gimple_assign_lhs (stmt),
    4112                 :            :                                                        vectype);
    4113                 :       2275 :                       perm_dest = make_ssa_name (perm_dest);
    4114                 :       2275 :                       gassign *perm_stmt
    4115                 :       2275 :                         = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
    4116                 :            :                                                first_vec, second_vec,
    4117                 :            :                                                mask_vec);
    4118                 :       2275 :                       perm_stmt_info
    4119                 :       2275 :                         = vect_finish_stmt_generation (stmt_info, perm_stmt,
    4120                 :            :                                                        gsi);
    4121                 :            :                     }
    4122                 :            :                   else
    4123                 :            :                     /* If mask was NULL_TREE generate the requested
    4124                 :            :                        identity transform.  */
    4125                 :        253 :                     perm_stmt_info = vinfo->lookup_def (first_vec);
    4126                 :            : 
    4127                 :            :                   /* Store the vector statement in NODE.  */
    4128                 :       2528 :                   SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
    4129                 :       2528 :                     = perm_stmt_info;
    4130                 :            :                 }
    4131                 :            :             }
    4132                 :            : 
    4133                 :            :           index = 0;
    4134                 :            :           first_vec_index = -1;
    4135                 :            :           second_vec_index = -1;
    4136                 :            :           noop_p = true;
    4137                 :            :         }
    4138                 :            :     }
    4139                 :            : 
    4140                 :            :   return true;
    4141                 :            : }
    4142                 :            : 
    4143                 :            : /* Vectorize SLP instance tree in postorder.  */
    4144                 :            : 
    4145                 :            : static void
    4146                 :     109008 : vect_schedule_slp_instance (slp_tree node, slp_instance instance)
    4147                 :            : {
    4148                 :     109008 :   gimple_stmt_iterator si;
    4149                 :     109008 :   stmt_vec_info stmt_info;
    4150                 :     109008 :   unsigned int group_size;
    4151                 :     109008 :   tree vectype;
    4152                 :     109008 :   int i, j;
    4153                 :     109008 :   slp_tree child;
    4154                 :            : 
    4155                 :     109008 :   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    4156                 :      48869 :     return;
    4157                 :            : 
    4158                 :            :   /* See if we have already vectorized the node in the graph of the
    4159                 :            :      SLP instance.  */
    4160                 :      60346 :   if (SLP_TREE_VEC_STMTS (node).exists ())
    4161                 :            :     return;
    4162                 :            : 
    4163                 :     178552 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    4164                 :      58903 :     vect_schedule_slp_instance (child, instance);
    4165                 :            : 
    4166                 :            :   /* Push SLP node def-type to stmts.  */
    4167                 :     119042 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    4168                 :      58903 :     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
    4169                 :            :       {
    4170                 :            :         stmt_vec_info child_stmt_info;
    4171                 :      64898 :         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
    4172                 :       4780 :           STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
    4173                 :            :       }
    4174                 :            : 
    4175                 :      60139 :   stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    4176                 :            : 
    4177                 :            :   /* VECTYPE is the type of the destination.  */
    4178                 :      60139 :   vectype = STMT_VINFO_VECTYPE (stmt_info);
    4179                 :      60139 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    4180                 :      60139 :   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
    4181                 :            : 
    4182                 :      60139 :   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
    4183                 :      60139 :   SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
    4184                 :            : 
    4185                 :      60139 :   if (dump_enabled_p ())
    4186                 :       4687 :     dump_printf_loc (MSG_NOTE, vect_location,
    4187                 :            :                      "------>vectorizing SLP node starting from: %G",
    4188                 :            :                      stmt_info->stmt);
    4189                 :            : 
    4190                 :            :   /* Vectorized stmts go before the last scalar stmt which is where
    4191                 :            :      all uses are ready.  */
    4192                 :      60139 :   stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
    4193                 :      60139 :   si = gsi_for_stmt (last_stmt_info->stmt);
    4194                 :            : 
    4195                 :            :   /* Handle two-operation SLP nodes by vectorizing the group with
    4196                 :            :      both operations and then performing a merge.  */
    4197                 :      60139 :   bool done_p = false;
    4198                 :      60139 :   if (SLP_TREE_TWO_OPERATORS (node))
    4199                 :            :     {
    4200                 :        373 :       gassign *stmt = as_a <gassign *> (stmt_info->stmt);
    4201                 :        373 :       enum tree_code code0 = gimple_assign_rhs_code (stmt);
    4202                 :        373 :       enum tree_code ocode = ERROR_MARK;
    4203                 :        373 :       stmt_vec_info ostmt_info;
    4204                 :        373 :       vec_perm_builder mask (group_size, group_size, 1);
    4205                 :       1159 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
    4206                 :            :         {
    4207                 :        786 :           gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
    4208                 :        786 :           if (gimple_assign_rhs_code (ostmt) != code0)
    4209                 :            :             {
    4210                 :        393 :               mask.quick_push (1);
    4211                 :        393 :               ocode = gimple_assign_rhs_code (ostmt);
    4212                 :            :             }
    4213                 :            :           else
    4214                 :        393 :             mask.quick_push (0);
    4215                 :            :         }
    4216                 :        373 :       if (ocode != ERROR_MARK)
    4217                 :            :         {
    4218                 :        373 :           vec<stmt_vec_info> v0;
    4219                 :        373 :           vec<stmt_vec_info> v1;
    4220                 :        373 :           unsigned j;
    4221                 :        373 :           tree tmask = NULL_TREE;
    4222                 :        373 :           vect_transform_stmt (stmt_info, &si, node, instance);
    4223                 :        373 :           v0 = SLP_TREE_VEC_STMTS (node).copy ();
    4224                 :        373 :           SLP_TREE_VEC_STMTS (node).truncate (0);
    4225                 :        373 :           gimple_assign_set_rhs_code (stmt, ocode);
    4226                 :        373 :           vect_transform_stmt (stmt_info, &si, node, instance);
    4227                 :        373 :           gimple_assign_set_rhs_code (stmt, code0);
    4228                 :        373 :           v1 = SLP_TREE_VEC_STMTS (node).copy ();
    4229                 :        373 :           SLP_TREE_VEC_STMTS (node).truncate (0);
    4230                 :        373 :           tree meltype = build_nonstandard_integer_type
    4231                 :        746 :               (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
    4232                 :        373 :           tree mvectype = get_same_sized_vectype (meltype, vectype);
    4233                 :        373 :           unsigned k = 0, l;
    4234                 :       1518 :           for (j = 0; j < v0.length (); ++j)
    4235                 :            :             {
    4236                 :            :               /* Enforced by vect_build_slp_tree, which rejects variable-length
    4237                 :            :                  vectors for SLP_TREE_TWO_OPERATORS.  */
    4238                 :        386 :               unsigned int const_nunits = nunits.to_constant ();
    4239                 :        386 :               tree_vector_builder melts (mvectype, const_nunits, 1);
    4240                 :       2336 :               for (l = 0; l < const_nunits; ++l)
    4241                 :            :                 {
    4242                 :       1950 :                   if (k >= group_size)
    4243                 :        582 :                     k = 0;
    4244                 :       1950 :                   tree t = build_int_cst (meltype,
    4245                 :       1950 :                                           mask[k++] * const_nunits + l);
    4246                 :       1950 :                   melts.quick_push (t);
    4247                 :            :                 }
    4248                 :        386 :               tmask = melts.build ();
    4249                 :            : 
    4250                 :            :               /* ???  Not all targets support a VEC_PERM_EXPR with a
    4251                 :            :                  constant mask that would translate to a vec_merge RTX
    4252                 :            :                  (with their vec_perm_const_ok).  We can either not
    4253                 :            :                  vectorize in that case or let veclower do its job.
    4254                 :            :                  Unfortunately that isn't too great and at least for
    4255                 :            :                  plus/minus we'd eventually like to match targets
    4256                 :            :                  vector addsub instructions.  */
    4257                 :        386 :               gimple *vstmt;
    4258                 :        386 :               vstmt = gimple_build_assign (make_ssa_name (vectype),
    4259                 :            :                                            VEC_PERM_EXPR,
    4260                 :        386 :                                            gimple_assign_lhs (v0[j]->stmt),
    4261                 :        386 :                                            gimple_assign_lhs (v1[j]->stmt),
    4262                 :            :                                            tmask);
    4263                 :        386 :               SLP_TREE_VEC_STMTS (node).quick_push
    4264                 :        386 :                 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
    4265                 :            :             }
    4266                 :        373 :           v0.release ();
    4267                 :        746 :           v1.release ();
    4268                 :            :           done_p = true;
    4269                 :            :         }
    4270                 :            :     }
    4271                 :        373 :   if (!done_p)
    4272                 :      59766 :     vect_transform_stmt (stmt_info, &si, node, instance);
    4273                 :            : 
    4274                 :            :   /* Restore stmt def-types.  */
    4275                 :     178552 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    4276                 :      58903 :     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
    4277                 :            :       {
    4278                 :            :         stmt_vec_info child_stmt_info;
    4279                 :      64898 :         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
    4280                 :       4780 :           STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
    4281                 :            :       }
    4282                 :            : }
    4283                 :            : 
    4284                 :            : /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
    4285                 :            :    For loop vectorization this is done in vectorizable_call, but for SLP
    4286                 :            :    it needs to be deferred until end of vect_schedule_slp, because multiple
    4287                 :            :    SLP instances may refer to the same scalar stmt.  */
    4288                 :            : 
    4289                 :            : static void
    4290                 :      10861 : vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
    4291                 :            : {
    4292                 :      10861 :   gimple *new_stmt;
    4293                 :      10861 :   gimple_stmt_iterator gsi;
    4294                 :      10861 :   int i;
    4295                 :      10861 :   slp_tree child;
    4296                 :      10861 :   tree lhs;
    4297                 :      10861 :   stmt_vec_info stmt_info;
    4298                 :            : 
    4299                 :      10861 :   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    4300                 :       1709 :     return;
    4301                 :            : 
    4302                 :       9216 :   if (visited.add (node))
    4303                 :            :     return;
    4304                 :            : 
    4305                 :      17777 :   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    4306                 :       8625 :     vect_remove_slp_scalar_calls (child, visited);
    4307                 :            : 
    4308                 :      55912 :   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    4309                 :            :     {
    4310                 :      37608 :       gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
    4311                 :         34 :       if (!stmt || gimple_bb (stmt) == NULL)
    4312                 :      37574 :         continue;
    4313                 :         34 :       if (is_pattern_stmt_p (stmt_info)
    4314                 :         34 :           || !PURE_SLP_STMT (stmt_info))
    4315                 :          2 :         continue;
    4316                 :         32 :       lhs = gimple_call_lhs (stmt);
    4317                 :         32 :       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
    4318                 :         32 :       gsi = gsi_for_stmt (stmt);
    4319                 :         32 :       stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
    4320                 :         32 :       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    4321                 :            :     }
    4322                 :            : }
    4323                 :            : 
    4324                 :            : static void
    4325                 :       2236 : vect_remove_slp_scalar_calls (slp_tree node)
    4326                 :            : {
    4327                 :       2236 :   hash_set<slp_tree> visited;
    4328                 :       2236 :   vect_remove_slp_scalar_calls (node, visited);
    4329                 :       2236 : }
    4330                 :            : 
    4331                 :            : /* Vectorize the instance root.  */
    4332                 :            : 
    4333                 :            : void
    4334                 :        228 : vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
    4335                 :            : {
    4336                 :        228 :   gassign *rstmt = NULL;
    4337                 :            : 
    4338                 :        228 :   if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    4339                 :            :     {
    4340                 :        228 :       stmt_vec_info child_stmt_info;
    4341                 :        228 :       int j;
    4342                 :            : 
    4343                 :        228 :       FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
    4344                 :            :         {
    4345                 :        228 :           tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
    4346                 :        228 :           tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
    4347                 :        456 :           if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
    4348                 :        228 :                                           TREE_TYPE (vect_lhs)))
    4349                 :          0 :             vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
    4350                 :            :                                vect_lhs);
    4351                 :        228 :           rstmt = gimple_build_assign (root_lhs, vect_lhs);
    4352                 :        228 :           break;
    4353                 :            :         }
    4354                 :            :     }
    4355                 :          0 :   else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    4356                 :            :     {
    4357                 :          0 :       int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
    4358                 :          0 :       stmt_vec_info child_stmt_info;
    4359                 :          0 :       int j;
    4360                 :          0 :       vec<constructor_elt, va_gc> *v;
    4361                 :          0 :       vec_alloc (v, nelts);
    4362                 :            : 
    4363                 :          0 :       FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
    4364                 :            :         {
    4365                 :          0 :           CONSTRUCTOR_APPEND_ELT (v,
    4366                 :            :                                   NULL_TREE,
    4367                 :            :                                   gimple_get_lhs (child_stmt_info->stmt));
    4368                 :            :         }
    4369                 :          0 :       tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
    4370                 :          0 :       tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
    4371                 :          0 :       tree r_constructor = build_constructor (rtype, v);
    4372                 :          0 :       rstmt = gimple_build_assign (lhs, r_constructor);
    4373                 :            :     }
    4374                 :            : 
    4375                 :        228 :     gcc_assert (rstmt);
    4376                 :            : 
    4377                 :        228 :     gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
    4378                 :        228 :     gsi_replace (&rgsi, rstmt, true);
    4379                 :        228 : }
    4380                 :            : 
    4381                 :            : /* Generate vector code for all SLP instances in the loop/basic block.  */
    4382                 :            : 
    4383                 :            : void
    4384                 :      29578 : vect_schedule_slp (vec_info *vinfo)
    4385                 :            : {
    4386                 :      29578 :   vec<slp_instance> slp_instances;
    4387                 :      29578 :   slp_instance instance;
    4388                 :      29578 :   unsigned int i;
    4389                 :            : 
    4390                 :      29578 :   slp_instances = vinfo->slp_instances;
    4391                 :      79683 :   FOR_EACH_VEC_ELT (slp_instances, i, instance)
    4392                 :            :     {
    4393                 :      50105 :       slp_tree node = SLP_INSTANCE_TREE (instance);
    4394                 :            :       /* Schedule the tree of INSTANCE.  */
    4395                 :      50105 :       vect_schedule_slp_instance (node, instance);
    4396                 :            : 
    4397                 :      50105 :       if (SLP_INSTANCE_ROOT_STMT (instance))
    4398                 :        228 :         vectorize_slp_instance_root_stmt (node, instance);
    4399                 :            : 
    4400                 :      50105 :       if (dump_enabled_p ())
    4401                 :       1301 :         dump_printf_loc (MSG_NOTE, vect_location,
    4402                 :            :                          "vectorizing stmts using SLP.\n");
    4403                 :            :     }
    4404                 :            : 
    4405                 :      79683 :   FOR_EACH_VEC_ELT (slp_instances, i, instance)
    4406                 :            :     {
    4407                 :      50105 :       slp_tree root = SLP_INSTANCE_TREE (instance);
    4408                 :      50105 :       stmt_vec_info store_info;
    4409                 :      50105 :       unsigned int j;
    4410                 :            : 
    4411                 :            :       /* Remove scalar call stmts.  Do not do this for basic-block
    4412                 :            :          vectorization as not all uses may be vectorized.
    4413                 :            :          ???  Why should this be necessary?  DCE should be able to
    4414                 :            :          remove the stmts itself.
    4415                 :            :          ???  For BB vectorization we can as well remove scalar
    4416                 :            :          stmts starting from the SLP tree root if they have no
    4417                 :            :          uses.  */
    4418                 :      50105 :       if (is_a <loop_vec_info> (vinfo))
    4419                 :       2236 :         vect_remove_slp_scalar_calls (root);
    4420                 :            : 
    4421                 :     218139 :       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
    4422                 :     386693 :                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
    4423                 :            :         {
    4424                 :     168554 :           if (!STMT_VINFO_DATA_REF (store_info))
    4425                 :            :             break;
    4426                 :            : 
    4427                 :     168034 :           if (SLP_INSTANCE_ROOT_STMT (instance))
    4428                 :        394 :             continue;
    4429                 :            : 
    4430                 :     167640 :           store_info = vect_orig_stmt (store_info);
    4431                 :            :           /* Free the attached stmt_vec_info and remove the stmt.  */
    4432                 :     167640 :           vinfo->remove_stmt (store_info);
    4433                 :            :         }
    4434                 :            :     }
    4435                 :      29578 : }

Generated by: LCOV version 1.0

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto --enable-host-shared. GCC test suite is run with the built compiler.