LCOV - code coverage report
Current view: top level - gcc - tree-vect-loop-manip.c (source / functions) Hit Total Coverage
Test: gcc.info Lines: 1052 1380 76.2 %
Date: 2020-04-04 11:58:09 Functions: 37 46 80.4 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* Vectorizer Specific Loop Manipulations
       2                 :            :    Copyright (C) 2003-2020 Free Software Foundation, Inc.
       3                 :            :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4                 :            :    and Ira Rosen <irar@il.ibm.com>
       5                 :            : 
       6                 :            : This file is part of GCC.
       7                 :            : 
       8                 :            : GCC is free software; you can redistribute it and/or modify it under
       9                 :            : the terms of the GNU General Public License as published by the Free
      10                 :            : Software Foundation; either version 3, or (at your option) any later
      11                 :            : version.
      12                 :            : 
      13                 :            : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14                 :            : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15                 :            : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16                 :            : for more details.
      17                 :            : 
      18                 :            : You should have received a copy of the GNU General Public License
      19                 :            : along with GCC; see the file COPYING3.  If not see
      20                 :            : <http://www.gnu.org/licenses/>.  */
      21                 :            : 
      22                 :            : #include "config.h"
      23                 :            : #include "system.h"
      24                 :            : #include "coretypes.h"
      25                 :            : #include "backend.h"
      26                 :            : #include "tree.h"
      27                 :            : #include "gimple.h"
      28                 :            : #include "cfghooks.h"
      29                 :            : #include "tree-pass.h"
      30                 :            : #include "ssa.h"
      31                 :            : #include "fold-const.h"
      32                 :            : #include "cfganal.h"
      33                 :            : #include "gimplify.h"
      34                 :            : #include "gimple-iterator.h"
      35                 :            : #include "gimplify-me.h"
      36                 :            : #include "tree-cfg.h"
      37                 :            : #include "tree-ssa-loop-manip.h"
      38                 :            : #include "tree-into-ssa.h"
      39                 :            : #include "tree-ssa.h"
      40                 :            : #include "cfgloop.h"
      41                 :            : #include "tree-scalar-evolution.h"
      42                 :            : #include "tree-vectorizer.h"
      43                 :            : #include "tree-ssa-loop-ivopts.h"
      44                 :            : #include "gimple-fold.h"
      45                 :            : #include "tree-ssa-loop-niter.h"
      46                 :            : #include "internal-fn.h"
      47                 :            : #include "stor-layout.h"
      48                 :            : #include "optabs-query.h"
      49                 :            : #include "vec-perm-indices.h"
      50                 :            : #include "insn-config.h"
      51                 :            : #include "rtl.h"
      52                 :            : #include "recog.h"
      53                 :            : 
      54                 :            : /*************************************************************************
      55                 :            :   Simple Loop Peeling Utilities
      56                 :            : 
      57                 :            :   Utilities to support loop peeling for vectorization purposes.
      58                 :            :  *************************************************************************/
      59                 :            : 
      60                 :            : 
      61                 :            : /* Renames the use *OP_P.  */
      62                 :            : 
      63                 :            : static void
      64                 :     411530 : rename_use_op (use_operand_p op_p)
      65                 :            : {
      66                 :     411530 :   tree new_name;
      67                 :            : 
      68                 :     411530 :   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
      69                 :            :     return;
      70                 :            : 
      71                 :     411302 :   new_name = get_current_def (USE_FROM_PTR (op_p));
      72                 :            : 
      73                 :            :   /* Something defined outside of the loop.  */
      74                 :     411302 :   if (!new_name)
      75                 :            :     return;
      76                 :            : 
      77                 :            :   /* An ordinary ssa name defined in the loop.  */
      78                 :            : 
      79                 :     353838 :   SET_USE (op_p, new_name);
      80                 :            : }
      81                 :            : 
      82                 :            : 
      83                 :            : /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
      84                 :            :    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
      85                 :            :    true.  */
      86                 :            : 
      87                 :            : static void
      88                 :      38656 : rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
      89                 :            : {
      90                 :      38656 :   gimple *stmt;
      91                 :      38656 :   use_operand_p use_p;
      92                 :      38656 :   ssa_op_iter iter;
      93                 :      38656 :   edge e;
      94                 :      38656 :   edge_iterator ei;
      95                 :      38656 :   class loop *loop = bb->loop_father;
      96                 :      38656 :   class loop *outer_loop = NULL;
      97                 :            : 
      98                 :      38656 :   if (rename_from_outer_loop)
      99                 :            :     {
     100                 :        253 :       gcc_assert (loop);
     101                 :        253 :       outer_loop = loop_outer (loop);
     102                 :            :     }
     103                 :            : 
     104                 :     324455 :   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
     105                 :     247143 :        gsi_next (&gsi))
     106                 :            :     {
     107                 :     247143 :       stmt = gsi_stmt (gsi);
     108                 :     614327 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
     109                 :     367184 :         rename_use_op (use_p);
     110                 :            :     }
     111                 :            : 
     112                 :      90606 :   FOR_EACH_EDGE (e, ei, bb->preds)
     113                 :            :     {
     114                 :      51950 :       if (!flow_bb_inside_loop_p (loop, e->src))
     115                 :            :         {
     116                 :      12860 :           if (!rename_from_outer_loop)
     117                 :      12782 :             continue;
     118                 :         78 :           if (e->src != outer_loop->header)
     119                 :            :             {
     120                 :         43 :               if (outer_loop->inner->next)
     121                 :            :                 {
     122                 :            :                   /* If outer_loop has 2 inner loops, allow there to
     123                 :            :                      be an extra basic block which decides which of the
     124                 :            :                      two loops to use using LOOP_VECTORIZED.  */
     125                 :         42 :                   if (!single_pred_p (e->src)
     126                 :         42 :                       || single_pred (e->src) != outer_loop->header)
     127                 :         32 :                     continue;
     128                 :            :                 }
     129                 :            :             }
     130                 :            :         }
     131                 :      80592 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
     132                 :      41456 :            gsi_next (&gsi))
     133                 :      41456 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
     134                 :            :     }
     135                 :      38656 : }
     136                 :            : 
     137                 :            : 
     138                 :            : struct adjust_info
     139                 :            : {
     140                 :            :   tree from, to;
     141                 :            :   basic_block bb;
     142                 :            : };
     143                 :            : 
     144                 :            : /* A stack of values to be adjusted in debug stmts.  We have to
     145                 :            :    process them LIFO, so that the closest substitution applies.  If we
     146                 :            :    processed them FIFO, without the stack, we might substitute uses
     147                 :            :    with a PHI DEF that would soon become non-dominant, and when we got
     148                 :            :    to the suitable one, it wouldn't have anything to substitute any
     149                 :            :    more.  */
     150                 :            : static vec<adjust_info, va_heap> adjust_vec;
     151                 :            : 
     152                 :            : /* Adjust any debug stmts that referenced AI->from values to use the
     153                 :            :    loop-closed AI->to, if the references are dominated by AI->bb and
     154                 :            :    not by the definition of AI->from.  */
     155                 :            : 
     156                 :            : static void
     157                 :      37325 : adjust_debug_stmts_now (adjust_info *ai)
     158                 :            : {
     159                 :      37325 :   basic_block bbphi = ai->bb;
     160                 :      37325 :   tree orig_def = ai->from;
     161                 :      37325 :   tree new_def = ai->to;
     162                 :      37325 :   imm_use_iterator imm_iter;
     163                 :      37325 :   gimple *stmt;
     164                 :      37325 :   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
     165                 :            : 
     166                 :      37325 :   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
     167                 :            : 
     168                 :            :   /* Adjust any debug stmts that held onto non-loop-closed
     169                 :            :      references.  */
     170                 :     197492 :   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
     171                 :            :     {
     172                 :     160167 :       use_operand_p use_p;
     173                 :     160167 :       basic_block bbuse;
     174                 :            : 
     175                 :     160167 :       if (!is_gimple_debug (stmt))
     176                 :     119584 :         continue;
     177                 :            : 
     178                 :      40583 :       gcc_assert (gimple_debug_bind_p (stmt));
     179                 :            : 
     180                 :      40583 :       bbuse = gimple_bb (stmt);
     181                 :            : 
     182                 :      40583 :       if ((bbuse == bbphi
     183                 :      40583 :            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
     184                 :      40583 :           && !(bbuse == bbdef
     185                 :          0 :                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
     186                 :            :         {
     187                 :          0 :           if (new_def)
     188                 :          0 :             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
     189                 :          0 :               SET_USE (use_p, new_def);
     190                 :            :           else
     191                 :            :             {
     192                 :          0 :               gimple_debug_bind_reset_value (stmt);
     193                 :     160167 :               update_stmt (stmt);
     194                 :            :             }
     195                 :            :         }
     196                 :            :     }
     197                 :      37325 : }
     198                 :            : 
     199                 :            : /* Adjust debug stmts as scheduled before.  */
     200                 :            : 
     201                 :            : static void
     202                 :      12178 : adjust_vec_debug_stmts (void)
     203                 :            : {
     204                 :      12178 :   if (!MAY_HAVE_DEBUG_BIND_STMTS)
     205                 :            :     return;
     206                 :            : 
     207                 :       3724 :   gcc_assert (adjust_vec.exists ());
     208                 :            : 
     209                 :      41046 :   while (!adjust_vec.is_empty ())
     210                 :            :     {
     211                 :      37322 :       adjust_debug_stmts_now (&adjust_vec.last ());
     212                 :      37322 :       adjust_vec.pop ();
     213                 :            :     }
     214                 :            : }
     215                 :            : 
     216                 :            : /* Adjust any debug stmts that referenced FROM values to use the
     217                 :            :    loop-closed TO, if the references are dominated by BB and not by
     218                 :            :    the definition of FROM.  If adjust_vec is non-NULL, adjustments
     219                 :            :    will be postponed until adjust_vec_debug_stmts is called.  */
     220                 :            : 
     221                 :            : static void
     222                 :      45185 : adjust_debug_stmts (tree from, tree to, basic_block bb)
     223                 :            : {
     224                 :      45185 :   adjust_info ai;
     225                 :            : 
     226                 :      45185 :   if (MAY_HAVE_DEBUG_BIND_STMTS
     227                 :      45185 :       && TREE_CODE (from) == SSA_NAME
     228                 :      42325 :       && ! SSA_NAME_IS_DEFAULT_DEF (from)
     229                 :      87008 :       && ! virtual_operand_p (from))
     230                 :            :     {
     231                 :      37325 :       ai.from = from;
     232                 :      37325 :       ai.to = to;
     233                 :      37325 :       ai.bb = bb;
     234                 :            : 
     235                 :      37325 :       if (adjust_vec.exists ())
     236                 :      37322 :         adjust_vec.safe_push (ai);
     237                 :            :       else
     238                 :          3 :         adjust_debug_stmts_now (&ai);
     239                 :            :     }
     240                 :      45185 : }
     241                 :            : 
     242                 :            : /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
     243                 :            :    to adjust any debug stmts that referenced the old phi arg,
     244                 :            :    presumably non-loop-closed references left over from other
     245                 :            :    transformations.  */
     246                 :            : 
     247                 :            : static void
     248                 :     104737 : adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
     249                 :            : {
     250                 :     104737 :   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
     251                 :            : 
     252                 :     104737 :   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
     253                 :            : 
     254                 :     104737 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
     255                 :      45185 :     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
     256                 :            :                         gimple_bb (update_phi));
     257                 :     104737 : }
     258                 :            : 
     259                 :            : /* Define one loop mask MASK from loop LOOP.  INIT_MASK is the value that
     260                 :            :    the mask should have during the first iteration and NEXT_MASK is the
     261                 :            :    value that it should have on subsequent iterations.  */
     262                 :            : 
     263                 :            : static void
     264                 :          0 : vect_set_loop_mask (class loop *loop, tree mask, tree init_mask,
     265                 :            :                     tree next_mask)
     266                 :            : {
     267                 :          0 :   gphi *phi = create_phi_node (mask, loop->header);
     268                 :          0 :   add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
     269                 :          0 :   add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
     270                 :          0 : }
     271                 :            : 
     272                 :            : /* Add SEQ to the end of LOOP's preheader block.  */
     273                 :            : 
     274                 :            : static void
     275                 :          0 : add_preheader_seq (class loop *loop, gimple_seq seq)
     276                 :            : {
     277                 :          0 :   if (seq)
     278                 :            :     {
     279                 :          0 :       edge pe = loop_preheader_edge (loop);
     280                 :          0 :       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     281                 :          0 :       gcc_assert (!new_bb);
     282                 :            :     }
     283                 :          0 : }
     284                 :            : 
     285                 :            : /* Add SEQ to the beginning of LOOP's header block.  */
     286                 :            : 
     287                 :            : static void
     288                 :          0 : add_header_seq (class loop *loop, gimple_seq seq)
     289                 :            : {
     290                 :          0 :   if (seq)
     291                 :            :     {
     292                 :          0 :       gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
     293                 :          0 :       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
     294                 :            :     }
     295                 :          0 : }
     296                 :            : 
     297                 :            : /* Return true if the target can interleave elements of two vectors.
     298                 :            :    OFFSET is 0 if the first half of the vectors should be interleaved
     299                 :            :    or 1 if the second half should.  When returning true, store the
     300                 :            :    associated permutation in INDICES.  */
     301                 :            : 
     302                 :            : static bool
     303                 :          0 : interleave_supported_p (vec_perm_indices *indices, tree vectype,
     304                 :            :                         unsigned int offset)
     305                 :            : {
     306                 :          0 :   poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
     307                 :          0 :   poly_uint64 base = exact_div (nelts, 2) * offset;
     308                 :          0 :   vec_perm_builder sel (nelts, 2, 3);
     309                 :          0 :   for (unsigned int i = 0; i < 3; ++i)
     310                 :            :     {
     311                 :          0 :       sel.quick_push (base + i);
     312                 :          0 :       sel.quick_push (base + i + nelts);
     313                 :            :     }
     314                 :          0 :   indices->new_vector (sel, 2, nelts);
     315                 :          0 :   return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
     316                 :            : }
     317                 :            : 
     318                 :            : /* Try to use permutes to define the masks in DEST_RGM using the masks
     319                 :            :    in SRC_RGM, given that the former has twice as many masks as the
     320                 :            :    latter.  Return true on success, adding any new statements to SEQ.  */
     321                 :            : 
     322                 :            : static bool
     323                 :          0 : vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
     324                 :            :                                rgroup_masks *src_rgm)
     325                 :            : {
     326                 :          0 :   tree src_masktype = src_rgm->mask_type;
     327                 :          0 :   tree dest_masktype = dest_rgm->mask_type;
     328                 :          0 :   machine_mode src_mode = TYPE_MODE (src_masktype);
     329                 :          0 :   insn_code icode1, icode2;
     330                 :          0 :   if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
     331                 :          0 :       && (icode1 = optab_handler (vec_unpacku_hi_optab,
     332                 :            :                                   src_mode)) != CODE_FOR_nothing
     333                 :          0 :       && (icode2 = optab_handler (vec_unpacku_lo_optab,
     334                 :            :                                   src_mode)) != CODE_FOR_nothing)
     335                 :            :     {
     336                 :            :       /* Unpacking the source masks gives at least as many mask bits as
     337                 :            :          we need.  We can then VIEW_CONVERT any excess bits away.  */
     338                 :          0 :       machine_mode dest_mode = insn_data[icode1].operand[0].mode;
     339                 :          0 :       gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
     340                 :          0 :       tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
     341                 :          0 :       for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
     342                 :            :         {
     343                 :          0 :           tree src = src_rgm->masks[i / 2];
     344                 :          0 :           tree dest = dest_rgm->masks[i];
     345                 :          0 :           tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
     346                 :          0 :                             ? VEC_UNPACK_HI_EXPR
     347                 :            :                             : VEC_UNPACK_LO_EXPR);
     348                 :          0 :           gassign *stmt;
     349                 :          0 :           if (dest_masktype == unpack_masktype)
     350                 :          0 :             stmt = gimple_build_assign (dest, code, src);
     351                 :            :           else
     352                 :            :             {
     353                 :          0 :               tree temp = make_ssa_name (unpack_masktype);
     354                 :          0 :               stmt = gimple_build_assign (temp, code, src);
     355                 :          0 :               gimple_seq_add_stmt (seq, stmt);
     356                 :          0 :               stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
     357                 :            :                                           build1 (VIEW_CONVERT_EXPR,
     358                 :            :                                                   dest_masktype, temp));
     359                 :            :             }
     360                 :          0 :           gimple_seq_add_stmt (seq, stmt);
     361                 :            :         }
     362                 :            :       return true;
     363                 :            :     }
     364                 :          0 :   vec_perm_indices indices[2];
     365                 :          0 :   if (dest_masktype == src_masktype
     366                 :          0 :       && interleave_supported_p (&indices[0], src_masktype, 0)
     367                 :          0 :       && interleave_supported_p (&indices[1], src_masktype, 1))
     368                 :            :     {
     369                 :            :       /* The destination requires twice as many mask bits as the source, so
     370                 :            :          we can use interleaving permutes to double up the number of bits.  */
     371                 :            :       tree masks[2];
     372                 :          0 :       for (unsigned int i = 0; i < 2; ++i)
     373                 :          0 :         masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
     374                 :          0 :       for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
     375                 :            :         {
     376                 :          0 :           tree src = src_rgm->masks[i / 2];
     377                 :          0 :           tree dest = dest_rgm->masks[i];
     378                 :          0 :           gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
     379                 :          0 :                                               src, src, masks[i & 1]);
     380                 :          0 :           gimple_seq_add_stmt (seq, stmt);
     381                 :            :         }
     382                 :          0 :       return true;
     383                 :            :     }
     384                 :            :   return false;
     385                 :            : }
     386                 :            : 
     387                 :            : /* Helper for vect_set_loop_condition_masked.  Generate definitions for
     388                 :            :    all the masks in RGM and return a mask that is nonzero when the loop
     389                 :            :    needs to iterate.  Add any new preheader statements to PREHEADER_SEQ.
     390                 :            :    Use LOOP_COND_GSI to insert code before the exit gcond.
     391                 :            : 
     392                 :            :    RGM belongs to loop LOOP.  The loop originally iterated NITERS
     393                 :            :    times and has been vectorized according to LOOP_VINFO.
     394                 :            : 
     395                 :            :    If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
     396                 :            :    starts with NITERS_SKIP dummy iterations of the scalar loop before
     397                 :            :    the real work starts.  The mask elements for these dummy iterations
     398                 :            :    must be 0, to ensure that the extra iterations do not have an effect.
     399                 :            : 
     400                 :            :    It is known that:
     401                 :            : 
     402                 :            :      NITERS * RGM->max_nscalars_per_iter
     403                 :            : 
     404                 :            :    does not overflow.  However, MIGHT_WRAP_P says whether an induction
     405                 :            :    variable that starts at 0 and has step:
     406                 :            : 
     407                 :            :      VF * RGM->max_nscalars_per_iter
     408                 :            : 
     409                 :            :    might overflow before hitting a value above:
     410                 :            : 
     411                 :            :      (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
     412                 :            : 
     413                 :            :    This means that we cannot guarantee that such an induction variable
     414                 :            :    would ever hit a value that produces a set of all-false masks for RGM.  */
     415                 :            : 
     416                 :            : static tree
     417                 :          0 : vect_set_loop_masks_directly (class loop *loop, loop_vec_info loop_vinfo,
     418                 :            :                               gimple_seq *preheader_seq,
     419                 :            :                               gimple_stmt_iterator loop_cond_gsi,
     420                 :            :                               rgroup_masks *rgm, tree niters, tree niters_skip,
     421                 :            :                               bool might_wrap_p)
     422                 :            : {
     423                 :          0 :   tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
     424                 :          0 :   tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
     425                 :          0 :   tree mask_type = rgm->mask_type;
     426                 :          0 :   unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
     427                 :          0 :   poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
     428                 :          0 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     429                 :            : 
     430                 :            :   /* Calculate the maximum number of scalar values that the rgroup
     431                 :            :      handles in total, the number that it handles for each iteration
     432                 :            :      of the vector loop, and the number that it should skip during the
     433                 :            :      first iteration of the vector loop.  */
     434                 :          0 :   tree nscalars_total = niters;
     435                 :          0 :   tree nscalars_step = build_int_cst (iv_type, vf);
     436                 :          0 :   tree nscalars_skip = niters_skip;
     437                 :          0 :   if (nscalars_per_iter != 1)
     438                 :            :     {
     439                 :            :       /* We checked before choosing to use a fully-masked loop that these
     440                 :            :          multiplications don't overflow.  */
     441                 :          0 :       tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
     442                 :          0 :       tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
     443                 :          0 :       nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     444                 :            :                                      nscalars_total, compare_factor);
     445                 :          0 :       nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
     446                 :            :                                     nscalars_step, iv_factor);
     447                 :          0 :       if (nscalars_skip)
     448                 :          0 :         nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     449                 :            :                                       nscalars_skip, compare_factor);
     450                 :            :     }
     451                 :            : 
     452                 :            :   /* Create an induction variable that counts the number of scalars
     453                 :            :      processed.  */
     454                 :          0 :   tree index_before_incr, index_after_incr;
     455                 :          0 :   gimple_stmt_iterator incr_gsi;
     456                 :          0 :   bool insert_after;
     457                 :          0 :   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
     458                 :          0 :   create_iv (build_int_cst (iv_type, 0), nscalars_step, NULL_TREE, loop,
     459                 :            :              &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
     460                 :            : 
     461                 :          0 :   tree zero_index = build_int_cst (compare_type, 0);
     462                 :          0 :   tree test_index, test_limit, first_limit;
     463                 :          0 :   gimple_stmt_iterator *test_gsi;
     464                 :          0 :   if (might_wrap_p)
     465                 :            :     {
     466                 :            :       /* In principle the loop should stop iterating once the incremented
     467                 :            :          IV reaches a value greater than or equal to:
     468                 :            : 
     469                 :            :            NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP
     470                 :            : 
     471                 :            :          However, there's no guarantee that this addition doesn't overflow
     472                 :            :          the comparison type, or that the IV hits a value above it before
     473                 :            :          wrapping around.  We therefore adjust the limit down by one
     474                 :            :          IV step:
     475                 :            : 
     476                 :            :            (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
     477                 :            :            -[infinite-prec] NSCALARS_STEP
     478                 :            : 
     479                 :            :          and compare the IV against this limit _before_ incrementing it.
     480                 :            :          Since the comparison type is unsigned, we actually want the
     481                 :            :          subtraction to saturate at zero:
     482                 :            : 
     483                 :            :            (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
     484                 :            :            -[sat] NSCALARS_STEP
     485                 :            : 
     486                 :            :          And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:
     487                 :            : 
     488                 :            :            NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
     489                 :            : 
     490                 :            :          where the rightmost subtraction can be done directly in
     491                 :            :          COMPARE_TYPE.  */
     492                 :          0 :       test_index = index_before_incr;
     493                 :          0 :       tree adjust = gimple_convert (preheader_seq, compare_type,
     494                 :            :                                     nscalars_step);
     495                 :          0 :       if (nscalars_skip)
     496                 :          0 :         adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     497                 :            :                                adjust, nscalars_skip);
     498                 :          0 :       test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
     499                 :            :                                  nscalars_total, adjust);
     500                 :          0 :       test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     501                 :            :                                  test_limit, adjust);
     502                 :          0 :       test_gsi = &incr_gsi;
     503                 :            : 
     504                 :            :       /* Get a safe limit for the first iteration.  */
     505                 :          0 :       if (nscalars_skip)
     506                 :            :         {
     507                 :            :           /* The first vector iteration can handle at most NSCALARS_STEP
     508                 :            :              scalars.  NSCALARS_STEP <= CONST_LIMIT, and adding
     509                 :            :              NSCALARS_SKIP to that cannot overflow.  */
     510                 :          0 :           tree const_limit = build_int_cst (compare_type,
     511                 :            :                                             LOOP_VINFO_VECT_FACTOR (loop_vinfo)
     512                 :          0 :                                             * nscalars_per_iter);
     513                 :          0 :           first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
     514                 :            :                                       nscalars_total, const_limit);
     515                 :          0 :           first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     516                 :            :                                       first_limit, nscalars_skip);
     517                 :            :         }
     518                 :            :       else
     519                 :            :         /* For the first iteration it doesn't matter whether the IV hits
     520                 :            :            a value above NSCALARS_TOTAL.  That only matters for the latch
     521                 :            :            condition.  */
     522                 :            :         first_limit = nscalars_total;
     523                 :            :     }
     524                 :            :   else
     525                 :            :     {
     526                 :            :       /* Test the incremented IV, which will always hit a value above
     527                 :            :          the bound before wrapping.  */
     528                 :          0 :       test_index = index_after_incr;
     529                 :          0 :       test_limit = nscalars_total;
     530                 :          0 :       if (nscalars_skip)
     531                 :          0 :         test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     532                 :            :                                    test_limit, nscalars_skip);
     533                 :            :       test_gsi = &loop_cond_gsi;
     534                 :            : 
     535                 :            :       first_limit = test_limit;
     536                 :            :     }
     537                 :            : 
     538                 :            :   /* Convert the IV value to the comparison type (either a no-op or
     539                 :            :      a demotion).  */
     540                 :          0 :   gimple_seq test_seq = NULL;
     541                 :          0 :   test_index = gimple_convert (&test_seq, compare_type, test_index);
     542                 :          0 :   gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
     543                 :            : 
     544                 :            :   /* Provide a definition of each mask in the group.  */
     545                 :          0 :   tree next_mask = NULL_TREE;
     546                 :          0 :   tree mask;
     547                 :          0 :   unsigned int i;
     548                 :          0 :   FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
     549                 :            :     {
     550                 :            :       /* Previous masks will cover BIAS scalars.  This mask covers the
     551                 :            :          next batch.  */
     552                 :          0 :       poly_uint64 bias = nscalars_per_mask * i;
     553                 :          0 :       tree bias_tree = build_int_cst (compare_type, bias);
     554                 :          0 :       gimple *tmp_stmt;
     555                 :            : 
     556                 :            :       /* See whether the first iteration of the vector loop is known
     557                 :            :          to have a full mask.  */
     558                 :          0 :       poly_uint64 const_limit;
     559                 :          0 :       bool first_iteration_full
     560                 :          0 :         = (poly_int_tree_p (first_limit, &const_limit)
     561                 :          0 :            && known_ge (const_limit, (i + 1) * nscalars_per_mask));
     562                 :            : 
     563                 :            :       /* Rather than have a new IV that starts at BIAS and goes up to
     564                 :            :          TEST_LIMIT, prefer to use the same 0-based IV for each mask
     565                 :            :          and adjust the bound down by BIAS.  */
     566                 :          0 :       tree this_test_limit = test_limit;
     567                 :          0 :       if (i != 0)
     568                 :            :         {
     569                 :          0 :           this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
     570                 :            :                                           compare_type, this_test_limit,
     571                 :            :                                           bias_tree);
     572                 :          0 :           this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
     573                 :            :                                           compare_type, this_test_limit,
     574                 :            :                                           bias_tree);
     575                 :            :         }
     576                 :            : 
     577                 :            :       /* Create the initial mask.  First include all scalars that
     578                 :            :          are within the loop limit.  */
     579                 :          0 :       tree init_mask = NULL_TREE;
     580                 :          0 :       if (!first_iteration_full)
     581                 :            :         {
     582                 :          0 :           tree start, end;
     583                 :          0 :           if (first_limit == test_limit)
     584                 :            :             {
     585                 :            :               /* Use a natural test between zero (the initial IV value)
     586                 :            :                  and the loop limit.  The "else" block would be valid too,
     587                 :            :                  but this choice can avoid the need to load BIAS_TREE into
     588                 :            :                  a register.  */
     589                 :            :               start = zero_index;
     590                 :            :               end = this_test_limit;
     591                 :            :             }
     592                 :            :           else
     593                 :            :             {
     594                 :            :               /* FIRST_LIMIT is the maximum number of scalars handled by the
     595                 :            :                  first iteration of the vector loop.  Test the portion
     596                 :            :                  associated with this mask.  */
     597                 :          0 :               start = bias_tree;
     598                 :          0 :               end = first_limit;
     599                 :            :             }
     600                 :            : 
     601                 :          0 :           init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
     602                 :          0 :           tmp_stmt = vect_gen_while (init_mask, start, end);
     603                 :          0 :           gimple_seq_add_stmt (preheader_seq, tmp_stmt);
     604                 :            :         }
     605                 :            : 
     606                 :            :       /* Now AND out the bits that are within the number of skipped
     607                 :            :          scalars.  */
     608                 :          0 :       poly_uint64 const_skip;
     609                 :          0 :       if (nscalars_skip
     610                 :          0 :           && !(poly_int_tree_p (nscalars_skip, &const_skip)
     611                 :          0 :                && known_le (const_skip, bias)))
     612                 :            :         {
     613                 :          0 :           tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
     614                 :            :                                                     bias_tree, nscalars_skip);
     615                 :          0 :           if (init_mask)
     616                 :          0 :             init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
     617                 :            :                                       init_mask, unskipped_mask);
     618                 :            :           else
     619                 :            :             init_mask = unskipped_mask;
     620                 :            :         }
     621                 :            : 
     622                 :          0 :       if (!init_mask)
     623                 :            :         /* First iteration is full.  */
     624                 :          0 :         init_mask = build_minus_one_cst (mask_type);
     625                 :            : 
     626                 :            :       /* Get the mask value for the next iteration of the loop.  */
     627                 :          0 :       next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
     628                 :          0 :       gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
     629                 :          0 :       gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
     630                 :            : 
     631                 :          0 :       vect_set_loop_mask (loop, mask, init_mask, next_mask);
     632                 :            :     }
     633                 :          0 :   return next_mask;
     634                 :            : }
     635                 :            : 
     636                 :            : /* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
     637                 :            :    LOOP_VINFO describes the vectorization of LOOP.  NITERS is the
     638                 :            :    number of iterations of the original scalar loop that should be
     639                 :            :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are
     640                 :            :    as for vect_set_loop_condition.
     641                 :            : 
     642                 :            :    Insert the branch-back condition before LOOP_COND_GSI and return the
     643                 :            :    final gcond.  */
     644                 :            : 
     645                 :            : static gcond *
     646                 :          0 : vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
     647                 :            :                                 tree niters, tree final_iv,
     648                 :            :                                 bool niters_maybe_zero,
     649                 :            :                                 gimple_stmt_iterator loop_cond_gsi)
     650                 :            : {
     651                 :          0 :   gimple_seq preheader_seq = NULL;
     652                 :          0 :   gimple_seq header_seq = NULL;
     653                 :            : 
     654                 :          0 :   tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
     655                 :          0 :   unsigned int compare_precision = TYPE_PRECISION (compare_type);
     656                 :          0 :   tree orig_niters = niters;
     657                 :            : 
     658                 :            :   /* Type of the initial value of NITERS.  */
     659                 :          0 :   tree ni_actual_type = TREE_TYPE (niters);
     660                 :          0 :   unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
     661                 :          0 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
     662                 :            : 
     663                 :            :   /* Convert NITERS to the same size as the compare.  */
     664                 :          0 :   if (compare_precision > ni_actual_precision
     665                 :          0 :       && niters_maybe_zero)
     666                 :            :     {
     667                 :            :       /* We know that there is always at least one iteration, so if the
     668                 :            :          count is zero then it must have wrapped.  Cope with this by
     669                 :            :          subtracting 1 before the conversion and adding 1 to the result.  */
     670                 :          0 :       gcc_assert (TYPE_UNSIGNED (ni_actual_type));
     671                 :          0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
     672                 :            :                              niters, build_minus_one_cst (ni_actual_type));
     673                 :          0 :       niters = gimple_convert (&preheader_seq, compare_type, niters);
     674                 :          0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
     675                 :            :                              niters, build_one_cst (compare_type));
     676                 :            :     }
     677                 :            :   else
     678                 :          0 :     niters = gimple_convert (&preheader_seq, compare_type, niters);
     679                 :            : 
     680                 :          0 :   widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
     681                 :            : 
     682                 :            :   /* Iterate over all the rgroups and fill in their masks.  We could use
     683                 :            :      the first mask from any rgroup for the loop condition; here we
     684                 :            :      arbitrarily pick the last.  */
     685                 :          0 :   tree test_mask = NULL_TREE;
     686                 :          0 :   rgroup_masks *rgm;
     687                 :          0 :   unsigned int i;
     688                 :          0 :   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
     689                 :          0 :   FOR_EACH_VEC_ELT (*masks, i, rgm)
     690                 :          0 :     if (!rgm->masks.is_empty ())
     691                 :            :       {
     692                 :            :         /* First try using permutes.  This adds a single vector
     693                 :            :            instruction to the loop for each mask, but needs no extra
     694                 :            :            loop invariants or IVs.  */
     695                 :          0 :         unsigned int nmasks = i + 1;
     696                 :          0 :         if ((nmasks & 1) == 0)
     697                 :            :           {
     698                 :          0 :             rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
     699                 :          0 :             if (!half_rgm->masks.is_empty ()
     700                 :          0 :                 && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
     701                 :          0 :               continue;
     702                 :            :           }
     703                 :            : 
     704                 :            :         /* See whether zero-based IV would ever generate all-false masks
     705                 :            :            before wrapping around.  */
     706                 :          0 :         bool might_wrap_p
     707                 :          0 :           = (iv_limit == -1
     708                 :          0 :              || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
     709                 :            :                                     UNSIGNED)
     710                 :          0 :                  > compare_precision));
     711                 :            : 
     712                 :            :         /* Set up all masks for this group.  */
     713                 :          0 :         test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
     714                 :            :                                                   &preheader_seq,
     715                 :            :                                                   loop_cond_gsi, rgm,
     716                 :            :                                                   niters, niters_skip,
     717                 :            :                                                   might_wrap_p);
     718                 :            :       }
     719                 :            : 
     720                 :            :   /* Emit all accumulated statements.  */
     721                 :          0 :   add_preheader_seq (loop, preheader_seq);
     722                 :          0 :   add_header_seq (loop, header_seq);
     723                 :            : 
     724                 :            :   /* Get a boolean result that tells us whether to iterate.  */
     725                 :          0 :   edge exit_edge = single_exit (loop);
     726                 :          0 :   tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
     727                 :          0 :   tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
     728                 :          0 :   gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
     729                 :            :                                         NULL_TREE, NULL_TREE);
     730                 :          0 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
     731                 :            : 
     732                 :            :   /* The loop iterates (NITERS - 1) / VF + 1 times.
     733                 :            :      Subtract one from this to get the latch count.  */
     734                 :          0 :   tree step = build_int_cst (compare_type,
     735                 :          0 :                              LOOP_VINFO_VECT_FACTOR (loop_vinfo));
     736                 :          0 :   tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
     737                 :            :                                        build_minus_one_cst (compare_type));
     738                 :          0 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
     739                 :            :                                      niters_minus_one, step);
     740                 :            : 
     741                 :          0 :   if (final_iv)
     742                 :            :     {
     743                 :          0 :       gassign *assign = gimple_build_assign (final_iv, orig_niters);
     744                 :          0 :       gsi_insert_on_edge_immediate (single_exit (loop), assign);
     745                 :            :     }
     746                 :            : 
     747                 :          0 :   return cond_stmt;
     748                 :            : }
     749                 :            : 
     750                 :            : /* Like vect_set_loop_condition, but handle the case in which there
     751                 :            :    are no loop masks.  */
     752                 :            : 
     753                 :            : static gcond *
     754                 :      21576 : vect_set_loop_condition_unmasked (class loop *loop, tree niters,
     755                 :            :                                   tree step, tree final_iv,
     756                 :            :                                   bool niters_maybe_zero,
     757                 :            :                                   gimple_stmt_iterator loop_cond_gsi)
     758                 :            : {
     759                 :      21576 :   tree indx_before_incr, indx_after_incr;
     760                 :      21576 :   gcond *cond_stmt;
     761                 :      21576 :   gcond *orig_cond;
     762                 :      21576 :   edge pe = loop_preheader_edge (loop);
     763                 :      21576 :   edge exit_edge = single_exit (loop);
     764                 :      21576 :   gimple_stmt_iterator incr_gsi;
     765                 :      21576 :   bool insert_after;
     766                 :      21576 :   enum tree_code code;
     767                 :      21576 :   tree niters_type = TREE_TYPE (niters);
     768                 :            : 
     769                 :      21576 :   orig_cond = get_loop_exit_condition (loop);
     770                 :      21576 :   gcc_assert (orig_cond);
     771                 :      21576 :   loop_cond_gsi = gsi_for_stmt (orig_cond);
     772                 :            : 
     773                 :      21576 :   tree init, limit;
     774                 :      21576 :   if (!niters_maybe_zero && integer_onep (step))
     775                 :            :     {
     776                 :            :       /* In this case we can use a simple 0-based IV:
     777                 :            : 
     778                 :            :          A:
     779                 :            :            x = 0;
     780                 :            :            do
     781                 :            :              {
     782                 :            :                ...
     783                 :            :                x += 1;
     784                 :            :              }
     785                 :            :            while (x < NITERS);  */
     786                 :      21576 :       code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
     787                 :      21576 :       init = build_zero_cst (niters_type);
     788                 :      21576 :       limit = niters;
     789                 :            :     }
     790                 :            :   else
     791                 :            :     {
     792                 :            :       /* The following works for all values of NITERS except 0:
     793                 :            : 
     794                 :            :          B:
     795                 :            :            x = 0;
     796                 :            :            do
     797                 :            :              {
     798                 :            :                ...
     799                 :            :                x += STEP;
     800                 :            :              }
     801                 :            :            while (x <= NITERS - STEP);
     802                 :            : 
     803                 :            :          so that the loop continues to iterate if x + STEP - 1 < NITERS
     804                 :            :          but stops if x + STEP - 1 >= NITERS.
     805                 :            : 
     806                 :            :          However, if NITERS is zero, x never hits a value above NITERS - STEP
     807                 :            :          before wrapping around.  There are two obvious ways of dealing with
     808                 :            :          this:
     809                 :            : 
     810                 :            :          - start at STEP - 1 and compare x before incrementing it
     811                 :            :          - start at -1 and compare x after incrementing it
     812                 :            : 
     813                 :            :          The latter is simpler and is what we use.  The loop in this case
     814                 :            :          looks like:
     815                 :            : 
     816                 :            :          C:
     817                 :            :            x = -1;
     818                 :            :            do
     819                 :            :              {
     820                 :            :                ...
     821                 :            :                x += STEP;
     822                 :            :              }
     823                 :            :            while (x < NITERS - STEP);
     824                 :            : 
     825                 :            :          In both cases the loop limit is NITERS - STEP.  */
     826                 :          0 :       gimple_seq seq = NULL;
     827                 :          0 :       limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
     828                 :          0 :       limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
     829                 :          0 :       if (seq)
     830                 :            :         {
     831                 :          0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     832                 :          0 :           gcc_assert (!new_bb);
     833                 :            :         }
     834                 :          0 :       if (niters_maybe_zero)
     835                 :            :         {
     836                 :            :           /* Case C.  */
     837                 :          0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
     838                 :          0 :           init = build_all_ones_cst (niters_type);
     839                 :            :         }
     840                 :            :       else
     841                 :            :         {
     842                 :            :           /* Case B.  */
     843                 :          0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
     844                 :          0 :           init = build_zero_cst (niters_type);
     845                 :            :         }
     846                 :            :     }
     847                 :            : 
     848                 :      21576 :   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
     849                 :      21576 :   create_iv (init, step, NULL_TREE, loop,
     850                 :            :              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
     851                 :      21576 :   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
     852                 :            :                                               true, NULL_TREE, true,
     853                 :            :                                               GSI_SAME_STMT);
     854                 :      21576 :   limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
     855                 :            :                                      true, GSI_SAME_STMT);
     856                 :            : 
     857                 :      21576 :   cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
     858                 :            :                                  NULL_TREE);
     859                 :            : 
     860                 :      21576 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
     861                 :            : 
     862                 :            :   /* Record the number of latch iterations.  */
     863                 :      21576 :   if (limit == niters)
     864                 :            :     /* Case A: the loop iterates NITERS times.  Subtract one to get the
     865                 :            :        latch count.  */
     866                 :      21576 :     loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
     867                 :            :                                        build_int_cst (niters_type, 1));
     868                 :            :   else
     869                 :            :     /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
     870                 :            :        Subtract one from this to get the latch count.  */
     871                 :          0 :     loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
     872                 :            :                                        limit, step);
     873                 :            : 
     874                 :      21576 :   if (final_iv)
     875                 :            :     {
     876                 :          0 :       gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
     877                 :            :                                              indx_after_incr, init);
     878                 :          0 :       gsi_insert_on_edge_immediate (single_exit (loop), assign);
     879                 :            :     }
     880                 :            : 
     881                 :      21576 :   return cond_stmt;
     882                 :            : }
     883                 :            : 
     884                 :            : /* If we're using fully-masked loops, make LOOP iterate:
     885                 :            : 
     886                 :            :       N == (NITERS - 1) / STEP + 1
     887                 :            : 
     888                 :            :    times.  When NITERS is zero, this is equivalent to making the loop
     889                 :            :    execute (1 << M) / STEP times, where M is the precision of NITERS.
     890                 :            :    NITERS_MAYBE_ZERO is true if this last case might occur.
     891                 :            : 
     892                 :            :    If we're not using fully-masked loops, make LOOP iterate:
     893                 :            : 
     894                 :            :       N == (NITERS - STEP) / STEP + 1
     895                 :            : 
     896                 :            :    times, where NITERS is known to be outside the range [1, STEP - 1].
     897                 :            :    This is equivalent to making the loop execute NITERS / STEP times
     898                 :            :    when NITERS is nonzero and (1 << M) / STEP times otherwise.
     899                 :            :    NITERS_MAYBE_ZERO again indicates whether this last case might occur.
     900                 :            : 
     901                 :            :    If FINAL_IV is nonnull, it is an SSA name that should be set to
     902                 :            :    N * STEP on exit from the loop.
     903                 :            : 
     904                 :            :    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
     905                 :            : 
     906                 :            : void
     907                 :      21576 : vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
     908                 :            :                          tree niters, tree step, tree final_iv,
     909                 :            :                          bool niters_maybe_zero)
     910                 :            : {
     911                 :      21576 :   gcond *cond_stmt;
     912                 :      21576 :   gcond *orig_cond = get_loop_exit_condition (loop);
     913                 :      21576 :   gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
     914                 :            : 
     915                 :      21576 :   if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
     916                 :          0 :     cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
     917                 :            :                                                 final_iv, niters_maybe_zero,
     918                 :            :                                                 loop_cond_gsi);
     919                 :            :   else
     920                 :      21576 :     cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
     921                 :            :                                                   final_iv, niters_maybe_zero,
     922                 :            :                                                   loop_cond_gsi);
     923                 :            : 
     924                 :            :   /* Remove old loop exit test.  */
     925                 :      21576 :   stmt_vec_info orig_cond_info;
     926                 :      21576 :   if (loop_vinfo
     927                 :      21576 :       && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
     928                 :      21383 :     loop_vinfo->remove_stmt (orig_cond_info);
     929                 :            :   else
     930                 :        193 :     gsi_remove (&loop_cond_gsi, true);
     931                 :            : 
     932                 :      21576 :   if (dump_enabled_p ())
     933                 :       7203 :     dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
     934                 :            :                      cond_stmt);
     935                 :      21576 : }
     936                 :            : 
     937                 :            : /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
     938                 :            :    For all PHI arguments in FROM->dest and TO->dest from those
     939                 :            :    edges ensure that TO->dest PHI arguments have current_def
     940                 :            :    to that in from.  */
     941                 :            : 
     942                 :            : static void
     943                 :        722 : slpeel_duplicate_current_defs_from_edges (edge from, edge to)
     944                 :            : {
     945                 :        722 :   gimple_stmt_iterator gsi_from, gsi_to;
     946                 :            : 
     947                 :        722 :   for (gsi_from = gsi_start_phis (from->dest),
     948                 :        722 :        gsi_to = gsi_start_phis (to->dest);
     949                 :       2176 :        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
     950                 :            :     {
     951                 :       1454 :       gimple *from_phi = gsi_stmt (gsi_from);
     952                 :       1454 :       gimple *to_phi = gsi_stmt (gsi_to);
     953                 :       1454 :       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
     954                 :       1454 :       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
     955                 :       2902 :       if (virtual_operand_p (from_arg))
     956                 :            :         {
     957                 :        552 :           gsi_next (&gsi_from);
     958                 :        552 :           continue;
     959                 :            :         }
     960                 :       1797 :       if (virtual_operand_p (to_arg))
     961                 :            :         {
     962                 :        101 :           gsi_next (&gsi_to);
     963                 :        101 :           continue;
     964                 :            :         }
     965                 :        801 :       if (TREE_CODE (from_arg) != SSA_NAME)
     966                 :          6 :         gcc_assert (operand_equal_p (from_arg, to_arg, 0));
     967                 :        795 :       else if (TREE_CODE (to_arg) == SSA_NAME
     968                 :        794 :                && from_arg != to_arg)
     969                 :            :         {
     970                 :        783 :           if (get_current_def (to_arg) == NULL_TREE)
     971                 :            :             {
     972                 :        679 :               gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
     973                 :            :                                               TREE_TYPE (get_current_def
     974                 :            :                                                            (from_arg))));
     975                 :        679 :               set_current_def (to_arg, get_current_def (from_arg));
     976                 :            :             }
     977                 :            :         }
     978                 :        801 :       gsi_next (&gsi_from);
     979                 :        801 :       gsi_next (&gsi_to);
     980                 :            :     }
     981                 :            : 
     982                 :        722 :   gphi *from_phi = get_virtual_phi (from->dest);
     983                 :        722 :   gphi *to_phi = get_virtual_phi (to->dest);
     984                 :        722 :   if (from_phi)
     985                 :        552 :     set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
     986                 :        552 :                      get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
     987                 :        722 : }
     988                 :            : 
     989                 :            : 
     990                 :            : /* Given LOOP this function generates a new copy of it and puts it
     991                 :            :    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
     992                 :            :    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
     993                 :            :    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
     994                 :            :    entry or exit of LOOP.  */
     995                 :            : 
     996                 :            : class loop *
     997                 :      12820 : slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
     998                 :            :                                         class loop *scalar_loop, edge e)
     999                 :            : {
    1000                 :      12820 :   class loop *new_loop;
    1001                 :      12820 :   basic_block *new_bbs, *bbs, *pbbs;
    1002                 :      12820 :   bool at_exit;
    1003                 :      12820 :   bool was_imm_dom;
    1004                 :      12820 :   basic_block exit_dest;
    1005                 :      12820 :   edge exit, new_exit;
    1006                 :      12820 :   bool duplicate_outer_loop = false;
    1007                 :            : 
    1008                 :      12820 :   exit = single_exit (loop);
    1009                 :      12820 :   at_exit = (e == exit);
    1010                 :      12820 :   if (!at_exit && e != loop_preheader_edge (loop))
    1011                 :            :     return NULL;
    1012                 :            : 
    1013                 :      12820 :   if (scalar_loop == NULL)
    1014                 :      12459 :     scalar_loop = loop;
    1015                 :            : 
    1016                 :      12820 :   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1017                 :      12820 :   pbbs = bbs + 1;
    1018                 :      12820 :   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
    1019                 :            :   /* Allow duplication of outer loops.  */
    1020                 :      12820 :   if (scalar_loop->inner)
    1021                 :         38 :     duplicate_outer_loop = true;
    1022                 :            :   /* Check whether duplication is possible.  */
    1023                 :      12820 :   if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    1024                 :            :     {
    1025                 :          0 :       free (bbs);
    1026                 :          0 :       return NULL;
    1027                 :            :     }
    1028                 :            : 
    1029                 :            :   /* Generate new loop structure.  */
    1030                 :      25640 :   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
    1031                 :      12820 :   duplicate_subloops (scalar_loop, new_loop);
    1032                 :            : 
    1033                 :      12820 :   exit_dest = exit->dest;
    1034                 :      12820 :   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
    1035                 :      12820 :                                           exit_dest) == loop->header ?
    1036                 :            :                  true : false);
    1037                 :            : 
    1038                 :            :   /* Also copy the pre-header, this avoids jumping through hoops to
    1039                 :            :      duplicate the loop entry PHI arguments.  Create an empty
    1040                 :            :      pre-header unconditionally for this.  */
    1041                 :      12820 :   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
    1042                 :      12820 :   edge entry_e = single_pred_edge (preheader);
    1043                 :      12820 :   bbs[0] = preheader;
    1044                 :      12820 :   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1045                 :            : 
    1046                 :      12820 :   exit = single_exit (scalar_loop);
    1047                 :      12820 :   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
    1048                 :            :             &exit, 1, &new_exit, NULL,
    1049                 :            :             at_exit ? loop->latch : e->src, true);
    1050                 :      12820 :   exit = single_exit (loop);
    1051                 :      12820 :   basic_block new_preheader = new_bbs[0];
    1052                 :            : 
    1053                 :      12820 :   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
    1054                 :            : 
    1055                 :      12820 :   if (scalar_loop != loop)
    1056                 :            :     {
    1057                 :            :       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
    1058                 :            :          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
    1059                 :            :          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
    1060                 :            :          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
    1061                 :            :          header) to have current_def set, so copy them over.  */
    1062                 :        361 :       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
    1063                 :            :                                                 exit);
    1064                 :        361 :       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
    1065                 :            :                                                            0),
    1066                 :        361 :                                                 EDGE_SUCC (loop->latch, 0));
    1067                 :            :     }
    1068                 :            : 
    1069                 :      12820 :   if (at_exit) /* Add the loop copy at exit.  */
    1070                 :            :     {
    1071                 :      11985 :       if (scalar_loop != loop)
    1072                 :            :         {
    1073                 :        327 :           gphi_iterator gsi;
    1074                 :        327 :           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
    1075                 :            : 
    1076                 :        705 :           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
    1077                 :        378 :                gsi_next (&gsi))
    1078                 :            :             {
    1079                 :        378 :               gphi *phi = gsi.phi ();
    1080                 :        378 :               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
    1081                 :        378 :               location_t orig_locus
    1082                 :        378 :                 = gimple_phi_arg_location_from_edge (phi, e);
    1083                 :            : 
    1084                 :        378 :               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
    1085                 :            :             }
    1086                 :            :         }
    1087                 :      11985 :       redirect_edge_and_branch_force (e, new_preheader);
    1088                 :      11985 :       flush_pending_stmts (e);
    1089                 :      11985 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
    1090                 :      11985 :       if (was_imm_dom || duplicate_outer_loop)
    1091                 :      11985 :         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
    1092                 :            : 
    1093                 :            :       /* And remove the non-necessary forwarder again.  Keep the other
    1094                 :            :          one so we have a proper pre-header for the loop at the exit edge.  */
    1095                 :      11985 :       redirect_edge_pred (single_succ_edge (preheader),
    1096                 :            :                           single_pred (preheader));
    1097                 :      11985 :       delete_basic_block (preheader);
    1098                 :      11985 :       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1099                 :      11985 :                                loop_preheader_edge (scalar_loop)->src);
    1100                 :            :     }
    1101                 :            :   else /* Add the copy at entry.  */
    1102                 :            :     {
    1103                 :        835 :       if (scalar_loop != loop)
    1104                 :            :         {
    1105                 :            :           /* Remove the non-necessary forwarder of scalar_loop again.  */
    1106                 :         34 :           redirect_edge_pred (single_succ_edge (preheader),
    1107                 :            :                               single_pred (preheader));
    1108                 :         34 :           delete_basic_block (preheader);
    1109                 :         34 :           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1110                 :         34 :                                    loop_preheader_edge (scalar_loop)->src);
    1111                 :         34 :           preheader = split_edge (loop_preheader_edge (loop));
    1112                 :         34 :           entry_e = single_pred_edge (preheader);
    1113                 :            :         }
    1114                 :            : 
    1115                 :        835 :       redirect_edge_and_branch_force (entry_e, new_preheader);
    1116                 :        835 :       flush_pending_stmts (entry_e);
    1117                 :        835 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
    1118                 :            : 
    1119                 :        835 :       redirect_edge_and_branch_force (new_exit, preheader);
    1120                 :        835 :       flush_pending_stmts (new_exit);
    1121                 :        835 :       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
    1122                 :            : 
    1123                 :            :       /* And remove the non-necessary forwarder again.  Keep the other
    1124                 :            :          one so we have a proper pre-header for the loop at the exit edge.  */
    1125                 :        835 :       redirect_edge_pred (single_succ_edge (new_preheader),
    1126                 :            :                           single_pred (new_preheader));
    1127                 :        835 :       delete_basic_block (new_preheader);
    1128                 :        835 :       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
    1129                 :        835 :                                loop_preheader_edge (new_loop)->src);
    1130                 :            :     }
    1131                 :            : 
    1132                 :            :   /* Skip new preheader since it's deleted if copy loop is added at entry.  */
    1133                 :      64296 :   for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    1134                 :      38656 :     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
    1135                 :            : 
    1136                 :      12820 :   if (scalar_loop != loop)
    1137                 :            :     {
    1138                 :            :       /* Update new_loop->header PHIs, so that on the preheader
    1139                 :            :          edge they are the ones from loop rather than scalar_loop.  */
    1140                 :        361 :       gphi_iterator gsi_orig, gsi_new;
    1141                 :        361 :       edge orig_e = loop_preheader_edge (loop);
    1142                 :        361 :       edge new_e = loop_preheader_edge (new_loop);
    1143                 :            : 
    1144                 :        361 :       for (gsi_orig = gsi_start_phis (loop->header),
    1145                 :        361 :            gsi_new = gsi_start_phis (new_loop->header);
    1146                 :       1302 :            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
    1147                 :        941 :            gsi_next (&gsi_orig), gsi_next (&gsi_new))
    1148                 :            :         {
    1149                 :        941 :           gphi *orig_phi = gsi_orig.phi ();
    1150                 :        941 :           gphi *new_phi = gsi_new.phi ();
    1151                 :        941 :           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
    1152                 :        941 :           location_t orig_locus
    1153                 :        941 :             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
    1154                 :            : 
    1155                 :        941 :           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
    1156                 :            :         }
    1157                 :            :     }
    1158                 :            : 
    1159                 :      12820 :   free (new_bbs);
    1160                 :      12820 :   free (bbs);
    1161                 :            : 
    1162                 :      12820 :   checking_verify_dominators (CDI_DOMINATORS);
    1163                 :            : 
    1164                 :            :   return new_loop;
    1165                 :            : }
    1166                 :            : 
    1167                 :            : 
    1168                 :            : /* Given the condition expression COND, put it as the last statement of
    1169                 :            :    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
    1170                 :            :    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
    1171                 :            :    skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
    1172                 :            :    new edge as irreducible if IRREDUCIBLE_P is true.  */
    1173                 :            : 
    1174                 :            : static edge
    1175                 :      18315 : slpeel_add_loop_guard (basic_block guard_bb, tree cond,
    1176                 :            :                        basic_block guard_to, basic_block dom_bb,
    1177                 :            :                        profile_probability probability, bool irreducible_p)
    1178                 :            : {
    1179                 :      18315 :   gimple_stmt_iterator gsi;
    1180                 :      18315 :   edge new_e, enter_e;
    1181                 :      18315 :   gcond *cond_stmt;
    1182                 :      18315 :   gimple_seq gimplify_stmt_list = NULL;
    1183                 :            : 
    1184                 :      18315 :   enter_e = EDGE_SUCC (guard_bb, 0);
    1185                 :      18315 :   enter_e->flags &= ~EDGE_FALLTHRU;
    1186                 :      18315 :   enter_e->flags |= EDGE_FALSE_VALUE;
    1187                 :      18315 :   gsi = gsi_last_bb (guard_bb);
    1188                 :            : 
    1189                 :      18315 :   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
    1190                 :            :                                  NULL_TREE);
    1191                 :      18315 :   if (gimplify_stmt_list)
    1192                 :       8291 :     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
    1193                 :            : 
    1194                 :      18315 :   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
    1195                 :      18315 :   gsi = gsi_last_bb (guard_bb);
    1196                 :      18315 :   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
    1197                 :            : 
    1198                 :            :   /* Add new edge to connect guard block to the merge/loop-exit block.  */
    1199                 :      18315 :   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
    1200                 :            : 
    1201                 :      18315 :   new_e->probability = probability;
    1202                 :      18315 :   if (irreducible_p)
    1203                 :         12 :     new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
    1204                 :            : 
    1205                 :      18315 :   enter_e->probability = probability.invert ();
    1206                 :      18315 :   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
    1207                 :            : 
    1208                 :            :   /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
    1209                 :      18315 :   if (enter_e->dest->loop_father->header == enter_e->dest)
    1210                 :        834 :     split_edge (enter_e);
    1211                 :            : 
    1212                 :      18315 :   return new_e;
    1213                 :            : }
    1214                 :            : 
    1215                 :            : 
    1216                 :            : /* This function verifies that the following restrictions apply to LOOP:
    1217                 :            :    (1) it consists of exactly 2 basic blocks - header, and an empty latch
    1218                 :            :        for innermost loop and 5 basic blocks for outer-loop.
    1219                 :            :    (2) it is single entry, single exit
    1220                 :            :    (3) its exit condition is the last stmt in the header
    1221                 :            :    (4) E is the entry/exit edge of LOOP.
    1222                 :            :  */
    1223                 :            : 
    1224                 :            : bool
    1225                 :      51620 : slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e)
    1226                 :            : {
    1227                 :      51620 :   edge exit_e = single_exit (loop);
    1228                 :      51620 :   edge entry_e = loop_preheader_edge (loop);
    1229                 :      51620 :   gcond *orig_cond = get_loop_exit_condition (loop);
    1230                 :      51620 :   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
    1231                 :      51620 :   unsigned int num_bb = loop->inner? 5 : 2;
    1232                 :            : 
    1233                 :            :   /* All loops have an outer scope; the only case loop->outer is NULL is for
    1234                 :            :      the function itself.  */
    1235                 :      51620 :   if (!loop_outer (loop)
    1236                 :      51620 :       || loop->num_nodes != num_bb
    1237                 :      51620 :       || !empty_block_p (loop->latch)
    1238                 :      51620 :       || !single_exit (loop)
    1239                 :            :       /* Verify that new loop exit condition can be trivially modified.  */
    1240                 :      51620 :       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
    1241                 :     103240 :       || (e != exit_e && e != entry_e))
    1242                 :          0 :     return false;
    1243                 :            : 
    1244                 :            :   return true;
    1245                 :            : }
    1246                 :            : 
    1247                 :            : /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
    1248                 :            :    in the exit bb and rename all the uses after the loop.  This simplifies
    1249                 :            :    the *guard[12] routines, which assume loop closed SSA form for all PHIs
    1250                 :            :    (but normally loop closed SSA form doesn't require virtual PHIs to be
    1251                 :            :    in the same form).  Doing this early simplifies the checking what
    1252                 :            :    uses should be renamed.
    1253                 :            : 
    1254                 :            :    If we create a new phi after the loop, return the definition that
    1255                 :            :    applies on entry to the loop, otherwise return null.  */
    1256                 :            : 
    1257                 :            : static tree
    1258                 :      14891 : create_lcssa_for_virtual_phi (class loop *loop)
    1259                 :            : {
    1260                 :      14891 :   gphi_iterator gsi;
    1261                 :      14891 :   edge exit_e = single_exit (loop);
    1262                 :            : 
    1263                 :      53413 :   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    1264                 :     100682 :     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
    1265                 :            :       {
    1266                 :      11819 :         gphi *phi = gsi.phi ();
    1267                 :      11819 :         for (gsi = gsi_start_phis (exit_e->dest);
    1268                 :      12443 :              !gsi_end_p (gsi); gsi_next (&gsi))
    1269                 :      16516 :           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
    1270                 :            :             break;
    1271                 :      11819 :         if (gsi_end_p (gsi))
    1272                 :            :           {
    1273                 :       4185 :             tree new_vop = copy_ssa_name (PHI_RESULT (phi));
    1274                 :       4185 :             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
    1275                 :       4185 :             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
    1276                 :       4185 :             imm_use_iterator imm_iter;
    1277                 :       4185 :             gimple *stmt;
    1278                 :       4185 :             use_operand_p use_p;
    1279                 :            : 
    1280                 :       4185 :             SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
    1281                 :       4185 :               = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
    1282                 :       4185 :             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
    1283                 :       4185 :             gimple_phi_set_result (new_phi, new_vop);
    1284                 :      19768 :             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
    1285                 :      15583 :               if (stmt != new_phi
    1286                 :      15583 :                   && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    1287                 :      28594 :                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
    1288                 :       7225 :                   SET_USE (use_p, new_vop);
    1289                 :            : 
    1290                 :       4185 :             return PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
    1291                 :            :           }
    1292                 :            :         break;
    1293                 :            :       }
    1294                 :            :   return NULL_TREE;
    1295                 :            : }
    1296                 :            : 
    1297                 :            : /* Function vect_get_loop_location.
    1298                 :            : 
    1299                 :            :    Extract the location of the loop in the source code.
    1300                 :            :    If the loop is not well formed for vectorization, an estimated
    1301                 :            :    location is calculated.
    1302                 :            :    Return the loop location if succeed and NULL if not.  */
    1303                 :            : 
    1304                 :            : dump_user_location_t
    1305                 :     529366 : find_loop_location (class loop *loop)
    1306                 :            : {
    1307                 :     529366 :   gimple *stmt = NULL;
    1308                 :     529366 :   basic_block bb;
    1309                 :     529366 :   gimple_stmt_iterator si;
    1310                 :            : 
    1311                 :     529366 :   if (!loop)
    1312                 :          0 :     return dump_user_location_t ();
    1313                 :            : 
    1314                 :     529366 :   stmt = get_loop_exit_condition (loop);
    1315                 :            : 
    1316                 :     529366 :   if (stmt
    1317                 :     529366 :       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    1318                 :     307166 :     return stmt;
    1319                 :            : 
    1320                 :            :   /* If we got here the loop is probably not "well formed",
    1321                 :            :      try to estimate the loop location */
    1322                 :            : 
    1323                 :     222200 :   if (!loop->header)
    1324                 :          0 :     return dump_user_location_t ();
    1325                 :            : 
    1326                 :     222200 :   bb = loop->header;
    1327                 :            : 
    1328                 :     684673 :   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    1329                 :            :     {
    1330                 :     438500 :       stmt = gsi_stmt (si);
    1331                 :     438500 :       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    1332                 :     198227 :         return stmt;
    1333                 :            :     }
    1334                 :            : 
    1335                 :      23973 :   return dump_user_location_t ();
    1336                 :            : }
    1337                 :            : 
    1338                 :            : /* Return true if the phi described by STMT_INFO defines an IV of the
    1339                 :            :    loop to be vectorized.  */
    1340                 :            : 
    1341                 :            : static bool
    1342                 :     236318 : iv_phi_p (stmt_vec_info stmt_info)
    1343                 :            : {
    1344                 :     236318 :   gphi *phi = as_a <gphi *> (stmt_info->stmt);
    1345                 :     472636 :   if (virtual_operand_p (PHI_RESULT (phi)))
    1346                 :            :     return false;
    1347                 :            : 
    1348                 :     177417 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
    1349                 :     177417 :       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    1350                 :      54978 :     return false;
    1351                 :            : 
    1352                 :            :   return true;
    1353                 :            : }
    1354                 :            : 
    1355                 :            : /* Function vect_can_advance_ivs_p
    1356                 :            : 
    1357                 :            :    In case the number of iterations that LOOP iterates is unknown at compile
    1358                 :            :    time, an epilog loop will be generated, and the loop induction variables
    1359                 :            :    (IVs) will be "advanced" to the value they are supposed to take just before
    1360                 :            :    the epilog loop.  Here we check that the access function of the loop IVs
    1361                 :            :    and the expression that represents the loop bound are simple enough.
    1362                 :            :    These restrictions will be relaxed in the future.  */
    1363                 :            : 
    1364                 :            : bool
    1365                 :      51505 : vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
    1366                 :            : {
    1367                 :      51505 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    1368                 :      51505 :   basic_block bb = loop->header;
    1369                 :      51505 :   gphi_iterator gsi;
    1370                 :            : 
    1371                 :            :   /* Analyze phi functions of the loop header.  */
    1372                 :            : 
    1373                 :      51505 :   if (dump_enabled_p ())
    1374                 :      12902 :     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
    1375                 :     211437 :   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    1376                 :            :     {
    1377                 :     160010 :       tree evolution_part;
    1378                 :            : 
    1379                 :     160010 :       gphi *phi = gsi.phi ();
    1380                 :     160010 :       stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
    1381                 :     160010 :       if (dump_enabled_p ())
    1382                 :      37849 :         dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
    1383                 :            :                          phi_info->stmt);
    1384                 :            : 
    1385                 :            :       /* Skip virtual phi's. The data dependences that are associated with
    1386                 :            :          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
    1387                 :            : 
    1388                 :            :          Skip reduction phis.  */
    1389                 :     160010 :       if (!iv_phi_p (phi_info))
    1390                 :            :         {
    1391                 :      74439 :           if (dump_enabled_p ())
    1392                 :      13694 :             dump_printf_loc (MSG_NOTE, vect_location,
    1393                 :            :                              "reduc or virtual phi. skip.\n");
    1394                 :      74439 :           continue;
    1395                 :            :         }
    1396                 :            : 
    1397                 :            :       /* Analyze the evolution function.  */
    1398                 :            : 
    1399                 :      85571 :       evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
    1400                 :      85571 :       if (evolution_part == NULL_TREE)
    1401                 :            :         {
    1402                 :         74 :           if (dump_enabled_p ())
    1403                 :          6 :             dump_printf (MSG_MISSED_OPTIMIZATION,
    1404                 :            :                          "No access function or evolution.\n");
    1405                 :         74 :           return false;
    1406                 :            :         }
    1407                 :            : 
    1408                 :            :       /* FORNOW: We do not transform initial conditions of IVs
    1409                 :            :          which evolution functions are not invariants in the loop.  */
    1410                 :            : 
    1411                 :      85497 :       if (!expr_invariant_in_loop_p (loop, evolution_part))
    1412                 :            :         {
    1413                 :          4 :           if (dump_enabled_p ())
    1414                 :          4 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1415                 :            :                              "evolution not invariant in loop.\n");
    1416                 :          4 :           return false;
    1417                 :            :         }
    1418                 :            : 
    1419                 :            :       /* FORNOW: We do not transform initial conditions of IVs
    1420                 :            :          which evolution functions are a polynomial of degree >= 2.  */
    1421                 :            : 
    1422                 :     245425 :       if (tree_is_chrec (evolution_part))
    1423                 :            :         {
    1424                 :          0 :           if (dump_enabled_p ())
    1425                 :          0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1426                 :            :                              "evolution is chrec.\n");
    1427                 :          0 :           return false;
    1428                 :            :         }
    1429                 :            :     }
    1430                 :            : 
    1431                 :            :   return true;
    1432                 :            : }
    1433                 :            : 
    1434                 :            : 
    1435                 :            : /*   Function vect_update_ivs_after_vectorizer.
    1436                 :            : 
    1437                 :            :      "Advance" the induction variables of LOOP to the value they should take
    1438                 :            :      after the execution of LOOP.  This is currently necessary because the
    1439                 :            :      vectorizer does not handle induction variables that are used after the
    1440                 :            :      loop.  Such a situation occurs when the last iterations of LOOP are
    1441                 :            :      peeled, because:
    1442                 :            :      1. We introduced new uses after LOOP for IVs that were not originally used
    1443                 :            :         after LOOP: the IVs of LOOP are now used by an epilog loop.
    1444                 :            :      2. LOOP is going to be vectorized; this means that it will iterate N/VF
    1445                 :            :         times, whereas the loop IVs should be bumped N times.
    1446                 :            : 
    1447                 :            :      Input:
    1448                 :            :      - LOOP - a loop that is going to be vectorized. The last few iterations
    1449                 :            :               of LOOP were peeled.
    1450                 :            :      - NITERS - the number of iterations that LOOP executes (before it is
    1451                 :            :                 vectorized). i.e, the number of times the ivs should be bumped.
    1452                 :            :      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
    1453                 :            :                   coming out from LOOP on which there are uses of the LOOP ivs
    1454                 :            :                   (this is the path from LOOP->exit to epilog_loop->preheader).
    1455                 :            : 
    1456                 :            :                   The new definitions of the ivs are placed in LOOP->exit.
    1457                 :            :                   The phi args associated with the edge UPDATE_E in the bb
    1458                 :            :                   UPDATE_E->dest are updated accordingly.
    1459                 :            : 
    1460                 :            :      Assumption 1: Like the rest of the vectorizer, this function assumes
    1461                 :            :      a single loop exit that has a single predecessor.
    1462                 :            : 
    1463                 :            :      Assumption 2: The phi nodes in the LOOP header and in update_bb are
    1464                 :            :      organized in the same order.
    1465                 :            : 
    1466                 :            :      Assumption 3: The access function of the ivs is simple enough (see
    1467                 :            :      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
    1468                 :            : 
    1469                 :            :      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
    1470                 :            :      coming out of LOOP on which the ivs of LOOP are used (this is the path
    1471                 :            :      that leads to the epilog loop; other paths skip the epilog loop).  This
    1472                 :            :      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
    1473                 :            :      needs to have its phis updated.
    1474                 :            :  */
    1475                 :            : 
    1476                 :            : static void
    1477                 :      11985 : vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
    1478                 :            :                                   tree niters, edge update_e)
    1479                 :            : {
    1480                 :      11985 :   gphi_iterator gsi, gsi1;
    1481                 :      11985 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    1482                 :      11985 :   basic_block update_bb = update_e->dest;
    1483                 :      11985 :   basic_block exit_bb = single_exit (loop)->dest;
    1484                 :            : 
    1485                 :            :   /* Make sure there exists a single-predecessor exit bb:  */
    1486                 :      11985 :   gcc_assert (single_pred_p (exit_bb));
    1487                 :      11985 :   gcc_assert (single_succ_edge (exit_bb) == update_e);
    1488                 :            : 
    1489                 :      11985 :   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
    1490                 :      50139 :        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
    1491                 :      38154 :        gsi_next (&gsi), gsi_next (&gsi1))
    1492                 :            :     {
    1493                 :      38154 :       tree init_expr;
    1494                 :      38154 :       tree step_expr, off;
    1495                 :      38154 :       tree type;
    1496                 :      38154 :       tree var, ni, ni_name;
    1497                 :      38154 :       gimple_stmt_iterator last_gsi;
    1498                 :            : 
    1499                 :      38154 :       gphi *phi = gsi.phi ();
    1500                 :      38154 :       gphi *phi1 = gsi1.phi ();
    1501                 :      38154 :       stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
    1502                 :      38154 :       if (dump_enabled_p ())
    1503                 :       6334 :         dump_printf_loc (MSG_NOTE, vect_location,
    1504                 :            :                          "vect_update_ivs_after_vectorizer: phi: %G", phi);
    1505                 :            : 
    1506                 :            :       /* Skip reduction and virtual phis.  */
    1507                 :      38154 :       if (!iv_phi_p (phi_info))
    1508                 :            :         {
    1509                 :      19720 :           if (dump_enabled_p ())
    1510                 :       2543 :             dump_printf_loc (MSG_NOTE, vect_location,
    1511                 :            :                              "reduc or virtual phi. skip.\n");
    1512                 :      19720 :           continue;
    1513                 :            :         }
    1514                 :            : 
    1515                 :      18434 :       type = TREE_TYPE (gimple_phi_result (phi));
    1516                 :      18434 :       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
    1517                 :      18434 :       step_expr = unshare_expr (step_expr);
    1518                 :            : 
    1519                 :            :       /* FORNOW: We do not support IVs whose evolution function is a polynomial
    1520                 :            :          of degree >= 2 or exponential.  */
    1521                 :      18434 :       gcc_assert (!tree_is_chrec (step_expr));
    1522                 :            : 
    1523                 :      18434 :       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
    1524                 :            : 
    1525                 :      18434 :       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
    1526                 :            :                          fold_convert (TREE_TYPE (step_expr), niters),
    1527                 :            :                          step_expr);
    1528                 :      18434 :       if (POINTER_TYPE_P (type))
    1529                 :       1539 :         ni = fold_build_pointer_plus (init_expr, off);
    1530                 :            :       else
    1531                 :      16895 :         ni = fold_build2 (PLUS_EXPR, type,
    1532                 :            :                           init_expr, fold_convert (type, off));
    1533                 :            : 
    1534                 :      18434 :       var = create_tmp_var (type, "tmp");
    1535                 :            : 
    1536                 :      18434 :       last_gsi = gsi_last_bb (exit_bb);
    1537                 :      18434 :       gimple_seq new_stmts = NULL;
    1538                 :      18434 :       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
    1539                 :            :       /* Exit_bb shouldn't be empty.  */
    1540                 :      18434 :       if (!gsi_end_p (last_gsi))
    1541                 :      13529 :         gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
    1542                 :            :       else
    1543                 :       4905 :         gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
    1544                 :            : 
    1545                 :            :       /* Fix phi expressions in the successor bb.  */
    1546                 :      18434 :       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    1547                 :            :     }
    1548                 :      11985 : }
    1549                 :            : 
    1550                 :            : /* Return a gimple value containing the misalignment (measured in vector
    1551                 :            :    elements) for the loop described by LOOP_VINFO, i.e. how many elements
    1552                 :            :    it is away from a perfectly aligned address.  Add any new statements
    1553                 :            :    to SEQ.  */
    1554                 :            : 
    1555                 :            : static tree
    1556                 :         17 : get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
    1557                 :            : {
    1558                 :         17 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    1559                 :         17 :   stmt_vec_info stmt_info = dr_info->stmt;
    1560                 :         17 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    1561                 :            : 
    1562                 :         17 :   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
    1563                 :         17 :   unsigned HOST_WIDE_INT target_align_c;
    1564                 :         17 :   tree target_align_minus_1;
    1565                 :            : 
    1566                 :         17 :   bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    1567                 :         17 :                                         size_zero_node) < 0;
    1568                 :         17 :   tree offset = (negative
    1569                 :         17 :                  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
    1570                 :         17 :                  : size_zero_node);
    1571                 :         17 :   tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
    1572                 :            :                                                           offset);
    1573                 :         17 :   tree type = unsigned_type_for (TREE_TYPE (start_addr));
    1574                 :         17 :   if (target_align.is_constant (&target_align_c))
    1575                 :         17 :     target_align_minus_1 = build_int_cst (type, target_align_c - 1);
    1576                 :            :   else
    1577                 :            :     {
    1578                 :            :       tree vla = build_int_cst (type, target_align);
    1579                 :            :       tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
    1580                 :            :                                     fold_build2 (MINUS_EXPR, type,
    1581                 :            :                                                  build_int_cst (type, 0), vla));
    1582                 :            :       target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
    1583                 :            :                                           build_int_cst (type, 1));
    1584                 :            :     }
    1585                 :            : 
    1586                 :         17 :   HOST_WIDE_INT elem_size
    1587                 :         17 :     = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    1588                 :         34 :   tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
    1589                 :            : 
    1590                 :            :   /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
    1591                 :         17 :   tree int_start_addr = fold_convert (type, start_addr);
    1592                 :         17 :   tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
    1593                 :            :                                         target_align_minus_1);
    1594                 :            : 
    1595                 :            :   /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
    1596                 :         17 :   tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
    1597                 :            :                                         elem_size_log);
    1598                 :            : 
    1599                 :         17 :   return misalign_in_elems;
    1600                 :            : }
    1601                 :            : 
    1602                 :            : /* Function vect_gen_prolog_loop_niters
    1603                 :            : 
    1604                 :            :    Generate the number of iterations which should be peeled as prolog for the
    1605                 :            :    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
    1606                 :            :    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
    1607                 :            :    As a result, after the execution of this loop, the data reference DR will
    1608                 :            :    refer to an aligned location.  The following computation is generated:
    1609                 :            : 
    1610                 :            :    If the misalignment of DR is known at compile time:
    1611                 :            :      addr_mis = int mis = DR_MISALIGNMENT (dr);
    1612                 :            :    Else, compute address misalignment in bytes:
    1613                 :            :      addr_mis = addr & (target_align - 1)
    1614                 :            : 
    1615                 :            :    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
    1616                 :            : 
    1617                 :            :    (elem_size = element type size; an element is the scalar element whose type
    1618                 :            :    is the inner type of the vectype)
    1619                 :            : 
    1620                 :            :    The computations will be emitted at the end of BB.  We also compute and
    1621                 :            :    store upper bound (included) of the result in BOUND.
    1622                 :            : 
    1623                 :            :    When the step of the data-ref in the loop is not 1 (as in interleaved data
    1624                 :            :    and SLP), the number of iterations of the prolog must be divided by the step
    1625                 :            :    (which is equal to the size of interleaved group).
    1626                 :            : 
    1627                 :            :    The above formulas assume that VF == number of elements in the vector. This
    1628                 :            :    may not hold when there are multiple-types in the loop.
    1629                 :            :    In this case, for some data-references in the loop the VF does not represent
    1630                 :            :    the number of elements that fit in the vector.  Therefore, instead of VF we
    1631                 :            :    use TYPE_VECTOR_SUBPARTS.  */
    1632                 :            : 
    1633                 :            : static tree
    1634                 :        193 : vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
    1635                 :            :                              basic_block bb, int *bound)
    1636                 :            : {
    1637                 :        193 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    1638                 :        193 :   tree var;
    1639                 :        193 :   tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
    1640                 :        193 :   gimple_seq stmts = NULL, new_stmts = NULL;
    1641                 :        193 :   tree iters, iters_name;
    1642                 :        193 :   stmt_vec_info stmt_info = dr_info->stmt;
    1643                 :        193 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    1644                 :        193 :   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
    1645                 :            : 
    1646                 :        193 :   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    1647                 :            :     {
    1648                 :        176 :       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    1649                 :            : 
    1650                 :        176 :       if (dump_enabled_p ())
    1651                 :        176 :         dump_printf_loc (MSG_NOTE, vect_location,
    1652                 :            :                          "known peeling = %d.\n", npeel);
    1653                 :            : 
    1654                 :        176 :       iters = build_int_cst (niters_type, npeel);
    1655                 :        176 :       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    1656                 :            :     }
    1657                 :            :   else
    1658                 :            :     {
    1659                 :         17 :       tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
    1660                 :         17 :       tree type = TREE_TYPE (misalign_in_elems);
    1661                 :         17 :       HOST_WIDE_INT elem_size
    1662                 :         17 :         = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    1663                 :            :       /* We only do prolog peeling if the target alignment is known at compile
    1664                 :            :          time.  */
    1665                 :         17 :       poly_uint64 align_in_elems =
    1666                 :         17 :         exact_div (target_align, elem_size);
    1667                 :         17 :       tree align_in_elems_minus_1 =
    1668                 :         17 :         build_int_cst (type, align_in_elems - 1);
    1669                 :         17 :       tree align_in_elems_tree = build_int_cst (type, align_in_elems);
    1670                 :            : 
    1671                 :            :       /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
    1672                 :            :                                  & (align_in_elems - 1)).  */
    1673                 :         17 :       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    1674                 :         17 :                                             size_zero_node) < 0;
    1675                 :         17 :       if (negative)
    1676                 :          4 :         iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
    1677                 :            :                              align_in_elems_tree);
    1678                 :            :       else
    1679                 :         13 :         iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
    1680                 :            :                              misalign_in_elems);
    1681                 :         17 :       iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
    1682                 :         17 :       iters = fold_convert (niters_type, iters);
    1683                 :         17 :       unsigned HOST_WIDE_INT align_in_elems_c;
    1684                 :         17 :       if (align_in_elems.is_constant (&align_in_elems_c))
    1685                 :         17 :         *bound = align_in_elems_c - 1;
    1686                 :            :       else
    1687                 :            :         *bound = -1;
    1688                 :            :     }
    1689                 :            : 
    1690                 :        193 :   if (dump_enabled_p ())
    1691                 :        178 :     dump_printf_loc (MSG_NOTE, vect_location,
    1692                 :            :                      "niters for prolog loop: %T\n", iters);
    1693                 :            : 
    1694                 :        193 :   var = create_tmp_var (niters_type, "prolog_loop_niters");
    1695                 :        193 :   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
    1696                 :            : 
    1697                 :        193 :   if (new_stmts)
    1698                 :         17 :     gimple_seq_add_seq (&stmts, new_stmts);
    1699                 :        193 :   if (stmts)
    1700                 :            :     {
    1701                 :         17 :       gcc_assert (single_succ_p (bb));
    1702                 :         17 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
    1703                 :         17 :       if (gsi_end_p (gsi))
    1704                 :          5 :         gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    1705                 :            :       else
    1706                 :         12 :         gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    1707                 :            :     }
    1708                 :        193 :   return iters_name;
    1709                 :            : }
    1710                 :            : 
    1711                 :            : 
    1712                 :            : /* Function vect_update_init_of_dr
    1713                 :            : 
    1714                 :            :    If CODE is PLUS, the vector loop starts NITERS iterations after the
    1715                 :            :    scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
    1716                 :            :    iterations before the scalar one (using masking to skip inactive
    1717                 :            :    elements).  This function updates the information recorded in DR to
    1718                 :            :    account for the difference.  Specifically, it updates the OFFSET
    1719                 :            :    field of DR_INFO.  */
    1720                 :            : 
    1721                 :            : static void
    1722                 :      11330 : vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
    1723                 :            : {
    1724                 :      11330 :   struct data_reference *dr = dr_info->dr;
    1725                 :      11330 :   tree offset = dr_info->offset;
    1726                 :      11330 :   if (!offset)
    1727                 :      11330 :     offset = build_zero_cst (sizetype);
    1728                 :            : 
    1729                 :      11330 :   niters = fold_build2 (MULT_EXPR, sizetype,
    1730                 :            :                         fold_convert (sizetype, niters),
    1731                 :            :                         fold_convert (sizetype, DR_STEP (dr)));
    1732                 :      11330 :   offset = fold_build2 (code, sizetype,
    1733                 :            :                         fold_convert (sizetype, offset), niters);
    1734                 :      11330 :   dr_info->offset = offset;
    1735                 :      11330 : }
    1736                 :            : 
    1737                 :            : 
    1738                 :            : /* Function vect_update_inits_of_drs
    1739                 :            : 
    1740                 :            :    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
    1741                 :            :    CODE and NITERS are as for vect_update_inits_of_dr.  */
    1742                 :            : 
    1743                 :            : void
    1744                 :       3461 : vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
    1745                 :            :                           tree_code code)
    1746                 :            : {
    1747                 :       3461 :   unsigned int i;
    1748                 :       3461 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    1749                 :       3461 :   struct data_reference *dr;
    1750                 :            : 
    1751                 :       3461 :   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
    1752                 :            : 
    1753                 :            :   /* Adjust niters to sizetype.  We used to insert the stmts on loop preheader
    1754                 :            :      here, but since we might use these niters to update the epilogues niters
    1755                 :            :      and data references we can't insert them here as this definition might not
    1756                 :            :      always dominate its uses.  */
    1757                 :       3461 :   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    1758                 :       2181 :     niters = fold_convert (sizetype, niters);
    1759                 :            : 
    1760                 :      18259 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    1761                 :            :     {
    1762                 :      11378 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    1763                 :      11378 :       if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
    1764                 :      11330 :         vect_update_init_of_dr (dr_info, niters, code);
    1765                 :            :     }
    1766                 :       3461 : }
    1767                 :            : 
    1768                 :            : /* For the information recorded in LOOP_VINFO prepare the loop for peeling
    1769                 :            :    by masking.  This involves calculating the number of iterations to
    1770                 :            :    be peeled and then aligning all memory references appropriately.  */
    1771                 :            : 
    1772                 :            : void
    1773                 :          0 : vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
    1774                 :            : {
    1775                 :          0 :   tree misalign_in_elems;
    1776                 :          0 :   tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
    1777                 :            : 
    1778                 :          0 :   gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
    1779                 :            : 
    1780                 :            :   /* From the information recorded in LOOP_VINFO get the number of iterations
    1781                 :            :      that need to be skipped via masking.  */
    1782                 :          0 :   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    1783                 :            :     {
    1784                 :          0 :       poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
    1785                 :          0 :                              - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
    1786                 :          0 :       misalign_in_elems = build_int_cst (type, misalign);
    1787                 :            :     }
    1788                 :            :   else
    1789                 :            :     {
    1790                 :          0 :       gimple_seq seq1 = NULL, seq2 = NULL;
    1791                 :          0 :       misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
    1792                 :          0 :       misalign_in_elems = fold_convert (type, misalign_in_elems);
    1793                 :          0 :       misalign_in_elems = force_gimple_operand (misalign_in_elems,
    1794                 :            :                                                 &seq2, true, NULL_TREE);
    1795                 :          0 :       gimple_seq_add_seq (&seq1, seq2);
    1796                 :          0 :       if (seq1)
    1797                 :            :         {
    1798                 :          0 :           edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    1799                 :          0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
    1800                 :          0 :           gcc_assert (!new_bb);
    1801                 :            :         }
    1802                 :            :     }
    1803                 :            : 
    1804                 :          0 :   if (dump_enabled_p ())
    1805                 :          0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1806                 :            :                      "misalignment for fully-masked loop: %T\n",
    1807                 :            :                      misalign_in_elems);
    1808                 :            : 
    1809                 :          0 :   LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
    1810                 :            : 
    1811                 :          0 :   vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
    1812                 :          0 : }
    1813                 :            : 
    1814                 :            : /* This function builds ni_name = number of iterations.  Statements
    1815                 :            :    are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
    1816                 :            :    it to TRUE if new ssa_var is generated.  */
    1817                 :            : 
    1818                 :            : tree
    1819                 :      21576 : vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
    1820                 :            : {
    1821                 :      21576 :   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
    1822                 :      21576 :   if (TREE_CODE (ni) == INTEGER_CST)
    1823                 :            :     return ni;
    1824                 :            :   else
    1825                 :            :     {
    1826                 :       9734 :       tree ni_name, var;
    1827                 :       9734 :       gimple_seq stmts = NULL;
    1828                 :       9734 :       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    1829                 :            : 
    1830                 :       9734 :       var = create_tmp_var (TREE_TYPE (ni), "niters");
    1831                 :       9734 :       ni_name = force_gimple_operand (ni, &stmts, false, var);
    1832                 :       9734 :       if (stmts)
    1833                 :            :         {
    1834                 :       9188 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    1835                 :       9188 :           if (new_var_p != NULL)
    1836                 :         52 :             *new_var_p = true;
    1837                 :            :         }
    1838                 :            : 
    1839                 :       9734 :       return ni_name;
    1840                 :            :     }
    1841                 :            : }
    1842                 :            : 
    1843                 :            : /* Calculate the number of iterations above which vectorized loop will be
    1844                 :            :    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
    1845                 :            :    of prolog loop.  If it's integer const, the integer number is also passed
    1846                 :            :    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (inclusive) of the
    1847                 :            :    number of iterations of the prolog loop.  BOUND_EPILOG is the corresponding
    1848                 :            :    value for the epilog loop.  If CHECK_PROFITABILITY is true, TH is the
    1849                 :            :    threshold below which the scalar (rather than vectorized) loop will be
    1850                 :            :    executed.  This function stores the upper bound (inclusive) of the result
    1851                 :            :    in BOUND_SCALAR.  */
    1852                 :            : 
    1853                 :            : static tree
    1854                 :       8841 : vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
    1855                 :            :                              int bound_prolog, poly_int64 bound_epilog, int th,
    1856                 :            :                              poly_uint64 *bound_scalar,
    1857                 :            :                              bool check_profitability)
    1858                 :            : {
    1859                 :       8841 :   tree type = TREE_TYPE (niters_prolog);
    1860                 :       8841 :   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
    1861                 :            :                              build_int_cst (type, bound_epilog));
    1862                 :            : 
    1863                 :       8841 :   *bound_scalar = bound_prolog + bound_epilog;
    1864                 :       8841 :   if (check_profitability)
    1865                 :            :     {
    1866                 :            :       /* TH indicates the minimum niters of vectorized loop, while we
    1867                 :            :          compute the maximum niters of scalar loop.  */
    1868                 :       4948 :       th--;
    1869                 :            :       /* Peeling for constant times.  */
    1870                 :       4948 :       if (int_niters_prolog >= 0)
    1871                 :            :         {
    1872                 :       4948 :           *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
    1873                 :       4948 :           return build_int_cst (type, *bound_scalar);
    1874                 :            :         }
    1875                 :            :       /* Peeling an unknown number of times.  Note that both BOUND_PROLOG
    1876                 :            :          and BOUND_EPILOG are inclusive upper bounds.  */
    1877                 :          0 :       if (known_ge (th, bound_prolog + bound_epilog))
    1878                 :            :         {
    1879                 :          0 :           *bound_scalar = th;
    1880                 :          0 :           return build_int_cst (type, th);
    1881                 :            :         }
    1882                 :            :       /* Need to do runtime comparison.  */
    1883                 :          0 :       else if (maybe_gt (th, bound_epilog))
    1884                 :            :         {
    1885                 :          0 :           *bound_scalar = upper_bound (*bound_scalar, th);
    1886                 :          0 :           return fold_build2 (MAX_EXPR, type,
    1887                 :            :                               build_int_cst (type, th), niters);
    1888                 :            :         }
    1889                 :            :     }
    1890                 :            :   return niters;
    1891                 :            : }
    1892                 :            : 
    1893                 :            : /* NITERS is the number of times that the original scalar loop executes
    1894                 :            :    after peeling.  Work out the maximum number of iterations N that can
    1895                 :            :    be handled by the vectorized form of the loop and then either:
    1896                 :            : 
    1897                 :            :    a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
    1898                 :            : 
    1899                 :            :         niters_vector = N
    1900                 :            : 
    1901                 :            :    b) set *STEP_VECTOR_PTR to one and generate:
    1902                 :            : 
    1903                 :            :         niters_vector = N / vf
    1904                 :            : 
    1905                 :            :    In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
    1906                 :            :    any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
    1907                 :            :    is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).  */
    1908                 :            : 
    1909                 :            : void
    1910                 :      12152 : vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
    1911                 :            :                              tree *niters_vector_ptr, tree *step_vector_ptr,
    1912                 :            :                              bool niters_no_overflow)
    1913                 :            : {
    1914                 :      12152 :   tree ni_minus_gap, var;
    1915                 :      12152 :   tree niters_vector, step_vector, type = TREE_TYPE (niters);
    1916                 :      12152 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    1917                 :      12152 :   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    1918                 :      12152 :   tree log_vf = NULL_TREE;
    1919                 :            : 
    1920                 :            :   /* If epilogue loop is required because of data accesses with gaps, we
    1921                 :            :      subtract one iteration from the total number of iterations here for
    1922                 :            :      correct calculation of RATIO.  */
    1923                 :      12152 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    1924                 :            :     {
    1925                 :        411 :       ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
    1926                 :            :                                   build_one_cst (type));
    1927                 :        411 :       if (!is_gimple_val (ni_minus_gap))
    1928                 :            :         {
    1929                 :        239 :           var = create_tmp_var (type, "ni_gap");
    1930                 :        239 :           gimple *stmts = NULL;
    1931                 :        239 :           ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
    1932                 :            :                                                true, var);
    1933                 :        239 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    1934                 :            :         }
    1935                 :            :     }
    1936                 :            :   else
    1937                 :            :     ni_minus_gap = niters;
    1938                 :            : 
    1939                 :      12152 :   unsigned HOST_WIDE_INT const_vf;
    1940                 :      12152 :   if (vf.is_constant (&const_vf)
    1941                 :      12152 :       && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    1942                 :            :     {
    1943                 :            :       /* Create: niters >> log2(vf) */
    1944                 :            :       /* If it's known that niters == number of latch executions + 1 doesn't
    1945                 :            :          overflow, we can generate niters >> log2(vf); otherwise we generate
    1946                 :            :          (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
    1947                 :            :          will be at least one.  */
    1948                 :      24304 :       log_vf = build_int_cst (type, exact_log2 (const_vf));
    1949                 :      12152 :       if (niters_no_overflow)
    1950                 :      12132 :         niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
    1951                 :            :       else
    1952                 :         20 :         niters_vector
    1953                 :         20 :           = fold_build2 (PLUS_EXPR, type,
    1954                 :            :                          fold_build2 (RSHIFT_EXPR, type,
    1955                 :            :                                       fold_build2 (MINUS_EXPR, type,
    1956                 :            :                                                    ni_minus_gap,
    1957                 :            :                                                    build_int_cst (type, vf)),
    1958                 :            :                                       log_vf),
    1959                 :            :                          build_int_cst (type, 1));
    1960                 :      12152 :       step_vector = build_one_cst (type);
    1961                 :            :     }
    1962                 :            :   else
    1963                 :            :     {
    1964                 :          0 :       niters_vector = ni_minus_gap;
    1965                 :          0 :       step_vector = build_int_cst (type, vf);
    1966                 :            :     }
    1967                 :            : 
    1968                 :      12152 :   if (!is_gimple_val (niters_vector))
    1969                 :            :     {
    1970                 :       9522 :       var = create_tmp_var (type, "bnd");
    1971                 :       9522 :       gimple_seq stmts = NULL;
    1972                 :       9522 :       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
    1973                 :       9522 :       gsi_insert_seq_on_edge_immediate (pe, stmts);
    1974                 :            :       /* Peeling algorithm guarantees that vector loop bound is at least ONE,
    1975                 :            :          we set range information to make niters analyzer's life easier.  */
    1976                 :       9522 :       if (stmts != NULL && log_vf)
    1977                 :       9522 :         set_range_info (niters_vector, VR_RANGE,
    1978                 :       9522 :                         wi::to_wide (build_int_cst (type, 1)),
    1979                 :      19044 :                         wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
    1980                 :            :                                                   TYPE_MAX_VALUE (type),
    1981                 :       9522 :                                                   log_vf)));
    1982                 :            :     }
    1983                 :      12152 :   *niters_vector_ptr = niters_vector;
    1984                 :      12152 :   *step_vector_ptr = step_vector;
    1985                 :            : 
    1986                 :      12152 :   return;
    1987                 :            : }
    1988                 :            : 
    1989                 :            : /* Given NITERS_VECTOR which is the number of iterations for vectorized
    1990                 :            :    loop specified by LOOP_VINFO after vectorization, compute the number
    1991                 :            :    of iterations before vectorization (niters_vector * vf) and store it
    1992                 :            :    to NITERS_VECTOR_MULT_VF_PTR.  */
    1993                 :            : 
    1994                 :            : static void
    1995                 :      11985 : vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
    1996                 :            :                                      tree niters_vector,
    1997                 :            :                                      tree *niters_vector_mult_vf_ptr)
    1998                 :            : {
    1999                 :            :   /* We should be using a step_vector of VF if VF is variable.  */
    2000                 :      11985 :   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
    2001                 :      11985 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2002                 :      11985 :   tree type = TREE_TYPE (niters_vector);
    2003                 :      23970 :   tree log_vf = build_int_cst (type, exact_log2 (vf));
    2004                 :      11985 :   basic_block exit_bb = single_exit (loop)->dest;
    2005                 :            : 
    2006                 :      11985 :   gcc_assert (niters_vector_mult_vf_ptr != NULL);
    2007                 :      11985 :   tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
    2008                 :            :                                             niters_vector, log_vf);
    2009                 :      11985 :   if (!is_gimple_val (niters_vector_mult_vf))
    2010                 :            :     {
    2011                 :       9506 :       tree var = create_tmp_var (type, "niters_vector_mult_vf");
    2012                 :       9506 :       gimple_seq stmts = NULL;
    2013                 :       9506 :       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
    2014                 :            :                                                     &stmts, true, var);
    2015                 :       9506 :       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
    2016                 :       9506 :       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    2017                 :            :     }
    2018                 :      11985 :   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
    2019                 :      11985 : }
    2020                 :            : 
    2021                 :            : /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
    2022                 :            :    this function searches for the corresponding lcssa phi node in exit
    2023                 :            :    bb of LOOP.  If it is found, return the phi result; otherwise return
    2024                 :            :    NULL.  */
    2025                 :            : 
    2026                 :            : static tree
    2027                 :      27894 : find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
    2028                 :            :                 gphi *lcssa_phi)
    2029                 :            : {
    2030                 :      27894 :   gphi_iterator gsi;
    2031                 :      27894 :   edge e = single_exit (loop);
    2032                 :            : 
    2033                 :      27894 :   gcc_assert (single_pred_p (e->dest));
    2034                 :     158231 :   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    2035                 :            :     {
    2036                 :     157825 :       gphi *phi = gsi.phi ();
    2037                 :     315650 :       if (operand_equal_p (PHI_ARG_DEF (phi, 0),
    2038                 :     157825 :                            PHI_ARG_DEF (lcssa_phi, 0), 0))
    2039                 :      27488 :         return PHI_RESULT (phi);
    2040                 :            :     }
    2041                 :            :   return NULL_TREE;
    2042                 :            : }
    2043                 :            : 
    2044                 :            : /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
    2045                 :            :    from SECOND/FIRST and puts it at the original loop's preheader/exit
    2046                 :            :    edge, the two loops are arranged as below:
    2047                 :            : 
    2048                 :            :        preheader_a:
    2049                 :            :      first_loop:
    2050                 :            :        header_a:
    2051                 :            :          i_1 = PHI<i_0, i_2>;
    2052                 :            :          ...
    2053                 :            :          i_2 = i_1 + 1;
    2054                 :            :          if (cond_a)
    2055                 :            :            goto latch_a;
    2056                 :            :          else
    2057                 :            :            goto between_bb;
    2058                 :            :        latch_a:
    2059                 :            :          goto header_a;
    2060                 :            : 
    2061                 :            :        between_bb:
    2062                 :            :          ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,
    2063                 :            : 
    2064                 :            :      second_loop:
    2065                 :            :        header_b:
    2066                 :            :          i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
    2067                 :            :                                  or with i_2 if no LCSSA phi is created
    2068                 :            :                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
    2069                 :            :          ...
    2070                 :            :          i_4 = i_3 + 1;
    2071                 :            :          if (cond_b)
    2072                 :            :            goto latch_b;
    2073                 :            :          else
    2074                 :            :            goto exit_bb;
    2075                 :            :        latch_b:
    2076                 :            :          goto header_b;
    2077                 :            : 
    2078                 :            :        exit_bb:
    2079                 :            : 
    2080                 :            :    This function creates loop closed SSA for the first loop; update the
    2081                 :            :    second loop's PHI nodes by replacing argument on incoming edge with the
    2082                 :            :    result of newly created lcssa PHI nodes.  IF CREATE_LCSSA_FOR_IV_PHIS
    2083                 :            :    is false, Loop closed ssa phis will only be created for non-iv phis for
    2084                 :            :    the first loop.
    2085                 :            : 
    2086                 :            :    This function assumes exit bb of the first loop is preheader bb of the
    2087                 :            :    second loop, i.e, between_bb in the example code.  With PHIs updated,
    2088                 :            :    the second loop will execute rest iterations of the first.  */
    2089                 :            : 
    2090                 :            : static void
    2091                 :      12178 : slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
    2092                 :            :                                    class loop *first, class loop *second,
    2093                 :            :                                    bool create_lcssa_for_iv_phis)
    2094                 :            : {
    2095                 :      12178 :   gphi_iterator gsi_update, gsi_orig;
    2096                 :      12178 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2097                 :            : 
    2098                 :      12178 :   edge first_latch_e = EDGE_SUCC (first->latch, 0);
    2099                 :      12178 :   edge second_preheader_e = loop_preheader_edge (second);
    2100                 :      12178 :   basic_block between_bb = single_exit (first)->dest;
    2101                 :            : 
    2102                 :      12178 :   gcc_assert (between_bb == second_preheader_e->src);
    2103                 :      12178 :   gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
    2104                 :            :   /* Either the first loop or the second is the loop to be vectorized.  */
    2105                 :      12178 :   gcc_assert (loop == first || loop == second);
    2106                 :            : 
    2107                 :      12178 :   for (gsi_orig = gsi_start_phis (first->header),
    2108                 :      12178 :        gsi_update = gsi_start_phis (second->header);
    2109                 :      50896 :        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
    2110                 :      38718 :        gsi_next (&gsi_orig), gsi_next (&gsi_update))
    2111                 :            :     {
    2112                 :      38718 :       gphi *orig_phi = gsi_orig.phi ();
    2113                 :      38718 :       gphi *update_phi = gsi_update.phi ();
    2114                 :            : 
    2115                 :      38718 :       tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
    2116                 :            :       /* Generate lcssa PHI node for the first loop.  */
    2117                 :      38718 :       gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
    2118                 :      38718 :       stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
    2119                 :      38718 :       if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
    2120                 :            :         {
    2121                 :      20284 :           tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    2122                 :      20284 :           gphi *lcssa_phi = create_phi_node (new_res, between_bb);
    2123                 :      20284 :           add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
    2124                 :      20284 :           arg = new_res;
    2125                 :            :         }
    2126                 :            : 
    2127                 :            :       /* Update PHI node in the second loop by replacing arg on the loop's
    2128                 :            :          incoming edge.  */
    2129                 :      38718 :       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    2130                 :            :     }
    2131                 :            : 
    2132                 :            :   /* For epilogue peeling we have to make sure to copy all LC PHIs
    2133                 :            :      for correct vectorization of live stmts.  */
    2134                 :      12178 :   if (loop == first)
    2135                 :            :     {
    2136                 :      11985 :       basic_block orig_exit = single_exit (second)->dest;
    2137                 :      11985 :       for (gsi_orig = gsi_start_phis (orig_exit);
    2138                 :      32106 :            !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
    2139                 :            :         {
    2140                 :      20121 :           gphi *orig_phi = gsi_orig.phi ();
    2141                 :      20121 :           tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
    2142                 :      20121 :           if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p  (orig_arg))
    2143                 :       9458 :             continue;
    2144                 :            : 
    2145                 :            :           /* Already created in the above loop.   */
    2146                 :      10663 :           if (find_guard_arg (first, second, orig_phi))
    2147                 :      10264 :             continue;
    2148                 :            : 
    2149                 :        399 :           tree new_res = copy_ssa_name (orig_arg);
    2150                 :        399 :           gphi *lcphi = create_phi_node (new_res, between_bb);
    2151                 :        399 :           add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
    2152                 :            :         }
    2153                 :            :     }
    2154                 :      12178 : }
    2155                 :            : 
    2156                 :            : /* Function slpeel_add_loop_guard adds guard skipping from the beginning
    2157                 :            :    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
    2158                 :            :    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
    2159                 :            :    appear like below:
    2160                 :            : 
    2161                 :            :        guard_bb:
    2162                 :            :          if (cond)
    2163                 :            :            goto merge_bb;
    2164                 :            :          else
    2165                 :            :            goto skip_loop;
    2166                 :            : 
    2167                 :            :      skip_loop:
    2168                 :            :        header_a:
    2169                 :            :          i_1 = PHI<i_0, i_2>;
    2170                 :            :          ...
    2171                 :            :          i_2 = i_1 + 1;
    2172                 :            :          if (cond_a)
    2173                 :            :            goto latch_a;
    2174                 :            :          else
    2175                 :            :            goto exit_a;
    2176                 :            :        latch_a:
    2177                 :            :          goto header_a;
    2178                 :            : 
    2179                 :            :        exit_a:
    2180                 :            :          i_5 = PHI<i_2>;
    2181                 :            : 
    2182                 :            :        merge_bb:
    2183                 :            :          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
    2184                 :            : 
    2185                 :            :      update_loop:
    2186                 :            :        header_b:
    2187                 :            :          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
    2188                 :            :          ...
    2189                 :            :          i_4 = i_3 + 1;
    2190                 :            :          if (cond_b)
    2191                 :            :            goto latch_b;
    2192                 :            :          else
    2193                 :            :            goto exit_bb;
    2194                 :            :        latch_b:
    2195                 :            :          goto header_b;
    2196                 :            : 
    2197                 :            :        exit_bb:
    2198                 :            : 
    2199                 :            :    This function creates PHI nodes at merge_bb and replaces the use of i_5
    2200                 :            :    in the update_loop's PHI node with the result of new PHI result.  */
    2201                 :            : 
    2202                 :            : static void
    2203                 :       9034 : slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
    2204                 :            :                                     class loop *update_loop,
    2205                 :            :                                     edge guard_edge, edge merge_edge)
    2206                 :            : {
    2207                 :       9034 :   location_t merge_loc, guard_loc;
    2208                 :       9034 :   edge orig_e = loop_preheader_edge (skip_loop);
    2209                 :       9034 :   edge update_e = loop_preheader_edge (update_loop);
    2210                 :       9034 :   gphi_iterator gsi_orig, gsi_update;
    2211                 :            : 
    2212                 :       9034 :   for ((gsi_orig = gsi_start_phis (skip_loop->header),
    2213                 :       9034 :         gsi_update = gsi_start_phis (update_loop->header));
    2214                 :      38459 :        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
    2215                 :      29425 :        gsi_next (&gsi_orig), gsi_next (&gsi_update))
    2216                 :            :     {
    2217                 :      29425 :       gphi *orig_phi = gsi_orig.phi ();
    2218                 :      29425 :       gphi *update_phi = gsi_update.phi ();
    2219                 :            : 
    2220                 :            :       /* Generate new phi node at merge bb of the guard.  */
    2221                 :      29425 :       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    2222                 :      29425 :       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
    2223                 :            : 
    2224                 :            :       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
    2225                 :            :          args in NEW_PHI for these edges.  */
    2226                 :      29425 :       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
    2227                 :      29425 :       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
    2228                 :      29425 :       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
    2229                 :      29425 :       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
    2230                 :      29425 :       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
    2231                 :      29425 :       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
    2232                 :            : 
    2233                 :            :       /* Update phi in UPDATE_PHI.  */
    2234                 :      29425 :       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    2235                 :            :     }
    2236                 :       9034 : }
    2237                 :            : 
    2238                 :            : /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
    2239                 :            :    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
    2240                 :            :    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
    2241                 :            :    and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
    2242                 :            :    The CFG looks like:
    2243                 :            : 
    2244                 :            :      loop:
    2245                 :            :        header_a:
    2246                 :            :          i_1 = PHI<i_0, i_2>;
    2247                 :            :          ...
    2248                 :            :          i_2 = i_1 + 1;
    2249                 :            :          if (cond_a)
    2250                 :            :            goto latch_a;
    2251                 :            :          else
    2252                 :            :            goto exit_a;
    2253                 :            :        latch_a:
    2254                 :            :          goto header_a;
    2255                 :            : 
    2256                 :            :        exit_a:
    2257                 :            : 
    2258                 :            :        guard_bb:
    2259                 :            :          if (cond)
    2260                 :            :            goto merge_bb;
    2261                 :            :          else
    2262                 :            :            goto epilog_loop;
    2263                 :            : 
    2264                 :            :        ;; fall_through_bb
    2265                 :            : 
    2266                 :            :      epilog_loop:
    2267                 :            :        header_b:
    2268                 :            :          i_3 = PHI<i_2, i_4>;
    2269                 :            :          ...
    2270                 :            :          i_4 = i_3 + 1;
    2271                 :            :          if (cond_b)
    2272                 :            :            goto latch_b;
    2273                 :            :          else
    2274                 :            :            goto merge_bb;
    2275                 :            :        latch_b:
    2276                 :            :          goto header_b;
    2277                 :            : 
    2278                 :            :        merge_bb:
    2279                 :            :          ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
    2280                 :            : 
    2281                 :            :        exit_bb:
    2282                 :            :          i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.
    2283                 :            : 
    2284                 :            :    For each name used out side EPILOG (i.e - for each name that has a lcssa
    2285                 :            :    phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
    2286                 :            :    args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
    2287                 :            :    the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
    2288                 :            :    by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
    2289                 :            :    in exit_bb will also be updated.  */
    2290                 :            : 
    2291                 :            : static void
    2292                 :       9281 : slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
    2293                 :            :                                     edge guard_edge, edge merge_edge)
    2294                 :            : {
    2295                 :       9281 :   gphi_iterator gsi;
    2296                 :       9281 :   basic_block merge_bb = guard_edge->dest;
    2297                 :            : 
    2298                 :       9281 :   gcc_assert (single_succ_p (merge_bb));
    2299                 :       9281 :   edge e = single_succ_edge (merge_bb);
    2300                 :       9281 :   basic_block exit_bb = e->dest;
    2301                 :       9281 :   gcc_assert (single_pred_p (exit_bb));
    2302                 :       9281 :   gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
    2303                 :            : 
    2304                 :      26512 :   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2305                 :            :     {
    2306                 :      17231 :       gphi *update_phi = gsi.phi ();
    2307                 :      17231 :       tree old_arg = PHI_ARG_DEF (update_phi, 0);
    2308                 :            : 
    2309                 :      17231 :       tree merge_arg = NULL_TREE;
    2310                 :            : 
    2311                 :            :       /* If the old argument is a SSA_NAME use its current_def.  */
    2312                 :      17231 :       if (TREE_CODE (old_arg) == SSA_NAME)
    2313                 :      17224 :         merge_arg = get_current_def (old_arg);
    2314                 :            :       /* If it's a constant or doesn't have a current_def, just use the old
    2315                 :            :          argument.  */
    2316                 :      17224 :       if (!merge_arg)
    2317                 :            :         merge_arg = old_arg;
    2318                 :            : 
    2319                 :      17231 :       tree guard_arg = find_guard_arg (loop, epilog, update_phi);
    2320                 :            :       /* If the var is live after loop but not a reduction, we simply
    2321                 :            :          use the old arg.  */
    2322                 :      17231 :       if (!guard_arg)
    2323                 :          7 :         guard_arg = old_arg;
    2324                 :            : 
    2325                 :            :       /* Create new phi node in MERGE_BB:  */
    2326                 :      17231 :       tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
    2327                 :      17231 :       gphi *merge_phi = create_phi_node (new_res, merge_bb);
    2328                 :            : 
    2329                 :            :       /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
    2330                 :            :          the two PHI args in merge_phi for these edges.  */
    2331                 :      17231 :       add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
    2332                 :      17231 :       add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
    2333                 :            : 
    2334                 :            :       /* Update the original phi in exit_bb.  */
    2335                 :      17231 :       adjust_phi_and_debug_stmts (update_phi, e, new_res);
    2336                 :            :     }
    2337                 :       9281 : }
    2338                 :            : 
    2339                 :            : /* EPILOG loop is duplicated from the original loop for vectorizing,
    2340                 :            :    the arg of its loop closed ssa PHI needs to be updated.  */
    2341                 :            : 
    2342                 :            : static void
    2343                 :       2704 : slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
    2344                 :            : {
    2345                 :       2704 :   gphi_iterator gsi;
    2346                 :       2704 :   basic_block exit_bb = single_exit (epilog)->dest;
    2347                 :            : 
    2348                 :       2704 :   gcc_assert (single_pred_p (exit_bb));
    2349                 :       2704 :   edge e = EDGE_PRED (exit_bb, 0);
    2350                 :       5594 :   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2351                 :       2890 :     rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    2352                 :       2704 : }
    2353                 :            : 
    2354                 :            : /* Function vect_do_peeling.
    2355                 :            : 
    2356                 :            :    Input:
    2357                 :            :    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
    2358                 :            : 
    2359                 :            :        preheader:
    2360                 :            :      LOOP:
    2361                 :            :        header_bb:
    2362                 :            :          loop_body
    2363                 :            :          if (exit_loop_cond) goto exit_bb
    2364                 :            :          else                goto header_bb
    2365                 :            :        exit_bb:
    2366                 :            : 
    2367                 :            :    - NITERS: The number of iterations of the loop.
    2368                 :            :    - NITERSM1: The number of iterations of the loop's latch.
    2369                 :            :    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
    2370                 :            :    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
    2371                 :            :                               CHECK_PROFITABILITY is true.
    2372                 :            :    Output:
    2373                 :            :    - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
    2374                 :            :      iterate after vectorization; see vect_set_loop_condition for details.
    2375                 :            :    - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
    2376                 :            :      should be set to the number of scalar iterations handled by the
    2377                 :            :      vector loop.  The SSA name is only used on exit from the loop.
    2378                 :            : 
    2379                 :            :    This function peels prolog and epilog from the loop, adds guards skipping
    2380                 :            :    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
    2381                 :            :    would look like:
    2382                 :            : 
    2383                 :            :        guard_bb_1:
    2384                 :            :          if (prefer_scalar_loop) goto merge_bb_1
    2385                 :            :          else                    goto guard_bb_2
    2386                 :            : 
    2387                 :            :        guard_bb_2:
    2388                 :            :          if (skip_prolog) goto merge_bb_2
    2389                 :            :          else             goto prolog_preheader
    2390                 :            : 
    2391                 :            :        prolog_preheader:
    2392                 :            :      PROLOG:
    2393                 :            :        prolog_header_bb:
    2394                 :            :          prolog_body
    2395                 :            :          if (exit_prolog_cond) goto prolog_exit_bb
    2396                 :            :          else                  goto prolog_header_bb
    2397                 :            :        prolog_exit_bb:
    2398                 :            : 
    2399                 :            :        merge_bb_2:
    2400                 :            : 
    2401                 :            :        vector_preheader:
    2402                 :            :      VECTOR LOOP:
    2403                 :            :        vector_header_bb:
    2404                 :            :          vector_body
    2405                 :            :          if (exit_vector_cond) goto vector_exit_bb
    2406                 :            :          else                  goto vector_header_bb
    2407                 :            :        vector_exit_bb:
    2408                 :            : 
    2409                 :            :        guard_bb_3:
    2410                 :            :          if (skip_epilog) goto merge_bb_3
    2411                 :            :          else             goto epilog_preheader
    2412                 :            : 
    2413                 :            :        merge_bb_1:
    2414                 :            : 
    2415                 :            :        epilog_preheader:
    2416                 :            :      EPILOG:
    2417                 :            :        epilog_header_bb:
    2418                 :            :          epilog_body
    2419                 :            :          if (exit_epilog_cond) goto merge_bb_3
    2420                 :            :          else                  goto epilog_header_bb
    2421                 :            : 
    2422                 :            :        merge_bb_3:
    2423                 :            : 
    2424                 :            :    Note this function peels prolog and epilog only if it's necessary,
    2425                 :            :    as well as guards.
    2426                 :            :    This function returns the epilogue loop if a decision was made to vectorize
    2427                 :            :    it, otherwise NULL.
    2428                 :            : 
    2429                 :            :    The analysis resulting in this epilogue loop's loop_vec_info was performed
    2430                 :            :    in the same vect_analyze_loop call as the main loop's.  At that time
    2431                 :            :    vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
    2432                 :            :    vectorization factors than the main loop.  This list is stored in the main
    2433                 :            :    loop's loop_vec_info in the 'epilogue_vinfos' member.  Everytime we decide to
    2434                 :            :    vectorize the epilogue loop for a lower vectorization factor,  the
    2435                 :            :    loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
    2436                 :            :    updated and linked to the epilogue loop.  This is later used to vectorize
    2437                 :            :    the epilogue.  The reason the loop_vec_info needs updating is that it was
    2438                 :            :    constructed based on the original main loop, and the epilogue loop is a
    2439                 :            :    copy of this loop, so all links pointing to statements in the original loop
    2440                 :            :    need updating.  Furthermore, these loop_vec_infos share the
    2441                 :            :    data_reference's records, which will also need to be updated.
    2442                 :            : 
    2443                 :            :    TODO: Guard for prefer_scalar_loop should be emitted along with
    2444                 :            :    versioning conditions if loop versioning is needed.  */
    2445                 :            : 
    2446                 :            : 
    2447                 :            : class loop *
    2448                 :      21383 : vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
    2449                 :            :                  tree *niters_vector, tree *step_vector,
    2450                 :            :                  tree *niters_vector_mult_vf_var, int th,
    2451                 :            :                  bool check_profitability, bool niters_no_overflow,
    2452                 :            :                  tree *advance)
    2453                 :            : {
    2454                 :      21383 :   edge e, guard_e;
    2455                 :      21383 :   tree type = TREE_TYPE (niters), guard_cond;
    2456                 :      21383 :   basic_block guard_bb, guard_to;
    2457                 :      21383 :   profile_probability prob_prolog, prob_vector, prob_epilog;
    2458                 :      21383 :   int estimated_vf;
    2459                 :      21383 :   int prolog_peeling = 0;
    2460                 :      21383 :   bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
    2461                 :            :   /* We currently do not support prolog peeling if the target alignment is not
    2462                 :            :      known at compile time.  'vect_gen_prolog_loop_niters' depends on the
    2463                 :            :      target alignment being constant.  */
    2464                 :      21383 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    2465                 :      21383 :   if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
    2466                 :            :     return NULL;
    2467                 :            : 
    2468                 :      21383 :   if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
    2469                 :      21383 :     prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    2470                 :            : 
    2471                 :      21383 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    2472                 :      21383 :   poly_uint64 bound_epilog = 0;
    2473                 :      21383 :   if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
    2474                 :      21383 :       && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    2475                 :      11948 :     bound_epilog += vf - 1;
    2476                 :      21383 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    2477                 :        411 :     bound_epilog += 1;
    2478                 :      21383 :   bool epilog_peeling = maybe_ne (bound_epilog, 0U);
    2479                 :      21383 :   poly_uint64 bound_scalar = bound_epilog;
    2480                 :            : 
    2481                 :      21383 :   if (!prolog_peeling && !epilog_peeling)
    2482                 :            :     return NULL;
    2483                 :            : 
    2484                 :      12017 :   prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
    2485                 :      12017 :   estimated_vf = vect_vf_for_cost (loop_vinfo);
    2486                 :      12017 :   if (estimated_vf == 2)
    2487                 :       2842 :     estimated_vf = 3;
    2488                 :      12017 :   prob_prolog = prob_epilog = profile_probability::guessed_always ()
    2489                 :      12017 :                         .apply_scale (estimated_vf - 1, estimated_vf);
    2490                 :            : 
    2491                 :      12017 :   class loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2492                 :      12017 :   class loop *first_loop = loop;
    2493                 :      12017 :   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
    2494                 :            : 
    2495                 :            :   /* We might have a queued need to update virtual SSA form.  As we
    2496                 :            :      delete the update SSA machinery below after doing a regular
    2497                 :            :      incremental SSA update during loop copying make sure we don't
    2498                 :            :      lose that fact.
    2499                 :            :      ???  Needing to update virtual SSA form by renaming is unfortunate
    2500                 :            :      but not all of the vectorizer code inserting new loads / stores
    2501                 :            :      properly assigns virtual operands to those statements.  */
    2502                 :      12017 :   update_ssa (TODO_update_ssa_only_virtuals);
    2503                 :            : 
    2504                 :      12017 :   create_lcssa_for_virtual_phi (loop);
    2505                 :            : 
    2506                 :            :   /* If we're vectorizing an epilogue loop, the update_ssa above will
    2507                 :            :      have ensured that the virtual operand is in SSA form throughout the
    2508                 :            :      vectorized main loop.  Normally it is possible to trace the updated
    2509                 :            :      vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
    2510                 :            :      back to scalar-stmt vuses, meaning that the effect of the SSA update
    2511                 :            :      remains local to the main loop.  However, there are rare cases in
    2512                 :            :      which the vectorized loop has vdefs even when the original scalar
    2513                 :            :      loop didn't.  For example, vectorizing a load with IFN_LOAD_LANES
    2514                 :            :      introduces clobbers of the temporary vector array, which in turn
    2515                 :            :      needs new vdefs.  If the scalar loop doesn't write to memory, these
    2516                 :            :      new vdefs will be the only ones in the vector loop.
    2517                 :            : 
    2518                 :            :      In that case, update_ssa will have added a new virtual phi to the
    2519                 :            :      main loop, which previously didn't need one.  Ensure that we (locally)
    2520                 :            :      maintain LCSSA form for the virtual operand, just as we would have
    2521                 :            :      done if the virtual phi had existed from the outset.  This makes it
    2522                 :            :      easier to duplicate the scalar epilogue loop below.  */
    2523                 :      12017 :   tree vop_to_rename = NULL_TREE;
    2524                 :      12017 :   if (loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
    2525                 :            :     {
    2526                 :       2874 :       class loop *orig_loop = LOOP_VINFO_LOOP (orig_loop_vinfo);
    2527                 :       2874 :       vop_to_rename = create_lcssa_for_virtual_phi (orig_loop);
    2528                 :            :     }
    2529                 :            : 
    2530                 :      12017 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
    2531                 :            :     {
    2532                 :       3724 :       gcc_assert (!adjust_vec.exists ());
    2533                 :       3724 :       adjust_vec.create (32);
    2534                 :            :     }
    2535                 :      12017 :   initialize_original_copy_tables ();
    2536                 :            : 
    2537                 :            :   /* Record the anchor bb at which the guard should be placed if the scalar
    2538                 :            :      loop might be preferred.  */
    2539                 :      12017 :   basic_block anchor = loop_preheader_edge (loop)->src;
    2540                 :            : 
    2541                 :            :   /* Generate the number of iterations for the prolog loop.  We do this here
    2542                 :            :      so that we can also get the upper bound on the number of iterations.  */
    2543                 :      12017 :   tree niters_prolog;
    2544                 :      12017 :   int bound_prolog = 0;
    2545                 :      12017 :   if (prolog_peeling)
    2546                 :        193 :     niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
    2547                 :            :                                                   &bound_prolog);
    2548                 :            :   else
    2549                 :      11824 :     niters_prolog = build_int_cst (type, 0);
    2550                 :            : 
    2551                 :      12017 :   loop_vec_info epilogue_vinfo = NULL;
    2552                 :      12017 :   if (vect_epilogues)
    2553                 :            :     {
    2554                 :       3498 :       epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
    2555                 :       3498 :       loop_vinfo->epilogue_vinfos.ordered_remove (0);
    2556                 :            :     }
    2557                 :            : 
    2558                 :      12017 :   tree niters_vector_mult_vf = NULL_TREE;
    2559                 :            :   /* Saving NITERs before the loop, as this may be changed by prologue.  */
    2560                 :      12017 :   tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
    2561                 :      12017 :   edge update_e = NULL, skip_e = NULL;
    2562                 :      12017 :   unsigned int lowest_vf = constant_lower_bound (vf);
    2563                 :            :   /* If we know the number of scalar iterations for the main loop we should
    2564                 :            :      check whether after the main loop there are enough iterations left over
    2565                 :            :      for the epilogue.  */
    2566                 :      12017 :   if (vect_epilogues
    2567                 :       3498 :       && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    2568                 :        808 :       && prolog_peeling >= 0
    2569                 :      12823 :       && known_eq (vf, lowest_vf))
    2570                 :            :     {
    2571                 :        806 :       unsigned HOST_WIDE_INT eiters
    2572                 :        806 :         = (LOOP_VINFO_INT_NITERS (loop_vinfo)
    2573                 :        806 :            - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
    2574                 :            : 
    2575                 :        806 :       eiters -= prolog_peeling;
    2576                 :        806 :       eiters
    2577                 :        806 :         = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
    2578                 :            : 
    2579                 :        806 :       unsigned int ratio;
    2580                 :        806 :       unsigned int epilogue_gaps
    2581                 :        806 :         = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
    2582                 :       2418 :       while (!(constant_multiple_p
    2583                 :       1612 :                (GET_MODE_SIZE (loop_vinfo->vector_mode),
    2584                 :       1612 :                 GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio)
    2585                 :        806 :                && eiters >= lowest_vf / ratio + epilogue_gaps))
    2586                 :            :         {
    2587                 :        230 :           delete epilogue_vinfo;
    2588                 :        230 :           epilogue_vinfo = NULL;
    2589                 :        230 :           if (loop_vinfo->epilogue_vinfos.length () == 0)
    2590                 :            :             {
    2591                 :            :               vect_epilogues = false;
    2592                 :            :               break;
    2593                 :            :             }
    2594                 :          0 :           epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
    2595                 :          0 :           loop_vinfo->epilogue_vinfos.ordered_remove (0);
    2596                 :          0 :           epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
    2597                 :            :         }
    2598                 :            :     }
    2599                 :            :   /* Prolog loop may be skipped.  */
    2600                 :      12017 :   bool skip_prolog = (prolog_peeling != 0);
    2601                 :            :   /* Skip this loop to epilog when there are not enough iterations to enter this
    2602                 :            :      vectorized loop.  If true we should perform runtime checks on the NITERS
    2603                 :            :      to check whether we should skip the current vectorized loop.  If we know
    2604                 :            :      the number of scalar iterations we may choose to add a runtime check if
    2605                 :            :      this number "maybe" smaller than the number of iterations required
    2606                 :            :      when we know the number of scalar iterations may potentially
    2607                 :            :      be smaller than the number of iterations required to enter this loop, for
    2608                 :            :      this we use the upper bounds on the prolog and epilog peeling.  When we
    2609                 :            :      don't know the number of iterations and don't require versioning it is
    2610                 :            :      because we have asserted that there are enough scalar iterations to enter
    2611                 :            :      the main loop, so this skip is not necessary.  When we are versioning then
    2612                 :            :      we only add such a skip if we have chosen to vectorize the epilogue.  */
    2613                 :       2502 :   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    2614                 :      14519 :                       ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
    2615                 :       2502 :                                   bound_prolog + bound_epilog)
    2616                 :       9515 :                       : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
    2617                 :       1694 :                          || vect_epilogues));
    2618                 :            :   /* Epilog loop must be executed if the number of iterations for epilog
    2619                 :            :      loop is known at compile time, otherwise we need to add a check at
    2620                 :            :      the end of vector loop and skip to the end of epilog loop.  */
    2621                 :      12017 :   bool skip_epilog = (prolog_peeling < 0
    2622                 :      12000 :                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    2623                 :      12017 :                       || !vf.is_constant ());
    2624                 :            :   /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
    2625                 :      12017 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    2626                 :        411 :     skip_epilog = false;
    2627                 :            : 
    2628                 :      12017 :   if (skip_vector)
    2629                 :            :     {
    2630                 :       8841 :       split_edge (loop_preheader_edge (loop));
    2631                 :            : 
    2632                 :            :       /* Due to the order in which we peel prolog and epilog, we first
    2633                 :            :          propagate probability to the whole loop.  The purpose is to
    2634                 :            :          avoid adjusting probabilities of both prolog and vector loops
    2635                 :            :          separately.  Note in this case, the probability of epilog loop
    2636                 :            :          needs to be scaled back later.  */
    2637                 :       8841 :       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
    2638                 :       8841 :       if (prob_vector.initialized_p ())
    2639                 :            :         {
    2640                 :       8841 :           scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
    2641                 :       8841 :           scale_loop_profile (loop, prob_vector, 0);
    2642                 :            :         }
    2643                 :            :     }
    2644                 :            : 
    2645                 :      12017 :   dump_user_location_t loop_loc = find_loop_location (loop);
    2646                 :      12017 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    2647                 :      12017 :   if (vect_epilogues)
    2648                 :            :     /* Make sure to set the epilogue's epilogue scalar loop, such that we can
    2649                 :            :        use the original scalar loop as remaining epilogue if necessary.  */
    2650                 :       3268 :     LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
    2651                 :       3268 :       = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    2652                 :            : 
    2653                 :      12017 :   if (prolog_peeling)
    2654                 :            :     {
    2655                 :        193 :       e = loop_preheader_edge (loop);
    2656                 :        193 :       if (!slpeel_can_duplicate_loop_p (loop, e))
    2657                 :            :         {
    2658                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
    2659                 :            :                            "loop can't be duplicated to preheader edge.\n");
    2660                 :          0 :           gcc_unreachable ();
    2661                 :            :         }
    2662                 :            :       /* Peel prolog and put it on preheader edge of loop.  */
    2663                 :        193 :       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
    2664                 :        193 :       if (!prolog)
    2665                 :            :         {
    2666                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
    2667                 :            :                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
    2668                 :          0 :           gcc_unreachable ();
    2669                 :            :         }
    2670                 :        193 :       prolog->force_vectorize = false;
    2671                 :        193 :       slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
    2672                 :        193 :       first_loop = prolog;
    2673                 :        193 :       reset_original_copy_tables ();
    2674                 :            : 
    2675                 :            :       /* Update the number of iterations for prolog loop.  */
    2676                 :        193 :       tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
    2677                 :        193 :       vect_set_loop_condition (prolog, NULL, niters_prolog,
    2678                 :            :                                step_prolog, NULL_TREE, false);
    2679                 :            : 
    2680                 :            :       /* Skip the prolog loop.  */
    2681                 :        193 :       if (skip_prolog)
    2682                 :            :         {
    2683                 :        193 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    2684                 :            :                                     niters_prolog, build_int_cst (type, 0));
    2685                 :        193 :           guard_bb = loop_preheader_edge (prolog)->src;
    2686                 :        193 :           basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
    2687                 :        193 :           guard_to = split_edge (loop_preheader_edge (loop));
    2688                 :        193 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    2689                 :            :                                            guard_to, guard_bb,
    2690                 :            :                                            prob_prolog.invert (),
    2691                 :            :                                            irred_flag);
    2692                 :        193 :           e = EDGE_PRED (guard_to, 0);
    2693                 :        193 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    2694                 :        193 :           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
    2695                 :            : 
    2696                 :        193 :           scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
    2697                 :        193 :           scale_loop_profile (prolog, prob_prolog, bound_prolog);
    2698                 :            :         }
    2699                 :            : 
    2700                 :            :       /* Update init address of DRs.  */
    2701                 :        193 :       vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
    2702                 :            :       /* Update niters for vector loop.  */
    2703                 :        193 :       LOOP_VINFO_NITERS (loop_vinfo)
    2704                 :        193 :         = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
    2705                 :        193 :       LOOP_VINFO_NITERSM1 (loop_vinfo)
    2706                 :        193 :         = fold_build2 (MINUS_EXPR, type,
    2707                 :            :                        LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
    2708                 :        193 :       bool new_var_p = false;
    2709                 :        193 :       niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
    2710                 :            :       /* It's guaranteed that vector loop bound before vectorization is at
    2711                 :            :          least VF, so set range information for newly generated var.  */
    2712                 :        193 :       if (new_var_p)
    2713                 :        104 :         set_range_info (niters, VR_RANGE,
    2714                 :         52 :                         wi::to_wide (build_int_cst (type, vf)),
    2715                 :        104 :                         wi::to_wide (TYPE_MAX_VALUE (type)));
    2716                 :            : 
    2717                 :            :       /* Prolog iterates at most bound_prolog times, latch iterates at
    2718                 :            :          most bound_prolog - 1 times.  */
    2719                 :        193 :       record_niter_bound (prolog, bound_prolog - 1, false, true);
    2720                 :        193 :       delete_update_ssa ();
    2721                 :        193 :       adjust_vec_debug_stmts ();
    2722                 :        193 :       scev_reset ();
    2723                 :            :     }
    2724                 :            : 
    2725                 :      12017 :   if (epilog_peeling)
    2726                 :            :     {
    2727                 :      11985 :       e = single_exit (loop);
    2728                 :      11985 :       if (!slpeel_can_duplicate_loop_p (loop, e))
    2729                 :            :         {
    2730                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
    2731                 :            :                            "loop can't be duplicated to exit edge.\n");
    2732                 :          0 :           gcc_unreachable ();
    2733                 :            :         }
    2734                 :            :       /* Peel epilog and put it on exit edge of loop.  If we are vectorizing
    2735                 :            :          said epilog then we should use a copy of the main loop as a starting
    2736                 :            :          point.  This loop may have already had some preliminary transformations
    2737                 :            :          to allow for more optimal vectorization, for example if-conversion.
    2738                 :            :          If we are not vectorizing the epilog then we should use the scalar loop
    2739                 :            :          as the transformations mentioned above make less or no sense when not
    2740                 :            :          vectorizing.  */
    2741                 :      11985 :       epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
    2742                 :      11985 :       if (vop_to_rename)
    2743                 :            :         {
    2744                 :            :           /* Vectorizing the main loop can sometimes introduce a vdef to
    2745                 :            :              a loop that previously didn't have one; see the comment above
    2746                 :            :              the definition of VOP_TO_RENAME for details.  The definition
    2747                 :            :              D that holds on E will then be different from the definition
    2748                 :            :              VOP_TO_RENAME that holds during SCALAR_LOOP, so we need to
    2749                 :            :              rename VOP_TO_RENAME to D when copying the loop.
    2750                 :            : 
    2751                 :            :              The virtual operand is in LCSSA form for the main loop,
    2752                 :            :              and no stmt between the main loop and E needs a vdef,
    2753                 :            :              so we know that D is provided by a phi rather than by a
    2754                 :            :              vdef on a normal gimple stmt.  */
    2755                 :          0 :           basic_block vdef_bb = e->src;
    2756                 :          0 :           gphi *vphi;
    2757                 :          0 :           while (!(vphi = get_virtual_phi (vdef_bb)))
    2758                 :          0 :             vdef_bb = get_immediate_dominator (CDI_DOMINATORS, vdef_bb);
    2759                 :          0 :           gcc_assert (vop_to_rename != gimple_phi_result (vphi));
    2760                 :          0 :           set_current_def (vop_to_rename, gimple_phi_result (vphi));
    2761                 :            :         }
    2762                 :      11985 :       epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
    2763                 :      11985 :       if (!epilog)
    2764                 :            :         {
    2765                 :          0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
    2766                 :            :                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
    2767                 :          0 :           gcc_unreachable ();
    2768                 :            :         }
    2769                 :      11985 :       epilog->force_vectorize = false;
    2770                 :      11985 :       slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
    2771                 :            : 
    2772                 :            :       /* Scalar version loop may be preferred.  In this case, add guard
    2773                 :            :          and skip to epilog.  Note this only happens when the number of
    2774                 :            :          iterations of loop is unknown at compile time, otherwise this
    2775                 :            :          won't be vectorized.  */
    2776                 :      11985 :       if (skip_vector)
    2777                 :            :         {
    2778                 :            :           /* Additional epilogue iteration is peeled if gap exists.  */
    2779                 :       8841 :           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
    2780                 :            :                                                 bound_prolog, bound_epilog,
    2781                 :            :                                                 th, &bound_scalar,
    2782                 :            :                                                 check_profitability);
    2783                 :            :           /* Build guard against NITERSM1 since NITERS may overflow.  */
    2784                 :       8841 :           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
    2785                 :       8841 :           guard_bb = anchor;
    2786                 :       8841 :           guard_to = split_edge (loop_preheader_edge (epilog));
    2787                 :       8841 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    2788                 :            :                                            guard_to, guard_bb,
    2789                 :            :                                            prob_vector.invert (),
    2790                 :            :                                            irred_flag);
    2791                 :       8841 :           skip_e = guard_e;
    2792                 :       8841 :           e = EDGE_PRED (guard_to, 0);
    2793                 :       8841 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    2794                 :       8841 :           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
    2795                 :            : 
    2796                 :            :           /* Simply propagate profile info from guard_bb to guard_to which is
    2797                 :            :              a merge point of control flow.  */
    2798                 :       8841 :           guard_to->count = guard_bb->count;
    2799                 :            : 
    2800                 :            :           /* Scale probability of epilog loop back.
    2801                 :            :              FIXME: We should avoid scaling down and back up.  Profile may
    2802                 :            :              get lost if we scale down to 0.  */
    2803                 :       8841 :           basic_block *bbs = get_loop_body (epilog);
    2804                 :      26968 :           for (unsigned int i = 0; i < epilog->num_nodes; i++)
    2805                 :      18127 :             bbs[i]->count = bbs[i]->count.apply_scale
    2806                 :      18127 :                                  (bbs[i]->count,
    2807                 :            :                                   bbs[i]->count.apply_probability
    2808                 :      18127 :                                     (prob_vector));
    2809                 :       8841 :           free (bbs);
    2810                 :            :         }
    2811                 :            : 
    2812                 :      11985 :       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
    2813                 :            :       /* If loop is peeled for non-zero constant times, now niters refers to
    2814                 :            :          orig_niters - prolog_peeling, it won't overflow even the orig_niters
    2815                 :            :          overflows.  */
    2816                 :      11985 :       niters_no_overflow |= (prolog_peeling > 0);
    2817                 :      11985 :       vect_gen_vector_loop_niters (loop_vinfo, niters,
    2818                 :            :                                    niters_vector, step_vector,
    2819                 :            :                                    niters_no_overflow);
    2820                 :      11985 :       if (!integer_onep (*step_vector))
    2821                 :            :         {
    2822                 :            :           /* On exit from the loop we will have an easy way of calcalating
    2823                 :            :              NITERS_VECTOR / STEP * STEP.  Install a dummy definition
    2824                 :            :              until then.  */
    2825                 :          0 :           niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
    2826                 :          0 :           SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
    2827                 :          0 :           *niters_vector_mult_vf_var = niters_vector_mult_vf;
    2828                 :            :         }
    2829                 :            :       else
    2830                 :      11985 :         vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
    2831                 :            :                                              &niters_vector_mult_vf);
    2832                 :            :       /* Update IVs of original loop as if they were advanced by
    2833                 :            :          niters_vector_mult_vf steps.  */
    2834                 :      11985 :       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
    2835                 :      11985 :       update_e = skip_vector ? e : loop_preheader_edge (epilog);
    2836                 :      11985 :       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
    2837                 :            :                                         update_e);
    2838                 :            : 
    2839                 :      11985 :       if (skip_epilog)
    2840                 :            :         {
    2841                 :       9281 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    2842                 :            :                                     niters, niters_vector_mult_vf);
    2843                 :       9281 :           guard_bb = single_exit (loop)->dest;
    2844                 :       9281 :           guard_to = split_edge (single_exit (epilog));
    2845                 :       9922 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
    2846                 :            :                                            skip_vector ? anchor : guard_bb,
    2847                 :            :                                            prob_epilog.invert (),
    2848                 :            :                                            irred_flag);
    2849                 :       9281 :           slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
    2850                 :            :                                               single_exit (epilog));
    2851                 :            :           /* Only need to handle basic block before epilog loop if it's not
    2852                 :            :              the guard_bb, which is the case when skip_vector is true.  */
    2853                 :       9281 :           if (guard_bb != bb_before_epilog)
    2854                 :            :             {
    2855                 :       8640 :               prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
    2856                 :            : 
    2857                 :       8640 :               scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
    2858                 :            :             }
    2859                 :       9281 :           scale_loop_profile (epilog, prob_epilog, 0);
    2860                 :            :         }
    2861                 :            :       else
    2862                 :       2704 :         slpeel_update_phi_nodes_for_lcssa (epilog);
    2863                 :            : 
    2864                 :      11985 :       unsigned HOST_WIDE_INT bound;
    2865                 :      11985 :       if (bound_scalar.is_constant (&bound))
    2866                 :            :         {
    2867                 :      11985 :           gcc_assert (bound != 0);
    2868                 :            :           /* -1 to convert loop iterations to latch iterations.  */
    2869                 :      23970 :           record_niter_bound (epilog, bound - 1, false, true);
    2870                 :            :         }
    2871                 :            : 
    2872                 :      11985 :       delete_update_ssa ();
    2873                 :      11985 :       adjust_vec_debug_stmts ();
    2874                 :      11985 :       scev_reset ();
    2875                 :            :     }
    2876                 :            : 
    2877                 :      12017 :   if (vect_epilogues)
    2878                 :            :     {
    2879                 :       3268 :       epilog->aux = epilogue_vinfo;
    2880                 :       3268 :       LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
    2881                 :            : 
    2882                 :       3268 :       loop_constraint_clear (epilog, LOOP_C_INFINITE);
    2883                 :            : 
    2884                 :            :       /* We now must calculate the number of NITERS performed by the previous
    2885                 :            :          loop and EPILOGUE_NITERS to be performed by the epilogue.  */
    2886                 :       3268 :       tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
    2887                 :            :                                  niters_prolog, niters_vector_mult_vf);
    2888                 :            : 
    2889                 :            :       /* If skip_vector we may skip the previous loop, we insert a phi-node to
    2890                 :            :          determine whether we are coming from the previous vectorized loop
    2891                 :            :          using the update_e edge or the skip_vector basic block using the
    2892                 :            :          skip_e edge.  */
    2893                 :       3268 :       if (skip_vector)
    2894                 :            :         {
    2895                 :       2690 :           gcc_assert (update_e != NULL && skip_e != NULL);
    2896                 :       2690 :           gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
    2897                 :            :                                            update_e->dest);
    2898                 :       2690 :           tree new_ssa = make_ssa_name (TREE_TYPE (niters));
    2899                 :       2690 :           gimple *stmt = gimple_build_assign (new_ssa, niters);
    2900                 :       2690 :           gimple_stmt_iterator gsi;
    2901                 :       2690 :           if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
    2902                 :       2690 :               && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
    2903                 :            :             {
    2904                 :       2690 :               gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
    2905                 :       2690 :               gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    2906                 :            :             }
    2907                 :            :           else
    2908                 :            :             {
    2909                 :          0 :               gsi = gsi_last_bb (update_e->src);
    2910                 :          0 :               gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
    2911                 :            :             }
    2912                 :            : 
    2913                 :       2690 :           niters = new_ssa;
    2914                 :       2690 :           add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
    2915                 :       2690 :           add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
    2916                 :            :                        UNKNOWN_LOCATION);
    2917                 :       2690 :           niters = PHI_RESULT (new_phi);
    2918                 :            :         }
    2919                 :            : 
    2920                 :            :       /* Subtract the number of iterations performed by the vectorized loop
    2921                 :            :          from the number of total iterations.  */
    2922                 :       3268 :       tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
    2923                 :            :                                           before_loop_niters,
    2924                 :            :                                           niters);
    2925                 :            : 
    2926                 :       3268 :       LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
    2927                 :       3268 :       LOOP_VINFO_NITERSM1 (epilogue_vinfo)
    2928                 :       3268 :         = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
    2929                 :            :                        epilogue_niters,
    2930                 :            :                        build_one_cst (TREE_TYPE (epilogue_niters)));
    2931                 :            : 
    2932                 :            :       /* Set ADVANCE to the number of iterations performed by the previous
    2933                 :            :          loop and its prologue.  */
    2934                 :       3268 :       *advance = niters;
    2935                 :            : 
    2936                 :            :       /* Redo the peeling for niter analysis as the NITERs and alignment
    2937                 :            :          may have been updated to take the main loop into account.  */
    2938                 :       3268 :       determine_peel_for_niter (epilogue_vinfo);
    2939                 :            :     }
    2940                 :            : 
    2941                 :      12017 :   adjust_vec.release ();
    2942                 :      12017 :   free_original_copy_tables ();
    2943                 :            : 
    2944                 :      12017 :   return vect_epilogues ? epilog : NULL;
    2945                 :            : }
    2946                 :            : 
    2947                 :            : /* Function vect_create_cond_for_niters_checks.
    2948                 :            : 
    2949                 :            :    Create a conditional expression that represents the run-time checks for
    2950                 :            :    loop's niter.  The loop is guaranteed to terminate if the run-time
    2951                 :            :    checks hold.
    2952                 :            : 
    2953                 :            :    Input:
    2954                 :            :    COND_EXPR  - input conditional expression.  New conditions will be chained
    2955                 :            :                 with logical AND operation.  If it is NULL, then the function
    2956                 :            :                 is used to return the number of alias checks.
    2957                 :            :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    2958                 :            :                 to be checked.
    2959                 :            : 
    2960                 :            :    Output:
    2961                 :            :    COND_EXPR - conditional expression.
    2962                 :            : 
    2963                 :            :    The returned COND_EXPR is the conditional expression to be used in the
    2964                 :            :    if statement that controls which version of the loop gets executed at
    2965                 :            :    runtime.  */
    2966                 :            : 
    2967                 :            : static void
    2968                 :         29 : vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
    2969                 :            : {
    2970                 :         29 :   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
    2971                 :            : 
    2972                 :         29 :   if (*cond_expr)
    2973                 :         29 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    2974                 :            :                               *cond_expr, part_cond_expr);
    2975                 :            :   else
    2976                 :          0 :     *cond_expr = part_cond_expr;
    2977                 :         29 : }
    2978                 :            : 
    2979                 :            : /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    2980                 :            :    and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
    2981                 :            : 
    2982                 :            : static void
    2983                 :        194 : chain_cond_expr (tree *cond_expr, tree part_cond_expr)
    2984                 :            : {
    2985                 :        194 :   if (*cond_expr)
    2986                 :        194 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    2987                 :            :                               *cond_expr, part_cond_expr);
    2988                 :            :   else
    2989                 :          0 :     *cond_expr = part_cond_expr;
    2990                 :        194 : }
    2991                 :            : 
    2992                 :            : /* Function vect_create_cond_for_align_checks.
    2993                 :            : 
    2994                 :            :    Create a conditional expression that represents the alignment checks for
    2995                 :            :    all of data references (array element references) whose alignment must be
    2996                 :            :    checked at runtime.
    2997                 :            : 
    2998                 :            :    Input:
    2999                 :            :    COND_EXPR  - input conditional expression.  New conditions will be chained
    3000                 :            :                 with logical AND operation.
    3001                 :            :    LOOP_VINFO - two fields of the loop information are used.
    3002                 :            :                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
    3003                 :            :                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
    3004                 :            : 
    3005                 :            :    Output:
    3006                 :            :    COND_EXPR_STMT_LIST - statements needed to construct the conditional
    3007                 :            :                          expression.
    3008                 :            :    The returned value is the conditional expression to be used in the if
    3009                 :            :    statement that controls which version of the loop gets executed at runtime.
    3010                 :            : 
    3011                 :            :    The algorithm makes two assumptions:
    3012                 :            :      1) The number of bytes "n" in a vector is a power of 2.
    3013                 :            :      2) An address "a" is aligned if a%n is zero and that this
    3014                 :            :         test can be done as a&(n-1) == 0.  For example, for 16
    3015                 :            :         byte vectors the test is a&0xf == 0.  */
    3016                 :            : 
    3017                 :            : static void
    3018                 :          0 : vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
    3019                 :            :                                    tree *cond_expr,
    3020                 :            :                                    gimple_seq *cond_expr_stmt_list)
    3021                 :            : {
    3022                 :          0 :   vec<stmt_vec_info> may_misalign_stmts
    3023                 :            :     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
    3024                 :          0 :   stmt_vec_info stmt_info;
    3025                 :          0 :   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
    3026                 :          0 :   tree mask_cst;
    3027                 :          0 :   unsigned int i;
    3028                 :          0 :   tree int_ptrsize_type;
    3029                 :          0 :   char tmp_name[20];
    3030                 :          0 :   tree or_tmp_name = NULL_TREE;
    3031                 :          0 :   tree and_tmp_name;
    3032                 :          0 :   gimple *and_stmt;
    3033                 :          0 :   tree ptrsize_zero;
    3034                 :          0 :   tree part_cond_expr;
    3035                 :            : 
    3036                 :            :   /* Check that mask is one less than a power of 2, i.e., mask is
    3037                 :            :      all zeros followed by all ones.  */
    3038                 :          0 :   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
    3039                 :            : 
    3040                 :          0 :   int_ptrsize_type = signed_type_for (ptr_type_node);
    3041                 :            : 
    3042                 :            :   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
    3043                 :            :      of the first vector of the i'th data reference. */
    3044                 :            : 
    3045                 :          0 :   FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    3046                 :            :     {
    3047                 :          0 :       gimple_seq new_stmt_list = NULL;
    3048                 :          0 :       tree addr_base;
    3049                 :          0 :       tree addr_tmp_name;
    3050                 :          0 :       tree new_or_tmp_name;
    3051                 :          0 :       gimple *addr_stmt, *or_stmt;
    3052                 :          0 :       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    3053                 :          0 :       bool negative = tree_int_cst_compare
    3054                 :          0 :         (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
    3055                 :          0 :       tree offset = negative
    3056                 :          0 :         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
    3057                 :            : 
    3058                 :            :       /* create: addr_tmp = (int)(address_of_first_vector) */
    3059                 :          0 :       addr_base =
    3060                 :          0 :         vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
    3061                 :            :                                               offset);
    3062                 :          0 :       if (new_stmt_list != NULL)
    3063                 :          0 :         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
    3064                 :            : 
    3065                 :          0 :       sprintf (tmp_name, "addr2int%d", i);
    3066                 :          0 :       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
    3067                 :          0 :       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
    3068                 :          0 :       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
    3069                 :            : 
    3070                 :            :       /* The addresses are OR together.  */
    3071                 :            : 
    3072                 :          0 :       if (or_tmp_name != NULL_TREE)
    3073                 :            :         {
    3074                 :            :           /* create: or_tmp = or_tmp | addr_tmp */
    3075                 :          0 :           sprintf (tmp_name, "orptrs%d", i);
    3076                 :          0 :           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
    3077                 :          0 :           or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
    3078                 :            :                                          or_tmp_name, addr_tmp_name);
    3079                 :          0 :           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
    3080                 :          0 :           or_tmp_name = new_or_tmp_name;
    3081                 :            :         }
    3082                 :            :       else
    3083                 :            :         or_tmp_name = addr_tmp_name;
    3084                 :            : 
    3085                 :            :     } /* end for i */
    3086                 :            : 
    3087                 :          0 :   mask_cst = build_int_cst (int_ptrsize_type, mask);
    3088                 :            : 
    3089                 :            :   /* create: and_tmp = or_tmp & mask  */
    3090                 :          0 :   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
    3091                 :            : 
    3092                 :          0 :   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
    3093                 :            :                                   or_tmp_name, mask_cst);
    3094                 :          0 :   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
    3095                 :            : 
    3096                 :            :   /* Make and_tmp the left operand of the conditional test against zero.
    3097                 :            :      if and_tmp has a nonzero bit then some address is unaligned.  */
    3098                 :          0 :   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
    3099                 :          0 :   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
    3100                 :            :                                 and_tmp_name, ptrsize_zero);
    3101                 :          0 :   chain_cond_expr (cond_expr, part_cond_expr);
    3102                 :          0 : }
    3103                 :            : 
    3104                 :            : /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
    3105                 :            :    create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
    3106                 :            :    Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    3107                 :            :    and this new condition are true.  Treat a null *COND_EXPR as "true".  */
    3108                 :            : 
    3109                 :            : static void
    3110                 :       2348 : vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
    3111                 :            : {
    3112                 :       2348 :   vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
    3113                 :       2348 :   unsigned int i;
    3114                 :       2348 :   vec_object_pair *pair;
    3115                 :       2352 :   FOR_EACH_VEC_ELT (pairs, i, pair)
    3116                 :            :     {
    3117                 :          4 :       tree addr1 = build_fold_addr_expr (pair->first);
    3118                 :          4 :       tree addr2 = build_fold_addr_expr (pair->second);
    3119                 :          4 :       tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
    3120                 :            :                                          addr1, addr2);
    3121                 :          4 :       chain_cond_expr (cond_expr, part_cond_expr);
    3122                 :            :     }
    3123                 :       2348 : }
    3124                 :            : 
    3125                 :            : /* Create an expression that is true when all lower-bound conditions for
    3126                 :            :    the vectorized loop are met.  Chain this condition with *COND_EXPR.  */
    3127                 :            : 
    3128                 :            : static void
    3129                 :       2348 : vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
    3130                 :            : {
    3131                 :       2348 :   vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
    3132                 :       2864 :   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    3133                 :            :     {
    3134                 :        190 :       tree expr = lower_bounds[i].expr;
    3135                 :        190 :       tree type = unsigned_type_for (TREE_TYPE (expr));
    3136                 :        190 :       expr = fold_convert (type, expr);
    3137                 :        190 :       poly_uint64 bound = lower_bounds[i].min_value;
    3138                 :        190 :       if (!lower_bounds[i].unsigned_p)
    3139                 :            :         {
    3140                 :         77 :           expr = fold_build2 (PLUS_EXPR, type, expr,
    3141                 :            :                               build_int_cstu (type, bound - 1));
    3142                 :         77 :           bound += bound - 1;
    3143                 :            :         }
    3144                 :        190 :       tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
    3145                 :            :                                          build_int_cstu (type, bound));
    3146                 :        190 :       chain_cond_expr (cond_expr, part_cond_expr);
    3147                 :            :     }
    3148                 :       2348 : }
    3149                 :            : 
    3150                 :            : /* Function vect_create_cond_for_alias_checks.
    3151                 :            : 
    3152                 :            :    Create a conditional expression that represents the run-time checks for
    3153                 :            :    overlapping of address ranges represented by a list of data references
    3154                 :            :    relations passed as input.
    3155                 :            : 
    3156                 :            :    Input:
    3157                 :            :    COND_EXPR  - input conditional expression.  New conditions will be chained
    3158                 :            :                 with logical AND operation.  If it is NULL, then the function
    3159                 :            :                 is used to return the number of alias checks.
    3160                 :            :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    3161                 :            :                 to be checked.
    3162                 :            : 
    3163                 :            :    Output:
    3164                 :            :    COND_EXPR - conditional expression.
    3165                 :            : 
    3166                 :            :    The returned COND_EXPR is the conditional expression to be used in the if
    3167                 :            :    statement that controls which version of the loop gets executed at runtime.
    3168                 :            : */
    3169                 :            : 
    3170                 :            : void
    3171                 :       2348 : vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
    3172                 :            : {
    3173                 :       2348 :   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    3174                 :            :     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
    3175                 :            : 
    3176                 :       2348 :   if (comp_alias_ddrs.is_empty ())
    3177                 :        113 :     return;
    3178                 :            : 
    3179                 :       2235 :   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
    3180                 :            :                                &comp_alias_ddrs, cond_expr);
    3181                 :       2235 :   if (dump_enabled_p ())
    3182                 :        904 :     dump_printf_loc (MSG_NOTE, vect_location,
    3183                 :            :                      "created %u versioning for alias checks.\n",
    3184                 :            :                      comp_alias_ddrs.length ());
    3185                 :            : }
    3186                 :            : 
    3187                 :            : 
    3188                 :            : /* Function vect_loop_versioning.
    3189                 :            : 
    3190                 :            :    If the loop has data references that may or may not be aligned or/and
    3191                 :            :    has data reference relations whose independence was not proven then
    3192                 :            :    two versions of the loop need to be generated, one which is vectorized
    3193                 :            :    and one which isn't.  A test is then generated to control which of the
    3194                 :            :    loops is executed.  The test checks for the alignment of all of the
    3195                 :            :    data references that may or may not be aligned.  An additional
    3196                 :            :    sequence of runtime tests is generated for each pairs of DDRs whose
    3197                 :            :    independence was not proven.  The vectorized version of loop is
    3198                 :            :    executed only if both alias and alignment tests are passed.
    3199                 :            : 
    3200                 :            :    The test generated to check which version of loop is executed
    3201                 :            :    is modified to also check for profitability as indicated by the
    3202                 :            :    cost model threshold TH.
    3203                 :            : 
    3204                 :            :    The versioning precondition(s) are placed in *COND_EXPR and
    3205                 :            :    *COND_EXPR_STMT_LIST.  */
    3206                 :            : 
    3207                 :            : class loop *
    3208                 :       2387 : vect_loop_versioning (loop_vec_info loop_vinfo,
    3209                 :            :                       gimple *loop_vectorized_call)
    3210                 :            : {
    3211                 :       2387 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
    3212                 :       2387 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3213                 :       2387 :   basic_block condition_bb;
    3214                 :       2387 :   gphi_iterator gsi;
    3215                 :       2387 :   gimple_stmt_iterator cond_exp_gsi;
    3216                 :       2387 :   basic_block merge_bb;
    3217                 :       2387 :   basic_block new_exit_bb;
    3218                 :       2387 :   edge new_exit_e, e;
    3219                 :       2387 :   gphi *orig_phi, *new_phi;
    3220                 :       2387 :   tree cond_expr = NULL_TREE;
    3221                 :       2387 :   gimple_seq cond_expr_stmt_list = NULL;
    3222                 :       2387 :   tree arg;
    3223                 :       2387 :   profile_probability prob = profile_probability::likely ();
    3224                 :       2387 :   gimple_seq gimplify_stmt_list = NULL;
    3225                 :       2387 :   tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
    3226                 :       2387 :   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
    3227                 :       2387 :   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
    3228                 :       2387 :   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
    3229                 :       2387 :   poly_uint64 versioning_threshold
    3230                 :            :     = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
    3231                 :       2387 :   tree version_simd_if_cond
    3232                 :            :     = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
    3233                 :       2387 :   unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
    3234                 :            : 
    3235                 :       2387 :   if (vect_apply_runtime_profitability_check_p (loop_vinfo)
    3236                 :            :       && !ordered_p (th, versioning_threshold))
    3237                 :            :     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    3238                 :            :                              build_int_cst (TREE_TYPE (scalar_loop_iters),
    3239                 :            :                                             th - 1));
    3240                 :       2387 :   if (maybe_ne (versioning_threshold, 0U))
    3241                 :            :     {
    3242                 :       2387 :       tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    3243                 :            :                                build_int_cst (TREE_TYPE (scalar_loop_iters),
    3244                 :            :                                               versioning_threshold - 1));
    3245                 :       2387 :       if (cond_expr)
    3246                 :          0 :         cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
    3247                 :            :                                  expr, cond_expr);
    3248                 :            :       else
    3249                 :       2387 :         cond_expr = expr;
    3250                 :            :     }
    3251                 :            : 
    3252                 :       2387 :   if (version_niter)
    3253                 :         29 :     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
    3254                 :            : 
    3255                 :       2387 :   if (cond_expr)
    3256                 :       2387 :     cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    3257                 :            :                                         &cond_expr_stmt_list,
    3258                 :            :                                         is_gimple_condexpr, NULL_TREE);
    3259                 :            : 
    3260                 :       2387 :   if (version_align)
    3261                 :          0 :     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
    3262                 :            :                                        &cond_expr_stmt_list);
    3263                 :            : 
    3264                 :       2387 :   if (version_alias)
    3265                 :            :     {
    3266                 :       2348 :       vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
    3267                 :       2348 :       vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
    3268                 :       2348 :       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    3269                 :            :     }
    3270                 :            : 
    3271                 :       2387 :   if (version_simd_if_cond)
    3272                 :            :     {
    3273                 :         11 :       gcc_assert (dom_info_available_p (CDI_DOMINATORS));
    3274                 :         11 :       if (flag_checking)
    3275                 :         11 :         if (basic_block bb
    3276                 :         11 :             = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
    3277                 :         11 :           gcc_assert (bb != loop->header
    3278                 :            :                       && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
    3279                 :            :                       && (scalar_loop == NULL
    3280                 :            :                           || (bb != scalar_loop->header
    3281                 :            :                               && dominated_by_p (CDI_DOMINATORS,
    3282                 :            :                                                  scalar_loop->header, bb))));
    3283                 :         11 :       tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
    3284                 :         11 :       tree c = fold_build2 (NE_EXPR, boolean_type_node,
    3285                 :            :                             version_simd_if_cond, zero);
    3286                 :         11 :       if (cond_expr)
    3287                 :         11 :         cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    3288                 :            :                                  c, cond_expr);
    3289                 :            :       else
    3290                 :          0 :         cond_expr = c;
    3291                 :         11 :       if (dump_enabled_p ())
    3292                 :          5 :         dump_printf_loc (MSG_NOTE, vect_location,
    3293                 :            :                          "created versioning for simd if condition check.\n");
    3294                 :            :     }
    3295                 :            : 
    3296                 :       2387 :   cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    3297                 :            :                                       &gimplify_stmt_list,
    3298                 :            :                                       is_gimple_condexpr, NULL_TREE);
    3299                 :       2387 :   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
    3300                 :            : 
    3301                 :            :   /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
    3302                 :            :      invariant in.  */
    3303                 :       2387 :   class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
    3304                 :       2387 :   for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
    3305                 :      40106 :        !gsi_end_p (gsi); gsi_next (&gsi))
    3306                 :            :     {
    3307                 :      37719 :       gimple *stmt = gsi_stmt (gsi);
    3308                 :      37719 :       update_stmt (stmt);
    3309                 :      37719 :       ssa_op_iter iter;
    3310                 :      37719 :       use_operand_p use_p;
    3311                 :      37719 :       basic_block def_bb;
    3312                 :      86872 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    3313                 :      49153 :         if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
    3314                 :      49153 :             && flow_bb_inside_loop_p (outermost, def_bb))
    3315                 :       1541 :           outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
    3316                 :            :     }
    3317                 :            : 
    3318                 :            :   /* Search for the outermost loop we can version.  Avoid versioning of
    3319                 :            :      non-perfect nests but allow if-conversion versioned loops inside.  */
    3320                 :       2387 :   class loop *loop_to_version = loop;
    3321                 :       2387 :   if (flow_loop_nested_p (outermost, loop))
    3322                 :            :     { 
    3323                 :       1045 :       if (dump_enabled_p ())
    3324                 :        447 :         dump_printf_loc (MSG_NOTE, vect_location,
    3325                 :            :                          "trying to apply versioning to outer loop %d\n",
    3326                 :            :                          outermost->num);
    3327                 :       1045 :       if (outermost->num == 0)
    3328                 :       1036 :         outermost = superloop_at_depth (loop, 1);
    3329                 :            :       /* And avoid applying versioning on non-perfect nests.  */
    3330                 :       1055 :       while (loop_to_version != outermost
    3331                 :         32 :              && single_exit (loop_outer (loop_to_version))
    3332                 :         32 :              && (!loop_outer (loop_to_version)->inner->next
    3333                 :         10 :                  || vect_loop_vectorized_call (loop_to_version))
    3334                 :       1075 :              && (!loop_outer (loop_to_version)->inner->next
    3335                 :          8 :                  || !loop_outer (loop_to_version)->inner->next->next))
    3336                 :         10 :         loop_to_version = loop_outer (loop_to_version);
    3337                 :            :     }
    3338                 :            : 
    3339                 :            :   /* Apply versioning.  If there is already a scalar version created by
    3340                 :            :      if-conversion re-use that.  Note we cannot re-use the copy of
    3341                 :            :      an if-converted outer-loop when vectorizing the inner loop only.  */
    3342                 :       2387 :   gcond *cond;
    3343                 :       2387 :   if ((!loop_to_version->inner || loop == loop_to_version)
    3344                 :       2381 :       && loop_vectorized_call)
    3345                 :            :     {
    3346                 :         73 :       gcc_assert (scalar_loop);
    3347                 :         73 :       condition_bb = gimple_bb (loop_vectorized_call);
    3348                 :         73 :       cond = as_a <gcond *> (last_stmt (condition_bb));
    3349                 :         73 :       gimple_cond_set_condition_from_tree (cond, cond_expr);
    3350                 :         73 :       update_stmt (cond);
    3351                 :            : 
    3352                 :         73 :       if (cond_expr_stmt_list)
    3353                 :            :         {
    3354                 :         73 :           cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
    3355                 :         73 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    3356                 :            :                                  GSI_SAME_STMT);
    3357                 :            :         }
    3358                 :            : 
    3359                 :            :       /* if-conversion uses profile_probability::always () for both paths,
    3360                 :            :          reset the paths probabilities appropriately.  */
    3361                 :         73 :       edge te, fe;
    3362                 :         73 :       extract_true_false_edges_from_block (condition_bb, &te, &fe);
    3363                 :         73 :       te->probability = prob;
    3364                 :         73 :       fe->probability = prob.invert ();
    3365                 :            :       /* We can scale loops counts immediately but have to postpone
    3366                 :            :          scaling the scalar loop because we re-use it during peeling.  */
    3367                 :         73 :       scale_loop_frequencies (loop_to_version, te->probability);
    3368                 :         73 :       LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = fe->probability;
    3369                 :            : 
    3370                 :         73 :       nloop = scalar_loop;
    3371                 :         73 :       if (dump_enabled_p ())
    3372                 :         82 :         dump_printf_loc (MSG_NOTE, vect_location,
    3373                 :            :                          "reusing %sloop version created by if conversion\n",
    3374                 :         73 :                          loop_to_version != loop ? "outer " : "");
    3375                 :            :     }
    3376                 :            :   else
    3377                 :            :     {
    3378                 :       2314 :       if (loop_to_version != loop
    3379                 :       2314 :           && dump_enabled_p ())
    3380                 :          3 :         dump_printf_loc (MSG_NOTE, vect_location,
    3381                 :            :                          "applying loop versioning to outer loop %d\n",
    3382                 :            :                          loop_to_version->num);
    3383                 :            : 
    3384                 :       2314 :       initialize_original_copy_tables ();
    3385                 :       2314 :       nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
    3386                 :            :                             prob, prob.invert (), prob, prob.invert (), true);
    3387                 :       2314 :       gcc_assert (nloop);
    3388                 :       2314 :       nloop = get_loop_copy (loop);
    3389                 :            : 
    3390                 :            :       /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
    3391                 :            :          reap those otherwise;  they also refer to the original
    3392                 :            :          loops.  */
    3393                 :       2314 :       class loop *l = loop;
    3394                 :       2318 :       while (gimple *call = vect_loop_vectorized_call (l))
    3395                 :            :         {
    3396                 :          4 :           call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
    3397                 :          4 :           fold_loop_internal_call (call, boolean_false_node);
    3398                 :          4 :           l = loop_outer (l);
    3399                 :            :         }
    3400                 :       2314 :       free_original_copy_tables ();
    3401                 :            : 
    3402                 :       2314 :       if (cond_expr_stmt_list)
    3403                 :            :         {
    3404                 :       2294 :           cond_exp_gsi = gsi_last_bb (condition_bb);
    3405                 :       2294 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    3406                 :            :                                  GSI_SAME_STMT);
    3407                 :            :         }
    3408                 :            : 
    3409                 :            :       /* Loop versioning violates an assumption we try to maintain during
    3410                 :            :          vectorization - that the loop exit block has a single predecessor.
    3411                 :            :          After versioning, the exit block of both loop versions is the same
    3412                 :            :          basic block (i.e. it has two predecessors). Just in order to simplify
    3413                 :            :          following transformations in the vectorizer, we fix this situation
    3414                 :            :          here by adding a new (empty) block on the exit-edge of the loop,
    3415                 :            :          with the proper loop-exit phis to maintain loop-closed-form.
    3416                 :            :          If loop versioning wasn't done from loop, but scalar_loop instead,
    3417                 :            :          merge_bb will have already just a single successor.  */
    3418                 :            : 
    3419                 :       2314 :       merge_bb = single_exit (loop_to_version)->dest;
    3420                 :       2314 :       if (EDGE_COUNT (merge_bb->preds) >= 2)
    3421                 :            :         {
    3422                 :       2314 :           gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
    3423                 :       2314 :           new_exit_bb = split_edge (single_exit (loop_to_version));
    3424                 :       2314 :           new_exit_e = single_exit (loop_to_version);
    3425                 :       2314 :           e = EDGE_SUCC (new_exit_bb, 0);
    3426                 :            : 
    3427                 :       3243 :           for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
    3428                 :        929 :                gsi_next (&gsi))
    3429                 :            :             {
    3430                 :        929 :               tree new_res;
    3431                 :        929 :               orig_phi = gsi.phi ();
    3432                 :        929 :               new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    3433                 :        929 :               new_phi = create_phi_node (new_res, new_exit_bb);
    3434                 :        929 :               arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
    3435                 :        929 :               add_phi_arg (new_phi, arg, new_exit_e,
    3436                 :            :                            gimple_phi_arg_location_from_edge (orig_phi, e));
    3437                 :        929 :               adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    3438                 :            :             }
    3439                 :            :         }
    3440                 :            : 
    3441                 :       2314 :       update_ssa (TODO_update_ssa);
    3442                 :            :     }
    3443                 :            : 
    3444                 :       2387 :   if (version_niter)
    3445                 :            :     {
    3446                 :            :       /* The versioned loop could be infinite, we need to clear existing
    3447                 :            :          niter information which is copied from the original loop.  */
    3448                 :         29 :       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
    3449                 :         29 :       vect_free_loop_info_assumptions (nloop);
    3450                 :            :       /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
    3451                 :         29 :       loop_constraint_set (loop, LOOP_C_INFINITE);
    3452                 :            :     }
    3453                 :            : 
    3454                 :       2387 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    3455                 :       2387 :       && dump_enabled_p ())
    3456                 :            :     {
    3457                 :        558 :       if (version_alias)
    3458                 :        546 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    3459                 :            :                          vect_location,
    3460                 :            :                          "loop versioned for vectorization because of "
    3461                 :            :                          "possible aliasing\n");
    3462                 :        558 :       if (version_align)
    3463                 :          0 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    3464                 :            :                          vect_location,
    3465                 :            :                          "loop versioned for vectorization to enhance "
    3466                 :            :                          "alignment\n");
    3467                 :            : 
    3468                 :            :     }
    3469                 :            : 
    3470                 :       2387 :   return nloop;
    3471                 :            : }

Generated by: LCOV version 1.0

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto --enable-host-shared. GCC test suite is run with the built compiler.