LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.c (source / functions) Hit Total Coverage
Test: gcc.info Lines: 1890 2169 87.1 %
Date: 2020-04-04 11:58:09 Functions: 58 69 84.1 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* Analysis Utilities for Loop Vectorization.
       2                 :            :    Copyright (C) 2006-2020 Free Software Foundation, Inc.
       3                 :            :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4                 :            : 
       5                 :            : This file is part of GCC.
       6                 :            : 
       7                 :            : GCC is free software; you can redistribute it and/or modify it under
       8                 :            : the terms of the GNU General Public License as published by the Free
       9                 :            : Software Foundation; either version 3, or (at your option) any later
      10                 :            : version.
      11                 :            : 
      12                 :            : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :            : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :            : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :            : for more details.
      16                 :            : 
      17                 :            : You should have received a copy of the GNU General Public License
      18                 :            : along with GCC; see the file COPYING3.  If not see
      19                 :            : <http://www.gnu.org/licenses/>.  */
      20                 :            : 
      21                 :            : #include "config.h"
      22                 :            : #include "system.h"
      23                 :            : #include "coretypes.h"
      24                 :            : #include "backend.h"
      25                 :            : #include "rtl.h"
      26                 :            : #include "tree.h"
      27                 :            : #include "gimple.h"
      28                 :            : #include "ssa.h"
      29                 :            : #include "expmed.h"
      30                 :            : #include "optabs-tree.h"
      31                 :            : #include "insn-config.h"
      32                 :            : #include "recog.h"            /* FIXME: for insn_data */
      33                 :            : #include "fold-const.h"
      34                 :            : #include "stor-layout.h"
      35                 :            : #include "tree-eh.h"
      36                 :            : #include "gimplify.h"
      37                 :            : #include "gimple-iterator.h"
      38                 :            : #include "cfgloop.h"
      39                 :            : #include "tree-vectorizer.h"
      40                 :            : #include "dumpfile.h"
      41                 :            : #include "builtins.h"
      42                 :            : #include "internal-fn.h"
      43                 :            : #include "case-cfn-macros.h"
      44                 :            : #include "fold-const-call.h"
      45                 :            : #include "attribs.h"
      46                 :            : #include "cgraph.h"
      47                 :            : #include "omp-simd-clone.h"
      48                 :            : #include "predict.h"
      49                 :            : #include "tree-vector-builder.h"
      50                 :            : #include "vec-perm-indices.h"
      51                 :            : 
      52                 :            : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      53                 :            :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
      54                 :            : 
      55                 :            : static bool
      56                 :     564346 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      57                 :            : {
      58                 :     564346 :   value_range_kind vr_type = get_range_info (var, min_value, max_value);
      59                 :     564346 :   wide_int nonzero = get_nonzero_bits (var);
      60                 :     564346 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      61                 :     564346 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      62                 :            :                                          nonzero, sgn) == VR_RANGE)
      63                 :            :     {
      64                 :     277675 :       if (dump_enabled_p ())
      65                 :            :         {
      66                 :      37372 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      67                 :      37372 :           dump_printf (MSG_NOTE, " has range [");
      68                 :      37372 :           dump_hex (MSG_NOTE, *min_value);
      69                 :      37372 :           dump_printf (MSG_NOTE, ", ");
      70                 :      37372 :           dump_hex (MSG_NOTE, *max_value);
      71                 :      37372 :           dump_printf (MSG_NOTE, "]\n");
      72                 :            :         }
      73                 :     277675 :       return true;
      74                 :            :     }
      75                 :            :   else
      76                 :            :     {
      77                 :     286671 :       if (dump_enabled_p ())
      78                 :            :         {
      79                 :      44435 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      80                 :      44435 :           dump_printf (MSG_NOTE, " has no range info\n");
      81                 :            :         }
      82                 :     286671 :       return false;
      83                 :            :     }
      84                 :            : }
      85                 :            : 
      86                 :            : /* Report that we've found an instance of pattern PATTERN in
      87                 :            :    statement STMT.  */
      88                 :            : 
      89                 :            : static void
      90                 :      53591 : vect_pattern_detected (const char *name, gimple *stmt)
      91                 :            : {
      92                 :      53591 :   if (dump_enabled_p ())
      93                 :      10435 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
      94                 :      53591 : }
      95                 :            : 
      96                 :            : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
      97                 :            :    return the pattern statement's stmt_vec_info.  Set its vector type to
      98                 :            :    VECTYPE if it doesn't have one already.  */
      99                 :            : 
     100                 :            : static stmt_vec_info
     101                 :     131923 : vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
     102                 :            :                         tree vectype)
     103                 :            : {
     104                 :     131923 :   vec_info *vinfo = orig_stmt_info->vinfo;
     105                 :     131923 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     106                 :     131923 :   if (pattern_stmt_info == NULL)
     107                 :      81428 :     pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
     108                 :     131923 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
     109                 :            : 
     110                 :     131923 :   pattern_stmt_info->pattern_stmt_p = true;
     111                 :     131923 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
     112                 :     131923 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     113                 :     131923 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     114                 :     131923 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     115                 :            :     {
     116                 :     252367 :       gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
     117                 :            :                   == vect_use_mask_type_p (orig_stmt_info));
     118                 :      84171 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     119                 :      84171 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
     120                 :            :     }
     121                 :     131923 :   return pattern_stmt_info;
     122                 :            : }
     123                 :            : 
     124                 :            : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     125                 :            :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     126                 :            :    have one already.  */
     127                 :            : 
     128                 :            : static void
     129                 :      49390 : vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
     130                 :            :                        tree vectype)
     131                 :            : {
     132                 :      49390 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
     133                 :      49390 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)
     134                 :          0 :     = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
     135                 :      46394 : }
     136                 :            : 
     137                 :            : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     138                 :            :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     139                 :            :    be different from the vector type of the final pattern statement.
     140                 :            :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     141                 :            :    from which it was derived.  */
     142                 :            : 
     143                 :            : static inline void
     144                 :      78531 : append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
     145                 :            :                         tree vectype = NULL_TREE,
     146                 :            :                         tree scalar_type_for_mask = NULL_TREE)
     147                 :            : {
     148                 :     170704 :   gcc_assert (!scalar_type_for_mask
     149                 :            :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
     150                 :      78531 :   vec_info *vinfo = stmt_info->vinfo;
     151                 :      78531 :   if (vectype)
     152                 :            :     {
     153                 :      47752 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     154                 :      47752 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     155                 :      47752 :       if (scalar_type_for_mask)
     156                 :       3331 :         new_stmt_info->mask_precision
     157                 :       6662 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
     158                 :            :     }
     159                 :      78531 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     160                 :            :                                       new_stmt);
     161                 :      78531 : }
     162                 :            : 
     163                 :            : /* The caller wants to perform new operations on vect_external variable
     164                 :            :    VAR, so that the result of the operations would also be vect_external.
     165                 :            :    Return the edge on which the operations can be performed, if one exists.
     166                 :            :    Return null if the operations should instead be treated as part of
     167                 :            :    the pattern that needs them.  */
     168                 :            : 
     169                 :            : static edge
     170                 :       3043 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     171                 :            : {
     172                 :       3043 :   edge e = NULL;
     173                 :       3043 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     174                 :            :     {
     175                 :        168 :       e = loop_preheader_edge (loop_vinfo->loop);
     176                 :        168 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))
     177                 :            :         {
     178                 :         72 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     179                 :         72 :           if (bb == NULL
     180                 :         72 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
     181                 :            :             e = NULL;
     182                 :            :         }
     183                 :            :     }
     184                 :       3043 :   return e;
     185                 :            : }
     186                 :            : 
     187                 :            : /* Return true if the target supports a vector version of CODE,
     188                 :            :    where CODE is known to map to a direct optab.  ITYPE specifies
     189                 :            :    the type of (some of) the scalar inputs and OTYPE specifies the
     190                 :            :    type of the scalar result.
     191                 :            : 
     192                 :            :    If CODE allows the inputs and outputs to have different type
     193                 :            :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     194                 :            :    than the output mode that determines the appropriate target pattern.
     195                 :            :    Operand 0 of the target pattern then specifies the mode that the output
     196                 :            :    must have.
     197                 :            : 
     198                 :            :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     199                 :            :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     200                 :            :    is nonnull.  */
     201                 :            : 
     202                 :            : static bool
     203                 :        804 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     204                 :            :                                  tree itype, tree *vecotype_out,
     205                 :            :                                  tree *vecitype_out = NULL)
     206                 :            : {
     207                 :        804 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     208                 :        804 :   if (!vecitype)
     209                 :            :     return false;
     210                 :            : 
     211                 :        804 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     212                 :        804 :   if (!vecotype)
     213                 :            :     return false;
     214                 :            : 
     215                 :        804 :   optab optab = optab_for_tree_code (code, vecitype, optab_default);
     216                 :        804 :   if (!optab)
     217                 :            :     return false;
     218                 :            : 
     219                 :        804 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
     220                 :        804 :   if (icode == CODE_FOR_nothing
     221                 :        804 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
     222                 :        577 :     return false;
     223                 :            : 
     224                 :        227 :   *vecotype_out = vecotype;
     225                 :        227 :   if (vecitype_out)
     226                 :        221 :     *vecitype_out = vecitype;
     227                 :            :   return true;
     228                 :            : }
     229                 :            : 
     230                 :            : /* Round bit precision PRECISION up to a full element.  */
     231                 :            : 
     232                 :            : static unsigned int
     233                 :     176176 : vect_element_precision (unsigned int precision)
     234                 :            : {
     235                 :          0 :   precision = 1 << ceil_log2 (precision);
     236                 :     176176 :   return MAX (precision, BITS_PER_UNIT);
     237                 :            : }
     238                 :            : 
     239                 :            : /* If OP is defined by a statement that's being considered for vectorization,
     240                 :            :    return information about that statement, otherwise return NULL.  */
     241                 :            : 
     242                 :            : static stmt_vec_info
     243                 :      49438 : vect_get_internal_def (vec_info *vinfo, tree op)
     244                 :            : {
     245                 :          0 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     246                 :      49438 :   if (def_stmt_info
     247                 :      48679 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
     248                 :          0 :     return def_stmt_info;
     249                 :            :   return NULL;
     250                 :            : }
     251                 :            : 
     252                 :            : /* Check whether NAME, an ssa-name used in STMT_VINFO,
     253                 :            :    is a result of a type promotion, such that:
     254                 :            :      DEF_STMT: NAME = NOP (name0)
     255                 :            :    If CHECK_SIGN is TRUE, check that either both types are signed or both are
     256                 :            :    unsigned.  */
     257                 :            : 
     258                 :            : static bool
     259                 :        517 : type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
     260                 :            :                    tree *orig_type, gimple **def_stmt, bool *promotion)
     261                 :            : {
     262                 :        517 :   tree type = TREE_TYPE (name);
     263                 :        517 :   tree oprnd0;
     264                 :        517 :   enum vect_def_type dt;
     265                 :            : 
     266                 :        517 :   stmt_vec_info def_stmt_info;
     267                 :        517 :   if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
     268                 :            :                            def_stmt))
     269                 :            :     return false;
     270                 :            : 
     271                 :        502 :   if (dt != vect_internal_def
     272                 :        502 :       && dt != vect_external_def && dt != vect_constant_def)
     273                 :            :     return false;
     274                 :            : 
     275                 :        495 :   if (!*def_stmt)
     276                 :            :     return false;
     277                 :            : 
     278                 :        495 :   if (!is_gimple_assign (*def_stmt))
     279                 :            :     return false;
     280                 :            : 
     281                 :        481 :   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
     282                 :            :     return false;
     283                 :            : 
     284                 :        171 :   oprnd0 = gimple_assign_rhs1 (*def_stmt);
     285                 :            : 
     286                 :        171 :   *orig_type = TREE_TYPE (oprnd0);
     287                 :        171 :   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
     288                 :        342 :       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
     289                 :            :     return false;
     290                 :            : 
     291                 :        171 :   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
     292                 :         64 :     *promotion = true;
     293                 :            :   else
     294                 :        107 :     *promotion = false;
     295                 :            : 
     296                 :        171 :   if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
     297                 :          8 :     return false;
     298                 :            : 
     299                 :            :   return true;
     300                 :            : }
     301                 :            : 
     302                 :            : /* Holds information about an input operand after some sign changes
     303                 :            :    and type promotions have been peeled away.  */
     304                 :            : class vect_unpromoted_value {
     305                 :            : public:
     306                 :            :   vect_unpromoted_value ();
     307                 :            : 
     308                 :            :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
     309                 :            : 
     310                 :            :   /* The value obtained after peeling away zero or more casts.  */
     311                 :            :   tree op;
     312                 :            : 
     313                 :            :   /* The type of OP.  */
     314                 :            :   tree type;
     315                 :            : 
     316                 :            :   /* The definition type of OP.  */
     317                 :            :   vect_def_type dt;
     318                 :            : 
     319                 :            :   /* If OP is the result of peeling at least one cast, and if the cast
     320                 :            :      of OP itself is a vectorizable statement, CASTER identifies that
     321                 :            :      statement, otherwise it is null.  */
     322                 :            :   stmt_vec_info caster;
     323                 :            : };
     324                 :            : 
     325                 :    6354010 : inline vect_unpromoted_value::vect_unpromoted_value ()
     326                 :            :   : op (NULL_TREE),
     327                 :            :     type (NULL_TREE),
     328                 :            :     dt (vect_uninitialized_def),
     329                 :     315054 :     caster (NULL)
     330                 :            : {
     331                 :            : }
     332                 :            : 
     333                 :            : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     334                 :            :    statement that casts it to CASTER_IN.  */
     335                 :            : 
     336                 :            : inline void
     337                 :     414076 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     338                 :            :                                stmt_vec_info caster_in)
     339                 :            : {
     340                 :     414076 :   op = op_in;
     341                 :     828152 :   type = TREE_TYPE (op);
     342                 :     414076 :   dt = dt_in;
     343                 :     414076 :   caster = caster_in;
     344                 :      99848 : }
     345                 :            : 
     346                 :            : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     347                 :            :    to reach some vectorizable inner operand OP', continuing as long as it
     348                 :            :    is possible to convert OP' back to OP using a possible sign change
     349                 :            :    followed by a possible promotion P.  Return this OP', or null if OP is
     350                 :            :    not a vectorizable SSA name.  If there is a promotion P, describe its
     351                 :            :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     352                 :            :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     353                 :            :    have more than one user.
     354                 :            : 
     355                 :            :    A successful return means that it is possible to go from OP' to OP
     356                 :            :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     357                 :            :    whereas the cast from UNPROM to OP might be a promotion, a sign
     358                 :            :    change, or a nop.
     359                 :            : 
     360                 :            :    E.g. say we have:
     361                 :            : 
     362                 :            :        signed short *ptr = ...;
     363                 :            :        signed short C = *ptr;
     364                 :            :        unsigned short B = (unsigned short) C;    // sign change
     365                 :            :        signed int A = (signed int) B;            // unsigned promotion
     366                 :            :        ...possible other uses of A...
     367                 :            :        unsigned int OP = (unsigned int) A;       // sign change
     368                 :            : 
     369                 :            :    In this case it's possible to go directly from C to OP using:
     370                 :            : 
     371                 :            :        OP = (unsigned int) (unsigned short) C;
     372                 :            :             +------------+ +--------------+
     373                 :            :                promotion      sign change
     374                 :            : 
     375                 :            :    so OP' would be C.  The input to the promotion is B, so UNPROM
     376                 :            :    would describe B.  */
     377                 :            : 
     378                 :            : static tree
     379                 :     258120 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     380                 :            :                                       vect_unpromoted_value *unprom,
     381                 :            :                                       bool *single_use_p = NULL)
     382                 :            : {
     383                 :     258120 :   tree res = NULL_TREE;
     384                 :     258120 :   tree op_type = TREE_TYPE (op);
     385                 :     258120 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     386                 :     258120 :   unsigned int min_precision = orig_precision;
     387                 :     258120 :   stmt_vec_info caster = NULL;
     388                 :     327087 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
     389                 :            :     {
     390                 :            :       /* See whether OP is simple enough to vectorize.  */
     391                 :     307015 :       stmt_vec_info def_stmt_info;
     392                 :     307015 :       gimple *def_stmt;
     393                 :     307015 :       vect_def_type dt;
     394                 :     307015 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     395                 :            :         break;
     396                 :            : 
     397                 :            :       /* If OP is the input of a demotion, skip over it to see whether
     398                 :            :          OP is itself the result of a promotion.  If so, the combined
     399                 :            :          effect of the promotion and the demotion might fit the required
     400                 :            :          pattern, otherwise neither operation fits.
     401                 :            : 
     402                 :            :          This copes with cases such as the result of an arithmetic
     403                 :            :          operation being truncated before being stored, and where that
     404                 :            :          arithmetic operation has been recognized as an over-widened one.  */
     405                 :     306516 :       if (TYPE_PRECISION (op_type) <= min_precision)
     406                 :            :         {
     407                 :            :           /* Use OP as the UNPROM described above if we haven't yet
     408                 :            :              found a promotion, or if using the new input preserves the
     409                 :            :              sign of the previous promotion.  */
     410                 :     304422 :           if (!res
     411                 :      66705 :               || TYPE_PRECISION (unprom->type) == orig_precision
     412                 :     307136 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
     413                 :            :             {
     414                 :     302182 :               unprom->set_op (op, dt, caster);
     415                 :     302182 :               min_precision = TYPE_PRECISION (op_type);
     416                 :            :             }
     417                 :            :           /* Stop if we've already seen a promotion and if this
     418                 :            :              conversion does more than change the sign.  */
     419                 :       2240 :           else if (TYPE_PRECISION (op_type)
     420                 :       2240 :                    != TYPE_PRECISION (unprom->type))
     421                 :            :             break;
     422                 :            : 
     423                 :            :           /* The sequence now extends to OP.  */
     424                 :            :           res = op;
     425                 :            :         }
     426                 :            : 
     427                 :            :       /* See whether OP is defined by a cast.  Record it as CASTER if
     428                 :            :          the cast is potentially vectorizable.  */
     429                 :     306512 :       if (!def_stmt)
     430                 :            :         break;
     431                 :     304622 :       caster = def_stmt_info;
     432                 :            : 
     433                 :            :       /* Ignore pattern statements, since we don't link uses for them.  */
     434                 :     304622 :       if (caster
     435                 :     304622 :           && single_use_p
     436                 :     115100 :           && !STMT_VINFO_RELATED_STMT (caster)
     437                 :     405767 :           && !has_single_use (res))
     438                 :      73778 :         *single_use_p = false;
     439                 :            : 
     440                 :     474964 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     441                 :     236916 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
     442                 :            :         break;
     443                 :            : 
     444                 :            :       /* Continue with the input to the cast.  */
     445                 :      68967 :       op = gimple_assign_rhs1 (def_stmt);
     446                 :      68967 :       op_type = TREE_TYPE (op);
     447                 :            :     }
     448                 :     258120 :   return res;
     449                 :            : }
     450                 :            : 
     451                 :            : /* OP is an integer operand to an operation that returns TYPE, and we
     452                 :            :    want to treat the operation as a widening one.  So far we can treat
     453                 :            :    it as widening from *COMMON_TYPE.
     454                 :            : 
     455                 :            :    Return true if OP is suitable for such a widening operation,
     456                 :            :    either widening from *COMMON_TYPE or from some supertype of it.
     457                 :            :    Update *COMMON_TYPE to the supertype in the latter case.
     458                 :            : 
     459                 :            :    SHIFT_P is true if OP is a shift amount.  */
     460                 :            : 
     461                 :            : static bool
     462                 :      12046 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     463                 :            :                             tree *common_type)
     464                 :            : {
     465                 :            :   /* Calculate the minimum precision required by OP, without changing
     466                 :            :      the sign of either operand.  */
     467                 :      12046 :   unsigned int precision;
     468                 :      12046 :   if (shift_p)
     469                 :            :     {
     470                 :        403 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
     471                 :            :         return false;
     472                 :        361 :       precision = TREE_INT_CST_LOW (op);
     473                 :            :     }
     474                 :            :   else
     475                 :            :     {
     476                 :      11643 :       precision = wi::min_precision (wi::to_widest (op),
     477                 :      11643 :                                      TYPE_SIGN (*common_type));
     478                 :      11643 :       if (precision * 2 > TYPE_PRECISION (type))
     479                 :            :         return false;
     480                 :            :     }
     481                 :            : 
     482                 :            :   /* If OP requires a wider type, switch to that type.  The checks
     483                 :            :      above ensure that this is still narrower than the result.  */
     484                 :      11959 :   precision = vect_element_precision (precision);
     485                 :      11959 :   if (TYPE_PRECISION (*common_type) < precision)
     486                 :        107 :     *common_type = build_nonstandard_integer_type
     487                 :        107 :       (precision, TYPE_UNSIGNED (*common_type));
     488                 :            :   return true;
     489                 :            : }
     490                 :            : 
     491                 :            : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     492                 :            :    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
     493                 :            : 
     494                 :            : static bool
     495                 :       1733 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     496                 :            : {
     497                 :       1733 :   if (types_compatible_p (*common_type, new_type))
     498                 :            :     return true;
     499                 :            : 
     500                 :            :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     501                 :         45 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     502                 :         45 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     503                 :            :     return true;
     504                 :            : 
     505                 :            :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     506                 :         11 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     507                 :         11 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     508                 :            :     {
     509                 :         11 :       *common_type = new_type;
     510                 :         11 :       return true;
     511                 :            :     }
     512                 :            : 
     513                 :            :   /* We have mismatched signs, with the signed type being
     514                 :            :      no wider than the unsigned type.  In this case we need
     515                 :            :      a wider signed type.  */
     516                 :          0 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     517                 :            :                                 TYPE_PRECISION (new_type));
     518                 :          0 :   precision *= 2;
     519                 :          0 :   if (precision * 2 > TYPE_PRECISION (type))
     520                 :            :     return false;
     521                 :            : 
     522                 :          0 :   *common_type = build_nonstandard_integer_type (precision, false);
     523                 :          0 :   return true;
     524                 :            : }
     525                 :            : 
     526                 :            : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     527                 :            :    in which each node either performs CODE or WIDENED_CODE, and where
     528                 :            :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     529                 :            :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     530                 :            :    CODE and WIDENED_CODE are some sort of shift.
     531                 :            : 
     532                 :            :    If STMT_INFO is such a tree, return the number of leaf operands
     533                 :            :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     534                 :            :    to a type that (a) is narrower than the result of STMT_INFO and
     535                 :            :    (b) can hold all leaf operand values.
     536                 :            : 
     537                 :            :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     538                 :            :    exists.  */
     539                 :            : 
     540                 :            : static unsigned int
     541                 :    2986630 : vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
     542                 :            :                       tree_code widened_code, bool shift_p,
     543                 :            :                       unsigned int max_nops,
     544                 :            :                       vect_unpromoted_value *unprom, tree *common_type)
     545                 :            : {
     546                 :            :   /* Check for an integer operation with the right code.  */
     547                 :    2986630 :   vec_info *vinfo = stmt_info->vinfo;
     548                 :    2986630 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
     549                 :    2329400 :   if (!assign)
     550                 :            :     return 0;
     551                 :            : 
     552                 :    2329400 :   tree_code rhs_code = gimple_assign_rhs_code (assign);
     553                 :    2329400 :   if (rhs_code != code && rhs_code != widened_code)
     554                 :            :     return 0;
     555                 :            : 
     556                 :     111220 :   tree type = gimple_expr_type (assign);
     557                 :     111220 :   if (!INTEGRAL_TYPE_P (type))
     558                 :            :     return 0;
     559                 :            : 
     560                 :            :   /* Assume that both operands will be leaf operands.  */
     561                 :      84918 :   max_nops -= 2;
     562                 :            : 
     563                 :            :   /* Check the operands.  */
     564                 :      84918 :   unsigned int next_op = 0;
     565                 :     113417 :   for (unsigned int i = 0; i < 2; ++i)
     566                 :            :     {
     567                 :      99617 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     568                 :      99617 :       unsigned int nops = 1;
     569                 :      99617 :       tree op = gimple_op (assign, i + 1);
     570                 :      99617 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     571                 :            :         {
     572                 :            :           /* We already have a common type from earlier operands.
     573                 :            :              Update it to account for OP.  */
     574                 :      12046 :           this_unprom->set_op (op, vect_constant_def);
     575                 :      12046 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     576                 :            :             return 0;
     577                 :            :         }
     578                 :            :       else
     579                 :            :         {
     580                 :            :           /* Only allow shifts by constants.  */
     581                 :      87571 :           if (shift_p && i == 1)
     582                 :            :             return 0;
     583                 :            : 
     584                 :      87489 :           if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
     585                 :            :                                                      this_unprom))
     586                 :            :             return 0;
     587                 :            : 
     588                 :      87011 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     589                 :            :             {
     590                 :            :               /* The operand isn't widened.  If STMT_INFO has the code
     591                 :            :                  for an unwidened operation, recursively check whether
     592                 :            :                  this operand is a node of the tree.  */
     593                 :      70621 :               if (rhs_code != code
     594                 :      70621 :                   || max_nops == 0
     595                 :        165 :                   || this_unprom->dt != vect_internal_def)
     596                 :            :                 return 0;
     597                 :            : 
     598                 :            :               /* Give back the leaf slot allocated above now that we're
     599                 :            :                  not treating this as a leaf operand.  */
     600                 :        165 :               max_nops += 1;
     601                 :            : 
     602                 :            :               /* Recursively process the definition of the operand.  */
     603                 :        165 :               stmt_vec_info def_stmt_info
     604                 :        165 :                 = vinfo->lookup_def (this_unprom->op);
     605                 :        165 :               nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
     606                 :            :                                            shift_p, max_nops, this_unprom,
     607                 :            :                                            common_type);
     608                 :        165 :               if (nops == 0)
     609                 :            :                 return 0;
     610                 :            : 
     611                 :        150 :               max_nops -= nops;
     612                 :            :             }
     613                 :            :           else
     614                 :            :             {
     615                 :            :               /* Make sure that the operand is narrower than the result.  */
     616                 :      16390 :               if (TYPE_PRECISION (this_unprom->type) * 2
     617                 :      16390 :                   > TYPE_PRECISION (type))
     618                 :            :                 return 0;
     619                 :            : 
     620                 :            :               /* Update COMMON_TYPE for the new operand.  */
     621                 :      16390 :               if (i == 0)
     622                 :      14657 :                 *common_type = this_unprom->type;
     623                 :       1733 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     624                 :            :                                                  common_type))
     625                 :            :                 return 0;
     626                 :            :             }
     627                 :            :         }
     628                 :      28499 :       next_op += nops;
     629                 :            :     }
     630                 :            :   return next_op;
     631                 :            : }
     632                 :            : 
     633                 :            : /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
     634                 :            :    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
     635                 :            : 
     636                 :            : static tree
     637                 :     129452 : vect_recog_temp_ssa_var (tree type, gimple *stmt)
     638                 :            : {
     639                 :          0 :   return make_temp_ssa_name (type, stmt, "patt");
     640                 :            : }
     641                 :            : 
     642                 :            : /* STMT2_INFO describes a type conversion that could be split into STMT1
     643                 :            :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     644                 :            :    input.  Try to do this using pattern statements, returning true on
     645                 :            :    success.  */
     646                 :            : 
     647                 :            : static bool
     648                 :       3010 : vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
     649                 :            :                       gimple *stmt1, tree vectype)
     650                 :            : {
     651                 :       3010 :   vec_info *vinfo = stmt2_info->vinfo;
     652                 :       3010 :   if (is_pattern_stmt_p (stmt2_info))
     653                 :            :     {
     654                 :            :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     655                 :            :          the pattern is attached.  */
     656                 :         14 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     657                 :         14 :       vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
     658                 :            : 
     659                 :         14 :       if (dump_enabled_p ())
     660                 :         12 :         dump_printf_loc (MSG_NOTE, vect_location,
     661                 :            :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     662                 :            : 
     663                 :            :       /* Since STMT2_INFO is a pattern statement, we can change it
     664                 :            :          in-situ without worrying about changing the code for the
     665                 :            :          containing block.  */
     666                 :         14 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     667                 :            : 
     668                 :         14 :       if (dump_enabled_p ())
     669                 :            :         {
     670                 :         12 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     671                 :         12 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     672                 :            :                            stmt2_info->stmt);
     673                 :            :         }
     674                 :            : 
     675                 :         14 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     676                 :         14 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     677                 :            :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     678                 :            :            to the end of the definition sequence.  */
     679                 :         14 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     680                 :            :       else
     681                 :            :         {
     682                 :            :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     683                 :            :              before it.  */
     684                 :          0 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     685                 :          0 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     686                 :            :         }
     687                 :         14 :       return true;
     688                 :            :     }
     689                 :            :   else
     690                 :            :     {
     691                 :            :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     692                 :            :          two-statement pattern now.  */
     693                 :       2996 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
     694                 :       2996 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     695                 :       2996 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     696                 :       2996 :       if (!lhs_vectype)
     697                 :            :         return false;
     698                 :            : 
     699                 :       2996 :       if (dump_enabled_p ())
     700                 :       1367 :         dump_printf_loc (MSG_NOTE, vect_location,
     701                 :            :                          "Splitting statement: %G", stmt2_info->stmt);
     702                 :            : 
     703                 :            :       /* Add STMT1 as a singleton pattern definition sequence.  */
     704                 :       2996 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     705                 :       2996 :       vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
     706                 :       2996 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     707                 :            : 
     708                 :            :       /* Build the second of the two pattern statements.  */
     709                 :       2996 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     710                 :       2996 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     711                 :       2996 :       vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
     712                 :            : 
     713                 :       2996 :       if (dump_enabled_p ())
     714                 :            :         {
     715                 :       1367 :           dump_printf_loc (MSG_NOTE, vect_location,
     716                 :            :                            "into pattern statements: %G", stmt1);
     717                 :       1367 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
     718                 :            :         }
     719                 :            : 
     720                 :       2996 :       return true;
     721                 :            :     }
     722                 :            : }
     723                 :            : 
     724                 :            : /* Convert UNPROM to TYPE and return the result, adding new statements
     725                 :            :    to STMT_INFO's pattern definition statements if no better way is
     726                 :            :    available.  VECTYPE is the vector form of TYPE.  */
     727                 :            : 
     728                 :            : static tree
     729                 :      49926 : vect_convert_input (stmt_vec_info stmt_info, tree type,
     730                 :            :                     vect_unpromoted_value *unprom, tree vectype)
     731                 :            : {
     732                 :      49926 :   vec_info *vinfo = stmt_info->vinfo;
     733                 :            : 
     734                 :            :   /* Check for a no-op conversion.  */
     735                 :      49926 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     736                 :      12045 :     return unprom->op;
     737                 :            : 
     738                 :            :   /* Allow the caller to create constant vect_unpromoted_values.  */
     739                 :      37881 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     740                 :      21951 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     741                 :            : 
     742                 :      15930 :   tree input = unprom->op;
     743                 :      15930 :   if (unprom->caster)
     744                 :            :     {
     745                 :       5448 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     746                 :       5448 :       tree lhs_type = TREE_TYPE (lhs);
     747                 :            : 
     748                 :            :       /* If the result of the existing cast is the right width, use it
     749                 :            :          instead of the source of the cast.  */
     750                 :       5448 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     751                 :            :         input = lhs;
     752                 :            :       /* If the precision we want is between the source and result
     753                 :            :          precisions of the existing cast, try splitting the cast into
     754                 :            :          two and tapping into a mid-way point.  */
     755                 :       5267 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     756                 :       5267 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     757                 :            :         {
     758                 :            :           /* In order to preserve the semantics of the original cast,
     759                 :            :              give the mid-way point the same signedness as the input value.
     760                 :            : 
     761                 :            :              It would be possible to use a signed type here instead if
     762                 :            :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     763                 :            :              make the sign of the midtype sensitive to the order in
     764                 :            :              which we process the statements, since the signedness of
     765                 :            :              TYPE is the signedness required by just one of possibly
     766                 :            :              many users.  Also, unsigned promotions are usually as cheap
     767                 :            :              as or cheaper than signed ones, so it's better to keep an
     768                 :            :              unsigned promotion.  */
     769                 :       3010 :           tree midtype = build_nonstandard_integer_type
     770                 :       3010 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     771                 :       3010 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     772                 :       3010 :           if (vec_midtype)
     773                 :            :             {
     774                 :       3010 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     775                 :       3010 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     776                 :            :                                                        unprom->op);
     777                 :       3010 :               if (!vect_split_statement (unprom->caster, input, new_stmt,
     778                 :            :                                          vec_midtype))
     779                 :          0 :                 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
     780                 :            :             }
     781                 :            :         }
     782                 :            : 
     783                 :            :       /* See if we can reuse an existing result.  */
     784                 :       5448 :       if (types_compatible_p (type, TREE_TYPE (input)))
     785                 :            :         return input;
     786                 :            :     }
     787                 :            : 
     788                 :            :   /* We need a new conversion statement.  */
     789                 :      14163 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     790                 :      14163 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     791                 :            : 
     792                 :            :   /* If OP is an external value, see if we can insert the new statement
     793                 :            :      on an incoming edge.  */
     794                 :      14163 :   if (input == unprom->op && unprom->dt == vect_external_def)
     795                 :       3039 :     if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
     796                 :            :       {
     797                 :        164 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     798                 :        164 :         gcc_assert (!new_bb);
     799                 :            :         return new_op;
     800                 :            :       }
     801                 :            : 
     802                 :            :   /* As a (common) last resort, add the statement to the pattern itself.  */
     803                 :      13999 :   append_pattern_def_seq (stmt_info, new_stmt, vectype);
     804                 :      13999 :   return new_op;
     805                 :            : }
     806                 :            : 
     807                 :            : /* Invoke vect_convert_input for N elements of UNPROM and store the
     808                 :            :    result in the corresponding elements of RESULT.  */
     809                 :            : 
     810                 :            : static void
     811                 :      25297 : vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
     812                 :            :                      tree *result, tree type, vect_unpromoted_value *unprom,
     813                 :            :                      tree vectype)
     814                 :            : {
     815                 :      75252 :   for (unsigned int i = 0; i < n; ++i)
     816                 :            :     {
     817                 :            :       unsigned int j;
     818                 :      74584 :       for (j = 0; j < i; ++j)
     819                 :      24658 :         if (unprom[j].op == unprom[i].op)
     820                 :            :           break;
     821                 :      49955 :       if (j < i)
     822                 :         29 :         result[i] = result[j];
     823                 :            :       else
     824                 :      49926 :         result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
     825                 :            :     }
     826                 :      25297 : }
     827                 :            : 
     828                 :            : /* The caller has created a (possibly empty) sequence of pattern definition
     829                 :            :    statements followed by a single statement PATTERN_STMT.  Cast the result
     830                 :            :    of this final statement to TYPE.  If a new statement is needed, add
     831                 :            :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     832                 :            :    and return the new statement, otherwise return PATTERN_STMT as-is.
     833                 :            :    VECITYPE is the vector form of PATTERN_STMT's result type.  */
     834                 :            : 
     835                 :            : static gimple *
     836                 :      27416 : vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
     837                 :            :                      tree vecitype)
     838                 :            : {
     839                 :      27416 :   tree lhs = gimple_get_lhs (pattern_stmt);
     840                 :      27416 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))
     841                 :            :     {
     842                 :      26104 :       append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
     843                 :      26104 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
     844                 :      26104 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
     845                 :            :     }
     846                 :      27416 :   return pattern_stmt;
     847                 :            : }
     848                 :            : 
     849                 :            : /* Return true if STMT_VINFO describes a reduction for which reassociation
     850                 :            :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
     851                 :            :    a reduction chain and optimistically assume that all statements
     852                 :            :    except the last allow reassociation.
     853                 :            :    Also require it to have code CODE and to be a reduction
     854                 :            :    in the outermost loop.  When returning true, store the operands in
     855                 :            :    *OP0_OUT and *OP1_OUT.  */
     856                 :            : 
     857                 :            : static bool
     858                 :    4465830 : vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
     859                 :            :                                 tree *op0_out, tree *op1_out)
     860                 :            : {
     861                 :    4465830 :   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
     862                 :    1859790 :   if (!loop_info)
     863                 :            :     return false;
     864                 :            : 
     865                 :    1859790 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
     866                 :    1802450 :   if (!assign || gimple_assign_rhs_code (assign) != code)
     867                 :            :     return false;
     868                 :            : 
     869                 :            :   /* We don't allow changing the order of the computation in the inner-loop
     870                 :            :      when doing outer-loop vectorization.  */
     871                 :     419240 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
     872                 :     419240 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
     873                 :            :     return false;
     874                 :            : 
     875                 :     405983 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
     876                 :            :     {
     877                 :      40002 :       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
     878                 :            :                                        code))
     879                 :            :         return false;
     880                 :            :     }
     881                 :     365981 :   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
     882                 :            :     return false;
     883                 :            : 
     884                 :      39413 :   *op0_out = gimple_assign_rhs1 (assign);
     885                 :      39413 :   *op1_out = gimple_assign_rhs2 (assign);
     886                 :      39413 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
     887                 :      14317 :     std::swap (*op0_out, *op1_out);
     888                 :            :   return true;
     889                 :            : }
     890                 :            : 
     891                 :            : /* Function vect_recog_dot_prod_pattern
     892                 :            : 
     893                 :            :    Try to find the following pattern:
     894                 :            : 
     895                 :            :      type x_t, y_t;
     896                 :            :      TYPE1 prod;
     897                 :            :      TYPE2 sum = init;
     898                 :            :    loop:
     899                 :            :      sum_0 = phi <init, sum_1>
     900                 :            :      S1  x_t = ...
     901                 :            :      S2  y_t = ...
     902                 :            :      S3  x_T = (TYPE1) x_t;
     903                 :            :      S4  y_T = (TYPE1) y_t;
     904                 :            :      S5  prod = x_T * y_T;
     905                 :            :      [S6  prod = (TYPE2) prod;  #optional]
     906                 :            :      S7  sum_1 = prod + sum_0;
     907                 :            : 
     908                 :            :    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
     909                 :            :    same size of 'TYPE1' or bigger. This is a special case of a reduction
     910                 :            :    computation.
     911                 :            : 
     912                 :            :    Input:
     913                 :            : 
     914                 :            :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
     915                 :            :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
     916                 :            :    will be detected.
     917                 :            : 
     918                 :            :    Output:
     919                 :            : 
     920                 :            :    * TYPE_OUT: The type of the output  of this pattern.
     921                 :            : 
     922                 :            :    * Return value: A new stmt that will be used to replace the sequence of
     923                 :            :    stmts that constitute the pattern. In this case it will be:
     924                 :            :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
     925                 :            : 
     926                 :            :    Note: The dot-prod idiom is a widening reduction pattern that is
     927                 :            :          vectorized without preserving all the intermediate results. It
     928                 :            :          produces only N/2 (widened) results (by summing up pairs of
     929                 :            :          intermediate results) rather than all N results.  Therefore, we
     930                 :            :          cannot allow this pattern when we want to get all the results and in
     931                 :            :          the correct order (as is the case when this computation is in an
     932                 :            :          inner-loop nested in an outer-loop that us being vectorized).  */
     933                 :            : 
     934                 :            : static gimple *
     935                 :    1488690 : vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
     936                 :            : {
     937                 :    1488690 :   tree oprnd0, oprnd1;
     938                 :    1488690 :   gimple *last_stmt = stmt_vinfo->stmt;
     939                 :    1488690 :   vec_info *vinfo = stmt_vinfo->vinfo;
     940                 :    1488690 :   tree type, half_type;
     941                 :    1488690 :   gimple *pattern_stmt;
     942                 :    1488690 :   tree var;
     943                 :            : 
     944                 :            :   /* Look for the following pattern
     945                 :            :           DX = (TYPE1) X;
     946                 :            :           DY = (TYPE1) Y;
     947                 :            :           DPROD = DX * DY;
     948                 :            :           DDPROD = (TYPE2) DPROD;
     949                 :            :           sum_1 = DDPROD + sum_0;
     950                 :            :      In which
     951                 :            :      - DX is double the size of X
     952                 :            :      - DY is double the size of Y
     953                 :            :      - DX, DY, DPROD all have the same type
     954                 :            :      - sum is the same size of DPROD or bigger
     955                 :            :      - sum has been recognized as a reduction variable.
     956                 :            : 
     957                 :            :      This is equivalent to:
     958                 :            :        DPROD = X w* Y;          #widen mult
     959                 :            :        sum_1 = DPROD w+ sum_0;  #widen summation
     960                 :            :      or
     961                 :            :        DPROD = X w* Y;          #widen mult
     962                 :            :        sum_1 = DPROD + sum_0;   #summation
     963                 :            :    */
     964                 :            : 
     965                 :            :   /* Starting from LAST_STMT, follow the defs of its uses in search
     966                 :            :      of the above pattern.  */
     967                 :            : 
     968                 :    1488690 :   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
     969                 :            :                                        &oprnd0, &oprnd1))
     970                 :            :     return NULL;
     971                 :            : 
     972                 :      13222 :   type = gimple_expr_type (last_stmt);
     973                 :            : 
     974                 :      13222 :   vect_unpromoted_value unprom_mult;
     975                 :      13222 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
     976                 :            : 
     977                 :            :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     978                 :            :      we know that oprnd1 is the reduction variable (defined by a loop-header
     979                 :            :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     980                 :            :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
     981                 :      13222 :   if (!oprnd0)
     982                 :            :     return NULL;
     983                 :            : 
     984                 :       6626 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
     985                 :            :   if (!mult_vinfo)
     986                 :            :     return NULL;
     987                 :            : 
     988                 :            :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     989                 :            :      inside the loop (in case we are analyzing an outer-loop).  */
     990                 :      19440 :   vect_unpromoted_value unprom0[2];
     991                 :       6480 :   if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
     992                 :            :                              false, 2, unprom0, &half_type))
     993                 :            :     return NULL;
     994                 :            : 
     995                 :            :   /* If there are two widening operations, make sure they agree on
     996                 :            :      the sign of the extension.  */
     997                 :        152 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
     998                 :        152 :       && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
     999                 :            :     return NULL;
    1000                 :            : 
    1001                 :        128 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1002                 :            : 
    1003                 :        128 :   tree half_vectype;
    1004                 :        128 :   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1005                 :            :                                         type_out, &half_vectype))
    1006                 :            :     return NULL;
    1007                 :            : 
    1008                 :            :   /* Get the inputs in the appropriate types.  */
    1009                 :         32 :   tree mult_oprnd[2];
    1010                 :         32 :   vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
    1011                 :            :                        unprom0, half_vectype);
    1012                 :            : 
    1013                 :         32 :   var = vect_recog_temp_ssa_var (type, NULL);
    1014                 :         32 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1015                 :            :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1016                 :            : 
    1017                 :         32 :   return pattern_stmt;
    1018                 :            : }
    1019                 :            : 
    1020                 :            : 
    1021                 :            : /* Function vect_recog_sad_pattern
    1022                 :            : 
    1023                 :            :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1024                 :            : 
    1025                 :            :      type x_t, y_t;
    1026                 :            :      signed TYPE1 diff, abs_diff;
    1027                 :            :      TYPE2 sum = init;
    1028                 :            :    loop:
    1029                 :            :      sum_0 = phi <init, sum_1>
    1030                 :            :      S1  x_t = ...
    1031                 :            :      S2  y_t = ...
    1032                 :            :      S3  x_T = (TYPE1) x_t;
    1033                 :            :      S4  y_T = (TYPE1) y_t;
    1034                 :            :      S5  diff = x_T - y_T;
    1035                 :            :      S6  abs_diff = ABS_EXPR <diff>;
    1036                 :            :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1037                 :            :      S8  sum_1 = abs_diff + sum_0;
    1038                 :            : 
    1039                 :            :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1040                 :            :    same size of 'TYPE1' or bigger. This is a special case of a reduction
    1041                 :            :    computation.
    1042                 :            : 
    1043                 :            :    Input:
    1044                 :            : 
    1045                 :            :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1046                 :            :    example, when this function is called with S8, the pattern
    1047                 :            :    {S3,S4,S5,S6,S7,S8} will be detected.
    1048                 :            : 
    1049                 :            :    Output:
    1050                 :            : 
    1051                 :            :    * TYPE_OUT: The type of the output of this pattern.
    1052                 :            : 
    1053                 :            :    * Return value: A new stmt that will be used to replace the sequence of
    1054                 :            :    stmts that constitute the pattern. In this case it will be:
    1055                 :            :         SAD_EXPR <x_t, y_t, sum_0>
    1056                 :            :   */
    1057                 :            : 
    1058                 :            : static gimple *
    1059                 :    1488660 : vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    1060                 :            : {
    1061                 :    1488660 :   gimple *last_stmt = stmt_vinfo->stmt;
    1062                 :    1488660 :   vec_info *vinfo = stmt_vinfo->vinfo;
    1063                 :    1488660 :   tree half_type;
    1064                 :            : 
    1065                 :            :   /* Look for the following pattern
    1066                 :            :           DX = (TYPE1) X;
    1067                 :            :           DY = (TYPE1) Y;
    1068                 :            :           DDIFF = DX - DY;
    1069                 :            :           DAD = ABS_EXPR <DDIFF>;
    1070                 :            :           DDPROD = (TYPE2) DPROD;
    1071                 :            :           sum_1 = DAD + sum_0;
    1072                 :            :      In which
    1073                 :            :      - DX is at least double the size of X
    1074                 :            :      - DY is at least double the size of Y
    1075                 :            :      - DX, DY, DDIFF, DAD all have the same type
    1076                 :            :      - sum is the same size of DAD or bigger
    1077                 :            :      - sum has been recognized as a reduction variable.
    1078                 :            : 
    1079                 :            :      This is equivalent to:
    1080                 :            :        DDIFF = X w- Y;          #widen sub
    1081                 :            :        DAD = ABS_EXPR <DDIFF>;
    1082                 :            :        sum_1 = DAD w+ sum_0;    #widen summation
    1083                 :            :      or
    1084                 :            :        DDIFF = X w- Y;          #widen sub
    1085                 :            :        DAD = ABS_EXPR <DDIFF>;
    1086                 :            :        sum_1 = DAD + sum_0;     #summation
    1087                 :            :    */
    1088                 :            : 
    1089                 :            :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1090                 :            :      of the above pattern.  */
    1091                 :            : 
    1092                 :    1488660 :   tree plus_oprnd0, plus_oprnd1;
    1093                 :    1488660 :   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
    1094                 :            :                                        &plus_oprnd0, &plus_oprnd1))
    1095                 :            :     return NULL;
    1096                 :            : 
    1097                 :      13190 :   tree sum_type = gimple_expr_type (last_stmt);
    1098                 :            : 
    1099                 :            :   /* Any non-truncating sequence of conversions is OK here, since
    1100                 :            :      with a successful match, the result of the ABS(U) is known to fit
    1101                 :            :      within the nonnegative range of the result type.  (It cannot be the
    1102                 :            :      negative of the minimum signed value due to the range of the widening
    1103                 :            :      MINUS_EXPR.)  */
    1104                 :      13190 :   vect_unpromoted_value unprom_abs;
    1105                 :      13190 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1106                 :            :                                                       &unprom_abs);
    1107                 :            : 
    1108                 :            :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1109                 :            :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1110                 :            :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1111                 :            :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1112                 :            : 
    1113                 :      13190 :   if (!plus_oprnd0)
    1114                 :            :     return NULL;
    1115                 :            : 
    1116                 :       6594 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1117                 :       6448 :   if (!abs_stmt_vinfo)
    1118                 :            :     return NULL;
    1119                 :            : 
    1120                 :            :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1121                 :            :      inside the loop (in case we are analyzing an outer-loop).  */
    1122                 :       6448 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1123                 :       6377 :   if (!abs_stmt
    1124                 :       6377 :       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
    1125                 :       6127 :           && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    1126                 :            :     return NULL;
    1127                 :            : 
    1128                 :        298 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
    1129                 :        298 :   tree abs_type = TREE_TYPE (abs_oprnd);
    1130                 :        298 :   if (TYPE_UNSIGNED (abs_type))
    1131                 :            :     return NULL;
    1132                 :            : 
    1133                 :            :   /* Peel off conversions from the ABS input.  This can involve sign
    1134                 :            :      changes (e.g. from an unsigned subtraction to a signed ABS input)
    1135                 :            :      or signed promotion, but it can't include unsigned promotion.
    1136                 :            :      (Note that ABS of an unsigned promotion should have been folded
    1137                 :            :      away before now anyway.)  */
    1138                 :        298 :   vect_unpromoted_value unprom_diff;
    1139                 :        298 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
    1140                 :            :                                                     &unprom_diff);
    1141                 :        298 :   if (!abs_oprnd)
    1142                 :            :     return NULL;
    1143                 :        298 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
    1144                 :        298 :       && TYPE_UNSIGNED (unprom_diff.type))
    1145                 :            :     return NULL;
    1146                 :            : 
    1147                 :            :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
    1148                 :        298 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
    1149                 :            :   if (!diff_stmt_vinfo)
    1150                 :            :     return NULL;
    1151                 :            : 
    1152                 :            :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1153                 :            :      inside the loop (in case we are analyzing an outer-loop).  */
    1154                 :        894 :   vect_unpromoted_value unprom[2];
    1155                 :        298 :   if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
    1156                 :            :                              false, 2, unprom, &half_type))
    1157                 :            :     return NULL;
    1158                 :            : 
    1159                 :        277 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1160                 :            : 
    1161                 :        277 :   tree half_vectype;
    1162                 :        277 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1163                 :            :                                         type_out, &half_vectype))
    1164                 :            :     return NULL;
    1165                 :            : 
    1166                 :            :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1167                 :        189 :   tree sad_oprnd[2];
    1168                 :        189 :   vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
    1169                 :            :                        unprom, half_vectype);
    1170                 :            : 
    1171                 :        189 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1172                 :        189 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1173                 :            :                                               sad_oprnd[1], plus_oprnd1);
    1174                 :            : 
    1175                 :        189 :   return pattern_stmt;
    1176                 :            : }
    1177                 :            : 
    1178                 :            : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1179                 :            :    so that it can be treated as though it had the form:
    1180                 :            : 
    1181                 :            :       A_TYPE a;
    1182                 :            :       B_TYPE b;
    1183                 :            :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1184                 :            :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1185                 :            :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1186                 :            :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1187                 :            :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1188                 :            : 
    1189                 :            :    Try to replace the pattern with:
    1190                 :            : 
    1191                 :            :       A_TYPE a;
    1192                 :            :       B_TYPE b;
    1193                 :            :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1194                 :            :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1195                 :            :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1196                 :            :     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
    1197                 :            : 
    1198                 :            :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1199                 :            : 
    1200                 :            :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1201                 :            :    name of the pattern being matched, for dump purposes.  */
    1202                 :            : 
    1203                 :            : static gimple *
    1204                 :    2978390 : vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
    1205                 :            :                              tree_code orig_code, tree_code wide_code,
    1206                 :            :                              bool shift_p, const char *name)
    1207                 :            : {
    1208                 :    2978390 :   vec_info *vinfo = last_stmt_info->vinfo;
    1209                 :    2978390 :   gimple *last_stmt = last_stmt_info->stmt;
    1210                 :            : 
    1211                 :    8935170 :   vect_unpromoted_value unprom[2];
    1212                 :    2978390 :   tree half_type;
    1213                 :    2978390 :   if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
    1214                 :            :                              shift_p, 2, unprom, &half_type))
    1215                 :            :     return NULL;
    1216                 :            : 
    1217                 :            :   /* Pattern detected.  */
    1218                 :      12392 :   vect_pattern_detected (name, last_stmt);
    1219                 :            : 
    1220                 :      12392 :   tree type = gimple_expr_type (last_stmt);
    1221                 :      12392 :   tree itype = type;
    1222                 :      12392 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1223                 :      12392 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1224                 :      10231 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1225                 :      10231 :                                             TYPE_UNSIGNED (half_type));
    1226                 :            : 
    1227                 :            :   /* Check target support  */
    1228                 :      12392 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1229                 :      12392 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1230                 :      12392 :   enum tree_code dummy_code;
    1231                 :      12392 :   int dummy_int;
    1232                 :      12392 :   auto_vec<tree> dummy_vec;
    1233                 :      12392 :   if (!vectype
    1234                 :      12392 :       || !vecitype
    1235                 :      12392 :       || !supportable_widening_operation (wide_code, last_stmt_info,
    1236                 :            :                                           vecitype, vectype,
    1237                 :            :                                           &dummy_code, &dummy_code,
    1238                 :            :                                           &dummy_int, &dummy_vec))
    1239                 :       5147 :     return NULL;
    1240                 :            : 
    1241                 :       7245 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1242                 :       7245 :   if (!*type_out)
    1243                 :            :     return NULL;
    1244                 :            : 
    1245                 :       7245 :   tree oprnd[2];
    1246                 :       7245 :   vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
    1247                 :            : 
    1248                 :       7245 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1249                 :       7245 :   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
    1250                 :            :                                               oprnd[0], oprnd[1]);
    1251                 :            : 
    1252                 :       7245 :   return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
    1253                 :            : }
    1254                 :            : 
    1255                 :            : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1256                 :            :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    1257                 :            : 
    1258                 :            : static gimple *
    1259                 :    1489920 : vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
    1260                 :            : {
    1261                 :    1489920 :   return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
    1262                 :            :                                       WIDEN_MULT_EXPR, false,
    1263                 :    1489920 :                                       "vect_recog_widen_mult_pattern");
    1264                 :            : }
    1265                 :            : 
    1266                 :            : /* Function vect_recog_pow_pattern
    1267                 :            : 
    1268                 :            :    Try to find the following pattern:
    1269                 :            : 
    1270                 :            :      x = POW (y, N);
    1271                 :            : 
    1272                 :            :    with POW being one of pow, powf, powi, powif and N being
    1273                 :            :    either 2 or 0.5.
    1274                 :            : 
    1275                 :            :    Input:
    1276                 :            : 
    1277                 :            :    * STMT_VINFO: The stmt from which the pattern search begins.
    1278                 :            : 
    1279                 :            :    Output:
    1280                 :            : 
    1281                 :            :    * TYPE_OUT: The type of the output of this pattern.
    1282                 :            : 
    1283                 :            :    * Return value: A new stmt that will be used to replace the sequence of
    1284                 :            :    stmts that constitute the pattern. In this case it will be:
    1285                 :            :         x = x * x
    1286                 :            :    or
    1287                 :            :         x = sqrt (x)
    1288                 :            : */
    1289                 :            : 
    1290                 :            : static gimple *
    1291                 :    1488480 : vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    1292                 :            : {
    1293                 :    1488480 :   vec_info *vinfo = stmt_vinfo->vinfo;
    1294                 :    1488480 :   gimple *last_stmt = stmt_vinfo->stmt;
    1295                 :    1488480 :   tree base, exp;
    1296                 :    1488480 :   gimple *stmt;
    1297                 :    1488480 :   tree var;
    1298                 :            : 
    1299                 :    1488480 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    1300                 :            :     return NULL;
    1301                 :            : 
    1302                 :       3876 :   switch (gimple_call_combined_fn (last_stmt))
    1303                 :            :     {
    1304                 :         37 :     CASE_CFN_POW:
    1305                 :         37 :     CASE_CFN_POWI:
    1306                 :         37 :       break;
    1307                 :            : 
    1308                 :            :     default:
    1309                 :            :       return NULL;
    1310                 :            :     }
    1311                 :            : 
    1312                 :         37 :   base = gimple_call_arg (last_stmt, 0);
    1313                 :         37 :   exp = gimple_call_arg (last_stmt, 1);
    1314                 :         37 :   if (TREE_CODE (exp) != REAL_CST
    1315                 :         25 :       && TREE_CODE (exp) != INTEGER_CST)
    1316                 :            :     {
    1317                 :         25 :       if (flag_unsafe_math_optimizations
    1318                 :          5 :           && TREE_CODE (base) == REAL_CST
    1319                 :         27 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    1320                 :            :         {
    1321                 :          2 :           combined_fn log_cfn;
    1322                 :          2 :           built_in_function exp_bfn;
    1323                 :          2 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    1324                 :            :             {
    1325                 :            :             case BUILT_IN_POW:
    1326                 :            :               log_cfn = CFN_BUILT_IN_LOG;
    1327                 :            :               exp_bfn = BUILT_IN_EXP;
    1328                 :            :               break;
    1329                 :          0 :             case BUILT_IN_POWF:
    1330                 :          0 :               log_cfn = CFN_BUILT_IN_LOGF;
    1331                 :          0 :               exp_bfn = BUILT_IN_EXPF;
    1332                 :          0 :               break;
    1333                 :          0 :             case BUILT_IN_POWL:
    1334                 :          0 :               log_cfn = CFN_BUILT_IN_LOGL;
    1335                 :          0 :               exp_bfn = BUILT_IN_EXPL;
    1336                 :          0 :               break;
    1337                 :            :             default:
    1338                 :            :               return NULL;
    1339                 :            :             }
    1340                 :          2 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    1341                 :          2 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    1342                 :            :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    1343                 :            :              does that, but if C is a power of 2, we want to use
    1344                 :            :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    1345                 :            :              vectorization we don't have vectorized exp2.  */
    1346                 :          2 :           if (logc
    1347                 :          2 :               && TREE_CODE (logc) == REAL_CST
    1348                 :          2 :               && exp_decl
    1349                 :          4 :               && lookup_attribute ("omp declare simd",
    1350                 :          2 :                                    DECL_ATTRIBUTES (exp_decl)))
    1351                 :            :             {
    1352                 :          2 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    1353                 :          2 :               if (node->simd_clones == NULL)
    1354                 :            :                 {
    1355                 :          2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    1356                 :          2 :                       || node->definition)
    1357                 :            :                     return NULL;
    1358                 :          2 :                   expand_simd_clones (node);
    1359                 :          2 :                   if (node->simd_clones == NULL)
    1360                 :            :                     return NULL;
    1361                 :            :                 }
    1362                 :          2 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    1363                 :          2 :               if (!*type_out)
    1364                 :            :                 return NULL;
    1365                 :          2 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    1366                 :          2 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
    1367                 :          2 :               append_pattern_def_seq (stmt_vinfo, g);
    1368                 :          2 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    1369                 :          2 :               g = gimple_build_call (exp_decl, 1, def);
    1370                 :          2 :               gimple_call_set_lhs (g, res);
    1371                 :          2 :               return g;
    1372                 :            :             }
    1373                 :            :         }
    1374                 :            : 
    1375                 :         23 :       return NULL;
    1376                 :            :     }
    1377                 :            : 
    1378                 :            :   /* We now have a pow or powi builtin function call with a constant
    1379                 :            :      exponent.  */
    1380                 :            : 
    1381                 :            :   /* Catch squaring.  */
    1382                 :         12 :   if ((tree_fits_shwi_p (exp)
    1383                 :          0 :        && tree_to_shwi (exp) == 2)
    1384                 :         12 :       || (TREE_CODE (exp) == REAL_CST
    1385                 :         12 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    1386                 :            :     {
    1387                 :         12 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    1388                 :          6 :                                             TREE_TYPE (base), type_out))
    1389                 :            :         return NULL;
    1390                 :            : 
    1391                 :          6 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    1392                 :          6 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    1393                 :          6 :       return stmt;
    1394                 :            :     }
    1395                 :            : 
    1396                 :            :   /* Catch square root.  */
    1397                 :          6 :   if (TREE_CODE (exp) == REAL_CST
    1398                 :          6 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    1399                 :            :     {
    1400                 :          4 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    1401                 :          4 :       if (*type_out
    1402                 :          4 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    1403                 :            :                                              OPTIMIZE_FOR_SPEED))
    1404                 :            :         {
    1405                 :          4 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    1406                 :          4 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    1407                 :          4 :           gimple_call_set_lhs (stmt, var);
    1408                 :          4 :           gimple_call_set_nothrow (stmt, true);
    1409                 :          4 :           return stmt;
    1410                 :            :         }
    1411                 :            :     }
    1412                 :            : 
    1413                 :            :   return NULL;
    1414                 :            : }
    1415                 :            : 
    1416                 :            : 
    1417                 :            : /* Function vect_recog_widen_sum_pattern
    1418                 :            : 
    1419                 :            :    Try to find the following pattern:
    1420                 :            : 
    1421                 :            :      type x_t;
    1422                 :            :      TYPE x_T, sum = init;
    1423                 :            :    loop:
    1424                 :            :      sum_0 = phi <init, sum_1>
    1425                 :            :      S1  x_t = *p;
    1426                 :            :      S2  x_T = (TYPE) x_t;
    1427                 :            :      S3  sum_1 = x_T + sum_0;
    1428                 :            : 
    1429                 :            :    where type 'TYPE' is at least double the size of type 'type', i.e - we're
    1430                 :            :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    1431                 :            :    a special case of a reduction computation.
    1432                 :            : 
    1433                 :            :    Input:
    1434                 :            : 
    1435                 :            :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    1436                 :            :    when this function is called with S3, the pattern {S2,S3} will be detected.
    1437                 :            : 
    1438                 :            :    Output:
    1439                 :            : 
    1440                 :            :    * TYPE_OUT: The type of the output of this pattern.
    1441                 :            : 
    1442                 :            :    * Return value: A new stmt that will be used to replace the sequence of
    1443                 :            :    stmts that constitute the pattern. In this case it will be:
    1444                 :            :         WIDEN_SUM <x_t, sum_0>
    1445                 :            : 
    1446                 :            :    Note: The widening-sum idiom is a widening reduction pattern that is
    1447                 :            :          vectorized without preserving all the intermediate results. It
    1448                 :            :          produces only N/2 (widened) results (by summing up pairs of
    1449                 :            :          intermediate results) rather than all N results.  Therefore, we
    1450                 :            :          cannot allow this pattern when we want to get all the results and in
    1451                 :            :          the correct order (as is the case when this computation is in an
    1452                 :            :          inner-loop nested in an outer-loop that us being vectorized).  */
    1453                 :            : 
    1454                 :            : static gimple *
    1455                 :    1488480 : vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    1456                 :            : {
    1457                 :    1488480 :   gimple *last_stmt = stmt_vinfo->stmt;
    1458                 :    1488480 :   tree oprnd0, oprnd1;
    1459                 :    1488480 :   vec_info *vinfo = stmt_vinfo->vinfo;
    1460                 :    1488480 :   tree type;
    1461                 :    1488480 :   gimple *pattern_stmt;
    1462                 :    1488480 :   tree var;
    1463                 :            : 
    1464                 :            :   /* Look for the following pattern
    1465                 :            :           DX = (TYPE) X;
    1466                 :            :           sum_1 = DX + sum_0;
    1467                 :            :      In which DX is at least double the size of X, and sum_1 has been
    1468                 :            :      recognized as a reduction variable.
    1469                 :            :    */
    1470                 :            : 
    1471                 :            :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1472                 :            :      of the above pattern.  */
    1473                 :            : 
    1474                 :    1488480 :   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
    1475                 :            :                                        &oprnd0, &oprnd1))
    1476                 :            :     return NULL;
    1477                 :            : 
    1478                 :      13001 :   type = gimple_expr_type (last_stmt);
    1479                 :            : 
    1480                 :            :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1481                 :            :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1482                 :            :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1483                 :            :      Left to check that oprnd0 is defined by a cast from type 'type' to type
    1484                 :            :      'TYPE'.  */
    1485                 :            : 
    1486                 :      13001 :   vect_unpromoted_value unprom0;
    1487                 :      13001 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    1488                 :      13001 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    1489                 :            :     return NULL;
    1490                 :            : 
    1491                 :        393 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    1492                 :            : 
    1493                 :        393 :   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    1494                 :            :                                         unprom0.type, type_out))
    1495                 :            :     return NULL;
    1496                 :            : 
    1497                 :          0 :   var = vect_recog_temp_ssa_var (type, NULL);
    1498                 :          0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
    1499                 :            : 
    1500                 :          0 :   return pattern_stmt;
    1501                 :            : }
    1502                 :            : 
    1503                 :            : /* Recognize cases in which an operation is performed in one type WTYPE
    1504                 :            :    but could be done more efficiently in a narrower type NTYPE.  For example,
    1505                 :            :    if we have:
    1506                 :            : 
    1507                 :            :      ATYPE a;  // narrower than NTYPE
    1508                 :            :      BTYPE b;  // narrower than NTYPE
    1509                 :            :      WTYPE aw = (WTYPE) a;
    1510                 :            :      WTYPE bw = (WTYPE) b;
    1511                 :            :      WTYPE res = aw + bw;  // only uses of aw and bw
    1512                 :            : 
    1513                 :            :    then it would be more efficient to do:
    1514                 :            : 
    1515                 :            :      NTYPE an = (NTYPE) a;
    1516                 :            :      NTYPE bn = (NTYPE) b;
    1517                 :            :      NTYPE resn = an + bn;
    1518                 :            :      WTYPE res = (WTYPE) resn;
    1519                 :            : 
    1520                 :            :    Other situations include things like:
    1521                 :            : 
    1522                 :            :      ATYPE a;  // NTYPE or narrower
    1523                 :            :      WTYPE aw = (WTYPE) a;
    1524                 :            :      WTYPE res = aw + b;
    1525                 :            : 
    1526                 :            :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    1527                 :            :    to truncate "b" and do the operation on NTYPE instead:
    1528                 :            : 
    1529                 :            :      NTYPE an = (NTYPE) a;
    1530                 :            :      NTYPE bn = (NTYPE) b;  // truncation
    1531                 :            :      NTYPE resn = an + bn;
    1532                 :            :      WTYPE res = (WTYPE) resn;
    1533                 :            : 
    1534                 :            :    All users of "res" should then use "resn" instead, making the final
    1535                 :            :    statement dead (not marked as relevant).  The final statement is still
    1536                 :            :    needed to maintain the type correctness of the IR.
    1537                 :            : 
    1538                 :            :    vect_determine_precisions has already determined the minimum
    1539                 :            :    precison of the operation and the minimum precision required
    1540                 :            :    by users of the result.  */
    1541                 :            : 
    1542                 :            : static gimple *
    1543                 :    1476760 : vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
    1544                 :            : {
    1545                 :    1476760 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    1546                 :    1148240 :   if (!last_stmt)
    1547                 :            :     return NULL;
    1548                 :            : 
    1549                 :            :   /* See whether we have found that this operation can be done on a
    1550                 :            :      narrower type without changing its semantics.  */
    1551                 :    1148240 :   unsigned int new_precision = last_stmt_info->operation_precision;
    1552                 :    1148240 :   if (!new_precision)
    1553                 :            :     return NULL;
    1554                 :            : 
    1555                 :     105018 :   vec_info *vinfo = last_stmt_info->vinfo;
    1556                 :     105018 :   tree lhs = gimple_assign_lhs (last_stmt);
    1557                 :     105018 :   tree type = TREE_TYPE (lhs);
    1558                 :     105018 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    1559                 :            : 
    1560                 :            :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    1561                 :            :      operands are interesting.  */
    1562                 :     105018 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    1563                 :            : 
    1564                 :            :   /* Check the operands.  */
    1565                 :     105018 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    1566                 :     105018 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    1567                 :     105018 :   unprom.quick_grow (nops);
    1568                 :     105018 :   unsigned int min_precision = 0;
    1569                 :     105018 :   bool single_use_p = false;
    1570                 :     310604 :   for (unsigned int i = 0; i < nops; ++i)
    1571                 :            :     {
    1572                 :     205695 :       tree op = gimple_op (last_stmt, first_op + i);
    1573                 :     205695 :       if (TREE_CODE (op) == INTEGER_CST)
    1574                 :     305434 :         unprom[i].set_op (op, vect_constant_def);
    1575                 :     105847 :       else if (TREE_CODE (op) == SSA_NAME)
    1576                 :            :         {
    1577                 :     105847 :           bool op_single_use_p = true;
    1578                 :     105847 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    1579                 :            :                                                      &op_single_use_p))
    1580                 :        109 :             return NULL;
    1581                 :            :           /* If:
    1582                 :            : 
    1583                 :            :              (1) N bits of the result are needed;
    1584                 :            :              (2) all inputs are widened from M<N bits; and
    1585                 :            :              (3) one operand OP is a single-use SSA name
    1586                 :            : 
    1587                 :            :              we can shift the M->N widening from OP to the output
    1588                 :            :              without changing the number or type of extensions involved.
    1589                 :            :              This then reduces the number of copies of STMT_INFO.
    1590                 :            : 
    1591                 :            :              If instead of (3) more than one operand is a single-use SSA name,
    1592                 :            :              shifting the extension to the output is even more of a win.
    1593                 :            : 
    1594                 :            :              If instead:
    1595                 :            : 
    1596                 :            :              (1) N bits of the result are needed;
    1597                 :            :              (2) one operand OP2 is widened from M2<N bits;
    1598                 :            :              (3) another operand OP1 is widened from M1<M2 bits; and
    1599                 :            :              (4) both OP1 and OP2 are single-use
    1600                 :            : 
    1601                 :            :              the choice is between:
    1602                 :            : 
    1603                 :            :              (a) truncating OP2 to M1, doing the operation on M1,
    1604                 :            :                  and then widening the result to N
    1605                 :            : 
    1606                 :            :              (b) widening OP1 to M2, doing the operation on M2, and then
    1607                 :            :                  widening the result to N
    1608                 :            : 
    1609                 :            :              Both shift the M2->N widening of the inputs to the output.
    1610                 :            :              (a) additionally shifts the M1->M2 widening to the output;
    1611                 :            :              it requires fewer copies of STMT_INFO but requires an extra
    1612                 :            :              M2->M1 truncation.
    1613                 :            : 
    1614                 :            :              Which is better will depend on the complexity and cost of
    1615                 :            :              STMT_INFO, which is hard to predict at this stage.  However,
    1616                 :            :              a clear tie-breaker in favor of (b) is the fact that the
    1617                 :            :              truncation in (a) increases the length of the operation chain.
    1618                 :            : 
    1619                 :            :              If instead of (4) only one of OP1 or OP2 is single-use,
    1620                 :            :              (b) is still a win over doing the operation in N bits:
    1621                 :            :              it still shifts the M2->N widening on the single-use operand
    1622                 :            :              to the output and reduces the number of STMT_INFO copies.
    1623                 :            : 
    1624                 :            :              If neither operand is single-use then operating on fewer than
    1625                 :            :              N bits might lead to more extensions overall.  Whether it does
    1626                 :            :              or not depends on global information about the vectorization
    1627                 :            :              region, and whether that's a good trade-off would again
    1628                 :            :              depend on the complexity and cost of the statements involved,
    1629                 :            :              as well as things like register pressure that are not normally
    1630                 :            :              modelled at this stage.  We therefore ignore these cases
    1631                 :            :              and just optimize the clear single-use wins above.
    1632                 :            : 
    1633                 :            :              Thus we take the maximum precision of the unpromoted operands
    1634                 :            :              and record whether any operand is single-use.  */
    1635                 :     105738 :           if (unprom[i].dt == vect_internal_def)
    1636                 :            :             {
    1637                 :      56859 :               min_precision = MAX (min_precision,
    1638                 :            :                                    TYPE_PRECISION (unprom[i].type));
    1639                 :      56859 :               single_use_p |= op_single_use_p;
    1640                 :            :             }
    1641                 :            :         }
    1642                 :            :     }
    1643                 :            : 
    1644                 :            :   /* Although the operation could be done in operation_precision, we have
    1645                 :            :      to balance that against introducing extra truncations or extensions.
    1646                 :            :      Calculate the minimum precision that can be handled efficiently.
    1647                 :            : 
    1648                 :            :      The loop above determined that the operation could be handled
    1649                 :            :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    1650                 :            :      extension from the inputs to the output without introducing more
    1651                 :            :      instructions, and would reduce the number of instructions required
    1652                 :            :      for STMT_INFO itself.
    1653                 :            : 
    1654                 :            :      vect_determine_precisions has also determined that the result only
    1655                 :            :      needs min_output_precision bits.  Truncating by a factor of N times
    1656                 :            :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    1657                 :            :      than min_output_precision, doing the operation in TYPE and truncating
    1658                 :            :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    1659                 :            :      In contrast:
    1660                 :            : 
    1661                 :            :      - truncating the input to a unary operation and doing the operation
    1662                 :            :        in the new type requires at most N - 1 + 1 = N instructions per
    1663                 :            :        output vector
    1664                 :            : 
    1665                 :            :      - doing the same for a binary operation requires at most
    1666                 :            :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    1667                 :            : 
    1668                 :            :      Both unary and binary operations require fewer instructions than
    1669                 :            :      this if the operands were extended from a suitable truncated form.
    1670                 :            :      Thus there is usually nothing to lose by doing operations in
    1671                 :            :      min_output_precision bits, but there can be something to gain.  */
    1672                 :     104909 :   if (!single_use_p)
    1673                 :      85304 :     min_precision = last_stmt_info->min_output_precision;
    1674                 :            :   else
    1675                 :      19605 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    1676                 :            : 
    1677                 :            :   /* Apply the minimum efficient precision we just calculated.  */
    1678                 :     104909 :   if (new_precision < min_precision)
    1679                 :            :     new_precision = min_precision;
    1680                 :     104909 :   if (new_precision >= TYPE_PRECISION (type))
    1681                 :            :     return NULL;
    1682                 :            : 
    1683                 :      17531 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    1684                 :            : 
    1685                 :      17531 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1686                 :      17531 :   if (!*type_out)
    1687                 :            :     return NULL;
    1688                 :            : 
    1689                 :            :   /* We've found a viable pattern.  Get the new type of the operation.  */
    1690                 :      17528 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    1691                 :      17528 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    1692                 :            : 
    1693                 :            :   /* If we're truncating an operation, we need to make sure that we
    1694                 :            :      don't introduce new undefined overflow.  The codes tested here are
    1695                 :            :      a subset of those accepted by vect_truncatable_operation_p.  */
    1696                 :      17528 :   tree op_type = new_type;
    1697                 :      17528 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    1698                 :      21427 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    1699                 :       2340 :     op_type = build_nonstandard_integer_type (new_precision, true);
    1700                 :            : 
    1701                 :            :   /* We specifically don't check here whether the target supports the
    1702                 :            :      new operation, since it might be something that a later pattern
    1703                 :            :      wants to rewrite anyway.  If targets have a minimum element size
    1704                 :            :      for some optabs, we should pattern-match smaller ops to larger ops
    1705                 :            :      where beneficial.  */
    1706                 :      17528 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    1707                 :      17528 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    1708                 :      17528 :   if (!new_vectype || !op_vectype)
    1709                 :            :     return NULL;
    1710                 :            : 
    1711                 :      17528 :   if (dump_enabled_p ())
    1712                 :       2920 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    1713                 :            :                      type, new_type);
    1714                 :            : 
    1715                 :            :   /* Calculate the rhs operands for an operation on OP_TYPE.  */
    1716                 :      17528 :   tree ops[3] = {};
    1717                 :      17580 :   for (unsigned int i = 1; i < first_op; ++i)
    1718                 :         52 :     ops[i - 1] = gimple_op (last_stmt, i);
    1719                 :      35056 :   vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
    1720                 :      17528 :                        op_type, &unprom[0], op_vectype);
    1721                 :            : 
    1722                 :            :   /* Use the operation to produce a result of type OP_TYPE.  */
    1723                 :      17528 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    1724                 :      17528 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    1725                 :            :                                               ops[0], ops[1], ops[2]);
    1726                 :      17528 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    1727                 :            : 
    1728                 :      17528 :   if (dump_enabled_p ())
    1729                 :       2920 :     dump_printf_loc (MSG_NOTE, vect_location,
    1730                 :            :                      "created pattern stmt: %G", pattern_stmt);
    1731                 :            : 
    1732                 :            :   /* Convert back to the original signedness, if OP_TYPE is different
    1733                 :            :      from NEW_TYPE.  */
    1734                 :      17528 :   if (op_type != new_type)
    1735                 :       2340 :     pattern_stmt = vect_convert_output (last_stmt_info, new_type,
    1736                 :            :                                         pattern_stmt, op_vectype);
    1737                 :            : 
    1738                 :            :   /* Promote the result to the original type.  */
    1739                 :      17528 :   pattern_stmt = vect_convert_output (last_stmt_info, type,
    1740                 :            :                                       pattern_stmt, new_vectype);
    1741                 :            : 
    1742                 :      17528 :   return pattern_stmt;
    1743                 :            : }
    1744                 :            : 
    1745                 :            : /* Recognize the following patterns:
    1746                 :            : 
    1747                 :            :      ATYPE a;  // narrower than TYPE
    1748                 :            :      BTYPE b;  // narrower than TYPE
    1749                 :            : 
    1750                 :            :    1) Multiply high with scaling
    1751                 :            :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    1752                 :            :    2) ... or also with rounding
    1753                 :            :      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
    1754                 :            : 
    1755                 :            :    where only the bottom half of res is used.  */
    1756                 :            : 
    1757                 :            : static gimple *
    1758                 :    1494580 : vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
    1759                 :            : {
    1760                 :            :   /* Check for a right shift.  */
    1761                 :    1494580 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    1762                 :    1165990 :   if (!last_stmt
    1763                 :    1165990 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    1764                 :            :     return NULL;
    1765                 :       8584 :   vec_info *vinfo = last_stmt_info->vinfo;
    1766                 :            : 
    1767                 :            :   /* Check that the shift result is wider than the users of the
    1768                 :            :      result need (i.e. that narrowing would be a natural choice).  */
    1769                 :       8584 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1770                 :       8584 :   unsigned int target_precision
    1771                 :       8584 :     = vect_element_precision (last_stmt_info->min_output_precision);
    1772                 :       8584 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1773                 :      16902 :       || target_precision >= TYPE_PRECISION (lhs_type))
    1774                 :            :     return NULL;
    1775                 :            : 
    1776                 :            :   /* Look through any change in sign on the outer shift input.  */
    1777                 :       4428 :   vect_unpromoted_value unprom_rshift_input;
    1778                 :       4428 :   tree rshift_input = vect_look_through_possible_promotion
    1779                 :       4428 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    1780                 :       4428 :   if (!rshift_input
    1781                 :       8854 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    1782                 :       4426 :            != TYPE_PRECISION (lhs_type))
    1783                 :            :     return NULL;
    1784                 :            : 
    1785                 :            :   /* Get the definition of the shift input.  */
    1786                 :       4335 :   stmt_vec_info rshift_input_stmt_info
    1787                 :    1498790 :     = vect_get_internal_def (vinfo, rshift_input);
    1788                 :       4215 :   if (!rshift_input_stmt_info)
    1789                 :            :     return NULL;
    1790                 :       4215 :   gassign *rshift_input_stmt
    1791                 :       4215 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    1792                 :       4207 :   if (!rshift_input_stmt)
    1793                 :            :     return NULL;
    1794                 :            : 
    1795                 :       4207 :   stmt_vec_info mulh_stmt_info;
    1796                 :       4207 :   tree scale_term;
    1797                 :       4207 :   internal_fn ifn;
    1798                 :       4207 :   unsigned int expect_offset;
    1799                 :            : 
    1800                 :            :   /* Check for the presence of the rounding term.  */
    1801                 :       4465 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    1802                 :            :     {
    1803                 :            :       /* Check that the outer shift was by 1.  */
    1804                 :       6308 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    1805                 :       3139 :         return NULL;
    1806                 :            : 
    1807                 :            :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    1808                 :       1002 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    1809                 :            :         return NULL;
    1810                 :            : 
    1811                 :            :       /* Look through any change in sign on the addition input.  */
    1812                 :         33 :       vect_unpromoted_value unprom_plus_input;
    1813                 :         33 :       tree plus_input = vect_look_through_possible_promotion
    1814                 :         33 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    1815                 :         33 :       if (!plus_input
    1816                 :         66 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    1817                 :         33 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    1818                 :            :         return NULL;
    1819                 :            : 
    1820                 :            :       /* Get the definition of the multiply-high-scale part.  */
    1821                 :         33 :       stmt_vec_info plus_input_stmt_info
    1822                 :       3172 :         = vect_get_internal_def (vinfo, plus_input);
    1823                 :         33 :       if (!plus_input_stmt_info)
    1824                 :            :         return NULL;
    1825                 :         33 :       gassign *plus_input_stmt
    1826                 :       3172 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    1827                 :         33 :       if (!plus_input_stmt
    1828                 :         33 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    1829                 :            :         return NULL;
    1830                 :            : 
    1831                 :            :       /* Look through any change in sign on the scaling input.  */
    1832                 :         15 :       vect_unpromoted_value unprom_scale_input;
    1833                 :         15 :       tree scale_input = vect_look_through_possible_promotion
    1834                 :         15 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    1835                 :         15 :       if (!scale_input
    1836                 :         30 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    1837                 :         15 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    1838                 :            :         return NULL;
    1839                 :            : 
    1840                 :            :       /* Get the definition of the multiply-high part.  */
    1841                 :       3154 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    1842                 :         15 :       if (!mulh_stmt_info)
    1843                 :            :         return NULL;
    1844                 :            : 
    1845                 :            :       /* Get the scaling term.  */
    1846                 :         15 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    1847                 :            : 
    1848                 :         15 :       expect_offset = target_precision + 2;
    1849                 :         15 :       ifn = IFN_MULHRS;
    1850                 :            :     }
    1851                 :            :   else
    1852                 :            :     {
    1853                 :       1053 :       mulh_stmt_info = rshift_input_stmt_info;
    1854                 :       1053 :       scale_term = gimple_assign_rhs2 (last_stmt);
    1855                 :            : 
    1856                 :       1053 :       expect_offset = target_precision + 1;
    1857                 :       1053 :       ifn = IFN_MULHS;
    1858                 :            :     }
    1859                 :            : 
    1860                 :            :   /* Check that the scaling factor is correct.  */
    1861                 :       1068 :   if (TREE_CODE (scale_term) != INTEGER_CST
    1862                 :       2102 :       || wi::to_widest (scale_term) + expect_offset
    1863                 :       2119 :            != TYPE_PRECISION (lhs_type))
    1864                 :       1017 :     return NULL;
    1865                 :            : 
    1866                 :            :   /* Check whether the scaling input term can be seen as two widened
    1867                 :            :      inputs multiplied together.  */
    1868                 :        153 :   vect_unpromoted_value unprom_mult[2];
    1869                 :         51 :   tree new_type;
    1870                 :         51 :   unsigned int nops
    1871                 :         51 :     = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    1872                 :            :                             false, 2, unprom_mult, &new_type);
    1873                 :         51 :   if (nops != 2)
    1874                 :            :     return NULL;
    1875                 :            : 
    1876                 :         43 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    1877                 :            : 
    1878                 :            :   /* Adjust output precision.  */
    1879                 :         43 :   if (TYPE_PRECISION (new_type) < target_precision)
    1880                 :          0 :     new_type = build_nonstandard_integer_type
    1881                 :          0 :       (target_precision, TYPE_UNSIGNED (new_type));
    1882                 :            : 
    1883                 :            :   /* Check for target support.  */
    1884                 :         43 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    1885                 :         43 :   if (!new_vectype
    1886                 :         86 :       || !direct_internal_fn_supported_p
    1887                 :         43 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    1888                 :         40 :     return NULL;
    1889                 :            : 
    1890                 :            :   /* The IR requires a valid vector type for the cast result, even though
    1891                 :            :      it's likely to be discarded.  */
    1892                 :          3 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    1893                 :          3 :   if (!*type_out)
    1894                 :            :     return NULL;
    1895                 :            : 
    1896                 :            :   /* Generate the IFN_MULHRS call.  */
    1897                 :          3 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    1898                 :          3 :   tree new_ops[2];
    1899                 :          3 :   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
    1900                 :            :                        unprom_mult, new_vectype);
    1901                 :          3 :   gcall *mulhrs_stmt
    1902                 :          3 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    1903                 :          3 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    1904                 :          3 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    1905                 :            : 
    1906                 :          3 :   if (dump_enabled_p ())
    1907                 :          0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1908                 :            :                      "created pattern stmt: %G", mulhrs_stmt);
    1909                 :            : 
    1910                 :          3 :   return vect_convert_output (last_stmt_info, lhs_type,
    1911                 :          3 :                               mulhrs_stmt, new_vectype);
    1912                 :            : }
    1913                 :            : 
    1914                 :            : /* Recognize the patterns:
    1915                 :            : 
    1916                 :            :             ATYPE a;  // narrower than TYPE
    1917                 :            :             BTYPE b;  // narrower than TYPE
    1918                 :            :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    1919                 :            :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    1920                 :            : 
    1921                 :            :    where only the bottom half of avg is used.  Try to transform them into:
    1922                 :            : 
    1923                 :            :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    1924                 :            :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    1925                 :            : 
    1926                 :            :   followed by:
    1927                 :            : 
    1928                 :            :             TYPE avg = (TYPE) avg';
    1929                 :            : 
    1930                 :            :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    1931                 :            :   of avg is used, all or part of the cast of avg' should become redundant.
    1932                 :            : 
    1933                 :            :   If there is no target support available, generate code to distribute rshift
    1934                 :            :   over plus and add a carry.  */
    1935                 :            : 
    1936                 :            : static gimple *
    1937                 :    1493080 : vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
    1938                 :            : {
    1939                 :            :   /* Check for a shift right by one bit.  */
    1940                 :    1493080 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    1941                 :    2655180 :   vec_info *vinfo = last_stmt_info->vinfo;
    1942                 :    1164550 :   if (!last_stmt
    1943                 :    1164550 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    1944                 :       8404 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    1945                 :    1490630 :     return NULL;
    1946                 :            : 
    1947                 :            :   /* Check that the shift result is wider than the users of the
    1948                 :            :      result need (i.e. that narrowing would be a natural choice).  */
    1949                 :       2446 :   tree lhs = gimple_assign_lhs (last_stmt);
    1950                 :       2446 :   tree type = TREE_TYPE (lhs);
    1951                 :       2446 :   unsigned int target_precision
    1952                 :       2446 :     = vect_element_precision (last_stmt_info->min_output_precision);
    1953                 :       4640 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    1954                 :            :     return NULL;
    1955                 :            : 
    1956                 :            :   /* Look through any change in sign on the shift input.  */
    1957                 :       1245 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    1958                 :       1245 :   vect_unpromoted_value unprom_plus;
    1959                 :       1245 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    1960                 :            :                                                      &unprom_plus);
    1961                 :       1245 :   if (!rshift_rhs
    1962                 :       2490 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    1963                 :            :     return NULL;
    1964                 :            : 
    1965                 :            :   /* Get the definition of the shift input.  */
    1966                 :       1245 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    1967                 :            :   if (!plus_stmt_info)
    1968                 :            :     return NULL;
    1969                 :            : 
    1970                 :            :   /* Check whether the shift input can be seen as a tree of additions on
    1971                 :            :      2 or 3 widened inputs.
    1972                 :            : 
    1973                 :            :      Note that the pattern should be a win even if the result of one or
    1974                 :            :      more additions is reused elsewhere: if the pattern matches, we'd be
    1975                 :            :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    1976                 :       4980 :   internal_fn ifn = IFN_AVG_FLOOR;
    1977                 :       4980 :   vect_unpromoted_value unprom[3];
    1978                 :       1245 :   tree new_type;
    1979                 :       1245 :   unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
    1980                 :            :                                             PLUS_EXPR, false, 3,
    1981                 :            :                                             unprom, &new_type);
    1982                 :       1245 :   if (nops == 0)
    1983                 :            :     return NULL;
    1984                 :        786 :   if (nops == 3)
    1985                 :            :     {
    1986                 :            :       /* Check that one operand is 1.  */
    1987                 :            :       unsigned int i;
    1988                 :        486 :       for (i = 0; i < 3; ++i)
    1989                 :        450 :         if (integer_onep (unprom[i].op))
    1990                 :            :           break;
    1991                 :        150 :       if (i == 3)
    1992                 :            :         return NULL;
    1993                 :            :       /* Throw away the 1 operand and keep the other two.  */
    1994                 :        114 :       if (i < 2)
    1995                 :          0 :         unprom[i] = unprom[2];
    1996                 :            :       ifn = IFN_AVG_CEIL;
    1997                 :            :     }
    1998                 :            : 
    1999                 :        750 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    2000                 :            : 
    2001                 :            :   /* We know that:
    2002                 :            : 
    2003                 :            :      (a) the operation can be viewed as:
    2004                 :            : 
    2005                 :            :            TYPE widened0 = (TYPE) UNPROM[0];
    2006                 :            :            TYPE widened1 = (TYPE) UNPROM[1];
    2007                 :            :            TYPE tmp1 = widened0 + widened1 {+ 1};
    2008                 :            :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    2009                 :            : 
    2010                 :            :      (b) the first two statements are equivalent to:
    2011                 :            : 
    2012                 :            :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    2013                 :            :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    2014                 :            : 
    2015                 :            :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    2016                 :            :          where sensible;
    2017                 :            : 
    2018                 :            :      (d) all the operations can be performed correctly at twice the width of
    2019                 :            :          NEW_TYPE, due to the nature of the average operation; and
    2020                 :            : 
    2021                 :            :      (e) users of the result of the right shift need only TARGET_PRECISION
    2022                 :            :          bits, where TARGET_PRECISION is no more than half of TYPE's
    2023                 :            :          precision.
    2024                 :            : 
    2025                 :            :      Under these circumstances, the only situation in which NEW_TYPE
    2026                 :            :      could be narrower than TARGET_PRECISION is if widened0, widened1
    2027                 :            :      and an addition result are all used more than once.  Thus we can
    2028                 :            :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    2029                 :            :      as "free", whereas widening the result of the average instruction
    2030                 :            :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    2031                 :            :      therefore better not to go narrower than TARGET_PRECISION.  */
    2032                 :        750 :   if (TYPE_PRECISION (new_type) < target_precision)
    2033                 :          0 :     new_type = build_nonstandard_integer_type (target_precision,
    2034                 :          0 :                                                TYPE_UNSIGNED (new_type));
    2035                 :            : 
    2036                 :            :   /* Check for target support.  */
    2037                 :        750 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    2038                 :        750 :   if (!new_vectype)
    2039                 :            :     return NULL;
    2040                 :            : 
    2041                 :        750 :   bool fallback_p = false;
    2042                 :            : 
    2043                 :        750 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    2044                 :            :     ;
    2045                 :        690 :   else if (TYPE_UNSIGNED (new_type)
    2046                 :        240 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    2047                 :        240 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    2048                 :        240 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    2049                 :        930 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    2050                 :            :     fallback_p = true;
    2051                 :            :   else
    2052                 :        450 :     return NULL;
    2053                 :            : 
    2054                 :            :   /* The IR requires a valid vector type for the cast result, even though
    2055                 :            :      it's likely to be discarded.  */
    2056                 :        300 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    2057                 :        300 :   if (!*type_out)
    2058                 :            :     return NULL;
    2059                 :            : 
    2060                 :        300 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    2061                 :        300 :   tree new_ops[2];
    2062                 :        300 :   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
    2063                 :            :                        unprom, new_vectype);
    2064                 :            : 
    2065                 :        300 :   if (fallback_p)
    2066                 :            :     {
    2067                 :            :       /* As a fallback, generate code for following sequence:
    2068                 :            : 
    2069                 :            :          shifted_op0 = new_ops[0] >> 1;
    2070                 :            :          shifted_op1 = new_ops[1] >> 1;
    2071                 :            :          sum_of_shifted = shifted_op0 + shifted_op1;
    2072                 :            :          unmasked_carry = new_ops[0] and/or new_ops[1];
    2073                 :            :          carry = unmasked_carry & 1;
    2074                 :            :          new_var = sum_of_shifted + carry;
    2075                 :            :       */         
    2076                 :            : 
    2077                 :        240 :       tree one_cst = build_one_cst (new_type);
    2078                 :        240 :       gassign *g;
    2079                 :            : 
    2080                 :        240 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    2081                 :        240 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    2082                 :        240 :       append_pattern_def_seq (last_stmt_info, g, new_vectype);
    2083                 :            : 
    2084                 :        240 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    2085                 :        240 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    2086                 :        240 :       append_pattern_def_seq (last_stmt_info, g, new_vectype);
    2087                 :            : 
    2088                 :        240 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    2089                 :        240 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    2090                 :            :                                shifted_op0, shifted_op1);
    2091                 :        240 :       append_pattern_def_seq (last_stmt_info, g, new_vectype);
    2092                 :            :       
    2093                 :        240 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    2094                 :        240 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    2095                 :        240 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    2096                 :        240 :       append_pattern_def_seq (last_stmt_info, g, new_vectype);
    2097                 :            :  
    2098                 :        240 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    2099                 :        240 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    2100                 :        240 :       append_pattern_def_seq (last_stmt_info, g, new_vectype);
    2101                 :            : 
    2102                 :        240 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    2103                 :        240 :       return vect_convert_output (last_stmt_info, type, g, new_vectype);
    2104                 :            :     }
    2105                 :            : 
    2106                 :            :   /* Generate the IFN_AVG* call.  */
    2107                 :         60 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    2108                 :            :                                                     new_ops[1]);
    2109                 :         60 :   gimple_call_set_lhs (average_stmt, new_var);
    2110                 :         60 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    2111                 :            : 
    2112                 :         60 :   if (dump_enabled_p ())
    2113                 :         22 :     dump_printf_loc (MSG_NOTE, vect_location,
    2114                 :            :                      "created pattern stmt: %G", average_stmt);
    2115                 :            : 
    2116                 :         60 :   return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
    2117                 :            : }
    2118                 :            : 
    2119                 :            : /* Recognize cases in which the input to a cast is wider than its
    2120                 :            :    output, and the input is fed by a widening operation.  Fold this
    2121                 :            :    by removing the unnecessary intermediate widening.  E.g.:
    2122                 :            : 
    2123                 :            :      unsigned char a;
    2124                 :            :      unsigned int b = (unsigned int) a;
    2125                 :            :      unsigned short c = (unsigned short) b;
    2126                 :            : 
    2127                 :            :    -->
    2128                 :            : 
    2129                 :            :      unsigned short c = (unsigned short) a;
    2130                 :            : 
    2131                 :            :    Although this is rare in input IR, it is an expected side-effect
    2132                 :            :    of the over-widening pattern above.
    2133                 :            : 
    2134                 :            :    This is beneficial also for integer-to-float conversions, if the
    2135                 :            :    widened integer has more bits than the float, and if the unwidened
    2136                 :            :    input doesn't.  */
    2137                 :            : 
    2138                 :            : static gimple *
    2139                 :    1494580 : vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
    2140                 :            : {
    2141                 :            :   /* Check for a cast, including an integer-to-float conversion.  */
    2142                 :    1494580 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    2143                 :    1165990 :   if (!last_stmt)
    2144                 :            :     return NULL;
    2145                 :    1165990 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2146                 :    1165990 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    2147                 :            :     return NULL;
    2148                 :            : 
    2149                 :            :   /* Make sure that the rhs is a scalar with a natural bitsize.  */
    2150                 :     106717 :   tree lhs = gimple_assign_lhs (last_stmt);
    2151                 :     106717 :   if (!lhs)
    2152                 :            :     return NULL;
    2153                 :     106717 :   tree lhs_type = TREE_TYPE (lhs);
    2154                 :     106717 :   scalar_mode lhs_mode;
    2155                 :     207743 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    2156                 :     213288 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    2157                 :        369 :     return NULL;
    2158                 :            : 
    2159                 :            :   /* Check for a narrowing operation (from a vector point of view).  */
    2160                 :     106348 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    2161                 :     106348 :   tree rhs_type = TREE_TYPE (rhs);
    2162                 :     106348 :   if (!INTEGRAL_TYPE_P (rhs_type)
    2163                 :     206266 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    2164                 :     311208 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    2165                 :            :     return NULL;
    2166                 :            : 
    2167                 :            :   /* Try to find an unpromoted input.  */
    2168                 :      19352 :   vec_info *vinfo = last_stmt_info->vinfo;
    2169                 :      19352 :   vect_unpromoted_value unprom;
    2170                 :      19352 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    2171                 :      19352 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    2172                 :            :     return NULL;
    2173                 :            : 
    2174                 :            :   /* If the bits above RHS_TYPE matter, make sure that they're the
    2175                 :            :      same when extending from UNPROM as they are when extending from RHS.  */
    2176                 :       4651 :   if (!INTEGRAL_TYPE_P (lhs_type)
    2177                 :       4651 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    2178                 :            :     return NULL;
    2179                 :            : 
    2180                 :            :   /* We can get the same result by casting UNPROM directly, to avoid
    2181                 :            :      the unnecessary widening and narrowing.  */
    2182                 :       4651 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    2183                 :            : 
    2184                 :       4651 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    2185                 :       4651 :   if (!*type_out)
    2186                 :            :     return NULL;
    2187                 :            : 
    2188                 :       4651 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2189                 :       4651 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    2190                 :       4651 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2191                 :            : 
    2192                 :       4651 :   return pattern_stmt;
    2193                 :            : }
    2194                 :            : 
    2195                 :            : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    2196                 :            :    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    2197                 :            : 
    2198                 :            : static gimple *
    2199                 :    1488470 : vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
    2200                 :            : {
    2201                 :    1488470 :   return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
    2202                 :            :                                       WIDEN_LSHIFT_EXPR, true,
    2203                 :    1488470 :                                       "vect_recog_widen_shift_pattern");
    2204                 :            : }
    2205                 :            : 
    2206                 :            : /* Detect a rotate pattern wouldn't be otherwise vectorized:
    2207                 :            : 
    2208                 :            :    type a_t, b_t, c_t;
    2209                 :            : 
    2210                 :            :    S0 a_t = b_t r<< c_t;
    2211                 :            : 
    2212                 :            :   Input/Output:
    2213                 :            : 
    2214                 :            :   * STMT_VINFO: The stmt from which the pattern search begins,
    2215                 :            :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    2216                 :            :     with a sequence:
    2217                 :            : 
    2218                 :            :    S1 d_t = -c_t;
    2219                 :            :    S2 e_t = d_t & (B - 1);
    2220                 :            :    S3 f_t = b_t << c_t;
    2221                 :            :    S4 g_t = b_t >> e_t;
    2222                 :            :    S0 a_t = f_t | g_t;
    2223                 :            : 
    2224                 :            :     where B is element bitsize of type.
    2225                 :            : 
    2226                 :            :   Output:
    2227                 :            : 
    2228                 :            :   * TYPE_OUT: The type of the output of this pattern.
    2229                 :            : 
    2230                 :            :   * Return value: A new stmt that will be used to replace the rotate
    2231                 :            :     S0 stmt.  */
    2232                 :            : 
    2233                 :            : static gimple *
    2234                 :    1488470 : vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    2235                 :            : {
    2236                 :    1488470 :   gimple *last_stmt = stmt_vinfo->stmt;
    2237                 :    1488470 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    2238                 :    1488470 :   gimple *pattern_stmt, *def_stmt;
    2239                 :    1488470 :   enum tree_code rhs_code;
    2240                 :    1488470 :   vec_info *vinfo = stmt_vinfo->vinfo;
    2241                 :    1488470 :   enum vect_def_type dt;
    2242                 :    1488470 :   optab optab1, optab2;
    2243                 :    1488470 :   edge ext_def = NULL;
    2244                 :    1488470 :   bool bswap16_p = false;
    2245                 :            : 
    2246                 :    1488470 :   if (is_gimple_assign (last_stmt))
    2247                 :            :     {
    2248                 :    1159890 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    2249                 :    1159890 :       switch (rhs_code)
    2250                 :            :         {
    2251                 :        118 :         case LROTATE_EXPR:
    2252                 :        118 :         case RROTATE_EXPR:
    2253                 :        118 :           break;
    2254                 :            :         default:
    2255                 :            :           return NULL;
    2256                 :            :         }
    2257                 :            : 
    2258                 :        118 :       lhs = gimple_assign_lhs (last_stmt);
    2259                 :        118 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    2260                 :        118 :       type = TREE_TYPE (oprnd0);
    2261                 :        118 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    2262                 :            :     }
    2263                 :     328574 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    2264                 :            :     {
    2265                 :            :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    2266                 :            :          The vectorizer has bswap support, but only if the argument isn't
    2267                 :            :          promoted.  */
    2268                 :          6 :       lhs = gimple_call_lhs (last_stmt);
    2269                 :          6 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    2270                 :          6 :       type = TREE_TYPE (oprnd0);
    2271                 :          6 :       if (!lhs
    2272                 :          6 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    2273                 :          6 :           || TYPE_PRECISION (type) <= 16
    2274                 :            :           || TREE_CODE (oprnd0) != SSA_NAME
    2275                 :          6 :           || BITS_PER_UNIT != 8
    2276                 :         12 :           || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
    2277                 :          3 :         return NULL;
    2278                 :            : 
    2279                 :          6 :       stmt_vec_info def_stmt_info;
    2280                 :          6 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    2281                 :            :         return NULL;
    2282                 :            : 
    2283                 :          6 :       if (dt != vect_internal_def)
    2284                 :            :         return NULL;
    2285                 :            : 
    2286                 :          6 :       if (gimple_assign_cast_p (def_stmt))
    2287                 :            :         {
    2288                 :          6 :           def = gimple_assign_rhs1 (def_stmt);
    2289                 :         12 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    2290                 :         12 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    2291                 :            :             oprnd0 = def;
    2292                 :            :         }
    2293                 :            : 
    2294                 :          6 :       type = TREE_TYPE (lhs);
    2295                 :          6 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    2296                 :          6 :       if (vectype == NULL_TREE)
    2297                 :            :         return NULL;
    2298                 :            : 
    2299                 :          6 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    2300                 :            :         {
    2301                 :            :           /* The encoding uses one stepped pattern for each byte in the
    2302                 :            :              16-bit word.  */
    2303                 :          9 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    2304                 :         24 :           for (unsigned i = 0; i < 3; ++i)
    2305                 :         54 :             for (unsigned j = 0; j < 2; ++j)
    2306                 :         36 :               elts.quick_push ((i + 1) * 2 - j - 1);
    2307                 :            : 
    2308                 :          6 :           vec_perm_indices indices (elts, 1,
    2309                 :          9 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    2310                 :          6 :           if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    2311                 :            :             {
    2312                 :            :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    2313                 :            :                  undo the argument promotion.  */
    2314                 :          3 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    2315                 :            :                 {
    2316                 :          0 :                   def = vect_recog_temp_ssa_var (type, NULL);
    2317                 :          0 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    2318                 :          0 :                   append_pattern_def_seq (stmt_vinfo, def_stmt);
    2319                 :          0 :                   oprnd0 = def;
    2320                 :            :                 }
    2321                 :            : 
    2322                 :            :               /* Pattern detected.  */
    2323                 :          3 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    2324                 :            : 
    2325                 :          3 :               *type_out = vectype;
    2326                 :            : 
    2327                 :            :               /* Pattern supported.  Create a stmt to be used to replace the
    2328                 :            :                  pattern, with the unpromoted argument.  */
    2329                 :          3 :               var = vect_recog_temp_ssa_var (type, NULL);
    2330                 :          3 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    2331                 :            :                                                 1, oprnd0);
    2332                 :          3 :               gimple_call_set_lhs (pattern_stmt, var);
    2333                 :          3 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    2334                 :            :                                       gimple_call_fntype (last_stmt));
    2335                 :          6 :               return pattern_stmt;
    2336                 :            :             }
    2337                 :            :         }
    2338                 :            : 
    2339                 :          3 :       oprnd1 = build_int_cst (integer_type_node, 8);
    2340                 :          3 :       rhs_code = LROTATE_EXPR;
    2341                 :          3 :       bswap16_p = true;
    2342                 :            :     }
    2343                 :            :   else
    2344                 :            :     return NULL;
    2345                 :            : 
    2346                 :        121 :   if (TREE_CODE (oprnd0) != SSA_NAME
    2347                 :        121 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
    2348                 :        121 :       || !INTEGRAL_TYPE_P (type)
    2349                 :        242 :       || !TYPE_UNSIGNED (type))
    2350                 :            :     return NULL;
    2351                 :            : 
    2352                 :        121 :   stmt_vec_info def_stmt_info;
    2353                 :        121 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    2354                 :            :     return NULL;
    2355                 :            : 
    2356                 :        121 :   if (dt != vect_internal_def
    2357                 :        117 :       && dt != vect_constant_def
    2358                 :          6 :       && dt != vect_external_def)
    2359                 :            :     return NULL;
    2360                 :            : 
    2361                 :        121 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    2362                 :        121 :   if (vectype == NULL_TREE)
    2363                 :            :     return NULL;
    2364                 :            : 
    2365                 :            :   /* If vector/vector or vector/scalar rotate is supported by the target,
    2366                 :            :      don't do anything here.  */
    2367                 :        121 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    2368                 :        121 :   if (optab1
    2369                 :        242 :       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    2370                 :            :     {
    2371                 :          5 :      use_rotate:
    2372                 :          5 :       if (bswap16_p)
    2373                 :            :         {
    2374                 :          0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    2375                 :            :             {
    2376                 :          0 :               def = vect_recog_temp_ssa_var (type, NULL);
    2377                 :          0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    2378                 :          0 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    2379                 :          0 :               oprnd0 = def;
    2380                 :            :             }
    2381                 :            : 
    2382                 :            :           /* Pattern detected.  */
    2383                 :          0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    2384                 :            : 
    2385                 :          0 :           *type_out = vectype;
    2386                 :            : 
    2387                 :            :           /* Pattern supported.  Create a stmt to be used to replace the
    2388                 :            :              pattern.  */
    2389                 :          0 :           var = vect_recog_temp_ssa_var (type, NULL);
    2390                 :          0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    2391                 :            :                                               oprnd1);
    2392                 :          0 :           return pattern_stmt;
    2393                 :            :         }
    2394                 :            :       return NULL;
    2395                 :            :     }
    2396                 :            : 
    2397                 :        116 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    2398                 :            :     {
    2399                 :        113 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    2400                 :        113 :       if (optab2
    2401                 :        226 :           && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    2402                 :          0 :         goto use_rotate;
    2403                 :            :     }
    2404                 :            : 
    2405                 :            :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    2406                 :            :      don't do anything here either.  */
    2407                 :        116 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
    2408                 :        116 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
    2409                 :        116 :   if (!optab1
    2410                 :        116 :       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
    2411                 :          2 :       || !optab2
    2412                 :        118 :       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    2413                 :            :     {
    2414                 :        114 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    2415                 :            :         return NULL;
    2416                 :        113 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
    2417                 :        113 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
    2418                 :        113 :       if (!optab1
    2419                 :        113 :           || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
    2420                 :        105 :           || !optab2
    2421                 :        218 :           || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    2422                 :          8 :         return NULL;
    2423                 :            :     }
    2424                 :            : 
    2425                 :        107 :   *type_out = vectype;
    2426                 :            : 
    2427                 :        110 :   if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    2428                 :            :     {
    2429                 :          0 :       def = vect_recog_temp_ssa_var (type, NULL);
    2430                 :          0 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    2431                 :          0 :       append_pattern_def_seq (stmt_vinfo, def_stmt);
    2432                 :          0 :       oprnd0 = def;
    2433                 :            :     }
    2434                 :            : 
    2435                 :        107 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    2436                 :          4 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    2437                 :            : 
    2438                 :        107 :   def = NULL_TREE;
    2439                 :        107 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
    2440                 :        109 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    2441                 :            :     def = oprnd1;
    2442                 :          0 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    2443                 :            :     {
    2444                 :          0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    2445                 :          0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    2446                 :          0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    2447                 :          0 :              == TYPE_PRECISION (type))
    2448                 :            :         def = rhs1;
    2449                 :            :     }
    2450                 :            : 
    2451                 :        107 :   if (def == NULL_TREE)
    2452                 :            :     {
    2453                 :          0 :       def = vect_recog_temp_ssa_var (type, NULL);
    2454                 :          0 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    2455                 :          0 :       append_pattern_def_seq (stmt_vinfo, def_stmt);
    2456                 :            :     }
    2457                 :        107 :   stype = TREE_TYPE (def);
    2458                 :        107 :   scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
    2459                 :            : 
    2460                 :        107 :   if (TREE_CODE (def) == INTEGER_CST)
    2461                 :            :     {
    2462                 :        101 :       if (!tree_fits_uhwi_p (def)
    2463                 :        101 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    2464                 :        202 :           || integer_zerop (def))
    2465                 :          0 :         return NULL;
    2466                 :        101 :       def2 = build_int_cst (stype,
    2467                 :        101 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    2468                 :            :     }
    2469                 :            :   else
    2470                 :            :     {
    2471                 :          6 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    2472                 :            : 
    2473                 :          6 :       if (vecstype == NULL_TREE)
    2474                 :            :         return NULL;
    2475                 :          6 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    2476                 :          6 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    2477                 :          6 :       if (ext_def)
    2478                 :            :         {
    2479                 :          4 :           basic_block new_bb
    2480                 :          4 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    2481                 :          4 :           gcc_assert (!new_bb);
    2482                 :            :         }
    2483                 :            :       else
    2484                 :          2 :         append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
    2485                 :            : 
    2486                 :          6 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    2487                 :          6 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
    2488                 :          6 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    2489                 :            :                                       gimple_assign_lhs (def_stmt), mask);
    2490                 :          6 :       if (ext_def)
    2491                 :            :         {
    2492                 :          4 :           basic_block new_bb
    2493                 :          4 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    2494                 :          4 :           gcc_assert (!new_bb);
    2495                 :            :         }
    2496                 :            :       else
    2497                 :          2 :         append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
    2498                 :            :     }
    2499                 :            : 
    2500                 :        107 :   var1 = vect_recog_temp_ssa_var (type, NULL);
    2501                 :        207 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    2502                 :            :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    2503                 :            :                                   oprnd0, def);
    2504                 :        107 :   append_pattern_def_seq (stmt_vinfo, def_stmt);
    2505                 :            : 
    2506                 :        107 :   var2 = vect_recog_temp_ssa_var (type, NULL);
    2507                 :        207 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    2508                 :            :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    2509                 :            :                                   oprnd0, def2);
    2510                 :        107 :   append_pattern_def_seq (stmt_vinfo, def_stmt);
    2511                 :            : 
    2512                 :            :   /* Pattern detected.  */
    2513                 :        107 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    2514                 :            : 
    2515                 :            :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    2516                 :        107 :   var = vect_recog_temp_ssa_var (type, NULL);
    2517                 :        107 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    2518                 :            : 
    2519                 :        107 :   return pattern_stmt;
    2520                 :            : }
    2521                 :            : 
    2522                 :            : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    2523                 :            :    vectorized:
    2524                 :            : 
    2525                 :            :    type a_t;
    2526                 :            :    TYPE b_T, res_T;
    2527                 :            : 
    2528                 :            :    S1 a_t = ;
    2529                 :            :    S2 b_T = ;
    2530                 :            :    S3 res_T = b_T op a_t;
    2531                 :            : 
    2532                 :            :   where type 'TYPE' is a type with different size than 'type',
    2533                 :            :   and op is <<, >> or rotate.
    2534                 :            : 
    2535                 :            :   Also detect cases:
    2536                 :            : 
    2537                 :            :    type a_t;
    2538                 :            :    TYPE b_T, c_T, res_T;
    2539                 :            : 
    2540                 :            :    S0 c_T = ;
    2541                 :            :    S1 a_t = (type) c_T;
    2542                 :            :    S2 b_T = ;
    2543                 :            :    S3 res_T = b_T op a_t;
    2544                 :            : 
    2545                 :            :   Input/Output:
    2546                 :            : 
    2547                 :            :   * STMT_VINFO: The stmt from which the pattern search begins,
    2548                 :            :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    2549                 :            :     with a shift/rotate which has same type on both operands, in the
    2550                 :            :     second case just b_T op c_T, in the first case with added cast
    2551                 :            :     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
    2552                 :            : 
    2553                 :            :   Output:
    2554                 :            : 
    2555                 :            :   * TYPE_OUT: The type of the output of this pattern.
    2556                 :            : 
    2557                 :            :   * Return value: A new stmt that will be used to replace the shift/rotate
    2558                 :            :     S3 stmt.  */
    2559                 :            : 
    2560                 :            : static gimple *
    2561                 :    1488570 : vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
    2562                 :            :                                         tree *type_out)
    2563                 :            : {
    2564                 :    1488570 :   gimple *last_stmt = stmt_vinfo->stmt;
    2565                 :    1488570 :   tree oprnd0, oprnd1, lhs, var;
    2566                 :    1488570 :   gimple *pattern_stmt;
    2567                 :    1488570 :   enum tree_code rhs_code;
    2568                 :    1488570 :   vec_info *vinfo = stmt_vinfo->vinfo;
    2569                 :            : 
    2570                 :    1488570 :   if (!is_gimple_assign (last_stmt))
    2571                 :            :     return NULL;
    2572                 :            : 
    2573                 :    1160010 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    2574                 :    1160010 :   switch (rhs_code)
    2575                 :            :     {
    2576                 :      15271 :     case LSHIFT_EXPR:
    2577                 :      15271 :     case RSHIFT_EXPR:
    2578                 :      15271 :     case LROTATE_EXPR:
    2579                 :      15271 :     case RROTATE_EXPR:
    2580                 :      15271 :       break;
    2581                 :            :     default:
    2582                 :            :       return NULL;
    2583                 :            :     }
    2584                 :            : 
    2585                 :      15271 :   lhs = gimple_assign_lhs (last_stmt);
    2586                 :      15271 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    2587                 :      15271 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    2588                 :      15271 :   if (TREE_CODE (oprnd0) != SSA_NAME
    2589                 :      14885 :       || TREE_CODE (oprnd1) != SSA_NAME
    2590                 :       2792 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    2591                 :        238 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    2592                 :      15509 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    2593                 :        238 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    2594                 :      15033 :     return NULL;
    2595                 :            : 
    2596                 :        238 :   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
    2597                 :        187 :   if (!def_vinfo)
    2598                 :            :     return NULL;
    2599                 :            : 
    2600                 :        187 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    2601                 :        187 :   if (*type_out == NULL_TREE)
    2602                 :            :     return NULL;
    2603                 :            : 
    2604                 :        187 :   tree def = NULL_TREE;
    2605                 :        187 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    2606                 :        187 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    2607                 :            :     {
    2608                 :         98 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    2609                 :        196 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    2610                 :        177 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    2611                 :         79 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    2612                 :            :         {
    2613                 :         79 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    2614                 :         79 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    2615                 :            :             def = rhs1;
    2616                 :            :           else
    2617                 :            :             {
    2618                 :         79 :               tree mask
    2619                 :         79 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    2620                 :         79 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    2621                 :         79 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    2622                 :         79 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    2623                 :         79 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    2624                 :         79 :                                                            TREE_TYPE (rhs1));
    2625                 :         79 :               append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
    2626                 :            :             }
    2627                 :            :         }
    2628                 :            :     }
    2629                 :            : 
    2630                 :         79 :   if (def == NULL_TREE)
    2631                 :            :     {
    2632                 :        108 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    2633                 :        108 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    2634                 :        108 :       append_pattern_def_seq (stmt_vinfo, def_stmt);
    2635                 :            :     }
    2636                 :            : 
    2637                 :            :   /* Pattern detected.  */
    2638                 :        187 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    2639                 :            : 
    2640                 :            :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    2641                 :        187 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    2642                 :        187 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    2643                 :            : 
    2644                 :        187 :   return pattern_stmt;
    2645                 :            : }
    2646                 :            : 
    2647                 :            : /* Return true iff the target has a vector optab implementing the operation
    2648                 :            :    CODE on type VECTYPE.  */
    2649                 :            : 
    2650                 :            : static bool
    2651                 :      32062 : target_has_vecop_for_code (tree_code code, tree vectype)
    2652                 :            : {
    2653                 :      32062 :   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
    2654                 :      32062 :   return voptab
    2655                 :      64124 :          && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
    2656                 :            : }
    2657                 :            : 
    2658                 :            : /* Verify that the target has optabs of VECTYPE to perform all the steps
    2659                 :            :    needed by the multiplication-by-immediate synthesis algorithm described by
    2660                 :            :    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
    2661                 :            :    present.  Return true iff the target supports all the steps.  */
    2662                 :            : 
    2663                 :            : static bool
    2664                 :      15985 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    2665                 :            :                                  tree vectype, bool synth_shift_p)
    2666                 :            : {
    2667                 :      15985 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    2668                 :            :     return false;
    2669                 :            : 
    2670                 :      15985 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    2671                 :      15985 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    2672                 :            : 
    2673                 :      15985 :   if (var == negate_variant
    2674                 :      15985 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    2675                 :            :     return false;
    2676                 :            : 
    2677                 :            :   /* If we must synthesize shifts with additions make sure that vector
    2678                 :            :      addition is available.  */
    2679                 :      15893 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    2680                 :            :     return false;
    2681                 :            : 
    2682                 :      34304 :   for (int i = 1; i < alg->ops; i++)
    2683                 :            :     {
    2684                 :      23731 :       switch (alg->op[i])
    2685                 :            :         {
    2686                 :            :         case alg_shift:
    2687                 :            :           break;
    2688                 :       7872 :         case alg_add_t_m2:
    2689                 :       7872 :         case alg_add_t2_m:
    2690                 :       7872 :         case alg_add_factor:
    2691                 :       7872 :           if (!supports_vplus)
    2692                 :            :             return false;
    2693                 :            :           break;
    2694                 :       4340 :         case alg_sub_t_m2:
    2695                 :       4340 :         case alg_sub_t2_m:
    2696                 :       4340 :         case alg_sub_factor:
    2697                 :       4340 :           if (!supports_vminus)
    2698                 :            :             return false;
    2699                 :            :           break;
    2700                 :            :         case alg_unknown:
    2701                 :            :         case alg_m:
    2702                 :            :         case alg_zero:
    2703                 :            :         case alg_impossible:
    2704                 :            :           return false;
    2705                 :          0 :         default:
    2706                 :          0 :           gcc_unreachable ();
    2707                 :            :         }
    2708                 :            :     }
    2709                 :            : 
    2710                 :            :   return true;
    2711                 :            : }
    2712                 :            : 
    2713                 :            : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
    2714                 :            :    putting the final result in DEST.  Append all statements but the last into
    2715                 :            :    VINFO.  Return the last statement.  */
    2716                 :            : 
    2717                 :            : static gimple *
    2718                 :        121 : synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
    2719                 :            :                            stmt_vec_info vinfo)
    2720                 :            : {
    2721                 :        121 :   HOST_WIDE_INT i;
    2722                 :        121 :   tree itype = TREE_TYPE (op);
    2723                 :        121 :   tree prev_res = op;
    2724                 :        121 :   gcc_assert (amnt >= 0);
    2725                 :        151 :   for (i = 0; i < amnt; i++)
    2726                 :            :     {
    2727                 :        151 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    2728                 :        151 :                       : dest;
    2729                 :        151 :       gimple *stmt
    2730                 :        151 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    2731                 :        151 :       prev_res = tmp_var;
    2732                 :        151 :       if (i < amnt - 1)
    2733                 :         30 :         append_pattern_def_seq (vinfo, stmt);
    2734                 :            :       else
    2735                 :        121 :         return stmt;
    2736                 :            :     }
    2737                 :          0 :   gcc_unreachable ();
    2738                 :            :   return NULL;
    2739                 :            : }
    2740                 :            : 
    2741                 :            : /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
    2742                 :            :    CODE to operands OP1 and OP2, creating a new temporary SSA var in
    2743                 :            :    the process if necessary.  Append the resulting assignment statements
    2744                 :            :    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
    2745                 :            :    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
    2746                 :            :    left shifts using additions.  */
    2747                 :            : 
    2748                 :            : static tree
    2749                 :      11201 : apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
    2750                 :            :                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
    2751                 :            : {
    2752                 :      11201 :   if (integer_zerop (op2)
    2753                 :      11201 :       && (code == LSHIFT_EXPR
    2754                 :       2836 :           || code == PLUS_EXPR))
    2755                 :            :     {
    2756                 :       2836 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    2757                 :            :       return op1;
    2758                 :            :     }
    2759                 :            : 
    2760                 :       8365 :   gimple *stmt;
    2761                 :       8365 :   tree itype = TREE_TYPE (op1);
    2762                 :       8365 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    2763                 :            : 
    2764                 :       8365 :   if (code == LSHIFT_EXPR
    2765                 :       8365 :       && synth_shift_p)
    2766                 :            :     {
    2767                 :         42 :       stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
    2768                 :            :                                          stmt_vinfo);
    2769                 :         42 :       append_pattern_def_seq (stmt_vinfo, stmt);
    2770                 :         42 :       return tmp_var;
    2771                 :            :     }
    2772                 :            : 
    2773                 :       8323 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    2774                 :       8323 :   append_pattern_def_seq (stmt_vinfo, stmt);
    2775                 :       8323 :   return tmp_var;
    2776                 :            : }
    2777                 :            : 
    2778                 :            : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    2779                 :            :    and simple arithmetic operations to be vectorized.  Record the statements
    2780                 :            :    produced in STMT_VINFO and return the last statement in the sequence or
    2781                 :            :    NULL if it's not possible to synthesize such a multiplication.
    2782                 :            :    This function mirrors the behavior of expand_mult_const in expmed.c but
    2783                 :            :    works on tree-ssa form.  */
    2784                 :            : 
    2785                 :            : static gimple *
    2786                 :      15985 : vect_synth_mult_by_constant (tree op, tree val,
    2787                 :            :                              stmt_vec_info stmt_vinfo)
    2788                 :            : {
    2789                 :      15985 :   vec_info *vinfo = stmt_vinfo->vinfo;
    2790                 :      15985 :   tree itype = TREE_TYPE (op);
    2791                 :      15985 :   machine_mode mode = TYPE_MODE (itype);
    2792                 :      15985 :   struct algorithm alg;
    2793                 :      15985 :   mult_variant variant;
    2794                 :      15985 :   if (!tree_fits_shwi_p (val))
    2795                 :            :     return NULL;
    2796                 :            : 
    2797                 :            :   /* Multiplication synthesis by shifts, adds and subs can introduce
    2798                 :            :      signed overflow where the original operation didn't.  Perform the
    2799                 :            :      operations on an unsigned type and cast back to avoid this.
    2800                 :            :      In the future we may want to relax this for synthesis algorithms
    2801                 :            :      that we can prove do not cause unexpected overflow.  */
    2802                 :      20039 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    2803                 :            : 
    2804                 :      15985 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    2805                 :            : 
    2806                 :            :   /* Targets that don't support vector shifts but support vector additions
    2807                 :            :      can synthesize shifts that way.  */
    2808                 :      15985 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    2809                 :            : 
    2810                 :      15985 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    2811                 :            :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    2812                 :            :      The vectorizer's benefit analysis will decide whether it's beneficial
    2813                 :            :      to do this.  */
    2814                 :      15985 :   bool possible = choose_mult_variant (mode, hwval, &alg,
    2815                 :            :                                         &variant, MAX_COST);
    2816                 :      15985 :   if (!possible)
    2817                 :            :     return NULL;
    2818                 :            : 
    2819                 :      15985 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    2820                 :            : 
    2821                 :      15985 :   if (!vectype
    2822                 :      15985 :       || !target_supports_mult_synth_alg (&alg, variant,
    2823                 :            :                                            vectype, synth_shift_p))
    2824                 :       5412 :     return NULL;
    2825                 :            : 
    2826                 :      10573 :   tree accumulator;
    2827                 :            : 
    2828                 :            :   /* Clear out the sequence of statements so we can populate it below.  */
    2829                 :      10573 :   gimple *stmt = NULL;
    2830                 :            : 
    2831                 :      10573 :   if (cast_to_unsigned_p)
    2832                 :            :     {
    2833                 :       2530 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    2834                 :       2530 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    2835                 :       2530 :       append_pattern_def_seq (stmt_vinfo, stmt);
    2836                 :       2530 :       op = tmp_op;
    2837                 :            :     }
    2838                 :            : 
    2839                 :      10573 :   if (alg.op[0] == alg_zero)
    2840                 :         18 :     accumulator = build_int_cst (multtype, 0);
    2841                 :            :   else
    2842                 :            :     accumulator = op;
    2843                 :            : 
    2844                 :      10573 :   bool needs_fixup = (variant == negate_variant)
    2845                 :      10573 :                       || (variant == add_variant);
    2846                 :            : 
    2847                 :      33188 :   for (int i = 1; i < alg.ops; i++)
    2848                 :            :     {
    2849                 :      22615 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    2850                 :      22615 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    2851                 :      22615 :       tree tmp_var = NULL_TREE;
    2852                 :            : 
    2853                 :      22615 :       switch (alg.op[i])
    2854                 :            :         {
    2855                 :      11414 :         case alg_shift:
    2856                 :      11414 :           if (synth_shift_p)
    2857                 :         79 :             stmt
    2858                 :         79 :               = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
    2859                 :            :                                             stmt_vinfo);
    2860                 :            :           else
    2861                 :      11335 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    2862                 :            :                                          shft_log);
    2863                 :            :           break;
    2864                 :         20 :         case alg_add_t_m2:
    2865                 :         20 :           tmp_var
    2866                 :         20 :             = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
    2867                 :            :                                             stmt_vinfo, synth_shift_p);
    2868                 :         20 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    2869                 :            :                                        tmp_var);
    2870                 :         20 :           break;
    2871                 :       2850 :         case alg_sub_t_m2:
    2872                 :       2850 :           tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
    2873                 :            :                                                   shft_log, stmt_vinfo,
    2874                 :            :                                                   synth_shift_p);
    2875                 :            :           /* In some algorithms the first step involves zeroing the
    2876                 :            :              accumulator.  If subtracting from such an accumulator
    2877                 :            :              just emit the negation directly.  */
    2878                 :       2850 :           if (integer_zerop (accumulator))
    2879                 :         18 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    2880                 :            :           else
    2881                 :       2832 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    2882                 :            :                                         tmp_var);
    2883                 :            :           break;
    2884                 :       5184 :         case alg_add_t2_m:
    2885                 :       5184 :           tmp_var
    2886                 :       5184 :             = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
    2887                 :            :                                            stmt_vinfo, synth_shift_p);
    2888                 :       5184 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    2889                 :       5184 :           break;
    2890                 :          0 :         case alg_sub_t2_m:
    2891                 :          0 :           tmp_var
    2892                 :          0 :             = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
    2893                 :            :                                            stmt_vinfo, synth_shift_p);
    2894                 :          0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    2895                 :          0 :           break;
    2896                 :       1758 :         case alg_add_factor:
    2897                 :       1758 :           tmp_var
    2898                 :       1758 :             = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
    2899                 :            :                                             stmt_vinfo, synth_shift_p);
    2900                 :       1758 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    2901                 :            :                                        tmp_var);
    2902                 :       1758 :           break;
    2903                 :       1389 :         case alg_sub_factor:
    2904                 :       1389 :           tmp_var
    2905                 :       1389 :             = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
    2906                 :            :                                            stmt_vinfo, synth_shift_p);
    2907                 :       1389 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    2908                 :            :                                       accumulator);
    2909                 :       1389 :           break;
    2910                 :          0 :         default:
    2911                 :          0 :           gcc_unreachable ();
    2912                 :            :         }
    2913                 :            :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    2914                 :            :          but rather return it directly.  */
    2915                 :            : 
    2916                 :      22615 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    2917                 :      14576 :         append_pattern_def_seq (stmt_vinfo, stmt);
    2918                 :      22615 :       accumulator = accum_tmp;
    2919                 :            :     }
    2920                 :      10573 :   if (variant == negate_variant)
    2921                 :            :     {
    2922                 :          0 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    2923                 :          0 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    2924                 :          0 :       accumulator = accum_tmp;
    2925                 :          0 :       if (cast_to_unsigned_p)
    2926                 :          0 :         append_pattern_def_seq (stmt_vinfo, stmt);
    2927                 :            :     }
    2928                 :      10573 :   else if (variant == add_variant)
    2929                 :            :     {
    2930                 :         10 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    2931                 :         10 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    2932                 :         10 :       accumulator = accum_tmp;
    2933                 :         10 :       if (cast_to_unsigned_p)
    2934                 :          6 :         append_pattern_def_seq (stmt_vinfo, stmt);
    2935                 :            :     }
    2936                 :            :   /* Move back to a signed if needed.  */
    2937                 :      10573 :   if (cast_to_unsigned_p)
    2938                 :            :     {
    2939                 :       2530 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    2940                 :       2530 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    2941                 :            :     }
    2942                 :            : 
    2943                 :            :   return stmt;
    2944                 :            : }
    2945                 :            : 
    2946                 :            : /* Detect multiplication by constant and convert it into a sequence of
    2947                 :            :    shifts and additions, subtractions, negations.  We reuse the
    2948                 :            :    choose_mult_variant algorithms from expmed.c
    2949                 :            : 
    2950                 :            :    Input/Output:
    2951                 :            : 
    2952                 :            :    STMT_VINFO: The stmt from which the pattern search begins,
    2953                 :            :    i.e. the mult stmt.
    2954                 :            : 
    2955                 :            :  Output:
    2956                 :            : 
    2957                 :            :   * TYPE_OUT: The type of the output of this pattern.
    2958                 :            : 
    2959                 :            :   * Return value: A new stmt that will be used to replace
    2960                 :            :     the multiplication.  */
    2961                 :            : 
    2962                 :            : static gimple *
    2963                 :    1490180 : vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    2964                 :            : {
    2965                 :    1490180 :   vec_info *vinfo = stmt_vinfo->vinfo;
    2966                 :    1490180 :   gimple *last_stmt = stmt_vinfo->stmt;
    2967                 :    1490180 :   tree oprnd0, oprnd1, vectype, itype;
    2968                 :    1490180 :   gimple *pattern_stmt;
    2969                 :            : 
    2970                 :    1490180 :   if (!is_gimple_assign (last_stmt))
    2971                 :            :     return NULL;
    2972                 :            : 
    2973                 :    1161620 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    2974                 :            :     return NULL;
    2975                 :            : 
    2976                 :      92352 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    2977                 :      92352 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    2978                 :      92352 :   itype = TREE_TYPE (oprnd0);
    2979                 :            : 
    2980                 :      92352 :   if (TREE_CODE (oprnd0) != SSA_NAME
    2981                 :      92347 :       || TREE_CODE (oprnd1) != INTEGER_CST
    2982                 :      48337 :       || !INTEGRAL_TYPE_P (itype)
    2983                 :     140689 :       || !type_has_mode_precision_p (itype))
    2984                 :      44015 :     return NULL;
    2985                 :            : 
    2986                 :      48337 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    2987                 :      48337 :   if (vectype == NULL_TREE)
    2988                 :            :     return NULL;
    2989                 :            : 
    2990                 :            :   /* If the target can handle vectorized multiplication natively,
    2991                 :            :      don't attempt to optimize this.  */
    2992                 :      48337 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    2993                 :      48337 :   if (mul_optab != unknown_optab)
    2994                 :            :     {
    2995                 :      48337 :       machine_mode vec_mode = TYPE_MODE (vectype);
    2996                 :      48337 :       int icode = (int) optab_handler (mul_optab, vec_mode);
    2997                 :      48337 :       if (icode != CODE_FOR_nothing)
    2998                 :            :        return NULL;
    2999                 :            :     }
    3000                 :            : 
    3001                 :      15985 :   pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
    3002                 :      15985 :   if (!pattern_stmt)
    3003                 :            :     return NULL;
    3004                 :            : 
    3005                 :            :   /* Pattern detected.  */
    3006                 :      10573 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    3007                 :            : 
    3008                 :      10573 :   *type_out = vectype;
    3009                 :            : 
    3010                 :      10573 :   return pattern_stmt;
    3011                 :            : }
    3012                 :            : 
    3013                 :            : /* Detect a signed division by a constant that wouldn't be
    3014                 :            :    otherwise vectorized:
    3015                 :            : 
    3016                 :            :    type a_t, b_t;
    3017                 :            : 
    3018                 :            :    S1 a_t = b_t / N;
    3019                 :            : 
    3020                 :            :   where type 'type' is an integral type and N is a constant.
    3021                 :            : 
    3022                 :            :   Similarly handle modulo by a constant:
    3023                 :            : 
    3024                 :            :    S4 a_t = b_t % N;
    3025                 :            : 
    3026                 :            :   Input/Output:
    3027                 :            : 
    3028                 :            :   * STMT_VINFO: The stmt from which the pattern search begins,
    3029                 :            :     i.e. the division stmt.  S1 is replaced by if N is a power
    3030                 :            :     of two constant and type is signed:
    3031                 :            :   S3  y_t = b_t < 0 ? N - 1 : 0;
    3032                 :            :   S2  x_t = b_t + y_t;
    3033                 :            :   S1' a_t = x_t >> log2 (N);
    3034                 :            : 
    3035                 :            :     S4 is replaced if N is a power of two constant and
    3036                 :            :     type is signed by (where *_T temporaries have unsigned type):
    3037                 :            :   S9  y_T = b_t < 0 ? -1U : 0U;
    3038                 :            :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    3039                 :            :   S7  z_t = (type) z_T;
    3040                 :            :   S6  w_t = b_t + z_t;
    3041                 :            :   S5  x_t = w_t & (N - 1);
    3042                 :            :   S4' a_t = x_t - z_t;
    3043                 :            : 
    3044                 :            :   Output:
    3045                 :            : 
    3046                 :            :   * TYPE_OUT: The type of the output of this pattern.
    3047                 :            : 
    3048                 :            :   * Return value: A new stmt that will be used to replace the division
    3049                 :            :     S1 or modulo S4 stmt.  */
    3050                 :            : 
    3051                 :            : static gimple *
    3052                 :    1488570 : vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    3053                 :            : {
    3054                 :    1488570 :   vec_info *vinfo = stmt_vinfo->vinfo;
    3055                 :    1488570 :   gimple *last_stmt = stmt_vinfo->stmt;
    3056                 :    1488570 :   tree oprnd0, oprnd1, vectype, itype, cond;
    3057                 :    1488570 :   gimple *pattern_stmt, *def_stmt;
    3058                 :    1488570 :   enum tree_code rhs_code;
    3059                 :    1488570 :   optab optab;
    3060                 :    1488570 :   tree q;
    3061                 :    1488570 :   int dummy_int, prec;
    3062                 :            : 
    3063                 :    1488570 :   if (!is_gimple_assign (last_stmt))
    3064                 :            :     return NULL;
    3065                 :            : 
    3066                 :    1160010 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    3067                 :    1160010 :   switch (rhs_code)
    3068                 :            :     {
    3069                 :       2958 :     case TRUNC_DIV_EXPR:
    3070                 :       2958 :     case EXACT_DIV_EXPR:
    3071                 :       2958 :     case TRUNC_MOD_EXPR:
    3072                 :       2958 :       break;
    3073                 :            :     default:
    3074                 :            :       return NULL;
    3075                 :            :     }
    3076                 :            : 
    3077                 :       2958 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    3078                 :       2958 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    3079                 :       2958 :   itype = TREE_TYPE (oprnd0);
    3080                 :       2958 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3081                 :       2935 :       || TREE_CODE (oprnd1) != INTEGER_CST
    3082                 :       1473 :       || TREE_CODE (itype) != INTEGER_TYPE
    3083                 :       4431 :       || !type_has_mode_precision_p (itype))
    3084                 :       1485 :     return NULL;
    3085                 :            : 
    3086                 :       1473 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    3087                 :       1473 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    3088                 :       1473 :   if (vectype == NULL_TREE)
    3089                 :            :     return NULL;
    3090                 :            : 
    3091                 :       1473 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    3092                 :            :     {
    3093                 :            :       /* If the target can handle vectorized division or modulo natively,
    3094                 :            :          don't attempt to optimize this, since native division is likely
    3095                 :            :          to give smaller code.  */
    3096                 :          8 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    3097                 :          8 :       if (optab != unknown_optab)
    3098                 :            :         {
    3099                 :          8 :           machine_mode vec_mode = TYPE_MODE (vectype);
    3100                 :          8 :           int icode = (int) optab_handler (optab, vec_mode);
    3101                 :          8 :           if (icode != CODE_FOR_nothing)
    3102                 :            :             return NULL;
    3103                 :            :         }
    3104                 :            :     }
    3105                 :            : 
    3106                 :       1473 :   prec = TYPE_PRECISION (itype);
    3107                 :       1473 :   if (integer_pow2p (oprnd1))
    3108                 :            :     {
    3109                 :        576 :       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
    3110                 :          4 :         return NULL;
    3111                 :            : 
    3112                 :            :       /* Pattern detected.  */
    3113                 :        572 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    3114                 :            : 
    3115                 :        572 :       *type_out = vectype;
    3116                 :            : 
    3117                 :            :       /* Check if the target supports this internal function.  */
    3118                 :        572 :       internal_fn ifn = IFN_DIV_POW2;
    3119                 :        572 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    3120                 :            :         {
    3121                 :          0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    3122                 :            : 
    3123                 :          0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    3124                 :          0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    3125                 :          0 :           gimple_call_set_lhs (div_stmt, var_div);
    3126                 :            : 
    3127                 :          0 :           if (rhs_code == TRUNC_MOD_EXPR)
    3128                 :            :             {
    3129                 :          0 :               append_pattern_def_seq (stmt_vinfo, div_stmt);
    3130                 :          0 :               def_stmt
    3131                 :          0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3132                 :            :                                        LSHIFT_EXPR, var_div, shift);
    3133                 :          0 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    3134                 :          0 :               pattern_stmt
    3135                 :          0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3136                 :            :                                        MINUS_EXPR, oprnd0,
    3137                 :            :                                        gimple_assign_lhs (def_stmt));
    3138                 :            :             }
    3139                 :            :           else
    3140                 :            :             pattern_stmt = div_stmt;
    3141                 :          0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3142                 :            : 
    3143                 :          0 :           return pattern_stmt;
    3144                 :            :         }
    3145                 :            : 
    3146                 :        572 :       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
    3147                 :        572 :                      build_int_cst (itype, 0));
    3148                 :        572 :       if (rhs_code == TRUNC_DIV_EXPR
    3149                 :        572 :           || rhs_code == EXACT_DIV_EXPR)
    3150                 :            :         {
    3151                 :        496 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    3152                 :        496 :           tree shift;
    3153                 :        496 :           def_stmt
    3154                 :        496 :             = gimple_build_assign (var, COND_EXPR, cond,
    3155                 :        496 :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    3156                 :            :                                                 build_int_cst (itype, 1)),
    3157                 :        496 :                                    build_int_cst (itype, 0));
    3158                 :        496 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3159                 :        496 :           var = vect_recog_temp_ssa_var (itype, NULL);
    3160                 :        496 :           def_stmt
    3161                 :        496 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    3162                 :            :                                    gimple_assign_lhs (def_stmt));
    3163                 :        496 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3164                 :            : 
    3165                 :        496 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    3166                 :        496 :           pattern_stmt
    3167                 :        496 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3168                 :            :                                    RSHIFT_EXPR, var, shift);
    3169                 :            :         }
    3170                 :            :       else
    3171                 :            :         {
    3172                 :         76 :           tree signmask;
    3173                 :         76 :           if (compare_tree_int (oprnd1, 2) == 0)
    3174                 :            :             {
    3175                 :         17 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    3176                 :         17 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    3177                 :         17 :                                               build_int_cst (itype, 1),
    3178                 :         17 :                                               build_int_cst (itype, 0));
    3179                 :         17 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    3180                 :            :             }
    3181                 :            :           else
    3182                 :            :             {
    3183                 :         59 :               tree utype
    3184                 :         59 :                 = build_nonstandard_integer_type (prec, 1);
    3185                 :         59 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    3186                 :         59 :               tree shift
    3187                 :         59 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    3188                 :         59 :                                         - tree_log2 (oprnd1));
    3189                 :         59 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    3190                 :            : 
    3191                 :         59 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    3192                 :         59 :                                               build_int_cst (utype, -1),
    3193                 :         59 :                                               build_int_cst (utype, 0));
    3194                 :         59 :               append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
    3195                 :         59 :               var = vect_recog_temp_ssa_var (utype, NULL);
    3196                 :         59 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    3197                 :            :                                               gimple_assign_lhs (def_stmt),
    3198                 :            :                                               shift);
    3199                 :         59 :               append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
    3200                 :         59 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    3201                 :         59 :               def_stmt
    3202                 :         59 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    3203                 :         59 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    3204                 :            :             }
    3205                 :         76 :           def_stmt
    3206                 :         76 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3207                 :            :                                    PLUS_EXPR, oprnd0, signmask);
    3208                 :         76 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3209                 :         76 :           def_stmt
    3210                 :         76 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3211                 :            :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    3212                 :         76 :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    3213                 :            :                                                 build_int_cst (itype, 1)));
    3214                 :         76 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3215                 :            : 
    3216                 :         76 :           pattern_stmt
    3217                 :         76 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3218                 :            :                                    MINUS_EXPR, gimple_assign_lhs (def_stmt),
    3219                 :            :                                    signmask);
    3220                 :            :         }
    3221                 :            : 
    3222                 :        572 :       return pattern_stmt;
    3223                 :            :     }
    3224                 :            : 
    3225                 :        897 :   if (prec > HOST_BITS_PER_WIDE_INT
    3226                 :        897 :       || integer_zerop (oprnd1))
    3227                 :          0 :     return NULL;
    3228                 :            : 
    3229                 :        897 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    3230                 :            :     return NULL;
    3231                 :            : 
    3232                 :        622 :   if (TYPE_UNSIGNED (itype))
    3233                 :            :     {
    3234                 :        344 :       unsigned HOST_WIDE_INT mh, ml;
    3235                 :        344 :       int pre_shift, post_shift;
    3236                 :        344 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    3237                 :        344 :                                   & GET_MODE_MASK (itype_mode));
    3238                 :        344 :       tree t1, t2, t3, t4;
    3239                 :            : 
    3240                 :        344 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    3241                 :            :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    3242                 :          6 :         return NULL;
    3243                 :            : 
    3244                 :            :       /* Find a suitable multiplier and right shift count
    3245                 :            :          instead of multiplying with D.  */
    3246                 :        338 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
    3247                 :            : 
    3248                 :            :       /* If the suggested multiplier is more than SIZE bits, we can do better
    3249                 :            :          for even divisors, using an initial right shift.  */
    3250                 :        338 :       if (mh != 0 && (d & 1) == 0)
    3251                 :            :         {
    3252                 :          2 :           pre_shift = ctz_or_zero (d);
    3253                 :          2 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    3254                 :            :                                   &ml, &post_shift, &dummy_int);
    3255                 :          2 :           gcc_assert (!mh);
    3256                 :            :         }
    3257                 :            :       else
    3258                 :            :         pre_shift = 0;
    3259                 :            : 
    3260                 :        338 :       if (mh != 0)
    3261                 :            :         {
    3262                 :         19 :           if (post_shift - 1 >= prec)
    3263                 :            :             return NULL;
    3264                 :            : 
    3265                 :            :           /* t1 = oprnd0 h* ml;
    3266                 :            :              t2 = oprnd0 - t1;
    3267                 :            :              t3 = t2 >> 1;
    3268                 :            :              t4 = t1 + t3;
    3269                 :            :              q = t4 >> (post_shift - 1);  */
    3270                 :         19 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    3271                 :         19 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    3272                 :            :                                           build_int_cst (itype, ml));
    3273                 :         19 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3274                 :            : 
    3275                 :         19 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    3276                 :         19 :           def_stmt
    3277                 :         19 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    3278                 :         19 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3279                 :            : 
    3280                 :         19 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    3281                 :         19 :           def_stmt
    3282                 :         19 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    3283                 :         19 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3284                 :            : 
    3285                 :         19 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    3286                 :         19 :           def_stmt
    3287                 :         19 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    3288                 :            : 
    3289                 :         19 :           if (post_shift != 1)
    3290                 :            :             {
    3291                 :         19 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    3292                 :            : 
    3293                 :         19 :               q = vect_recog_temp_ssa_var (itype, NULL);
    3294                 :         19 :               pattern_stmt
    3295                 :         19 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    3296                 :         19 :                                        build_int_cst (itype, post_shift - 1));
    3297                 :            :             }
    3298                 :            :           else
    3299                 :            :             {
    3300                 :            :               q = t4;
    3301                 :            :               pattern_stmt = def_stmt;
    3302                 :            :             }
    3303                 :            :         }
    3304                 :            :       else
    3305                 :            :         {
    3306                 :        319 :           if (pre_shift >= prec || post_shift >= prec)
    3307                 :            :             return NULL;
    3308                 :            : 
    3309                 :            :           /* t1 = oprnd0 >> pre_shift;
    3310                 :            :              t2 = t1 h* ml;
    3311                 :            :              q = t2 >> post_shift;  */
    3312                 :        319 :           if (pre_shift)
    3313                 :            :             {
    3314                 :          2 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    3315                 :          2 :               def_stmt
    3316                 :          2 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    3317                 :            :                                        build_int_cst (NULL, pre_shift));
    3318                 :          2 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    3319                 :            :             }
    3320                 :            :           else
    3321                 :            :             t1 = oprnd0;
    3322                 :            : 
    3323                 :        319 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    3324                 :        319 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    3325                 :            :                                           build_int_cst (itype, ml));
    3326                 :            : 
    3327                 :        319 :           if (post_shift)
    3328                 :            :             {
    3329                 :        319 :               append_pattern_def_seq (stmt_vinfo, def_stmt);
    3330                 :            : 
    3331                 :        319 :               q = vect_recog_temp_ssa_var (itype, NULL);
    3332                 :        319 :               def_stmt
    3333                 :        319 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    3334                 :            :                                        build_int_cst (itype, post_shift));
    3335                 :            :             }
    3336                 :            :           else
    3337                 :            :             q = t2;
    3338                 :            : 
    3339                 :            :           pattern_stmt = def_stmt;
    3340                 :            :         }
    3341                 :            :     }
    3342                 :            :   else
    3343                 :            :     {
    3344                 :        278 :       unsigned HOST_WIDE_INT ml;
    3345                 :        278 :       int post_shift;
    3346                 :        278 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    3347                 :        278 :       unsigned HOST_WIDE_INT abs_d;
    3348                 :        278 :       bool add = false;
    3349                 :        278 :       tree t1, t2, t3, t4;
    3350                 :            : 
    3351                 :            :       /* Give up for -1.  */
    3352                 :        278 :       if (d == -1)
    3353                 :          0 :         return NULL;
    3354                 :            : 
    3355                 :            :       /* Since d might be INT_MIN, we have to cast to
    3356                 :            :          unsigned HOST_WIDE_INT before negating to avoid
    3357                 :            :          undefined signed overflow.  */
    3358                 :        556 :       abs_d = (d >= 0
    3359                 :        278 :                ? (unsigned HOST_WIDE_INT) d
    3360                 :          6 :                : - (unsigned HOST_WIDE_INT) d);
    3361                 :            : 
    3362                 :            :       /* n rem d = n rem -d */
    3363                 :        278 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    3364                 :            :         {
    3365                 :          0 :           d = abs_d;
    3366                 :          0 :           oprnd1 = build_int_cst (itype, abs_d);
    3367                 :            :         }
    3368                 :        278 :       else if (HOST_BITS_PER_WIDE_INT >= prec
    3369                 :        278 :                && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    3370                 :            :         /* This case is not handled correctly below.  */
    3371                 :            :         return NULL;
    3372                 :            : 
    3373                 :        278 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
    3374                 :        278 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    3375                 :            :         {
    3376                 :         87 :           add = true;
    3377                 :         87 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    3378                 :            :         }
    3379                 :        278 :       if (post_shift >= prec)
    3380                 :            :         return NULL;
    3381                 :            : 
    3382                 :            :       /* t1 = oprnd0 h* ml;  */
    3383                 :        278 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    3384                 :        278 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    3385                 :            :                                       build_int_cst (itype, ml));
    3386                 :            : 
    3387                 :        278 :       if (add)
    3388                 :            :         {
    3389                 :            :           /* t2 = t1 + oprnd0;  */
    3390                 :         87 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3391                 :         87 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    3392                 :         87 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    3393                 :            :         }
    3394                 :            :       else
    3395                 :            :         t2 = t1;
    3396                 :            : 
    3397                 :        278 :       if (post_shift)
    3398                 :            :         {
    3399                 :            :           /* t3 = t2 >> post_shift;  */
    3400                 :        221 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3401                 :        221 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    3402                 :        221 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    3403                 :            :                                           build_int_cst (itype, post_shift));
    3404                 :            :         }
    3405                 :            :       else
    3406                 :            :         t3 = t2;
    3407                 :            : 
    3408                 :        278 :       wide_int oprnd0_min, oprnd0_max;
    3409                 :        278 :       int msb = 1;
    3410                 :        278 :       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
    3411                 :            :         {
    3412                 :        246 :           if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
    3413                 :            :             msb = 0;
    3414                 :         16 :           else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
    3415                 :            :             msb = -1;
    3416                 :            :         }
    3417                 :            : 
    3418                 :        107 :       if (msb == 0 && d >= 0)
    3419                 :            :         {
    3420                 :            :           /* q = t3;  */
    3421                 :            :           q = t3;
    3422                 :            :           pattern_stmt = def_stmt;
    3423                 :            :         }
    3424                 :            :       else
    3425                 :            :         {
    3426                 :            :           /* t4 = oprnd0 >> (prec - 1);
    3427                 :            :              or if we know from VRP that oprnd0 >= 0
    3428                 :            :              t4 = 0;
    3429                 :            :              or if we know from VRP that oprnd0 < 0
    3430                 :            :              t4 = -1;  */
    3431                 :        171 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3432                 :        171 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    3433                 :        171 :           if (msb != 1)
    3434                 :          2 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    3435                 :            :                                             build_int_cst (itype, msb));
    3436                 :            :           else
    3437                 :        169 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    3438                 :        169 :                                             build_int_cst (itype, prec - 1));
    3439                 :        171 :           append_pattern_def_seq (stmt_vinfo, def_stmt);
    3440                 :            : 
    3441                 :            :           /* q = t3 - t4;  or q = t4 - t3;  */
    3442                 :        171 :           q = vect_recog_temp_ssa_var (itype, NULL);
    3443                 :        501 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    3444                 :            :                                               d < 0 ? t3 : t4);
    3445                 :            :         }
    3446                 :            :     }
    3447                 :            : 
    3448                 :        616 :   if (rhs_code == TRUNC_MOD_EXPR)
    3449                 :            :     {
    3450                 :        163 :       tree r, t1;
    3451                 :            : 
    3452                 :            :       /* We divided.  Now finish by:
    3453                 :            :          t1 = q * oprnd1;
    3454                 :            :          r = oprnd0 - t1;  */
    3455                 :        163 :       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
    3456                 :            : 
    3457                 :        163 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    3458                 :        163 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    3459                 :        163 :       append_pattern_def_seq (stmt_vinfo, def_stmt);
    3460                 :            : 
    3461                 :        163 :       r = vect_recog_temp_ssa_var (itype, NULL);
    3462                 :        163 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    3463                 :            :     }
    3464                 :            : 
    3465                 :            :   /* Pattern detected.  */
    3466                 :        616 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    3467                 :            : 
    3468                 :        616 :   *type_out = vectype;
    3469                 :        616 :   return pattern_stmt;
    3470                 :            : }
    3471                 :            : 
    3472                 :            : /* Function vect_recog_mixed_size_cond_pattern
    3473                 :            : 
    3474                 :            :    Try to find the following pattern:
    3475                 :            : 
    3476                 :            :      type x_t, y_t;
    3477                 :            :      TYPE a_T, b_T, c_T;
    3478                 :            :    loop:
    3479                 :            :      S1  a_T = x_t CMP y_t ? b_T : c_T;
    3480                 :            : 
    3481                 :            :    where type 'TYPE' is an integral type which has different size
    3482                 :            :    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
    3483                 :            :    than 'type', the constants need to fit into an integer type
    3484                 :            :    with the same width as 'type') or results of conversion from 'type'.
    3485                 :            : 
    3486                 :            :    Input:
    3487                 :            : 
    3488                 :            :    * STMT_VINFO: The stmt from which the pattern search begins.
    3489                 :            : 
    3490                 :            :    Output:
    3491                 :            : 
    3492                 :            :    * TYPE_OUT: The type of the output of this pattern.
    3493                 :            : 
    3494                 :            :    * Return value: A new stmt that will be used to replace the pattern.
    3495                 :            :         Additionally a def_stmt is added.
    3496                 :            : 
    3497                 :            :         a_it = x_t CMP y_t ? b_it : c_it;
    3498                 :            :         a_T = (TYPE) a_it;  */
    3499                 :            : 
    3500                 :            : static gimple *
    3501                 :    1505630 : vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    3502                 :            : {
    3503                 :    1505630 :   vec_info *vinfo = stmt_vinfo->vinfo;
    3504                 :    1505630 :   gimple *last_stmt = stmt_vinfo->stmt;
    3505                 :    1505630 :   tree cond_expr, then_clause, else_clause;
    3506                 :    1505630 :   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
    3507                 :    1505630 :   gimple *pattern_stmt, *def_stmt;
    3508                 :    1505630 :   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
    3509                 :    1505630 :   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
    3510                 :    1505630 :   bool promotion;
    3511                 :    1505630 :   tree comp_scalar_type;
    3512                 :            : 
    3513                 :    1505630 :   if (!is_gimple_assign (last_stmt)
    3514                 :    1177060 :       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
    3515                 :    1512040 :       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    3516                 :            :     return NULL;
    3517                 :            : 
    3518                 :       4989 :   cond_expr = gimple_assign_rhs1 (last_stmt);
    3519                 :       4989 :   then_clause = gimple_assign_rhs2 (last_stmt);
    3520                 :       4989 :   else_clause = gimple_assign_rhs3 (last_stmt);
    3521                 :            : 
    3522                 :       4989 :   if (!COMPARISON_CLASS_P (cond_expr))
    3523                 :            :     return NULL;
    3524                 :            : 
    3525                 :       3563 :   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
    3526                 :       3563 :   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
    3527                 :       3563 :   if (comp_vectype == NULL_TREE)
    3528                 :            :     return NULL;
    3529                 :            : 
    3530                 :       3559 :   type = gimple_expr_type (last_stmt);
    3531                 :       3559 :   if (types_compatible_p (type, comp_scalar_type)
    3532                 :       1129 :       || ((TREE_CODE (then_clause) != INTEGER_CST
    3533                 :        486 :            || TREE_CODE (else_clause) != INTEGER_CST)
    3534                 :        645 :           && !INTEGRAL_TYPE_P (comp_scalar_type))
    3535                 :       4656 :       || !INTEGRAL_TYPE_P (type))
    3536                 :            :     return NULL;
    3537                 :            : 
    3538                 :        914 :   if ((TREE_CODE (then_clause) != INTEGER_CST
    3539                 :        428 :        && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
    3540                 :            :                               &def_stmt0, &promotion))
    3541                 :       1057 :       || (TREE_CODE (else_clause) != INTEGER_CST
    3542                 :         89 :           && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
    3543                 :            :                                  &def_stmt1, &promotion)))
    3544                 :        354 :     return NULL;
    3545                 :            : 
    3546                 :         74 :   if (orig_type0 && orig_type1
    3547                 :        578 :       && !types_compatible_p (orig_type0, orig_type1))
    3548                 :            :     return NULL;
    3549                 :            : 
    3550                 :        556 :   if (orig_type0)
    3551                 :            :     {
    3552                 :         70 :       if (!types_compatible_p (orig_type0, comp_scalar_type))
    3553                 :            :         return NULL;
    3554                 :         19 :       then_clause = gimple_assign_rhs1 (def_stmt0);
    3555                 :         19 :       itype = orig_type0;
    3556                 :            :     }
    3557                 :            : 
    3558                 :        505 :   if (orig_type1)
    3559                 :            :     {
    3560                 :         12 :       if (!types_compatible_p (orig_type1, comp_scalar_type))
    3561                 :            :         return NULL;
    3562                 :         10 :       else_clause = gimple_assign_rhs1 (def_stmt1);
    3563                 :         10 :       itype = orig_type1;
    3564                 :            :     }
    3565                 :            : 
    3566                 :            : 
    3567                 :        503 :   HOST_WIDE_INT cmp_mode_size
    3568                 :        503 :     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
    3569                 :            : 
    3570                 :        503 :   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
    3571                 :       1006 :   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
    3572                 :            :     return NULL;
    3573                 :            : 
    3574                 :        305 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3575                 :        305 :   if (vectype == NULL_TREE)
    3576                 :            :     return NULL;
    3577                 :            : 
    3578                 :        305 :   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
    3579                 :            :     return NULL;
    3580                 :            : 
    3581                 :        305 :   if (itype == NULL_TREE)
    3582                 :        294 :     itype = build_nonstandard_integer_type (cmp_mode_size,
    3583                 :        294 :                                             TYPE_UNSIGNED (type));
    3584                 :            : 
    3585                 :        305 :   if (itype == NULL_TREE
    3586                 :        915 :       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
    3587                 :          0 :     return NULL;
    3588                 :            : 
    3589                 :        305 :   vecitype = get_vectype_for_scalar_type (vinfo, itype);
    3590                 :        305 :   if (vecitype == NULL_TREE)
    3591                 :            :     return NULL;
    3592                 :            : 
    3593                 :        305 :   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
    3594                 :            :     return NULL;
    3595                 :            : 
    3596                 :        288 :   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
    3597                 :            :     {
    3598                 :         29 :       if ((TREE_CODE (then_clause) == INTEGER_CST
    3599                 :         27 :            && !int_fits_type_p (then_clause, itype))
    3600                 :         29 :           || (TREE_CODE (else_clause) == INTEGER_CST
    3601                 :         27 :               && !int_fits_type_p (else_clause, itype)))
    3602                 :            :         return NULL;
    3603                 :            :     }
    3604                 :            : 
    3605                 :        144 :   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3606                 :            :                                   COND_EXPR, unshare_expr (cond_expr),
    3607                 :            :                                   fold_convert (itype, then_clause),
    3608                 :            :                                   fold_convert (itype, else_clause));
    3609                 :        144 :   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
    3610                 :            :                                       NOP_EXPR, gimple_assign_lhs (def_stmt));
    3611                 :            : 
    3612                 :        144 :   append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
    3613                 :        144 :   *type_out = vectype;
    3614                 :            : 
    3615                 :        144 :   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
    3616                 :            : 
    3617                 :        144 :   return pattern_stmt;
    3618                 :            : }
    3619                 :            : 
    3620                 :            : 
    3621                 :            : /* Helper function of vect_recog_bool_pattern.  Called recursively, return
    3622                 :            :    true if bool VAR can and should be optimized that way.  Assume it shouldn't
    3623                 :            :    in case it's a result of a comparison which can be directly vectorized into
    3624                 :            :    a vector comparison.  Fills in STMTS with all stmts visited during the
    3625                 :            :    walk.  */
    3626                 :            : 
    3627                 :            : static bool
    3628                 :      10367 : check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
    3629                 :            : {
    3630                 :      10367 :   tree rhs1;
    3631                 :      10367 :   enum tree_code rhs_code;
    3632                 :            : 
    3633                 :      10367 :   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
    3634                 :      10261 :   if (!def_stmt_info)
    3635                 :            :     return false;
    3636                 :            : 
    3637                 :      10261 :   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
    3638                 :      10257 :   if (!def_stmt)
    3639                 :            :     return false;
    3640                 :            : 
    3641                 :      10257 :   if (stmts.contains (def_stmt))
    3642                 :            :     return true;
    3643                 :            : 
    3644                 :      10257 :   rhs1 = gimple_assign_rhs1 (def_stmt);
    3645                 :      10257 :   rhs_code = gimple_assign_rhs_code (def_stmt);
    3646                 :      10257 :   switch (rhs_code)
    3647                 :            :     {
    3648                 :          0 :     case SSA_NAME:
    3649                 :          0 :       if (! check_bool_pattern (rhs1, vinfo, stmts))
    3650                 :            :         return false;
    3651                 :            :       break;
    3652                 :            : 
    3653                 :         76 :     CASE_CONVERT:
    3654                 :         76 :       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
    3655                 :            :         return false;
    3656                 :         46 :       if (! check_bool_pattern (rhs1, vinfo, stmts))
    3657                 :            :         return false;
    3658                 :            :       break;
    3659                 :            : 
    3660                 :        545 :     case BIT_NOT_EXPR:
    3661                 :        545 :       if (! check_bool_pattern (rhs1, vinfo, stmts))
    3662                 :            :         return false;
    3663                 :            :       break;
    3664                 :            : 
    3665                 :       2754 :     case BIT_AND_EXPR:
    3666                 :       2754 :     case BIT_IOR_EXPR:
    3667                 :       2754 :     case BIT_XOR_EXPR:
    3668                 :       2754 :       if (! check_bool_pattern (rhs1, vinfo, stmts)
    3669                 :       2754 :           || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
    3670                 :       2754 :         return false;
    3671                 :            :       break;
    3672                 :            : 
    3673                 :       6882 :     default:
    3674                 :       6882 :       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
    3675                 :            :         {
    3676                 :       6155 :           tree vecitype, comp_vectype;
    3677                 :            : 
    3678                 :            :           /* If the comparison can throw, then is_gimple_condexpr will be
    3679                 :            :              false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
    3680                 :       6155 :           if (stmt_could_throw_p (cfun, def_stmt))
    3681                 :            :             return false;
    3682                 :            : 
    3683                 :       6154 :           comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
    3684                 :       6154 :           if (comp_vectype == NULL_TREE)
    3685                 :            :             return false;
    3686                 :            : 
    3687                 :       6113 :           tree mask_type = get_mask_type_for_scalar_type (vinfo,
    3688                 :       6113 :                                                           TREE_TYPE (rhs1));
    3689                 :       6113 :           if (mask_type
    3690                 :       6113 :               && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
    3691                 :            :             return false;
    3692                 :            : 
    3693                 :       2057 :           if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
    3694                 :            :             {
    3695                 :         97 :               scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
    3696                 :         97 :               tree itype
    3697                 :         97 :                 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
    3698                 :         97 :               vecitype = get_vectype_for_scalar_type (vinfo, itype);
    3699                 :         97 :               if (vecitype == NULL_TREE)
    3700                 :      10367 :                 return false;
    3701                 :            :             }
    3702                 :            :           else
    3703                 :            :             vecitype = comp_vectype;
    3704                 :       2057 :           if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
    3705                 :            :             return false;
    3706                 :            :         }
    3707                 :            :       else
    3708                 :            :         return false;
    3709                 :            :       break;
    3710                 :            :     }
    3711                 :            : 
    3712                 :          0 :   bool res = stmts.add (def_stmt);
    3713                 :            :   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
    3714                 :          0 :   gcc_assert (!res);
    3715                 :            : 
    3716                 :            :   return true;
    3717                 :            : }
    3718                 :            : 
    3719                 :            : 
    3720                 :            : /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
    3721                 :            :    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
    3722                 :            :    pattern sequence.  */
    3723                 :            : 
    3724                 :            : static tree
    3725                 :          0 : adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
    3726                 :            : {
    3727                 :          0 :   vec_info *vinfo = stmt_info->vinfo;
    3728                 :          0 :   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
    3729                 :            :                                            NOP_EXPR, var);
    3730                 :          0 :   append_pattern_def_seq (stmt_info, cast_stmt,
    3731                 :            :                           get_vectype_for_scalar_type (vinfo, type));
    3732                 :          0 :   return gimple_assign_lhs (cast_stmt);
    3733                 :            : }
    3734                 :            : 
    3735                 :            : /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
    3736                 :            :    VAR is an SSA_NAME that should be transformed from bool to a wider integer
    3737                 :            :    type, OUT_TYPE is the desired final integer type of the whole pattern.
    3738                 :            :    STMT_INFO is the info of the pattern root and is where pattern stmts should
    3739                 :            :    be associated with.  DEFS is a map of pattern defs.  */
    3740                 :            : 
    3741                 :            : static void
    3742                 :          0 : adjust_bool_pattern (tree var, tree out_type,
    3743                 :            :                      stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
    3744                 :            : {
    3745                 :          0 :   vec_info *vinfo = stmt_info->vinfo;
    3746                 :          0 :   gimple *stmt = SSA_NAME_DEF_STMT (var);
    3747                 :          0 :   enum tree_code rhs_code, def_rhs_code;
    3748                 :          0 :   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
    3749                 :          0 :   location_t loc;
    3750                 :          0 :   gimple *pattern_stmt, *def_stmt;
    3751                 :          0 :   tree trueval = NULL_TREE;
    3752                 :            : 
    3753                 :          0 :   rhs1 = gimple_assign_rhs1 (stmt);
    3754                 :          0 :   rhs2 = gimple_assign_rhs2 (stmt);
    3755                 :          0 :   rhs_code = gimple_assign_rhs_code (stmt);
    3756                 :          0 :   loc = gimple_location (stmt);
    3757                 :          0 :   switch (rhs_code)
    3758                 :            :     {
    3759                 :          0 :     case SSA_NAME:
    3760                 :          0 :     CASE_CONVERT:
    3761                 :          0 :       irhs1 = *defs.get (rhs1);
    3762                 :          0 :       itype = TREE_TYPE (irhs1);
    3763                 :          0 :       pattern_stmt
    3764                 :          0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3765                 :            :                                SSA_NAME, irhs1);
    3766                 :          0 :       break;
    3767                 :            : 
    3768                 :          0 :     case BIT_NOT_EXPR:
    3769                 :          0 :       irhs1 = *defs.get (rhs1);
    3770                 :          0 :       itype = TREE_TYPE (irhs1);
    3771                 :          0 :       pattern_stmt
    3772                 :          0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3773                 :          0 :                                BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
    3774                 :          0 :       break;
    3775                 :            : 
    3776                 :          0 :     case BIT_AND_EXPR:
    3777                 :            :       /* Try to optimize x = y & (a < b ? 1 : 0); into
    3778                 :            :          x = (a < b ? y : 0);
    3779                 :            : 
    3780                 :            :          E.g. for:
    3781                 :            :            bool a_b, b_b, c_b;
    3782                 :            :            TYPE d_T;
    3783                 :            : 
    3784                 :            :            S1  a_b = x1 CMP1 y1;
    3785                 :            :            S2  b_b = x2 CMP2 y2;
    3786                 :            :            S3  c_b = a_b & b_b;
    3787                 :            :            S4  d_T = (TYPE) c_b;
    3788                 :            : 
    3789                 :            :          we would normally emit:
    3790                 :            : 
    3791                 :            :            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    3792                 :            :            S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    3793                 :            :            S3'  c_T = a_T & b_T;
    3794                 :            :            S4'  d_T = c_T;
    3795                 :            : 
    3796                 :            :          but we can save one stmt by using the
    3797                 :            :          result of one of the COND_EXPRs in the other COND_EXPR and leave
    3798                 :            :          BIT_AND_EXPR stmt out:
    3799                 :            : 
    3800                 :            :            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    3801                 :            :            S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    3802                 :            :            S4'  f_T = c_T;
    3803                 :            : 
    3804                 :            :          At least when VEC_COND_EXPR is implemented using masks
    3805                 :            :          cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
    3806                 :            :          computes the comparison masks and ands it, in one case with
    3807                 :            :          all ones vector, in the other case with a vector register.
    3808                 :            :          Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
    3809                 :            :          often more expensive.  */
    3810                 :          0 :       def_stmt = SSA_NAME_DEF_STMT (rhs2);
    3811                 :          0 :       def_rhs_code = gimple_assign_rhs_code (def_stmt);
    3812                 :          0 :       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
    3813                 :            :         {
    3814                 :          0 :           irhs1 = *defs.get (rhs1);
    3815                 :          0 :           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
    3816                 :          0 :           if (TYPE_PRECISION (TREE_TYPE (irhs1))
    3817                 :          0 :               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
    3818                 :            :             {
    3819                 :          0 :               rhs_code = def_rhs_code;
    3820                 :          0 :               rhs1 = def_rhs1;
    3821                 :          0 :               rhs2 = gimple_assign_rhs2 (def_stmt);
    3822                 :          0 :               trueval = irhs1;
    3823                 :          0 :               goto do_compare;
    3824                 :            :             }
    3825                 :            :           else
    3826                 :          0 :             irhs2 = *defs.get (rhs2);
    3827                 :          0 :           goto and_ior_xor;
    3828                 :            :         }
    3829                 :          0 :       def_stmt = SSA_NAME_DEF_STMT (rhs1);
    3830                 :          0 :       def_rhs_code = gimple_assign_rhs_code (def_stmt);
    3831                 :          0 :       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
    3832                 :            :         {
    3833                 :          0 :           irhs2 = *defs.get (rhs2);
    3834                 :          0 :           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
    3835                 :          0 :           if (TYPE_PRECISION (TREE_TYPE (irhs2))
    3836                 :          0 :               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
    3837                 :            :             {
    3838                 :          0 :               rhs_code = def_rhs_code;
    3839                 :          0 :               rhs1 = def_rhs1;
    3840                 :          0 :               rhs2 = gimple_assign_rhs2 (def_stmt);
    3841                 :          0 :               trueval = irhs2;
    3842                 :          0 :               goto do_compare;
    3843                 :            :             }
    3844                 :            :           else
    3845                 :          0 :             irhs1 = *defs.get (rhs1);
    3846                 :          0 :           goto and_ior_xor;
    3847                 :            :         }
    3848                 :            :       /* FALLTHRU */
    3849                 :          0 :     case BIT_IOR_EXPR:
    3850                 :          0 :     case BIT_XOR_EXPR:
    3851                 :          0 :       irhs1 = *defs.get (rhs1);
    3852                 :          0 :       irhs2 = *defs.get (rhs2);
    3853                 :          0 :     and_ior_xor:
    3854                 :          0 :       if (TYPE_PRECISION (TREE_TYPE (irhs1))
    3855                 :          0 :           != TYPE_PRECISION (TREE_TYPE (irhs2)))
    3856                 :            :         {
    3857                 :          0 :           int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
    3858                 :          0 :           int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
    3859                 :          0 :           int out_prec = TYPE_PRECISION (out_type);
    3860                 :          0 :           if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
    3861                 :          0 :             irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
    3862                 :            :                                               stmt_info);
    3863                 :          0 :           else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
    3864                 :          0 :             irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
    3865                 :            :                                               stmt_info);
    3866                 :            :           else
    3867                 :            :             {
    3868                 :          0 :               irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
    3869                 :          0 :               irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
    3870                 :            :             }
    3871                 :            :         }
    3872                 :          0 :       itype = TREE_TYPE (irhs1);
    3873                 :          0 :       pattern_stmt
    3874                 :          0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3875                 :            :                                rhs_code, irhs1, irhs2);
    3876                 :          0 :       break;
    3877                 :            : 
    3878                 :          0 :     default:
    3879                 :          0 :     do_compare:
    3880                 :          0 :       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
    3881                 :          0 :       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
    3882                 :          0 :           || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
    3883                 :          0 :           || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
    3884                 :          0 :                        GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
    3885                 :            :         {
    3886                 :          0 :           scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
    3887                 :          0 :           itype
    3888                 :          0 :             = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
    3889                 :            :         }
    3890                 :            :       else
    3891                 :          0 :         itype = TREE_TYPE (rhs1);
    3892                 :          0 :       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
    3893                 :          0 :       if (trueval == NULL_TREE)
    3894                 :          0 :         trueval = build_int_cst (itype, 1);
    3895                 :            :       else
    3896                 :          0 :         gcc_checking_assert (useless_type_conversion_p (itype,
    3897                 :            :                                                         TREE_TYPE (trueval)));
    3898                 :          0 :       pattern_stmt
    3899                 :          0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    3900                 :            :                                COND_EXPR, cond_expr, trueval,
    3901                 :          0 :                                build_int_cst (itype, 0));
    3902                 :          0 :       break;
    3903                 :            :     }
    3904                 :            : 
    3905                 :          0 :   gimple_set_location (pattern_stmt, loc);
    3906                 :          0 :   append_pattern_def_seq (stmt_info, pattern_stmt,
    3907                 :            :                           get_vectype_for_scalar_type (vinfo, itype));
    3908                 :          0 :   defs.put (var, gimple_assign_lhs (pattern_stmt));
    3909                 :          0 : }
    3910                 :            : 
    3911                 :            : /* Comparison function to qsort a vector of gimple stmts after UID.  */
    3912                 :            : 
    3913                 :            : static int
    3914                 :          0 : sort_after_uid (const void *p1, const void *p2)
    3915                 :            : {
    3916                 :          0 :   const gimple *stmt1 = *(const gimple * const *)p1;
    3917                 :          0 :   const gimple *stmt2 = *(const gimple * const *)p2;
    3918                 :          0 :   return gimple_uid (stmt1) - gimple_uid (stmt2);
    3919                 :            : }
    3920                 :            : 
    3921                 :            : /* Create pattern stmts for all stmts participating in the bool pattern
    3922                 :            :    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
    3923                 :            :    OUT_TYPE.  Return the def of the pattern root.  */
    3924                 :            : 
    3925                 :            : static tree
    3926                 :          0 : adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
    3927                 :            :                    tree out_type, stmt_vec_info stmt_info)
    3928                 :            : {
    3929                 :            :   /* Gather original stmts in the bool pattern in their order of appearance
    3930                 :            :      in the IL.  */
    3931                 :          0 :   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
    3932                 :          0 :   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
    3933                 :          0 :        i != bool_stmt_set.end (); ++i)
    3934                 :          0 :     bool_stmts.quick_push (*i);
    3935                 :          0 :   bool_stmts.qsort (sort_after_uid);
    3936                 :            : 
    3937                 :            :   /* Now process them in that order, producing pattern stmts.  */
    3938                 :          0 :   hash_map <tree, tree> defs;
    3939                 :          0 :   for (unsigned i = 0; i < bool_stmts.length (); ++i)
    3940                 :          0 :     adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
    3941                 :            :                          out_type, stmt_info, defs);
    3942                 :            : 
    3943                 :            :   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
    3944                 :          0 :   gimple *pattern_stmt
    3945                 :          0 :     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    3946                 :          0 :   return gimple_assign_lhs (pattern_stmt);
    3947                 :            : }
    3948                 :            : 
    3949                 :            : /* Return the proper type for converting bool VAR into
    3950                 :            :    an integer value or NULL_TREE if no such type exists.
    3951                 :            :    The type is chosen so that the converted value has the
    3952                 :            :    same number of elements as VAR's vector type.  */
    3953                 :            : 
    3954                 :            : static tree
    3955                 :      40647 : integer_type_for_mask (tree var, vec_info *vinfo)
    3956                 :            : {
    3957                 :      40647 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    3958                 :            :     return NULL_TREE;
    3959                 :            : 
    3960                 :      19687 :   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
    3961                 :      19005 :   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    3962                 :            :     return NULL_TREE;
    3963                 :            : 
    3964                 :      12049 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    3965                 :            : }
    3966                 :            : 
    3967                 :            : /* Function vect_recog_bool_pattern
    3968                 :            : 
    3969                 :            :    Try to find pattern like following:
    3970                 :            : 
    3971                 :            :      bool a_b, b_b, c_b, d_b, e_b;
    3972                 :            :      TYPE f_T;
    3973                 :            :    loop:
    3974                 :            :      S1  a_b = x1 CMP1 y1;
    3975                 :            :      S2  b_b = x2 CMP2 y2;
    3976                 :            :      S3  c_b = a_b & b_b;
    3977                 :            :      S4  d_b = x3 CMP3 y3;
    3978                 :            :      S5  e_b = c_b | d_b;
    3979                 :            :      S6  f_T = (TYPE) e_b;
    3980                 :            : 
    3981                 :            :    where type 'TYPE' is an integral type.  Or a similar pattern
    3982                 :            :    ending in
    3983                 :            : 
    3984                 :            :      S6  f_Y = e_b ? r_Y : s_Y;
    3985                 :            : 
    3986                 :            :    as results from if-conversion of a complex condition.
    3987                 :            : 
    3988                 :            :    Input:
    3989                 :            : 
    3990                 :            :    * STMT_VINFO: The stmt at the end from which the pattern
    3991                 :            :                  search begins, i.e. cast of a bool to
    3992                 :            :                  an integer type.
    3993                 :            : 
    3994                 :            :    Output:
    3995                 :            : 
    3996                 :            :    * TYPE_OUT: The type of the output of this pattern.
    3997                 :            : 
    3998                 :            :    * Return value: A new stmt that will be used to replace the pattern.
    3999                 :            : 
    4000                 :            :         Assuming size of TYPE is the same as size of all comparisons
    4001                 :            :         (otherwise some casts would be added where needed), the above
    4002                 :            :         sequence we create related pattern stmts:
    4003                 :            :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    4004                 :            :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    4005                 :            :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    4006                 :            :         S5'  e_T = c_T | d_T;
    4007                 :            :         S6'  f_T = e_T;
    4008                 :            : 
    4009                 :            :         Instead of the above S3' we could emit:
    4010                 :            :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    4011                 :            :         S3'  c_T = a_T | b_T;
    4012                 :            :         but the above is more efficient.  */
    4013                 :            : 
    4014                 :            : static gimple *
    4015                 :    1505630 : vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    4016                 :            : {
    4017                 :    1505630 :   gimple *last_stmt = stmt_vinfo->stmt;
    4018                 :    1505630 :   enum tree_code rhs_code;
    4019                 :    1505630 :   tree var, lhs, rhs, vectype;
    4020                 :    1505630 :   vec_info *vinfo = stmt_vinfo->vinfo;
    4021                 :    1505630 :   gimple *pattern_stmt;
    4022                 :            : 
    4023                 :    1505630 :   if (!is_gimple_assign (last_stmt))
    4024                 :            :     return NULL;
    4025                 :            : 
    4026                 :    1177060 :   var = gimple_assign_rhs1 (last_stmt);
    4027                 :    1177060 :   lhs = gimple_assign_lhs (last_stmt);
    4028                 :            : 
    4029                 :    1177060 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    4030                 :            :     return NULL;
    4031                 :            : 
    4032                 :      25119 :   hash_set<gimple *> bool_stmts;
    4033                 :            : 
    4034                 :      25119 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4035                 :      25119 :   if (CONVERT_EXPR_CODE_P (rhs_code))
    4036                 :            :     {
    4037                 :       2938 :       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    4038                 :       2931 :           || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
    4039                 :            :         return NULL;
    4040                 :       1399 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    4041                 :       1399 :       if (vectype == NULL_TREE)
    4042                 :            :         return NULL;
    4043                 :            : 
    4044                 :       1399 :       if (check_bool_pattern (var, vinfo, bool_stmts))
    4045                 :            :         {
    4046                 :          0 :           rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
    4047                 :          0 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4048                 :          0 :           if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
    4049                 :          0 :             pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    4050                 :            :           else
    4051                 :          0 :             pattern_stmt
    4052                 :          0 :               = gimple_build_assign (lhs, NOP_EXPR, rhs);
    4053                 :            :         }
    4054                 :            :       else
    4055                 :            :         {
    4056                 :       1399 :           tree type = integer_type_for_mask (var, vinfo);
    4057                 :       1399 :           tree cst0, cst1, tmp;
    4058                 :            : 
    4059                 :       1399 :           if (!type)
    4060                 :            :             return NULL;
    4061                 :            : 
    4062                 :            :           /* We may directly use cond with narrowed type to avoid
    4063                 :            :              multiple cond exprs with following result packing and
    4064                 :            :              perform single cond with packed mask instead.  In case
    4065                 :            :              of widening we better make cond first and then extract
    4066                 :            :              results.  */
    4067                 :       1514 :           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    4068                 :        581 :             type = TREE_TYPE (lhs);
    4069                 :            : 
    4070                 :        757 :           cst0 = build_int_cst (type, 0);
    4071                 :        757 :           cst1 = build_int_cst (type, 1);
    4072                 :        757 :           tmp = vect_recog_temp_ssa_var (type, NULL);
    4073                 :        757 :           pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    4074                 :            : 
    4075                 :        757 :           if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    4076                 :            :             {
    4077                 :        176 :               tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    4078                 :        176 :               append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
    4079                 :            : 
    4080                 :        176 :               lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4081                 :        176 :               pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
    4082                 :            :             }
    4083                 :            :         }
    4084                 :            : 
    4085                 :        757 :       *type_out = vectype;
    4086                 :        757 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    4087                 :            : 
    4088                 :        757 :       return pattern_stmt;
    4089                 :            :     }
    4090                 :      23647 :   else if (rhs_code == COND_EXPR
    4091                 :       6400 :            && TREE_CODE (var) == SSA_NAME)
    4092                 :            :     {
    4093                 :       1480 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    4094                 :       1480 :       if (vectype == NULL_TREE)
    4095                 :            :         return NULL;
    4096                 :            : 
    4097                 :            :       /* Build a scalar type for the boolean result that when
    4098                 :            :          vectorized matches the vector type of the result in
    4099                 :            :          size and number of elements.  */
    4100                 :       1480 :       unsigned prec
    4101                 :       1480 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    4102                 :            :                                TYPE_VECTOR_SUBPARTS (vectype));
    4103                 :            : 
    4104                 :       1480 :       tree type
    4105                 :       2960 :         = build_nonstandard_integer_type (prec,
    4106                 :       1480 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    4107                 :       1480 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    4108                 :            :         return NULL;
    4109                 :            : 
    4110                 :       1480 :       if (!check_bool_pattern (var, vinfo, bool_stmts))
    4111                 :            :         return NULL;
    4112                 :            : 
    4113                 :          0 :       rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
    4114                 :            : 
    4115                 :          0 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4116                 :          0 :       pattern_stmt 
    4117                 :          0 :           = gimple_build_assign (lhs, COND_EXPR,
    4118                 :            :                                  build2 (NE_EXPR, boolean_type_node,
    4119                 :          0 :                                          rhs, build_int_cst (type, 0)),
    4120                 :            :                                  gimple_assign_rhs2 (last_stmt),
    4121                 :            :                                  gimple_assign_rhs3 (last_stmt));
    4122                 :          0 :       *type_out = vectype;
    4123                 :          0 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    4124                 :            : 
    4125                 :          0 :       return pattern_stmt;
    4126                 :            :     }
    4127                 :      22167 :   else if (rhs_code == SSA_NAME
    4128                 :       4144 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    4129                 :            :     {
    4130                 :       4143 :       stmt_vec_info pattern_stmt_info;
    4131                 :       4143 :       tree nunits_vectype;
    4132                 :       8286 :       if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
    4133                 :       4143 :                                            &nunits_vectype)
    4134                 :       4143 :           || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    4135                 :          0 :         return NULL;
    4136                 :            : 
    4137                 :       4143 :       if (check_bool_pattern (var, vinfo, bool_stmts))
    4138                 :          0 :         rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
    4139                 :            :       else
    4140                 :            :         {
    4141                 :       4143 :           tree type = integer_type_for_mask (var, vinfo);
    4142                 :       4143 :           tree cst0, cst1, new_vectype;
    4143                 :            : 
    4144                 :       4143 :           if (!type)
    4145                 :            :             return NULL;
    4146                 :            : 
    4147                 :       2597 :           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
    4148                 :        242 :             type = TREE_TYPE (vectype);
    4149                 :            : 
    4150                 :       2597 :           cst0 = build_int_cst (type, 0);
    4151                 :       2597 :           cst1 = build_int_cst (type, 1);
    4152                 :       2597 :           new_vectype = get_vectype_for_scalar_type (vinfo, type);
    4153                 :            : 
    4154                 :       2597 :           rhs = vect_recog_temp_ssa_var (type, NULL);
    4155                 :       2597 :           pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    4156                 :       2597 :           append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
    4157                 :            :         }
    4158                 :            : 
    4159                 :       2597 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    4160                 :       2597 :       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
    4161                 :            :         {
    4162                 :       2355 :           tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4163                 :       2355 :           gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
    4164                 :       2355 :           append_pattern_def_seq (stmt_vinfo, cast_stmt);
    4165                 :       2355 :           rhs = rhs2;
    4166                 :            :         }
    4167                 :       2597 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    4168                 :       2597 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    4169                 :       2597 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    4170                 :       2597 :       *type_out = vectype;
    4171                 :       2597 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    4172                 :            : 
    4173                 :       2597 :       return pattern_stmt;
    4174                 :            :     }
    4175                 :            :   else
    4176                 :            :     return NULL;
    4177                 :            : }
    4178                 :            : 
    4179                 :            : 
    4180                 :            : /* A helper for vect_recog_mask_conversion_pattern.  Build
    4181                 :            :    conversion of MASK to a type suitable for masking VECTYPE.
    4182                 :            :    Built statement gets required vectype and is appended to
    4183                 :            :    a pattern sequence of STMT_VINFO.
    4184                 :            : 
    4185                 :            :    Return converted mask.  */
    4186                 :            : 
    4187                 :            : static tree
    4188                 :       1869 : build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    4189                 :            : {
    4190                 :       1869 :   gimple *stmt;
    4191                 :       1869 :   tree masktype, tmp;
    4192                 :            : 
    4193                 :       1869 :   masktype = truth_type_for (vectype);
    4194                 :       1869 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    4195                 :       1869 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    4196                 :       1869 :   append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
    4197                 :            : 
    4198                 :       1869 :   return tmp;
    4199                 :            : }
    4200                 :            : 
    4201                 :            : 
    4202                 :            : /* Function vect_recog_mask_conversion_pattern
    4203                 :            : 
    4204                 :            :    Try to find statements which require boolean type
    4205                 :            :    converison.  Additional conversion statements are
    4206                 :            :    added to handle such cases.  For example:
    4207                 :            : 
    4208                 :            :    bool m_1, m_2, m_3;
    4209                 :            :    int i_4, i_5;
    4210                 :            :    double d_6, d_7;
    4211                 :            :    char c_1, c_2, c_3;
    4212                 :            : 
    4213                 :            :    S1   m_1 = i_4 > i_5;
    4214                 :            :    S2   m_2 = d_6 < d_7;
    4215                 :            :    S3   m_3 = m_1 & m_2;
    4216                 :            :    S4   c_1 = m_3 ? c_2 : c_3;
    4217                 :            : 
    4218                 :            :    Will be transformed into:
    4219                 :            : 
    4220                 :            :    S1   m_1 = i_4 > i_5;
    4221                 :            :    S2   m_2 = d_6 < d_7;
    4222                 :            :    S3'' m_2' = (_Bool[bitsize=32])m_2
    4223                 :            :    S3'  m_3' = m_1 & m_2';
    4224                 :            :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    4225                 :            :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    4226                 :            : 
    4227                 :            : static gimple *
    4228                 :    1507410 : vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
    4229                 :            : {
    4230                 :    1507410 :   gimple *last_stmt = stmt_vinfo->stmt;
    4231                 :    1507410 :   enum tree_code rhs_code;
    4232                 :    1507410 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    4233                 :    1507410 :   tree vectype1, vectype2;
    4234                 :    1507410 :   stmt_vec_info pattern_stmt_info;
    4235                 :    1507410 :   vec_info *vinfo = stmt_vinfo->vinfo;
    4236                 :            : 
    4237                 :            :   /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion.  */
    4238                 :    1507410 :   if (is_gimple_call (last_stmt)
    4239                 :    1507410 :       && gimple_call_internal_p (last_stmt))
    4240                 :            :     {
    4241                 :       2715 :       gcall *pattern_stmt;
    4242                 :            : 
    4243                 :       2715 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    4244                 :       2715 :       int mask_argno = internal_fn_mask_index (ifn);
    4245                 :       2715 :       if (mask_argno < 0)
    4246                 :            :         return NULL;
    4247                 :            : 
    4248                 :        738 :       bool store_p = internal_store_fn_p (ifn);
    4249                 :        738 :       if (store_p)
    4250                 :            :         {
    4251                 :        366 :           int rhs_index = internal_fn_stored_value_index (ifn);
    4252                 :        366 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    4253                 :        366 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    4254                 :            :         }
    4255                 :            :       else
    4256                 :            :         {
    4257                 :        372 :           lhs = gimple_call_lhs (last_stmt);
    4258                 :        372 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    4259                 :            :         }
    4260                 :            : 
    4261                 :        738 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    4262                 :        738 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    4263                 :        738 :       if (!mask_arg_type)
    4264                 :            :         return NULL;
    4265                 :        717 :       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    4266                 :            : 
    4267                 :        717 :       if (!vectype1 || !vectype2
    4268                 :        717 :           || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    4269                 :            :                        TYPE_VECTOR_SUBPARTS (vectype2)))
    4270                 :        571 :         return NULL;
    4271                 :            : 
    4272                 :        146 :       tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
    4273                 :            : 
    4274                 :       2861 :       auto_vec<tree, 8> args;
    4275                 :        146 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    4276                 :        146 :       args.safe_grow (nargs);
    4277                 :        630 :       for (unsigned int i = 0; i < nargs; ++i)
    4278                 :        484 :         args[i] = ((int) i == mask_argno
    4279                 :        484 :                    ? tmp
    4280                 :        338 :                    : gimple_call_arg (last_stmt, i));
    4281                 :        146 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    4282                 :            : 
    4283                 :        146 :       if (!store_p)
    4284                 :            :         {
    4285                 :        100 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4286                 :        100 :           gimple_call_set_lhs (pattern_stmt, lhs);
    4287                 :            :         }
    4288                 :        146 :       gimple_call_set_nothrow (pattern_stmt, true);
    4289                 :            : 
    4290                 :        146 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    4291                 :        146 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    4292                 :        146 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    4293                 :            : 
    4294                 :        146 :       *type_out = vectype1;
    4295                 :        146 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    4296                 :            : 
    4297                 :        146 :       return pattern_stmt;
    4298                 :            :     }
    4299                 :            : 
    4300                 :    1504690 :   if (!is_gimple_assign (last_stmt))
    4301                 :            :     return NULL;
    4302                 :            : 
    4303                 :    1178840 :   gimple *pattern_stmt;
    4304                 :    1178840 :   lhs = gimple_assign_lhs (last_stmt);
    4305                 :    1178840 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    4306                 :    1178840 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4307                 :            : 
    4308                 :            :   /* Check for cond expression requiring mask conversion.  */
    4309                 :    1178840 :   if (rhs_code == COND_EXPR)
    4310                 :            :     {
    4311                 :       9183 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    4312                 :            : 
    4313                 :       9183 :       if (TREE_CODE (rhs1) == SSA_NAME)
    4314                 :            :         {
    4315                 :       4263 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    4316                 :       4263 :           if (!rhs1_type)
    4317                 :            :             return NULL;
    4318                 :            :         }
    4319                 :       4920 :       else if (COMPARISON_CLASS_P (rhs1))
    4320                 :            :         {
    4321                 :            :           /* Check whether we're comparing scalar booleans and (if so)
    4322                 :            :              whether a better mask type exists than the mask associated
    4323                 :            :              with boolean-sized elements.  This avoids unnecessary packs
    4324                 :            :              and unpacks if the booleans are set from comparisons of
    4325                 :            :              wider types.  E.g. in:
    4326                 :            : 
    4327                 :            :                int x1, x2, x3, x4, y1, y1;
    4328                 :            :                ...
    4329                 :            :                bool b1 = (x1 == x2);
    4330                 :            :                bool b2 = (x3 == x4);
    4331                 :            :                ... = b1 == b2 ? y1 : y2;
    4332                 :            : 
    4333                 :            :              it is better for b1 and b2 to use the mask type associated
    4334                 :            :              with int elements rather bool (byte) elements.  */
    4335                 :       4920 :           rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
    4336                 :       4920 :           if (!rhs1_type)
    4337                 :       4919 :             rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
    4338                 :            :         }
    4339                 :            :       else
    4340                 :            :         return NULL;
    4341                 :            : 
    4342                 :       8589 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    4343                 :            : 
    4344                 :       8589 :       if (!vectype1 || !vectype2)
    4345                 :            :         return NULL;
    4346                 :            : 
    4347                 :            :       /* Continue if a conversion is needed.  Also continue if we have
    4348                 :            :          a comparison whose vector type would normally be different from
    4349                 :            :          VECTYPE2 when considered in isolation.  In that case we'll
    4350                 :            :          replace the comparison with an SSA name (so that we can record
    4351                 :            :          its vector type) and behave as though the comparison was an SSA
    4352                 :            :          name from the outset.  */
    4353                 :       8583 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    4354                 :            :                     TYPE_VECTOR_SUBPARTS (vectype2))
    4355                 :       8583 :           && (TREE_CODE (rhs1) == SSA_NAME
    4356                 :       3447 :               || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
    4357                 :       6999 :         return NULL;
    4358                 :            : 
    4359                 :            :       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
    4360                 :            :          in place, we can handle it in vectorizable_condition.  This avoids
    4361                 :            :          unnecessary promotion stmts and increased vectorization factor.  */
    4362                 :       1584 :       if (COMPARISON_CLASS_P (rhs1)
    4363                 :       1468 :           && INTEGRAL_TYPE_P (rhs1_type)
    4364                 :       3052 :           && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
    4365                 :            :                        TYPE_VECTOR_SUBPARTS (vectype2)))
    4366                 :            :         {
    4367                 :        311 :           enum vect_def_type dt;
    4368                 :        311 :           if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
    4369                 :        296 :               && dt == vect_external_def
    4370                 :         35 :               && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
    4371                 :        311 :               && (dt == vect_external_def
    4372                 :         35 :                   || dt == vect_constant_def))
    4373                 :            :             {
    4374                 :         26 :               tree wide_scalar_type = build_nonstandard_integer_type
    4375                 :         26 :                 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
    4376                 :         26 :                  TYPE_UNSIGNED (rhs1_type));
    4377                 :         26 :               tree vectype3 = get_vectype_for_scalar_type (vinfo,
    4378                 :            :                                                            wide_scalar_type);
    4379                 :         26 :               if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
    4380                 :          6 :                 return NULL;
    4381                 :            :             }
    4382                 :            :         }
    4383                 :            : 
    4384                 :            :       /* If rhs1 is a comparison we need to move it into a
    4385                 :            :          separate statement.  */
    4386                 :       1578 :       if (TREE_CODE (rhs1) != SSA_NAME)
    4387                 :            :         {
    4388                 :       1462 :           tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4389                 :       1462 :           pattern_stmt = gimple_build_assign (tmp, rhs1);
    4390                 :       1462 :           rhs1 = tmp;
    4391                 :       1462 :           append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
    4392                 :            :                                   rhs1_type);
    4393                 :            :         }
    4394                 :            : 
    4395                 :       1578 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    4396                 :       3156 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    4397                 :       1577 :         tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
    4398                 :            :       else
    4399                 :            :         tmp = rhs1;
    4400                 :            : 
    4401                 :       1578 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4402                 :       1578 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    4403                 :            :                                           gimple_assign_rhs2 (last_stmt),
    4404                 :            :                                           gimple_assign_rhs3 (last_stmt));
    4405                 :            : 
    4406                 :       1578 :       *type_out = vectype1;
    4407                 :       1578 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    4408                 :            : 
    4409                 :       1578 :       return pattern_stmt;
    4410                 :            :     }
    4411                 :            : 
    4412                 :            :   /* Now check for binary boolean operations requiring conversion for
    4413                 :            :      one of operands.  */
    4414                 :    1169660 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    4415                 :            :     return NULL;
    4416                 :            : 
    4417                 :      22822 :   if (rhs_code != BIT_IOR_EXPR
    4418                 :            :       && rhs_code != BIT_XOR_EXPR
    4419                 :      22822 :       && rhs_code != BIT_AND_EXPR
    4420                 :      18661 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    4421                 :            :     return NULL;
    4422                 :            : 
    4423                 :      12592 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    4424                 :            : 
    4425                 :      12592 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    4426                 :      12592 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    4427                 :            : 
    4428                 :      12592 :   if (!rhs1_type || !rhs2_type
    4429                 :      14418 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    4430                 :            :     return NULL;
    4431                 :            : 
    4432                 :        146 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    4433                 :            :     {
    4434                 :         98 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    4435                 :         98 :       if (!vectype1)
    4436                 :            :         return NULL;
    4437                 :         98 :       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
    4438                 :            :     }
    4439                 :            :   else
    4440                 :            :     {
    4441                 :         48 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    4442                 :         48 :       if (!vectype1)
    4443                 :            :         return NULL;
    4444                 :         48 :       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
    4445                 :            :     }
    4446                 :            : 
    4447                 :        146 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    4448                 :        146 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    4449                 :            : 
    4450                 :        146 :   *type_out = vectype1;
    4451                 :        146 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    4452                 :            : 
    4453                 :        146 :   return pattern_stmt;
    4454                 :            : }
    4455                 :            : 
    4456                 :            : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    4457                 :            :    the boolean condition under which it occurs, otherwise return null.  */
    4458                 :            : 
    4459                 :            : static tree
    4460                 :        347 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    4461                 :            : {
    4462                 :        347 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    4463                 :            :     {
    4464                 :        160 :       gcc_assert (gimple_assign_single_p (def_assign));
    4465                 :            :       return NULL_TREE;
    4466                 :            :     }
    4467                 :            : 
    4468                 :        187 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    4469                 :            :     {
    4470                 :        187 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    4471                 :        187 :       int mask_index = internal_fn_mask_index (ifn);
    4472                 :        187 :       return gimple_call_arg (def_call, mask_index);
    4473                 :            :     }
    4474                 :            : 
    4475                 :          0 :   gcc_unreachable ();
    4476                 :            : }
    4477                 :            : 
    4478                 :            : /* Return MASK if MASK is suitable for masking an operation on vectors
    4479                 :            :    of type VECTYPE, otherwise convert it into such a form and return
    4480                 :            :    the result.  Associate any conversion statements with STMT_INFO's
    4481                 :            :    pattern.  */
    4482                 :            : 
    4483                 :            : static tree
    4484                 :          0 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    4485                 :            :                                stmt_vec_info stmt_info, vec_info *vinfo)
    4486                 :            : {
    4487                 :          0 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    4488                 :          0 :   if (mask_type)
    4489                 :            :     {
    4490                 :          0 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    4491                 :          0 :       if (mask_vectype
    4492                 :          0 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    4493                 :          0 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    4494                 :          0 :         mask = build_mask_conversion (mask, vectype, stmt_info);
    4495                 :            :     }
    4496                 :          0 :   return mask;
    4497                 :            : }
    4498                 :            : 
    4499                 :            : /* Return the equivalent of:
    4500                 :            : 
    4501                 :            :      fold_convert (TYPE, VALUE)
    4502                 :            : 
    4503                 :            :    with the expectation that the operation will be vectorized.
    4504                 :            :    If new statements are needed, add them as pattern statements
    4505                 :            :    to STMT_INFO.  */
    4506                 :            : 
    4507                 :            : static tree
    4508                 :          0 : vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
    4509                 :            : {
    4510                 :          0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    4511                 :            :     return value;
    4512                 :            : 
    4513                 :          0 :   vec_info *vinfo = stmt_info->vinfo;
    4514                 :          0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    4515                 :          0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    4516                 :          0 :   append_pattern_def_seq (stmt_info, conversion,
    4517                 :            :                           get_vectype_for_scalar_type (vinfo, type));
    4518                 :          0 :   return new_value;
    4519                 :            : }
    4520                 :            : 
    4521                 :            : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    4522                 :            :    internal function.  Return the final statement on success and set
    4523                 :            :    *TYPE_OUT to the vector type being loaded or stored.
    4524                 :            : 
    4525                 :            :    This function only handles gathers and scatters that were recognized
    4526                 :            :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
    4527                 :            : 
    4528                 :            : static gimple *
    4529                 :    1507410 : vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
    4530                 :            : {
    4531                 :            :   /* Currently we only support this for loop vectorization.  */
    4532                 :    1507410 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
    4533                 :     620102 :   if (!loop_vinfo)
    4534                 :            :     return NULL;
    4535                 :            : 
    4536                 :            :   /* Make sure that we're looking at a gather load or scatter store.  */
    4537                 :     620102 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    4538                 :     620102 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    4539                 :            :     return NULL;
    4540                 :            : 
    4541                 :            :   /* Get the boolean that controls whether the load or store happens.
    4542                 :            :      This is null if the operation is unconditional.  */
    4543                 :        347 :   tree mask = vect_get_load_store_mask (stmt_info);
    4544                 :            : 
    4545                 :            :   /* Make sure that the target supports an appropriate internal
    4546                 :            :      function for the gather/scatter operation.  */
    4547                 :        347 :   gather_scatter_info gs_info;
    4548                 :        347 :   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
    4549                 :        347 :       || gs_info.decl)
    4550                 :            :     return NULL;
    4551                 :            : 
    4552                 :            :   /* Convert the mask to the right form.  */
    4553                 :          0 :   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
    4554                 :            :                                                  gs_info.element_type);
    4555                 :          0 :   if (mask)
    4556                 :          0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    4557                 :            :                                           loop_vinfo);
    4558                 :            : 
    4559                 :            :   /* Get the invariant base and non-invariant offset, converting the
    4560                 :            :      latter to the same width as the vector elements.  */
    4561                 :          0 :   tree base = gs_info.base;
    4562                 :          0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    4563                 :          0 :   tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
    4564                 :            :                                                 stmt_info);
    4565                 :            : 
    4566                 :            :   /* Build the new pattern statement.  */
    4567                 :          0 :   tree scale = size_int (gs_info.scale);
    4568                 :          0 :   gcall *pattern_stmt;
    4569                 :          0 :   if (DR_IS_READ (dr))
    4570                 :            :     {
    4571                 :          0 :       tree zero = build_zero_cst (gs_info.element_type);
    4572                 :          0 :       if (mask != NULL)
    4573                 :          0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
    4574                 :            :                                                    offset, scale, zero, mask);
    4575                 :            :       else
    4576                 :          0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
    4577                 :            :                                                    offset, scale, zero);
    4578                 :          0 :       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
    4579                 :          0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    4580                 :            :     }
    4581                 :            :   else
    4582                 :            :     {
    4583                 :          0 :       tree rhs = vect_get_store_rhs (stmt_info);
    4584                 :          0 :       if (mask != NULL)
    4585                 :          0 :         pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
    4586                 :            :                                                    base, offset, scale, rhs,
    4587                 :            :                                                    mask);
    4588                 :            :       else
    4589                 :          0 :         pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
    4590                 :            :                                                    base, offset, scale, rhs);
    4591                 :            :     }
    4592                 :          0 :   gimple_call_set_nothrow (pattern_stmt, true);
    4593                 :            : 
    4594                 :            :   /* Copy across relevant vectorization info and associate DR with the
    4595                 :            :      new pattern statement instead of the original statement.  */
    4596                 :          0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    4597                 :          0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    4598                 :            : 
    4599                 :          0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    4600                 :          0 :   *type_out = vectype;
    4601                 :          0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    4602                 :            : 
    4603                 :          0 :   return pattern_stmt;
    4604                 :            : }
    4605                 :            : 
    4606                 :            : /* Return true if TYPE is a non-boolean integer type.  These are the types
    4607                 :            :    that we want to consider for narrowing.  */
    4608                 :            : 
    4609                 :            : static bool
    4610                 :    2763500 : vect_narrowable_type_p (tree type)
    4611                 :            : {
    4612                 :    4661790 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    4613                 :            : }
    4614                 :            : 
    4615                 :            : /* Return true if the operation given by CODE can be truncated to N bits
    4616                 :            :    when only N bits of the output are needed.  This is only true if bit N+1
    4617                 :            :    of the inputs has no effect on the low N bits of the result.  */
    4618                 :            : 
    4619                 :            : static bool
    4620                 :     974466 : vect_truncatable_operation_p (tree_code code)
    4621                 :            : {
    4622                 :          0 :   switch (code)
    4623                 :            :     {
    4624                 :            :     case PLUS_EXPR:
    4625                 :            :     case MINUS_EXPR:
    4626                 :            :     case MULT_EXPR:
    4627                 :            :     case BIT_AND_EXPR:
    4628                 :            :     case BIT_IOR_EXPR:
    4629                 :            :     case BIT_XOR_EXPR:
    4630                 :            :     case COND_EXPR:
    4631                 :            :       return true;
    4632                 :            : 
    4633                 :          0 :     default:
    4634                 :          0 :       return false;
    4635                 :            :     }
    4636                 :            : }
    4637                 :            : 
    4638                 :            : /* Record that STMT_INFO could be changed from operating on TYPE to
    4639                 :            :    operating on a type with the precision and sign given by PRECISION
    4640                 :            :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    4641                 :            :    it might not be a whole number of bytes.  */
    4642                 :            : 
    4643                 :            : static void
    4644                 :     153187 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    4645                 :            :                          unsigned int precision, signop sign)
    4646                 :            : {
    4647                 :            :   /* Round the precision up to a whole number of bytes.  */
    4648                 :     153187 :   precision = vect_element_precision (precision);
    4649                 :     153187 :   if (precision < TYPE_PRECISION (type)
    4650                 :     153187 :       && (!stmt_info->operation_precision
    4651                 :       8021 :           || stmt_info->operation_precision > precision))
    4652                 :            :     {
    4653                 :     107474 :       stmt_info->operation_precision = precision;
    4654                 :     107474 :       stmt_info->operation_sign = sign;
    4655                 :            :     }
    4656                 :     153187 : }
    4657                 :            : 
    4658                 :            : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    4659                 :            :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    4660                 :            :    is an arbitrary bit precision; it might not be a whole number of bytes.  */
    4661                 :            : 
    4662                 :            : static void
    4663                 :     570506 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    4664                 :            :                               unsigned int min_input_precision)
    4665                 :            : {
    4666                 :            :   /* This operation in isolation only requires the inputs to have
    4667                 :            :      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
    4668                 :            :      that MIN_INPUT_PRECISION is a natural precision for the chain
    4669                 :            :      as a whole.  E.g. consider something like:
    4670                 :            : 
    4671                 :            :          unsigned short *x, *y;
    4672                 :            :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    4673                 :            : 
    4674                 :            :      The right shift can be done on unsigned chars, and only requires the
    4675                 :            :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    4676                 :            :      approach would mean turning a natural chain of single-vector unsigned
    4677                 :            :      short operations into one that truncates "*x" and then extends
    4678                 :            :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    4679                 :            :      operation and one vector for each unsigned char operation.
    4680                 :            :      This would be a significant pessimization.
    4681                 :            : 
    4682                 :            :      Instead only propagate the maximum of this precision and the precision
    4683                 :            :      required by the users of the result.  This means that we don't pessimize
    4684                 :            :      the case above but continue to optimize things like:
    4685                 :            : 
    4686                 :            :          unsigned char *y;
    4687                 :            :          unsigned short *x;
    4688                 :            :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    4689                 :            : 
    4690                 :            :      Here we would truncate two vectors of *x to a single vector of
    4691                 :            :      unsigned chars and use single-vector unsigned char operations for
    4692                 :            :      everything else, rather than doing two unsigned short copies of
    4693                 :            :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    4694                 :     570506 :   min_input_precision = MAX (min_input_precision,
    4695                 :            :                              stmt_info->min_output_precision);
    4696                 :            : 
    4697                 :     570506 :   if (min_input_precision < TYPE_PRECISION (type)
    4698                 :     570506 :       && (!stmt_info->min_input_precision
    4699                 :       8822 :           || stmt_info->min_input_precision > min_input_precision))
    4700                 :      32408 :     stmt_info->min_input_precision = min_input_precision;
    4701                 :     570506 : }
    4702                 :            : 
    4703                 :            : /* Subroutine of vect_determine_min_output_precision.  Return true if
    4704                 :            :    we can calculate a reduced number of output bits for STMT_INFO,
    4705                 :            :    whose result is LHS.  */
    4706                 :            : 
    4707                 :            : static bool
    4708                 :     549149 : vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
    4709                 :            : {
    4710                 :            :   /* Take the maximum precision required by users of the result.  */
    4711                 :     549149 :   vec_info *vinfo = stmt_info->vinfo;
    4712                 :     549149 :   unsigned int precision = 0;
    4713                 :     549149 :   imm_use_iterator iter;
    4714                 :     549149 :   use_operand_p use;
    4715                 :     567142 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    4716                 :            :     {
    4717                 :     543956 :       gimple *use_stmt = USE_STMT (use);
    4718                 :     543956 :       if (is_gimple_debug (use_stmt))
    4719                 :       2651 :         continue;
    4720                 :     541305 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    4721                 :     541305 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    4722                 :            :         return false;
    4723                 :            :       /* The input precision recorded for COND_EXPRs applies only to the
    4724                 :            :          "then" and "else" values.  */
    4725                 :      15400 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    4726                 :      15375 :       if (assign
    4727                 :      15375 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    4728                 :         58 :           && use->use != gimple_assign_rhs2_ptr (assign)
    4729                 :         58 :           && use->use != gimple_assign_rhs3_ptr (assign))
    4730                 :            :         return false;
    4731                 :      15342 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    4732                 :            :     }
    4733                 :            : 
    4734                 :      23186 :   if (dump_enabled_p ())
    4735                 :       4053 :     dump_printf_loc (MSG_NOTE, vect_location,
    4736                 :            :                      "only the low %d bits of %T are significant\n",
    4737                 :            :                      precision, lhs);
    4738                 :      23186 :   stmt_info->min_output_precision = precision;
    4739                 :      23186 :   return true;
    4740                 :            : }
    4741                 :            : 
    4742                 :            : /* Calculate min_output_precision for STMT_INFO.  */
    4743                 :            : 
    4744                 :            : static void
    4745                 :    1476760 : vect_determine_min_output_precision (stmt_vec_info stmt_info)
    4746                 :            : {
    4747                 :            :   /* We're only interested in statements with a narrowable result.  */
    4748                 :    1476760 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    4749                 :    1476760 :   if (!lhs
    4750                 :    1152050 :       || TREE_CODE (lhs) != SSA_NAME
    4751                 :    2286300 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    4752                 :            :     return;
    4753                 :            : 
    4754                 :     549149 :   if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
    4755                 :     525963 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    4756                 :            : }
    4757                 :            : 
    4758                 :            : /* Use range information to decide whether STMT (described by STMT_INFO)
    4759                 :            :    could be done in a narrower type.  This is effectively a forward
    4760                 :            :    propagation, since it uses context-independent information that applies
    4761                 :            :    to all users of an SSA name.  */
    4762                 :            : 
    4763                 :            : static void
    4764                 :    1148240 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    4765                 :            : {
    4766                 :    1148240 :   tree lhs = gimple_assign_lhs (stmt);
    4767                 :    1148240 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    4768                 :    1011360 :     return;
    4769                 :            : 
    4770                 :     805725 :   tree type = TREE_TYPE (lhs);
    4771                 :     805725 :   if (!vect_narrowable_type_p (type))
    4772                 :            :     return;
    4773                 :            : 
    4774                 :            :   /* First see whether we have any useful range information for the result.  */
    4775                 :     546336 :   unsigned int precision = TYPE_PRECISION (type);
    4776                 :     546336 :   signop sign = TYPE_SIGN (type);
    4777                 :     546336 :   wide_int min_value, max_value;
    4778                 :     546336 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    4779                 :            :     return;
    4780                 :            : 
    4781                 :     268039 :   tree_code code = gimple_assign_rhs_code (stmt);
    4782                 :     268039 :   unsigned int nops = gimple_num_ops (stmt);
    4783                 :            : 
    4784                 :     268039 :   if (!vect_truncatable_operation_p (code))
    4785                 :            :     /* Check that all relevant input operands are compatible, and update
    4786                 :            :        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    4787                 :      74437 :     for (unsigned int i = 1; i < nops; ++i)
    4788                 :            :       {
    4789                 :      62196 :         tree op = gimple_op (stmt, i);
    4790                 :      62196 :         if (TREE_CODE (op) == INTEGER_CST)
    4791                 :            :           {
    4792                 :            :             /* Don't require the integer to have RHS_TYPE (which it might
    4793                 :            :                not for things like shift amounts, etc.), but do require it
    4794                 :            :                to fit the type.  */
    4795                 :      11824 :             if (!int_fits_type_p (op, type))
    4796                 :            :               return;
    4797                 :            : 
    4798                 :      11824 :             min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
    4799                 :      11824 :             max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
    4800                 :            :           }
    4801                 :      50372 :         else if (TREE_CODE (op) == SSA_NAME)
    4802                 :            :           {
    4803                 :            :             /* Ignore codes that don't take uniform arguments.  */
    4804                 :      50322 :             if (!types_compatible_p (TREE_TYPE (op), type))
    4805                 :      40686 :               return;
    4806                 :            : 
    4807                 :      18010 :             wide_int op_min_value, op_max_value;
    4808                 :      18010 :             if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    4809                 :            :               return;
    4810                 :            : 
    4811                 :       9636 :             min_value = wi::min (min_value, op_min_value, sign);
    4812                 :       9636 :             max_value = wi::max (max_value, op_max_value, sign);
    4813                 :            :           }
    4814                 :            :         else
    4815                 :            :           return;
    4816                 :            :       }
    4817                 :            : 
    4818                 :            :   /* Try to switch signed types for unsigned types if we can.
    4819                 :            :      This is better for two reasons.  First, unsigned ops tend
    4820                 :            :      to be cheaper than signed ops.  Second, it means that we can
    4821                 :            :      handle things like:
    4822                 :            : 
    4823                 :            :         signed char c;
    4824                 :            :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    4825                 :            : 
    4826                 :            :      as:
    4827                 :            : 
    4828                 :            :         signed char c;
    4829                 :            :         unsigned short res_1 = (unsigned short) c & 0xff00;
    4830                 :            :         int res = (int) res_1;
    4831                 :            : 
    4832                 :            :      where the intermediate result res_1 has unsigned rather than
    4833                 :            :      signed type.  */
    4834                 :     405204 :   if (sign == SIGNED && !wi::neg_p (min_value))
    4835                 :            :     sign = UNSIGNED;
    4836                 :            : 
    4837                 :            :   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
    4838                 :     227303 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    4839                 :     227303 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    4840                 :     227303 :   unsigned int value_precision = MAX (precision1, precision2);
    4841                 :     227303 :   if (value_precision >= precision)
    4842                 :            :     return;
    4843                 :            : 
    4844                 :     136876 :   if (dump_enabled_p ())
    4845                 :      43566 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    4846                 :            :                      " without loss of precision: %G",
    4847                 :            :                      sign == SIGNED ? "signed" : "unsigned",
    4848                 :            :                      value_precision, stmt);
    4849                 :            : 
    4850                 :     136876 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    4851                 :     136876 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    4852                 :            : }
    4853                 :            : 
    4854                 :            : /* Use information about the users of STMT's result to decide whether
    4855                 :            :    STMT (described by STMT_INFO) could be done in a narrower type.
    4856                 :            :    This is effectively a backward propagation.  */
    4857                 :            : 
    4858                 :            : static void
    4859                 :    1148240 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    4860                 :            : {
    4861                 :    1148240 :   tree_code code = gimple_assign_rhs_code (stmt);
    4862                 :    1148240 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    4863                 :    1148240 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    4864                 :    1148240 :   if (!vect_narrowable_type_p (type))
    4865                 :     714610 :     return;
    4866                 :            : 
    4867                 :     802708 :   unsigned int precision = TYPE_PRECISION (type);
    4868                 :     802708 :   unsigned int operation_precision, min_input_precision;
    4869                 :     802708 :   switch (code)
    4870                 :            :     {
    4871                 :      82869 :     CASE_CONVERT:
    4872                 :            :       /* Only the bits that contribute to the output matter.  Don't change
    4873                 :            :          the precision of the operation itself.  */
    4874                 :      82869 :       operation_precision = precision;
    4875                 :      82869 :       min_input_precision = stmt_info->min_output_precision;
    4876                 :      82869 :       break;
    4877                 :            : 
    4878                 :      13412 :     case LSHIFT_EXPR:
    4879                 :      13412 :     case RSHIFT_EXPR:
    4880                 :      13412 :       {
    4881                 :      13412 :         tree shift = gimple_assign_rhs2 (stmt);
    4882                 :      13412 :         if (TREE_CODE (shift) != INTEGER_CST
    4883                 :      25058 :             || !wi::ltu_p (wi::to_widest (shift), precision))
    4884                 :       1766 :           return;
    4885                 :      11646 :         unsigned int const_shift = TREE_INT_CST_LOW (shift);
    4886                 :      11646 :         if (code == LSHIFT_EXPR)
    4887                 :            :           {
    4888                 :            :             /* We need CONST_SHIFT fewer bits of the input.  */
    4889                 :       4504 :             operation_precision = stmt_info->min_output_precision;
    4890                 :       4504 :             min_input_precision = (MAX (operation_precision, const_shift)
    4891                 :       4504 :                                     - const_shift);
    4892                 :            :           }
    4893                 :            :         else
    4894                 :            :           {
    4895                 :            :             /* We need CONST_SHIFT extra bits to do the operation.  */
    4896                 :       7142 :             operation_precision = (stmt_info->min_output_precision
    4897                 :            :                                    + const_shift);
    4898                 :       7142 :             min_input_precision = operation_precision;
    4899                 :            :           }
    4900                 :            :         break;
    4901                 :            :       }
    4902                 :            : 
    4903                 :     706427 :     default:
    4904                 :    1053720 :       if (vect_truncatable_operation_p (code))
    4905                 :            :         {
    4906                 :            :           /* Input bit N has no effect on output bits N-1 and lower.  */
    4907                 :     339115 :           operation_precision = stmt_info->min_output_precision;
    4908                 :     339115 :           min_input_precision = operation_precision;
    4909                 :     339115 :           break;
    4910                 :            :         }
    4911                 :            :       return;
    4912                 :            :     }
    4913                 :            : 
    4914                 :     433630 :   if (operation_precision < precision)
    4915                 :            :     {
    4916                 :      16311 :       if (dump_enabled_p ())
    4917                 :       4222 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    4918                 :            :                          " without affecting users: %G",
    4919                 :       2111 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    4920                 :            :                          operation_precision, stmt);
    4921                 :      16311 :       vect_set_operation_type (stmt_info, type, operation_precision,
    4922                 :      16311 :                                TYPE_SIGN (type));
    4923                 :            :     }
    4924                 :     433630 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    4925                 :            : }
    4926                 :            : 
    4927                 :            : /* Return true if the statement described by STMT_INFO sets a boolean
    4928                 :            :    SSA_NAME and if we know how to vectorize this kind of statement using
    4929                 :            :    vector mask types.  */
    4930                 :            : 
    4931                 :            : static bool
    4932                 :    1485940 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    4933                 :            : {
    4934                 :    1485940 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    4935                 :    1485940 :   if (!lhs
    4936                 :    1161230 :       || TREE_CODE (lhs) != SSA_NAME
    4937                 :    2304660 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    4938                 :            :     return false;
    4939                 :            : 
    4940                 :      24533 :   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    4941                 :            :     {
    4942                 :      24215 :       tree_code rhs_code = gimple_assign_rhs_code (assign);
    4943                 :      24215 :       switch (rhs_code)
    4944                 :            :         {
    4945                 :            :         CASE_CONVERT:
    4946                 :            :         case SSA_NAME:
    4947                 :            :         case BIT_NOT_EXPR:
    4948                 :            :         case BIT_IOR_EXPR:
    4949                 :            :         case BIT_XOR_EXPR:
    4950                 :            :         case BIT_AND_EXPR:
    4951                 :            :           return true;
    4952                 :            : 
    4953                 :      16182 :         default:
    4954                 :      16182 :           return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
    4955                 :            :         }
    4956                 :            :     }
    4957                 :            :   return false;
    4958                 :            : }
    4959                 :            : 
    4960                 :            : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    4961                 :            :    a vector mask type instead of a normal vector type.  Record the
    4962                 :            :    result in STMT_INFO->mask_precision.  */
    4963                 :            : 
    4964                 :            : static void
    4965                 :    1476760 : vect_determine_mask_precision (stmt_vec_info stmt_info)
    4966                 :            : {
    4967                 :    1476760 :   vec_info *vinfo = stmt_info->vinfo;
    4968                 :            : 
    4969                 :    1476760 :   if (!possible_vector_mask_operation_p (stmt_info)
    4970                 :    1476760 :       || stmt_info->mask_precision)
    4971                 :    1470110 :     return;
    4972                 :            : 
    4973                 :      13304 :   auto_vec<stmt_vec_info, 32> worklist;
    4974                 :       6652 :   worklist.quick_push (stmt_info);
    4975                 :      25068 :   while (!worklist.is_empty ())
    4976                 :            :     {
    4977                 :      18416 :       stmt_info = worklist.last ();
    4978                 :      18416 :       unsigned int orig_length = worklist.length ();
    4979                 :            : 
    4980                 :            :       /* If at least one boolean input uses a vector mask type,
    4981                 :            :          pick the mask type with the narrowest elements.
    4982                 :            : 
    4983                 :            :          ??? This is the traditional behavior.  It should always produce
    4984                 :            :          the smallest number of operations, but isn't necessarily the
    4985                 :            :          optimal choice.  For example, if we have:
    4986                 :            : 
    4987                 :            :            a = b & c
    4988                 :            : 
    4989                 :            :          where:
    4990                 :            : 
    4991                 :            :          - the user of a wants it to have a mask type for 16-bit elements (M16)
    4992                 :            :          - b also uses M16
    4993                 :            :          - c uses a mask type for 8-bit elements (M8)
    4994                 :            : 
    4995                 :            :          then picking M8 gives:
    4996                 :            : 
    4997                 :            :          - 1 M16->M8 pack for b
    4998                 :            :          - 1 M8 AND for a
    4999                 :            :          - 2 M8->M16 unpacks for the user of a
    5000                 :            : 
    5001                 :            :          whereas picking M16 would have given:
    5002                 :            : 
    5003                 :            :          - 2 M8->M16 unpacks for c
    5004                 :            :          - 2 M16 ANDs for a
    5005                 :            : 
    5006                 :            :          The number of operations are equal, but M16 would have given
    5007                 :            :          a shorter dependency chain and allowed more ILP.  */
    5008                 :      18416 :       unsigned int precision = ~0U;
    5009                 :      18416 :       gassign *assign = as_a <gassign *> (stmt_info->stmt);
    5010                 :      18416 :       unsigned int nops = gimple_num_ops (assign);
    5011                 :      53590 :       for (unsigned int i = 1; i < nops; ++i)
    5012                 :            :         {
    5013                 :      35174 :           tree rhs = gimple_op (assign, i);
    5014                 :      35174 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    5015                 :      17078 :             continue;
    5016                 :            : 
    5017                 :      18654 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    5018                 :      18654 :           if (!def_stmt_info)
    5019                 :            :             /* Don't let external or constant operands influence the choice.
    5020                 :            :                We can convert them to whichever vector type we pick.  */
    5021                 :        558 :             continue;
    5022                 :            : 
    5023                 :      18096 :           if (def_stmt_info->mask_precision)
    5024                 :            :             {
    5025                 :       8917 :               if (precision > def_stmt_info->mask_precision)
    5026                 :            :                 precision = def_stmt_info->mask_precision;
    5027                 :            :             }
    5028                 :       9179 :           else if (possible_vector_mask_operation_p (def_stmt_info))
    5029                 :       7276 :             worklist.safe_push (def_stmt_info);
    5030                 :            :         }
    5031                 :            : 
    5032                 :            :       /* Defer the choice if we need to visit operands first.  */
    5033                 :      36832 :       if (orig_length != worklist.length ())
    5034                 :       4488 :         continue;
    5035                 :            : 
    5036                 :            :       /* If the statement compares two values that shouldn't use vector masks,
    5037                 :            :          try comparing the values as normal scalars instead.  */
    5038                 :      13928 :       tree_code rhs_code = gimple_assign_rhs_code (assign);
    5039                 :      13928 :       if (precision == ~0U
    5040                 :      10788 :           && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
    5041                 :            :         {
    5042                 :       8542 :           tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    5043                 :       8542 :           scalar_mode mode;
    5044                 :       8542 :           tree vectype, mask_type;
    5045                 :       8542 :           if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
    5046                 :       8542 :               && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
    5047                 :       8487 :               && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
    5048                 :       8487 :               && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
    5049                 :      10200 :             precision = GET_MODE_BITSIZE (mode);
    5050                 :            :         }
    5051                 :            : 
    5052                 :      13928 :       if (dump_enabled_p ())
    5053                 :            :         {
    5054                 :       1448 :           if (precision == ~0U)
    5055                 :        289 :             dump_printf_loc (MSG_NOTE, vect_location,
    5056                 :            :                              "using normal nonmask vectors for %G",
    5057                 :            :                              stmt_info->stmt);
    5058                 :            :           else
    5059                 :       1159 :             dump_printf_loc (MSG_NOTE, vect_location,
    5060                 :            :                              "using boolean precision %d for %G",
    5061                 :            :                              precision, stmt_info->stmt);
    5062                 :            :         }
    5063                 :            : 
    5064                 :      13928 :       stmt_info->mask_precision = precision;
    5065                 :      13928 :       worklist.pop ();
    5066                 :            :     }
    5067                 :            : }
    5068                 :            : 
    5069                 :            : /* Handle vect_determine_precisions for STMT_INFO, given that we
    5070                 :            :    have already done so for the users of its result.  */
    5071                 :            : 
    5072                 :            : void
    5073                 :    1476760 : vect_determine_stmt_precisions (stmt_vec_info stmt_info)
    5074                 :            : {
    5075                 :    1476760 :   vect_determine_min_output_precision (stmt_info);
    5076                 :    1476760 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    5077                 :            :     {
    5078                 :    1148240 :       vect_determine_precisions_from_range (stmt_info, stmt);
    5079                 :    1148240 :       vect_determine_precisions_from_users (stmt_info, stmt);
    5080                 :            :     }
    5081                 :    1476760 :   vect_determine_mask_precision (stmt_info);
    5082                 :    1476760 : }
    5083                 :            : 
    5084                 :            : /* Walk backwards through the vectorizable region to determine the
    5085                 :            :    values of these fields:
    5086                 :            : 
    5087                 :            :    - min_output_precision
    5088                 :            :    - min_input_precision
    5089                 :            :    - operation_precision
    5090                 :            :    - operation_sign.  */
    5091                 :            : 
    5092                 :            : void
    5093                 :      80890 : vect_determine_precisions (vec_info *vinfo)
    5094                 :            : {
    5095                 :      80890 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    5096                 :            : 
    5097                 :      80890 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    5098                 :            :     {
    5099                 :      38589 :       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    5100                 :      38589 :       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
    5101                 :      38589 :       unsigned int nbbs = loop->num_nodes;
    5102                 :            : 
    5103                 :     119517 :       for (unsigned int i = 0; i < nbbs; i++)
    5104                 :            :         {
    5105                 :      80928 :           basic_block bb = bbs[nbbs - i - 1];
    5106                 :      80928 :           for (gimple_stmt_iterator si = gsi_last_bb (bb);
    5107                 :    1317740 :                !gsi_end_p (si); gsi_prev (&si))
    5108                 :     618408 :             vect_determine_stmt_precisions
    5109                 :     618408 :               (vinfo->lookup_stmt (gsi_stmt (si)));
    5110                 :            :         }
    5111                 :            :     }
    5112                 :            :   else
    5113                 :            :     {
    5114                 :      42301 :       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
    5115                 :      42301 :       gimple_stmt_iterator si = bb_vinfo->region_end;
    5116                 :    1006690 :       gimple *stmt;
    5117                 :    1006690 :       do
    5118                 :            :         {
    5119                 :    1006690 :           if (!gsi_stmt (si))
    5120                 :      31710 :             si = gsi_last_bb (bb_vinfo->bb);
    5121                 :            :           else
    5122                 :     990834 :             gsi_prev (&si);
    5123                 :    1006690 :           stmt = gsi_stmt (si);
    5124                 :    1006690 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
    5125                 :    1006690 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    5126                 :     858355 :             vect_determine_stmt_precisions (stmt_info);
    5127                 :            :         }
    5128                 :    1006690 :       while (stmt != gsi_stmt (bb_vinfo->region_begin));
    5129                 :            :     }
    5130                 :      80890 : }
    5131                 :            : 
    5132                 :            : typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
    5133                 :            : 
    5134                 :            : struct vect_recog_func
    5135                 :            : {
    5136                 :            :   vect_recog_func_ptr fn;
    5137                 :            :   const char *name;
    5138                 :            : };
    5139                 :            : 
    5140                 :            : /* Note that ordering matters - the first pattern matching on a stmt is
    5141                 :            :    taken which means usually the more complex one needs to preceed the
    5142                 :            :    less comples onex (widen_sum only after dot_prod or sad for example).  */
    5143                 :            : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    5144                 :            :   { vect_recog_over_widening_pattern, "over_widening" },
    5145                 :            :   /* Must come after over_widening, which narrows the shift as much as
    5146                 :            :      possible beforehand.  */
    5147                 :            :   { vect_recog_average_pattern, "average" },
    5148                 :            :   { vect_recog_mulhs_pattern, "mult_high" },
    5149                 :            :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    5150                 :            :   { vect_recog_widen_mult_pattern, "widen_mult" },
    5151                 :            :   { vect_recog_dot_prod_pattern, "dot_prod" },
    5152                 :            :   { vect_recog_sad_pattern, "sad" },
    5153                 :            :   { vect_recog_widen_sum_pattern, "widen_sum" },
    5154                 :            :   { vect_recog_pow_pattern, "pow" },
    5155                 :            :   { vect_recog_widen_shift_pattern, "widen_shift" },
    5156                 :            :   { vect_recog_rotate_pattern, "rotate" },
    5157                 :            :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    5158                 :            :   { vect_recog_divmod_pattern, "divmod" },
    5159                 :            :   { vect_recog_mult_pattern, "mult" },
    5160                 :            :   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
    5161                 :            :   { vect_recog_bool_pattern, "bool" },
    5162                 :            :   /* This must come before mask conversion, and includes the parts
    5163                 :            :      of mask conversion that are needed for gather and scatter
    5164                 :            :      internal functions.  */
    5165                 :            :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    5166                 :            :   { vect_recog_mask_conversion_pattern, "mask_conversion" }
    5167                 :            : };
    5168                 :            : 
    5169                 :            : const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
    5170                 :            : 
    5171                 :            : /* Mark statements that are involved in a pattern.  */
    5172                 :            : 
    5173                 :            : static inline void
    5174                 :      47386 : vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    5175                 :            :                          tree pattern_vectype)
    5176                 :            : {
    5177                 :      47386 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    5178                 :      47386 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    5179                 :            : 
    5180                 :      47386 :   gimple *orig_pattern_stmt = NULL;
    5181                 :      47386 :   if (is_pattern_stmt_p (orig_stmt_info))
    5182                 :            :     {
    5183                 :            :       /* We're replacing a statement in an existing pattern definition
    5184                 :            :          sequence.  */
    5185                 :        992 :       orig_pattern_stmt = orig_stmt_info->stmt;
    5186                 :        992 :       if (dump_enabled_p ())
    5187                 :        351 :         dump_printf_loc (MSG_NOTE, vect_location,
    5188                 :            :                          "replacing earlier pattern %G", orig_pattern_stmt);
    5189                 :            : 
    5190                 :            :       /* To keep the book-keeping simple, just swap the lhs of the
    5191                 :            :          old and new statements, so that the old one has a valid but
    5192                 :            :          unused lhs.  */
    5193                 :        992 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    5194                 :        992 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    5195                 :        992 :       gimple_set_lhs (pattern_stmt, old_lhs);
    5196                 :            : 
    5197                 :        992 :       if (dump_enabled_p ())
    5198                 :        351 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    5199                 :            : 
    5200                 :            :       /* Switch to the statement that ORIG replaces.  */
    5201                 :        992 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    5202                 :            : 
    5203                 :            :       /* We shouldn't be replacing the main pattern statement.  */
    5204                 :        992 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    5205                 :            :                   != orig_pattern_stmt);
    5206                 :            :     }
    5207                 :            : 
    5208                 :      47386 :   if (def_seq)
    5209                 :     112650 :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    5210                 :     112650 :          !gsi_end_p (si); gsi_next (&si))
    5211                 :            :       {
    5212                 :      78531 :         if (dump_enabled_p ())
    5213                 :      11328 :           dump_printf_loc (MSG_NOTE, vect_location,
    5214                 :            :                            "extra pattern stmt: %G", gsi_stmt (si));
    5215                 :      78531 :         stmt_vec_info pattern_stmt_info
    5216                 :      78531 :           = vect_init_pattern_stmt (gsi_stmt (si),
    5217                 :            :                                     orig_stmt_info, pattern_vectype);
    5218                 :            :         /* Stmts in the def sequence are not vectorizable cycle or
    5219                 :            :            induction defs, instead they should all be vect_internal_def
    5220                 :            :            feeding the main pattern stmt which retains this def type.  */
    5221                 :      78531 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    5222                 :            :       }
    5223                 :            : 
    5224                 :      47386 :   if (orig_pattern_stmt)
    5225                 :            :     {
    5226                 :        992 :       vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
    5227                 :            : 
    5228                 :            :       /* Insert all the new pattern statements before the original one.  */
    5229                 :        992 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    5230                 :        992 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    5231                 :        992 :                                                orig_def_seq);
    5232                 :        992 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    5233                 :        992 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    5234                 :            : 
    5235                 :            :       /* Remove the pattern statement that this new pattern replaces.  */
    5236                 :        992 :       gsi_remove (&gsi, false);
    5237                 :            :     }
    5238                 :            :   else
    5239                 :      46394 :     vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
    5240                 :            : 
    5241                 :            :   /* Transfer reduction path info to the pattern.  */
    5242                 :      47386 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    5243                 :            :     {
    5244                 :       1294 :       vec_info *vinfo = orig_stmt_info_saved->vinfo;
    5245                 :       2588 :       tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
    5246                 :       1294 :                                 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
    5247                 :            :       /* Search the pattern def sequence and the main pattern stmt.  Note
    5248                 :            :          we may have inserted all into a containing pattern def sequence
    5249                 :            :          so the following is a bit awkward.  */
    5250                 :       1294 :       gimple_stmt_iterator si;
    5251                 :       1294 :       gimple *s;
    5252                 :       1294 :       if (def_seq)
    5253                 :            :         {
    5254                 :       1061 :           si = gsi_start (def_seq);
    5255                 :       1061 :           s = gsi_stmt (si);
    5256                 :       1061 :           gsi_next (&si);
    5257                 :            :         }
    5258                 :            :       else
    5259                 :            :         {
    5260                 :       1294 :           si = gsi_none ();
    5261                 :       1294 :           s = pattern_stmt;
    5262                 :            :         }
    5263                 :       3372 :       do
    5264                 :            :         {
    5265                 :       3372 :           bool found = false;
    5266                 :       7986 :           for (unsigned i = 1; i < gimple_num_ops (s); ++i)
    5267                 :       6067 :             if (gimple_op (s, i) == lookfor)
    5268                 :            :               {
    5269                 :       1453 :                 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
    5270                 :       1453 :                 lookfor = gimple_get_lhs (s);
    5271                 :       1453 :                 found = true;
    5272                 :       1453 :                 break;
    5273                 :            :               }
    5274                 :       3372 :           if (s == pattern_stmt)
    5275                 :            :             {
    5276                 :       1294 :               if (!found && dump_enabled_p ())
    5277                 :          0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    5278                 :            :                                  "failed to update reduction index.\n");
    5279                 :       1294 :               break;
    5280                 :            :             }
    5281                 :       2078 :           if (gsi_end_p (si))
    5282                 :            :             s = pattern_stmt;
    5283                 :            :           else
    5284                 :            :             {
    5285                 :       1027 :               s = gsi_stmt (si);
    5286                 :       1027 :               if (s == pattern_stmt)
    5287                 :            :                 /* Found the end inside a bigger pattern def seq.  */
    5288                 :       2078 :                 si = gsi_none ();
    5289                 :            :               else
    5290                 :       1017 :                 gsi_next (&si);
    5291                 :            :             }
    5292                 :            :         } while (1);
    5293                 :            :     }
    5294                 :      47386 : }
    5295                 :            : 
    5296                 :            : /* Function vect_pattern_recog_1
    5297                 :            : 
    5298                 :            :    Input:
    5299                 :            :    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
    5300                 :            :         computation pattern.
    5301                 :            :    STMT_INFO: A stmt from which the pattern search should start.
    5302                 :            : 
    5303                 :            :    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
    5304                 :            :    a sequence of statements that has the same functionality and can be
    5305                 :            :    used to replace STMT_INFO.  It returns the last statement in the sequence
    5306                 :            :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    5307                 :            :    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
    5308                 :            :    statement, having first checked that the target supports the new operation
    5309                 :            :    in that type.
    5310                 :            : 
    5311                 :            :    This function also does some bookkeeping, as explained in the documentation
    5312                 :            :    for vect_recog_pattern.  */
    5313                 :            : 
    5314                 :            : static void
    5315                 :   27387500 : vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
    5316                 :            : {
    5317                 :   27387500 :   vec_info *vinfo = stmt_info->vinfo;
    5318                 :   27387500 :   gimple *pattern_stmt;
    5319                 :   27387500 :   loop_vec_info loop_vinfo;
    5320                 :   27387500 :   tree pattern_vectype;
    5321                 :            : 
    5322                 :            :   /* If this statement has already been replaced with pattern statements,
    5323                 :            :      leave the original statement alone, since the first match wins.
    5324                 :            :      Instead try to match against the definition statements that feed
    5325                 :            :      the main pattern statement.  */
    5326                 :   27387500 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    5327                 :            :     {
    5328                 :     513914 :       gimple_stmt_iterator gsi;
    5329                 :     513914 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    5330                 :    1319660 :            !gsi_end_p (gsi); gsi_next (&gsi))
    5331                 :     805743 :         vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
    5332                 :     513914 :       return;
    5333                 :            :     }
    5334                 :            : 
    5335                 :   26873600 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    5336                 :   26873600 :   pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
    5337                 :   26873600 :   if (!pattern_stmt)
    5338                 :            :     {
    5339                 :            :       /* Clear any half-formed pattern definition sequence.  */
    5340                 :   26826200 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    5341                 :   26826200 :       return;
    5342                 :            :     }
    5343                 :            : 
    5344                 :      47386 :   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    5345                 :      47386 :   gcc_assert (pattern_vectype);
    5346                 :            :  
    5347                 :            :   /* Found a vectorizable pattern.  */
    5348                 :      47386 :   if (dump_enabled_p ())
    5349                 :       8858 :     dump_printf_loc (MSG_NOTE, vect_location,
    5350                 :            :                      "%s pattern recognized: %G",
    5351                 :            :                      recog_func->name, pattern_stmt);
    5352                 :            : 
    5353                 :            :   /* Mark the stmts that are involved in the pattern. */
    5354                 :      47386 :   vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
    5355                 :            : 
    5356                 :            :   /* Patterns cannot be vectorized using SLP, because they change the order of
    5357                 :            :      computation.  */
    5358                 :      47386 :   if (loop_vinfo)
    5359                 :            :     {
    5360                 :      19057 :       unsigned ix, ix2;
    5361                 :      19057 :       stmt_vec_info *elem_ptr;
    5362                 :      59665 :       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
    5363                 :            :                              elem_ptr, *elem_ptr == stmt_info);
    5364                 :            :     }
    5365                 :            : }
    5366                 :            : 
    5367                 :            : 
    5368                 :            : /* Function vect_pattern_recog
    5369                 :            : 
    5370                 :            :    Input:
    5371                 :            :    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
    5372                 :            :         computation idioms.
    5373                 :            : 
    5374                 :            :    Output - for each computation idiom that is detected we create a new stmt
    5375                 :            :         that provides the same functionality and that can be vectorized.  We
    5376                 :            :         also record some information in the struct_stmt_info of the relevant
    5377                 :            :         stmts, as explained below:
    5378                 :            : 
    5379                 :            :    At the entry to this function we have the following stmts, with the
    5380                 :            :    following initial value in the STMT_VINFO fields:
    5381                 :            : 
    5382                 :            :          stmt                     in_pattern_p  related_stmt    vec_stmt
    5383                 :            :          S1: a_i = ....                 -       -               -
    5384                 :            :          S2: a_2 = ..use(a_i)..         -       -               -
    5385                 :            :          S3: a_1 = ..use(a_2)..         -       -               -
    5386                 :            :          S4: a_0 = ..use(a_1)..         -       -               -
    5387                 :            :          S5: ... = ..use(a_0)..         -       -               -
    5388                 :            : 
    5389                 :            :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    5390                 :            :    represented by a single stmt.  We then:
    5391                 :            :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    5392                 :            :      inserted into the code)
    5393                 :            :    - fill in the STMT_VINFO fields as follows:
    5394                 :            : 
    5395                 :            :                                   in_pattern_p  related_stmt    vec_stmt
    5396                 :            :          S1: a_i = ....                 -       -               -
    5397                 :            :          S2: a_2 = ..use(a_i)..         -       -               -
    5398                 :            :          S3: a_1 = ..use(a_2)..         -       -               -
    5399                 :            :          S4: a_0 = ..use(a_1)..         true    S6              -
    5400                 :            :           '---> S6: a_new = ....        -       S4              -
    5401                 :            :          S5: ... = ..use(a_0)..         -       -               -
    5402                 :            : 
    5403                 :            :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    5404                 :            :    to each other through the RELATED_STMT field).
    5405                 :            : 
    5406                 :            :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    5407                 :            :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    5408                 :            :    remain irrelevant unless used by stmts other than S4.
    5409                 :            : 
    5410                 :            :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    5411                 :            :    (because they are marked as irrelevant).  It will vectorize S6, and record
    5412                 :            :    a pointer to the new vector stmt VS6 from S6 (as usual).
    5413                 :            :    S4 will be skipped, and S5 will be vectorized as usual:
    5414                 :            : 
    5415                 :            :                                   in_pattern_p  related_stmt    vec_stmt
    5416                 :            :          S1: a_i = ....                 -       -               -
    5417                 :            :          S2: a_2 = ..use(a_i)..         -       -               -
    5418                 :            :          S3: a_1 = ..use(a_2)..         -       -               -
    5419                 :            :        > VS6: va_new = ....             -       -               -
    5420                 :            :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    5421                 :            :           '---> S6: a_new = ....        -       S4              VS6
    5422                 :            :        > VS5: ... = ..vuse(va_new)..    -       -               -
    5423                 :            :          S5: ... = ..use(a_0)..         -       -               -
    5424                 :            : 
    5425                 :            :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    5426                 :            :    elsewhere), and we'll end up with:
    5427                 :            : 
    5428                 :            :         VS6: va_new = ....
    5429                 :            :         VS5: ... = ..vuse(va_new)..
    5430                 :            : 
    5431                 :            :    In case of more than one pattern statements, e.g., widen-mult with
    5432                 :            :    intermediate type:
    5433                 :            : 
    5434                 :            :      S1  a_t = ;
    5435                 :            :      S2  a_T = (TYPE) a_t;
    5436                 :            :            '--> S3: a_it = (interm_type) a_t;
    5437                 :            :      S4  prod_T = a_T * CONST;
    5438                 :            :            '--> S5: prod_T' = a_it w* CONST;
    5439                 :            : 
    5440                 :            :    there may be other users of a_T outside the pattern.  In that case S2 will
    5441                 :            :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    5442                 :            :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    5443                 :            :    be recorded in S3.  */
    5444                 :            : 
    5445                 :            : void
    5446                 :      80890 : vect_pattern_recog (vec_info *vinfo)
    5447                 :            : {
    5448                 :      80890 :   class loop *loop;
    5449                 :      80890 :   basic_block *bbs;
    5450                 :      80890 :   unsigned int nbbs;
    5451                 :      80890 :   gimple_stmt_iterator si;
    5452                 :      80890 :   unsigned int i, j;
    5453                 :            : 
    5454                 :      80890 :   vect_determine_precisions (vinfo);
    5455                 :            : 
    5456                 :      80890 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    5457                 :            : 
    5458                 :      80890 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    5459                 :            :     {
    5460                 :      38589 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    5461                 :      38589 :       bbs = LOOP_VINFO_BBS (loop_vinfo);
    5462                 :      38589 :       nbbs = loop->num_nodes;
    5463                 :            : 
    5464                 :            :       /* Scan through the loop stmts, applying the pattern recognition
    5465                 :            :          functions starting at each stmt visited:  */
    5466                 :     119517 :       for (i = 0; i < nbbs; i++)
    5467                 :            :         {
    5468                 :      80928 :           basic_block bb = bbs[i];
    5469                 :     780264 :           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    5470                 :            :             {
    5471                 :     618408 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    5472                 :            :               /* Scan over all generic vect_recog_xxx_pattern functions.  */
    5473                 :   11749800 :               for (j = 0; j < NUM_PATTERNS; j++)
    5474                 :   11131300 :                 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
    5475                 :            :                                       stmt_info);
    5476                 :            :             }
    5477                 :            :         }
    5478                 :            :     }
    5479                 :            :   else
    5480                 :            :     {
    5481                 :      42301 :       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
    5482                 :      42301 :       for (si = bb_vinfo->region_begin;
    5483                 :    1048990 :            gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
    5484                 :            :         {
    5485                 :    1006690 :           gimple *stmt = gsi_stmt (si);
    5486                 :    1006690 :           stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
    5487                 :    1006690 :           if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
    5488                 :     148334 :             continue;
    5489                 :            : 
    5490                 :            :           /* Scan over all generic vect_recog_xxx_pattern functions.  */
    5491                 :   16308700 :           for (j = 0; j < NUM_PATTERNS; j++)
    5492                 :   15450400 :             vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
    5493                 :            :         }
    5494                 :            :     }
    5495                 :      80890 : }

Generated by: LCOV version 1.0

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto --enable-host-shared. GCC test suite is run with the built compiler.