Bug Summary

File: build/gcc/tree-vect-loop.c
Warning: line 8125, column 8
Assigned value is garbage or undefined
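
This diagnostic comes from the analyzer's uninitialized-value checking: it fires when the right-hand side of an assignment may read a variable that was never initialized on at least one execution path. The sketch below is a minimal, hypothetical illustration of the warning class only; it is not the GCC code at tree-vect-loop.c:8125, which lies outside the excerpt reproduced below.

int example (bool flag)
{
  int tmp;            /* only written when flag is true */
  if (flag)
    tmp = 42;
  int result = tmp;   /* Assigned value is garbage or undefined
                         on the path where flag is false */
  return result;
}

The usual fix for this pattern is to give the variable an unconditional initializer (for example "int tmp = 0;") or to ensure the conditional store happens on every path before the use.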

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name tree-vect-loop.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model static -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib64/clang/11.0.0 -D IN_GCC -D HAVE_CONFIG_H -I . -I . -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/. -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../include -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libcpp/include -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libcody -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libdecnumber -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libdecnumber/bid -I ../libdecnumber -I /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/../libbacktrace -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/10/../../../../include/c++/10 -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/10/../../../../include/c++/10/x86_64-suse-linux -internal-isystem /usr/bin/../lib64/gcc/x86_64-suse-linux/10/../../../../include/c++/10/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib64/clang/11.0.0/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-narrowing -Wwrite-strings -Wno-error=format-diag -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fdeprecated-macro -fdebug-compilation-dir /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/gcc -ferror-limit 19 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=plist-html -analyzer-config silence-checkers=core.NullDereference -faddrsig -o /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/objdir/clang-static-analyzer/2021-01-16-135054-17580-1/report-ewUEHF.plist -x c++ /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c

/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c

1/* Loop Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "gimple.h"
30#include "cfghooks.h"
31#include "tree-pass.h"
32#include "ssa.h"
33#include "optabs-tree.h"
34#include "diagnostic-core.h"
35#include "fold-const.h"
36#include "stor-layout.h"
37#include "cfganal.h"
38#include "gimplify.h"
39#include "gimple-iterator.h"
40#include "gimplify-me.h"
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
44#include "tree-ssa-loop.h"
45#include "cfgloop.h"
46#include "tree-scalar-evolution.h"
47#include "tree-vectorizer.h"
48#include "gimple-fold.h"
49#include "cgraph.h"
50#include "tree-cfg.h"
51#include "tree-if-conv.h"
52#include "internal-fn.h"
53#include "tree-vector-builder.h"
54#include "vec-perm-indices.h"
55#include "tree-eh.h"
56
57/* Loop Vectorization Pass.
58
59 This pass tries to vectorize loops.
60
61 For example, the vectorizer transforms the following simple loop:
62
63 short a[N]; short b[N]; short c[N]; int i;
64
65 for (i=0; i<N; i++){
66 a[i] = b[i] + c[i];
67 }
68
69 as if it was manually vectorized by rewriting the source code into:
70
71 typedef int __attribute__((mode(V8HI))) v8hi;
72 short a[N]; short b[N]; short c[N]; int i;
73 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
74 v8hi va, vb, vc;
75
76 for (i=0; i<N/8; i++){
77 vb = pb[i];
78 vc = pc[i];
79 va = vb + vc;
80 pa[i] = va;
81 }
82
83 The main entry to this pass is vectorize_loops(), in which
84 the vectorizer applies a set of analyses on a given set of loops,
85 followed by the actual vectorization transformation for the loops that
86 had successfully passed the analysis phase.
87 Throughout this pass we make a distinction between two types of
88 data: scalars (which are represented by SSA_NAMES), and memory references
89 ("data-refs"). These two types of data require different handling both
90 during analysis and transformation. The types of data-refs that the
91 vectorizer currently supports are ARRAY_REFS which base is an array DECL
92 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
93 accesses are required to have a simple (consecutive) access pattern.
94
95 Analysis phase:
96 ===============
97 The driver for the analysis phase is vect_analyze_loop().
98 It applies a set of analyses, some of which rely on the scalar evolution
99 analyzer (scev) developed by Sebastian Pop.
100
101 During the analysis phase the vectorizer records some information
102 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
103 loop, as well as general information about the loop as a whole, which is
104 recorded in a "loop_vec_info" struct attached to each loop.
105
106 Transformation phase:
107 =====================
108 The loop transformation phase scans all the stmts in the loop, and
109 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
110 the loop that needs to be vectorized. It inserts the vector code sequence
111 just before the scalar stmt S, and records a pointer to the vector code
112 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
113 attached to S). This pointer will be used for the vectorization of following
114 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
115 otherwise, we rely on dead code elimination for removing it.
116
117 For example, say stmt S1 was vectorized into stmt VS1:
118
119 VS1: vb = px[i];
120 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
121 S2: a = b;
122
123 To vectorize stmt S2, the vectorizer first finds the stmt that defines
124 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
125 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
126 resulting sequence would be:
127
128 VS1: vb = px[i];
129 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
130 VS2: va = vb;
131 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
132
133 Operands that are not SSA_NAMEs, are data-refs that appear in
134 load/store operations (like 'x[i]' in S1), and are handled differently.
135
136 Target modeling:
137 =================
138 Currently the only target specific information that is used is the
139 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
140 Targets that can support different sizes of vectors, for now will need
141 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
142 flexibility will be added in the future.
143
144 Since we only vectorize operations which vector form can be
145 expressed using existing tree codes, to verify that an operation is
146 supported, the vectorizer checks the relevant optab at the relevant
147 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
148 the value found is CODE_FOR_nothing, then there's no target support, and
149 we can't vectorize the stmt.
150
151 For additional information on this project see:
152 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
153*/
154
155static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
156static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
157 bool *, bool *);
158
159/* Subroutine of vect_determine_vf_for_stmt that handles only one
160 statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
161 may already be set for general statements (not just data refs). */
162
163static opt_result
164vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
165 bool vectype_maybe_set_p,
166 poly_uint64 *vf)
167{
168 gimple *stmt = stmt_info->stmt;
169
170 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
171 && !STMT_VINFO_LIVE_P (stmt_info))
172 || gimple_clobber_p (stmt))
173 {
174 if (dump_enabled_p ())
175 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
176 return opt_result::success ();
177 }
178
179 tree stmt_vectype, nunits_vectype;
180 opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info,
181 &stmt_vectype,
182 &nunits_vectype);
183 if (!res)
184 return res;
185
186 if (stmt_vectype)
187 {
188 if (STMT_VINFO_VECTYPE (stmt_info))
189 /* The only case when a vectype had been already set is for stmts
190 that contain a data ref, or for "pattern-stmts" (stmts generated
191 by the vectorizer to represent/replace a certain idiom). */
192 gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
193 || vectype_maybe_set_p)
194 && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
195 else
196 STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
197 }
198
199 if (nunits_vectype)
200 vect_update_max_nunits (vf, nunits_vectype);
201
202 return opt_result::success ();
203}
204
205/* Subroutine of vect_determine_vectorization_factor. Set the vector
206 types of STMT_INFO and all attached pattern statements and update
207 the vectorization factor VF accordingly. Return true on success
208 or false if something prevented vectorization. */
209
210static opt_result
211vect_determine_vf_for_stmt (vec_info *vinfo,
212 stmt_vec_info stmt_info, poly_uint64 *vf)
213{
214 if (dump_enabled_p ())
215 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
216 stmt_info->stmt);
217 opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
218 if (!res)
219 return res;
220
221 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
222 && STMT_VINFO_RELATED_STMT (stmt_info))
223 {
224 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226
227 /* If a pattern statement has def stmts, analyze them too. */
228 for (gimple_stmt_iterator si = gsi_start (pattern_def_seq);
229 !gsi_end_p (si); gsi_next (&si))
230 {
231 stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
232 if (dump_enabled_p ())
233 dump_printf_loc (MSG_NOTE, vect_location,
234 "==> examining pattern def stmt: %G",
235 def_stmt_info->stmt);
236 res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
237 if (!res)
238 return res;
239 }
240
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "==> examining pattern statement: %G",
244 stmt_info->stmt);
245 res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
246 if (!res)
247 return res;
248 }
249
250 return opt_result::success ();
251}
252
253/* Function vect_determine_vectorization_factor
254
255 Determine the vectorization factor (VF). VF is the number of data elements
256 that are operated upon in parallel in a single iteration of the vectorized
257 loop. For example, when vectorizing a loop that operates on 4byte elements,
258 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
259 elements can fit in a single vector register.
260
261 We currently support vectorization of loops in which all types operated upon
262 are of the same size. Therefore this function currently sets VF according to
263 the size of the types operated upon, and fails if there are multiple sizes
264 in the loop.
265
266 VF is also the factor by which the loop iterations are strip-mined, e.g.:
267 original loop:
268 for (i=0; i<N; i++){
269 a[i] = b[i] + c[i];
270 }
271
272 vectorized loop:
273 for (i=0; i<N; i+=VF){
274 a[i:VF] = b[i:VF] + c[i:VF];
275 }
276*/
277
278static opt_result
279vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
280{
281 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
282 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
283 unsigned nbbs = loop->num_nodes;
284 poly_uint64 vectorization_factor = 1;
285 tree scalar_type = NULL_TREE;
286 gphi *phi;
287 tree vectype;
288 stmt_vec_info stmt_info;
289 unsigned i;
290
291 DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
292
293 for (i = 0; i < nbbs; i++)
294 {
295 basic_block bb = bbs[i];
296
297 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
298 gsi_next (&si))
299 {
300 phi = si.phi ();
301 stmt_info = loop_vinfo->lookup_stmt (phi);
302 if (dump_enabled_p ())
303 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G",
304 phi);
305
306 gcc_assert (stmt_info);
307
308 if (STMT_VINFO_RELEVANT_P (stmt_info)
309 || STMT_VINFO_LIVE_P (stmt_info))
310 {
311 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
312 scalar_type = TREE_TYPE (PHI_RESULT (phi));
313
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "get vectype for scalar type: %T\n",
317 scalar_type);
318
319 vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
320 if (!vectype)
321 return opt_result::failure_at (phi,
322 "not vectorized: unsupported "
323 "data-type %T\n",
324 scalar_type);
325 STMT_VINFO_VECTYPE (stmt_info) = vectype;
326
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
329 vectype);
330
331 if (dump_enabled_p ())
332 {
333 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
334 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
335 dump_printf (MSG_NOTE, "\n");
336 }
337
338 vect_update_max_nunits (&vectorization_factor, vectype);
339 }
340 }
341
342 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
343 gsi_next (&si))
344 {
345 if (is_gimple_debug (gsi_stmt (si)))
346 continue;
347 stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
348 opt_result res
349 = vect_determine_vf_for_stmt (loop_vinfo,
350 stmt_info, &vectorization_factor);
351 if (!res)
352 return res;
353 }
354 }
355
356 /* TODO: Analyze cost. Decide if worth while to vectorize. */
357 if (dump_enabled_p ())
358 {
359 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
360 dump_dec (MSG_NOTE, vectorization_factor);
361 dump_printf (MSG_NOTE, "\n");
362 }
363
364 if (known_le (vectorization_factor, 1U))
365 return opt_result::failure_at (vect_location,
366 "not vectorized: unsupported data-type\n");
367 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
368 return opt_result::success ();
369}
370
371
372/* Function vect_is_simple_iv_evolution.
373
374 FORNOW: A simple evolution of an induction variables in the loop is
375 considered a polynomial evolution. */
376
377static bool
378vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
379 tree * step)
380{
381 tree init_expr;
382 tree step_expr;
383 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
384 basic_block bb;
385
386 /* When there is no evolution in this loop, the evolution function
387 is not "simple". */
388 if (evolution_part == NULL_TREE)
389 return false;
390
391 /* When the evolution is a polynomial of degree >= 2
392 the evolution function is not "simple". */
393 if (tree_is_chrec (evolution_part))
394 return false;
395
396 step_expr = evolution_part;
397 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
398
399 if (dump_enabled_p ())
400 dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n",
401 step_expr, init_expr);
402
403 *init = init_expr;
404 *step = step_expr;
405
406 if (TREE_CODE (step_expr) != INTEGER_CST
407 && (TREE_CODE (step_expr) != SSA_NAME
408 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
409 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
410 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
411 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
412 || !flag_associative_math)))
413 && (TREE_CODE (step_expr) != REAL_CST
414 || !flag_associative_math))
415 {
416 if (dump_enabled_p ())
417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
418 "step unknown.\n");
419 return false;
420 }
421
422 return true;
423}
424
425/* Return true if PHI, described by STMT_INFO, is the inner PHI in
426 what we are assuming is a double reduction. For example, given
427 a structure like this:
428
429 outer1:
430 x_1 = PHI <x_4(outer2), ...>;
431 ...
432
433 inner:
434 x_2 = PHI <x_1(outer1), ...>;
435 ...
436 x_3 = ...;
437 ...
438
439 outer2:
440 x_4 = PHI <x_3(inner)>;
441 ...
442
443 outer loop analysis would treat x_1 as a double reduction phi and
444 this function would then return true for x_2. */
445
446static bool
447vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
448{
449 use_operand_p use_p;
450 ssa_op_iter op_iter;
451 FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)
452 if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)))
453 if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def)
454 return true;
455 return false;
456}
457
458/* Function vect_analyze_scalar_cycles_1.
459
460 Examine the cross iteration def-use cycles of scalar variables
461 in LOOP. LOOP_VINFO represents the loop that is now being
462 considered for vectorization (can be LOOP, or an outer-loop
463 enclosing LOOP). */
464
465static void
466vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop)
467{
468 basic_block bb = loop->header;
469 tree init, step;
470 auto_vec<stmt_vec_info, 64> worklist;
471 gphi_iterator gsi;
472 bool double_reduc, reduc_chain;
473
474 DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles");
475
476 /* First - identify all inductions. Reduction detection assumes that all the
477 inductions have been identified, therefore, this order must not be
478 changed. */
479 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
480 {
481 gphi *phi = gsi.phi ();
482 tree access_fn = NULL;
483 tree def = PHI_RESULT (phi);
484 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi);
485
486 if (dump_enabled_p ())
487 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi);
488
489 /* Skip virtual phi's. The data dependences that are associated with
490 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
491 if (virtual_operand_p (def))
492 continue;
493
494 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
495
496 /* Analyze the evolution function. */
497 access_fn = analyze_scalar_evolution (loop, def);
498 if (access_fn)
499 {
500 STRIP_NOPS (access_fn);
501 if (dump_enabled_p ())
502 dump_printf_loc (MSG_NOTE, vect_location,
503 "Access function of PHI: %T\n", access_fn);
504 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
505 = initial_condition_in_loop_num (access_fn, loop->num);
506 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
507 = evolution_part_in_loop_num (access_fn, loop->num);
508 }
509
510 if (!access_fn
511 || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
512 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
513 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
514 && TREE_CODE (step) != INTEGER_CST))
515 {
516 worklist.safe_push (stmt_vinfo);
517 continue;
518 }
519
520 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
521 != NULL_TREE);
522 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
523
524 if (dump_enabled_p ())
525 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
526 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
527 }
528
529
530 /* Second - identify all reductions and nested cycles. */
531 while (worklist.length () > 0)
532 {
533 stmt_vec_info stmt_vinfo = worklist.pop ();
534 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
535 tree def = PHI_RESULT (phi);
536
537 if (dump_enabled_p ())
538 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi);
539
540 gcc_assert (!virtual_operand_p (def)
541 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
542
543 stmt_vec_info reduc_stmt_info
544 = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
545 &reduc_chain);
546 if (reduc_stmt_info)
547 {
548 STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
549 STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo;
550 if (double_reduc)
551 {
552 if (dump_enabled_p ())
553 dump_printf_loc (MSG_NOTE, vect_location,
554 "Detected double reduction.\n");
555
556 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
557 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def;
558 }
559 else
560 {
561 if (loop != LOOP_VINFO_LOOP (loop_vinfo))
562 {
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "Detected vectorizable nested cycle.\n");
566
567 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
568 }
569 else
570 {
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location,
573 "Detected reduction.\n");
574
575 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
576 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def;
577 /* Store the reduction cycles for possible vectorization in
578 loop-aware SLP if it was not detected as reduction
579 chain. */
580 if (! reduc_chain)
581 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
582 (reduc_stmt_info);
583 }
584 }
585 }
586 else
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
589 "Unknown def-use cycle pattern.\n");
590 }
591}
592
593
594/* Function vect_analyze_scalar_cycles.
595
596 Examine the cross iteration def-use cycles of scalar variables, by
597 analyzing the loop-header PHIs of scalar variables. Classify each
598 cycle as one of the following: invariant, induction, reduction, unknown.
599 We do that for the loop represented by LOOP_VINFO, and also to its
600 inner-loop, if exists.
601 Examples for scalar cycles:
602
603 Example1: reduction:
604
605 loop1:
606 for (i=0; i<N; i++)
607 sum += a[i];
608
609 Example2: induction:
610
611 loop2:
612 for (i=0; i<N; i++)
613 a[i] = i; */
614
615static void
616vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
617{
618 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
619
620 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
621
622 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
623 Reductions in such inner-loop therefore have different properties than
624 the reductions in the nest that gets vectorized:
625 1. When vectorized, they are executed in the same order as in the original
626 scalar loop, so we can't change the order of computation when
627 vectorizing them.
628 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
629 current checks are too strict. */
630
631 if (loop->inner)
632 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
633}
634
635/* Transfer group and reduction information from STMT_INFO to its
636 pattern stmt. */
637
638static void
639vect_fixup_reduc_chain (stmt_vec_info stmt_info)
640{
641 stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
642 stmt_vec_info stmtp;
643 gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
644 && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
645 REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
646 do
647 {
648 stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
649 gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)
650 == STMT_VINFO_DEF_TYPE (stmt_info));
651 REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
652 stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
653 if (stmt_info)
654 REDUC_GROUP_NEXT_ELEMENT (stmtp)
655 = STMT_VINFO_RELATED_STMT (stmt_info);
656 }
657 while (stmt_info);
658}
659
660/* Fixup scalar cycles that now have their stmts detected as patterns. */
661
662static void
663vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
664{
665 stmt_vec_info first;
666 unsigned i;
667
668 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
669 {
670 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
671 while (next)
672 {
673 if ((STMT_VINFO_IN_PATTERN_P (next)
674 != STMT_VINFO_IN_PATTERN_P (first))
675 || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
676 break;
677 next = REDUC_GROUP_NEXT_ELEMENT (next);
678 }
679 /* If all reduction chain members are well-formed patterns adjust
680 the group to group the pattern stmts instead. */
681 if (! next
682 && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
683 {
684 if (STMT_VINFO_IN_PATTERN_P (first))
685 {
686 vect_fixup_reduc_chain (first);
687 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
688 = STMT_VINFO_RELATED_STMT (first);
689 }
690 }
691 /* If not all stmt in the chain are patterns or if we failed
692 to update STMT_VINFO_REDUC_IDX dissolve the chain and handle
693 it as regular reduction instead. */
694 else
695 {
696 stmt_vec_info vinfo = first;
697 stmt_vec_info last = NULL;
698 while (vinfo)
699 {
700 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
701 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
702 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
703 last = vinfo;
704 vinfo = next;
705 }
706 STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
707 = vect_internal_def;
708 loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
709 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
710 --i;
711 }
712 }
713}
714
715/* Function vect_get_loop_niters.
716
717 Determine how many iterations the loop is executed and place it
718 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
719 in NUMBER_OF_ITERATIONSM1. Place the condition under which the
720 niter information holds in ASSUMPTIONS.
721
722 Return the loop exit condition. */
723
724
725static gcond *
726vect_get_loop_niters (class loop *loop, tree *assumptions,
727 tree *number_of_iterations, tree *number_of_iterationsm1)
728{
729 edge exit = single_exit (loop);
730 class tree_niter_desc niter_desc;
731 tree niter_assumptions, niter, may_be_zero;
732 gcond *cond = get_loop_exit_condition (loop);
733
734 *assumptions = boolean_true_node;
735 *number_of_iterationsm1 = chrec_dont_know;
736 *number_of_iterations = chrec_dont_know;
737 DUMP_VECT_SCOPE ("get_loop_niters");
738
739 if (!exit)
740 return cond;
741
742 may_be_zero = NULL_TREE;
743 if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
744 || chrec_contains_undetermined (niter_desc.niter))
745 return cond;
746
747 niter_assumptions = niter_desc.assumptions;
748 may_be_zero = niter_desc.may_be_zero;
749 niter = niter_desc.niter;
750
751 if (may_be_zero && integer_zerop (may_be_zero))
752 may_be_zero = NULL_TREE;
753
754 if (may_be_zero)
755 {
756 if (COMPARISON_CLASS_P (may_be_zero))
757 {
758 /* Try to combine may_be_zero with assumptions, this can simplify
759 computation of niter expression. */
760 if (niter_assumptions && !integer_nonzerop (niter_assumptions))
761 niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
762 niter_assumptions,
763 fold_build1 (TRUTH_NOT_EXPR,
764 boolean_type_node,
765 may_be_zero));
766 else
767 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
768 build_int_cst (TREE_TYPE (niter), 0),
769 rewrite_to_non_trapping_overflow (niter));
770
771 may_be_zero = NULL_TREE;
772 }
773 else if (integer_nonzerop (may_be_zero))
774 {
775 *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
776 *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
777 return cond;
778 }
779 else
780 return cond;
781 }
782
783 *assumptions = niter_assumptions;
784 *number_of_iterationsm1 = niter;
785
786 /* We want the number of loop header executions which is the number
787 of latch executions plus one.
788 ??? For UINT_MAX latch executions this number overflows to zero
789 for loops like do { n++; } while (n != 0); */
790 if (niter && !chrec_contains_undetermined (niter))
791 niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
792 build_int_cst (TREE_TYPE (niter), 1));
793 *number_of_iterations = niter;
794
795 return cond;
796}
797
798/* Function bb_in_loop_p
799
800 Used as predicate for dfs order traversal of the loop bbs. */
801
802static bool
803bb_in_loop_p (const_basic_block bb, const void *data)
804{
805 const class loop *const loop = (const class loop *)data;
806 if (flow_bb_inside_loop_p (loop, bb))
807 return true;
808 return false;
809}
810
811
812/* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
813 stmt_vec_info structs for all the stmts in LOOP_IN. */
814
815_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
816 : vec_info (vec_info::loop, init_cost (loop_in), shared),
817 loop (loop_in),
818 bbs (XCNEWVEC (basic_block, loop->num_nodes)),
819 num_itersm1 (NULL_TREE),
820 num_iters (NULL_TREE),
821 num_iters_unchanged (NULL_TREE),
822 num_iters_assumptions (NULL_TREE),
823 th (0),
824 versioning_threshold (0),
825 vectorization_factor (0),
826 max_vectorization_factor (0),
827 mask_skip_niters (NULL_TREE),
828 rgroup_compare_type (NULL_TREE),
829 simd_if_cond (NULL_TREE),
830 unaligned_dr (NULL),
831 peeling_for_alignment (0),
832 ptr_mask (0),
833 ivexpr_map (NULL),
834 scan_map (NULL),
835 slp_unrolling_factor (1),
836 single_scalar_iteration_cost (0),
837 vec_outside_cost (0),
838 vec_inside_cost (0),
839 vectorizable (false),
840 can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
841 using_partial_vectors_p (false),
842 epil_using_partial_vectors_p (false),
843 peeling_for_gaps (false),
844 peeling_for_niter (false),
845 no_data_dependencies (false),
846 has_mask_store (false),
847 scalar_loop_scaling (profile_probability::uninitialized ()),
848 scalar_loop (NULL),
849 orig_loop_info (NULL)
850{
851 /* CHECKME: We want to visit all BBs before their successors (except for
852 latch blocks, for which this assertion wouldn't hold). In the simple
853 case of the loop forms we allow, a dfs order of the BBs would the same
854 as reversed postorder traversal, so we are safe. */
855
856 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
857 bbs, loop->num_nodes, loop);
858 gcc_assert (nbbs == loop->num_nodes);
859
860 for (unsigned int i = 0; i < nbbs; i++)
861 {
862 basic_block bb = bbs[i];
863 gimple_stmt_iterator si;
864
865 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
866 {
867 gimple *phi = gsi_stmt (si);
868 gimple_set_uid (phi, 0);
869 add_stmt (phi);
870 }
871
872 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
873 {
874 gimple *stmt = gsi_stmt (si);
875 gimple_set_uid (stmt, 0);
876 if (is_gimple_debug (stmt))
877 continue;
878 add_stmt (stmt);
879 /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
880 third argument is the #pragma omp simd if (x) condition, when 0,
881 loop shouldn't be vectorized, when non-zero constant, it should
882 be vectorized normally, otherwise versioned with vectorized loop
883 done if the condition is non-zero at runtime. */
884 if (loop_in->simduid
885 && is_gimple_call (stmt)
886 && gimple_call_internal_p (stmt)
887 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
888 && gimple_call_num_args (stmt) >= 3
889 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
890 && (loop_in->simduid
891 == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
892 {
893 tree arg = gimple_call_arg (stmt, 2);
894 if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
895 simd_if_cond = arg;
896 else
897 gcc_assert (integer_nonzerop (arg));
898 }
899 }
900 }
901
902 epilogue_vinfos.create (6);
903}
904
905/* Free all levels of rgroup CONTROLS. */
906
907void
908release_vec_loop_controls (vec<rgroup_controls> *controls)
909{
910 rgroup_controls *rgc;
911 unsigned int i;
912 FOR_EACH_VEC_ELT (*controls, i, rgc)
913 rgc->controls.release ();
914 controls->release ();
915}
916
917/* Free all memory used by the _loop_vec_info, as well as all the
918 stmt_vec_info structs of all the stmts in the loop. */
919
920_loop_vec_info::~_loop_vec_info ()
921{
922 free (bbs);
923
924 release_vec_loop_controls (&masks);
925 release_vec_loop_controls (&lens);
926 delete ivexpr_map;
927 delete scan_map;
928 epilogue_vinfos.release ();
929
930 loop->aux = NULL;
931}
932
933/* Return an invariant or register for EXPR and emit necessary
934 computations in the LOOP_VINFO loop preheader. */
935
936tree
937cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
938{
939 if (is_gimple_reg (expr)
940 || is_gimple_min_invariant (expr))
941 return expr;
942
943 if (! loop_vinfo->ivexpr_map)
944 loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
945 tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
946 if (! cached)
947 {
948 gimple_seq stmts = NULL;
949 cached = force_gimple_operand (unshare_expr (expr),
950 &stmts, true, NULL_TREE);
951 if (stmts)
952 {
953 edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
954 gsi_insert_seq_on_edge_immediate (e, stmts);
955 }
956 }
957 return cached;
958}
959
960/* Return true if we can use CMP_TYPE as the comparison type to produce
961 all masks required to mask LOOP_VINFO. */
962
963static bool
964can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
965{
966 rgroup_controls *rgm;
967 unsigned int i;
968 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
969 if (rgm->type != NULL_TREE
970 && !direct_internal_fn_supported_p (IFN_WHILE_ULT,
971 cmp_type, rgm->type,
972 OPTIMIZE_FOR_SPEED))
973 return false;
974 return true;
975}
976
977/* Calculate the maximum number of scalars per iteration for every
978 rgroup in LOOP_VINFO. */
979
980static unsigned int
981vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
982{
983 unsigned int res = 1;
984 unsigned int i;
985 rgroup_controls *rgm;
986 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
987 res = MAX (res, rgm->max_nscalars_per_iter);
988 return res;
989}
990
991/* Calculate the minimum precision necessary to represent:
992
993 MAX_NITERS * FACTOR
994
995 as an unsigned integer, where MAX_NITERS is the maximum number of
996 loop header iterations for the original scalar form of LOOP_VINFO. */
997
998static unsigned
999vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
1000{
1001 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1002
1003 /* Get the maximum number of iterations that is representable
1004 in the counter type. */
1005 tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
1006 widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
1007
1008 /* Get a more refined estimate for the number of iterations. */
1009 widest_int max_back_edges;
1010 if (max_loop_iterations (loop, &max_back_edges))
1011 max_ni = wi::smin (max_ni, max_back_edges + 1);
1012
1013 /* Work out how many bits we need to represent the limit. */
1014 return wi::min_precision (max_ni * factor, UNSIGNED);
1015}
1016
1017/* True if the loop needs peeling or partial vectors when vectorized. */
1018
1019static bool
1020vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
1021{
1022 unsigned HOST_WIDE_INT const_vf;
1023 HOST_WIDE_INT max_niter
1024 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1025
1026 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
1027 if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
1028 th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
1029 (loop_vinfo));
1030
1031 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1032 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
1033 {
1034 /* Work out the (constant) number of iterations that need to be
1035 peeled for reasons other than niters. */
1036 unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1037 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1038 peel_niter += 1;
1039 if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
1040 LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1041 return true;
1042 }
1043 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1044 /* ??? When peeling for gaps but not alignment, we could
1045 try to check whether the (variable) niters is known to be
1046 VF * N + 1. That's something of a niche case though. */
1047 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1048 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
1049 || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
1050 < (unsigned) exact_log2 (const_vf))
1051 /* In case of versioning, check if the maximum number of
1052 iterations is greater than th. If they are identical,
1053 the epilogue is unnecessary. */
1054 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
1055 || ((unsigned HOST_WIDE_INT) max_niter
1056 > (th / const_vf) * const_vf))))
1057 return true;
1058
1059 return false;
1060}
1061
1062/* Each statement in LOOP_VINFO can be masked where necessary. Check
1063 whether we can actually generate the masks required. Return true if so,
1064 storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */
1065
1066static bool
1067vect_verify_full_masking (loop_vec_info loop_vinfo)
1068{
1069 unsigned int min_ni_width;
1070 unsigned int max_nscalars_per_iter
1071 = vect_get_max_nscalars_per_iter (loop_vinfo);
1072
1073 /* Use a normal loop if there are no statements that need masking.
1074 This only happens in rare degenerate cases: it means that the loop
1075 has no loads, no stores, and no live-out values. */
1076 if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
1077 return false;
1078
1079 /* Work out how many bits we need to represent the limit. */
1080 min_ni_width
1081 = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
1082
1083 /* Find a scalar mode for which WHILE_ULT is supported. */
1084 opt_scalar_int_mode cmp_mode_iter;
1085 tree cmp_type = NULL_TREE;
1086 tree iv_type = NULL_TREE;
1087 widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
1088 unsigned int iv_precision = UINT_MAX;
1089
1090 if (iv_limit != -1)
1091 iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
1092 UNSIGNED);
1093
1094 FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
1095 {
1096 unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
1097 if (cmp_bits >= min_ni_width
1098 && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
1099 {
1100 tree this_type = build_nonstandard_integer_type (cmp_bits, true);
1101 if (this_type
1102 && can_produce_all_loop_masks_p (loop_vinfo, this_type))
1103 {
1104 /* Although we could stop as soon as we find a valid mode,
1105 there are at least two reasons why that's not always the
1106 best choice:
1107
1108 - An IV that's Pmode or wider is more likely to be reusable
1109 in address calculations than an IV that's narrower than
1110 Pmode.
1111
1112 - Doing the comparison in IV_PRECISION or wider allows
1113 a natural 0-based IV, whereas using a narrower comparison
1114 type requires mitigations against wrap-around.
1115
1116 Conversely, if the IV limit is variable, doing the comparison
1117 in a wider type than the original type can introduce
1118 unnecessary extensions, so picking the widest valid mode
1119 is not always a good choice either.
1120
1121 Here we prefer the first IV type that's Pmode or wider,
1122 and the first comparison type that's IV_PRECISION or wider.
1123 (The comparison type must be no wider than the IV type,
1124 to avoid extensions in the vector loop.)
1125
1126 ??? We might want to try continuing beyond Pmode for ILP32
1127 targets if CMP_BITS < IV_PRECISION. */
1128 iv_type = this_type;
1129 if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type))
1130 cmp_type = this_type;
1131 if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
1132 break;
1133 }
1134 }
1135 }
1136
1137 if (!cmp_type)
1138 return false;
1139
1140 LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
1141 LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
1142 return true;
1143}
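/* Illustrative sketch (editor's addition, not part of tree-vect-loop.c):
   what the full masking verified above buys us.  With a WHILE_ULT-style
   mask a vector iteration can run even when fewer than VF elements
   remain, because inactive lanes are switched off, so no scalar epilogue
   is needed.  Plain C++ emulation with an assumed VF of 4; all names are
   illustrative.  */
#include <cstddef>

static const std::size_t VF = 4;

void masked_add (float *a, const float *b, std::size_t n)
{
  for (std::size_t i = 0; i < n; i += VF)
    for (std::size_t lane = 0; lane < VF; ++lane)
      {
        /* mask[lane] = WHILE_ULT (i + lane, n): the lane is active only
           while the scalar IV is still below the iteration limit.  */
        bool active = (i + lane) < n;
        if (active)
          a[i + lane] += b[i + lane];
      }
}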
1144
1145/* Check whether we can use vector access with length based on precision
1146 comparison. So far, to keep it simple, we only allow the case that the
1147 precision of the target supported length is larger than the precision
1148 required by loop niters. */
1149
1150static bool
1151vect_verify_loop_lens (loop_vec_info loop_vinfo)
1152{
1153 if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
1154 return false;
1155
1156 unsigned int max_nitems_per_iter = 1;
1157 unsigned int i;
1158 rgroup_controls *rgl;
1159 /* Find the maximum number of items per iteration for every rgroup. */
1160 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)
1161 {
1162 unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
1163 max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter);
1164 }
1165
1166 /* Work out how many bits we need to represent the length limit. */
1167 unsigned int min_ni_prec
1168 = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter);
1169
1170 /* Now use the maximum of below precisions for one suitable IV type:
1171 - the IV's natural precision
1172 - the precision needed to hold: the maximum number of scalar
1173 iterations multiplied by the scale factor (min_ni_prec above)
1174 - the Pmode precision
1175
1176 If min_ni_prec is less than the precision of the current niters,
1177 we prefer to still use the niters type. Prefer to use Pmode and
1178 wider IV to avoid narrow conversions. */
1179
1180 unsigned int ni_prec
1181 = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)));
1182 min_ni_prec = MAX (min_ni_prec, ni_prec);
1183 min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode));
1184
1185 tree iv_type = NULL_TREE;
1186 opt_scalar_int_mode tmode_iter;
1187 FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)
1188 {
1189 scalar_mode tmode = tmode_iter.require ();
1190 unsigned int tbits = GET_MODE_BITSIZE (tmode);
1191
1192 /* ??? Do we really want to construct one IV whose precision exceeds
1193 BITS_PER_WORD? */
1194 if (tbits > BITS_PER_WORD)
1195 break;
1196
1197 /* Find the first available standard integral type. */
1198 if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode))
1199 {
1200 iv_type = build_nonstandard_integer_type (tbits, true);
1201 break;
1202 }
1203 }
1204
1205 if (!iv_type)
1206 {
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1209 "can't vectorize with length-based partial vectors"
1210 " because there is no suitable iv type.\n");
1211 return false;
1212 }
1213
1214 LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = iv_type;
1215 LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
1216
1217 return true;
1218}
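/* Illustrative sketch (editor's addition, not part of tree-vect-loop.c):
   the length-based alternative checked above.  Each vector iteration
   processes len = MIN (remaining, VF) elements, which is why the length
   IV must be wide enough for the worst-case product of niters and
   nitems-per-iteration.  Plain C++ emulation with an assumed VF of 4.  */
#include <algorithm>
#include <cstddef>

static const std::size_t VF = 4;

void length_controlled_add (float *a, const float *b, std::size_t n)
{
  for (std::size_t i = 0; i < n; i += VF)
    {
      std::size_t len = std::min (n - i, VF);  /* vector "length" control */
      for (std::size_t lane = 0; lane < len; ++lane)
        a[i + lane] += b[i + lane];
    }
}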
1219
1220/* Calculate the cost of one scalar iteration of the loop. */
1221static void
1222vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1223{
1224 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1225 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1226 int nbbs = loop->num_nodes, factor;
1227 int innerloop_iters, i;
1228
1229 DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost");
1230
1231 /* Gather costs for statements in the scalar loop. */
1232
1233 /* FORNOW. */
1234 innerloop_iters = 1;
1235 if (loop->inner)
1236 innerloop_iters = 50; /* FIXME */
1237
1238 for (i = 0; i < nbbs; i++)
1239 {
1240 gimple_stmt_iterator si;
1241 basic_block bb = bbs[i];
1242
1243 if (bb->loop_father == loop->inner)
1244 factor = innerloop_iters;
1245 else
1246 factor = 1;
1247
1248 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1249 {
1250 gimple *stmt = gsi_stmt (si);
1251 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
1252
1253 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1254 continue;
1255
1256 /* Skip stmts that are not vectorized inside the loop. */
1257 stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
1258 if (!STMT_VINFO_RELEVANT_P (vstmt_info)
1259 && (!STMT_VINFO_LIVE_P (vstmt_info)
1260 || !VECTORIZABLE_CYCLE_DEF
1261 (STMT_VINFO_DEF_TYPE (vstmt_info))))
1262 continue;
1263
1264 vect_cost_for_stmt kind;
1265 if (STMT_VINFO_DATA_REF (stmt_info))
1266 {
1267 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1268 kind = scalar_load;
1269 else
1270 kind = scalar_store;
1271 }
1272 else if (vect_nop_conversion_p (stmt_info))
1273 continue;
1274 else
1275 kind = scalar_stmt;
1276
1277 record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1278 factor, kind, stmt_info, 0, vect_prologue);
1279 }
1280 }
1281
1282 /* Now accumulate cost. */
1283 void *target_cost_data = init_cost (loop);
1284 stmt_info_for_cost *si;
1285 int j;
1286 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1287 j, si)
1288 (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
1289 si->kind, si->stmt_info, si->vectype,
1290 si->misalign, vect_body);
1291 unsigned dummy, body_cost = 0;
1292 finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
1293 destroy_cost_data (target_cost_data);
1294 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) = body_cost;
1295}
1296
1297
1298/* Function vect_analyze_loop_form_1.
1299
1300 Verify that certain CFG restrictions hold, including:
1301 - the loop has a pre-header
1302 - the loop has a single entry and exit
1303 - the loop exit condition is simple enough
1304 - the number of iterations can be analyzed, i.e, a countable loop. The
1305 niter could be analyzed under some assumptions. */
1306
1307opt_result
1308vect_analyze_loop_form_1 (class loop *loop, gcond **loop_cond,
1309 tree *assumptions, tree *number_of_iterationsm1,
1310 tree *number_of_iterations, gcond **inner_loop_cond)
1311{
1312 DUMP_VECT_SCOPE ("vect_analyze_loop_form");
1313
1314 /* Different restrictions apply when we are considering an inner-most loop,
1315 vs. an outer (nested) loop.
1316 (FORNOW. May want to relax some of these restrictions in the future). */
1317
1318 if (!loop->inner)
1319 {
1320 /* Inner-most loop. We currently require that the number of BBs is
1321 exactly 2 (the header and latch). Vectorizable inner-most loops
1322 look like this:
1323
1324 (pre-header)
1325 |
1326 header <--------+
1327 | | |
1328 | +--> latch --+
1329 |
1330 (exit-bb) */
1331
1332 if (loop->num_nodes != 2)
1333 return opt_result::failure_at (vect_location,
1334 "not vectorized:"
1335 " control flow in loop.\n");
1336
1337 if (empty_block_p (loop->header))
1338 return opt_result::failure_at (vect_location,
1339 "not vectorized: empty loop.\n");
1340 }
1341 else
1342 {
1343 class loop *innerloop = loop->inner;
1344 edge entryedge;
1345
1346 /* Nested loop. We currently require that the loop is doubly-nested,
1347 contains a single inner loop, and the number of BBs is exactly 5.
1348 Vectorizable outer-loops look like this:
1349
1350 (pre-header)
1351 |
1352 header <---+
1353 | |
1354 inner-loop |
1355 | |
1356 tail ------+
1357 |
1358 (exit-bb)
1359
1360 The inner-loop has the properties expected of inner-most loops
1361 as described above. */
1362
1363 if ((loop->inner)->inner || (loop->inner)->next)
1364 return opt_result::failure_at (vect_location,
1365 "not vectorized:"
1366 " multiple nested loops.\n");
1367
1368 if (loop->num_nodes != 5)
1369 return opt_result::failure_at (vect_location,
1370 "not vectorized:"
1371 " control flow in loop.\n");
1372
1373 entryedge = loop_preheader_edge (innerloop);
1374 if (entryedge->src != loop->header
1375 || !single_exit (innerloop)
1376 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1377 return opt_result::failure_at (vect_location,
1378 "not vectorized:"
1379 " unsupported outerloop form.\n");
1380
1381 /* Analyze the inner-loop. */
1382 tree inner_niterm1, inner_niter, inner_assumptions;
1383 opt_result res
1384 = vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1385 &inner_assumptions, &inner_niterm1,
1386 &inner_niter, NULL);
1387 if (!res)
1388 {
1389 if (dump_enabled_p ())
1390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1391 "not vectorized: Bad inner loop.\n");
1392 return res;
1393 }
1394
1395 /* Don't support analyzing niter under assumptions for inner
1396 loop. */
1397 if (!integer_onep (inner_assumptions))
1398 return opt_result::failure_at (vect_location,
1399 "not vectorized: Bad inner loop.\n");
1400
1401 if (!expr_invariant_in_loop_p (loop, inner_niter))
1402 return opt_result::failure_at (vect_location,
1403 "not vectorized: inner-loop count not"
1404 " invariant.\n");
1405
1406 if (dump_enabled_p ())
1407 dump_printf_loc (MSG_NOTE, vect_location,
1408 "Considering outer-loop vectorization.\n");
1409 }
1410
1411 if (!single_exit (loop))
1412 return opt_result::failure_at (vect_location,
1413 "not vectorized: multiple exits.\n");
1414 if (EDGE_COUNT (loop->header->preds) != 2)
1415 return opt_result::failure_at (vect_location,
1416 "not vectorized:"
1417 " too many incoming edges.\n");
1418
1419 /* We assume that the loop exit condition is at the end of the loop. i.e,
1420 that the loop is represented as a do-while (with a proper if-guard
1421 before the loop if needed), where the loop header contains all the
1422 executable statements, and the latch is empty. */
1423 if (!empty_block_p (loop->latch)
1424 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1425 return opt_result::failure_at (vect_location,
1426 "not vectorized: latch block not empty.\n");
1427
1428 /* Make sure the exit is not abnormal. */
1429 edge e = single_exit (loop);
1430 if (e->flags & EDGE_ABNORMAL)
1431 return opt_result::failure_at (vect_location,
1432 "not vectorized:"
1433 " abnormal loop exit edge.\n");
1434
1435 *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
1436 number_of_iterationsm1);
1437 if (!*loop_cond)
1438 return opt_result::failure_at
1439 (vect_location,
1440 "not vectorized: complicated exit condition.\n");
1441
1442 if (integer_zerop (*assumptions)
1443 || !*number_of_iterations
1444 || chrec_contains_undetermined (*number_of_iterations))
1445 return opt_result::failure_at
1446 (*loop_cond,
1447 "not vectorized: number of iterations cannot be computed.\n");
1448
1449 if (integer_zerop (*number_of_iterations))
1450 return opt_result::failure_at
1451 (*loop_cond,
1452 "not vectorized: number of iterations = 0.\n");
1453
1454 return opt_result::success ();
1455}
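/* Illustrative sketch (editor's addition, not part of tree-vect-loop.c):
   the kind of source loop that satisfies the form checks above once
   gimplified - a single-entry, single-exit, countable loop whose latch is
   empty and whose exit condition ends the header.  Names are
   illustrative.  */
void accepted_form (float *a, int n)
{
  /* Single exit, niters computable as n (assuming n > 0).  */
  for (int i = 0; i < n; ++i)
    a[i] = a[i] * 2.0f;

  /* A loop with a second exit (e.g. an early "break") would fail the
     single_exit check above and be rejected with "multiple exits".  */
}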
1456
1457/* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */
1458
1459opt_loop_vec_info
1460vect_analyze_loop_form (class loop *loop, vec_info_shared *shared)
1461{
1462 tree assumptions, number_of_iterations, number_of_iterationsm1;
1463 gcond *loop_cond, *inner_loop_cond = NULL;
1464
1465 opt_result res
1466 = vect_analyze_loop_form_1 (loop, &loop_cond,
1467 &assumptions, &number_of_iterationsm1,
1468 &number_of_iterations, &inner_loop_cond);
1469 if (!res)
1470 return opt_loop_vec_info::propagate_failure (res);
1471
1472 loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
1473 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1474 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1475 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1476 if (!integer_onep (assumptions))
1477 {
1478 /* We consider to vectorize this loop by versioning it under
1479 some assumptions. In order to do this, we need to clear
1480 existing information computed by scev and niter analyzer. */
1481 scev_reset_htab ();
1482 free_numbers_of_iterations_estimates (loop);
1483 /* Also set flag for this loop so that following scev and niter
1484 analysis are done under the assumptions. */
1485 loop_constraint_set (loop, LOOP_C_FINITE);
1486 /* Also record the assumptions for versioning. */
1487 LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = assumptions;
1488 }
1489
1490 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1491 {
1492 if (dump_enabled_p ())
1493 {
1494 dump_printf_loc (MSG_NOTE, vect_location,
1495 "Symbolic number of iterations is ");
1496 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1497 dump_printf (MSG_NOTE, "\n");
1498 }
1499 }
1500
1501 stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (loop_cond);
1502 STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
1503 if (inner_loop_cond)
1504 {
1505 stmt_vec_info inner_loop_cond_info
1506 = loop_vinfo->lookup_stmt (inner_loop_cond);
1507 STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
1508 }
1509
1510 gcc_assert (!loop->aux);
1511 loop->aux = loop_vinfo;
1512 return opt_loop_vec_info::success (loop_vinfo);
1513}
1514
1515
1516
1517/* Scan the loop stmts and dependent on whether there are any (non-)SLP
1518 statements update the vectorization factor. */
1519
1520static void
1521vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1522{
1523 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1524 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1525 int nbbs = loop->num_nodes;
1526 poly_uint64 vectorization_factor;
1527 int i;
1528
1529 DUMP_VECT_SCOPE ("vect_update_vf_for_slp");
1530
1531 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1532 gcc_assert (known_ne (vectorization_factor, 0U));
1533
1534 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1535 vectorization factor of the loop is the unrolling factor required by
1536 the SLP instances. If that unrolling factor is 1, we say, that we
1537 perform pure SLP on loop - cross iteration parallelism is not
1538 exploited. */
1539 bool only_slp_in_loop = true;
1540 for (i = 0; i < nbbs; i++)
1541 {
1542 basic_block bb = bbs[i];
1543 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1544 gsi_next (&si))
1545 {
1546 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
1547 if (!stmt_info)
1548 continue;
1549 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1550 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1551 && !PURE_SLP_STMT (stmt_info))
1552 /* STMT needs both SLP and loop-based vectorization. */
1553 only_slp_in_loop = false;
1554 }
1555 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1556 gsi_next (&si))
1557 {
1558 if (is_gimple_debug (gsi_stmt (si)))
1559 continue;
1560 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
1561 stmt_info = vect_stmt_to_vectorize (stmt_info);
1562 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1563 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1564 && !PURE_SLP_STMT (stmt_info))
1565 /* STMT needs both SLP and loop-based vectorization. */
1566 only_slp_in_loop = false;
1567 }
1568 }
1569
1570 if (only_slp_in_loop)
1571 {
1572 if (dump_enabled_p ())
1573 dump_printf_loc (MSG_NOTE, vect_location,
1574 "Loop contains only SLP stmts\n");
1575 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1576 }
1577 else
1578 {
1579 if (dump_enabled_p ())
1580 dump_printf_loc (MSG_NOTE, vect_location,
1581 "Loop contains SLP and non-SLP stmts\n");
1582 /* Both the vectorization factor and unroll factor have the form
1583 GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
1584 so they must have a common multiple. */
1585 vectorization_factor
1586 = force_common_multiple (vectorization_factor,
1587 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1588 }
1589
1590 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1591 if (dump_enabled_p ())
1592 {
1593 dump_printf_loc (MSG_NOTE, vect_location,
1594 "Updating vectorization factor to ");
1595 dump_dec (MSG_NOTE, vectorization_factor);
1596 dump_printf (MSG_NOTE, ".\n");
1597 }
1598}
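/* Illustrative sketch (editor's addition, not part of tree-vect-loop.c):
   how the hybrid-SLP update above combines the two factors.  When the
   loop mixes SLP and non-SLP statements, the final VF is a common
   multiple of the loop VF and the SLP unrolling factor, e.g. VF 4 with
   unrolling factor 2 stays 4, while VF 4 with unrolling factor 3 becomes
   12.  Plain C++ arithmetic stand-in for the constant-factor case.  */
#include <cassert>
#include <cstdint>

static std::uint64_t gcd_u64 (std::uint64_t a, std::uint64_t b)
{
  while (b) { std::uint64_t t = a % b; a = b; b = t; }
  return a;
}

static std::uint64_t common_multiple (std::uint64_t vf, std::uint64_t uf)
{
  return vf / gcd_u64 (vf, uf) * uf;
}

static void self_check ()
{
  assert (common_multiple (4, 2) == 4);
  assert (common_multiple (4, 3) == 12);
}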
1599
1600/* Return true if STMT_INFO describes a double reduction phi and if
1601 the other phi in the reduction is also relevant for vectorization.
1602 This rejects cases such as:
1603
1604 outer1:
1605 x_1 = PHI <x_3(outer2), ...>;
1606 ...
1607
1608 inner:
1609 x_2 = ...;
1610 ...
1611
1612 outer2:
1613 x_3 = PHI <x_2(inner)>;
1614
1615 if nothing in x_2 or elsewhere makes x_1 relevant. */
1616
1617static bool
1618vect_active_double_reduction_p (stmt_vec_info stmt_info)
1619{
1620 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
1621 return false;
1622
1623 return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
1624}
1625
1626/* Function vect_analyze_loop_operations.
1627
1628 Scan the loop stmts and make sure they are all vectorizable. */
1629
1630static opt_result
1631vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1632{
1633 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1634 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1635 int nbbs = loop->num_nodes;
1636 int i;
1637 stmt_vec_info stmt_info;
1638 bool need_to_vectorize = false;
1639 bool ok;
1640
1641 DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
1642
1643 auto_vec<stmt_info_for_cost> cost_vec;
1644
1645 for (i = 0; i < nbbs; i++)
1646 {
1647 basic_block bb = bbs[i];
1648
1649 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1650 gsi_next (&si))
1651 {
1652 gphi *phi = si.phi ();
1653 ok = true;
1654
1655 stmt_info = loop_vinfo->lookup_stmt (phi);
1656 if (dump_enabled_p ())
1657 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", phi);
1658 if (virtual_operand_p (gimple_phi_result (phi)))
1659 continue;
1660
1661 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1662 (i.e., a phi in the tail of the outer-loop). */
1663 if (! is_loop_header_bb_p (bb))
1664 {
1665 /* FORNOW: we currently don't support the case that these phis
1666 are not used in the outerloop (unless it is double reduction,
1667 i.e., this phi is vect_reduction_def), cause this case
1668 requires to actually do something here. */
1669 if (STMT_VINFO_LIVE_P (stmt_info)
1670 && !vect_active_double_reduction_p (stmt_info))
1671 return opt_result::failure_at (phi,
1672 "Unsupported loop-closed phi"
1673 " in outer-loop.\n");
1674
1675 /* If PHI is used in the outer loop, we check that its operand
1676 is defined in the inner loop. */
1677 if (STMT_VINFO_RELEVANT_P (stmt_info))
1678 {
1679 tree phi_op;
1680
1681 if (gimple_phi_num_args (phi) != 1)
1682 return opt_result::failure_at (phi, "unsupported phi");
1683
1684 phi_op = PHI_ARG_DEF (phi, 0);
1685 stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
1686 if (!op_def_info)
1687 return opt_result::failure_at (phi, "unsupported phi\n");
1688
1689 if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
1690 && (STMT_VINFO_RELEVANT (op_def_info)
1691 != vect_used_in_outer_by_reduction))
1692 return opt_result::failure_at (phi, "unsupported phi\n");
1693
1694 if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
1695 || (STMT_VINFO_DEF_TYPE (stmt_info)
1696 == vect_double_reduction_def))
1697 && !vectorizable_lc_phi (loop_vinfo,
1698 stmt_info, NULL, NULL))
1699 return opt_result::failure_at (phi, "unsupported phi\n");
1700 }
1701
1702 continue;
1703 }
1704
1705 gcc_assert (stmt_info);
1706
1707 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1708 || STMT_VINFO_LIVE_P (stmt_info))
1709 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1710 /* A scalar-dependence cycle that we don't support. */
1711 return opt_result::failure_at (phi,
1712 "not vectorized:"
1713 " scalar dependence cycle.\n");
1714
1715 if (STMT_VINFO_RELEVANT_P (stmt_info))
1716 {
1717 need_to_vectorize = true;
1718 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
1719 && ! PURE_SLP_STMT (stmt_info))
1720 ok = vectorizable_induction (loop_vinfo,
1721 stmt_info, NULL, NULL,
1722 &cost_vec);
1723 else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1724 || (STMT_VINFO_DEF_TYPE (stmt_info)
1725 == vect_double_reduction_def)
1726 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
1727 && ! PURE_SLP_STMT (stmt_info))
1728 ok = vectorizable_reduction (loop_vinfo,
1729 stmt_info, NULL, NULL, &cost_vec);
1730 }
1731
1732 /* SLP PHIs are tested by vect_slp_analyze_node_operations. */
1733 if (ok
1734 && STMT_VINFO_LIVE_P (stmt_info)
1735 && !PURE_SLP_STMT (stmt_info))
1736 ok = vectorizable_live_operation (loop_vinfo,
1737 stmt_info, NULL, NULL, NULL,
1738 -1, false, &cost_vec);
1739
1740 if (!ok)
1741 return opt_result::failure_at (phi,
1742 "not vectorized: relevant phi not "
1743 "supported: %G",
1744 static_cast <gimple *> (phi));
1745 }
1746
1747 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1748 gsi_next (&si))
1749 {
1750 gimple *stmt = gsi_stmt (si);
1751 if (!gimple_clobber_p (stmt)
1752 && !is_gimple_debug (stmt))
1753 {
1754 opt_result res
1755 = vect_analyze_stmt (loop_vinfo,
1756 loop_vinfo->lookup_stmt (stmt),
1757 &need_to_vectorize,
1758 NULL, NULL, &cost_vec);
1759 if (!res)
1760 return res;
1761 }
1762 }
1763 } /* bbs */
1764
1765 add_stmt_costs (loop_vinfo, loop_vinfo->target_cost_data, &cost_vec);
1766
1767 /* All operations in the loop are either irrelevant (deal with loop
1768 control, or dead), or only used outside the loop and can be moved
1769 out of the loop (e.g. invariants, inductions). The loop can be
1770 optimized away by scalar optimizations. We're better off not
1771 touching this loop. */
1772 if (!need_to_vectorize)
1773 {
1774 if (dump_enabled_p ())
1775 dump_printf_loc (MSG_NOTE, vect_location,
1776 "All the computation can be taken out of the loop.\n");
1777 return opt_result::failure_at
1778 (vect_location,
1779 "not vectorized: redundant loop. no profit to vectorize.\n");
1780 }
1781
1782 return opt_result::success ();
1783}
1784
1785/* Return true if we know that the iteration count is smaller than the
1786 vectorization factor. Return false if it isn't, or if we can't be sure
1787 either way. */
1788
1789static bool
1790vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
1791{
1792 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1793
1794 HOST_WIDE_INT max_niter;
1795 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1796 max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
1797 else
1798 max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1799
1800 if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
1801 return true;
1802
1803 return false;
1804}
1805
1806/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
1807 is worthwhile to vectorize. Return 1 if definitely yes, 0 if
1808 definitely no, or -1 if it's worth retrying. */
1809
1810static int
1811vect_analyze_loop_costing (loop_vec_info loop_vinfo)
1812{
1813 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1814 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1815
1816 /* Only loops that can handle partially-populated vectors can have iteration
1817 counts less than the vectorization factor. */
1818 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1819 {
1820 if (vect_known_niters_smaller_than_vf (loop_vinfo))
1821 {
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1824 "not vectorized: iteration count smaller than "
1825 "vectorization factor.\n");
1826 return 0;
1827 }
1828 }
1829
1830 /* If using the "very cheap" model, reject cases in which we'd keep
1831 a copy of the scalar code (even if we might be able to vectorize it). */
1832 if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
1833 && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1834 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1835 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
1836 {
1837 if (dump_enabled_p ())
1838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1839 "some scalar iterations would need to be peeled\n");
1840 return 0;
1841 }
1842
1843 int min_profitable_iters, min_profitable_estimate;
1844 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1845 &min_profitable_estimate);
1846
1847 if (min_profitable_iters < 0)
1848 {
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1851 "not vectorized: vectorization not profitable.\n");
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1854 "not vectorized: vector version will never be "
1855 "profitable.\n");
1856 return -1;
1857 }
1858
1859 int min_scalar_loop_bound = (param_min_vect_loop_bound
1860 * assumed_vf);
1861
1862 /* Use the cost model only if it is more conservative than user specified
1863 threshold. */
1864 unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
1865 min_profitable_iters);
1866
1867 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1868
1869 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1870 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
1871 {
1872 if (dump_enabled_p ())
1873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1874 "not vectorized: vectorization not profitable.\n");
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_NOTE, vect_location,
1877 "not vectorized: iteration count smaller than user "
1878 "specified loop bound parameter or minimum profitable "
1879 "iterations (whichever is more conservative).\n");
1880 return 0;
1881 }
1882
1883 /* The static profitability threshold min_profitable_estimate includes
1884 the cost of having to check at runtime whether the scalar loop
1885 should be used instead. If it turns out that we don't need or want
1886 such a check, the threshold we should use for the static estimate
1887 is simply the point at which the vector loop becomes more profitable
1888 than the scalar loop. */
1889 if (min_profitable_estimate > min_profitable_iters
1890 && !LOOP_REQUIRES_VERSIONING (loop_vinfo)
1891 && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1892 && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1893 && !vect_apply_runtime_profitability_check_p (loop_vinfo))
1894 {
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime"
1897 " choice between the scalar and vector loops\n");
1898 min_profitable_estimate = min_profitable_iters;
1899 }
1900
1901 /* If the vector loop needs multiple iterations to be beneficial then
1902 things are probably too close to call, and the conservative thing
1903 would be to stick with the scalar code. */
1904 if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
1905 && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
1906 {
1907 if (dump_enabled_p ())
1908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1909 "one iteration of the vector loop would be"
1910 " more expensive than the equivalent number of"
1911 " iterations of the scalar loop\n");
1912 return 0;
1913 }
1914
1915 HOST_WIDE_INT estimated_niter;
1916
1917 /* If we are vectorizing an epilogue then we know the maximum number of
1918 scalar iterations it will cover is at least one lower than the
1919 vectorization factor of the main loop. */
1920 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
1921 estimated_niter
1922 = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1;
1923 else
1924 {
1925 estimated_niter = estimated_stmt_executions_int (loop);
1926 if (estimated_niter == -1)
1927 estimated_niter = likely_max_stmt_executions_int (loop);
1928 }
1929 if (estimated_niter != -1
1930 && ((unsigned HOST_WIDE_INT) estimated_niter
1931 < MAX (th, (unsigned) min_profitable_estimate)))
1932 {
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "not vectorized: estimated iteration count too "
1936 "small.\n");
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_NOTE, vect_location,
1939 "not vectorized: estimated iteration count smaller "
1940 "than specified loop bound parameter or minimum "
1941 "profitable iterations (whichever is more "
1942 "conservative).\n");
1943 return -1;
1944 }
1945
1946 return 1;
1947}
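/* Illustrative sketch (editor's addition, not part of tree-vect-loop.c):
   the profitability threshold computed above.  The runtime threshold th
   is the more conservative of the user-tunable bound scaled by the VF
   and the cost model's break-even point; e.g. with
   min-vect-loop-bound = 0, an assumed VF of 8 and
   min_profitable_iters = 11, the loop is only vectorized when at least
   11 iterations are expected.  Function name and types are
   illustrative.  */
#include <algorithm>

static unsigned vectorization_threshold (unsigned min_vect_loop_bound,
                                         unsigned assumed_vf,
                                         int min_profitable_iters)
{
  int min_scalar_loop_bound = (int) (min_vect_loop_bound * assumed_vf);
  return (unsigned) std::max (min_scalar_loop_bound, min_profitable_iters);
}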
1948
1949static opt_result
1950vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
1951 vec<data_reference_p> *datarefs,
1952 unsigned int *n_stmts)
1953{
1954 *n_stmts = 0;
1955 for (unsigned i = 0; i < loop->num_nodes; i++)
1956 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1957 !gsi_end_p (gsi); gsi_next (&gsi))
1958 {
1959 gimple *stmt = gsi_stmt (gsi);
1960 if (is_gimple_debug (stmt))
1961 continue;
1962 ++(*n_stmts);
1963 opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
1964 NULL, 0);
1965 if (!res)
1966 {
1967 if (is_gimple_call (stmt) && loop->safelen)
1968 {
1969 tree fndecl = gimple_call_fndecl (stmt), op;
1970 if (fndecl != NULL_TREE)
1971 {
1972 cgraph_node *node = cgraph_node::get (fndecl);
1973 if (node != NULL && node->simd_clones != NULL)
1974 {
1975 unsigned int j, n = gimple_call_num_args (stmt);
1976 for (j = 0; j < n; j++)
1977 {
1978 op = gimple_call_arg (stmt, j);
1979 if (DECL_P (op)
1980 || (REFERENCE_CLASS_P (op)
1981 && get_base_address (op)))
1982 break;
1983 }
1984 op = gimple_call_lhs (stmt);
1985 /* Ignore #pragma omp declare simd functions
1986 if they don't have data references in the
1987 call stmt itself. */
1988 if (j == n
1989 && !(op
1990 && (DECL_P (op)
1991 || (REFERENCE_CLASS_P (op)
1992 && get_base_address (op)))))
1993 continue;
1994 }
1995 }
1996 }
1997 return res;
1998 }
1999 /* If dependence analysis will give up due to the limit on the
2000 number of datarefs stop here and fail fatally. */
2001 if (datarefs->length ()
2002 > (unsigned)param_loop_max_datarefs_for_datadeps)
2003 return opt_result::failure_at (stmt, "exceeded param "
2004 "loop-max-datarefs-for-datadeps\n");
2005 }
2006 return opt_result::success ();
2007}
2008
2009/* Look for SLP-only access groups and turn each individual access into its own
2010 group. */
2011static void
2012vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
2013{
2014 unsigned int i;
2015 struct data_reference *dr;
2016
2017 DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
2018
2019 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2020 FOR_EACH_VEC_ELT (datarefs, i, dr)
2021 {
2022 gcc_assert (DR_REF (dr));
2023 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr));
2024
2025 /* Check if the load is a part of an interleaving chain. */
2026 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2027 {
2028 stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
2029 unsigned int group_size = DR_GROUP_SIZE (first_element);
2030
2031 /* Check if SLP-only groups. */
2032 if (!STMT_SLP_TYPE (stmt_info)
2033 && STMT_VINFO_SLP_VECT_ONLY (first_element))
2034 {
2035 /* Dissolve the group. */
2036 STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
2037
2038 stmt_vec_info vinfo = first_element;
2039 while (vinfo)
2040 {
2041 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
2042 DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
2043 DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2044 DR_GROUP_SIZE (vinfo) = 1;
2045 if (STMT_VINFO_STRIDED_P (first_element))
2046 DR_GROUP_GAP (vinfo) = 0;
2047 else
2048 DR_GROUP_GAP (vinfo) = group_size - 1;
2049 vinfo = next;
2050 }
2051 }
2052 }
2053 }
2054}
2055
2056/* Determine if operating on full vectors for LOOP_VINFO might leave
2057 some scalar iterations still to do. If so, decide how we should
2058 handle those scalar iterations. The possibilities are:
2059
2060 (1) Make LOOP_VINFO operate on partial vectors instead of full vectors.
2061 In this case:
2062
2063 LOOP_VINFO_USING_PARTIAL_VECTORS_P == true
2064 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
2065 LOOP_VINFO_PEELING_FOR_NITER == false
2066
2067 (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop
2068 to handle the remaining scalar iterations. In this case:
2069
2070 LOOP_VINFO_USING_PARTIAL_VECTORS_P == false
2071 LOOP_VINFO_PEELING_FOR_NITER == true
2072
2073 There are two choices:
2074
2075 (2a) Consider vectorizing the epilogue loop at the same VF as the
2076 main loop, but using partial vectors instead of full vectors.
2077 In this case:
2078
2079 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true
2080
2081 (2b) Consider vectorizing the epilogue loop at lower VFs only.
2082 In this case:
2083
2084 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
2085
2086 When FOR_EPILOGUE_P is true, make this determination based on the
2087 assumption that LOOP_VINFO is an epilogue loop, otherwise make it
2088 based on the assumption that LOOP_VINFO is the main loop. The caller
2089 has made sure that the number of iterations is set appropriately for
2090 this value of FOR_EPILOGUE_P. */
2091
2092opt_result
2093vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
2094 bool for_epilogue_p)
2095{
2096 /* Determine whether there would be any scalar iterations left over. */
2097 bool need_peeling_or_partial_vectors_p
2098 = vect_need_peeling_or_partial_vectors_p (loop_vinfo);
2099
2100 /* Decide whether to vectorize the loop with partial vectors. */
2101 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
2102 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
2103 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2104 && need_peeling_or_partial_vectors_p)
2105 {
2106 /* For partial-vector-usage=1, try to push the handling of partial
2107 vectors to the epilogue, with the main loop continuing to operate
2108 on full vectors.
2109
2110 ??? We could then end up failing to use partial vectors if we
2111 decide to peel iterations into a prologue, and if the main loop
2112 then ends up processing fewer than VF iterations. */
2113 if (param_vect_partial_vector_usage == 1
2114 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
2115 && !vect_known_niters_smaller_than_vf (loop_vinfo))
2116 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
2117 else
2118 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
2119 }
2120
2121 if (dump_enabled_p ())
2122 {
2123 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2124 dump_printf_loc (MSG_NOTE, vect_location,
2125 "operating on partial vectors%s.\n",
2126 for_epilogue_p ? " for epilogue loop" : "");
2127 else
2128 dump_printf_loc (MSG_NOTE, vect_location,
2129 "operating only on full vectors%s.\n",
2130 for_epilogue_p ? " for epilogue loop" : "");
2131 }
2132
2133 if (for_epilogue_p)
2134 {
2135 loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
2136 gcc_assert (orig_loop_vinfo);
2137 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2138 gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2139 LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)));
2140 }
2141
2142 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2143 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2144 {
2145 /* Check that the loop processes at least one full vector. */
2146 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2147 tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo);
2148 if (known_lt (wi::to_widest (scalar_niters), vf))
2149 return opt_result::failure_at (vect_location,
2150 "loop does not have enough iterations"
2151 " to support vectorization.\n");
2152
2153 /* If we need to peel an extra epilogue iteration to handle data
2154 accesses with gaps, check that there are enough scalar iterations
2155 available.
2156
2157 The check above is redundant with this one when peeling for gaps,
2158 but the distinction is useful for diagnostics. */
2159 tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
2160 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2161 && known_lt (wi::to_widest (scalar_nitersm1), vf))
2162 return opt_result::failure_at (vect_location,
2163 "loop does not have enough iterations"
2164 " to support peeling for gaps.\n");
2165 }
2166
2167 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
2168 = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
2169 && need_peeling_or_partial_vectors_p);
2170
2171 return opt_result::success ();
2172}
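/* Illustrative sketch (editor's addition, not part of tree-vect-loop.c):
   the decision made above, reduced to its three outcomes (1), (2a) and
   (2b)/peeling.  A hypothetical plain-C++ stand-in; the real code works
   on loop_vec_info flags, and the parameter names are illustrative.  */
struct partial_vector_decision
{
  bool main_loop_uses_partial_vectors;
  bool epilogue_uses_partial_vectors;
  bool peel_for_niter;
};

static partial_vector_decision
decide (bool can_use_partial_vectors, bool leftover_scalar_iters,
        bool push_partial_vectors_to_epilogue)
{
  partial_vector_decision d = { false, false, false };
  if (can_use_partial_vectors && leftover_scalar_iters)
    {
      if (push_partial_vectors_to_epilogue)
        d.epilogue_uses_partial_vectors = true;   /* case (2a) */
      else
        d.main_loop_uses_partial_vectors = true;  /* case (1) */
    }
  /* Case (2): full vectors in the main loop, an epilogue handles the
     remaining scalar iterations.  */
  d.peel_for_niter = !d.main_loop_uses_partial_vectors
                     && leftover_scalar_iters;
  return d;
}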
2173
2174/* Function vect_analyze_loop_2.
2175
2176 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2177 for it. The different analyses will record information in the
2178 loop_vec_info struct. */
2179static opt_result
2180vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
2181{
2182 opt_result ok = opt_result::success ();
2183 int res;
2184 unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
2185 poly_uint64 min_vf = 2;
2186 loop_vec_info orig_loop_vinfo = NULL;
2187
2188 /* If we are dealing with an epilogue then orig_loop_vinfo points to the
2189 loop_vec_info of the first vectorized loop. */
2190 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2191 orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
2192 else
2193 orig_loop_vinfo = loop_vinfo;
2194 gcc_assert (orig_loop_vinfo);
2195
2196 /* The first group of checks is independent of the vector size. */
2197 fatal = true;
2198
2199 if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)
2200 && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)))
2201 return opt_result::failure_at (vect_location,
2202 "not vectorized: simd if(0)\n");
2203
2204 /* Find all data references in the loop (which correspond to vdefs/vuses)
2205 and analyze their evolution in the loop. */
2206
2207 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
2208
2209 /* Gather the data references and count stmts in the loop. */
2210 if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ())
2211 {
2212 opt_result res
2213 = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
2214 &LOOP_VINFO_DATAREFS (loop_vinfo),
2215 n_stmts);
2216 if (!res)
2217 {
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2220 "not vectorized: loop contains function "
2221 "calls or data references that cannot "
2222 "be analyzed\n");
2223 return res;
2224 }
2225 loop_vinfo->shared->save_datarefs ();
2226 }
2227 else
2228 loop_vinfo->shared->check_datarefs ();
2229
2230 /* Analyze the data references and also adjust the minimal
2231 vectorization factor according to the loads and stores. */
2232
2233 ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
2234 if (!ok)
2235 {
2236 if (dump_enabled_p ())
2237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2238 "bad data references.\n");
2239 return ok;
2240 }
2241
2242 /* Classify all cross-iteration scalar data-flow cycles.
2243 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2244 vect_analyze_scalar_cycles (loop_vinfo);
2245
2246 vect_pattern_recog (loop_vinfo);
2247
2248 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
2249
2250 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2251 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2252
2253 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
2254 if (!ok)
2255 {
2256 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2258 "bad data access.\n");
2259 return ok;
2260 }
2261
2262 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2263
2264 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal);
2265 if (!ok)
2266 {
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 "unexpected pattern.\n");
2270 return ok;
2271 }
2272
2273 /* While the rest of the analysis below depends on it in some way. */
2274 fatal = false;
2275
2276 /* Analyze data dependences between the data-refs in the loop
2277 and adjust the maximum vectorization factor according to
2278 the dependences.
2279 FORNOW: fail at the first data dependence that we encounter. */
2280
2281 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
2282 if (!ok)
2283 {
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "bad data dependence.\n");
2287 return ok;
2288 }
2289 if (max_vf != MAX_VECTORIZATION_FACTOR
2290 && maybe_lt (max_vf, min_vf))
2291 return opt_result::failure_at (vect_location, "bad data dependence.\n");
2292 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf;
2293
2294 ok = vect_determine_vectorization_factor (loop_vinfo);
2295 if (!ok)
2296 {
2297 if (dump_enabled_p ())
2298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2299 "can't determine vectorization factor.\n");
2300 return ok;
2301 }
2302 if (max_vf != MAX_VECTORIZATION_FACTOR
2303 && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2304 return opt_result::failure_at (vect_location, "bad data dependence.\n");
2305
2306 /* Compute the scalar iteration cost. */
2307 vect_compute_single_scalar_iteration_cost (loop_vinfo);
2308
2309 poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2310
2311 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
2312 ok = vect_analyze_slp (loop_vinfo, *n_stmts);
2313 if (!ok)
2314 return ok;
2315
2316 /* If there are any SLP instances mark them as pure_slp. */
2317 bool slp = vect_make_slp_decision (loop_vinfo);
2318 if (slp)
2319 {
2320 /* Find stmts that need to be both vectorized and SLPed. */
2321 vect_detect_hybrid_slp (loop_vinfo);
2322
2323 /* Update the vectorization factor based on the SLP decision. */
2324 vect_update_vf_for_slp (loop_vinfo);
2325
2326 /* Optimize the SLP graph with the vectorization factor fixed. */
2327 vect_optimize_slp (loop_vinfo);
2328
2329 /* Gather the loads reachable from the SLP graph entries. */
2330 vect_gather_slp_loads (loop_vinfo);
2331 }
2332
2333 bool saved_can_use_partial_vectors_p
2334 = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
2335
2336 /* We don't expect to have to roll back to anything other than an empty
2337 set of rgroups. */
2338 gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
2339
2340 /* This is the point where we can re-start analysis with SLP forced off. */
2341start_over:
2342
2343 /* Now the vectorization factor is final. */
2344 poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2345 gcc_assert (known_ne (vectorization_factor, 0U));
2346
2347 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
2348 {
2349 dump_printf_loc (MSG_NOTE, vect_location,
2350 "vectorization_factor = ");
2351 dump_dec (MSG_NOTE, vectorization_factor);
2352 dump_printf (MSG_NOTE, ", niters = %wd\n",
2353 LOOP_VINFO_INT_NITERS (loop_vinfo));
2354 }
2355
2356 /* Analyze the alignment of the data-refs in the loop.
2357 Fail if a data reference is found that cannot be vectorized. */
2358
2359 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2360 if (!ok)
2361 {
2362 if (dump_enabled_p ())
2363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2364 "bad data alignment.\n");
2365 return ok;
2366 }
2367
2368 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2369 It is important to call pruning after vect_analyze_data_ref_accesses,
2370 since we use grouping information gathered by interleaving analysis. */
2371 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2372 if (!ok)
2373 return ok;
2374
2375 /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2376 vectorization, since we do not want to add extra peeling or
2377 add versioning for alignment. */
2378 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2379 /* This pass will decide on using loop versioning and/or loop peeling in
2380 order to enhance the alignment of data references in the loop. */
2381 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2382 if (!ok)
2383 return ok;
2384
2385 if (slp)
2386 {
2387 /* Analyze operations in the SLP instances. Note this may
2388 remove unsupported SLP instances which makes the above
2389 SLP kind detection invalid. */
2390 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2391 vect_slp_analyze_operations (loop_vinfo);
2392 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2393 {
2394 ok = opt_result::failure_at (vect_location,
2395 "unsupported SLP instances\n");
2396 goto again;
2397 }
2398
2399 /* Check whether any load in ALL SLP instances is possibly permuted. */
2400 slp_tree load_node, slp_root;
2401 unsigned i, x;
2402 slp_instance instance;
2403 bool can_use_lanes = true;
2404 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)for (x = 0; ((loop_vinfo)->slp_instances).iterate ((x), &
(instance)); ++(x))
2405 {
2406 slp_root = SLP_INSTANCE_TREE (instance)(instance)->root;
2407 int group_size = SLP_TREE_LANES (slp_root)(slp_root)->lanes;
2408 tree vectype = SLP_TREE_VECTYPE (slp_root)(slp_root)->vectype;
2409 bool loads_permuted = false;
2410 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
2411 {
2412 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2413 continue;
2414 unsigned j;
2415 stmt_vec_info load_info;
2416 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2417 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2418 {
2419 loads_permuted = true;
2420 break;
2421 }
2422 }
2423
2424 /* If the loads and stores can be handled with load/store-lane
2425 instructions record it and move on to the next instance. */
2426 if (loads_permuted
2427 && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
2428 && vect_store_lanes_supported (vectype, group_size, false))
2429 {
2430 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
2431 {
2432 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2433 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2434 /* Use SLP for strided accesses (or if we can't
2435 load-lanes). */
2436 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2437 || ! vect_load_lanes_supported
2438 (STMT_VINFO_VECTYPE (stmt_vinfo),
2439 DR_GROUP_SIZE (stmt_vinfo), false))
2440 break;
2441 }
2442
2443 can_use_lanes
2444 = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
2445
2446 if (can_use_lanes && dump_enabled_p ())
2447 dump_printf_loc (MSG_NOTE, vect_location,
2448 "SLP instance %p can use load/store-lanes\n",
2449 instance);
2450 }
2451 else
2452 {
2453 can_use_lanes = false;
2454 break;
2455 }
2456 }
2457
2458 /* If all SLP instances can use load/store-lanes abort SLP and try again
2459 with SLP disabled. */
2460 if (can_use_lanes)
2461 {
2462 ok = opt_result::failure_at (vect_location,
2463 "Built SLP cancelled: can use "
2464 "load/store-lanes\n");
2465 if (dump_enabled_p ())
2466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2467 "Built SLP cancelled: all SLP instances support "
2468 "load/store-lanes\n");
2469 goto again;
2470 }
2471 }
2472
2473 /* Dissolve SLP-only groups. */
2474 vect_dissolve_slp_only_groups (loop_vinfo);
2475
2476 /* Scan all the remaining operations in the loop that are not subject
2477 to SLP and make sure they are vectorizable. */
2478 ok = vect_analyze_loop_operations (loop_vinfo);
2479 if (!ok)
2480 {
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "bad operation or unsupported loop bound.\n");
2484 return ok;
2485 }
2486
2487 /* For now, we don't expect to mix both masking and length approaches for one
2488 loop, disable it if both are recorded. */
2489 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2490 && !LOOP_VINFO_MASKS (loop_vinfo).is_empty ()
2491 && !LOOP_VINFO_LENS (loop_vinfo).is_empty ())
2492 {
2493 if (dump_enabled_p ())
2494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2495 "can't vectorize a loop with partial vectors"
2496 " because we don't expect to mix different"
2497 " approaches with partial vectors for the"
2498 " same loop.\n");
2499 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
2500 }
2501
2502 /* If we still have the option of using partial vectors,
2503 check whether we can generate the necessary loop controls. */
2504 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2505 && !vect_verify_full_masking (loop_vinfo)
2506 && !vect_verify_loop_lens (loop_vinfo))
2507 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
2508
2509 /* If we're vectorizing an epilogue loop, the vectorized loop either needs
2510 to be able to handle fewer than VF scalars, or needs to have a lower VF
2511 than the main loop. */
2512 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
2513 && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2514 && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2515 LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
2516 return opt_result::failure_at (vect_location,
2517 "Vectorization factor too high for"
2518 " epilogue loop.\n");
2519
2520 /* Decide whether this loop_vinfo should use partial vectors or peeling,
2521 assuming that the loop will be used as a main loop. We will redo
2522 this analysis later if we instead decide to use the loop as an
2523 epilogue loop. */
2524 ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false);
2525 if (!ok)
2526 return ok;
2527
2528 /* Check the costings of the loop make vectorizing worthwhile. */
2529 res = vect_analyze_loop_costing (loop_vinfo);
2530 if (res < 0)
2531 {
2532 ok = opt_result::failure_at (vect_location,
2533 "Loop costings may not be worthwhile.\n");
2534 goto again;
2535 }
2536 if (!res)
2537 return opt_result::failure_at (vect_location,
2538 "Loop costings not worthwhile.\n");
2539
2540 /* If an epilogue loop is required make sure we can create one. */
2541 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2542 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2543 {
2544 if (dump_enabled_p ())
2545 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2546 if (!vect_can_advance_ivs_p (loop_vinfo)
2547 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2548 single_exit (LOOP_VINFO_LOOP
2549 (loop_vinfo))))
2550 {
2551 ok = opt_result::failure_at (vect_location,
2552 "not vectorized: can't create required "
2553 "epilog loop\n");
2554 goto again;
2555 }
2556 }
2557
2558 /* During peeling, we need to check if number of loop iterations is
2559 enough for both peeled prolog loop and vector loop. This check
2560 can be merged along with threshold check of loop versioning, so
2561 increase threshold for this case if necessary.
2562
2563 If we are analyzing an epilogue we still want to check what its
2564 versioning threshold would be. If we decide to vectorize the epilogues we
2565 will want to use the lowest versioning threshold of all epilogues and main
2566 loop. This will enable us to enter a vectorized epilogue even when
2567 versioning the loop. We can't simply check whether the epilogue requires
2568 versioning though since we may have skipped some versioning checks when
2569 analyzing the epilogue. For instance, checks for alias versioning will be
2570 skipped when dealing with epilogues as we assume we already checked them
2571 for the main loop. So instead we always check the 'orig_loop_vinfo'. */
2572 if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo))
2573 {
2574 poly_uint64 niters_th = 0;
2575 unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
2576
2577 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2578 {
2579 /* Niters for peeled prolog loop. */
2580 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2581 {
2582 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2583 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2584 niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1;
2585 }
2586 else
2587 niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2588 }
2589
2590 /* Niters for at least one iteration of vectorized loop. */
2591 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2592 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2593 /* One additional iteration because of peeling for gap. */
2594 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2595 niters_th += 1;
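 /* For example (illustrative numbers): with a known prologue peel of 3
    iterations, VF = 4 and peeling for gaps, the computation above gives
    niters_th = 3 + 4 + 1 = 8, before the cost-model threshold is folded
    in below.  */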
2596
2597 /* Use the same condition as vect_transform_loop to decide when to use
2598 the cost to determine a versioning threshold. */
2599 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
2600 && ordered_p (th, niters_th))
2601 niters_th = ordered_max (poly_uint64 (th), niters_th);
2602
2603 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
2604 }
2605
2606 gcc_assert (known_eq (vectorization_factor,
2607 LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
2608
2609 /* Ok to vectorize! */
2610 return opt_result::success ();
2611
2612again:
2613 /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */
2614 gcc_assert (!ok);
2615
2616 /* Try again with SLP forced off but if we didn't do any SLP there is
2617 no point in re-trying. */
2618 if (!slp)
2619 return ok;
2620
2621 /* If there are reduction chains re-trying will fail anyway. */
2622 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2623 return ok;
2624
2625 /* Likewise if the grouped loads or stores in the SLP cannot be handled
2626 via interleaving or lane instructions. */
2627 slp_instance instance;
2628 slp_tree node;
2629 unsigned i, j;
2630 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2631 {
2632 stmt_vec_info vinfo;
2633 vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
2634 if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2635 continue;
2636 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
2637 unsigned int size = DR_GROUP_SIZE (vinfo);
2638 tree vectype = STMT_VINFO_VECTYPE (vinfo);
2639 if (! vect_store_lanes_supported (vectype, size, false)
2640 && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
2641 && ! vect_grouped_store_supported (vectype, size))
2642 return opt_result::failure_at (vinfo->stmt,
2643 "unsupported grouped store\n");
2644 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2645 {
2646 vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
2647 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
2648 bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
2649 size = DR_GROUP_SIZE (vinfo);
2650 vectype = STMT_VINFO_VECTYPE (vinfo);
2651 if (! vect_load_lanes_supported (vectype, size, false)
2652 && ! vect_grouped_load_supported (vectype, single_element_p,
2653 size))
2654 return opt_result::failure_at (vinfo->stmt,
2655 "unsupported grouped load\n");
2656 }
2657 }
2658
2659 if (dump_enabled_p ())
2660 dump_printf_loc (MSG_NOTE, vect_location,
2661 "re-trying with SLP disabled\n");
2662
2663 /* Roll back state appropriately. No SLP this time. */
2664 slp = false;
2665 /* Restore vectorization factor as it were without SLP. */
2666 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2667 /* Free the SLP instances. */
2668 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2669 vect_free_slp_instance (instance);
2670 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2671 /* Reset SLP type to loop_vect on all stmts. */
2672 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2673 {
2674 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2675 for (gimple_stmt_iterator si = gsi_start_phis (bb);
2676 !gsi_end_p (si); gsi_next (&si))
2677 {
2678 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2679 STMT_SLP_TYPE (stmt_info) = loop_vect;
2680 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
2681 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
2682 {
2683 /* vectorizable_reduction adjusts reduction stmt def-types,
2684 restore them to that of the PHI. */
2685 STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info))
2686 = STMT_VINFO_DEF_TYPE (stmt_info);
2687 STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize
2688 (STMT_VINFO_REDUC_DEF (stmt_info)))
2689 = STMT_VINFO_DEF_TYPE (stmt_info);
2690 }
2691 }
2692 for (gimple_stmt_iterator si = gsi_start_bb (bb);
2693 !gsi_end_p (si); gsi_next (&si))
2694 {
2695 if (is_gimple_debug (gsi_stmt (si)))
2696 continue;
2697 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2698 STMT_SLP_TYPE (stmt_info) = loop_vect;
2699 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2700 {
2701 stmt_vec_info pattern_stmt_info
2702 = STMT_VINFO_RELATED_STMT (stmt_info);
2703 if (STMT_VINFO_SLP_VECT_ONLY (pattern_stmt_info))
2704 STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
2705
2706 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
2707 STMT_SLP_TYPE (pattern_stmt_info) = loop_vect;
2708 for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq);
2709 !gsi_end_p (pi); gsi_next (&pi))
2710 STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))
2711 = loop_vect;
2712 }
2713 }
2714 }
2715 /* Free optimized alias test DDRS. */
2716 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0);
2717 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2718 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
2719 /* Reset target cost data. */
2720 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2721 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2722 = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2723 /* Reset accumulated rgroup information. */
2724 release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo));
2725 release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo));
2726 /* Reset assorted flags. */
2727 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2728 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2729 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2730 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
2731 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2732 = saved_can_use_partial_vectors_p;
2733
2734 goto start_over;
2735}
2736
2737/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears
2738 to be better than vectorizing it using OLD_LOOP_VINFO. Assume that
2739 OLD_LOOP_VINFO is better unless something specifically indicates
2740 otherwise.
2741
2742 Note that this deliberately isn't a partial order. */
2743
2744static bool
2745vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
2746 loop_vec_info old_loop_vinfo)
2747{
2748 struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo);
2749 gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop);
2750
2751 poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo);
2752 poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo);
2753
2754 /* Always prefer a VF of loop->simdlen over any other VF. */
2755 if (loop->simdlen)
2756 {
2757 bool new_simdlen_p = known_eq (new_vf, loop->simdlen);
2758 bool old_simdlen_p = known_eq (old_vf, loop->simdlen);
2759 if (new_simdlen_p != old_simdlen_p)
2760 return new_simdlen_p;
2761 }
2762
2763 /* Limit the VFs to what is likely to be the maximum number of iterations,
2764 to handle cases in which at least one loop_vinfo is fully-masked. */
2765 HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
2766 if (estimated_max_niter != -1)
2767 {
2768 if (known_le (estimated_max_niter, new_vf))
2769 new_vf = estimated_max_niter;
2770 if (known_le (estimated_max_niter, old_vf))
2771 old_vf = estimated_max_niter;
2772 }
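 /* For example (illustrative numbers): if the loop is known to execute at
    most 3 iterations, candidates with VF 8 and VF 4 are both compared as
    if their VF were 3, so the wider mode gains no advantage it cannot
    actually realize.  */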
2773
2774 /* Check whether the (fractional) cost per scalar iteration is lower
2775 or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf. */
2776 poly_int64 rel_new = new_loop_vinfo->vec_inside_cost * old_vf;
2777 poly_int64 rel_old = old_loop_vinfo->vec_inside_cost * new_vf;
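 /* For example (illustrative numbers): with new_vf = 8, old_vf = 4,
    new_inside_cost = 36 and old_inside_cost = 20, rel_new = 36 * 4 = 144
    and rel_old = 20 * 8 = 160, so the new loop_vinfo is cheaper per
    scalar iteration (4.5 vs. 5 cost units).  */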
2778
2779 HOST_WIDE_INT est_rel_new_min
2780 = estimated_poly_value (rel_new, POLY_VALUE_MIN);
2781 HOST_WIDE_INT est_rel_new_max
2782 = estimated_poly_value (rel_new, POLY_VALUE_MAX);
2783
2784 HOST_WIDE_INT est_rel_old_min
2785 = estimated_poly_value (rel_old, POLY_VALUE_MIN);
2786 HOST_WIDE_INT est_rel_old_max
2787 = estimated_poly_value (rel_old, POLY_VALUE_MAX);
2788
2789 /* Check first if we can make out an unambiguous total order from the minimum
2790 and maximum estimates. */
2791 if (est_rel_new_min < est_rel_old_min
2792 && est_rel_new_max < est_rel_old_max)
2793 return true;
2794 else if (est_rel_old_min < est_rel_new_min
2795 && est_rel_old_max < est_rel_new_max)
2796 return false;
2797 /* When old_loop_vinfo uses a variable vectorization factor,
2798 we know that it has a lower cost for at least one runtime VF.
2799 However, we don't know how likely that VF is.
2800
2801 One option would be to compare the costs for the estimated VFs.
2802 The problem is that that can put too much pressure on the cost
2803 model. E.g. if the estimated VF is also the lowest possible VF,
2804 and if old_loop_vinfo is 1 unit worse than new_loop_vinfo
2805 for the estimated VF, we'd then choose new_loop_vinfo even
2806 though (a) new_loop_vinfo might not actually be better than
2807 old_loop_vinfo for that VF and (b) it would be significantly
2808 worse at larger VFs.
2809
2810 Here we go for a hacky compromise: pick new_loop_vinfo if it is
2811 no more expensive than old_loop_vinfo even after doubling the
2812 estimated old_loop_vinfo VF. For all but trivial loops, this
2813 ensures that we only pick new_loop_vinfo if it is significantly
2814 better than old_loop_vinfo at the estimated VF. */
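 For example (illustrative numbers): with est_rel_new_likely = 40 and
 est_rel_old_likely = 100 the new loop_vinfo is preferred (40 * 2 <= 100),
 whereas 60 vs. 100 is not considered a big enough win.  */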
2815
2816 if (est_rel_old_min != est_rel_new_min
2817 || est_rel_old_max != est_rel_new_max)
2818 {
2819 HOST_WIDE_INT est_rel_new_likely
2820 = estimated_poly_value (rel_new, POLY_VALUE_LIKELY);
2821 HOST_WIDE_INT est_rel_old_likely
2822 = estimated_poly_value (rel_old, POLY_VALUE_LIKELY);
2823
2824 return est_rel_new_likely * 2 <= est_rel_old_likely;
2825 }
2826
2827 /* If there's nothing to choose between the loop bodies, see whether
2828 there's a difference in the prologue and epilogue costs. */
2829 if (new_loop_vinfo->vec_outside_cost != old_loop_vinfo->vec_outside_cost)
2830 return new_loop_vinfo->vec_outside_cost < old_loop_vinfo->vec_outside_cost;
2831
2832 return false;
2833}
2834
2835/* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return
2836 true if we should. */
2837
2838static bool
2839vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
2840 loop_vec_info old_loop_vinfo)
2841{
2842 if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo))
2843 return false;
2844
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_NOTE, vect_location,
2847 "***** Preferring vector mode %s to vector mode %s\n",
2848 GET_MODE_NAME (new_loop_vinfo->vector_mode),
2849 GET_MODE_NAME (old_loop_vinfo->vector_mode));
2850 return true;
2851}
2852
2853/* If LOOP_VINFO is already a main loop, return it unmodified. Otherwise
2854 try to reanalyze it as a main loop. Return the loop_vinfo on success
2855 and null on failure. */
2856
2857static loop_vec_info
2858vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts)
2859{
2860 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2861 return loop_vinfo;
2862
2863 if (dump_enabled_p ())
2864 dump_printf_loc (MSG_NOTE, vect_location,
2865 "***** Reanalyzing as a main loop with vector mode %s\n",
2866 GET_MODE_NAME (loop_vinfo->vector_mode));
2867
2868 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2869 vec_info_shared *shared = loop_vinfo->shared;
2870 opt_loop_vec_info main_loop_vinfo = vect_analyze_loop_form (loop, shared);
2871 gcc_assert (main_loop_vinfo);
2872
2873 main_loop_vinfo->vector_mode = loop_vinfo->vector_mode;
2874
2875 bool fatal = false;
2876 bool res = vect_analyze_loop_2 (main_loop_vinfo, fatal, n_stmts);
2877 loop->aux = NULL;
2878 if (!res)
2879 {
2880 if (dump_enabled_p ())
2881 dump_printf_loc (MSG_NOTE, vect_location,
2882 "***** Failed to analyze main loop with vector"
2883 " mode %s\n",
2884 GET_MODE_NAME (loop_vinfo->vector_mode));
2885 delete main_loop_vinfo;
2886 return NULL;
2887 }
2888 LOOP_VINFO_VECTORIZABLE_P (main_loop_vinfo) = 1;
2889 return main_loop_vinfo;
2890}
2891
2892/* Function vect_analyze_loop.
2893
2894 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2895 for it. The different analyses will record information in the
2896 loop_vec_info struct. */
2897opt_loop_vec_info
2898vect_analyze_loop (class loop *loop, vec_info_shared *shared)
2899{
2900 auto_vector_modes vector_modes;
2901
2902 /* Autodetect first vector size we try. */
2903 unsigned int autovec_flags
2904 = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
2905 loop->simdlen != 0);
2906 unsigned int mode_i = 0;
2907
2908 DUMP_VECT_SCOPE ("analyze_loop_nest");
2909
2910 if (loop_outer (loop)
2911 && loop_vec_info_for_loop (loop_outer (loop))
2912 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2913 return opt_loop_vec_info::failure_at (vect_location,
2914 "outer-loop already vectorized.\n");
2915
2916 if (!find_loop_nest (loop, &shared->loop_nest))
2917 return opt_loop_vec_info::failure_at
2918 (vect_location,
2919 "not vectorized: loop nest containing two or more consecutive inner"
2920 " loops cannot be vectorized\n");
2921
2922 unsigned n_stmts = 0;
2923 machine_mode autodetected_vector_mode = VOIDmode;
2924 opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
2925 machine_mode next_vector_mode = VOIDmode;
2926 poly_uint64 lowest_th = 0;
2927 unsigned vectorized_loops = 0;
2928 bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
2929 && !unlimited_cost_model (loop));
2930
2931 bool vect_epilogues = false;
2932 opt_result res = opt_result::success ();
2933 unsigned HOST_WIDE_INT simdlen = loop->simdlen;
2934 while (1)
2935 {
2936 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2937 opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
2938 if (!loop_vinfo)
2939 {
2940 if (dump_enabled_p ())
2941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2942 "bad loop form.\n");
2943 gcc_checking_assert (first_loop_vinfo == NULL);
2944 return loop_vinfo;
2945 }
2946 loop_vinfo->vector_mode = next_vector_mode;
2947
2948 bool fatal = false;
2949
2950 /* When pick_lowest_cost_p is true, we should in principle iterate
2951 over all the loop_vec_infos that LOOP_VINFO could replace and
2952 try to vectorize LOOP_VINFO under the same conditions.
2953 E.g. when trying to replace an epilogue loop, we should vectorize
2954 LOOP_VINFO as an epilogue loop with the same VF limit. When trying
2955 to replace the main loop, we should vectorize LOOP_VINFO as a main
2956 loop too.
2957
2958 However, autovectorize_vector_modes is usually sorted as follows:
2959
2960 - Modes that naturally produce lower VFs usually follow modes that
2961 naturally produce higher VFs.
2962
2963 - When modes naturally produce the same VF, maskable modes
2964 usually follow unmaskable ones, so that the maskable mode
2965 can be used to vectorize the epilogue of the unmaskable mode.
2966
2967 This order is preferred because it leads to the maximum
2968 epilogue vectorization opportunities. Targets should only use
2969 a different order if they want to make wide modes available while
2970 disparaging them relative to earlier, smaller modes. The assumption
2971 in that case is that the wider modes are more expensive in some
2972 way that isn't reflected directly in the costs.
2973
2974 There should therefore be few interesting cases in which
2975 LOOP_VINFO fails when treated as an epilogue loop, succeeds when
2976 treated as a standalone loop, and ends up being genuinely cheaper
2977 than FIRST_LOOP_VINFO. */
2978 if (vect_epilogues)
2979 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
2980
2981 res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
2982 if (mode_i == 0)
2983 autodetected_vector_mode = loop_vinfo->vector_mode;
2984 if (dump_enabled_p ())
2985 {
2986 if (res)
2987 dump_printf_loc (MSG_NOTE, vect_location,
2988 "***** Analysis succeeded with vector mode %s\n",
2989 GET_MODE_NAME (loop_vinfo->vector_mode));
2990 else
2991 dump_printf_loc (MSG_NOTE, vect_location,
2992 "***** Analysis failed with vector mode %s\n",
2993 GET_MODE_NAME (loop_vinfo->vector_mode));
2994 }
2995
2996 loop->aux = NULL;
2997
2998 if (!fatal)
2999 while (mode_i < vector_modes.length ()
3000 && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i]))
3001 {
3002 if (dump_enabled_p ())
3003 dump_printf_loc (MSG_NOTE, vect_location,
3004 "***** The result for vector mode %s would"
3005 " be the same\n",
3006 GET_MODE_NAME (vector_modes[mode_i]));
3007 mode_i += 1;
3008 }
3009
3010 if (res)
3011 {
3012 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
3013 vectorized_loops++;
3014
3015 /* Once we hit the desired simdlen for the first time,
3016 discard any previous attempts. */
3017 if (simdlen
3018 && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen))
3019 {
3020 delete first_loop_vinfo;
3021 first_loop_vinfo = opt_loop_vec_info::success (NULL);
3022 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
3023 simdlen = 0;
3024 }
3025 else if (pick_lowest_cost_p && first_loop_vinfo)
3026 {
3027 /* Keep trying to roll back vectorization attempts while the
3028 loop_vec_infos they produced were worse than this one. */
3029 vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
3030 while (!vinfos.is_empty ()
3031 && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ()))
3032 {
3033 gcc_assert (vect_epilogues);
3034 delete vinfos.pop ();
3035 }
3036 if (vinfos.is_empty ()
3037 && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
3038 {
3039 loop_vec_info main_loop_vinfo
3040 = vect_reanalyze_as_main_loop (loop_vinfo, &n_stmts);
3041 if (main_loop_vinfo == loop_vinfo)
3042 {
3043 delete first_loop_vinfo;
3044 first_loop_vinfo = opt_loop_vec_info::success (NULL);
3045 }
3046 else if (main_loop_vinfo
3047 && vect_joust_loop_vinfos (main_loop_vinfo,
3048 first_loop_vinfo))
3049 {
3050 delete first_loop_vinfo;
3051 first_loop_vinfo = opt_loop_vec_info::success (NULL);
3052 delete loop_vinfo;
3053 loop_vinfo
3054 = opt_loop_vec_info::success (main_loop_vinfo);
3055 }
3056 else
3057 delete main_loop_vinfo;
3058 }
3059 }
3060
3061 if (first_loop_vinfo == NULL)
3062 {
3063 first_loop_vinfo = loop_vinfo;
3064 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
3065 }
3066 else if (vect_epilogues
3067 /* For now only allow one epilogue loop. */
3068 && first_loop_vinfo->epilogue_vinfos.is_empty ())
3069 {
3070 first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
3071 poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3072 gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
3073 || maybe_ne (lowest_th, 0U));
3074 /* Keep track of the known smallest versioning
3075 threshold. */
3076 if (ordered_p (lowest_th, th))
3077 lowest_th = ordered_min (lowest_th, th);
3078 }
3079 else
3080 {
3081 delete loop_vinfo;
3082 loop_vinfo = opt_loop_vec_info::success (NULL);
3083 }
3084
3085 /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
3086 enabled, SIMDUID is not set, it is the innermost loop and we have
3087 either already found the loop's SIMDLEN or there was no SIMDLEN to
3088 begin with.
3089 TODO: Enable epilogue vectorization for loops with SIMDUID set. */
3090 vect_epilogues = (!simdlen
3091 && loop->inner == NULL
3092 && param_vect_epilogues_nomask
3093 && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
3094 && !loop->simduid
3095 /* For now only allow one epilogue loop, but allow
3096 pick_lowest_cost_p to replace it. */
3097 && (first_loop_vinfo->epilogue_vinfos.is_empty ()
3098 || pick_lowest_cost_p));
3099
3100 /* Commit to first_loop_vinfo if we have no reason to try
3101 alternatives. */
3102 if (!simdlen && !vect_epilogues && !pick_lowest_cost_p)
3103 break;
3104 }
3105 else
3106 {
3107 delete loop_vinfo;
3108 loop_vinfo = opt_loop_vec_info::success (NULL);
3109 if (fatal)
3110 {
3111 gcc_checking_assert (first_loop_vinfo == NULL);
3112 break;
3113 }
3114 }
3115
3116 /* Handle the case that the original loop can use partial
3117 vectorization, but want to only adopt it for the epilogue.
3118 The retry should be in the same mode as original. */
3119 if (vect_epilogues
3120 && loop_vinfo
3121 && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo))
3122 {
3123 gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3124 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
3125 if (dump_enabled_p ())
3126 dump_printf_loc (MSG_NOTE, vect_location,
3127 "***** Re-trying analysis with same vector mode"
3128 " %s for epilogue with partial vectors.\n",
3129 GET_MODE_NAME (loop_vinfo->vector_mode));
3130 continue;
3131 }
3132
3133 if (mode_i < vector_modes.length ()
3134 && VECTOR_MODE_P (autodetected_vector_mode)
3135 && (related_vector_mode (vector_modes[mode_i],
3136 GET_MODE_INNER (autodetected_vector_mode))
3137 == autodetected_vector_mode)
3138 && (related_vector_mode (autodetected_vector_mode,
3139 GET_MODE_INNER (vector_modes[mode_i]))
3140 == vector_modes[mode_i]))
3141 {
3142 if (dump_enabled_p ())
3143 dump_printf_loc (MSG_NOTE, vect_location,
3144 "***** Skipping vector mode %s, which would"
3145 " repeat the analysis for %s\n",
3146 GET_MODE_NAME (vector_modes[mode_i]),
3147 GET_MODE_NAME (autodetected_vector_mode));
3148 mode_i += 1;
3149 }
3150
3151 if (mode_i == vector_modes.length ()
3152 || autodetected_vector_mode == VOIDmode)
3153 break;
3154
3155 /* Try the next biggest vector size. */
3156 next_vector_mode = vector_modes[mode_i++];
3157 if (dump_enabled_p ())
3158 dump_printf_loc (MSG_NOTE, vect_location,
3159 "***** Re-trying analysis with vector mode %s\n",
3160 GET_MODE_NAME (next_vector_mode));
3161 }
3162
3163 if (first_loop_vinfo)
3164 {
3165 loop->aux = (loop_vec_info) first_loop_vinfo;
3166 if (dump_enabled_p ())
3167 dump_printf_loc (MSG_NOTE, vect_location,
3168 "***** Choosing vector mode %s\n",
3169 GET_MODE_NAME (first_loop_vinfo->vector_mode));
3170 LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
3171 return first_loop_vinfo;
3172 }
3173
3174 return opt_loop_vec_info::propagate_failure (res);
3175}
3176
3177/* Return true if there is an in-order reduction function for CODE, storing
3178 it in *REDUC_FN if so. */
3179
3180static bool
3181fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn)
3182{
3183 switch (code)
3184 {
3185 case PLUS_EXPR:
3186 *reduc_fn = IFN_FOLD_LEFT_PLUS;
3187 return true;
3188
3189 default:
3190 return false;
3191 }
3192}
3193
3194/* Function reduction_fn_for_scalar_code
3195
3196 Input:
3197 CODE - tree_code of a reduction operations.
3198
3199 Output:
3200 REDUC_FN - the corresponding internal function to be used to reduce the
3201 vector of partial results into a single scalar result, or IFN_LAST
3202 if the operation is a supported reduction operation, but does not have
3203 such an internal function.
3204
3205 Return FALSE if CODE currently cannot be vectorized as reduction. */
3206
3207static bool
3208reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
3209{
3210 switch (code)
3211 {
3212 case MAX_EXPR:
3213 *reduc_fn = IFN_REDUC_MAX;
3214 return true;
3215
3216 case MIN_EXPR:
3217 *reduc_fn = IFN_REDUC_MIN;
3218 return true;
3219
3220 case PLUS_EXPR:
3221 *reduc_fn = IFN_REDUC_PLUS;
3222 return true;
3223
3224 case BIT_AND_EXPR:
3225 *reduc_fn = IFN_REDUC_AND;
3226 return true;
3227
3228 case BIT_IOR_EXPR:
3229 *reduc_fn = IFN_REDUC_IOR;
3230 return true;
3231
3232 case BIT_XOR_EXPR:
3233 *reduc_fn = IFN_REDUC_XOR;
3234 return true;
3235
3236 case MULT_EXPR:
3237 case MINUS_EXPR:
3238 *reduc_fn = IFN_LAST;
3239 return true;
3240
3241 default:
3242 return false;
3243 }
3244}
3245
3246/* If there is a neutral value X such that SLP reduction NODE would not
3247 be affected by the introduction of additional X elements, return that X,
3248 otherwise return null. CODE is the code of the reduction and VECTOR_TYPE
3249 is the vector type that would hold element X. REDUC_CHAIN is true if
3250 the SLP statements perform a single reduction, false if each statement
3251 performs an independent reduction. */
3252
3253static tree
3254neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
3255 tree_code code, bool reduc_chain)
3256{
3257 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3258 stmt_vec_info stmt_vinfo = stmts[0];
3259 tree scalar_type = TREE_TYPE (vector_type);
3260 class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
3261 gcc_assert (loop);
3262
3263 switch (code)
3264 {
3265 case WIDEN_SUM_EXPR:
3266 case DOT_PROD_EXPR:
3267 case SAD_EXPR:
3268 case PLUS_EXPR:
3269 case MINUS_EXPR:
3270 case BIT_IOR_EXPR:
3271 case BIT_XOR_EXPR:
3272 return build_zero_cst (scalar_type);
3273
3274 case MULT_EXPR:
3275 return build_one_cst (scalar_type);
3276
3277 case BIT_AND_EXPR:
3278 return build_all_ones_cst (scalar_type);
3279
3280 case MAX_EXPR:
3281 case MIN_EXPR:
3282 /* For MIN/MAX the initial values are neutral. A reduction chain
3283 has only a single initial value, so that value is neutral for
3284 all statements. */
3285 if (reduc_chain)
3286 return PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
3287 loop_preheader_edge (loop));
3288 return NULL_TREE;
3289
3290 default:
3291 return NULL_TREE;
3292 }
3293}
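/* For example, a PLUS_EXPR SLP reduction over three scalars can be padded
   to a four-lane vector with the neutral value 0 without changing the sum;
   for MULT_EXPR the padding value is 1 and for BIT_AND_EXPR all-ones.  */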
3294
3295/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
3296 STMT is printed with a message MSG. */
3297
3298static void
3299report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
3300{
3301 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
3302}
3303
3304/* Return true if we need an in-order reduction for operation CODE
3305 on type TYPE, i.e. if the operation cannot safely be reassociated
3306 for that type under the current compilation flags. */
3307
3308bool
3309needs_fold_left_reduction_p (tree type, tree_code code)
3310{
3311 /* CHECKME: check for !flag_finite_math_only too? */
3312 if (SCALAR_FLOAT_TYPE_P (type))
3313 switch (code)
3314 {
3315 case MIN_EXPR:
3316 case MAX_EXPR:
3317 return false;
3318
3319 default:
3320 return !flag_associative_math;
3321 }
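 /* For example, a float accumulation such as s += a[i] must therefore be
    vectorized as an in-order (fold-left) reduction unless
    -fassociative-math (e.g. via -ffast-math) allows the additions to be
    reassociated.  */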
3322
3323 if (INTEGRAL_TYPE_P (type))
3324 {
3325 if (!operation_no_trapping_overflow (type, code))
3326 return true;
3327 return false;
3328 }
3329
3330 if (SAT_FIXED_POINT_TYPE_P (type))
3331 return true;
3332
3333 return false;
3334}
3335
3336/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG
3337 has a handled computation expression. Store the main reduction
3338 operation in *CODE. */
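/* For example, for a cycle a1 = phi <a0, a2>; a2 = a1 + b, the path walked
   from the PHI to the latch value a2 consists of the single PLUS_EXPR
   statement and *CODE is set to PLUS_EXPR; a mix of different operation
   codes on the path makes the check fail.  */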
3339
3340static bool
3341check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
3342 tree loop_arg, enum tree_code *code,
3343 vec<std::pair<ssa_op_iter, use_operand_p> > &path)
3344{
3345 auto_bitmap visited;
3346 tree lookfor = PHI_RESULT (phi);
3347 ssa_op_iter curri;
3348 use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
3349 while (USE_FROM_PTR (curr) != loop_arg)
3350 curr = op_iter_next_use (&curri);
3351 curri.i = curri.numops;
3352 do
3353 {
3354 path.safe_push (std::make_pair (curri, curr));
3355 tree use = USE_FROM_PTR (curr);
3356 if (use == lookfor)
3357 break;
3358 gimple *def = SSA_NAME_DEF_STMT (use);
3359 if (gimple_nop_p (def)
3360 || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
3361 {
3362pop:
3363 do
3364 {
3365 std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
3366 curri = x.first;
3367 curr = x.second;
3368 do
3369 curr = op_iter_next_use (&curri);
3370 /* Skip already visited or non-SSA operands (from iterating
3371 over PHI args). */
3372 while (curr != NULL_USE_OPERAND_P
3373 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
3374 || ! bitmap_set_bit (visited,
3375 SSA_NAME_VERSION
3376 (USE_FROM_PTR (curr)))));
3377 }
3378 while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
3379 if (curr == NULL_USE_OPERAND_P)
3380 break;
3381 }
3382 else
3383 {
3384 if (gimple_code (def) == GIMPLE_PHI)
3385 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
3386 else
3387 curr = op_iter_init_use (&curri, def, SSA_OP_USE);
3388 while (curr != NULL_USE_OPERAND_P
3389 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
3390 || ! bitmap_set_bit (visited,
3391 SSA_NAME_VERSION
3392 (USE_FROM_PTR (curr)))))
3393 curr = op_iter_next_use (&curri);
3394 if (curr == NULL_USE_OPERAND_P)
3395 goto pop;
3396 }
3397 }
3398 while (1);
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 {
3401 dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
3402 unsigned i;
3403 std::pair<ssa_op_iter, use_operand_p> *x;
3404 FOR_EACH_VEC_ELT (path, i, x)
3405 dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second));
3406 dump_printf (MSG_NOTE, "\n");
3407 }
3408
3409 /* Check whether the reduction path detected is valid. */
3410 bool fail = path.length () == 0;
3411 bool neg = false;
3412 int sign = -1;
3413 *code = ERROR_MARK;
3414 for (unsigned i = 1; i < path.length (); ++i)
3415 {
3416 gimple *use_stmt = USE_STMT (path[i].second);
3417 tree op = USE_FROM_PTR (path[i].second);
3418 if (! is_gimple_assign (use_stmt)
3419 /* The following make sure we can compute the operand index
3420 easily plus it mostly disallows chaining via COND_EXPR condition
3421 operands. */
3422 || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use
3423 && (gimple_num_ops (use_stmt) <= 2
3424 || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use)
3425 && (gimple_num_ops (use_stmt) <= 3
3426 || gimple_assign_rhs3_ptr (use_stmt) != path[i].second->use)))
3427 {
3428 fail = true;
3429 break;
3430 }
3431 /* Check there's only a single stmt the op is used on. For the
3432 not value-changing tail and the last stmt allow out-of-loop uses.
3433 ??? We could relax this and handle arbitrary live stmts by
3434 forcing a scalar epilogue for example. */
3435 imm_use_iterator imm_iter;
3436 gimple *op_use_stmt;
3437 unsigned cnt = 0;
3438 FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
3439 if (!is_gimple_debug (op_use_stmt)
3440 && (*code != ERROR_MARK
3441 || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
3442 {
3443 /* We want to allow x + x but not x < 1 ? x : 2. */
3444 if (is_gimple_assign (op_use_stmt)
3445 && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR)
3446 {
3447 use_operand_p use_p;
3448 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
3449 cnt++;
3450 }
3451 else
3452 cnt++;
3453 }
3454 if (cnt != 1)
3455 {
3456 fail = true;
3457 break;
3458 }
3459 tree_code use_code = gimple_assign_rhs_code (use_stmt);
3460 if (use_code == MINUS_EXPR)
3461 {
3462 use_code = PLUS_EXPR;
3463 /* Track whether we negate the reduction value each iteration. */
3464 if (gimple_assign_rhs2 (use_stmt) == op)
3465 neg = ! neg;
3466 }
3467 if (CONVERT_EXPR_CODE_P (use_code)
3468 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)),
3469 TREE_TYPE (gimple_assign_rhs1 (use_stmt))))
3470 ;
3471 else if (*code == ERROR_MARK)
3472 {
3473 *code = use_code;
3474 sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)));
3475 }
3476 else if (use_code != *code)
3477 {
3478 fail = true;
3479 break;
3480 }
3481 else if ((use_code == MIN_EXPR
3482 || use_code == MAX_EXPR)
3483 && sign != TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt))))
3484 {
3485 fail = true;
3486 break;
3487 }
3488 }
3489 return ! fail && ! neg && *code != ERROR_MARK;
3490}
3491
3492bool
3493check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
3494 tree loop_arg, enum tree_code code)
3495{
3496 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
3497 enum tree_code code_;
3498 return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path)
3499 && code_ == code);
3500}
3501
3502
3503
3504/* Function vect_is_simple_reduction
3505
3506 (1) Detect a cross-iteration def-use cycle that represents a simple
3507 reduction computation. We look for the following pattern:
3508
3509 loop_header:
3510 a1 = phi < a0, a2 >
3511 a3 = ...
3512 a2 = operation (a3, a1)
3513
3514 or
3515
3516 a3 = ...
3517 loop_header:
3518 a1 = phi < a0, a2 >
3519 a2 = operation (a3, a1)
3520
3521 such that:
3522 1. operation is commutative and associative and it is safe to
3523 change the order of the computation
3524 2. no uses for a2 in the loop (a2 is used out of the loop)
3525 3. no uses of a1 in the loop besides the reduction operation
3526 4. no uses of a1 outside the loop.
3527
3528 Conditions 1,4 are tested here.
3529 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
3530
3531 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
3532 nested cycles.
3533
3534 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
3535 reductions:
3536
3537 a1 = phi < a0, a2 >
3538 inner loop (def of a3)
3539 a2 = phi < a3 >
3540
3541 (4) Detect condition expressions, ie:
3542 for (int i = 0; i < N; i++)
3543 if (a[i] < val)
3544 ret_val = a[i];
3545
3546*/
3547
3548static stmt_vec_info
3549vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
3550 bool *double_reduc, bool *reduc_chain_p)
3551{
3552 gphi *phi = as_a <gphi *> (phi_info->stmt);
3553 gimple *phi_use_stmt = NULL;
3554 imm_use_iterator imm_iter;
3555 use_operand_p use_p;
3556
3557 *double_reduc = false;
3558 *reduc_chain_p = false;
3559 STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
3560
3561 tree phi_name = PHI_RESULT (phi);
3562 /* ??? If there are no uses of the PHI result the inner loop reduction
3563 won't be detected as possibly double-reduction by vectorizable_reduction
3564 because that tries to walk the PHI arg from the preheader edge which
3565 can be constant. See PR60382. */
3566 if (has_zero_uses (phi_name))
3567 return NULL;
3568 class loop *loop = (gimple_bb (phi))->loop_father;
3569 unsigned nphi_def_loop_uses = 0;
3570 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
3571 {
3572 gimple *use_stmt = USE_STMT (use_p);
3573 if (is_gimple_debug (use_stmt))
3574 continue;
3575
3576 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
3577 {
3578 if (dump_enabled_p ())
3579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3580 "intermediate value used outside loop.\n");
3581
3582 return NULL;
3583 }
3584
3585 nphi_def_loop_uses++;
3586 phi_use_stmt = use_stmt;
3587 }
3588
3589 tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3590 if (TREE_CODE (latch_def) != SSA_NAME)
3591 {
3592 if (dump_enabled_p ())
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594 "reduction: not ssa_name: %T\n", latch_def);
3595 return NULL;
3596 }
3597
3598 stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def);
3599 if (!def_stmt_info
3600 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
3601 return NULL;
3602
3603 bool nested_in_vect_loop
3604 = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
3605 unsigned nlatch_def_loop_uses = 0;
3606 auto_vec<gphi *, 3> lcphis;
3607 bool inner_loop_of_double_reduc = false;
3608 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)
3609 {
3610 gimple *use_stmt = USE_STMT (use_p);
3611 if (is_gimple_debug (use_stmt))
3612 continue;
3613 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
3614 nlatch_def_loop_uses++;
3615 else
3616 {
3617 /* We can have more than one loop-closed PHI. */
3618 lcphis.safe_push (as_a <gphi *> (use_stmt));
3619 if (nested_in_vect_loop
3620 && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
3621 == vect_double_reduction_def))
3622 inner_loop_of_double_reduc = true;
3623 }
3624 }
3625
3626 /* If we are vectorizing an inner reduction we are executing that
3627 in the original order only in case we are not dealing with a
3628 double reduction. */
3629 if (nested_in_vect_loop && !inner_loop_of_double_reduc)
3630 {
3631 if (dump_enabled_p ())
3632 report_vect_op (MSG_NOTE, def_stmt_info->stmt,
3633 "detected nested cycle: ");
3634 return def_stmt_info;
3635 }
3636
3637 /* If this isn't a nested cycle or if the nested cycle reduction value
3638 is used outside of the inner loop we cannot handle uses of the reduction
3639 value. */
3640 if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
3641 {
3642 if (dump_enabled_p ())
3643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3644 "reduction used in loop.\n");
3645 return NULL;
3646 }
3647
3648 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
3649 defined in the inner loop. */
3650 if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
3651 {
3652 tree op1 = PHI_ARG_DEF (def_stmt, 0);
3653 if (gimple_phi_num_args (def_stmt) != 1
3654 || TREE_CODE (op1) != SSA_NAME)
3655 {
3656 if (dump_enabled_p ())
3657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3658 "unsupported phi node definition.\n");
3659
3660 return NULL;
3661 }
3662
3663 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3664 if (gimple_bb (def1)
3665 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3666 && loop->inner
3667 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
3668 && is_gimple_assign (def1)
3669 && is_a <gphi *> (phi_use_stmt)
3670 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
3671 {
3672 if (dump_enabled_p ())
3673 report_vect_op (MSG_NOTE, def_stmt,
3674 "detected double reduction: ");
3675
3676 *double_reduc = true;
3677 return def_stmt_info;
3678 }
3679
3680 return NULL;
3681 }
3682
3683 /* Look for the expression computing latch_def from the loop PHI result. */
3684 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
3685 enum tree_code code;
3686 if (check_reduction_path (vect_location, loop, phi, latch_def, &code,
3687 path))
3688 {
3689 STMT_VINFO_REDUC_CODE (phi_info) = code;
3690 if (code == COND_EXPR && !nested_in_vect_loop)
3691 STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
3692
3693 /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
3694 reduction chain for which the additional restriction is that
3695 all operations in the chain are the same. */
3696 auto_vec<stmt_vec_info, 8> reduc_chain;
3697 unsigned i;
3698 bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
3699 for (i = path.length () - 1; i >= 1; --i)
3700 {
3701 gimple *stmt = USE_STMT (path[i].second);
3702 stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
3703 STMT_VINFO_REDUC_IDX (stmt_info)
3704 = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
3705 enum tree_code stmt_code = gimple_assign_rhs_code (stmt);
3706 bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)
3707 && (i == 1 || i == path.length () - 1));
3708 if ((stmt_code != code && !leading_conversion)
3709 /* We can only handle the final value in epilogue
3710 generation for reduction chains. */
3711 || (i != 1 && !has_single_use (gimple_assign_lhs (stmt))))
3712 is_slp_reduc = false;
3713 /* For reduction chains we support trailing/leading
3714 conversions. We do not store those in the actual chain. */
3715 if (leading_conversion)
3716 continue;
3717 reduc_chain.safe_push (stmt_info);
3718 }
3719 if (is_slp_reduc && reduc_chain.length () > 1)
3720 {
3721 for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
3722 {
3723 REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
3724 REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
3725 }
3726 REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
3727 REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
3728
3729 /* Save the chain for further analysis in SLP detection. */
3730 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
3731 REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
3732
3733 *reduc_chain_p = true;
3734 if (dump_enabled_p ())
3735 dump_printf_loc (MSG_NOTE, vect_location,
3736 "reduction: detected reduction chain\n");
3737 }
3738 else if (dump_enabled_p ())
3739 dump_printf_loc (MSG_NOTE, vect_location,
3740 "reduction: detected reduction\n");
3741
3742 return def_stmt_info;
3743 }
3744
3745 if (dump_enabled_p ())
3746 dump_printf_loc (MSG_NOTE, vect_location,
3747 "reduction: unknown pattern\n");
3748
3749 return NULL;
3750}
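
For orientation, these are the scalar shapes the detection above distinguishes; the snippet is illustrative only and not taken from the analyzed file (function and variable names are made up):

/* Plain reduction: a single PHI cycle s -> s + a[i] -> s.  */
int
sum_plain (int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += a[i];
  return s;
}

/* Reduction chain: the latch value is computed by two PLUS_EXPR statements
   feeding each other; all links use the same operation and each
   intermediate value has a single use, as required above.  */
int
sum_chain (int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s = (s + a[2 * i]) + a[2 * i + 1];
  return s;
}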
3751
3752/* Estimate the number of peeled epilogue iterations for LOOP_VINFO.
3753 PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations,
3754 or -1 if not known. */
3755
3756static int
3757vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue)
3758{
3759 int assumed_vf = vect_vf_for_cost (loop_vinfo);
3760 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1)
3761 {
3762 if (dump_enabled_p ())
3763 dump_printf_loc (MSG_NOTE, vect_location,
3764 "cost model: epilogue peel iters set to vf/2 "
3765 "because loop iterations are unknown .\n");
3766 return assumed_vf / 2;
3767 }
3768 else
3769 {
3770 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3771 peel_iters_prologue = MIN (niters, peel_iters_prologue);
3772 int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
3773 /* If we need to peel for gaps, but no peeling is required, we have to
3774 peel VF iterations. */
3775 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue)
3776 peel_iters_epilogue = assumed_vf;
3777 return peel_iters_epilogue;
3778 }
3779}
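
A worked instance of the arithmetic above, with made-up numbers:

/* With LOOP_VINFO_INT_NITERS = 100, peel_iters_prologue = 3 and an assumed
   vectorization factor of 8:
     peel_iters_epilogue = (100 - 3) % 8 = 1.
   If the remainder were 0 but peeling for gaps is required, the epilogue
   would instead get the full 8 iterations.  When niters or the prologue
   count is unknown, the function simply assumes vf/2 = 4.  */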
3780
3781/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
3782int
3783vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3784 int *peel_iters_epilogue,
3785 stmt_vector_for_cost *scalar_cost_vec,
3786 stmt_vector_for_cost *prologue_cost_vec,
3787 stmt_vector_for_cost *epilogue_cost_vec)
3788{
3789 int retval = 0;
3790
3791 *peel_iters_epilogue
3792 = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue);
3793
3794 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3795 {
3796 /* If peeled iterations are known but number of scalar loop
3797 iterations are unknown, count a taken branch per peeled loop. */
3798 if (peel_iters_prologue > 0)
3799 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3800 NULL, NULL_TREE, 0, vect_prologue);
3801 if (*peel_iters_epilogue > 0)
3802 retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
3803 NULL, NULL_TREE, 0, vect_epilogue);
3804 }
3805
3806 stmt_info_for_cost *si;
3807 int j;
3808 if (peel_iters_prologue)
3809 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3810 retval += record_stmt_cost (prologue_cost_vec,
3811 si->count * peel_iters_prologue,
3812 si->kind, si->stmt_info, si->misalign,
3813 vect_prologue);
3814 if (*peel_iters_epilogue)
3815 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3816 retval += record_stmt_cost (epilogue_cost_vec,
3817 si->count * *peel_iters_epilogue,
3818 si->kind, si->stmt_info, si->misalign,
3819 vect_epilogue);
3820
3821 return retval;
3822}
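
Read concretely, the scaling above behaves as follows (illustrative counts, not analyzer output):

/* With peel_iters_prologue = 3, *peel_iters_epilogue = 1 and a scalar cost
   vector containing one load (count 1) and one add (count 1), the prologue
   cost vector receives 3 loads + 3 adds, the epilogue cost vector receives
   1 load + 1 add, and if niters is unknown each peeled loop additionally
   pays one cond_branch_taken.  */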
3823
3824/* Function vect_estimate_min_profitable_iters
3825
3826 Return the number of iterations required for the vector version of the
3827 loop to be profitable relative to the cost of the scalar version of the
3828 loop.
3829
3830 *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
3831 of iterations for vectorization. -1 value means loop vectorization
3832 is not profitable. This returned value may be used for dynamic
3833 profitability check.
3834
3835 *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
3836 for static check against estimated number of iterations. */
3837
3838static void
3839vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3840 int *ret_min_profitable_niters,
3841 int *ret_min_profitable_estimate)
3842{
3843 int min_profitable_iters;
3844 int min_profitable_estimate;
3845 int peel_iters_prologue;
3846 int peel_iters_epilogue;
3847 unsigned vec_inside_cost = 0;
3848 int vec_outside_cost = 0;
3849 unsigned vec_prologue_cost = 0;
3850 unsigned vec_epilogue_cost = 0;
3851 int scalar_single_iter_cost = 0;
3852 int scalar_outside_cost = 0;
3853 int assumed_vf = vect_vf_for_cost (loop_vinfo);
3854 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3855 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3856
3857 /* Cost model disabled. */
3858 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3859 {
3860 if (dump_enabled_p ())
3861 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3862 *ret_min_profitable_niters = 0;
3863 *ret_min_profitable_estimate = 0;
3864 return;
3865 }
3866
3867 /* Requires loop versioning tests to handle misalignment. */
3868 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3869 {
3870 /* FIXME: Make cost depend on complexity of individual check. */
3871 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3872 (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
3873 NULL, NULL_TREE, 0, vect_prologue);
3874 if (dump_enabled_p ())
3875 dump_printf (MSG_NOTE,
3876 "cost model: Adding cost of checks for loop "
3877 "versioning to treat misalignment.\n");
3878 }
3879
3880 /* Requires loop versioning with alias checks. */
3881 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3882 {
3883 /* FIXME: Make cost depend on complexity of individual check. */
3884 unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3885 (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
3886 NULL, NULL_TREE, 0, vect_prologue);
3887 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
3888 if (len)
3889 /* Count LEN - 1 ANDs and LEN comparisons. */
3890 (void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
3891 scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
3892 len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
3893 if (len)
3894 {
3895 /* Count LEN - 1 ANDs and LEN comparisons. */
3896 unsigned int nstmts = len * 2 - 1;
3897 /* +1 for each bias that needs adding. */
3898 for (unsigned int i = 0; i < len; ++i)
3899 if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
3900 nstmts += 1;
3901 (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
3902 scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
3903 }
3904 if (dump_enabled_p ())
3905 dump_printf (MSG_NOTE,
3906 "cost model: Adding cost of checks for loop "
3907 "versioning aliasing.\n");
3908 }
3909
3910 /* Requires loop versioning with niter checks. */
3911 if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
3912 {
3913 /* FIXME: Make cost depend on complexity of individual check. */
3914 (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
3915 NULL, NULL_TREE, 0, vect_prologue);
3916 if (dump_enabled_p ())
3917 dump_printf (MSG_NOTE,
3918 "cost model: Adding cost of checks for loop "
3919 "versioning niters.\n");
3920 }
3921
3922 if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3923 (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
3924 NULL, NULL_TREE, 0, vect_prologue);
3925
3926 /* Count statements in scalar loop. Using this as scalar cost for a single
3927 iteration for now.
3928
3929 TODO: Add outer loop support.
3930
3931 TODO: Consider assigning different costs to different scalar
3932 statements. */
3933
3934 scalar_single_iter_cost
3935 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3936
3937 /* Add additional cost for the peeled instructions in prologue and epilogue
3938 loop. (For fully-masked loops there will be no peeling.)
3939
3940 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3941 at compile-time - we assume it's vf/2 (the worst would be vf-1).
3942
3943 TODO: Build an expression that represents peel_iters for prologue and
3944 epilogue to be used in a run-time test. */
3945
3946 bool prologue_need_br_taken_cost = false;
3947 bool prologue_need_br_not_taken_cost = false;
3948
3949 /* Calculate peel_iters_prologue. */
3950 if (vect_use_loop_mask_for_alignment_p (loop_vinfo))
3951 peel_iters_prologue = 0;
3952 else if (npeel < 0)
3953 {
3954 peel_iters_prologue = assumed_vf / 2;
3955 if (dump_enabled_p ())
3956 dump_printf (MSG_NOTE, "cost model: "
3957 "prologue peel iters set to vf/2.\n");
3958
3959 /* If peeled iterations are unknown, count a taken branch and a not taken
3960 branch per peeled loop. Even if scalar loop iterations are known,
3961 vector iterations are not known since peeled prologue iterations are
3962 not known. Hence guards remain the same. */
3963 prologue_need_br_taken_cost = true;
3964 prologue_need_br_not_taken_cost = true;
3965 }
3966 else
3967 {
3968 peel_iters_prologue = npeel;
3969 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0)
3970 /* If peeled iterations are known but number of scalar loop
3971 iterations are unknown, count a taken branch per peeled loop. */
3972 prologue_need_br_taken_cost = true;
3973 }
3974
3975 bool epilogue_need_br_taken_cost = false;
3976 bool epilogue_need_br_not_taken_cost = false;
3977
3978 /* Calculate peel_iters_epilogue. */
3979 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
3980 /* We need to peel exactly one iteration for gaps. */
3981 peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
3982 else if (npeel < 0)
3983 {
3984 /* If peeling for alignment is unknown, loop bound of main loop
3985 becomes unknown. */
3986 peel_iters_epilogue = assumed_vf / 2;
3987 if (dump_enabled_p ())
3988 dump_printf (MSG_NOTE, "cost model: "
3989 "epilogue peel iters set to vf/2 because "
3990 "peeling for alignment is unknown.\n");
3991
3992 /* See the same reason above in peel_iters_prologue calculation. */
3993 epilogue_need_br_taken_cost = true;
3994 epilogue_need_br_not_taken_cost = true;
3995 }
3996 else
3997 {
3998 peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel);
3999 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0)
4000 /* If peeled iterations are known but number of scalar loop
4001 iterations are unknown, count a taken branch per peeled loop. */
4002 epilogue_need_br_taken_cost = true;
4003 }
4004
4005 stmt_info_for_cost *si;
4006 int j;
4007 /* Add costs associated with peel_iters_prologue. */
4008 if (peel_iters_prologue)
4009 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
4010 {
4011 (void) add_stmt_cost (loop_vinfo, target_cost_data,
4012 si->count * peel_iters_prologue, si->kind,
4013 si->stmt_info, si->vectype, si->misalign,
4014 vect_prologue);
4015 }
4016
4017 /* Add costs associated with peel_iters_epilogue. */
4018 if (peel_iters_epilogue)
4019 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
4020 {
4021 (void) add_stmt_cost (loop_vinfo, target_cost_data,
4022 si->count * peel_iters_epilogue, si->kind,
4023 si->stmt_info, si->vectype, si->misalign,
4024 vect_epilogue);
4025 }
4026
4027 /* Add possible cond_branch_taken/cond_branch_not_taken cost. */
4028
4029 if (prologue_need_br_taken_cost)
4030 (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
4031 NULL, NULL_TREE, 0, vect_prologue);
4032
4033 if (prologue_need_br_not_taken_cost)
4034 (void) add_stmt_cost (loop_vinfo, target_cost_data, 1,
4035 cond_branch_not_taken, NULL, NULL_TREE, 0,
4036 vect_prologue);
4037
4038 if (epilogue_need_br_taken_cost)
4039 (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
4040 NULL, NULL_TREE, 0, vect_epilogue);
4041
4042 if (epilogue_need_br_not_taken_cost)
4043 (void) add_stmt_cost (loop_vinfo, target_cost_data, 1,
4044 cond_branch_not_taken, NULL, NULL_TREE, 0,
4045 vect_epilogue);
4046
4047 /* Take care of special costs for rgroup controls of partial vectors. */
4048 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4049 {
4050 /* Calculate how many masks we need to generate. */
4051 unsigned int num_masks = 0;
4052 rgroup_controls *rgm;
4053 unsigned int num_vectors_m1;
4054 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
4055 if (rgm->type)
4056 num_masks += num_vectors_m1 + 1;
4057 gcc_assert (num_masks > 0);
4058
4059 /* In the worst case, we need to generate each mask in the prologue
4060 and in the loop body. One of the loop body mask instructions
4061 replaces the comparison in the scalar loop, and since we don't
4062 count the scalar comparison against the scalar body, we shouldn't
4063 count that vector instruction against the vector body either.
4064
4065 Sometimes we can use unpacks instead of generating prologue
4066 masks and sometimes the prologue mask will fold to a constant,
4067 so the actual prologue cost might be smaller. However, it's
4068 simpler and safer to use the worst-case cost; if this ends up
4069 being the tie-breaker between vectorizing or not, then it's
4070 probably better not to vectorize. */
4071 (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks,
4072 vector_stmt, NULL, NULL_TREE, 0, vect_prologue);
4073 (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1,
4074 vector_stmt, NULL, NULL_TREE, 0, vect_body);
4075 }
4076 else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
4077 {
4078 /* Referring to the functions vect_set_loop_condition_partial_vectors
4079 and vect_set_loop_controls_directly, we need to generate each
4080 length in the prologue and in the loop body if required. Although
4081 there are some possible optimizations, we consider the worst case
4082 here. */
4083
4084 bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
4085 bool need_iterate_p
4086 = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
4087 && !vect_known_niters_smaller_than_vf (loop_vinfo));
4088
4089 /* Calculate how many statements to be added. */
4090 unsigned int prologue_stmts = 0;
4091 unsigned int body_stmts = 0;
4092
4093 rgroup_controls *rgc;
4094 unsigned int num_vectors_m1;
4095 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
4096 if (rgc->type)
4097 {
4098 /* May need one SHIFT for nitems_total computation. */
4099 unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor;
4100 if (nitems != 1 && !niters_known_p)
4101 prologue_stmts += 1;
4102
4103 /* May need one MAX and one MINUS for wrap around. */
4104 if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc))
4105 prologue_stmts += 2;
4106
4107 /* Need one MAX and one MINUS for each batch limit except for
4108 the first one. */
4109 prologue_stmts += num_vectors_m1 * 2;
4110
4111 unsigned int num_vectors = num_vectors_m1 + 1;
4112
4113 /* Need to set up lengths in prologue, only one MIN required
4114 for each since start index is zero. */
4115 prologue_stmts += num_vectors;
4116
4117 /* Each may need two MINs and one MINUS to update lengths in body
4118 for next iteration. */
4119 if (need_iterate_p)
4120 body_stmts += 3 * num_vectors;
4121 }
4122
4123 (void) add_stmt_cost (loop_vinfo, target_cost_data, prologue_stmts,
4124 scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
4125 (void) add_stmt_cost (loop_vinfo, target_cost_data, body_stmts,
4126 scalar_stmt, NULL, NULL_TREE, 0, vect_body);
4127 }
4128
4129 /* FORNOW: The scalar outside cost is incremented in one of the
4130 following ways:
4131
4132 1. The vectorizer checks for alignment and aliasing and generates
4133 a condition that allows dynamic vectorization. A cost model
4134 check is ANDED with the versioning condition. Hence scalar code
4135 path now has the added cost of the versioning check.
4136
4137 if (cost > th & versioning_check)
4138 jmp to vector code
4139
4140 Hence run-time scalar is incremented by not-taken branch cost.
4141
4142 2. The vectorizer then checks if a prologue is required. If the
4143 cost model check was not done before during versioning, it has to
4144 be done before the prologue check.
4145
4146 if (cost <= th)
4147 prologue = scalar_iters
4148 if (prologue == 0)
4149 jmp to vector code
4150 else
4151 execute prologue
4152 if (prologue == num_iters)
4153 go to exit
4154
4155 Hence the run-time scalar cost is incremented by a taken branch,
4156 plus a not-taken branch, plus a taken branch cost.
4157
4158 3. The vectorizer then checks if an epilogue is required. If the
4159 cost model check was not done before during prologue check, it
4160 has to be done with the epilogue check.
4161
4162 if (prologue == 0)
4163 jmp to vector code
4164 else
4165 execute prologue
4166 if (prologue == num_iters)
4167 go to exit
4168 vector code:
4169 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
4170 jmp to epilogue
4171
4172 Hence the run-time scalar cost should be incremented by 2 taken
4173 branches.
4174
4175 TODO: The back end may reorder the BBS's differently and reverse
4176 conditions/branch directions. Change the estimates below to
4177 something more reasonable. */
4178
4179 /* If the number of iterations is known and we do not do versioning, we can
4180 decide whether to vectorize at compile time. Hence the scalar version
4181 does not carry cost model guard costs. */
4182 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4183 || LOOP_REQUIRES_VERSIONING (loop_vinfo))
4184 {
4185 /* Cost model check occurs at versioning. */
4186 if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
4187 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
4188 else
4189 {
4190 /* Cost model check occurs at prologue generation. */
4191 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
4192 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
4193 + vect_get_stmt_cost (cond_branch_not_taken);
4194 /* Cost model check occurs at epilogue generation. */
4195 else
4196 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
4197 }
4198 }
4199
4200 /* Complete the target-specific cost calculations. */
4201 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
4202 &vec_inside_cost, &vec_epilogue_cost);
4203
4204 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
4205
4206 /* Stash the costs so that we can compare two loop_vec_infos. */
4207 loop_vinfo->vec_inside_cost = vec_inside_cost;
4208 loop_vinfo->vec_outside_cost = vec_outside_cost;
4209
4210 if (dump_enabled_p ())
4211 {
4212 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
4213 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
4214 vec_inside_cost);
4215 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
4216 vec_prologue_cost);
4217 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
4218 vec_epilogue_cost);
4219 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
4220 scalar_single_iter_cost);
4221 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
4222 scalar_outside_cost);
4223 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
4224 vec_outside_cost);
4225 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
4226 peel_iters_prologue);
4227 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
4228 peel_iters_epilogue);
4229 }
4230
4231 /* Calculate number of iterations required to make the vector version
4232 profitable, relative to the loop bodies only. The following condition
4233 must hold true:
4234 SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC
4235 where
4236 SIC = scalar iteration cost, VIC = vector iteration cost,
4237 VOC = vector outside cost, VF = vectorization factor,
4238 NPEEL = prologue iterations + epilogue iterations,
4239 SOC = scalar outside cost for run time cost model check. */
4240
4241 int saving_per_viter = (scalar_single_iter_cost * assumed_vf
4242 - vec_inside_cost);
4243 if (saving_per_viter <= 0)
4244 {
4245 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
4246 warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
4247 "vectorization did not happen for a simd loop");
4248
4249 if (dump_enabled_p ())
4250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4251 "cost model: the vector iteration cost = %d "
4252 "divided by the scalar iteration cost = %d "
4253 "is greater or equal to the vectorization factor = %d"
4254 ".\n",
4255 vec_inside_cost, scalar_single_iter_cost, assumed_vf);
4256 *ret_min_profitable_niters = -1;
4257 *ret_min_profitable_estimate = -1;
4258 return;
4259 }
4260
4261 /* ??? The "if" arm is written to handle all cases; see below for what
4262 we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
4263 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4264 {
4265 /* Rewriting the condition above in terms of the number of
4266 vector iterations (vniters) rather than the number of
4267 scalar iterations (niters) gives:
4268
4269 SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC
4270
4271 <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC
4272
4273 For integer N, X and Y when X > 0:
4274
4275 N * X > Y <==> N >= (Y /[floor] X) + 1. */
4276 int outside_overhead = (vec_outside_cost
4277 - scalar_single_iter_cost * peel_iters_prologue
4278 - scalar_single_iter_cost * peel_iters_epilogue
4279 - scalar_outside_cost);
4280 /* We're only interested in cases that require at least one
4281 vector iteration. */
4282 int min_vec_niters = 1;
4283 if (outside_overhead > 0)
4284 min_vec_niters = outside_overhead / saving_per_viter + 1;
4285
4286 if (dump_enabled_p ())
4287 dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n",
4288 min_vec_niters);
4289
4290 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4291 {
4292 /* Now that we know the minimum number of vector iterations,
4293 find the minimum niters for which the scalar cost is larger:
4294
4295 SIC * niters > VIC * vniters + VOC - SOC
4296
4297 We know that the minimum niters is no more than
4298 vniters * VF + NPEEL, but it might be (and often is) less
4299 than that if a partial vector iteration is cheaper than the
4300 equivalent scalar code. */
4301 int threshold = (vec_inside_cost * min_vec_niters
4302 + vec_outside_cost
4303 - scalar_outside_cost);
4304 if (threshold <= 0)
4305 min_profitable_iters = 1;
4306 else
4307 min_profitable_iters = threshold / scalar_single_iter_cost + 1;
4308 }
4309 else
4310 /* Convert the number of vector iterations into a number of
4311 scalar iterations. */
4312 min_profitable_iters = (min_vec_niters * assumed_vf
4313 + peel_iters_prologue
4314 + peel_iters_epilogue);
4315 }
4316 else
4317 {
4318 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
4319 * assumed_vf
4320 - vec_inside_cost * peel_iters_prologue
4321 - vec_inside_cost * peel_iters_epilogue);
4322 if (min_profitable_iters <= 0)
4323 min_profitable_iters = 0;
4324 else
4325 {
4326 min_profitable_iters /= saving_per_viter;
4327
4328 if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
4329 <= (((int) vec_inside_cost * min_profitable_iters)
4330 + (((int) vec_outside_cost - scalar_outside_cost)
4331 * assumed_vf)))
4332 min_profitable_iters++;
4333 }
4334 }
4335
4336 if (dump_enabled_p ())
4337 dump_printf (MSG_NOTE,
4338 " Calculated minimum iters for profitability: %d\n",
4339 min_profitable_iters);
4340
4341 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
4342 && min_profitable_iters < (assumed_vf + peel_iters_prologue))
4343 /* We want the vectorized loop to execute at least once. */
4344 min_profitable_iters = assumed_vf + peel_iters_prologue;
4345 else if (min_profitable_iters < peel_iters_prologue)
4346 /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the
4347 vectorized loop executes at least once. */
4348 min_profitable_iters = peel_iters_prologue;
4349
4350 if (dump_enabled_p ())
4351 dump_printf_loc (MSG_NOTE, vect_location,
4352 " Runtime profitability threshold = %d\n",
4353 min_profitable_iters);
4354
4355 *ret_min_profitable_niters = min_profitable_iters;
4356
4357 /* Calculate number of iterations required to make the vector version
4358 profitable, relative to the loop bodies only.
4359
4360 Non-vectorized variant is SIC * niters and it must win over vector
4361 variant on the expected loop trip count. The following condition must hold true:
4362 SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */
4363
4364 if (vec_outside_cost <= 0)
4365 min_profitable_estimate = 0;
4366 /* ??? This "else if" arm is written to handle all cases; see below for
4367 what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
4368 else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4369 {
4370 /* This is a repeat of the code above, but with + SOC rather
4371 than - SOC. */
4372 int outside_overhead = (vec_outside_cost
4373 - scalar_single_iter_cost * peel_iters_prologue
4374 - scalar_single_iter_cost * peel_iters_epilogue
4375 + scalar_outside_cost);
4376 int min_vec_niters = 1;
4377 if (outside_overhead > 0)
4378 min_vec_niters = outside_overhead / saving_per_viter + 1;
4379
4380 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4381 {
4382 int threshold = (vec_inside_cost * min_vec_niters
4383 + vec_outside_cost
4384 + scalar_outside_cost);
4385 min_profitable_estimate = threshold / scalar_single_iter_cost + 1;
4386 }
4387 else
4388 min_profitable_estimate = (min_vec_niters * assumed_vf
4389 + peel_iters_prologue
4390 + peel_iters_epilogue);
4391 }
4392 else
4393 {
4394 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
4395 * assumed_vf
4396 - vec_inside_cost * peel_iters_prologue
4397 - vec_inside_cost * peel_iters_epilogue)
4398 / ((scalar_single_iter_cost * assumed_vf)
4399 - vec_inside_cost);
4400 }
4401 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
4402 if (dump_enabled_p ())
4403 dump_printf_loc (MSG_NOTE, vect_location,
4404 " Static estimate profitability threshold = %d\n",
4405 min_profitable_estimate);
4406
4407 *ret_min_profitable_estimate = min_profitable_estimate;
4408}
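
A worked instance of the runtime threshold above for the non-partial-vector path, with hypothetical costs:

/* Assume SIC = 4, VIC = 8, VOC = 40, SOC = 0, VF = 4 and no peeling.
   saving_per_viter = SIC * VF - VIC = 16 - 8 = 8.
   min_profitable_iters = (VOC - SOC) * VF / saving_per_viter
			= 40 * 4 / 8 = 20,
   and since 4 * 4 * 20 <= 8 * 20 + 40 * 4 holds, the value is bumped to 21;
   i.e. the vector loop only pays off from 21 scalar iterations on.  The
   result is then raised further if it falls below VF plus the prologue
   iterations.  */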
4409
4410/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
4411 vector elements (not bits) for a vector with NELT elements. */
4412static void
4413calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
4414 vec_perm_builder *sel)
4415{
4416 /* The encoding is a single stepped pattern. Any wrap-around is handled
4417 by vec_perm_indices. */
4418 sel->new_vector (nelt, 1, 3);
4419 for (unsigned int i = 0; i < 3; i++)
4420 sel->quick_push (i + offset);
4421}
4422
4423/* Checks whether the target supports whole-vector shifts for vectors of mode
4424 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
4425 it supports vec_perm_const with masks for all necessary shift amounts. */
4426static bool
4427have_whole_vector_shift (machine_mode mode)
4428{
4429 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4430 return true;
4431
4432 /* Variable-length vectors should be handled via the optab. */
4433 unsigned int nelt;
4434 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
4435 return false;
4436
4437 vec_perm_builder sel;
4438 vec_perm_indices indices;
4439 for (unsigned int i = nelt / 2; i >= 1; i /= 2)
4440 {
4441 calc_vec_perm_mask_for_shift (i, nelt, &sel);
4442 indices.new_vector (sel, 2, nelt);
4443 if (!can_vec_perm_const_p (mode, indices, false))
4444 return false;
4445 }
4446 return true;
4447}
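
As an example of the selector built above (derived from the code, with nelt = 8 and offset = 4), the encoded stepped pattern 4, 5, 6 extends to:

/* { 4, 5, 6, 7, 8, 9, 10, 11 }
   Indices 0-7 select elements of the first input and 8-11 select from the
   second input of the two-operand permute, so the overall effect is a
   shift of the concatenated vector by 4 elements.  */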
4448
4449/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
4450 functions. Design better to avoid maintenance issues. */
4451
4452/* Function vect_model_reduction_cost.
4453
4454 Models cost for a reduction operation, including the vector ops
4455 generated within the strip-mine loop in some cases, the initial
4456 definition before the loop, and the epilogue code that must be generated. */
4457
4458static void
4459vect_model_reduction_cost (loop_vec_info loop_vinfo,
4460 stmt_vec_info stmt_info, internal_fn reduc_fn,
4461 vect_reduction_type reduction_type,
4462 int ncopies, stmt_vector_for_cost *cost_vec)
4463{
4464 int prologue_cost = 0, epilogue_cost = 0, inside_cost;
4465 enum tree_code code;
4466 optab optab;
4467 tree vectype;
4468 machine_mode mode;
4469 class loop *loop = NULL;
4470
4471 if (loop_vinfo)
4472 loop = LOOP_VINFO_LOOP (loop_vinfo);
4473
4474 /* Condition reductions generate two reductions in the loop. */
4475 if (reduction_type == COND_REDUCTION)
4476 ncopies *= 2;
4477
4478 vectype = STMT_VINFO_VECTYPE (stmt_info);
4479 mode = TYPE_MODE (vectype);
4480 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
4481
4482 code = gimple_assign_rhs_code (orig_stmt_info->stmt);
4483
4484 if (reduction_type == EXTRACT_LAST_REDUCTION)
4485 /* No extra instructions are needed in the prologue. The loop body
4486 operations are costed in vectorizable_condition. */
4487 inside_cost = 0;
4488 else if (reduction_type == FOLD_LEFT_REDUCTION)
4489 {
4490 /* No extra instructions needed in the prologue. */
4491 prologue_cost = 0;
4492
4493 if (reduc_fn != IFN_LAST)
4494 /* Count one reduction-like operation per vector. */
4495 inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
4496 stmt_info, 0, vect_body);
4497 else
4498 {
4499 /* Use NELEMENTS extracts and NELEMENTS scalar ops. */
4500 unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
4501 inside_cost = record_stmt_cost (cost_vec, nelements,
4502 vec_to_scalar, stmt_info, 0,
4503 vect_body);
4504 inside_cost += record_stmt_cost (cost_vec, nelements,
4505 scalar_stmt, stmt_info, 0,
4506 vect_body);
4507 }
4508 }
4509 else
4510 {
4511 /* Add in cost for initial definition.
4512 For cond reduction we have four vectors: initial index, step,
4513 initial result of the data reduction, initial value of the index
4514 reduction. */
4515 int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
4516 prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
4517 scalar_to_vec, stmt_info, 0,
4518 vect_prologue);
4519 }
4520
4521 /* Determine cost of epilogue code.
4522
4523 We have a reduction operator that will reduce the vector in one statement.
4524 Also requires scalar extract. */
4525
4526 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info))
4527 {
4528 if (reduc_fn != IFN_LAST)
4529 {
4530 if (reduction_type == COND_REDUCTION)
4531 {
4532 /* An EQ stmt and a COND_EXPR stmt. */
4533 epilogue_cost += record_stmt_cost (cost_vec, 2,
4534 vector_stmt, stmt_info, 0,
4535 vect_epilogue);
4536 /* Reduction of the max index and a reduction of the found
4537 values. */
4538 epilogue_cost += record_stmt_cost (cost_vec, 2,
4539 vec_to_scalar, stmt_info, 0,
4540 vect_epilogue);
4541 /* A broadcast of the max value. */
4542 epilogue_cost += record_stmt_cost (cost_vec, 1,
4543 scalar_to_vec, stmt_info, 0,
4544 vect_epilogue);
4545 }
4546 else
4547 {
4548 epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
4549 stmt_info, 0, vect_epilogue);
4550 epilogue_cost += record_stmt_cost (cost_vec, 1,
4551 vec_to_scalar, stmt_info, 0,
4552 vect_epilogue);
4553 }
4554 }
4555 else if (reduction_type == COND_REDUCTION)
4556 {
4557 unsigned estimated_nunits = vect_nunits_for_cost (vectype);
4558 /* Extraction of scalar elements. */
4559 epilogue_cost += record_stmt_cost (cost_vec,
4560 2 * estimated_nunits,
4561 vec_to_scalar, stmt_info, 0,
4562 vect_epilogue);
4563 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */
4564 epilogue_cost += record_stmt_cost (cost_vec,
4565 2 * estimated_nunits - 3,
4566 scalar_stmt, stmt_info, 0,
4567 vect_epilogue);
4568 }
4569 else if (reduction_type == EXTRACT_LAST_REDUCTION
4570 || reduction_type == FOLD_LEFT_REDUCTION)
4571 /* No extra instructions needed in the epilogue. */
4572 ;
4573 else
4574 {
4575 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4576 tree bitsize =
4577 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt)));
4578 int element_bitsize = tree_to_uhwi (bitsize);
4579 int nelements = vec_size_in_bits / element_bitsize;
4580
4581 if (code == COND_EXPR)
4582 code = MAX_EXPR;
4583
4584 optab = optab_for_tree_code (code, vectype, optab_default);
4585
4586 /* We have a whole vector shift available. */
4587 if (optab != unknown_optab
4588 && VECTOR_MODE_P (mode)
4589 && optab_handler (optab, mode) != CODE_FOR_nothing
4590 && have_whole_vector_shift (mode))
4591 {
4592 /* Final reduction via vector shifts and the reduction operator.
4593 Also requires scalar extract. */
4594 epilogue_cost += record_stmt_cost (cost_vec,
4595 exact_log2 (nelements) * 2,
4596 vector_stmt, stmt_info, 0,
4597 vect_epilogue);
4598 epilogue_cost += record_stmt_cost (cost_vec, 1,
4599 vec_to_scalar, stmt_info, 0,
4600 vect_epilogue);
4601 }
4602 else
4603 /* Use extracts and reduction op for final reduction. For N
4604 elements, we have N extracts and N-1 reduction ops. */
4605 epilogue_cost += record_stmt_cost (cost_vec,
4606 nelements + nelements - 1,
4607 vector_stmt, stmt_info, 0,
4608 vect_epilogue);
4609 }
4610 }
4611
4612 if (dump_enabled_p ())
4613 dump_printf (MSG_NOTE,
4614 "vect_model_reduction_cost: inside_cost = %d, "
4615 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
4616 prologue_cost, epilogue_cost);
4617}
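
A concrete reading of the epilogue accounting above, assuming a plain add reduction over eight elements and no target reduction ifn:

/* For nelements = 8:
   - with a whole-vector shift available the epilogue is costed as
     exact_log2 (8) * 2 = 6 vector_stmt plus 1 vec_to_scalar;
   - otherwise it is costed as 8 + 8 - 1 = 15 vector_stmt, i.e. one
     extract per element and 7 combining operations.  */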
4618
4619
4620
4621/* Function get_initial_def_for_reduction
4622
4623 Input:
4624 STMT_VINFO - a stmt that performs a reduction operation in the loop.
4625 INIT_VAL - the initial value of the reduction variable
4626
4627 Output:
4628 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
4629 of the reduction (used for adjusting the epilog - see below).
4630 Return a vector variable, initialized according to the operation that
4631 STMT_VINFO performs. This vector will be used as the initial value
4632 of the vector of partial results.
4633
4634 Option1 (adjust in epilog): Initialize the vector as follows:
4635 add/bit or/xor: [0,0,...,0,0]
4636 mult/bit and: [1,1,...,1,1]
4637 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
4638 and when necessary (e.g. add/mult case) let the caller know
4639 that it needs to adjust the result by init_val.
4640
4641 Option2: Initialize the vector as follows:
4642 add/bit or/xor: [init_val,0,0,...,0]
4643 mult/bit and: [init_val,1,1,...,1]
4644 min/max/cond_expr: [init_val,init_val,...,init_val]
4645 and no adjustments are needed.
4646
4647 For example, for the following code:
4648
4649 s = init_val;
4650 for (i=0;i<n;i++)
4651 s = s + a[i];
4652
4653 STMT_VINFO is 's = s + a[i]', and the reduction variable is 's'.
4654 For a vector of 4 units, we want to return either [0,0,0,init_val],
4655 or [0,0,0,0] and let the caller know that it needs to adjust
4656 the result at the end by 'init_val'.
4657
4658 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
4659 initialization vector is simpler (same element in all entries), if
4660 ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
4661
4662 A cost model should help decide between these two schemes. */
4663
4664static tree
4665get_initial_def_for_reduction (loop_vec_info loop_vinfo,
4666 stmt_vec_info stmt_vinfo,
4667 enum tree_code code, tree init_val,
4668 tree *adjustment_def)
4669{
4670 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4671 tree scalar_type = TREE_TYPE (init_val);
4672 tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
4673 tree def_for_init;
4674 tree init_def;
4675 REAL_VALUE_TYPE real_init_val = dconst0;
4676 int int_init_val = 0;
4677 gimple_seq stmts = NULL;
4678
4679 gcc_assert (vectype);
4680
4681 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4682 || SCALAR_FLOAT_TYPE_P (scalar_type));
4683
4684 gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo)
4685 || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father);
4686
4687 /* ADJUSTMENT_DEF is NULL when called from
4688 vect_create_epilog_for_reduction to vectorize double reduction. */
4689 if (adjustment_def)
4690 *adjustment_def = NULL;
4691
4692 switch (code)
4693 {
4694 case WIDEN_SUM_EXPR:
4695 case DOT_PROD_EXPR:
4696 case SAD_EXPR:
4697 case PLUS_EXPR:
4698 case MINUS_EXPR:
4699 case BIT_IOR_EXPR:
4700 case BIT_XOR_EXPR:
4701 case MULT_EXPR:
4702 case BIT_AND_EXPR:
4703 {
4704 if (code == MULT_EXPR)
4705 {
4706 real_init_val = dconst1;
4707 int_init_val = 1;
4708 }
4709
4710 if (code == BIT_AND_EXPR)
4711 int_init_val = -1;
4712
4713 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4714 def_for_init = build_real (scalar_type, real_init_val);
4715 else
4716 def_for_init = build_int_cst (scalar_type, int_init_val);
4717
4718 if (adjustment_def || operand_equal_p (def_for_init, init_val, 0))
4719 {
4720 /* Option1: the first element is '0' or '1' as well. */
4721 if (!operand_equal_p (def_for_init, init_val, 0))
4722 *adjustment_def = init_val;
4723 init_def = gimple_build_vector_from_val (&stmts, vectype,
4724 def_for_init);
4725 }
4726 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
4727 {
4728 /* Option2 (variable length): the first element is INIT_VAL. */
4729 init_def = gimple_build_vector_from_val (&stmts, vectype,
4730 def_for_init);
4731 init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT,
4732 vectype, init_def, init_val);
4733 }
4734 else
4735 {
4736 /* Option2: the first element is INIT_VAL. */
4737 tree_vector_builder elts (vectype, 1, 2);
4738 elts.quick_push (init_val);
4739 elts.quick_push (def_for_init);
4740 init_def = gimple_build_vector (&stmts, &elts);
4741 }
4742 }
4743 break;
4744
4745 case MIN_EXPR:
4746 case MAX_EXPR:
4747 case COND_EXPR:
4748 {
4749 init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4750 init_def = gimple_build_vector_from_val (&stmts, vectype, init_val);
4751 }
4752 break;
4753
4754 default:
4755 gcc_unreachable ()(fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c"
, 4755, __FUNCTION__))
;
4756 }
4757
4758 if (stmts)
4759 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4760 return init_def;
4761}
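
To make the two initialization schemes concrete, assume a sum reduction with init_val = 5 and four elements per vector:

/* Option1 (adjust in epilog): init_def = { 0, 0, 0, 0 } and
   *adjustment_def = 5; the caller adds 5 back after the final reduction.
   Option2: init_def = { 5, 0, 0, 0 } and no adjustment is needed.
   For MIN/MAX/COND_EXPR both schemes use the broadcast { 5, 5, 5, 5 }.  */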
4762
4763/* Get at the initial defs for the reduction PHIs in SLP_NODE.
4764 NUMBER_OF_VECTORS is the number of vector defs to create.
4765 If NEUTRAL_OP is nonnull, introducing extra elements of that
4766 value will not change the result. */
4767
4768static void
4769get_initial_defs_for_reduction (vec_info *vinfo,
4770 slp_tree slp_node,
4771 vec<tree> *vec_oprnds,
4772 unsigned int number_of_vectors,
4773 bool reduc_chain, tree neutral_op)
4774{
4775 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4776 stmt_vec_info stmt_vinfo = stmts[0];
4777 unsigned HOST_WIDE_INT nunits;
4778 unsigned j, number_of_places_left_in_vector;
4779 tree vector_type;
4780 unsigned int group_size = stmts.length ();
4781 unsigned int i;
4782 class loop *loop;
4783
4784 vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
4785
4786 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def);
4787
4788 loop = (gimple_bb (stmt_vinfo->stmt))->loop_father;
4789 gcc_assert (loop);
4790 edge pe = loop_preheader_edge (loop);
4791
4792 gcc_assert (!reduc_chain || neutral_op);
4793
4794 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
4795 created vectors. It is greater than 1 if unrolling is performed.
4796
4797 For example, we have two scalar operands, s1 and s2 (e.g., group of
4798 strided accesses of size two), while NUNITS is four (i.e., four scalars
4799 of this type can be packed in a vector). The output vector will contain
4800 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
4801 will be 2).
4802
4803 If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
4804 vectors containing the operands.
4805
4806 For example, NUNITS is four as before, and the group size is 8
4807 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
4808 {s5, s6, s7, s8}. */
4809
4810 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
4811 nunits = group_size;
4812
4813 number_of_places_left_in_vector = nunits;
4814 bool constant_p = true;
4815 tree_vector_builder elts (vector_type, nunits, 1);
4816 elts.quick_grow (nunits);
4817 gimple_seq ctor_seq = NULL;
4818 for (j = 0; j < nunits * number_of_vectors; ++j)
4819 {
4820 tree op;
4821 i = j % group_size;
4822 stmt_vinfo = stmts[i];
4823
4824 /* Get the def before the loop. In reduction chain we have only
4825 one initial value. Else we have as many as PHIs in the group. */
4826 if (reduc_chain)
4827 op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
4828 else if (((vec_oprnds->length () + 1) * nunits
4829 - number_of_places_left_in_vector >= group_size)
4830 && neutral_op)
4831 op = neutral_op;
4832 else
4833 op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
4834
4835 /* Create 'vect_ = {op0,op1,...,opn}'. */
4836 number_of_places_left_in_vector--;
4837 elts[nunits - number_of_places_left_in_vector - 1] = op;
4838 if (!CONSTANT_CLASS_P (op))
4839 constant_p = false;
4840
4841 if (number_of_places_left_in_vector == 0)
4842 {
4843 tree init;
4844 if (constant_p && !neutral_op
4845 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
4846 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
4847 /* Build the vector directly from ELTS. */
4848 init = gimple_build_vector (&ctor_seq, &elts);
4849 else if (neutral_op)
4850 {
4851 /* Build a vector of the neutral value and shift the
4852 other elements into place. */
4853 init = gimple_build_vector_from_val (&ctor_seq, vector_type,
4854 neutral_op);
4855 int k = nunits;
4856 while (k > 0 && elts[k - 1] == neutral_op)
4857 k -= 1;
4858 while (k > 0)
4859 {
4860 k -= 1;
4861 init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
4862 vector_type, init, elts[k]);
4863 }
4864 }
4865 else
4866 {
4867 /* First time round, duplicate ELTS to fill the
4868 required number of vectors. */
4869 duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts,
4870 number_of_vectors, *vec_oprnds);
4871 break;
4872 }
4873 vec_oprnds->quick_push (init);
4874
4875 number_of_places_left_in_vector = nunits;
4876 elts.new_vector (vector_type, nunits, 1);
4877 elts.quick_grow (nunits);
4878 constant_p = true;
4879 }
4880 }
4881 if (ctor_seq != NULL)
4882 gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
4883}
4884
4885/* For a statement STMT_INFO taking part in a reduction operation return
4886 the stmt_vec_info the meta information is stored on. */
4887
4888stmt_vec_info
4889info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
4890{
4891 stmt_info = vect_orig_stmt (stmt_info);
4892 gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
4893 if (!is_a <gphi *> (stmt_info->stmt)
4894 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
4895 stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
4896 gphi *phi = as_a <gphi *> (stmt_info->stmt);
4897 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
4898 {
4899 if (gimple_phi_num_args (phi) == 1)
4900 stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
4901 }
4902 else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
4903 {
4904 edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father);
4905 stmt_vec_info info
4906 = vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe));
4907 if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def)
4908 stmt_info = info;
4909 }
4910 return stmt_info;
4911}
4912
4913/* Function vect_create_epilog_for_reduction
4914
4915 Create code at the loop-epilog to finalize the result of a reduction
4916 computation.
4917
4918 STMT_INFO is the scalar reduction stmt that is being vectorized.
4919 SLP_NODE is an SLP node containing a group of reduction statements. The
4920 first one in this group is STMT_INFO.
4921 SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE
4922 REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi
4923 (counting from 0)
4924
4925 This function:
4926 1. Completes the reduction def-use cycles.
4927 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4928 by calling the function specified by REDUC_FN if available, or by
4929 other means (whole-vector shifts or a scalar loop).
4930 The function also creates a new phi node at the loop exit to preserve
4931 loop-closed form, as illustrated below.
4932
4933 The flow at the entry to this function:
4934
4935 loop:
4936 vec_def = phi <vec_init, null> # REDUCTION_PHI
4937 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
4938 s_loop = scalar_stmt # (scalar) STMT_INFO
4939 loop_exit:
4940 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4941 use <s_out0>
4942 use <s_out0>
4943
4944 The above is transformed by this function into:
4945
4946 loop:
4947 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4948 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
4949 s_loop = scalar_stmt # (scalar) STMT_INFO
4950 loop_exit:
4951 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4952 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4953 v_out2 = reduce <v_out1>
4954 s_out3 = extract_field <v_out2, 0>
4955 s_out4 = adjust_result <s_out3>
4956 use <s_out4>
4957 use <s_out4>
4958*/
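/* Illustrative sketch (assumed 4-lane integer sum reduction, not from the
   analyzed source): the epilog built below is equivalent to folding the
   lanes of the final partial-result vector into one scalar.  */
static int
epilog_sum_sketch (const int vect_def[4], int initial_def)
{
  /* v_out2 = reduce <v_out1>; s_out3 = extract_field <v_out2, 0>.  */
  int s_out3 = vect_def[0] + vect_def[1] + vect_def[2] + vect_def[3];
  /* s_out4 = adjust_result <s_out3>, e.g. add back the pre-loop value.  */
  return s_out3 + initial_def;
}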
4959
4960static void
4961vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
4962 stmt_vec_info stmt_info,
4963 slp_tree slp_node,
4964 slp_instance slp_node_instance)
4965{
4966 stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
4967 gcc_assert (reduc_info->is_reduc_info);
4968 /* For double reductions we need to get at the inner loop reduction
4969 stmt which has the meta info attached. Our stmt_info is that of the
4970 loop-closed PHI of the inner loop which we remember as
4971 def for the reduction PHI generation. */
4972 bool double_reduc = false;
4973 stmt_vec_info rdef_info = stmt_info;
4974 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
4975 {
4976 gcc_assert (!slp_node);
4977 double_reduc = true;
4978 stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
4979 (stmt_info->stmt, 0));
4980 stmt_info = vect_stmt_to_vectorize (stmt_info);
4981 }
4982 gphi *reduc_def_stmt
4983 = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
4984 enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
4985 internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
4986 tree vectype;
4987 machine_mode mode;
4988 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4989 basic_block exit_bb;
4990 tree scalar_dest;
4991 tree scalar_type;
4992 gimple *new_phi = NULL, *phi;
4993 gimple_stmt_iterator exit_gsi;
4994 tree new_temp = NULL_TREE, new_name, new_scalar_dest;
4995 gimple *epilog_stmt = NULL;
4996 gimple *exit_phi;
4997 tree bitsize;
4998 tree def;
4999 tree orig_name, scalar_result;
5000 imm_use_iterator imm_iter, phi_imm_iter;
5001 use_operand_p use_p, phi_use_p;
5002 gimple *use_stmt;
5003 bool nested_in_vect_loop = false;
5004 auto_vec<gimple *> new_phis;
5005 int j, i;
5006 auto_vec<tree> scalar_results;
5007 unsigned int group_size = 1, k;
5008 auto_vec<gimple *> phis;
5009 bool slp_reduc = false;
5010 bool direct_slp_reduc;
5011 tree new_phi_result;
5012 tree induction_index = NULL_TREE;
5013
5014 if (slp_node)
5015 group_size = SLP_TREE_LANES (slp_node);
5016
5017 if (nested_in_vect_loop_p (loop, stmt_info))
5018 {
5019 outer_loop = loop;
5020 loop = loop->inner;
5021 nested_in_vect_loop = true;
5022 gcc_assert (!slp_node);
5023 }
5024 gcc_assert (!nested_in_vect_loop || double_reduc);
5025
5026 vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
5027 gcc_assert (vectype);
5028 mode = TYPE_MODE (vectype);
5029
5030 tree initial_def = NULL;
5031 tree induc_val = NULL_TREE;
5032 tree adjustment_def = NULL;
5033 if (slp_node)
5034 ;
5035 else
5036 {
5037 /* Get at the scalar def before the loop, that defines the initial value
5038 of the reduction variable. */
5039 initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5040 loop_preheader_edge (loop));
5041 /* Optimize: for induction condition reduction, if we can't use zero
5042 for induc_val, use initial_def. */
5043 if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
5044 induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
5045 else if (double_reduc)
5046 ;
5047 else if (nested_in_vect_loop)
5048 ;
5049 else
5050 adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
5051 }
5052
5053 unsigned vec_num;
5054 int ncopies;
5055 if (slp_node)
5056 {
5057 vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
5058 ncopies = 1;
5059 }
5060 else
5061 {
5062 stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt);
5063 vec_num = 1;
5064 ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length ();
5065 }
5066
5067 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
5068 which is updated with the current index of the loop for every match of
5069 the original loop's cond_expr (VEC_STMT). This results in a vector
5070 containing the last time the condition passed for that vector lane.
5071 The first match will be a 1 to allow 0 to be used for non-matching
5072 indexes. If there are no matches at all then the vector will be all
5073 zeroes.
5074
5075 PR92772: This algorithm is broken for architectures that support
5076 masked vectors, but do not provide fold_extract_last. */
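/* Illustrative scalar equivalent of the index vector described above
   (assumed single condition, not from the analyzed source): the first
   match records index 1 so that 0 can stand for "no match".  */
static unsigned
last_match_index_sketch (const int *a, unsigned n, int threshold)
{
  unsigned last = 0;                /* the all-zeroes starting vector  */
  for (unsigned i = 0; i < n; i++)
    if (a[i] > threshold)           /* the original loop's cond_expr   */
      last = i + 1;                 /* lane value of the induction IV  */
  return last;
}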
5077 if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
5078 {
5079 auto_vec<std::pair<tree, bool>, 2> ccompares;
5080 stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
5081 cond_info = vect_stmt_to_vectorize (cond_info);
5082 while (cond_info != reduc_info)
5083 {
5084 if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
5085 {
5086 gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
5087 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
5088 ccompares.safe_push
5089 (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
5090 STMT_VINFO_REDUC_IDX (cond_info) == 2));
5091 }
5092 cond_info
5093 = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
5094 1 + STMT_VINFO_REDUC_IDX
5095 (cond_info)));
5096 cond_info = vect_stmt_to_vectorize (cond_info);
5097 }
5098 gcc_assert (ccompares.length () != 0);
5099
5100 tree indx_before_incr, indx_after_incr;
5101 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
5102 int scalar_precision
5103 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
5104 tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
5105 tree cr_index_vector_type = get_related_vectype_for_scalar_type
5106 (TYPE_MODE (vectype), cr_index_scalar_type,
5107 TYPE_VECTOR_SUBPARTS (vectype));
5108
5109 /* First we create a simple vector induction variable which starts
5110 with the values {1,2,3,...} (SERIES_VECT) and increments by the
5111 vector size (STEP). */
5112
5113 /* Create a {1,2,3,...} vector. */
5114 tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
5115
5116 /* Create a vector of the step value. */
5117 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
5118 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
5119
5120 /* Create an induction variable. */
5121 gimple_stmt_iterator incr_gsi;
5122 bool insert_after;
5123 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5124 create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
5125 insert_after, &indx_before_incr, &indx_after_incr);
5126
5127 /* Next create a new phi node vector (NEW_PHI_TREE) which starts
5128 filled with zeros (VEC_ZERO). */
5129
5130 /* Create a vector of 0s. */
5131 tree zero = build_zero_cst (cr_index_scalar_type);
5132 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
5133
5134 /* Create a vector phi node. */
5135 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
5136 new_phi = create_phi_node (new_phi_tree, loop->header);
5137 add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
5138 loop_preheader_edge (loop), UNKNOWN_LOCATION);
5139
5140 /* Now take the condition from the loops original cond_exprs
5141 and produce a new cond_exprs (INDEX_COND_EXPR) which for
5142 every match uses values from the induction variable
5143 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
5144 (NEW_PHI_TREE).
5145 Finally, we update the phi (NEW_PHI_TREE) to take the value of
5146 the new cond_expr (INDEX_COND_EXPR). */
5147 gimple_seq stmts = NULL;
5148 for (int i = ccompares.length () - 1; i != -1; --i)
5149 {
5150 tree ccompare = ccompares[i].first;
5151 if (ccompares[i].second)
5152 new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
5153 cr_index_vector_type,
5154 ccompare,
5155 indx_before_incr, new_phi_tree);
5156 else
5157 new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
5158 cr_index_vector_type,
5159 ccompare,
5160 new_phi_tree, indx_before_incr);
5161 }
5162 gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);
5163
5164 /* Update the phi with the vec cond. */
5165 induction_index = new_phi_tree;
5166 add_phi_arg (as_a <gphi *> (new_phi), induction_index,
5167 loop_latch_edge (loop), UNKNOWN_LOCATION);
5168 }
5169
5170 /* 2. Create epilog code.
5171 The reduction epilog code operates across the elements of the vector
5172 of partial results computed by the vectorized loop.
5173 The reduction epilog code consists of:
5174
5175 step 1: compute the scalar result in a vector (v_out2)
5176 step 2: extract the scalar result (s_out3) from the vector (v_out2)
5177 step 3: adjust the scalar result (s_out3) if needed.
5178
5179 Step 1 can be accomplished using one the following three schemes:
5180 (scheme 1) using reduc_fn, if available.
5181 (scheme 2) using whole-vector shifts, if available.
5182 (scheme 3) using a scalar loop. In this case steps 1+2 above are
5183 combined.
5184
5185 The overall epilog code looks like this:
5186
5187 s_out0 = phi <s_loop> # original EXIT_PHI
5188 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
5189 v_out2 = reduce <v_out1> # step 1
5190 s_out3 = extract_field <v_out2, 0> # step 2
5191 s_out4 = adjust_result <s_out3> # step 3
5192
5193 (step 3 is optional, and steps 1 and 2 may be combined).
5194 Lastly, the uses of s_out0 are replaced by s_out4. */
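/* Illustrative sketch of scheme 2 (whole-vector shifts) for an assumed
   8-lane integer addition, not from the analyzed source: halves of the
   vector are combined until lane 0 holds the result.  */
static int
shift_reduce_sketch (int v[8])
{
  for (int offset = 4; offset >= 1; offset /= 2)
    for (int i = 0; i < offset; i++)
      v[i] += v[i + offset];        /* v = v + (v shifted by OFFSET lanes)  */
  return v[0];                      /* step 2: extract_field <v, 0>         */
}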
5195
5196
5197 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
5198 v_out1 = phi <VECT_DEF>
5199 Store them in NEW_PHIS. */
5200 if (double_reduc)
5201 loop = outer_loop;
5202 exit_bb = single_exit (loop)->dest;
5203 new_phis.create (slp_node ? vec_num : ncopies);
5204 for (unsigned i = 0; i < vec_num; i++)
5205 {
5206 if (slp_node)
5207 def = vect_get_slp_vect_def (slp_node, i);
5208 else
5209 def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]);
5210 for (j = 0; j < ncopies; j++)
5211 {
5212 tree new_def = copy_ssa_name (def);
5213 phi = create_phi_node (new_def, exit_bb);
5214 if (j == 0)
5215 new_phis.quick_push (phi);
5216 else
5217 {
5218 def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
5219 new_phis.quick_push (phi);
5220 }
5221
5222 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
5223 }
5224 }
5225
5226 exit_gsi = gsi_after_labels (exit_bb);
5227
5228 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
5229 (i.e. when reduc_fn is not available) and in the final adjustment
5230 code (if needed). Also get the original scalar reduction variable as
5231 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
5232 represents a reduction pattern), the tree-code and scalar-def are
5233 taken from the original stmt that the pattern-stmt (STMT) replaces.
5234 Otherwise (it is a regular reduction) - the tree-code and scalar-def
5235 are taken from STMT. */
5236
5237 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
5238 if (orig_stmt_info != stmt_info)
5239 {
5240 /* Reduction pattern */
5241 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5242 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
5243 }
5244
5245 scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt);
5246 scalar_type = TREE_TYPE (scalar_dest);
5247 scalar_results.create (group_size);
5248 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
5249 bitsize = TYPE_SIZE (scalar_type);
5250
5251 /* SLP reduction without reduction chain, e.g.,
5252 # a1 = phi <a2, a0>
5253 # b1 = phi <b2, b0>
5254 a2 = operation (a1)
5255 b2 = operation (b1) */
5256 slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
5257
5258 /* True if we should implement SLP_REDUC using native reduction operations
5259 instead of scalar operations. */
5260 direct_slp_reduc = (reduc_fn != IFN_LAST
5261 && slp_reduc
5262 && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
5263
5264 /* In case of reduction chain, e.g.,
5265 # a1 = phi <a3, a0>
5266 a2 = operation (a1)
5267 a3 = operation (a2),
5268
5269 we may end up with more than one vector result. Here we reduce them to
5270 one vector. */
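/* Illustrative sketch (assumed two partial-result vectors of four integer
   lanes, not from the analyzed source): the copies are combined
   element-wise before the per-lane reduction below runs.  */
static void
combine_partial_results_sketch (int first_vect[4], const int second_vect[4])
{
  for (int lane = 0; lane < 4; lane++)
    first_vect[lane] += second_vect[lane];
}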
5271 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
5272 {
5273 gimple_seq stmts = NULL;
5274 tree first_vect = PHI_RESULT (new_phis[0]);
5275 first_vect = gimple_convert (&stmts, vectype, first_vect);
5276 for (k = 1; k < new_phis.length (); k++)
5277 {
5278 gimple *next_phi = new_phis[k];
5279 tree second_vect = PHI_RESULT (next_phi);
5280 second_vect = gimple_convert (&stmts, vectype, second_vect);
5281 first_vect = gimple_build (&stmts, code, vectype,
5282 first_vect, second_vect);
5283 }
5284 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5285
5286 new_phi_result = first_vect;
5287 new_phis.truncate (0);
5288 new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
5289 }
5290 /* Likewise if we couldn't use a single defuse cycle. */
5291 else if (ncopies > 1)
5292 {
5293 gimple_seq stmts = NULL;
5294 tree first_vect = PHI_RESULT (new_phis[0]);
5295 first_vect = gimple_convert (&stmts, vectype, first_vect);
5296 for (int k = 1; k < ncopies; ++k)
5297 {
5298 tree second_vect = PHI_RESULT (new_phis[k]);
5299 second_vect = gimple_convert (&stmts, vectype, second_vect);
5300 first_vect = gimple_build (&stmts, code, vectype,
5301 first_vect, second_vect);
5302 }
5303 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5304 new_phi_result = first_vect;
5305 new_phis.truncate (0);
5306 new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
5307 }
5308 else
5309 new_phi_result = PHI_RESULT (new_phis[0]);
5310
5311 if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
5312 && reduc_fn != IFN_LAST)
5313 {
5314 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
5315 various data values where the condition matched and another vector
5316 (INDUCTION_INDEX) containing all the indexes of those matches. We
5317 need to extract the last matching index (which will be the index with
5318 highest value) and use this to index into the data vector.
5319 For the case where there were no matches, the data vector will contain
5320 all default values and the index vector will be all zeros. */
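/* Illustrative scalar equivalent of the extraction described above
   (assumed 4 lanes, not from the analyzed source): pick the data lane
   whose recorded index equals the maximum recorded index; if nothing
   matched, every lane still holds the default value.  */
static int
extract_last_match_sketch (const int data[4], const unsigned idx[4])
{
  unsigned max_idx = 0;
  int result = data[0];
  for (int lane = 0; lane < 4; lane++)    /* IFN_REDUC_MAX on the indexes  */
    if (idx[lane] > max_idx)
      max_idx = idx[lane];
  for (int lane = 0; lane < 4; lane++)    /* VEC_COND_EXPR + final reduce  */
    if (idx[lane] == max_idx)
      result = data[lane];
  return result;
}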
5321
5322 /* Get various versions of the type of the vector of indexes. */
5323 tree index_vec_type = TREE_TYPE (induction_index);
5324 gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
5325 tree index_scalar_type = TREE_TYPE (index_vec_type);
5326 tree index_vec_cmp_type = truth_type_for (index_vec_type);
5327
5328 /* Get an unsigned integer version of the type of the data vector. */
5329 int scalar_precision
5330 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
5331 tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
5332 tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned,
5333 vectype);
5334
5335 /* First we need to create a vector (ZERO_VEC) of zeros and another
5336 vector (MAX_INDEX_VEC) filled with the last matching index, which we
5337 can create using a MAX reduction and then expanding.
5338 In the case where the loop never made any matches, the max index will
5339 be zero. */
5340
5341 /* Vector of {0, 0, 0,...}. */
5342 tree zero_vec = build_zero_cst (vectype);
5343
5344 gimple_seq stmts = NULL;
5345 new_phi_result = gimple_convert (&stmts, vectype, new_phi_result);
5346 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5347
5348 /* Find maximum value from the vector of found indexes. */
5349 tree max_index = make_ssa_name (index_scalar_type);
5350 gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
5351 1, induction_index);
5352 gimple_call_set_lhs (max_index_stmt, max_index);
5353 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
5354
5355 /* Vector of {max_index, max_index, max_index,...}. */
5356 tree max_index_vec = make_ssa_name (index_vec_type);
5357 tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
5358 max_index);
5359 gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
5360 max_index_vec_rhs);
5361 gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
5362
5363 /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
5364 with the vector (INDUCTION_INDEX) of found indexes, choosing values
5365 from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
5366 otherwise. Only one value should match, resulting in a vector
5367 (VEC_COND) with one data value and the rest zeros.
5368 In the case where the loop never made any matches, every index will
5369 match, resulting in a vector with all data values (which will all be
5370 the default value). */
5371
5372 /* Compare the max index vector to the vector of found indexes to find
5373 the position of the max value. */
5374 tree vec_compare = make_ssa_name (index_vec_cmp_type);
5375 gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
5376 induction_index,
5377 max_index_vec);
5378 gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
5379
5380 /* Use the compare to choose either values from the data vector or
5381 zero. */
5382 tree vec_cond = make_ssa_name (vectype);
5383 gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
5384 vec_compare, new_phi_result,
5385 zero_vec);
5386 gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
5387
5388 /* Finally we need to extract the data value from the vector (VEC_COND)
5389 into a scalar (MATCHED_DATA_REDUC). Logically we want to do a OR
5390 reduction, but because this doesn't exist, we can use a MAX reduction
5391 instead. The data value might be signed or a float so we need to cast
5392 it first.
5393 In the case where the loop never made any matches, the data values are
5394 all identical, and so will reduce down correctly. */
5395
5396 /* Make the matched data values unsigned. */
5397 tree vec_cond_cast = make_ssa_name (vectype_unsigned);
5398 tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
5399 vec_cond);
5400 gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
5401 VIEW_CONVERT_EXPR,
5402 vec_cond_cast_rhs);
5403 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
5404
5405 /* Reduce down to a scalar value. */
5406 tree data_reduc = make_ssa_name (scalar_type_unsigned);
5407 gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
5408 1, vec_cond_cast);
5409 gimple_call_set_lhs (data_reduc_stmt, data_reduc);
5410 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
5411
5412 /* Convert the reduced value back to the result type and set as the
5413 result. */
5414 stmts = NULL;
5415 new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type,
5416 data_reduc);
5417 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5418 scalar_results.safe_push (new_temp);
5419 }
5420 else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
5421 && reduc_fn == IFN_LAST)
5422 {
5423 /* Condition reduction without supported IFN_REDUC_MAX. Generate
5424 idx = 0;
5425 idx_val = induction_index[0];
5426 val = data_reduc[0];
5427 for (idx = 0, val = init, i = 0; i < nelts; ++i)
5428 if (induction_index[i] > idx_val)
5429 val = data_reduc[i], idx_val = induction_index[i];
5430 return val; */
5431
5432 tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
5433 tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
5434 unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
5435 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
5436 /* Enforced by vectorizable_reduction, which ensures we have target
5437 support before allowing a conditional reduction on variable-length
5438 vectors. */
5439 unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant ();
5440 tree idx_val = NULL_TREE, val = NULL_TREE;
5441 for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
5442 {
5443 tree old_idx_val = idx_val;
5444 tree old_val = val;
5445 idx_val = make_ssa_name (idx_eltype);
5446 epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF,
5447 build3 (BIT_FIELD_REF, idx_eltype,
5448 induction_index,
5449 bitsize_int (el_size),
5450 bitsize_int (off)));
5451 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5452 val = make_ssa_name (data_eltype);
5453 epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
5454 build3 (BIT_FIELD_REF,
5455 data_eltype,
5456 new_phi_result,
5457 bitsize_int (el_size),
5458 bitsize_int (off)));
5459 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5460 if (off != 0)
5461 {
5462 tree new_idx_val = idx_val;
5463 if (off != v_size - el_size)
5464 {
5465 new_idx_val = make_ssa_name (idx_eltype);
5466 epilog_stmt = gimple_build_assign (new_idx_val,
5467 MAX_EXPR, idx_val,
5468 old_idx_val);
5469 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5470 }
5471 tree new_val = make_ssa_name (data_eltype);
5472 epilog_stmt = gimple_build_assign (new_val,
5473 COND_EXPR,
5474 build2 (GT_EXPR,
5475 boolean_type_node,
5476 idx_val,
5477 old_idx_val),
5478 val, old_val);
5479 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5480 idx_val = new_idx_val;
5481 val = new_val;
5482 }
5483 }
5484 /* Convert the reduced value back to the result type and set as the
5485 result. */
5486 gimple_seq stmts = NULL;
5487 val = gimple_convert (&stmts, scalar_type, val);
5488 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5489 scalar_results.safe_push (val);
5490 }
5491
5492 /* 2.3 Create the reduction code, using one of the three schemes described
5493 above. In SLP we simply need to extract all the elements from the
5494 vector (without reducing them), so we use scalar shifts. */
5495 else if (reduc_fn != IFN_LAST && !slp_reduc)
5496 {
5497 tree tmp;
5498 tree vec_elem_type;
5499
5500 /* Case 1: Create:
5501 v_out2 = reduc_expr <v_out1> */
5502
5503 if (dump_enabled_p ())
5504 dump_printf_loc (MSG_NOTE, vect_location,
5505 "Reduce using direct vector reduction.\n");
5506
5507 gimple_seq stmts = NULL;
5508 new_phi_result = gimple_convert (&stmts, vectype, new_phi_result);
5509 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
5510 new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn),
5511 vec_elem_type, new_phi_result);
5512 new_temp = gimple_convert (&stmts, scalar_type, new_temp);
5513 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
5514
5515 if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
5516 && induc_val)
5517 {
5518 /* Earlier we set the initial value to be a vector of induc_val
5519 values. Check the result and if it is induc_val then replace
5520 with the original initial value, unless induc_val is
5521 the same as initial_def already. */
5522 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
5523 induc_val);
5524
5525 tmp = make_ssa_name (new_scalar_dest);
5526 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
5527 initial_def, new_temp);
5528 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5529 new_temp = tmp;
5530 }
5531
5532 scalar_results.safe_push (new_temp);
5533 }
5534 else if (direct_slp_reduc)
5535 {
5536 /* Here we create one vector for each of the REDUC_GROUP_SIZE results,
5537 with the elements for other SLP statements replaced with the
5538 neutral value. We can then do a normal reduction on each vector. */
5539
5540 /* Enforced by vectorizable_reduction. */
5541 gcc_assert (new_phis.length () == 1);
5542 gcc_assert (pow2p_hwi (group_size));
5543
5544 slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis;
5545 vec<stmt_vec_info> orig_phis
5546 = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node);
5547 gimple_seq seq = NULL;
5548
5549 /* Build a vector {0, 1, 2, ...}, with the same number of elements
5550 and the same element size as VECTYPE. */
5551 tree index = build_index_vector (vectype, 0, 1);
5552 tree index_type = TREE_TYPE (index);
5553 tree index_elt_type = TREE_TYPE (index_type);
5554 tree mask_type = truth_type_for (index_type);
5555
5556 /* Create a vector that, for each element, identifies which of
5557 the REDUC_GROUP_SIZE results should use it. */
5558 tree index_mask = build_int_cst (index_elt_type, group_size - 1);
5559 index = gimple_build (&seq, BIT_AND_EXPR, index_type, index,
5560 build_vector_from_val (index_type, index_mask));
5561
5562 /* Get a neutral vector value. This is simply a splat of the neutral
5563 scalar value if we have one, otherwise the initial scalar value
5564 is itself a neutral value. */
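/* For reference (assumed, not stated in this file): the usual neutral
   values are 0 for PLUS_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR, 1 for
   MULT_EXPR and all-ones for BIT_AND_EXPR; MIN_EXPR and MAX_EXPR have no
   universal neutral value, which is the !neutral_op case below.  */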
5565 tree vector_identity = NULL_TREE;
5566 tree neutral_op = NULL_TREE;
5567 if (slp_node)
5568 {
5569 stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
5570 neutral_op
5571 = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
5572 vectype, code, first != NULL);
5573 }
5574 if (neutral_op)
5575 vector_identity = gimple_build_vector_from_val (&seq, vectype,
5576 neutral_op);
5577 for (unsigned int i = 0; i < group_size; ++i)
5578 {
5579 /* If there's no universal neutral value, we can use the
5580 initial scalar value from the original PHI. This is used
5581 for MIN and MAX reduction, for example. */
5582 if (!neutral_op)
5583 {
5584 tree scalar_value
5585 = PHI_ARG_DEF_FROM_EDGE (orig_phis[i]->stmt,
5586 loop_preheader_edge (loop));
5587 scalar_value = gimple_convert (&seq, TREE_TYPE (vectype),
5588 scalar_value);
5589 vector_identity = gimple_build_vector_from_val (&seq, vectype,
5590 scalar_value);
5591 }
5592
5593 /* Calculate the equivalent of:
5594
5595 sel[j] = (index[j] == i);
5596
5597 which selects the elements of NEW_PHI_RESULT that should
5598 be included in the result. */
5599 tree compare_val = build_int_cst (index_elt_type, i);
5600 compare_val = build_vector_from_val (index_type, compare_val);
5601 tree sel = gimple_build (&seq, EQ_EXPR, mask_type,
5602 index, compare_val);
5603
5604 /* Calculate the equivalent of:
5605
5606 vec = seq ? new_phi_result : vector_identity;
5607
5608 VEC is now suitable for a full vector reduction. */
5609 tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
5610 sel, new_phi_result, vector_identity);
5611
5612 /* Do the reduction and convert it to the appropriate type. */
5613 tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn),
5614 TREE_TYPE (vectype), vec);
5615 scalar = gimple_convert (&seq, scalar_type, scalar);
5616 scalar_results.safe_push (scalar);
5617 }
5618 gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT);
5619 }
5620 else
5621 {
5622 bool reduce_with_shift;
5623 tree vec_temp;
5624
5625 gcc_assert (slp_reduc || new_phis.length () == 1);
5626
5627 /* See if the target wants to do the final (shift) reduction
5628 in a vector mode of smaller size and first reduce upper/lower
5629 halves against each other. */
5630 enum machine_mode mode1 = mode;
5631 tree stype = TREE_TYPE (vectype);
5632 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5633 unsigned nunits1 = nunits;
5634 if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
5635 && new_phis.length () == 1)
5636 {
5637 nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
5638 /* For SLP reductions we have to make sure lanes match up, but
5639 since we're doing individual element final reduction reducing
5640 vector width here is even more important.
5641 ??? We can also separate lanes with permutes, for the common
5642 case of power-of-two group-size odd/even extracts would work. */
5643 if (slp_reduc && nunits != nunits1)
5644 {
5645 nunits1 = least_common_multiple (nunits1, group_size);
5646 gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
5647 }
5648 }
5649 if (!slp_reduc
5650 && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
5651 nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
5652
5653 tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
5654 stype, nunits1);
5655 reduce_with_shift = have_whole_vector_shift (mode1);
5656 if (!VECTOR_MODE_P (mode1))
5657 reduce_with_shift = false;
5658 else
5659 {
5660 optab optab = optab_for_tree_code (code, vectype1, optab_default);
5661 if (optab_handler (optab, mode1) == CODE_FOR_nothing)
5662 reduce_with_shift = false;
5663 }
5664
5665 /* First reduce the vector to the desired vector size we should
5666 do shift reduction on by combining upper and lower halves. */
5667 new_temp = new_phi_result;
5668 while (nunits > nunits1)
5669 {
5670 nunits /= 2;
5671 vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
5672 stype, nunits);
5673 unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1));
5674
5675 /* The target has to make sure we support lowpart/highpart
5676 extraction, either via direct vector extract or through
5677 an integer mode punning. */
5678 tree dst1, dst2;
5679 if (convert_optab_handler (vec_extract_optab,
5680 TYPE_MODE (TREE_TYPE (new_temp)),
5681 TYPE_MODE (vectype1))
5682 != CODE_FOR_nothing)
5683 {
5684 /* Extract sub-vectors directly once vec_extract becomes
5685 a conversion optab. */
5686 dst1 = make_ssa_name (vectype1);
5687 epilog_stmt
5688 = gimple_build_assign (dst1, BIT_FIELD_REF,
5689 build3 (BIT_FIELD_REF, vectype1,
5690 new_temp, TYPE_SIZE (vectype1),
5691 bitsize_int (0)));
5692 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5693 dst2 = make_ssa_name (vectype1);
5694 epilog_stmt
5695 = gimple_build_assign (dst2, BIT_FIELD_REF,
5696 build3 (BIT_FIELD_REF, vectype1,
5697 new_temp, TYPE_SIZE (vectype1),
5698 bitsize_int (bitsize)));
5699 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5700 }
5701 else
5702 {
5703 /* Extract via punning to appropriately sized integer mode
5704 vector. */
5705 tree eltype = build_nonstandard_integer_type (bitsize, 1);
5706 tree etype = build_vector_type (eltype, 2);
5707 gcc_assert (convert_optab_handler (vec_extract_optab,
5708 TYPE_MODE (etype),
5709 TYPE_MODE (eltype))
5710 != CODE_FOR_nothing);
5711 tree tem = make_ssa_name (etype);
5712 epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
5713 build1 (VIEW_CONVERT_EXPR,
5714 etype, new_temp));
5715 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5716 new_temp = tem;
5717 tem = make_ssa_name (eltype);
5718 epilog_stmt
5719 = gimple_build_assign (tem, BIT_FIELD_REF,
5720 build3 (BIT_FIELD_REF, eltype,
5721 new_temp, TYPE_SIZE (eltype),
5722 bitsize_int (0)));
5723 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5724 dst1 = make_ssa_name (vectype1);
5725 epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
5726 build1 (VIEW_CONVERT_EXPR,
5727 vectype1, tem));
5728 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5729 tem = make_ssa_name (eltype);
5730 epilog_stmt
5731 = gimple_build_assign (tem, BIT_FIELD_REF,
5732 build3 (BIT_FIELD_REF, eltype,
5733 new_temp, TYPE_SIZE (eltype),
5734 bitsize_int (bitsize)));
5735 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5736 dst2 = make_ssa_name (vectype1);
5737 epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
5738 build1 (VIEW_CONVERT_EXPR,
5739 vectype1, tem));
5740 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5741 }
5742
5743 new_temp = make_ssa_name (vectype1);
5744 epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
5745 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5746 new_phis[0] = epilog_stmt;
5747 }
5748
5749 if (reduce_with_shift && !slp_reduc)
5750 {
5751 int element_bitsize = tree_to_uhwi (bitsize);
5752 /* Enforced by vectorizable_reduction, which disallows SLP reductions
5753 for variable-length vectors and also requires direct target support
5754 for loop reductions. */
5755 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
5756 int nelements = vec_size_in_bits / element_bitsize;
5757 vec_perm_builder sel;
5758 vec_perm_indices indices;
5759
5760 int elt_offset;
5761
5762 tree zero_vec = build_zero_cst (vectype1);
5763 /* Case 2: Create:
5764 for (offset = nelements/2; offset >= 1; offset/=2)
5765 {