File: build/gcc/tree-vect-loop.c
Warning: line 8125, column 8: Assigned value is garbage or undefined
1 | /* Loop Vectorization |
2 | Copyright (C) 2003-2021 Free Software Foundation, Inc. |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> and |
4 | Ira Rosen <irar@il.ibm.com> |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "target.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "gimple.h" |
30 | #include "cfghooks.h" |
31 | #include "tree-pass.h" |
32 | #include "ssa.h" |
33 | #include "optabs-tree.h" |
34 | #include "diagnostic-core.h" |
35 | #include "fold-const.h" |
36 | #include "stor-layout.h" |
37 | #include "cfganal.h" |
38 | #include "gimplify.h" |
39 | #include "gimple-iterator.h" |
40 | #include "gimplify-me.h" |
41 | #include "tree-ssa-loop-ivopts.h" |
42 | #include "tree-ssa-loop-manip.h" |
43 | #include "tree-ssa-loop-niter.h" |
44 | #include "tree-ssa-loop.h" |
45 | #include "cfgloop.h" |
46 | #include "tree-scalar-evolution.h" |
47 | #include "tree-vectorizer.h" |
48 | #include "gimple-fold.h" |
49 | #include "cgraph.h" |
50 | #include "tree-cfg.h" |
51 | #include "tree-if-conv.h" |
52 | #include "internal-fn.h" |
53 | #include "tree-vector-builder.h" |
54 | #include "vec-perm-indices.h" |
55 | #include "tree-eh.h" |
56 | |
57 | /* Loop Vectorization Pass. |
58 | |
59 | This pass tries to vectorize loops. |
60 | |
61 | For example, the vectorizer transforms the following simple loop: |
62 | |
63 | short a[N]; short b[N]; short c[N]; int i; |
64 | |
65 | for (i=0; i<N; i++){ |
66 | a[i] = b[i] + c[i]; |
67 | } |
68 | |
69 | as if it was manually vectorized by rewriting the source code into: |
70 | |
71 | typedef int __attribute__((mode(V8HI))) v8hi; |
72 | short a[N]; short b[N]; short c[N]; int i; |
73 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; |
74 | v8hi va, vb, vc; |
75 | |
76 | for (i=0; i<N/8; i++){ |
77 | vb = pb[i]; |
78 | vc = pc[i]; |
79 | va = vb + vc; |
80 | pa[i] = va; |
81 | } |
82 | |
83 | The main entry to this pass is vectorize_loops(), in which |
84 | the vectorizer applies a set of analyses on a given set of loops, |
85 | followed by the actual vectorization transformation for the loops that |
86 | had successfully passed the analysis phase. |
87 | Throughout this pass we make a distinction between two types of |
88 | data: scalars (which are represented by SSA_NAMES), and memory references |
89 | ("data-refs"). These two types of data require different handling both |
90 | during analysis and transformation. The types of data-refs that the |
91 | vectorizer currently supports are ARRAY_REFS whose base is an array DECL |
92 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer |
93 | accesses are required to have a simple (consecutive) access pattern. |
94 | |
95 | Analysis phase: |
96 | =============== |
97 | The driver for the analysis phase is vect_analyze_loop(). |
98 | It applies a set of analyses, some of which rely on the scalar evolution |
99 | analyzer (scev) developed by Sebastian Pop. |
100 | |
101 | During the analysis phase the vectorizer records some information |
102 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the |
103 | loop, as well as general information about the loop as a whole, which is |
104 | recorded in a "loop_vec_info" struct attached to each loop. |
105 | |
106 | Transformation phase: |
107 | ===================== |
108 | The loop transformation phase scans all the stmts in the loop, and |
109 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in |
110 | the loop that needs to be vectorized. It inserts the vector code sequence |
111 | just before the scalar stmt S, and records a pointer to the vector code |
112 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct |
113 | attached to S). This pointer will be used for the vectorization of following |
114 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; |
115 | otherwise, we rely on dead code elimination for removing it. |
116 | |
117 | For example, say stmt S1 was vectorized into stmt VS1: |
118 | |
119 | VS1: vb = px[i]; |
120 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 |
121 | S2: a = b; |
122 | |
123 | To vectorize stmt S2, the vectorizer first finds the stmt that defines |
124 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the |
125 | vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The |
126 | resulting sequence would be: |
127 | |
128 | VS1: vb = px[i]; |
129 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 |
130 | VS2: va = vb; |
131 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 |
132 | |
133 | Operands that are not SSA_NAMEs are data-refs that appear in |
134 | load/store operations (like 'x[i]' in S1), and are handled differently. |
135 | |
136 | Target modeling: |
137 | ================= |
138 | Currently the only target specific information that is used is the |
139 | size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". |
140 | Targets that can support different sizes of vectors will, for now, need |
141 | to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More |
142 | flexibility will be added in the future. |
143 | |
144 | Since we only vectorize operations whose vector form can be |
145 | expressed using existing tree codes, to verify that an operation is |
146 | supported, the vectorizer checks the relevant optab at the relevant |
147 | machine_mode (e.g, optab_handler (add_optab, V8HImode)). If |
148 | the value found is CODE_FOR_nothing, then there's no target support, and |
149 | we can't vectorize the stmt. |
150 | |
151 | For additional information on this project see: |
152 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html |
153 | */ |
154 | |
155 | static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); |
156 | static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, |
157 | bool *, bool *); |
158 | |
159 | /* Subroutine of vect_determine_vf_for_stmt that handles only one |
160 | statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE |
161 | may already be set for general statements (not just data refs). */ |
162 | |
163 | static opt_result |
164 | vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, |
165 | bool vectype_maybe_set_p, |
166 | poly_uint64 *vf) |
167 | { |
168 | gimple *stmt = stmt_info->stmt; |
169 | |
170 | if ((!STMT_VINFO_RELEVANT_P (stmt_info) |
171 | && !STMT_VINFO_LIVE_P (stmt_info)) |
172 | || gimple_clobber_p (stmt)) |
173 | { |
174 | if (dump_enabled_p ()) |
175 | dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); |
176 | return opt_result::success (); |
177 | } |
178 | |
179 | tree stmt_vectype, nunits_vectype; |
180 | opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info, |
181 | &stmt_vectype, |
182 | &nunits_vectype); |
183 | if (!res) |
184 | return res; |
185 | |
186 | if (stmt_vectype) |
187 | { |
188 | if (STMT_VINFO_VECTYPE (stmt_info)) |
189 | /* The only case when a vectype had been already set is for stmts |
190 | that contain a data ref, or for "pattern-stmts" (stmts generated |
191 | by the vectorizer to represent/replace a certain idiom). */ |
192 | gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) |
193 | || vectype_maybe_set_p) |
194 | && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); |
195 | else |
196 | STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; |
197 | } |
198 | |
199 | if (nunits_vectype) |
200 | vect_update_max_nunits (vf, nunits_vectype); |
201 | |
202 | return opt_result::success (); |
203 | } |
204 | |
205 | /* Subroutine of vect_determine_vectorization_factor. Set the vector |
206 | types of STMT_INFO and all attached pattern statements and update |
207 | the vectorization factor VF accordingly. Return true on success |
208 | or false if something prevented vectorization. */ |
209 | |
210 | static opt_result |
211 | vect_determine_vf_for_stmt (vec_info *vinfo, |
212 | stmt_vec_info stmt_info, poly_uint64 *vf) |
213 | { |
214 | if (dump_enabled_p ()) |
215 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", |
216 | stmt_info->stmt); |
217 | opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf); |
218 | if (!res) |
219 | return res; |
220 | |
221 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
222 | && STMT_VINFO_RELATED_STMT (stmt_info)) |
223 | { |
224 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
225 | stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
226 | |
227 | /* If a pattern statement has def stmts, analyze them too. */ |
228 | for (gimple_stmt_iterator si = gsi_start (pattern_def_seq); |
229 | !gsi_end_p (si); gsi_next (&si)) |
230 | { |
231 | stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); |
232 | if (dump_enabled_p ()) |
233 | dump_printf_loc (MSG_NOTE, vect_location, |
234 | "==> examining pattern def stmt: %G", |
235 | def_stmt_info->stmt); |
236 | res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf); |
237 | if (!res) |
238 | return res; |
239 | } |
240 | |
241 | if (dump_enabled_p ()) |
242 | dump_printf_loc (MSG_NOTE, vect_location, |
243 | "==> examining pattern statement: %G", |
244 | stmt_info->stmt); |
245 | res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf); |
246 | if (!res) |
247 | return res; |
248 | } |
249 | |
250 | return opt_result::success (); |
251 | } |
252 | |
253 | /* Function vect_determine_vectorization_factor |
254 | |
255 | Determine the vectorization factor (VF). VF is the number of data elements |
256 | that are operated upon in parallel in a single iteration of the vectorized |
257 | loop. For example, when vectorizing a loop that operates on 4byte elements, |
258 | on a target with vector size (VS) 16byte, the VF is set to 4, since 4 |
259 | elements can fit in a single vector register. |
260 | |
261 | We currently support vectorization of loops in which all types operated upon |
262 | are of the same size. Therefore this function currently sets VF according to |
263 | the size of the types operated upon, and fails if there are multiple sizes |
264 | in the loop. |
265 | |
266 | VF is also the factor by which the loop iterations are strip-mined, e.g.: |
267 | original loop: |
268 | for (i=0; i<N; i++){ |
269 | a[i] = b[i] + c[i]; |
270 | } |
271 | |
272 | vectorized loop: |
273 | for (i=0; i<N; i+=VF){ |
274 | a[i:VF] = b[i:VF] + c[i:VF]; |
275 | } |
276 | */ |
277 | |
278 | static opt_result |
279 | vect_determine_vectorization_factor (loop_vec_info loop_vinfo) |
280 | { |
281 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
282 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
283 | unsigned nbbs = loop->num_nodes; |
284 | poly_uint64 vectorization_factor = 1; |
285 | tree scalar_type = NULL_TREE; |
286 | gphi *phi; |
287 | tree vectype; |
288 | stmt_vec_info stmt_info; |
289 | unsigned i; |
290 | |
291 | DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); |
292 | |
293 | for (i = 0; i < nbbs; i++) |
294 | { |
295 | basic_block bb = bbs[i]; |
296 | |
297 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
298 | gsi_next (&si)) |
299 | { |
300 | phi = si.phi (); |
301 | stmt_info = loop_vinfo->lookup_stmt (phi); |
302 | if (dump_enabled_p ()) |
303 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G", |
304 | phi); |
305 | |
306 | gcc_assert (stmt_info); |
307 | |
308 | if (STMT_VINFO_RELEVANT_P (stmt_info) |
309 | || STMT_VINFO_LIVE_P (stmt_info)) |
310 | { |
311 | gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); |
312 | scalar_type = TREE_TYPE (PHI_RESULT (phi)); |
313 | |
314 | if (dump_enabled_p ()) |
315 | dump_printf_loc (MSG_NOTE, vect_location, |
316 | "get vectype for scalar type: %T\n", |
317 | scalar_type); |
318 | |
319 | vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
320 | if (!vectype) |
321 | return opt_result::failure_at (phi, |
322 | "not vectorized: unsupported " |
323 | "data-type %T\n", |
324 | scalar_type); |
325 | STMT_VINFO_VECTYPE (stmt_info) = vectype; |
326 | |
327 | if (dump_enabled_p ()) |
328 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", |
329 | vectype); |
330 | |
331 | if (dump_enabled_p ()) |
332 | { |
333 | dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); |
334 | dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); |
335 | dump_printf (MSG_NOTE, "\n"); |
336 | } |
337 | |
338 | vect_update_max_nunits (&vectorization_factor, vectype); |
339 | } |
340 | } |
341 | |
342 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
343 | gsi_next (&si)) |
344 | { |
345 | if (is_gimple_debug (gsi_stmt (si))) |
346 | continue; |
347 | stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
348 | opt_result res |
349 | = vect_determine_vf_for_stmt (loop_vinfo, |
350 | stmt_info, &vectorization_factor); |
351 | if (!res) |
352 | return res; |
353 | } |
354 | } |
355 | |
356 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ |
357 | if (dump_enabled_p ()) |
358 | { |
359 | dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); |
360 | dump_dec (MSG_NOTE, vectorization_factor); |
361 | dump_printf (MSG_NOTE, "\n"); |
362 | } |
363 | |
364 | if (known_le (vectorization_factor, 1U)) |
365 | return opt_result::failure_at (vect_location, |
366 | "not vectorized: unsupported data-type\n"); |
367 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; |
368 | return opt_result::success (); |
369 | } |
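The vectorization-factor arithmetic described in the comment before this function can be illustrated with a small standalone program (plain C, not part of this file; the 16-byte vector width and 4-byte element size are assumed example values):

#include <stdio.h>

/* Sketch of the VF computation for a loop operating on one data size:
   with a 16-byte vector register and 4-byte elements, VF = 16 / 4 = 4,
   and the loop is strip-mined so each vector iteration covers VF scalars.  */
int
main (void)
{
  unsigned vector_size_bytes = 16;    /* assumed target vector width */
  unsigned element_size_bytes = 4;    /* e.g. 'int' elements */
  unsigned vf = vector_size_bytes / element_size_bytes;

  unsigned n = 1000;
  unsigned vector_iters = n / vf;     /* iterations of the vectorized loop */
  unsigned scalar_leftover = n % vf;  /* handled by an epilogue loop */

  printf ("VF = %u, vector iterations = %u, leftover = %u\n",
          vf, vector_iters, scalar_leftover);
  return 0;
}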
370 | |
371 | |
372 | /* Function vect_is_simple_iv_evolution. |
373 | |
374 | FORNOW: A simple evolution of an induction variable in the loop is |
375 | considered a polynomial evolution. */ |
376 | |
377 | static bool |
378 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, |
379 | tree * step) |
380 | { |
381 | tree init_expr; |
382 | tree step_expr; |
383 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); |
384 | basic_block bb; |
385 | |
386 | /* When there is no evolution in this loop, the evolution function |
387 | is not "simple". */ |
388 | if (evolution_part == NULL_TREE) |
389 | return false; |
390 | |
391 | /* When the evolution is a polynomial of degree >= 2 |
392 | the evolution function is not "simple". */ |
393 | if (tree_is_chrec (evolution_part)) |
394 | return false; |
395 | |
396 | step_expr = evolution_part; |
397 | init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); |
398 | |
399 | if (dump_enabled_p ()) |
400 | dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n", |
401 | step_expr, init_expr); |
402 | |
403 | *init = init_expr; |
404 | *step = step_expr; |
405 | |
406 | if (TREE_CODE (step_expr) != INTEGER_CST |
407 | && (TREE_CODE (step_expr) != SSA_NAME |
408 | || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr))) |
409 | && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb)) |
410 | || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr)) |
411 | && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)) |
412 | || !flag_associative_math))) |
413 | && (TREE_CODE (step_expr) != REAL_CST |
414 | || !flag_associative_math)) |
415 | { |
416 | if (dump_enabled_p ()) |
417 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
418 | "step unknown.\n"); |
419 | return false; |
420 | } |
421 | |
422 | return true; |
423 | } |
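In source terms, the "simple evolution" this function accepts corresponds to an induction variable with an invariant per-iteration step; the sketch below (plain C, not from this file) contrasts an IV it would accept with one it would not:

/* i evolves as {0, +, 3}: initial value 0, constant step 3, so
   vect_is_simple_iv_evolution would return true with init = 0, step = 3.  */
void
simple_iv (int *a, int n)
{
  for (int i = 0; i < n; i += 3)
    a[i] = 0;
}

/* j is multiplied each iteration, so it has no constant-step (affine)
   evolution and would not be treated as a simple IV.  */
void
not_simple_iv (int *a, int n)
{
  for (int j = 1; j < n; j *= 2)
    a[j] = 0;
}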
424 | |
425 | /* Return true if PHI, described by STMT_INFO, is the inner PHI in |
426 | what we are assuming is a double reduction. For example, given |
427 | a structure like this: |
428 | |
429 | outer1: |
430 | x_1 = PHI <x_4(outer2), ...>; |
431 | ... |
432 | |
433 | inner: |
434 | x_2 = PHI <x_1(outer1), ...>; |
435 | ... |
436 | x_3 = ...; |
437 | ... |
438 | |
439 | outer2: |
440 | x_4 = PHI <x_3(inner)>; |
441 | ... |
442 | |
443 | outer loop analysis would treat x_1 as a double reduction phi and |
444 | this function would then return true for x_2. */ |
445 | |
446 | static bool |
447 | vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi) |
448 | { |
449 | use_operand_p use_p; |
450 | ssa_op_iter op_iter; |
451 | FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE) |
452 | if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p))) |
453 | if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def) |
454 | return true; |
455 | return false; |
456 | } |
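In C source terms, the SSA shape sketched above typically arises from a nest like the following (an illustrative example, not taken from this file), where an inner-loop reduction result is itself accumulated by the outer loop:

int
double_reduction (int a[64][64])
{
  int sum = 0;                  /* x_1 / x_4: the outer-loop PHI pair */
  for (int j = 0; j < 64; j++)
    {
      /* The inner-loop PHI for 'sum' (x_2 above) is what this predicate
         recognizes once the outer PHI was classified as a double
         reduction.  */
      for (int i = 0; i < 64; i++)
        sum += a[j][i];         /* x_3 above */
    }
  return sum;
}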
457 | |
458 | /* Function vect_analyze_scalar_cycles_1. |
459 | |
460 | Examine the cross iteration def-use cycles of scalar variables |
461 | in LOOP. LOOP_VINFO represents the loop that is now being |
462 | considered for vectorization (can be LOOP, or an outer-loop |
463 | enclosing LOOP). */ |
464 | |
465 | static void |
466 | vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop) |
467 | { |
468 | basic_block bb = loop->header; |
469 | tree init, step; |
470 | auto_vec<stmt_vec_info, 64> worklist; |
471 | gphi_iterator gsi; |
472 | bool double_reduc, reduc_chain; |
473 | |
474 | DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles"); |
475 | |
476 | /* First - identify all inductions. Reduction detection assumes that all the |
477 | inductions have been identified, therefore, this order must not be |
478 | changed. */ |
479 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
480 | { |
481 | gphi *phi = gsi.phi (); |
482 | tree access_fn = NULL; |
483 | tree def = PHI_RESULT (phi); |
484 | stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi); |
485 | |
486 | if (dump_enabled_p ()) |
487 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi); |
488 | |
489 | /* Skip virtual phi's. The data dependences that are associated with |
490 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ |
491 | if (virtual_operand_p (def)) |
492 | continue; |
493 | |
494 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type; |
495 | |
496 | /* Analyze the evolution function. */ |
497 | access_fn = analyze_scalar_evolution (loop, def); |
498 | if (access_fn) |
499 | { |
500 | STRIP_NOPS (access_fn); |
501 | if (dump_enabled_p ()) |
502 | dump_printf_loc (MSG_NOTE, vect_location, |
503 | "Access function of PHI: %T\n", access_fn); |
504 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
505 | = initial_condition_in_loop_num (access_fn, loop->num); |
506 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) |
507 | = evolution_part_in_loop_num (access_fn, loop->num); |
508 | } |
509 | |
510 | if (!access_fn |
511 | || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi) |
512 | || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step) |
513 | || (LOOP_VINFO_LOOP (loop_vinfo) != loop |
514 | && TREE_CODE (step) != INTEGER_CST)) |
515 | { |
516 | worklist.safe_push (stmt_vinfo); |
517 | continue; |
518 | } |
519 | |
520 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
521 | != NULL_TREE); |
522 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE); |
523 | |
524 | if (dump_enabled_p ()) |
525 | dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n"); |
526 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; |
527 | } |
528 | |
529 | |
530 | /* Second - identify all reductions and nested cycles. */ |
531 | while (worklist.length () > 0) |
532 | { |
533 | stmt_vec_info stmt_vinfo = worklist.pop (); |
534 | gphi *phi = as_a <gphi *> (stmt_vinfo->stmt); |
535 | tree def = PHI_RESULT (phi); |
536 | |
537 | if (dump_enabled_p ()) |
538 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi); |
539 | |
540 | gcc_assert (!virtual_operand_p (def) |
541 | && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); |
542 | |
543 | stmt_vec_info reduc_stmt_info |
544 | = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, |
545 | &reduc_chain); |
546 | if (reduc_stmt_info) |
547 | { |
548 | STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info; |
549 | STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo; |
550 | if (double_reduc) |
551 | { |
552 | if (dump_enabled_p ()) |
553 | dump_printf_loc (MSG_NOTE, vect_location, |
554 | "Detected double reduction.\n"); |
555 | |
556 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; |
557 | STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def; |
558 | } |
559 | else |
560 | { |
561 | if (loop != LOOP_VINFO_LOOP (loop_vinfo)) |
562 | { |
563 | if (dump_enabled_p ()) |
564 | dump_printf_loc (MSG_NOTE, vect_location, |
565 | "Detected vectorizable nested cycle.\n"); |
566 | |
567 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; |
568 | } |
569 | else |
570 | { |
571 | if (dump_enabled_p ()) |
572 | dump_printf_loc (MSG_NOTE, vect_location, |
573 | "Detected reduction.\n"); |
574 | |
575 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; |
576 | STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def; |
577 | /* Store the reduction cycles for possible vectorization in |
578 | loop-aware SLP if it was not detected as reduction |
579 | chain. */ |
580 | if (! reduc_chain) |
581 | LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push |
582 | (reduc_stmt_info); |
583 | } |
584 | } |
585 | } |
586 | else |
587 | if (dump_enabled_p ()) |
588 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
589 | "Unknown def-use cycle pattern.\n"); |
590 | } |
591 | } |
592 | |
593 | |
594 | /* Function vect_analyze_scalar_cycles. |
595 | |
596 | Examine the cross iteration def-use cycles of scalar variables, by |
597 | analyzing the loop-header PHIs of scalar variables. Classify each |
598 | cycle as one of the following: invariant, induction, reduction, unknown. |
599 | We do that for the loop represented by LOOP_VINFO, and also for its |
600 | inner loop, if it exists. |
601 | Examples for scalar cycles: |
602 | |
603 | Example1: reduction: |
604 | |
605 | loop1: |
606 | for (i=0; i<N; i++) |
607 | sum += a[i]; |
608 | |
609 | Example2: induction: |
610 | |
611 | loop2: |
612 | for (i=0; i<N; i++) |
613 | a[i] = i; */ |
614 | |
615 | static void |
616 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) |
617 | { |
618 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
619 | |
620 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop); |
621 | |
622 | /* When vectorizing an outer-loop, the inner-loop is executed sequentially. |
623 | Reductions in such inner-loop therefore have different properties than |
624 | the reductions in the nest that gets vectorized: |
625 | 1. When vectorized, they are executed in the same order as in the original |
626 | scalar loop, so we can't change the order of computation when |
627 | vectorizing them. |
628 | 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the |
629 | current checks are too strict. */ |
630 | |
631 | if (loop->inner) |
632 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); |
633 | } |
634 | |
635 | /* Transfer group and reduction information from STMT_INFO to its |
636 | pattern stmt. */ |
637 | |
638 | static void |
639 | vect_fixup_reduc_chain (stmt_vec_info stmt_info) |
640 | { |
641 | stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info); |
642 | stmt_vec_info stmtp; |
643 | gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp) |
644 | && REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
645 | REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info); |
646 | do |
647 | { |
648 | stmtp = STMT_VINFO_RELATED_STMT (stmt_info); |
649 | gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp) |
650 | == STMT_VINFO_DEF_TYPE (stmt_info)); |
651 | REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp; |
652 | stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info); |
653 | if (stmt_info) |
654 | REDUC_GROUP_NEXT_ELEMENT (stmtp) |
655 | = STMT_VINFO_RELATED_STMT (stmt_info); |
656 | } |
657 | while (stmt_info); |
658 | } |
659 | |
660 | /* Fixup scalar cycles that now have their stmts detected as patterns. */ |
661 | |
662 | static void |
663 | vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) |
664 | { |
665 | stmt_vec_info first; |
666 | unsigned i; |
667 | |
668 | FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) |
669 | { |
670 | stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); |
671 | while (next) |
672 | { |
673 | if ((STMT_VINFO_IN_PATTERN_P (next) |
674 | != STMT_VINFO_IN_PATTERN_P (first)) |
675 | || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1) |
676 | break; |
677 | next = REDUC_GROUP_NEXT_ELEMENT (next); |
678 | } |
679 | /* If all reduction chain members are well-formed patterns adjust |
680 | the group to group the pattern stmts instead. */ |
681 | if (! next |
682 | && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1) |
683 | { |
684 | if (STMT_VINFO_IN_PATTERN_P (first)) |
685 | { |
686 | vect_fixup_reduc_chain (first); |
687 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] |
688 | = STMT_VINFO_RELATED_STMT (first); |
689 | } |
690 | } |
691 | /* If not all stmt in the chain are patterns or if we failed |
692 | to update STMT_VINFO_REDUC_IDX dissolve the chain and handle |
693 | it as regular reduction instead. */ |
694 | else |
695 | { |
696 | stmt_vec_info vinfo = first; |
697 | stmt_vec_info last = NULL; |
698 | while (vinfo) |
699 | { |
700 | next = REDUC_GROUP_NEXT_ELEMENT (vinfo); |
701 | REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; |
702 | REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; |
703 | last = vinfo; |
704 | vinfo = next; |
705 | } |
706 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first)) |
707 | = vect_internal_def; |
708 | loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last)); |
709 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i); |
710 | --i; |
711 | } |
712 | } |
713 | } |
714 | |
715 | /* Function vect_get_loop_niters. |
716 | |
717 | Determine how many iterations the loop is executed and place it |
718 | in NUMBER_OF_ITERATIONS. Place the number of latch iterations |
719 | in NUMBER_OF_ITERATIONSM1. Place the condition under which the |
720 | niter information holds in ASSUMPTIONS. |
721 | |
722 | Return the loop exit condition. */ |
723 | |
724 | |
725 | static gcond * |
726 | vect_get_loop_niters (class loop *loop, tree *assumptions, |
727 | tree *number_of_iterations, tree *number_of_iterationsm1) |
728 | { |
729 | edge exit = single_exit (loop); |
730 | class tree_niter_desc niter_desc; |
731 | tree niter_assumptions, niter, may_be_zero; |
732 | gcond *cond = get_loop_exit_condition (loop); |
733 | |
734 | *assumptions = boolean_true_node; |
735 | *number_of_iterationsm1 = chrec_dont_know; |
736 | *number_of_iterations = chrec_dont_know; |
737 | DUMP_VECT_SCOPE ("get_loop_niters"); |
738 | |
739 | if (!exit) |
740 | return cond; |
741 | |
742 | may_be_zero = NULL_TREE; |
743 | if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL) |
744 | || chrec_contains_undetermined (niter_desc.niter)) |
745 | return cond; |
746 | |
747 | niter_assumptions = niter_desc.assumptions; |
748 | may_be_zero = niter_desc.may_be_zero; |
749 | niter = niter_desc.niter; |
750 | |
751 | if (may_be_zero && integer_zerop (may_be_zero)) |
752 | may_be_zero = NULL_TREE; |
753 | |
754 | if (may_be_zero) |
755 | { |
756 | if (COMPARISON_CLASS_P (may_be_zero)) |
757 | { |
758 | /* Try to combine may_be_zero with assumptions, this can simplify |
759 | computation of niter expression. */ |
760 | if (niter_assumptions && !integer_nonzerop (niter_assumptions)) |
761 | niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
762 | niter_assumptions, |
763 | fold_build1 (TRUTH_NOT_EXPR, |
764 | boolean_type_node, |
765 | may_be_zero)); |
766 | else |
767 | niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, |
768 | build_int_cst (TREE_TYPE (niter), 0), |
769 | rewrite_to_non_trapping_overflow (niter)); |
770 | |
771 | may_be_zero = NULL_TREE; |
772 | } |
773 | else if (integer_nonzerop (may_be_zero)) |
774 | { |
775 | *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0); |
776 | *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1); |
777 | return cond; |
778 | } |
779 | else |
780 | return cond; |
781 | } |
782 | |
783 | *assumptions = niter_assumptions; |
784 | *number_of_iterationsm1 = niter; |
785 | |
786 | /* We want the number of loop header executions which is the number |
787 | of latch executions plus one. |
788 | ??? For UINT_MAX latch executions this number overflows to zero |
789 | for loops like do { n++; } while (n != 0); */ |
790 | if (niter && !chrec_contains_undetermined (niter)) |
791 | niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter), |
792 | build_int_cst (TREE_TYPE (niter), 1)); |
793 | *number_of_iterations = niter; |
794 | |
795 | return cond; |
796 | } |
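The header/latch relationship this function relies on can be checked with a small standalone example (plain C, not part of this file): the header of a terminating single-exit loop executes exactly once more than its back edge is taken, which is why the code above adds 1 to the latch count and why a counter covering the full range of a 32-bit type overflows as the ??? comment notes.

#include <stdio.h>

int
main (void)
{
  unsigned n = 5;
  unsigned header_execs = 0;   /* times the loop header/body is entered */
  unsigned latch_execs = 0;    /* times the back edge is taken */

  unsigned i = 0;
  do
    {
      header_execs++;
      i++;
      if (i < n)
        latch_execs++;         /* taken on every pass except the last */
    }
  while (i < n);

  /* Prints "header = 5, latch = 4": header executions are latch
     executions plus one.  */
  printf ("header = %u, latch = %u\n", header_execs, latch_execs);
  return 0;
}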
797 | |
798 | /* Function bb_in_loop_p |
799 | |
800 | Used as predicate for dfs order traversal of the loop bbs. */ |
801 | |
802 | static bool |
803 | bb_in_loop_p (const_basic_block bb, const void *data) |
804 | { |
805 | const class loop *const loop = (const class loop *)data; |
806 | if (flow_bb_inside_loop_p (loop, bb)) |
807 | return true; |
808 | return false; |
809 | } |
810 | |
811 | |
812 | /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as |
813 | stmt_vec_info structs for all the stmts in LOOP_IN. */ |
814 | |
815 | _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) |
816 | : vec_info (vec_info::loop, init_cost (loop_in), shared), |
817 | loop (loop_in), |
818 | bbs (XCNEWVEC (basic_block, loop->num_nodes)), |
819 | num_itersm1 (NULL_TREE), |
820 | num_iters (NULL_TREE), |
821 | num_iters_unchanged (NULL_TREE), |
822 | num_iters_assumptions (NULL_TREE), |
823 | th (0), |
824 | versioning_threshold (0), |
825 | vectorization_factor (0), |
826 | max_vectorization_factor (0), |
827 | mask_skip_niters (NULL_TREE), |
828 | rgroup_compare_type (NULL_TREE), |
829 | simd_if_cond (NULL_TREE), |
830 | unaligned_dr (NULL), |
831 | peeling_for_alignment (0), |
832 | ptr_mask (0), |
833 | ivexpr_map (NULL), |
834 | scan_map (NULL), |
835 | slp_unrolling_factor (1), |
836 | single_scalar_iteration_cost (0), |
837 | vec_outside_cost (0), |
838 | vec_inside_cost (0), |
839 | vectorizable (false), |
840 | can_use_partial_vectors_p (param_vect_partial_vector_usage != 0), |
841 | using_partial_vectors_p (false), |
842 | epil_using_partial_vectors_p (false), |
843 | peeling_for_gaps (false), |
844 | peeling_for_niter (false), |
845 | no_data_dependencies (false), |
846 | has_mask_store (false), |
847 | scalar_loop_scaling (profile_probability::uninitialized ()), |
848 | scalar_loop (NULL), |
849 | orig_loop_info (NULL) |
850 | { |
851 | /* CHECKME: We want to visit all BBs before their successors (except for |
852 | latch blocks, for which this assertion wouldn't hold). In the simple |
853 | case of the loop forms we allow, a dfs order of the BBs would be the same |
854 | as reversed postorder traversal, so we are safe. */ |
855 | |
856 | unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, |
857 | bbs, loop->num_nodes, loop); |
858 | gcc_assert (nbbs == loop->num_nodes); |
859 | |
860 | for (unsigned int i = 0; i < nbbs; i++) |
861 | { |
862 | basic_block bb = bbs[i]; |
863 | gimple_stmt_iterator si; |
864 | |
865 | for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) |
866 | { |
867 | gimple *phi = gsi_stmt (si); |
868 | gimple_set_uid (phi, 0); |
869 | add_stmt (phi); |
870 | } |
871 | |
872 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) |
873 | { |
874 | gimple *stmt = gsi_stmt (si); |
875 | gimple_set_uid (stmt, 0); |
876 | if (is_gimple_debug (stmt)) |
877 | continue; |
878 | add_stmt (stmt); |
879 | /* If the .GOMP_SIMD_LANE call for the current loop has 3 arguments, the |
880 | third argument is the #pragma omp simd if (x) condition: when 0, the |
881 | loop shouldn't be vectorized; when a non-zero constant, it should |
882 | be vectorized normally; otherwise the loop is versioned, with the |
883 | vectorized copy used if the condition is non-zero at runtime. */ |
884 | if (loop_in->simduid |
885 | && is_gimple_call (stmt) |
886 | && gimple_call_internal_p (stmt) |
887 | && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE |
888 | && gimple_call_num_args (stmt) >= 3 |
889 | && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME |
890 | && (loop_in->simduid |
891 | == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))) |
892 | { |
893 | tree arg = gimple_call_arg (stmt, 2); |
894 | if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME) |
895 | simd_if_cond = arg; |
896 | else |
897 | gcc_assert (integer_nonzerop (arg)); |
898 | } |
899 | } |
900 | } |
901 | |
902 | epilogue_vinfos.create (6); |
903 | } |
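The three-argument .GOMP_SIMD_LANE case handled above originates from source like the following (an illustrative example, not from this file; it assumes GCC with -fopenmp or -fopenmp-simd, which lower the pragma and its if clause into the internal call whose third argument is checked here):

void
scale (float *restrict a, const float *restrict b, int n, int use_simd)
{
  /* 'use_simd' becomes the condition described in the comment above:
     0 disables vectorization, a non-zero constant vectorizes normally,
     and a runtime value makes the vectorizer emit a versioned loop.  */
  #pragma omp simd if (use_simd)
  for (int i = 0; i < n; i++)
    a[i] = b[i] * 2.0f;
}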
904 | |
905 | /* Free all levels of rgroup CONTROLS. */ |
906 | |
907 | void |
908 | release_vec_loop_controls (vec<rgroup_controls> *controls) |
909 | { |
910 | rgroup_controls *rgc; |
911 | unsigned int i; |
912 | FOR_EACH_VEC_ELT (*controls, i, rgc) |
913 | rgc->controls.release (); |
914 | controls->release (); |
915 | } |
916 | |
917 | /* Free all memory used by the _loop_vec_info, as well as all the |
918 | stmt_vec_info structs of all the stmts in the loop. */ |
919 | |
920 | _loop_vec_info::~_loop_vec_info () |
921 | { |
922 | free (bbs); |
923 | |
924 | release_vec_loop_controls (&masks); |
925 | release_vec_loop_controls (&lens); |
926 | delete ivexpr_map; |
927 | delete scan_map; |
928 | epilogue_vinfos.release (); |
929 | |
930 | loop->aux = NULL; |
931 | } |
932 | |
933 | /* Return an invariant or register for EXPR and emit necessary |
934 | computations in the LOOP_VINFO loop preheader. */ |
935 | |
936 | tree |
937 | cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr) |
938 | { |
939 | if (is_gimple_reg (expr) |
940 | || is_gimple_min_invariant (expr)) |
941 | return expr; |
942 | |
943 | if (! loop_vinfo->ivexpr_map) |
944 | loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>; |
945 | tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr); |
946 | if (! cached) |
947 | { |
948 | gimple_seq stmts = NULL; |
949 | cached = force_gimple_operand (unshare_expr (expr), |
950 | &stmts, true, NULL_TREE); |
951 | if (stmts) |
952 | { |
953 | edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
954 | gsi_insert_seq_on_edge_immediate (e, stmts); |
955 | } |
956 | } |
957 | return cached; |
958 | } |
959 | |
960 | /* Return true if we can use CMP_TYPE as the comparison type to produce |
961 | all masks required to mask LOOP_VINFO. */ |
962 | |
963 | static bool |
964 | can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type) |
965 | { |
966 | rgroup_controls *rgm; |
967 | unsigned int i; |
968 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm) |
969 | if (rgm->type != NULL_TREE |
970 | && !direct_internal_fn_supported_p (IFN_WHILE_ULT, |
971 | cmp_type, rgm->type, |
972 | OPTIMIZE_FOR_SPEED)) |
973 | return false; |
974 | return true; |
975 | } |
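The IFN_WHILE_ULT capability being probed here can be pictured with a plain-C sketch (an illustration of the semantics, not the internal function's implementation; the 8-lane width is an assumed example): lane I of the resulting mask is active while BASE + I is still below the limit, which is what lets a final partial iteration be masked off.

#include <stdio.h>

#define LANES 8   /* assumed number of mask elements per vector */

/* WHILE_ULT-style mask: lane i is active iff base + i < limit.  */
static void
while_ult (unsigned base, unsigned limit, unsigned char mask[LANES])
{
  for (unsigned i = 0; i < LANES; i++)
    mask[i] = (base + i < limit);
}

int
main (void)
{
  unsigned char mask[LANES];
  /* Last iteration of a 13-element loop processed 8 lanes at a time:
     lanes 0..4 stay active, lanes 5..7 are masked off.  */
  while_ult (8, 13, mask);
  for (unsigned i = 0; i < LANES; i++)
    printf ("%u", (unsigned) mask[i]);
  printf ("\n");
  return 0;
}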
976 | |
977 | /* Calculate the maximum number of scalars per iteration for every |
978 | rgroup in LOOP_VINFO. */ |
979 | |
980 | static unsigned int |
981 | vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) |
982 | { |
983 | unsigned int res = 1; |
984 | unsigned int i; |
985 | rgroup_controls *rgm; |
986 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm) |
987 | res = MAX (res, rgm->max_nscalars_per_iter); |
988 | return res; |
989 | } |
990 | |
991 | /* Calculate the minimum precision necessary to represent: |
992 | |
993 | MAX_NITERS * FACTOR |
994 | |
995 | as an unsigned integer, where MAX_NITERS is the maximum number of |
996 | loop header iterations for the original scalar form of LOOP_VINFO. */ |
997 | |
998 | static unsigned |
999 | vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) |
1000 | { |
1001 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1002 | |
1003 | /* Get the maximum number of iterations that is representable |
1004 | in the counter type. */ |
1005 | tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); |
1006 | widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; |
1007 | |
1008 | /* Get a more refined estimate for the number of iterations. */ |
1009 | widest_int max_back_edges; |
1010 | if (max_loop_iterations (loop, &max_back_edges)) |
1011 | max_ni = wi::smin (max_ni, max_back_edges + 1); |
1012 | |
1013 | /* Work out how many bits we need to represent the limit. */ |
1014 | return wi::min_precision (max_ni * factor, UNSIGNED); |
1015 | } |
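A worked instance of this computation (plain C, not part of this file; the concrete numbers are assumptions): if the scalar loop's header can run at most 40,000 times and each iteration covers 4 scalars, the product 160,000 needs 18 bits as an unsigned value, so an 18-bit-or-wider IV/comparison type suffices.

#include <stdio.h>
#include <stdint.h>

/* Bits needed to represent v as an unsigned integer, i.e. the analogue
   of wi::min_precision (v, UNSIGNED) for ordinary integers.  */
static unsigned
min_prec (uint64_t v)
{
  unsigned bits = 0;
  do
    {
      bits++;
      v >>= 1;
    }
  while (v != 0);
  return bits;
}

int
main (void)
{
  uint64_t max_niters = 40000;   /* assumed bound on header executions */
  uint64_t factor = 4;           /* scalars handled per iteration */
  printf ("%u bits\n", min_prec (max_niters * factor));   /* prints "18 bits" */
  return 0;
}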
1016 | |
1017 | /* True if the loop needs peeling or partial vectors when vectorized. */ |
1018 | |
1019 | static bool |
1020 | vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo) |
1021 | { |
1022 | unsigned HOST_WIDE_INT const_vf; |
1023 | HOST_WIDE_INT max_niter |
1024 | = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); |
1025 | |
1026 | unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
1027 | if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) |
1028 | th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO |
1029 | (loop_vinfo)); |
1030 | |
1031 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
1032 | && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) |
1033 | { |
1034 | /* Work out the (constant) number of iterations that need to be |
1035 | peeled for reasons other than niters. */ |
1036 | unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
1037 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
1038 | peel_niter += 1; |
1039 | if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter, |
1040 | LOOP_VINFO_VECT_FACTOR (loop_vinfo))) |
1041 | return true; |
1042 | } |
1043 | else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) |
1044 | /* ??? When peeling for gaps but not alignment, we could |
1045 | try to check whether the (variable) niters is known to be |
1046 | VF * N + 1. That's something of a niche case though. */ |
1047 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
1048 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf) |
1049 | || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) |
1050 | < (unsigned) exact_log2 (const_vf)) |
1051 | /* In case of versioning, check if the maximum number of |
1052 | iterations is greater than th. If they are identical, |
1053 | the epilogue is unnecessary. */ |
1054 | && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) |
1055 | || ((unsigned HOST_WIDE_INT) max_niter |
1056 | > (th / const_vf) * const_vf)))) |
1057 | return true; |
1058 | |
1059 | return false; |
1060 | } |
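To make the divisibility test above concrete (plain C, not part of this file; the numbers are assumed): with 1003 known iterations, one iteration peeled for gaps and a vectorization factor of 8, 1002 is not a multiple of 8, so the loop still needs an epilogue or partial vectors.

#include <stdio.h>
#include <stdbool.h>

int
main (void)
{
  unsigned niters = 1003;     /* assumed known iteration count */
  unsigned peel_niter = 1;    /* e.g. one iteration peeled for gaps */
  unsigned vf = 8;            /* assumed vectorization factor */

  /* Mirrors the multiple_p check above: if the iterations left after
     peeling do not divide evenly by VF, an epilogue (or partial
     vectors) is needed.  */
  bool needs_more = ((niters - peel_niter) % vf) != 0;

  printf ("needs epilogue/partial vectors: %s\n",
          needs_more ? "yes" : "no");   /* yes: 1002 % 8 == 2 */
  return 0;
}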
1061 | |
1062 | /* Each statement in LOOP_VINFO can be masked where necessary. Check |
1063 | whether we can actually generate the masks required. Return true if so, |
1064 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */ |
1065 | |
1066 | static bool |
1067 | vect_verify_full_masking (loop_vec_info loop_vinfo) |
1068 | { |
1069 | unsigned int min_ni_width; |
1070 | unsigned int max_nscalars_per_iter |
1071 | = vect_get_max_nscalars_per_iter (loop_vinfo); |
1072 | |
1073 | /* Use a normal loop if there are no statements that need masking. |
1074 | This only happens in rare degenerate cases: it means that the loop |
1075 | has no loads, no stores, and no live-out values. */ |
1076 | if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) |
1077 | return false; |
1078 | |
1079 | /* Work out how many bits we need to represent the limit. */ |
1080 | min_ni_width |
1081 | = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter); |
1082 | |
1083 | /* Find a scalar mode for which WHILE_ULT is supported. */ |
1084 | opt_scalar_int_mode cmp_mode_iter; |
1085 | tree cmp_type = NULL_TREE; |
1086 | tree iv_type = NULL_TREE; |
1087 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); |
1088 | unsigned int iv_precision = UINT_MAX; |
1089 | |
1090 | if (iv_limit != -1) |
1091 | iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, |
1092 | UNSIGNED); |
1093 | |
1094 | FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)for (mode_iterator::start (&(cmp_mode_iter), MODE_INT); mode_iterator ::iterate_p (&(cmp_mode_iter)); mode_iterator::get_wider ( &(cmp_mode_iter))) |
1095 | { |
1096 | unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); |
1097 | if (cmp_bits >= min_ni_width |
1098 | && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) |
1099 | { |
1100 | tree this_type = build_nonstandard_integer_type (cmp_bits, true); |
1101 | if (this_type |
1102 | && can_produce_all_loop_masks_p (loop_vinfo, this_type)) |
1103 | { |
1104 | /* Although we could stop as soon as we find a valid mode, |
1105 | there are at least two reasons why that's not always the |
1106 | best choice: |
1107 | |
1108 | - An IV that's Pmode or wider is more likely to be reusable |
1109 | in address calculations than an IV that's narrower than |
1110 | Pmode. |
1111 | |
1112 | - Doing the comparison in IV_PRECISION or wider allows |
1113 | a natural 0-based IV, whereas using a narrower comparison |
1114 | type requires mitigations against wrap-around. |
1115 | |
1116 | Conversely, if the IV limit is variable, doing the comparison |
1117 | in a wider type than the original type can introduce |
1118 | unnecessary extensions, so picking the widest valid mode |
1119 | is not always a good choice either. |
1120 | |
1121 | Here we prefer the first IV type that's Pmode or wider, |
1122 | and the first comparison type that's IV_PRECISION or wider. |
1123 | (The comparison type must be no wider than the IV type, |
1124 | to avoid extensions in the vector loop.) |
1125 | |
1126 | ??? We might want to try continuing beyond Pmode for ILP32 |
1127 | targets if CMP_BITS < IV_PRECISION. */ |
1128 | iv_type = this_type; |
1129 | if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type))
1130 | cmp_type = this_type;
1131 | if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
1132 | break; |
1133 | } |
1134 | } |
1135 | } |
1136 | |
1137 | if (!cmp_type) |
1138 | return false; |
1139 | |
1140 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
1141 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
1142 | return true; |
1143 | } |
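
/* Conceptually, the masks validated above let the vector loop execute as if
   each lane tested its own scalar index against the iteration limit.  A
   simplified scalar model of one loop mask (illustrative only, with a
   hypothetical NLANES parameter; the real code emits WHILE_ULT):

     static void
     compute_loop_mask (bool *mask, unsigned HOST_WIDE_INT iv,
                        unsigned HOST_WIDE_INT limit, unsigned int nlanes)
     {
       for (unsigned int lane = 0; lane < nlanes; ++lane)
         mask[lane] = iv + lane < limit;
     }

   The comparison type chosen above must be wide enough that IV + LANE
   cannot wrap before reaching LIMIT.  */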
1144 | |
1145 | /* Check whether we can use vector accesses with length, based on precision
1146 | comparison.  So far, to keep it simple, we only allow the case in which
1147 | the precision of the target-supported length is large enough for the
1148 | precision required by the loop niters. */
1149 | |
1150 | static bool |
1151 | vect_verify_loop_lens (loop_vec_info loop_vinfo) |
1152 | { |
1153 | if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
1154 | return false; |
1155 | |
1156 | unsigned int max_nitems_per_iter = 1; |
1157 | unsigned int i; |
1158 | rgroup_controls *rgl; |
1159 | /* Find the maximum number of items per iteration for every rgroup. */ |
1160 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)
1161 | {
1162 | unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
1163 | max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter);
1164 | } |
1165 | |
1166 | /* Work out how many bits we need to represent the length limit. */ |
1167 | unsigned int min_ni_prec |
1168 | = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter); |
1169 | |
1170 | /* Now use the maximum of below precisions for one suitable IV type: |
1171 | - the IV's natural precision |
1172 | - the precision needed to hold: the maximum number of scalar |
1173 | iterations multiplied by the scale factor (min_ni_prec above) |
1174 | - the Pmode precision |
1175 | |
1176 | If min_ni_prec is less than the precision of the current niters,
1177 | we prefer to still use the niters type.  Prefer to use Pmode and
1178 | a wider IV to avoid narrow conversions. */
1179 | |
1180 | unsigned int ni_prec
1181 | = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)));
1182 | min_ni_prec = MAX (min_ni_prec, ni_prec);
1183 | min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode));
1184 |
1185 | tree iv_type = NULL_TREE;
1186 | opt_scalar_int_mode tmode_iter;
1187 | FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)
1188 | { |
1189 | scalar_mode tmode = tmode_iter.require (); |
1190 | unsigned int tbits = GET_MODE_BITSIZE (tmode); |
1191 | |
1192 | /* ??? Do we really want to construct one IV whose precision exceeds |
1193 | BITS_PER_WORD? */ |
1194 | if (tbits > BITS_PER_WORD)
1195 | break; |
1196 | |
1197 | /* Find the first available standard integral type. */ |
1198 | if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode)) |
1199 | { |
1200 | iv_type = build_nonstandard_integer_type (tbits, true); |
1201 | break; |
1202 | } |
1203 | } |
1204 | |
1205 | if (!iv_type) |
1206 | { |
1207 | if (dump_enabled_p ()) |
1208 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1209 | "can't vectorize with length-based partial vectors" |
1210 | " because there is no suitable iv type.\n"); |
1211 | return false; |
1212 | } |
1213 | |
1214 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = iv_type;
1215 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
1216 | |
1217 | return true; |
1218 | } |
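
/* A worked example of the precision selection above (illustrative numbers
   only): with 32-bit niters, an rgroup needing at most 4 items per
   iteration and a 64-bit Pmode, min_ni_prec starts as the bits needed for
   max_niters * 4, is raised to 32 by the niters precision and then to 64
   by Pmode, so the first supported integer mode of at least 64 bits
   (typically DImode) becomes the IV type.  A sketch of the same maximum
   computation, assuming the three precisions are already known:

     static unsigned int
     choose_len_iv_prec (unsigned int scaled_niters_prec,
                         unsigned int niters_prec, unsigned int pmode_prec)
     {
       unsigned int prec = scaled_niters_prec;
       if (prec < niters_prec)
         prec = niters_prec;
       if (prec < pmode_prec)
         prec = pmode_prec;
       return prec;
     }
*/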
1219 | |
1220 | /* Calculate the cost of one scalar iteration of the loop. */ |
1221 | static void |
1222 | vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) |
1223 | { |
1224 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1225 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1226 | int nbbs = loop->num_nodes, factor;
1227 | int innerloop_iters, i;
1228 |
1229 | DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost");
1230 | |
1231 | /* Gather costs for statements in the scalar loop. */ |
1232 | |
1233 | /* FORNOW. */ |
1234 | innerloop_iters = 1; |
1235 | if (loop->inner) |
1236 | innerloop_iters = 50; /* FIXME */ |
1237 | |
1238 | for (i = 0; i < nbbs; i++) |
1239 | { |
1240 | gimple_stmt_iterator si; |
1241 | basic_block bb = bbs[i]; |
1242 | |
1243 | if (bb->loop_father == loop->inner) |
1244 | factor = innerloop_iters; |
1245 | else |
1246 | factor = 1; |
1247 | |
1248 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) |
1249 | { |
1250 | gimple *stmt = gsi_stmt (si); |
1251 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); |
1252 | |
1253 | if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) |
1254 | continue; |
1255 | |
1256 | /* Skip stmts that are not vectorized inside the loop. */ |
1257 | stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info); |
1258 | if (!STMT_VINFO_RELEVANT_P (vstmt_info)
1259 | && (!STMT_VINFO_LIVE_P (vstmt_info)
1260 | || !VECTORIZABLE_CYCLE_DEF
1261 | (STMT_VINFO_DEF_TYPE (vstmt_info))))
1262 | continue;
1263 |
1264 | vect_cost_for_stmt kind;
1265 | if (STMT_VINFO_DATA_REF (stmt_info))
1266 | {
1267 | if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1268 | kind = scalar_load; |
1269 | else |
1270 | kind = scalar_store; |
1271 | } |
1272 | else if (vect_nop_conversion_p (stmt_info)) |
1273 | continue; |
1274 | else |
1275 | kind = scalar_stmt; |
1276 | |
1277 | record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1278 | factor, kind, stmt_info, 0, vect_prologue); |
1279 | } |
1280 | } |
1281 | |
1282 | /* Now accumulate cost. */ |
1283 | void *target_cost_data = init_cost (loop); |
1284 | stmt_info_for_cost *si; |
1285 | int j; |
1286 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1287 | j, si)
1288 | (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count, |
1289 | si->kind, si->stmt_info, si->vectype, |
1290 | si->misalign, vect_body); |
1291 | unsigned dummy, body_cost = 0; |
1292 | finish_cost (target_cost_data, &dummy, &body_cost, &dummy); |
1293 | destroy_cost_data (target_cost_data); |
1294 | LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) = body_cost;
1295 | } |
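
/* The accumulation above boils down to a weighted sum: every scalar load,
   store or other statement contributes its target cost, scaled by FACTOR
   for statements inside the inner loop.  A minimal sketch of the same idea
   (hypothetical cost values, not the target hooks used here):

     struct scalar_stmt_cost { int count; int per_stmt_cost; };

     static unsigned
     sum_scalar_costs (const struct scalar_stmt_cost *costs, unsigned n)
     {
       unsigned total = 0;
       for (unsigned i = 0; i < n; ++i)
         total += costs[i].count * costs[i].per_stmt_cost;
       return total;
     }
*/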
1296 | |
1297 | |
1298 | /* Function vect_analyze_loop_form_1. |
1299 | |
1300 | Verify that certain CFG restrictions hold, including: |
1301 | - the loop has a pre-header |
1302 | - the loop has a single entry and exit |
1303 | - the loop exit condition is simple enough |
1304 | - the number of iterations can be analyzed, i.e., a countable loop. The
1305 | niter could be analyzed under some assumptions. */ |
1306 | |
1307 | opt_result |
1308 | vect_analyze_loop_form_1 (class loop *loop, gcond **loop_cond, |
1309 | tree *assumptions, tree *number_of_iterationsm1, |
1310 | tree *number_of_iterations, gcond **inner_loop_cond) |
1311 | { |
1312 | DUMP_VECT_SCOPE ("vect_analyze_loop_form");
1313 | |
1314 | /* Different restrictions apply when we are considering an inner-most loop, |
1315 | vs. an outer (nested) loop. |
1316 | (FORNOW. May want to relax some of these restrictions in the future). */ |
1317 | |
1318 | if (!loop->inner) |
1319 | { |
1320 | /* Inner-most loop. We currently require that the number of BBs is |
1321 | exactly 2 (the header and latch). Vectorizable inner-most loops |
1322 | look like this: |
1323 | |
1324 | (pre-header) |
1325 | | |
1326 | header <--------+ |
1327 | | | | |
1328 | | +--> latch --+ |
1329 | | |
1330 | (exit-bb) */ |
1331 | |
1332 | if (loop->num_nodes != 2) |
1333 | return opt_result::failure_at (vect_location, |
1334 | "not vectorized:" |
1335 | " control flow in loop.\n"); |
1336 | |
1337 | if (empty_block_p (loop->header)) |
1338 | return opt_result::failure_at (vect_location, |
1339 | "not vectorized: empty loop.\n"); |
1340 | } |
1341 | else |
1342 | { |
1343 | class loop *innerloop = loop->inner; |
1344 | edge entryedge; |
1345 | |
1346 | /* Nested loop. We currently require that the loop is doubly-nested, |
1347 | contains a single inner loop, and the number of BBs is exactly 5. |
1348 | Vectorizable outer-loops look like this: |
1349 | |
1350 | (pre-header) |
1351 | | |
1352 | header <---+ |
1353 | | | |
1354 | inner-loop | |
1355 | | | |
1356 | tail ------+ |
1357 | | |
1358 | (exit-bb) |
1359 | |
1360 | The inner-loop has the properties expected of inner-most loops |
1361 | as described above. */ |
1362 | |
1363 | if ((loop->inner)->inner || (loop->inner)->next) |
1364 | return opt_result::failure_at (vect_location, |
1365 | "not vectorized:" |
1366 | " multiple nested loops.\n"); |
1367 | |
1368 | if (loop->num_nodes != 5) |
1369 | return opt_result::failure_at (vect_location, |
1370 | "not vectorized:" |
1371 | " control flow in loop.\n"); |
1372 | |
1373 | entryedge = loop_preheader_edge (innerloop); |
1374 | if (entryedge->src != loop->header |
1375 | || !single_exit (innerloop) |
1376 | || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1377 | return opt_result::failure_at (vect_location, |
1378 | "not vectorized:" |
1379 | " unsupported outerloop form.\n"); |
1380 | |
1381 | /* Analyze the inner-loop. */ |
1382 | tree inner_niterm1, inner_niter, inner_assumptions; |
1383 | opt_result res |
1384 | = vect_analyze_loop_form_1 (loop->inner, inner_loop_cond, |
1385 | &inner_assumptions, &inner_niterm1, |
1386 | &inner_niter, NULL);
1387 | if (!res) |
1388 | { |
1389 | if (dump_enabled_p ()) |
1390 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1391 | "not vectorized: Bad inner loop.\n"); |
1392 | return res; |
1393 | } |
1394 | |
1395 | /* Don't support analyzing niter under assumptions for inner |
1396 | loop. */ |
1397 | if (!integer_onep (inner_assumptions)) |
1398 | return opt_result::failure_at (vect_location, |
1399 | "not vectorized: Bad inner loop.\n"); |
1400 | |
1401 | if (!expr_invariant_in_loop_p (loop, inner_niter)) |
1402 | return opt_result::failure_at (vect_location, |
1403 | "not vectorized: inner-loop count not" |
1404 | " invariant.\n"); |
1405 | |
1406 | if (dump_enabled_p ()) |
1407 | dump_printf_loc (MSG_NOTE, vect_location, |
1408 | "Considering outer-loop vectorization.\n"); |
1409 | } |
1410 | |
1411 | if (!single_exit (loop)) |
1412 | return opt_result::failure_at (vect_location, |
1413 | "not vectorized: multiple exits.\n"); |
1414 | if (EDGE_COUNT (loop->header->preds) != 2)
1415 | return opt_result::failure_at (vect_location, |
1416 | "not vectorized:" |
1417 | " too many incoming edges.\n"); |
1418 | |
1419 | /* We assume that the loop exit condition is at the end of the loop, i.e.,
1420 | that the loop is represented as a do-while (with a proper if-guard |
1421 | before the loop if needed), where the loop header contains all the |
1422 | executable statements, and the latch is empty. */ |
1423 | if (!empty_block_p (loop->latch) |
1424 | || !gimple_seq_empty_p (phi_nodes (loop->latch))) |
1425 | return opt_result::failure_at (vect_location, |
1426 | "not vectorized: latch block not empty.\n"); |
1427 | |
1428 | /* Make sure the exit is not abnormal. */ |
1429 | edge e = single_exit (loop); |
1430 | if (e->flags & EDGE_ABNORMAL) |
1431 | return opt_result::failure_at (vect_location, |
1432 | "not vectorized:" |
1433 | " abnormal loop exit edge.\n"); |
1434 | |
1435 | *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations, |
1436 | number_of_iterationsm1); |
1437 | if (!*loop_cond) |
1438 | return opt_result::failure_at |
1439 | (vect_location, |
1440 | "not vectorized: complicated exit condition.\n"); |
1441 | |
1442 | if (integer_zerop (*assumptions) |
1443 | || !*number_of_iterations |
1444 | || chrec_contains_undetermined (*number_of_iterations)) |
1445 | return opt_result::failure_at |
1446 | (*loop_cond, |
1447 | "not vectorized: number of iterations cannot be computed.\n"); |
1448 | |
1449 | if (integer_zerop (*number_of_iterations)) |
1450 | return opt_result::failure_at |
1451 | (*loop_cond, |
1452 | "not vectorized: number of iterations = 0.\n"); |
1453 | |
1454 | return opt_result::success (); |
1455 | } |
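
/* For reference, a source loop that satisfies the restrictions checked
   above typically looks like the following (illustrative example only):

     void
     f (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] + 1;
     }

   After if-conversion and loop canonicalization this becomes a
   do-while-style loop with a single exit, an empty latch and a simple exit
   condition, which is exactly the form required here.  */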
1456 | |
1457 | /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */ |
1458 | |
1459 | opt_loop_vec_info |
1460 | vect_analyze_loop_form (class loop *loop, vec_info_shared *shared) |
1461 | { |
1462 | tree assumptions, number_of_iterations, number_of_iterationsm1; |
1463 | gcond *loop_cond, *inner_loop_cond = NULL;
1464 | |
1465 | opt_result res |
1466 | = vect_analyze_loop_form_1 (loop, &loop_cond, |
1467 | &assumptions, &number_of_iterationsm1, |
1468 | &number_of_iterations, &inner_loop_cond); |
1469 | if (!res) |
1470 | return opt_loop_vec_info::propagate_failure (res); |
1471 | |
1472 | loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); |
1473 | LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1474 | LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1475 | LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1476 | if (!integer_onep (assumptions)) |
1477 | { |
1478 | /* We consider to vectorize this loop by versioning it under |
1479 | some assumptions. In order to do this, we need to clear |
1480 | existing information computed by scev and niter analyzer. */ |
1481 | scev_reset_htab (); |
1482 | free_numbers_of_iterations_estimates (loop); |
1483 | /* Also set flag for this loop so that following scev and niter |
1484 | analysis are done under the assumptions. */ |
1485 | loop_constraint_set (loop, LOOP_C_FINITE);
1486 | /* Also record the assumptions for versioning. */
1487 | LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = assumptions;
1488 | }
1489 |
1490 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1491 | { |
1492 | if (dump_enabled_p ()) |
1493 | { |
1494 | dump_printf_loc (MSG_NOTE, vect_location, |
1495 | "Symbolic number of iterations is "); |
1496 | dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations); |
1497 | dump_printf (MSG_NOTE, "\n"); |
1498 | } |
1499 | } |
1500 | |
1501 | stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (loop_cond); |
1502 | STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
1503 | if (inner_loop_cond) |
1504 | { |
1505 | stmt_vec_info inner_loop_cond_info |
1506 | = loop_vinfo->lookup_stmt (inner_loop_cond); |
1507 | STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
1508 | } |
1509 | |
1510 | gcc_assert (!loop->aux);
1511 | loop->aux = loop_vinfo; |
1512 | return opt_loop_vec_info::success (loop_vinfo); |
1513 | } |
1514 | |
1515 | |
1516 | |
1517 | /* Scan the loop stmts and dependent on whether there are any (non-)SLP |
1518 | statements update the vectorization factor. */ |
1519 | |
1520 | static void |
1521 | vect_update_vf_for_slp (loop_vec_info loop_vinfo) |
1522 | { |
1523 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1524 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1525 | int nbbs = loop->num_nodes; |
1526 | poly_uint64 vectorization_factor; |
1527 | int i; |
1528 | |
1529 | DUMP_VECT_SCOPE ("vect_update_vf_for_slp");
1530 |
1531 | vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1532 | gcc_assert (known_ne (vectorization_factor, 0U));
1533 | |
1534 | /* If all the stmts in the loop can be SLPed, we perform only SLP, and |
1535 | vectorization factor of the loop is the unrolling factor required by |
1536 | the SLP instances. If that unrolling factor is 1, we say, that we |
1537 | perform pure SLP on loop - cross iteration parallelism is not |
1538 | exploited. */ |
1539 | bool only_slp_in_loop = true; |
1540 | for (i = 0; i < nbbs; i++) |
1541 | { |
1542 | basic_block bb = bbs[i]; |
1543 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
1544 | gsi_next (&si)) |
1545 | { |
1546 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); |
1547 | if (!stmt_info) |
1548 | continue; |
1549 | if ((STMT_VINFO_RELEVANT_P (stmt_info)
1550 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1551 | && !PURE_SLP_STMT (stmt_info))
1552 | /* STMT needs both SLP and loop-based vectorization. */ |
1553 | only_slp_in_loop = false; |
1554 | } |
1555 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
1556 | gsi_next (&si)) |
1557 | { |
1558 | if (is_gimple_debug (gsi_stmt (si))) |
1559 | continue; |
1560 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
1561 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
1562 | if ((STMT_VINFO_RELEVANT_P (stmt_info)
1563 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1564 | && !PURE_SLP_STMT (stmt_info))
1565 | /* STMT needs both SLP and loop-based vectorization. */ |
1566 | only_slp_in_loop = false; |
1567 | } |
1568 | } |
1569 | |
1570 | if (only_slp_in_loop) |
1571 | { |
1572 | if (dump_enabled_p ()) |
1573 | dump_printf_loc (MSG_NOTE, vect_location, |
1574 | "Loop contains only SLP stmts\n"); |
1575 | vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1576 | } |
1577 | else |
1578 | { |
1579 | if (dump_enabled_p ()) |
1580 | dump_printf_loc (MSG_NOTE, vect_location, |
1581 | "Loop contains SLP and non-SLP stmts\n"); |
1582 | /* Both the vectorization factor and unroll factor have the form |
1583 | GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, |
1584 | so they must have a common multiple. */ |
1585 | vectorization_factor |
1586 | = force_common_multiple (vectorization_factor, |
1587 | LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1588 | } |
1589 | |
1590 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1591 | if (dump_enabled_p ()) |
1592 | { |
1593 | dump_printf_loc (MSG_NOTE, vect_location, |
1594 | "Updating vectorization factor to "); |
1595 | dump_dec (MSG_NOTE, vectorization_factor); |
1596 | dump_printf (MSG_NOTE, ".\n"); |
1597 | } |
1598 | } |
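
/* The force_common_multiple call above performs the usual least common
   multiple computation on (possibly poly_uint64) factors.  For constant
   factors it reduces to the familiar lcm, e.g. a loop VF of 4 combined
   with an SLP unrolling factor of 6 yields 12.  A sketch for the constant
   case only (the real code also handles non-constant poly_uint64 values):

     static unsigned HOST_WIDE_INT
     constant_lcm (unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b)
     {
       unsigned HOST_WIDE_INT x = a, y = b;
       while (y != 0)
         {
           unsigned HOST_WIDE_INT r = x % y;
           x = y;
           y = r;
         }
       // x is now gcd (a, b); a / gcd * b avoids intermediate overflow.
       return (a / x) * b;
     }
*/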
1599 | |
1600 | /* Return true if STMT_INFO describes a double reduction phi and if |
1601 | the other phi in the reduction is also relevant for vectorization. |
1602 | This rejects cases such as: |
1603 | |
1604 | outer1: |
1605 | x_1 = PHI <x_3(outer2), ...>; |
1606 | ... |
1607 | |
1608 | inner: |
1609 | x_2 = ...; |
1610 | ... |
1611 | |
1612 | outer2: |
1613 | x_3 = PHI <x_2(inner)>; |
1614 | |
1615 | if nothing in x_2 or elsewhere makes x_1 relevant. */ |
1616 | |
1617 | static bool |
1618 | vect_active_double_reduction_p (stmt_vec_info stmt_info) |
1619 | { |
1620 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
1621 | return false; |
1622 | |
1623 | return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
1624 | } |
1625 | |
1626 | /* Function vect_analyze_loop_operations. |
1627 | |
1628 | Scan the loop stmts and make sure they are all vectorizable. */ |
1629 | |
1630 | static opt_result |
1631 | vect_analyze_loop_operations (loop_vec_info loop_vinfo) |
1632 | { |
1633 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1634 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1635 | int nbbs = loop->num_nodes; |
1636 | int i; |
1637 | stmt_vec_info stmt_info; |
1638 | bool need_to_vectorize = false; |
1639 | bool ok; |
1640 | |
1641 | DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
1642 | |
1643 | auto_vec<stmt_info_for_cost> cost_vec; |
1644 | |
1645 | for (i = 0; i < nbbs; i++) |
1646 | { |
1647 | basic_block bb = bbs[i]; |
1648 | |
1649 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
1650 | gsi_next (&si)) |
1651 | { |
1652 | gphi *phi = si.phi (); |
1653 | ok = true; |
1654 | |
1655 | stmt_info = loop_vinfo->lookup_stmt (phi); |
1656 | if (dump_enabled_p ()) |
1657 | dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", phi); |
1658 | if (virtual_operand_p (gimple_phi_result (phi))) |
1659 | continue; |
1660 | |
1661 | /* Inner-loop loop-closed exit phi in outer-loop vectorization |
1662 | (i.e., a phi in the tail of the outer-loop). */ |
1663 | if (! is_loop_header_bb_p (bb)) |
1664 | { |
1665 | /* FORNOW: we currently don't support the case that these phis
1666 | are not used in the outer loop (unless it is a double reduction,
1667 | i.e., this phi is vect_reduction_def), because this case
1668 | requires us to actually do something here. */
1669 | if (STMT_VINFO_LIVE_P (stmt_info)
1670 | && !vect_active_double_reduction_p (stmt_info)) |
1671 | return opt_result::failure_at (phi, |
1672 | "Unsupported loop-closed phi" |
1673 | " in outer-loop.\n"); |
1674 | |
1675 | /* If PHI is used in the outer loop, we check that its operand |
1676 | is defined in the inner loop. */ |
1677 | if (STMT_VINFO_RELEVANT_P (stmt_info))
1678 | {
1679 | tree phi_op;
1680 |
1681 | if (gimple_phi_num_args (phi) != 1)
1682 | return opt_result::failure_at (phi, "unsupported phi");
1683 |
1684 | phi_op = PHI_ARG_DEF (phi, 0);
1685 | stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
1686 | if (!op_def_info)
1687 | return opt_result::failure_at (phi, "unsupported phi\n");
1688 |
1689 | if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
1690 | && (STMT_VINFO_RELEVANT (op_def_info)
1691 | != vect_used_in_outer_by_reduction))
1692 | return opt_result::failure_at (phi, "unsupported phi\n");
1693 |
1694 | if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
1695 | || (STMT_VINFO_DEF_TYPE (stmt_info)
1696 | == vect_double_reduction_def))
1697 | && !vectorizable_lc_phi (loop_vinfo,
1698 | stmt_info, NULL, NULL))
1699 | return opt_result::failure_at (phi, "unsupported phi\n");
1700 | } |
1701 | |
1702 | continue; |
1703 | } |
1704 | |
1705 | gcc_assert (stmt_info);
1706 |
1707 | if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1708 | || STMT_VINFO_LIVE_P (stmt_info))
1709 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1710 | /* A scalar-dependence cycle that we don't support. */
1711 | return opt_result::failure_at (phi,
1712 | "not vectorized:"
1713 | " scalar dependence cycle.\n");
1714 |
1715 | if (STMT_VINFO_RELEVANT_P (stmt_info))
1716 | { |
1717 | need_to_vectorize = true; |
1718 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
1719 | && ! PURE_SLP_STMT (stmt_info))
1720 | ok = vectorizable_induction (loop_vinfo,
1721 | stmt_info, NULL, NULL,
1722 | &cost_vec);
1723 | else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1724 | || (STMT_VINFO_DEF_TYPE (stmt_info)
1725 | == vect_double_reduction_def)
1726 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
1727 | && ! PURE_SLP_STMT (stmt_info))
1728 | ok = vectorizable_reduction (loop_vinfo,
1729 | stmt_info, NULL, NULL, &cost_vec);
1730 | } |
1731 | |
1732 | /* SLP PHIs are tested by vect_slp_analyze_node_operations. */ |
1733 | if (ok
1734 | && STMT_VINFO_LIVE_P (stmt_info)
1735 | && !PURE_SLP_STMT (stmt_info))
1736 | ok = vectorizable_live_operation (loop_vinfo,
1737 | stmt_info, NULL, NULL, NULL,
1738 | -1, false, &cost_vec);
1739 | |
1740 | if (!ok) |
1741 | return opt_result::failure_at (phi, |
1742 | "not vectorized: relevant phi not " |
1743 | "supported: %G", |
1744 | static_cast <gimple *> (phi)); |
1745 | } |
1746 | |
1747 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
1748 | gsi_next (&si)) |
1749 | { |
1750 | gimple *stmt = gsi_stmt (si); |
1751 | if (!gimple_clobber_p (stmt) |
1752 | && !is_gimple_debug (stmt)) |
1753 | { |
1754 | opt_result res |
1755 | = vect_analyze_stmt (loop_vinfo, |
1756 | loop_vinfo->lookup_stmt (stmt), |
1757 | &need_to_vectorize, |
1758 | NULL, NULL, &cost_vec);
1759 | if (!res) |
1760 | return res; |
1761 | } |
1762 | } |
1763 | } /* bbs */ |
1764 | |
1765 | add_stmt_costs (loop_vinfo, loop_vinfo->target_cost_data, &cost_vec); |
1766 | |
1767 | /* All operations in the loop are either irrelevant (deal with loop |
1768 | control, or dead), or only used outside the loop and can be moved |
1769 | out of the loop (e.g. invariants, inductions). The loop can be |
1770 | optimized away by scalar optimizations. We're better off not |
1771 | touching this loop. */ |
1772 | if (!need_to_vectorize) |
1773 | { |
1774 | if (dump_enabled_p ()) |
1775 | dump_printf_loc (MSG_NOTE, vect_location, |
1776 | "All the computation can be taken out of the loop.\n"); |
1777 | return opt_result::failure_at |
1778 | (vect_location, |
1779 | "not vectorized: redundant loop. no profit to vectorize.\n"); |
1780 | } |
1781 | |
1782 | return opt_result::success (); |
1783 | } |
1784 | |
1785 | /* Return true if we know that the iteration count is smaller than the |
1786 | vectorization factor. Return false if it isn't, or if we can't be sure |
1787 | either way. */ |
1788 | |
1789 | static bool |
1790 | vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) |
1791 | { |
1792 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
1793 | |
1794 | HOST_WIDE_INT max_niter;
1795 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1796 | max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
1797 | else
1798 | max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1799 |
1800 | if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
1801 | return true; |
1802 | |
1803 | return false; |
1804 | } |
1805 | |
1806 | /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it |
1807 | is worthwhile to vectorize. Return 1 if definitely yes, 0 if |
1808 | definitely no, or -1 if it's worth retrying. */ |
1809 | |
1810 | static int |
1811 | vect_analyze_loop_costing (loop_vec_info loop_vinfo) |
1812 | { |
1813 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1814 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
1815 | |
1816 | /* Only loops that can handle partially-populated vectors can have iteration |
1817 | counts less than the vectorization factor. */ |
1818 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1819 | { |
1820 | if (vect_known_niters_smaller_than_vf (loop_vinfo)) |
1821 | { |
1822 | if (dump_enabled_p ()) |
1823 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1824 | "not vectorized: iteration count smaller than " |
1825 | "vectorization factor.\n"); |
1826 | return 0; |
1827 | } |
1828 | } |
1829 | |
1830 | /* If using the "very cheap" model, reject cases in which we'd keep
1831 | a copy of the scalar code (even if we might be able to vectorize it). */
1832 | if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
1833 | && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1834 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1835 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
1836 | { |
1837 | if (dump_enabled_p ()) |
1838 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1839 | "some scalar iterations would need to be peeled\n"); |
1840 | return 0; |
1841 | } |
1842 | |
1843 | int min_profitable_iters, min_profitable_estimate; |
1844 | vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, |
1845 | &min_profitable_estimate); |
1846 | |
1847 | if (min_profitable_iters < 0) |
1848 | { |
1849 | if (dump_enabled_p ()) |
1850 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1851 | "not vectorized: vectorization not profitable.\n"); |
1852 | if (dump_enabled_p ()) |
1853 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1854 | "not vectorized: vector version will never be " |
1855 | "profitable.\n"); |
1856 | return -1; |
1857 | } |
1858 | |
1859 | int min_scalar_loop_bound = (param_min_vect_loop_bound
1860 | * assumed_vf);
1861 | |
1862 | /* Use the cost model only if it is more conservative than user specified |
1863 | threshold. */ |
1864 | unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
1865 | min_profitable_iters);
1866 |
1867 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1868 | |
1869 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1870 | && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
1871 | { |
1872 | if (dump_enabled_p ()) |
1873 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1874 | "not vectorized: vectorization not profitable.\n"); |
1875 | if (dump_enabled_p ()) |
1876 | dump_printf_loc (MSG_NOTE, vect_location, |
1877 | "not vectorized: iteration count smaller than user " |
1878 | "specified loop bound parameter or minimum profitable " |
1879 | "iterations (whichever is more conservative).\n"); |
1880 | return 0; |
1881 | } |
1882 | |
1883 | /* The static profitability threshold min_profitable_estimate includes
1884 | the cost of having to check at runtime whether the scalar loop |
1885 | should be used instead. If it turns out that we don't need or want |
1886 | such a check, the threshold we should use for the static estimate |
1887 | is simply the point at which the vector loop becomes more profitable |
1888 | than the scalar loop. */ |
1889 | if (min_profitable_estimate > min_profitable_iters
1890 | && !LOOP_REQUIRES_VERSIONING (loop_vinfo)
1891 | && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1892 | && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1893 | && !vect_apply_runtime_profitability_check_p (loop_vinfo))
1894 | { |
1895 | if (dump_enabled_p ()) |
1896 | dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime" |
1897 | " choice between the scalar and vector loops\n"); |
1898 | min_profitable_estimate = min_profitable_iters; |
1899 | } |
1900 | |
1901 | /* If the vector loop needs multiple iterations to be beneficial then |
1902 | things are probably too close to call, and the conservative thing |
1903 | would be to stick with the scalar code. */ |
1904 | if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
1905 | && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) |
1906 | { |
1907 | if (dump_enabled_p ()) |
1908 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1909 | "one iteration of the vector loop would be" |
1910 | " more expensive than the equivalent number of" |
1911 | " iterations of the scalar loop\n"); |
1912 | return 0; |
1913 | } |
1914 | |
1915 | HOST_WIDE_INT estimated_niter;
1916 | |
1917 | /* If we are vectorizing an epilogue then we know the maximum number of |
1918 | scalar iterations it will cover is at least one lower than the |
1919 | vectorization factor of the main loop. */ |
1920 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
1921 | estimated_niter
1922 | = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1;
1923 | else |
1924 | { |
1925 | estimated_niter = estimated_stmt_executions_int (loop); |
1926 | if (estimated_niter == -1) |
1927 | estimated_niter = likely_max_stmt_executions_int (loop); |
1928 | } |
1929 | if (estimated_niter != -1
1930 | && ((unsigned HOST_WIDE_INT) estimated_niter
1931 | < MAX (th, (unsigned) min_profitable_estimate)))
1932 | { |
1933 | if (dump_enabled_p ()) |
1934 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1935 | "not vectorized: estimated iteration count too " |
1936 | "small.\n"); |
1937 | if (dump_enabled_p ()) |
1938 | dump_printf_loc (MSG_NOTE, vect_location, |
1939 | "not vectorized: estimated iteration count smaller " |
1940 | "than specified loop bound parameter or minimum " |
1941 | "profitable iterations (whichever is more " |
1942 | "conservative).\n"); |
1943 | return -1; |
1944 | } |
1945 | |
1946 | return 1; |
1947 | } |
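
/* The final decision above reduces to comparing the (known or estimated)
   iteration count against a single threshold.  A condensed sketch of that
   comparison (simplified; the real code distinguishes known and estimated
   counts and the -1 "retry" result):

     static bool
     loop_costing_ok_p (long niters, unsigned int th,
                        int min_profitable_estimate)
     {
       unsigned int limit = th;
       if ((unsigned int) min_profitable_estimate > limit)
         limit = (unsigned int) min_profitable_estimate;
       /* A negative NITERS means the count is unknown.  */
       return niters < 0 || (unsigned long) niters >= limit;
     }

   where TH already folds in both the user-specified min-vect-loop-bound
   and the runtime profitability estimate.  */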
1948 | |
1949 | static opt_result |
1950 | vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, |
1951 | vec<data_reference_p> *datarefs, |
1952 | unsigned int *n_stmts) |
1953 | { |
1954 | *n_stmts = 0; |
1955 | for (unsigned i = 0; i < loop->num_nodes; i++) |
1956 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); |
1957 | !gsi_end_p (gsi); gsi_next (&gsi)) |
1958 | { |
1959 | gimple *stmt = gsi_stmt (gsi); |
1960 | if (is_gimple_debug (stmt)) |
1961 | continue; |
1962 | ++(*n_stmts); |
1963 | opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, |
1964 | NULL, 0);
1965 | if (!res) |
1966 | { |
1967 | if (is_gimple_call (stmt) && loop->safelen) |
1968 | { |
1969 | tree fndecl = gimple_call_fndecl (stmt), op; |
1970 | if (fndecl != NULL_TREE)
1971 | {
1972 | cgraph_node *node = cgraph_node::get (fndecl);
1973 | if (node != NULL && node->simd_clones != NULL)
1974 | { |
1975 | unsigned int j, n = gimple_call_num_args (stmt); |
1976 | for (j = 0; j < n; j++) |
1977 | { |
1978 | op = gimple_call_arg (stmt, j); |
1979 | if (DECL_P (op)
1980 | || (REFERENCE_CLASS_P (op)
1981 | && get_base_address (op)))
1982 | break;
1983 | }
1984 | op = gimple_call_lhs (stmt);
1985 | /* Ignore #pragma omp declare simd functions
1986 | if they don't have data references in the
1987 | call stmt itself. */
1988 | if (j == n
1989 | && !(op
1990 | && (DECL_P (op)
1991 | || (REFERENCE_CLASS_P (op)
1992 | && get_base_address (op)))))
1993 | continue; |
1994 | } |
1995 | } |
1996 | } |
1997 | return res; |
1998 | } |
1999 | /* If dependence analysis will give up due to the limit on the |
2000 | number of datarefs stop here and fail fatally. */ |
2001 | if (datarefs->length () |
2002 | > (unsigned)param_loop_max_datarefs_for_datadeps)
2003 | return opt_result::failure_at (stmt, "exceeded param " |
2004 | "loop-max-datarefs-for-datadeps\n"); |
2005 | } |
2006 | return opt_result::success (); |
2007 | } |
2008 | |
2009 | /* Look for SLP-only access groups and turn each individual access into its own |
2010 | group. */ |
2011 | static void |
2012 | vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) |
2013 | { |
2014 | unsigned int i; |
2015 | struct data_reference *dr; |
2016 | |
2017 | DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
2018 | |
2019 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2020 | FOR_EACH_VEC_ELT (datarefs, i, dr)
2021 | {
2022 | gcc_assert (DR_REF (dr));
2023 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr));
2024 |
2025 | /* Check if the load is a part of an interleaving chain. */
2026 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2027 | {
2028 | stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
2029 | unsigned int group_size = DR_GROUP_SIZE (first_element);
2030 |
2031 | /* Check if SLP-only groups. */
2032 | if (!STMT_SLP_TYPE (stmt_info)
2033 | && STMT_VINFO_SLP_VECT_ONLY (first_element))
2034 | {
2035 | /* Dissolve the group. */
2036 | STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
2037 |
2038 | stmt_vec_info vinfo = first_element;
2039 | while (vinfo)
2040 | {
2041 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
2042 | DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
2043 | DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2044 | DR_GROUP_SIZE (vinfo) = 1;
2045 | if (STMT_VINFO_STRIDED_P (first_element))
2046 | DR_GROUP_GAP (vinfo) = 0;
2047 | else
2048 | DR_GROUP_GAP (vinfo) = group_size - 1;
2049 | vinfo = next; |
2050 | } |
2051 | } |
2052 | } |
2053 | } |
2054 | } |
2055 | |
2056 | /* Determine if operating on full vectors for LOOP_VINFO might leave |
2057 | some scalar iterations still to do. If so, decide how we should |
2058 | handle those scalar iterations. The possibilities are: |
2059 | |
2060 | (1) Make LOOP_VINFO operate on partial vectors instead of full vectors. |
2061 | In this case: |
2062 | |
2063 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == true |
2064 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false |
2065 | LOOP_VINFO_PEELING_FOR_NITER == false |
2066 | |
2067 | (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop |
2068 | to handle the remaining scalar iterations. In this case: |
2069 | |
2070 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == false |
2071 | LOOP_VINFO_PEELING_FOR_NITER == true |
2072 | |
2073 | There are two choices: |
2074 | |
2075 | (2a) Consider vectorizing the epilogue loop at the same VF as the |
2076 | main loop, but using partial vectors instead of full vectors. |
2077 | In this case: |
2078 | |
2079 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true |
2080 | |
2081 | (2b) Consider vectorizing the epilogue loop at lower VFs only. |
2082 | In this case: |
2083 | |
2084 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false |
2085 | |
2086 | When FOR_EPILOGUE_P is true, make this determination based on the |
2087 | assumption that LOOP_VINFO is an epilogue loop, otherwise make it |
2088 | based on the assumption that LOOP_VINFO is the main loop. The caller |
2089 | has made sure that the number of iterations is set appropriately for |
2090 | this value of FOR_EPILOGUE_P. */ |
2091 | |
2092 | opt_result |
2093 | vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo, |
2094 | bool for_epilogue_p) |
2095 | { |
2096 | /* Determine whether there would be any scalar iterations left over. */ |
2097 | bool need_peeling_or_partial_vectors_p |
2098 | = vect_need_peeling_or_partial_vectors_p (loop_vinfo); |
2099 | |
2100 | /* Decide whether to vectorize the loop with partial vectors. */ |
2101 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
2102 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
2103 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2104 | && need_peeling_or_partial_vectors_p) |
2105 | { |
2106 | /* For partial-vector-usage=1, try to push the handling of partial |
2107 | vectors to the epilogue, with the main loop continuing to operate |
2108 | on full vectors. |
2109 | |
2110 | ??? We could then end up failing to use partial vectors if we |
2111 | decide to peel iterations into a prologue, and if the main loop |
2112 | then ends up processing fewer than VF iterations. */ |
2113 | if (param_vect_partial_vector_usage == 1
2114 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
2115 | && !vect_known_niters_smaller_than_vf (loop_vinfo))
2116 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
2117 | else
2118 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
2119 | } |
2120 | |
2121 | if (dump_enabled_p ()) |
2122 | { |
2123 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2124 | dump_printf_loc (MSG_NOTE, vect_location, |
2125 | "operating on partial vectors%s.\n", |
2126 | for_epilogue_p ? " for epilogue loop" : ""); |
2127 | else |
2128 | dump_printf_loc (MSG_NOTE, vect_location, |
2129 | "operating only on full vectors%s.\n", |
2130 | for_epilogue_p ? " for epilogue loop" : ""); |
2131 | } |
2132 | |
2133 | if (for_epilogue_p) |
2134 | { |
2135 | loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
2136 | gcc_assert (orig_loop_vinfo);
2137 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2138 | gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2139 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)));
2140 | }
2141 |
2142 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2143 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2144 | {
2145 | /* Check that the loop processes at least one full vector. */
2146 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2147 | tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo);
2148 | if (known_lt (wi::to_widest (scalar_niters), vf))
2149 | return opt_result::failure_at (vect_location, |
2150 | "loop does not have enough iterations" |
2151 | " to support vectorization.\n"); |
2152 | |
2153 | /* If we need to peel an extra epilogue iteration to handle data |
2154 | accesses with gaps, check that there are enough scalar iterations |
2155 | available. |
2156 | |
2157 | The check above is redundant with this one when peeling for gaps, |
2158 | but the distinction is useful for diagnostics. */ |
2159 | tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
2160 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2161 | && known_lt (wi::to_widest (scalar_nitersm1), vf))
2162 | return opt_result::failure_at (vect_location, |
2163 | "loop does not have enough iterations" |
2164 | " to support peeling for gaps.\n"); |
2165 | } |
2166 | |
2167 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
2168 | = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
2169 | && need_peeling_or_partial_vectors_p); |
2170 | |
2171 | return opt_result::success (); |
2172 | } |
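
/* The overall decision logic above can be summarised as a small decision
   function (a sketch only; the dump output and the epilogue assertions are
   omitted, and the names are illustrative):

     enum tail_strategy { TAIL_NONE, TAIL_PARTIAL_VECTORS, TAIL_PEEL };

     static enum tail_strategy
     choose_tail_strategy (bool can_use_partial_vectors_p,
                           bool need_peeling_or_partial_vectors_p,
                           bool push_partial_vectors_to_epilogue_p)
     {
       if (!need_peeling_or_partial_vectors_p)
         return TAIL_NONE;
       if (can_use_partial_vectors_p && !push_partial_vectors_to_epilogue_p)
         return TAIL_PARTIAL_VECTORS;
       return TAIL_PEEL;
     }

   matching case (1) versus the peeling cases (2a)/(2b) described before
   the function.  */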
2173 | |
2174 | /* Function vect_analyze_loop_2. |
2175 | |
2176 | Apply a set of analyses on LOOP, and create a loop_vec_info struct |
2177 | for it. The different analyses will record information in the |
2178 | loop_vec_info struct. */ |
2179 | static opt_result |
2180 | vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) |
2181 | { |
2182 | opt_result ok = opt_result::success (); |
2183 | int res; |
2184 | unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
2185 | poly_uint64 min_vf = 2;
2186 | loop_vec_info orig_loop_vinfo = NULL;
2187 | |
2188 | /* If we are dealing with an epilogue then orig_loop_vinfo points to the |
2189 | loop_vec_info of the first vectorized loop. */ |
2190 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2191 | orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
2192 | else
2193 | orig_loop_vinfo = loop_vinfo;
2194 | gcc_assert (orig_loop_vinfo);
2195 | |
2196 | /* The first group of checks is independent of the vector size. */ |
2197 | fatal = true; |
2198 | |
2199 | if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)
2200 | && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)))
2201 | return opt_result::failure_at (vect_location, |
2202 | "not vectorized: simd if(0)\n"); |
2203 | |
2204 | /* Find all data references in the loop (which correspond to vdefs/vuses) |
2205 | and analyze their evolution in the loop. */ |
2206 | |
2207 | loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
2208 | |
2209 | /* Gather the data references and count stmts in the loop. */ |
2210 | if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ())
2211 | {
2212 | opt_result res
2213 | = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
2214 | &LOOP_VINFO_DATAREFS (loop_vinfo),
2215 | n_stmts);
2216 | if (!res) |
2217 | { |
2218 | if (dump_enabled_p ()) |
2219 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2220 | "not vectorized: loop contains function " |
2221 | "calls or data references that cannot " |
2222 | "be analyzed\n"); |
2223 | return res; |
2224 | } |
2225 | loop_vinfo->shared->save_datarefs (); |
2226 | } |
2227 | else |
2228 | loop_vinfo->shared->check_datarefs (); |
2229 | |
2230 | /* Analyze the data references and also adjust the minimal |
2231 | vectorization factor according to the loads and stores. */ |
2232 | |
2233 | ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); |
2234 | if (!ok) |
2235 | { |
2236 | if (dump_enabled_p ()) |
2237 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2238 | "bad data references.\n"); |
2239 | return ok; |
2240 | } |
2241 | |
2242 | /* Classify all cross-iteration scalar data-flow cycles. |
2243 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ |
2244 | vect_analyze_scalar_cycles (loop_vinfo); |
2245 | |
2246 | vect_pattern_recog (loop_vinfo); |
2247 | |
2248 | vect_fixup_scalar_cycles_with_patterns (loop_vinfo); |
2249 | |
2250 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
2251 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ |
2252 | |
2253 | ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
2254 | if (!ok) |
2255 | { |
2256 | if (dump_enabled_p ()) |
2257 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2258 | "bad data access.\n"); |
2259 | return ok; |
2260 | } |
2261 | |
2262 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
2263 | |
2264 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); |
2265 | if (!ok) |
2266 | { |
2267 | if (dump_enabled_p ()) |
2268 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2269 | "unexpected pattern.\n"); |
2270 | return ok; |
2271 | } |
2272 | |
2273 | /* While the rest of the analysis below depends on it in some way. */ |
2274 | fatal = false; |
2275 | |
2276 | /* Analyze data dependences between the data-refs in the loop |
2277 | and adjust the maximum vectorization factor according to |
2278 | the dependences. |
2279 | FORNOW: fail at the first data dependence that we encounter. */ |
2280 | |
2281 | ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); |
2282 | if (!ok) |
2283 | { |
2284 | if (dump_enabled_p ()) |
2285 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2286 | "bad data dependence.\n"); |
2287 | return ok; |
2288 | } |
2289 | if (max_vf != MAX_VECTORIZATION_FACTOR |
2290 | && maybe_lt (max_vf, min_vf)) |
2291 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); |
2292 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; |
2293 | |
2294 | ok = vect_determine_vectorization_factor (loop_vinfo); |
2295 | if (!ok) |
2296 | { |
2297 | if (dump_enabled_p ()) |
2298 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2299 | "can't determine vectorization factor.\n"); |
2300 | return ok; |
2301 | } |
2302 | if (max_vf != MAX_VECTORIZATION_FACTOR |
2303 | && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo))) |
2304 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); |
2305 | |
2306 | /* Compute the scalar iteration cost. */ |
2307 | vect_compute_single_scalar_iteration_cost (loop_vinfo); |
2308 | |
2309 | poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2310 | |
2311 | /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ |
2312 | ok = vect_analyze_slp (loop_vinfo, *n_stmts); |
2313 | if (!ok) |
2314 | return ok; |
2315 | |
2316 | /* If there are any SLP instances mark them as pure_slp. */ |
2317 | bool slp = vect_make_slp_decision (loop_vinfo); |
2318 | if (slp) |
2319 | { |
2320 | /* Find stmts that need to be both vectorized and SLPed. */ |
2321 | vect_detect_hybrid_slp (loop_vinfo); |
2322 | |
2323 | /* Update the vectorization factor based on the SLP decision. */ |
2324 | vect_update_vf_for_slp (loop_vinfo); |
2325 | |
2326 | /* Optimize the SLP graph with the vectorization factor fixed. */ |
2327 | vect_optimize_slp (loop_vinfo); |
2328 | |
2329 | /* Gather the loads reachable from the SLP graph entries. */ |
2330 | vect_gather_slp_loads (loop_vinfo); |
2331 | } |
2332 | |
2333 | bool saved_can_use_partial_vectors_p |
2334 | = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo); |
2335 |  |
2336 | /* We don't expect to have to roll back to anything other than an empty |
2337 | set of rgroups. */ |
2338 | gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); |
2339 | |
2340 | /* This is the point where we can re-start analysis with SLP forced off. */ |
2341 | start_over: |
2342 | |
2343 | /* Now the vectorization factor is final. */ |
2344 | poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2345 | gcc_assert (known_ne (vectorization_factor, 0U)); |
2346 |  |
2347 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) |
2348 | { |
2349 | dump_printf_loc (MSG_NOTE, vect_location, |
2350 | "vectorization_factor = "); |
2351 | dump_dec (MSG_NOTE, vectorization_factor); |
2352 | dump_printf (MSG_NOTE, ", niters = %wd\n", |
2353 | LOOP_VINFO_INT_NITERS (loop_vinfo)); |
2354 | } |
2355 | |
2356 | /* Analyze the alignment of the data-refs in the loop. |
2357 | Fail if a data reference is found that cannot be vectorized. */ |
2358 | |
2359 | ok = vect_analyze_data_refs_alignment (loop_vinfo); |
2360 | if (!ok) |
2361 | { |
2362 | if (dump_enabled_p ()) |
2363 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2364 | "bad data alignment.\n"); |
2365 | return ok; |
2366 | } |
2367 | |
2368 | /* Prune the list of ddrs to be tested at run-time by versioning for alias. |
2369 | It is important to call pruning after vect_analyze_data_ref_accesses, |
2370 | since we use grouping information gathered by interleaving analysis. */ |
2371 | ok = vect_prune_runtime_alias_test_list (loop_vinfo); |
2372 | if (!ok) |
2373 | return ok; |
2374 | |
2375 | /* Do not invoke vect_enhance_data_refs_alignment for epilogue |
2376 | vectorization, since we do not want to add extra peeling or |
2377 | add versioning for alignment. */ |
2378 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2379 | /* This pass will decide on using loop versioning and/or loop peeling in |
2380 | order to enhance the alignment of data references in the loop. */ |
2381 | ok = vect_enhance_data_refs_alignment (loop_vinfo); |
2382 | if (!ok) |
2383 | return ok; |
2384 | |
2385 | if (slp) |
2386 | { |
2387 | /* Analyze operations in the SLP instances. Note this may |
2388 | remove unsupported SLP instances which makes the above |
2389 | SLP kind detection invalid. */ |
2390 | unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); |
2391 | vect_slp_analyze_operations (loop_vinfo); |
2392 | if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) |
2393 | { |
2394 | ok = opt_result::failure_at (vect_location, |
2395 | "unsupported SLP instances\n"); |
2396 | goto again; |
2397 | } |
2398 | |
2399 | /* Check whether any load in ALL SLP instances is possibly permuted. */ |
2400 | slp_tree load_node, slp_root; |
2401 | unsigned i, x; |
2402 | slp_instance instance; |
2403 | bool can_use_lanes = true; |
2404 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance) |
2405 | { |
2406 | slp_root = SLP_INSTANCE_TREE (instance); |
2407 | int group_size = SLP_TREE_LANES (slp_root); |
2408 | tree vectype = SLP_TREE_VECTYPE (slp_root); |
2409 | bool loads_permuted = false; |
2410 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) |
2411 | { |
2412 | if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ()) |
2413 | continue; |
2414 | unsigned j; |
2415 | stmt_vec_info load_info; |
2416 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) |
2417 | if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j) |
2418 | { |
2419 | loads_permuted = true; |
2420 | break; |
2421 | } |
2422 | } |
2423 | |
2424 | /* If the loads and stores can be handled with load/store-lane |
2425 | instructions record it and move on to the next instance. */ |
2426 | if (loads_permuted |
2427 | && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store |
2428 | && vect_store_lanes_supported (vectype, group_size, false)) |
2429 | { |
2430 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) |
2431 | { |
2432 | stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT |
2433 | (SLP_TREE_SCALAR_STMTS (load_node)[0]); |
2434 | /* Use SLP for strided accesses (or if we can't |
2435 | load-lanes). */ |
2436 | if (STMT_VINFO_STRIDED_P (stmt_vinfo) |
2437 | || ! vect_load_lanes_supported |
2438 | (STMT_VINFO_VECTYPE (stmt_vinfo), |
2439 | DR_GROUP_SIZE (stmt_vinfo), false)) |
2440 | break; |
2441 | } |
2442 | |
2443 | can_use_lanes |
2444 | = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length (); |
2445 | |
2446 | if (can_use_lanes && dump_enabled_p ()) |
2447 | dump_printf_loc (MSG_NOTE, vect_location, |
2448 | "SLP instance %p can use load/store-lanes\n", |
2449 | instance); |
2450 | } |
2451 | else |
2452 | { |
2453 | can_use_lanes = false; |
2454 | break; |
2455 | } |
2456 | } |
2457 | |
2458 | /* If all SLP instances can use load/store-lanes, abort SLP and try again |
2459 | with SLP disabled. */ |
2460 | if (can_use_lanes) |
2461 | { |
2462 | ok = opt_result::failure_at (vect_location, |
2463 | "Built SLP cancelled: can use " |
2464 | "load/store-lanes\n"); |
2465 | if (dump_enabled_p ()) |
2466 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2467 | "Built SLP cancelled: all SLP instances support " |
2468 | "load/store-lanes\n"); |
2469 | goto again; |
2470 | } |
2471 | } |
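| /* "Load/store-lanes" here refers to instructions that load or store a whole |
|    interleaved group in one operation and de-interleave it across vector |
|    registers (for example the AArch64 ld2/ld3/ld4 structure loads).  When |
|    every SLP instance could be handled that way, the interleaving scheme is |
|    assumed to be cheaper than the load permutations the SLP graph would |
|    need, which is why the SLP attempt is cancelled above.  */ |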
2472 | |
2473 | /* Dissolve SLP-only groups. */ |
2474 | vect_dissolve_slp_only_groups (loop_vinfo); |
2475 | |
2476 | /* Scan all the remaining operations in the loop that are not subject |
2477 | to SLP and make sure they are vectorizable. */ |
2478 | ok = vect_analyze_loop_operations (loop_vinfo); |
2479 | if (!ok) |
2480 | { |
2481 | if (dump_enabled_p ()) |
2482 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2483 | "bad operation or unsupported loop bound.\n"); |
2484 | return ok; |
2485 | } |
2486 | |
2487 | /* For now, we don't expect to mix both masking and length approaches for one |
2488 | loop, so disable the use of partial vectors if both are recorded. */ |
2489 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
2490 | && !LOOP_VINFO_MASKS (loop_vinfo).is_empty () |
2491 | && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()) |
2492 | { |
2493 | if (dump_enabled_p ()) |
2494 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2495 | "can't vectorize a loop with partial vectors" |
2496 | " because we don't expect to mix different" |
2497 | " approaches with partial vectors for the" |
2498 | " same loop.\n"); |
2499 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
2500 | } |
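| /* A hypothetical example of the situation handled above: one rgroup in the |
|    loop may have recorded a requirement for masks while another recorded a |
|    requirement for lengths; since the two control schemes are not combined, |
|    the loop simply falls back to not using partial vectors at all.  */ |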
2501 | |
2502 | /* If we still have the option of using partial vectors, |
2503 | check whether we can generate the necessary loop controls. */ |
2504 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
2505 | && !vect_verify_full_masking (loop_vinfo) |
2506 | && !vect_verify_loop_lens (loop_vinfo)) |
2507 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
2508 | |
2509 | /* If we're vectorizing an epilogue loop, the vectorized loop either needs |
2510 | to be able to handle fewer than VF scalars, or needs to have a lower VF |
2511 | than the main loop. */ |
2512 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
2513 | && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
2514 | && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
2515 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))) |
2516 | return opt_result::failure_at (vect_location, |
2517 | "Vectorization factor too high for" |
2518 | " epilogue loop.\n"); |
2519 | |
2520 | /* Decide whether this loop_vinfo should use partial vectors or peeling, |
2521 | assuming that the loop will be used as a main loop. We will redo |
2522 | this analysis later if we instead decide to use the loop as an |
2523 | epilogue loop. */ |
2524 | ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false); |
2525 | if (!ok) |
2526 | return ok; |
2527 | |
2528 | /* Check the costings of the loop make vectorizing worthwhile. */ |
2529 | res = vect_analyze_loop_costing (loop_vinfo); |
2530 | if (res < 0) |
2531 | { |
2532 | ok = opt_result::failure_at (vect_location, |
2533 | "Loop costings may not be worthwhile.\n"); |
2534 | goto again; |
2535 | } |
2536 | if (!res) |
2537 | return opt_result::failure_at (vect_location, |
2538 | "Loop costings not worthwhile.\n"); |
2539 | |
2540 | /* If an epilogue loop is required make sure we can create one. */ |
2541 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
2542 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) |
2543 | { |
2544 | if (dump_enabled_p ()) |
2545 | dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); |
2546 | if (!vect_can_advance_ivs_p (loop_vinfo) |
2547 | || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), |
2548 | single_exit (LOOP_VINFO_LOOP |
2549 | (loop_vinfo)))) |
2550 | { |
2551 | ok = opt_result::failure_at (vect_location, |
2552 | "not vectorized: can't create required " |
2553 | "epilog loop\n"); |
2554 | goto again; |
2555 | } |
2556 | } |
2557 | |
2558 | /* During peeling, we need to check if number of loop iterations is |
2559 | enough for both peeled prolog loop and vector loop. This check |
2560 | can be merged along with threshold check of loop versioning, so |
2561 | increase threshold for this case if necessary. |
2562 | |
2563 | If we are analyzing an epilogue we still want to check what its |
2564 | versioning threshold would be. If we decide to vectorize the epilogues we |
2565 | will want to use the lowest versioning threshold of all epilogues and main |
2566 | loop. This will enable us to enter a vectorized epilogue even when |
2567 | versioning the loop. We can't simply check whether the epilogue requires |
2568 | versioning though since we may have skipped some versioning checks when |
2569 | analyzing the epilogue. For instance, checks for alias versioning will be |
2570 | skipped when dealing with epilogues as we assume we already checked them |
2571 | for the main loop. So instead we always check the 'orig_loop_vinfo'. */ |
2572 | if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)) |
2573 | { |
2574 | poly_uint64 niters_th = 0; |
2575 | unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
2576 | |
2577 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
2578 | { |
2579 | /* Niters for peeled prolog loop. */ |
2580 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) |
2581 | { |
2582 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
2583 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
2584 | niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1; |
2585 | } |
2586 | else |
2587 | niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
2588 | } |
2589 | |
2590 | /* Niters for at least one iteration of vectorized loop. */ |
2591 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
2592 | niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2593 | /* One additional iteration because of peeling for gap. */ |
2594 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
2595 | niters_th += 1; |
2596 | |
2597 | /* Use the same condition as vect_transform_loop to decide when to use |
2598 | the cost to determine a versioning threshold. */ |
2599 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) |
2600 | && ordered_p (th, niters_th)) |
2601 | niters_th = ordered_max (poly_uint64 (th), niters_th); |
2602 | |
2603 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; |
2604 | } |
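| /* A worked example with made-up numbers: with a vectorization factor of 4, |
|    peeling of at most 3 iterations for alignment, no partial vectors and |
|    peeling for gaps, the code above computes niters_th = 3 + 4 + 1 = 8; if |
|    the runtime profitability check applies and the cost-model threshold TH |
|    is larger, the versioning threshold is raised to TH instead.  */ |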
2605 | |
2606 | gcc_assert (known_eq (vectorization_factor, |
2607 | LOOP_VINFO_VECT_FACTOR (loop_vinfo))); |
2608 | |
2609 | /* Ok to vectorize! */ |
2610 | return opt_result::success (); |
2611 | |
2612 | again: |
2613 | /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ |
2614 | gcc_assert (!ok); |
2615 | |
2616 | /* Try again with SLP forced off but if we didn't do any SLP there is |
2617 | no point in re-trying. */ |
2618 | if (!slp) |
2619 | return ok; |
2620 | |
2621 | /* If there are reduction chains re-trying will fail anyway. */ |
2622 | if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) |
2623 | return ok; |
2624 | |
2625 | /* Likewise if the grouped loads or stores in the SLP cannot be handled |
2626 | via interleaving or lane instructions. */ |
2627 | slp_instance instance; |
2628 | slp_tree node; |
2629 | unsigned i, j; |
2630 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
2631 | { |
2632 | stmt_vec_info vinfo; |
2633 | vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; |
2634 | if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) |
2635 | continue; |
2636 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
2637 | unsigned int size = DR_GROUP_SIZE (vinfo); |
2638 | tree vectype = STMT_VINFO_VECTYPE (vinfo); |
2639 | if (! vect_store_lanes_supported (vectype, size, false) |
2640 | && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U) |
2641 | && ! vect_grouped_store_supported (vectype, size)) |
2642 | return opt_result::failure_at (vinfo->stmt, |
2643 | "unsupported grouped store\n"); |
2644 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) |
2645 | { |
2646 | vinfo = SLP_TREE_SCALAR_STMTS (node)[0]; |
2647 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
2648 | bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo); |
2649 | size = DR_GROUP_SIZE (vinfo); |
2650 | vectype = STMT_VINFO_VECTYPE (vinfo); |
2651 | if (! vect_load_lanes_supported (vectype, size, false) |
2652 | && ! vect_grouped_load_supported (vectype, single_element_p, |
2653 | size)) |
2654 | return opt_result::failure_at (vinfo->stmt, |
2655 | "unsupported grouped load\n"); |
2656 | } |
2657 | } |
2658 | |
2659 | if (dump_enabled_p ()) |
2660 | dump_printf_loc (MSG_NOTE, vect_location, |
2661 | "re-trying with SLP disabled\n"); |
2662 | |
2663 | /* Roll back state appropriately. No SLP this time. */ |
2664 | slp = false; |
2665 | /* Restore vectorization factor as it were without SLP. */ |
2666 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; |
2667 | /* Free the SLP instances. */ |
2668 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) |
2669 | vect_free_slp_instance (instance); |
2670 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
2671 | /* Reset SLP type to loop_vect on all stmts. */ |
2672 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |
2673 | { |
2674 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
2675 | for (gimple_stmt_iterator si = gsi_start_phis (bb); |
2676 | !gsi_end_p (si); gsi_next (&si)) |
2677 | { |
2678 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
2679 | STMT_SLP_TYPE (stmt_info) = loop_vect; |
2680 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
2681 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
2682 | { |
2683 | /* vectorizable_reduction adjusts reduction stmt def-types, |
2684 | restore them to that of the PHI. */ |
2685 | STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info)) |
2686 | = STMT_VINFO_DEF_TYPE (stmt_info); |
2687 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize |
2688 | (STMT_VINFO_REDUC_DEF (stmt_info))) |
2689 | = STMT_VINFO_DEF_TYPE (stmt_info); |
2690 | } |
2691 | } |
2692 | for (gimple_stmt_iterator si = gsi_start_bb (bb); |
2693 | !gsi_end_p (si); gsi_next (&si)) |
2694 | { |
2695 | if (is_gimple_debug (gsi_stmt (si))) |
2696 | continue; |
2697 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
2698 | STMT_SLP_TYPE (stmt_info) = loop_vect; |
2699 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
2700 | { |
2701 | stmt_vec_info pattern_stmt_info |
2702 | = STMT_VINFO_RELATED_STMT (stmt_info); |
2703 | if (STMT_VINFO_SLP_VECT_ONLY (pattern_stmt_info)) |
2704 | STMT_VINFO_IN_PATTERN_P (stmt_info) = false; |
2705 |  |
2706 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
2707 | STMT_SLP_TYPE (pattern_stmt_info) = loop_vect; |
2708 | for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); |
2709 | !gsi_end_p (pi); gsi_next (&pi)) |
2710 | STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) |
2711 | = loop_vect; |
2712 | } |
2713 | } |
2714 | } |
2715 | /* Free optimized alias test DDRS. */ |
2716 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0); |
2717 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); |
2718 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); |
2719 | /* Reset target cost data. */ |
2720 | destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); |
2721 | LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) |
2722 | = init_cost (LOOP_VINFO_LOOP (loop_vinfo)); |
2723 | /* Reset accumulated rgroup information. */ |
2724 | release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo)); |
2725 | release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo)); |
2726 | /* Reset assorted flags. */ |
2727 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; |
2728 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; |
2729 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; |
2730 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; |
2731 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
2732 | = saved_can_use_partial_vectors_p; |
2733 | |
2734 | goto start_over; |
2735 | } |
2736 | |
2737 | /* Return true if vectorizing a loop using NEW_LOOP_VINFO appears |
2738 | to be better than vectorizing it using OLD_LOOP_VINFO. Assume that |
2739 | OLD_LOOP_VINFO is better unless something specifically indicates |
2740 | otherwise. |
2741 | |
2742 | Note that this deliberately isn't a partial order. */ |
2743 | |
2744 | static bool |
2745 | vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, |
2746 | loop_vec_info old_loop_vinfo) |
2747 | { |
2748 | struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo); |
2749 | gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop); |
2750 | |
2751 | poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo); |
2752 | poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo); |
2753 | |
2754 | /* Always prefer a VF of loop->simdlen over any other VF. */ |
2755 | if (loop->simdlen) |
2756 | { |
2757 | bool new_simdlen_p = known_eq (new_vf, loop->simdlen); |
2758 | bool old_simdlen_p = known_eq (old_vf, loop->simdlen); |
2759 | if (new_simdlen_p != old_simdlen_p) |
2760 | return new_simdlen_p; |
2761 | } |
2762 | |
2763 | /* Limit the VFs to what is likely to be the maximum number of iterations, |
2764 | to handle cases in which at least one loop_vinfo is fully-masked. */ |
2765 | HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop); |
2766 | if (estimated_max_niter != -1) |
2767 | { |
2768 | if (known_le (estimated_max_niter, new_vf)) |
2769 | new_vf = estimated_max_niter; |
2770 | if (known_le (estimated_max_niter, old_vf)) |
2771 | old_vf = estimated_max_niter; |
2772 | } |
2773 | |
2774 | /* Check whether the (fractional) cost per scalar iteration is lower |
2775 | or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf. */ |
2776 | poly_int64 rel_new = new_loop_vinfo->vec_inside_cost * old_vf; |
2777 | poly_int64 rel_old = old_loop_vinfo->vec_inside_cost * new_vf; |
2778 | |
2779 | HOST_WIDE_INT est_rel_new_min |
2780 | = estimated_poly_value (rel_new, POLY_VALUE_MIN); |
2781 | HOST_WIDE_INT est_rel_new_max |
2782 | = estimated_poly_value (rel_new, POLY_VALUE_MAX); |
2783 |  |
2784 | HOST_WIDE_INT est_rel_old_min |
2785 | = estimated_poly_value (rel_old, POLY_VALUE_MIN); |
2786 | HOST_WIDE_INT est_rel_old_max |
2787 | = estimated_poly_value (rel_old, POLY_VALUE_MAX); |
2788 | |
2789 | /* Check first if we can make out an unambiguous total order from the minimum |
2790 | and maximum estimates. */ |
2791 | if (est_rel_new_min < est_rel_old_min |
2792 | && est_rel_new_max < est_rel_old_max) |
2793 | return true; |
2794 | else if (est_rel_old_min < est_rel_new_min |
2795 | && est_rel_old_max < est_rel_new_max) |
2796 | return false; |
2797 | /* When old_loop_vinfo uses a variable vectorization factor, |
2798 | we know that it has a lower cost for at least one runtime VF. |
2799 | However, we don't know how likely that VF is. |
2800 | |
2801 | One option would be to compare the costs for the estimated VFs. |
2802 | The problem is that that can put too much pressure on the cost |
2803 | model. E.g. if the estimated VF is also the lowest possible VF, |
2804 | and if old_loop_vinfo is 1 unit worse than new_loop_vinfo |
2805 | for the estimated VF, we'd then choose new_loop_vinfo even |
2806 | though (a) new_loop_vinfo might not actually be better than |
2807 | old_loop_vinfo for that VF and (b) it would be significantly |
2808 | worse at larger VFs. |
2809 | |
2810 | Here we go for a hacky compromise: pick new_loop_vinfo if it is |
2811 | no more expensive than old_loop_vinfo even after doubling the |
2812 | estimated old_loop_vinfo VF. For all but trivial loops, this |
2813 | ensures that we only pick new_loop_vinfo if it is significantly |
2814 | better than old_loop_vinfo at the estimated VF. */ |
2815 | |
2816 | if (est_rel_old_min != est_rel_new_min |
2817 | || est_rel_old_max != est_rel_new_max) |
2818 | { |
2819 | HOST_WIDE_INT est_rel_new_likely |
2820 | = estimated_poly_value (rel_new, POLY_VALUE_LIKELY); |
2821 | HOST_WIDE_INT est_rel_old_likely |
2822 | = estimated_poly_value (rel_old, POLY_VALUE_LIKELY); |
2823 | |
2824 | return est_rel_new_likely * 2 <= est_rel_old_likely; |
2825 | } |
2826 | |
2827 | /* If there's nothing to choose between the loop bodies, see whether |
2828 | there's a difference in the prologue and epilogue costs. */ |
2829 | if (new_loop_vinfo->vec_outside_cost != old_loop_vinfo->vec_outside_cost) |
2830 | return new_loop_vinfo->vec_outside_cost < old_loop_vinfo->vec_outside_cost; |
2831 | |
2832 | return false; |
2833 | } |
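| /* Illustration of the comparison above with made-up costs: if the new loop |
|    body costs 12 units at VF 8 and the old one costs 5 units at VF 4, the |
|    function compares 12 * 4 = 48 against 5 * 8 = 40, i.e. 1.5 vs. 1.25 |
|    units per scalar iteration, and keeps the old loop_vinfo.  For |
|    variable-length vectors the products are polynomial, so they are |
|    compared at their minimum, maximum and likely estimates as done above.  */ |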
2834 | |
2835 | /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return |
2836 | true if we should. */ |
2837 | |
2838 | static bool |
2839 | vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, |
2840 | loop_vec_info old_loop_vinfo) |
2841 | { |
2842 | if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) |
2843 | return false; |
2844 | |
2845 | if (dump_enabled_p ()) |
2846 | dump_printf_loc (MSG_NOTE, vect_location, |
2847 | "***** Preferring vector mode %s to vector mode %s\n", |
2848 | GET_MODE_NAME (new_loop_vinfo->vector_mode), |
2849 | GET_MODE_NAME (old_loop_vinfo->vector_mode)); |
2850 | return true; |
2851 | } |
2852 | |
2853 | /* If LOOP_VINFO is already a main loop, return it unmodified. Otherwise |
2854 | try to reanalyze it as a main loop. Return the loop_vinfo on success |
2855 | and null on failure. */ |
2856 | |
2857 | static loop_vec_info |
2858 | vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts) |
2859 | { |
2860 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2861 | return loop_vinfo; |
2862 | |
2863 | if (dump_enabled_p ()) |
2864 | dump_printf_loc (MSG_NOTE, vect_location, |
2865 | "***** Reanalyzing as a main loop with vector mode %s\n", |
2866 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
2867 | |
2868 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2869 | vec_info_shared *shared = loop_vinfo->shared; |
2870 | opt_loop_vec_info main_loop_vinfo = vect_analyze_loop_form (loop, shared); |
2871 | gcc_assert (main_loop_vinfo); |
2872 | |
2873 | main_loop_vinfo->vector_mode = loop_vinfo->vector_mode; |
2874 | |
2875 | bool fatal = false; |
2876 | bool res = vect_analyze_loop_2 (main_loop_vinfo, fatal, n_stmts); |
2877 | loop->aux = NULL; |
2878 | if (!res) |
2879 | { |
2880 | if (dump_enabled_p ()) |
2881 | dump_printf_loc (MSG_NOTE, vect_location, |
2882 | "***** Failed to analyze main loop with vector" |
2883 | " mode %s\n", |
2884 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
2885 | delete main_loop_vinfo; |
2886 | return NULL; |
2887 | } |
2888 | LOOP_VINFO_VECTORIZABLE_P (main_loop_vinfo) = 1; |
2889 | return main_loop_vinfo; |
2890 | } |
2891 | |
2892 | /* Function vect_analyze_loop. |
2893 | |
2894 | Apply a set of analyses on LOOP, and create a loop_vec_info struct |
2895 | for it. The different analyses will record information in the |
2896 | loop_vec_info struct. */ |
2897 | opt_loop_vec_info |
2898 | vect_analyze_loop (class loop *loop, vec_info_shared *shared) |
2899 | { |
2900 | auto_vector_modes vector_modes; |
2901 | |
2902 | /* Autodetect first vector size we try. */ |
2903 | unsigned int autovec_flags |
2904 | = targetm.vectorize.autovectorize_vector_modes (&vector_modes, |
2905 | loop->simdlen != 0); |
2906 | unsigned int mode_i = 0; |
2907 | |
2908 | DUMP_VECT_SCOPE ("analyze_loop_nest"); |
2909 | |
2910 | if (loop_outer (loop) |
2911 | && loop_vec_info_for_loop (loop_outer (loop)) |
2912 | && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) |
2913 | return opt_loop_vec_info::failure_at (vect_location, |
2914 | "outer-loop already vectorized.\n"); |
2915 | |
2916 | if (!find_loop_nest (loop, &shared->loop_nest)) |
2917 | return opt_loop_vec_info::failure_at |
2918 | (vect_location, |
2919 | "not vectorized: loop nest containing two or more consecutive inner" |
2920 | " loops cannot be vectorized\n"); |
2921 | |
2922 | unsigned n_stmts = 0; |
2923 | machine_mode autodetected_vector_mode = VOIDmode; |
2924 | opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); |
2925 | machine_mode next_vector_mode = VOIDmode; |
2926 | poly_uint64 lowest_th = 0; |
2927 | unsigned vectorized_loops = 0; |
2928 | bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS) |
2929 | && !unlimited_cost_model (loop)); |
2930 | |
2931 | bool vect_epilogues = false; |
2932 | opt_result res = opt_result::success (); |
2933 | unsigned HOST_WIDE_INT simdlen = loop->simdlen; |
2934 | while (1) |
2935 | { |
2936 | /* Check the CFG characteristics of the loop (nesting, entry/exit). */ |
2937 | opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared); |
2938 | if (!loop_vinfo) |
2939 | { |
2940 | if (dump_enabled_p ()) |
2941 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2942 | "bad loop form.\n"); |
2943 | gcc_checking_assert (first_loop_vinfo == NULL); |
2944 | return loop_vinfo; |
2945 | } |
2946 | loop_vinfo->vector_mode = next_vector_mode; |
2947 | |
2948 | bool fatal = false; |
2949 | |
2950 | /* When pick_lowest_cost_p is true, we should in principle iterate |
2951 | over all the loop_vec_infos that LOOP_VINFO could replace and |
2952 | try to vectorize LOOP_VINFO under the same conditions. |
2953 | E.g. when trying to replace an epilogue loop, we should vectorize |
2954 | LOOP_VINFO as an epilogue loop with the same VF limit. When trying |
2955 | to replace the main loop, we should vectorize LOOP_VINFO as a main |
2956 | loop too. |
2957 | |
2958 | However, autovectorize_vector_modes is usually sorted as follows: |
2959 | |
2960 | - Modes that naturally produce lower VFs usually follow modes that |
2961 | naturally produce higher VFs. |
2962 | |
2963 | - When modes naturally produce the same VF, maskable modes |
2964 | usually follow unmaskable ones, so that the maskable mode |
2965 | can be used to vectorize the epilogue of the unmaskable mode. |
2966 | |
2967 | This order is preferred because it leads to the maximum |
2968 | epilogue vectorization opportunities. Targets should only use |
2969 | a different order if they want to make wide modes available while |
2970 | disparaging them relative to earlier, smaller modes. The assumption |
2971 | in that case is that the wider modes are more expensive in some |
2972 | way that isn't reflected directly in the costs. |
2973 | |
2974 | There should therefore be few interesting cases in which |
2975 | LOOP_VINFO fails when treated as an epilogue loop, succeeds when |
2976 | treated as a standalone loop, and ends up being genuinely cheaper |
2977 | than FIRST_LOOP_VINFO. */ |
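| /* As a purely hypothetical example of such an ordering, a target might |
|    return { a 256-bit mode, a 128-bit mode, a maskable 128-bit mode }: the |
|    smaller and maskable modes are tried after the wider one and can then |
|    be used to vectorize the epilogue of the 256-bit main loop.  */ |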
2978 | if (vect_epilogues) |
2979 | LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo; |
2980 | |
2981 | res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); |
2982 | if (mode_i == 0) |
2983 | autodetected_vector_mode = loop_vinfo->vector_mode; |
2984 | if (dump_enabled_p ()) |
2985 | { |
2986 | if (res) |
2987 | dump_printf_loc (MSG_NOTE, vect_location, |
2988 | "***** Analysis succeeded with vector mode %s\n", |
2989 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
2990 | else |
2991 | dump_printf_loc (MSG_NOTE, vect_location, |
2992 | "***** Analysis failed with vector mode %s\n", |
2993 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
2994 | } |
2995 | |
2996 | loop->aux = NULL; |
2997 | |
2998 | if (!fatal) |
2999 | while (mode_i < vector_modes.length () |
3000 | && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i])) |
3001 | { |
3002 | if (dump_enabled_p ()) |
3003 | dump_printf_loc (MSG_NOTE, vect_location, |
3004 | "***** The result for vector mode %s would" |
3005 | " be the same\n", |
3006 | GET_MODE_NAME (vector_modes[mode_i])); |
3007 | mode_i += 1; |
3008 | } |
3009 | |
3010 | if (res) |
3011 | { |
3012 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; |
3013 | vectorized_loops++; |
3014 | |
3015 | /* Once we hit the desired simdlen for the first time, |
3016 | discard any previous attempts. */ |
3017 | if (simdlen |
3018 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)) |
3019 | { |
3020 | delete first_loop_vinfo; |
3021 | first_loop_vinfo = opt_loop_vec_info::success (NULL); |
3022 | LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL; |
3023 | simdlen = 0; |
3024 | } |
3025 | else if (pick_lowest_cost_p && first_loop_vinfo) |
3026 | { |
3027 | /* Keep trying to roll back vectorization attempts while the |
3028 | loop_vec_infos they produced were worse than this one. */ |
3029 | vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos; |
3030 | while (!vinfos.is_empty () |
3031 | && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) |
3032 | { |
3033 | gcc_assert (vect_epilogues); |
3034 | delete vinfos.pop (); |
3035 | } |
3036 | if (vinfos.is_empty () |
3037 | && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) |
3038 | { |
3039 | loop_vec_info main_loop_vinfo |
3040 | = vect_reanalyze_as_main_loop (loop_vinfo, &n_stmts); |
3041 | if (main_loop_vinfo == loop_vinfo) |
3042 | { |
3043 | delete first_loop_vinfo; |
3044 | first_loop_vinfo = opt_loop_vec_info::success (NULL); |
3045 | } |
3046 | else if (main_loop_vinfo |
3047 | && vect_joust_loop_vinfos (main_loop_vinfo, |
3048 | first_loop_vinfo)) |
3049 | { |
3050 | delete first_loop_vinfo; |
3051 | first_loop_vinfo = opt_loop_vec_info::success (NULL); |
3052 | delete loop_vinfo; |
3053 | loop_vinfo |
3054 | = opt_loop_vec_info::success (main_loop_vinfo); |
3055 | } |
3056 | else |
3057 | delete main_loop_vinfo; |
3058 | } |
3059 | } |
3060 | |
3061 | if (first_loop_vinfo == NULL) |
3062 | { |
3063 | first_loop_vinfo = loop_vinfo; |
3064 | lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); |
3065 | } |
3066 | else if (vect_epilogues |
3067 | /* For now only allow one epilogue loop. */ |
3068 | && first_loop_vinfo->epilogue_vinfos.is_empty ()) |
3069 | { |
3070 | first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); |
3071 | poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); |
3072 | gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo) |
3073 | || maybe_ne (lowest_th, 0U)); |
3074 | /* Keep track of the known smallest versioning |
3075 | threshold. */ |
3076 | if (ordered_p (lowest_th, th)) |
3077 | lowest_th = ordered_min (lowest_th, th); |
3078 | } |
3079 | else |
3080 | { |
3081 | delete loop_vinfo; |
3082 | loop_vinfo = opt_loop_vec_info::success (NULL); |
3083 | } |
3084 | |
3085 | /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is |
3086 | enabled, SIMDUID is not set, it is the innermost loop and we have |
3087 | either already found the loop's SIMDLEN or there was no SIMDLEN to |
3088 | begin with. |
3089 | TODO: Enable epilogue vectorization for loops with SIMDUID set. */ |
3090 | vect_epilogues = (!simdlen |
3091 | && loop->inner == NULL |
3092 | && param_vect_epilogues_nomask |
3093 | && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo) |
3094 | && !loop->simduid |
3095 | /* For now only allow one epilogue loop, but allow |
3096 | pick_lowest_cost_p to replace it. */ |
3097 | && (first_loop_vinfo->epilogue_vinfos.is_empty () |
3098 | || pick_lowest_cost_p)); |
3099 | |
3100 | /* Commit to first_loop_vinfo if we have no reason to try |
3101 | alternatives. */ |
3102 | if (!simdlen && !vect_epilogues && !pick_lowest_cost_p) |
3103 | break; |
3104 | } |
3105 | else |
3106 | { |
3107 | delete loop_vinfo; |
3108 | loop_vinfo = opt_loop_vec_info::success (NULL); |
3109 | if (fatal) |
3110 | { |
3111 | gcc_checking_assert (first_loop_vinfo == NULL); |
3112 | break; |
3113 | } |
3114 | } |
3115 | |
3116 | /* Handle the case that the original loop can use partial |
3117 | vectorization, but want to only adopt it for the epilogue. |
3118 | The retry should be in the same mode as original. */ |
3119 | if (vect_epilogues |
3120 | && loop_vinfo |
3121 | && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
3122 | { |
3123 | gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3124 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)); |
3125 | if (dump_enabled_p ()) |
3126 | dump_printf_loc (MSG_NOTE, vect_location, |
3127 | "***** Re-trying analysis with same vector mode" |
3128 | " %s for epilogue with partial vectors.\n", |
3129 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
3130 | continue; |
3131 | } |
3132 | |
3133 | if (mode_i < vector_modes.length () |
3134 | && VECTOR_MODE_P (autodetected_vector_mode) |
3135 | && (related_vector_mode (vector_modes[mode_i], |
3136 | GET_MODE_INNER (autodetected_vector_mode)) |
3137 | == autodetected_vector_mode) |
3138 | && (related_vector_mode (autodetected_vector_mode, |
3139 | GET_MODE_INNER (vector_modes[mode_i])) |
3140 | == vector_modes[mode_i])) |
3141 | { |
3142 | if (dump_enabled_p ()) |
3143 | dump_printf_loc (MSG_NOTE, vect_location, |
3144 | "***** Skipping vector mode %s, which would" |
3145 | " repeat the analysis for %s\n", |
3146 | GET_MODE_NAME (vector_modes[mode_i]), |
3147 | GET_MODE_NAME (autodetected_vector_mode)); |
3148 | mode_i += 1; |
3149 | } |
3150 |  |
3151 | if (mode_i == vector_modes.length () |
3152 | || autodetected_vector_mode == VOIDmode) |
3153 | break; |
3154 | |
3155 | /* Try the next biggest vector size. */ |
3156 | next_vector_mode = vector_modes[mode_i++]; |
3157 | if (dump_enabled_p ()) |
3158 | dump_printf_loc (MSG_NOTE, vect_location, |
3159 | "***** Re-trying analysis with vector mode %s\n", |
3160 | GET_MODE_NAME (next_vector_mode)); |
3161 | } |
3162 | |
3163 | if (first_loop_vinfo) |
3164 | { |
3165 | loop->aux = (loop_vec_info) first_loop_vinfo; |
3166 | if (dump_enabled_p ()) |
3167 | dump_printf_loc (MSG_NOTE, vect_location, |
3168 | "***** Choosing vector mode %s\n", |
3169 | GET_MODE_NAME (first_loop_vinfo->vector_mode)); |
3170 | LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; |
3171 | return first_loop_vinfo; |
3172 | } |
3173 | |
3174 | return opt_loop_vec_info::propagate_failure (res); |
3175 | } |
3176 | |
3177 | /* Return true if there is an in-order reduction function for CODE, storing |
3178 | it in *REDUC_FN if so. */ |
3179 | |
3180 | static bool |
3181 | fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn) |
3182 | { |
3183 | switch (code) |
3184 | { |
3185 | case PLUS_EXPR: |
3186 | *reduc_fn = IFN_FOLD_LEFT_PLUS; |
3187 | return true; |
3188 | |
3189 | default: |
3190 | return false; |
3191 | } |
3192 | } |
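| /* For instance, for a loop like |
| |
|      float s = 0.0f; |
|      for (i = 0; i < N; i++) |
|        s += a[i]; |
| |
|    an in-order reduction keeps the scalar evaluation order |
|    (((s + a[0]) + a[1]) + ...), which is what IFN_FOLD_LEFT_PLUS provides, |
|    whereas IFN_REDUC_PLUS is free to reassociate the additions.  */ |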
3193 | |
3194 | /* Function reduction_fn_for_scalar_code |
3195 | |
3196 | Input: |
3197 | CODE - tree_code of a reduction operations. |
3198 | |
3199 | Output: |
3200 | REDUC_FN - the corresponding internal function to be used to reduce the |
3201 | vector of partial results into a single scalar result, or IFN_LAST |
3202 | if the operation is a supported reduction operation, but does not have |
3203 | such an internal function. |
3204 | |
3205 | Return FALSE if CODE currently cannot be vectorized as reduction. */ |
3206 | |
3207 | static bool |
3208 | reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) |
3209 | { |
3210 | switch (code) |
3211 | { |
3212 | case MAX_EXPR: |
3213 | *reduc_fn = IFN_REDUC_MAX; |
3214 | return true; |
3215 | |
3216 | case MIN_EXPR: |
3217 | *reduc_fn = IFN_REDUC_MIN; |
3218 | return true; |
3219 | |
3220 | case PLUS_EXPR: |
3221 | *reduc_fn = IFN_REDUC_PLUS; |
3222 | return true; |
3223 | |
3224 | case BIT_AND_EXPR: |
3225 | *reduc_fn = IFN_REDUC_AND; |
3226 | return true; |
3227 | |
3228 | case BIT_IOR_EXPR: |
3229 | *reduc_fn = IFN_REDUC_IOR; |
3230 | return true; |
3231 | |
3232 | case BIT_XOR_EXPR: |
3233 | *reduc_fn = IFN_REDUC_XOR; |
3234 | return true; |
3235 | |
3236 | case MULT_EXPR: |
3237 | case MINUS_EXPR: |
3238 | *reduc_fn = IFN_LAST; |
3239 | return true; |
3240 | |
3241 | default: |
3242 | return false; |
3243 | } |
3244 | } |
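| /* For example, a sum reduction maps to IFN_REDUC_PLUS, which collapses the |
|    final vector of partial sums into one scalar.  MULT_EXPR and MINUS_EXPR |
|    are accepted as reductions but report IFN_LAST, so for them the epilogue |
|    has to combine the elements of the final vector by other means.  */ |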
3245 | |
3246 | /* If there is a neutral value X such that SLP reduction NODE would not |
3247 | be affected by the introduction of additional X elements, return that X, |
3248 | otherwise return null. CODE is the code of the reduction and VECTOR_TYPE |
3249 | is the vector type that would hold element X. REDUC_CHAIN is true if |
3250 | the SLP statements perform a single reduction, false if each statement |
3251 | performs an independent reduction. */ |
3252 | |
3253 | static tree |
3254 | neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, |
3255 | tree_code code, bool reduc_chain) |
3256 | { |
3257 | vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
3258 | stmt_vec_info stmt_vinfo = stmts[0]; |
3259 | tree scalar_type = TREE_TYPE (vector_type); |
3260 | class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; |
3261 | gcc_assert (loop); |
3262 | |
3263 | switch (code) |
3264 | { |
3265 | case WIDEN_SUM_EXPR: |
3266 | case DOT_PROD_EXPR: |
3267 | case SAD_EXPR: |
3268 | case PLUS_EXPR: |
3269 | case MINUS_EXPR: |
3270 | case BIT_IOR_EXPR: |
3271 | case BIT_XOR_EXPR: |
3272 | return build_zero_cst (scalar_type); |
3273 | |
3274 | case MULT_EXPR: |
3275 | return build_one_cst (scalar_type); |
3276 | |
3277 | case BIT_AND_EXPR: |
3278 | return build_all_ones_cst (scalar_type); |
3279 | |
3280 | case MAX_EXPR: |
3281 | case MIN_EXPR: |
3282 | /* For MIN/MAX the initial values are neutral. A reduction chain |
3283 | has only a single initial value, so that value is neutral for |
3284 | all statements. */ |
3285 | if (reduc_chain) |
3286 | return PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, |
3287 | loop_preheader_edge (loop)); |
3288 | return NULL_TREE; |
3289 | |
3290 | default: |
3291 | return NULL_TREE; |
3292 | } |
3293 | } |
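| /* Examples of the neutral values returned above: for a sum the neutral |
|    element is 0 (x + 0 == x), for a product it is 1, and for BIT_AND_EXPR |
|    it is an all-ones value; padding an SLP group with such elements leaves |
|    the reduction result unchanged.  */ |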
3294 | |
3295 | /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement |
3296 | STMT is printed with a message MSG. */ |
3297 | |
3298 | static void |
3299 | report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) |
3300 | { |
3301 | dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); |
3302 | } |
3303 | |
3304 | /* Return true if we need an in-order reduction for operation CODE |
3305 | on type TYPE, i.e. whether the scalar operations must be evaluated in |
3306 | their original order. */ |
3307 | |
3308 | bool |
3309 | needs_fold_left_reduction_p (tree type, tree_code code) |
3310 | { |
3311 | /* CHECKME: check for !flag_finite_math_only too? */ |
3312 | if (SCALAR_FLOAT_TYPE_P (type)) |
3313 | switch (code) |
3314 | { |
3315 | case MIN_EXPR: |
3316 | case MAX_EXPR: |
3317 | return false; |
3318 | |
3319 | default: |
3320 | return !flag_associative_math; |
3321 | } |
3322 | |
3323 | if (INTEGRAL_TYPE_P (type)) |
3324 | { |
3325 | if (!operation_no_trapping_overflow (type, code)) |
3326 | return true; |
3327 | return false; |
3328 | } |
3329 | |
3330 | if (SAT_FIXED_POINT_TYPE_P (type)) |
3331 | return true; |
3332 | |
3333 | return false; |
3334 | } |
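| /* A small numeric illustration of why reassociation is unsafe for floating |
|    point unless -fassociative-math is given: with doubles, |
|    (1e20 + -1e20) + 3.0 == 3.0, but 1e20 + (-1e20 + 3.0) == 0.0, because |
|    the 3.0 is absorbed by the large intermediate value.  */ |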
3335 | |
3336 | /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and |
3337 | has a handled computation expression. Store the main reduction |
3338 | operation in *CODE. */ |
3339 | |
3340 | static bool |
3341 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
3342 | tree loop_arg, enum tree_code *code, |
3343 | vec<std::pair<ssa_op_iter, use_operand_p> > &path) |
3344 | { |
3345 | auto_bitmap visited; |
3346 | tree lookfor = PHI_RESULT (phi); |
3347 | ssa_op_iter curri; |
3348 | use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); |
3349 | while (USE_FROM_PTR (curr) != loop_arg) |
3350 | curr = op_iter_next_use (&curri); |
3351 | curri.i = curri.numops; |
3352 | do |
3353 | { |
3354 | path.safe_push (std::make_pair (curri, curr)); |
3355 | tree use = USE_FROM_PTR (curr); |
3356 | if (use == lookfor) |
3357 | break; |
3358 | gimple *def = SSA_NAME_DEF_STMT (use); |
3359 | if (gimple_nop_p (def) |
3360 | || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) |
3361 | { |
3362 | pop: |
3363 | do |
3364 | { |
3365 | std::pair<ssa_op_iter, use_operand_p> x = path.pop (); |
3366 | curri = x.first; |
3367 | curr = x.second; |
3368 | do |
3369 | curr = op_iter_next_use (&curri); |
3370 | /* Skip already visited or non-SSA operands (from iterating |
3371 | over PHI args). */ |
3372 | while (curr != NULL_USE_OPERAND_P |
3373 | && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME |
3374 | || ! bitmap_set_bit (visited, |
3375 | SSA_NAME_VERSION |
3376 | (USE_FROM_PTR (curr))))); |
3377 | } |
3378 | while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); |
3379 | if (curr == NULL_USE_OPERAND_P) |
3380 | break; |
3381 | } |
3382 | else |
3383 | { |
3384 | if (gimple_code (def) == GIMPLE_PHI) |
3385 | curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); |
3386 | else |
3387 | curr = op_iter_init_use (&curri, def, SSA_OP_USE); |
3388 | while (curr != NULL_USE_OPERAND_P |
3389 | && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME |
3390 | || ! bitmap_set_bit (visited, |
3391 | SSA_NAME_VERSION |
3392 | (USE_FROM_PTR (curr))))) |
3393 | curr = op_iter_next_use (&curri); |
3394 | if (curr == NULL_USE_OPERAND_P) |
3395 | goto pop; |
3396 | } |
3397 | } |
3398 | while (1); |
3399 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3400 | { |
3401 | dump_printf_loc (MSG_NOTE, loc, "reduction path: "); |
3402 | unsigned i; |
3403 | std::pair<ssa_op_iter, use_operand_p> *x; |
3404 | FOR_EACH_VEC_ELT (path, i, x)
3405 | dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second));
3406 | dump_printf (MSG_NOTE, "\n"); |
3407 | } |
3408 | |
3409 | /* Check whether the reduction path detected is valid. */ |
3410 | bool fail = path.length () == 0; |
3411 | bool neg = false; |
3412 | int sign = -1; |
3413 | *code = ERROR_MARK; |
3414 | for (unsigned i = 1; i < path.length (); ++i) |
3415 | { |
3416 | gimple *use_stmt = USE_STMT (path[i].second);
3417 | tree op = USE_FROM_PTR (path[i].second);
3418 | if (! is_gimple_assign (use_stmt) |
3419 | /* The following makes sure we can compute the operand index
3420 | easily, and it mostly disallows chaining via COND_EXPR condition
3421 | operands. */
3422 | || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use |
3423 | && (gimple_num_ops (use_stmt) <= 2 |
3424 | || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use) |
3425 | && (gimple_num_ops (use_stmt) <= 3 |
3426 | || gimple_assign_rhs3_ptr (use_stmt) != path[i].second->use))) |
3427 | { |
3428 | fail = true; |
3429 | break; |
3430 | } |
3431 | /* Check there's only a single stmt the op is used on. For the |
3432 | not value-changing tail and the last stmt allow out-of-loop uses. |
3433 | ??? We could relax this and handle arbitrary live stmts by |
3434 | forcing a scalar epilogue for example. */ |
3435 | imm_use_iterator imm_iter; |
3436 | gimple *op_use_stmt; |
3437 | unsigned cnt = 0; |
3438 | FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
3439 | if (!is_gimple_debug (op_use_stmt) |
3440 | && (*code != ERROR_MARK |
3441 | || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) |
3442 | { |
3443 | /* We want to allow x + x but not x < 1 ? x : 2. */ |
3444 | if (is_gimple_assign (op_use_stmt) |
3445 | && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR) |
3446 | { |
3447 | use_operand_p use_p; |
3448 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
3449 | cnt++; |
3450 | } |
3451 | else |
3452 | cnt++; |
3453 | } |
3454 | if (cnt != 1) |
3455 | { |
3456 | fail = true; |
3457 | break; |
3458 | } |
3459 | tree_code use_code = gimple_assign_rhs_code (use_stmt); |
3460 | if (use_code == MINUS_EXPR) |
3461 | { |
3462 | use_code = PLUS_EXPR; |
3463 | /* Track whether we negate the reduction value each iteration. */ |
3464 | if (gimple_assign_rhs2 (use_stmt) == op) |
3465 | neg = ! neg; |
3466 | } |
3467 | if (CONVERT_EXPR_CODE_P (use_code)
3468 | && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)),
3469 | TREE_TYPE (gimple_assign_rhs1 (use_stmt))))
3470 | ; |
3471 | else if (*code == ERROR_MARK) |
3472 | { |
3473 | *code = use_code; |
3474 | sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)));
3475 | } |
3476 | else if (use_code != *code) |
3477 | { |
3478 | fail = true; |
3479 | break; |
3480 | } |
3481 | else if ((use_code == MIN_EXPR |
3482 | || use_code == MAX_EXPR) |
3483 | && sign != TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt))))
3484 | { |
3485 | fail = true; |
3486 | break; |
3487 | } |
3488 | } |
3489 | return ! fail && ! neg && *code != ERROR_MARK; |
3490 | } |
3491 | |
3492 | bool |
3493 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
3494 | tree loop_arg, enum tree_code code) |
3495 | { |
3496 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
3497 | enum tree_code code_; |
3498 | return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) |
3499 | && code_ == code); |
3500 | } |
3501 | |
3502 | |
3503 | |
3504 | /* Function vect_is_simple_reduction |
3505 | |
3506 | (1) Detect a cross-iteration def-use cycle that represents a simple |
3507 | reduction computation. We look for the following pattern: |
3508 | |
3509 | loop_header: |
3510 | a1 = phi < a0, a2 > |
3511 | a3 = ... |
3512 | a2 = operation (a3, a1) |
3513 | |
3514 | or |
3515 | |
3516 | a3 = ... |
3517 | loop_header: |
3518 | a1 = phi < a0, a2 > |
3519 | a2 = operation (a3, a1) |
3520 | |
3521 | such that: |
3522 | 1. operation is commutative and associative and it is safe to |
3523 | change the order of the computation |
3524 | 2. no uses for a2 in the loop (a2 is used out of the loop) |
3525 | 3. no uses of a1 in the loop besides the reduction operation |
3526 | 4. no uses of a1 outside the loop. |
3527 | |
3528 | Conditions 1,4 are tested here. |
3529 | Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. |
3530 | |
3531 | (2) Detect a cross-iteration def-use cycle in nested loops, i.e., |
3532 | nested cycles. |
3533 | |
3534 | (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double |
3535 | reductions: |
3536 | |
3537 | a1 = phi < a0, a2 > |
3538 | inner loop (def of a3) |
3539 | a2 = phi < a3 > |
3540 | |
3541 | (4) Detect condition expressions, i.e.:
3542 | for (int i = 0; i < N; i++) |
3543 | if (a[i] < val) |
3544 | ret_val = a[i]; |
3545 | |
3546 | */ |
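A nested accumulation of the following shape (an illustrative C sketch; the array and bounds are assumed, not taken from any particular testcase) gives rise to exactly the double-reduction PHI cycle of case (3) above:

    int sum = 0;                    /* a0 */
    for (int i = 0; i < n; i++)     /* outer header: a1 = phi <a0, a2> */
      for (int j = 0; j < m; j++)   /* inner loop computes a3 */
        sum = sum + a[i][j];        /* a2 = phi <a3> after the inner loop */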
3547 | |
3548 | static stmt_vec_info |
3549 | vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, |
3550 | bool *double_reduc, bool *reduc_chain_p) |
3551 | { |
3552 | gphi *phi = as_a <gphi *> (phi_info->stmt); |
3553 | gimple *phi_use_stmt = NULL;
3554 | imm_use_iterator imm_iter; |
3555 | use_operand_p use_p; |
3556 | |
3557 | *double_reduc = false; |
3558 | *reduc_chain_p = false; |
3559 | STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
3560 | |
3561 | tree phi_name = PHI_RESULT (phi);
3562 | /* ??? If there are no uses of the PHI result the inner loop reduction |
3563 | won't be detected as possibly double-reduction by vectorizable_reduction |
3564 | because that tries to walk the PHI arg from the preheader edge which |
3565 | can be constant. See PR60382. */ |
3566 | if (has_zero_uses (phi_name)) |
3567 | return NULL;
3568 | class loop *loop = (gimple_bb (phi))->loop_father; |
3569 | unsigned nphi_def_loop_uses = 0; |
3570 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
3571 | { |
3572 | gimple *use_stmt = USE_STMT (use_p);
3573 | if (is_gimple_debug (use_stmt)) |
3574 | continue; |
3575 | |
3576 | if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
3577 | { |
3578 | if (dump_enabled_p ()) |
3579 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3580 | "intermediate value used outside loop.\n"); |
3581 | |
3582 | return NULL;
3583 | } |
3584 | |
3585 | nphi_def_loop_uses++; |
3586 | phi_use_stmt = use_stmt; |
3587 | } |
3588 | |
3589 | tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3590 | if (TREE_CODE (latch_def) != SSA_NAME)
3591 | { |
3592 | if (dump_enabled_p ()) |
3593 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3594 | "reduction: not ssa_name: %T\n", latch_def); |
3595 | return NULL;
3596 | } |
3597 | |
3598 | stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); |
3599 | if (!def_stmt_info |
3600 | || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) |
3601 | return NULL;
3602 | |
3603 | bool nested_in_vect_loop |
3604 | = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
3605 | unsigned nlatch_def_loop_uses = 0; |
3606 | auto_vec<gphi *, 3> lcphis; |
3607 | bool inner_loop_of_double_reduc = false; |
3608 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)
3609 | { |
3610 | gimple *use_stmt = USE_STMT (use_p);
3611 | if (is_gimple_debug (use_stmt)) |
3612 | continue; |
3613 | if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
3614 | nlatch_def_loop_uses++; |
3615 | else |
3616 | { |
3617 | /* We can have more than one loop-closed PHI. */ |
3618 | lcphis.safe_push (as_a <gphi *> (use_stmt)); |
3619 | if (nested_in_vect_loop |
3620 | && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
3621 | == vect_double_reduction_def)) |
3622 | inner_loop_of_double_reduc = true; |
3623 | } |
3624 | } |
3625 | |
3626 | /* If we are vectorizing an inner reduction we are executing that |
3627 | in the original order only in case we are not dealing with a |
3628 | double reduction. */ |
3629 | if (nested_in_vect_loop && !inner_loop_of_double_reduc) |
3630 | { |
3631 | if (dump_enabled_p ()) |
3632 | report_vect_op (MSG_NOTE, def_stmt_info->stmt, |
3633 | "detected nested cycle: "); |
3634 | return def_stmt_info; |
3635 | } |
3636 | |
3637 | /* If this isn't a nested cycle or if the nested cycle reduction value |
3638 | is used outside of the inner loop we cannot handle uses of the reduction
3639 | value. */ |
3640 | if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) |
3641 | { |
3642 | if (dump_enabled_p ()) |
3643 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3644 | "reduction used in loop.\n"); |
3645 | return NULL;
3646 | } |
3647 | |
3648 | /* If DEF_STMT is a phi node itself, we expect it to have a single argument |
3649 | defined in the inner loop. */ |
3650 | if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) |
3651 | { |
3652 | tree op1 = PHI_ARG_DEF (def_stmt, 0);
3653 | if (gimple_phi_num_args (def_stmt) != 1 |
3654 | || TREE_CODE (op1) != SSA_NAME)
3655 | { |
3656 | if (dump_enabled_p ()) |
3657 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3658 | "unsupported phi node definition.\n"); |
3659 | |
3660 | return NULL;
3661 | } |
3662 | |
3663 | gimple *def1 = SSA_NAME_DEF_STMT (op1);
3664 | if (gimple_bb (def1) |
3665 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) |
3666 | && loop->inner |
3667 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) |
3668 | && is_gimple_assign (def1) |
3669 | && is_a <gphi *> (phi_use_stmt) |
3670 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) |
3671 | { |
3672 | if (dump_enabled_p ()) |
3673 | report_vect_op (MSG_NOTE, def_stmt, |
3674 | "detected double reduction: "); |
3675 | |
3676 | *double_reduc = true; |
3677 | return def_stmt_info; |
3678 | } |
3679 | |
3680 | return NULL;
3681 | } |
3682 | |
3683 | /* Look for the expression computing latch_def from the loop PHI result. */
3684 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
3685 | enum tree_code code; |
3686 | if (check_reduction_path (vect_location, loop, phi, latch_def, &code, |
3687 | path)) |
3688 | { |
3689 | STMT_VINFO_REDUC_CODE (phi_info) = code;
3690 | if (code == COND_EXPR && !nested_in_vect_loop)
3691 | STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
3692 | |
3693 | /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP |
3694 | reduction chain for which the additional restriction is that |
3695 | all operations in the chain are the same. */ |
3696 | auto_vec<stmt_vec_info, 8> reduc_chain; |
3697 | unsigned i; |
3698 | bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR; |
3699 | for (i = path.length () - 1; i >= 1; --i) |
3700 | { |
3701 | gimple *stmt = USE_STMT (path[i].second);
3702 | stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); |
3703 | STMT_VINFO_REDUC_IDX (stmt_info)
3704 | = path[i].second->use - gimple_assign_rhs1_ptr (stmt); |
3705 | enum tree_code stmt_code = gimple_assign_rhs_code (stmt); |
3706 | bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)
3707 | && (i == 1 || i == path.length () - 1)); |
3708 | if ((stmt_code != code && !leading_conversion) |
3709 | /* We can only handle the final value in epilogue |
3710 | generation for reduction chains. */ |
3711 | || (i != 1 && !has_single_use (gimple_assign_lhs (stmt)))) |
3712 | is_slp_reduc = false; |
3713 | /* For reduction chains we support trailing/leading
3714 | conversions. We do not store those in the actual chain. */
3715 | if (leading_conversion) |
3716 | continue; |
3717 | reduc_chain.safe_push (stmt_info); |
3718 | } |
3719 | if (is_slp_reduc && reduc_chain.length () > 1) |
3720 | { |
3721 | for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) |
3722 | { |
3723 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
3724 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
3725 | }
3726 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
3727 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
3728 | |
3729 | /* Save the chain for further analysis in SLP detection. */ |
3730 | LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
3731 | REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
3732 | |
3733 | *reduc_chain_p = true; |
3734 | if (dump_enabled_p ()) |
3735 | dump_printf_loc (MSG_NOTE, vect_location, |
3736 | "reduction: detected reduction chain\n"); |
3737 | } |
3738 | else if (dump_enabled_p ()) |
3739 | dump_printf_loc (MSG_NOTE, vect_location, |
3740 | "reduction: detected reduction\n"); |
3741 | |
3742 | return def_stmt_info; |
3743 | } |
3744 | |
3745 | if (dump_enabled_p ()) |
3746 | dump_printf_loc (MSG_NOTE, vect_location, |
3747 | "reduction: unknown pattern\n"); |
3748 | |
3749 | return NULL;
3750 | } |
3751 | |
3752 | /* Estimate the number of peeled epilogue iterations for LOOP_VINFO. |
3753 | PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, |
3754 | or -1 if not known. */ |
3755 | |
3756 | static int |
3757 | vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) |
3758 | { |
3759 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
3760 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1)
3761 | { |
3762 | if (dump_enabled_p ()) |
3763 | dump_printf_loc (MSG_NOTE, vect_location, |
3764 | "cost model: epilogue peel iters set to vf/2 " |
3765 | "because loop iterations are unknown.\n");
3766 | return assumed_vf / 2; |
3767 | } |
3768 | else |
3769 | { |
3770 | int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3771 | peel_iters_prologue = MIN (niters, peel_iters_prologue);
3772 | int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; |
3773 | /* If we need to peel for gaps, but no peeling is required, we have to |
3774 | peel VF iterations. */ |
3775 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue)
3776 | peel_iters_epilogue = assumed_vf; |
3777 | return peel_iters_epilogue; |
3778 | } |
3779 | } |
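A small worked example of the computation above, with assumed numbers:

    /* niters = 100, assumed_vf = 8, peel_iters_prologue = 3
         => peel_iters_epilogue = (100 - 3) % 8 = 1.
       Had the remainder been 0 while peeling for gaps was required,
       the epilogue would get a full 8 iterations instead.  */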
3780 | |
3781 | /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ |
3782 | int |
3783 | vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, |
3784 | int *peel_iters_epilogue, |
3785 | stmt_vector_for_cost *scalar_cost_vec, |
3786 | stmt_vector_for_cost *prologue_cost_vec, |
3787 | stmt_vector_for_cost *epilogue_cost_vec) |
3788 | { |
3789 | int retval = 0; |
3790 | |
3791 | *peel_iters_epilogue |
3792 | = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); |
3793 | |
3794 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3795 | { |
3796 | /* If peeled iterations are known but number of scalar loop |
3797 | iterations are unknown, count a taken branch per peeled loop. */ |
3798 | if (peel_iters_prologue > 0) |
3799 | retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, |
3800 | NULL, NULL_TREE, 0, vect_prologue);
3801 | if (*peel_iters_epilogue > 0) |
3802 | retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, |
3803 | NULL, NULL_TREE, 0, vect_epilogue);
3804 | } |
3805 | |
3806 | stmt_info_for_cost *si; |
3807 | int j; |
3808 | if (peel_iters_prologue) |
3809 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3810 | retval += record_stmt_cost (prologue_cost_vec, |
3811 | si->count * peel_iters_prologue, |
3812 | si->kind, si->stmt_info, si->misalign, |
3813 | vect_prologue); |
3814 | if (*peel_iters_epilogue) |
3815 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3816 | retval += record_stmt_cost (epilogue_cost_vec, |
3817 | si->count * *peel_iters_epilogue, |
3818 | si->kind, si->stmt_info, si->misalign, |
3819 | vect_epilogue); |
3820 | |
3821 | return retval; |
3822 | } |
3823 | |
3824 | /* Function vect_estimate_min_profitable_iters |
3825 | |
3826 | Return the number of iterations required for the vector version of the |
3827 | loop to be profitable relative to the cost of the scalar version of the |
3828 | loop. |
3829 | |
3830 | *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold |
3831 | of iterations for vectorization. -1 value means loop vectorization |
3832 | is not profitable. This returned value may be used for dynamic |
3833 | profitability check. |
3834 | |
3835 | *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used |
3836 | for static check against estimated number of iterations. */ |
3837 | |
3838 | static void |
3839 | vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, |
3840 | int *ret_min_profitable_niters, |
3841 | int *ret_min_profitable_estimate) |
3842 | { |
3843 | int min_profitable_iters; |
3844 | int min_profitable_estimate; |
3845 | int peel_iters_prologue; |
3846 | int peel_iters_epilogue; |
3847 | unsigned vec_inside_cost = 0; |
3848 | int vec_outside_cost = 0; |
3849 | unsigned vec_prologue_cost = 0; |
3850 | unsigned vec_epilogue_cost = 0; |
3851 | int scalar_single_iter_cost = 0; |
3852 | int scalar_outside_cost = 0; |
3853 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
3854 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3855 | void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3856 | |
3857 | /* Cost model disabled. */ |
3858 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3859 | { |
3860 | if (dump_enabled_p ()) |
3861 | dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); |
3862 | *ret_min_profitable_niters = 0; |
3863 | *ret_min_profitable_estimate = 0; |
3864 | return; |
3865 | } |
3866 | |
3867 | /* Requires loop versioning tests to handle misalignment. */ |
3868 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3869 | { |
3870 | /* FIXME: Make cost depend on complexity of individual check. */ |
3871 | unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3872 | (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
3873 | NULL, NULL_TREE, 0, vect_prologue);
3874 | if (dump_enabled_p ()) |
3875 | dump_printf (MSG_NOTE, |
3876 | "cost model: Adding cost of checks for loop " |
3877 | "versioning to treat misalignment.\n"); |
3878 | } |
3879 | |
3880 | /* Requires loop versioning with alias checks. */ |
3881 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3882 | { |
3883 | /* FIXME: Make cost depend on complexity of individual check. */ |
3884 | unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3885 | (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
3886 | NULL, NULL_TREE, 0, vect_prologue);
3887 | len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
3888 | if (len) |
3889 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
3890 | (void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1, |
3891 | scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
3892 | len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
3893 | if (len) |
3894 | { |
3895 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
3896 | unsigned int nstmts = len * 2 - 1; |
3897 | /* +1 for each bias that needs adding. */ |
3898 | for (unsigned int i = 0; i < len; ++i) |
3899 | if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
3900 | nstmts += 1; |
3901 | (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts, |
3902 | scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
3903 | } |
3904 | if (dump_enabled_p ()) |
3905 | dump_printf (MSG_NOTE, |
3906 | "cost model: Adding cost of checks for loop " |
3907 | "versioning aliasing.\n"); |
3908 | } |
3909 | |
3910 | /* Requires loop versioning with niter checks. */ |
3911 | if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
3912 | { |
3913 | /* FIXME: Make cost depend on complexity of individual check. */ |
3914 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt, |
3915 | NULL, NULL_TREE, 0, vect_prologue);
3916 | if (dump_enabled_p ()) |
3917 | dump_printf (MSG_NOTE, |
3918 | "cost model: Adding cost of checks for loop " |
3919 | "versioning niters.\n"); |
3920 | } |
3921 | |
3922 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3923 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
3924 | NULL, NULL_TREE, 0, vect_prologue);
3925 | |
3926 | /* Count statements in scalar loop. Using this as scalar cost for a single |
3927 | iteration for now. |
3928 | |
3929 | TODO: Add outer loop support. |
3930 | |
3931 | TODO: Consider assigning different costs to different scalar |
3932 | statements. */ |
3933 | |
3934 | scalar_single_iter_cost |
3935 | = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3936 | |
3937 | /* Add additional cost for the peeled instructions in prologue and epilogue |
3938 | loop. (For fully-masked loops there will be no peeling.) |
3939 | |
3940 | FORNOW: If we don't know the value of peel_iters for prologue or epilogue |
3941 | at compile-time - we assume it's vf/2 (the worst would be vf-1). |
3942 | |
3943 | TODO: Build an expression that represents peel_iters for prologue and |
3944 | epilogue to be used in a run-time test. */ |
3945 | |
3946 | bool prologue_need_br_taken_cost = false; |
3947 | bool prologue_need_br_not_taken_cost = false; |
3948 | |
3949 | /* Calculate peel_iters_prologue. */ |
3950 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
3951 | peel_iters_prologue = 0; |
3952 | else if (npeel < 0) |
3953 | { |
3954 | peel_iters_prologue = assumed_vf / 2; |
3955 | if (dump_enabled_p ()) |
3956 | dump_printf (MSG_NOTE, "cost model: " |
3957 | "prologue peel iters set to vf/2.\n"); |
3958 | |
3959 | /* If peeled iterations are unknown, count a taken branch and a not taken |
3960 | branch per peeled loop. Even if scalar loop iterations are known, |
3961 | vector iterations are not known since peeled prologue iterations are |
3962 | not known. Hence guards remain the same. */ |
3963 | prologue_need_br_taken_cost = true; |
3964 | prologue_need_br_not_taken_cost = true; |
3965 | } |
3966 | else |
3967 | { |
3968 | peel_iters_prologue = npeel; |
3969 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0)
3970 | /* If peeled iterations are known but number of scalar loop |
3971 | iterations are unknown, count a taken branch per peeled loop. */ |
3972 | prologue_need_br_taken_cost = true; |
3973 | } |
3974 | |
3975 | bool epilogue_need_br_taken_cost = false; |
3976 | bool epilogue_need_br_not_taken_cost = false; |
3977 | |
3978 | /* Calculate peel_iters_epilogue. */ |
3979 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
3980 | /* We need to peel exactly one iteration for gaps. */
3981 | peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
3982 | else if (npeel < 0) |
3983 | { |
3984 | /* If peeling for alignment is unknown, loop bound of main loop |
3985 | becomes unknown. */ |
3986 | peel_iters_epilogue = assumed_vf / 2; |
3987 | if (dump_enabled_p ()) |
3988 | dump_printf (MSG_NOTE, "cost model: " |
3989 | "epilogue peel iters set to vf/2 because " |
3990 | "peeling for alignment is unknown.\n"); |
3991 | |
3992 | /* See the same reason above in peel_iters_prologue calculation. */ |
3993 | epilogue_need_br_taken_cost = true; |
3994 | epilogue_need_br_not_taken_cost = true; |
3995 | } |
3996 | else |
3997 | { |
3998 | peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel); |
3999 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0)
4000 | /* If peeled iterations are known but number of scalar loop |
4001 | iterations are unknown, count a taken branch per peeled loop. */ |
4002 | epilogue_need_br_taken_cost = true; |
4003 | } |
4004 | |
4005 | stmt_info_for_cost *si; |
4006 | int j; |
4007 | /* Add costs associated with peel_iters_prologue. */ |
4008 | if (peel_iters_prologue) |
4009 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
4010 | { |
4011 | (void) add_stmt_cost (loop_vinfo, target_cost_data, |
4012 | si->count * peel_iters_prologue, si->kind, |
4013 | si->stmt_info, si->vectype, si->misalign, |
4014 | vect_prologue); |
4015 | } |
4016 | |
4017 | /* Add costs associated with peel_iters_epilogue. */ |
4018 | if (peel_iters_epilogue) |
4019 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
4020 | { |
4021 | (void) add_stmt_cost (loop_vinfo, target_cost_data, |
4022 | si->count * peel_iters_epilogue, si->kind, |
4023 | si->stmt_info, si->vectype, si->misalign, |
4024 | vect_epilogue); |
4025 | } |
4026 | |
4027 | /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ |
4028 | |
4029 | if (prologue_need_br_taken_cost) |
4030 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, |
4031 | NULL, NULL_TREE, 0, vect_prologue);
4032 | |
4033 | if (prologue_need_br_not_taken_cost) |
4034 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, |
4035 | cond_branch_not_taken, NULL, NULL_TREE, 0,
4036 | vect_prologue); |
4037 | |
4038 | if (epilogue_need_br_taken_cost) |
4039 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, |
4040 | NULL, NULL_TREE, 0, vect_epilogue);
4041 | |
4042 | if (epilogue_need_br_not_taken_cost) |
4043 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, |
4044 | cond_branch_not_taken, NULL, NULL_TREE, 0,
4045 | vect_epilogue); |
4046 | |
4047 | /* Take care of special costs for rgroup controls of partial vectors. */ |
4048 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4049 | { |
4050 | /* Calculate how many masks we need to generate. */ |
4051 | unsigned int num_masks = 0; |
4052 | rgroup_controls *rgm; |
4053 | unsigned int num_vectors_m1; |
4054 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
4055 | if (rgm->type) |
4056 | num_masks += num_vectors_m1 + 1; |
4057 | gcc_assert (num_masks > 0);
4058 | |
4059 | /* In the worst case, we need to generate each mask in the prologue |
4060 | and in the loop body. One of the loop body mask instructions |
4061 | replaces the comparison in the scalar loop, and since we don't |
4062 | count the scalar comparison against the scalar body, we shouldn't |
4063 | count that vector instruction against the vector body either. |
4064 | |
4065 | Sometimes we can use unpacks instead of generating prologue |
4066 | masks and sometimes the prologue mask will fold to a constant, |
4067 | so the actual prologue cost might be smaller. However, it's |
4068 | simpler and safer to use the worst-case cost; if this ends up |
4069 | being the tie-breaker between vectorizing or not, then it's |
4070 | probably better not to vectorize. */ |
4071 | (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks, |
4072 | vector_stmt, NULL, NULL_TREE, 0, vect_prologue);
4073 | (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1,
4074 | vector_stmt, NULL, NULL_TREE, 0, vect_body);
4075 | } |
4076 | else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
4077 | { |
4078 | /* Referring to the functions vect_set_loop_condition_partial_vectors |
4079 | and vect_set_loop_controls_directly, we need to generate each |
4080 | length in the prologue and in the loop body if required. Although |
4081 | there are some possible optimizations, we consider the worst case |
4082 | here. */ |
4083 | |
4084 | bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
4085 | bool need_iterate_p
4086 | = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
4087 | && !vect_known_niters_smaller_than_vf (loop_vinfo)); |
4088 | |
4089 | /* Calculate how many statements to be added. */ |
4090 | unsigned int prologue_stmts = 0; |
4091 | unsigned int body_stmts = 0; |
4092 | |
4093 | rgroup_controls *rgc; |
4094 | unsigned int num_vectors_m1; |
4095 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
4096 | if (rgc->type) |
4097 | { |
4098 | /* May need one SHIFT for nitems_total computation. */ |
4099 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; |
4100 | if (nitems != 1 && !niters_known_p) |
4101 | prologue_stmts += 1; |
4102 | |
4103 | /* May need one MAX and one MINUS for wrap around. */ |
4104 | if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc)) |
4105 | prologue_stmts += 2; |
4106 | |
4107 | /* Need one MAX and one MINUS for each batch limit excepting for |
4108 | the 1st one. */ |
4109 | prologue_stmts += num_vectors_m1 * 2; |
4110 | |
4111 | unsigned int num_vectors = num_vectors_m1 + 1; |
4112 | |
4113 | /* Need to set up lengths in prologue, only one MIN required |
4114 | for each since start index is zero. */ |
4115 | prologue_stmts += num_vectors; |
4116 | |
4117 | /* Each may need two MINs and one MINUS to update lengths in body |
4118 | for next iteration. */ |
4119 | if (need_iterate_p) |
4120 | body_stmts += 3 * num_vectors; |
4121 | } |
4122 | |
4123 | (void) add_stmt_cost (loop_vinfo, target_cost_data, prologue_stmts, |
4124 | scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
4125 | (void) add_stmt_cost (loop_vinfo, target_cost_data, body_stmts, |
4126 | scalar_stmt, NULL, NULL_TREE, 0, vect_body);
4127 | } |
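    /* Illustrative count with assumed values: one rgroup with nitems = 4,
       niters unknown, no wrap-around risk, two vectors (num_vectors_m1 == 1)
       and need_iterate_p gives prologue_stmts = 1 + 2 + 2 = 5 and
       body_stmts = 3 * 2 = 6.  */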
4128 | |
4129 | /* FORNOW: The scalar outside cost is incremented in one of the |
4130 | following ways: |
4131 | |
4132 | 1. The vectorizer checks for alignment and aliasing and generates |
4133 | a condition that allows dynamic vectorization. A cost model |
4134 | check is ANDED with the versioning condition. Hence scalar code |
4135 | path now has the added cost of the versioning check. |
4136 | |
4137 | if (cost > th & versioning_check) |
4138 | jmp to vector code |
4139 | |
4140 | Hence run-time scalar is incremented by not-taken branch cost. |
4141 | |
4142 | 2. The vectorizer then checks if a prologue is required. If the |
4143 | cost model check was not done before during versioning, it has to |
4144 | be done before the prologue check. |
4145 | |
4146 | if (cost <= th) |
4147 | prologue = scalar_iters |
4148 | if (prologue == 0) |
4149 | jmp to vector code |
4150 | else |
4151 | execute prologue |
4152 | if (prologue == num_iters) |
4153 | go to exit |
4154 | |
4155 | Hence the run-time scalar cost is incremented by a taken branch, |
4156 | plus a not-taken branch, plus a taken branch cost. |
4157 | |
4158 | 3. The vectorizer then checks if an epilogue is required. If the |
4159 | cost model check was not done before during prologue check, it |
4160 | has to be done with the epilogue check. |
4161 | |
4162 | if (prologue == 0) |
4163 | jmp to vector code |
4164 | else |
4165 | execute prologue |
4166 | if (prologue == num_iters) |
4167 | go to exit |
4168 | vector code: |
4169 | if ((cost <= th) | (scalar_iters-prologue-epilogue == 0)) |
4170 | jmp to epilogue |
4171 | |
4172 | Hence the run-time scalar cost should be incremented by 2 taken |
4173 | branches. |
4174 | |
4175 | TODO: The back end may reorder the BBS's differently and reverse |
4176 | conditions/branch directions. Change the estimates below to |
4177 | something more reasonable. */ |
4178 | |
4179 | /* If the number of iterations is known and we do not do versioning, we can |
4180 | decide whether to vectorize at compile time. Hence the scalar version |
4181 | do not carry cost model guard costs. */ |
4182 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4183 | || LOOP_REQUIRES_VERSIONING (loop_vinfo))
4184 | { |
4185 | /* Cost model check occurs at versioning. */ |
4186 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
4187 | scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken); |
4188 | else |
4189 | { |
4190 | /* Cost model check occurs at prologue generation. */ |
4191 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
4192 | scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) |
4193 | + vect_get_stmt_cost (cond_branch_not_taken); |
4194 | /* Cost model check occurs at epilogue generation. */ |
4195 | else |
4196 | scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken); |
4197 | } |
4198 | } |
4199 | |
4200 | /* Complete the target-specific cost calculations. */ |
4201 | finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
4202 | &vec_inside_cost, &vec_epilogue_cost); |
4203 | |
4204 | vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); |
4205 | |
4206 | /* Stash the costs so that we can compare two loop_vec_infos. */ |
4207 | loop_vinfo->vec_inside_cost = vec_inside_cost; |
4208 | loop_vinfo->vec_outside_cost = vec_outside_cost; |
4209 | |
4210 | if (dump_enabled_p ()) |
4211 | { |
4212 | dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); |
4213 | dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n", |
4214 | vec_inside_cost); |
4215 | dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", |
4216 | vec_prologue_cost); |
4217 | dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", |
4218 | vec_epilogue_cost); |
4219 | dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n", |
4220 | scalar_single_iter_cost); |
4221 | dump_printf (MSG_NOTE, " Scalar outside cost: %d\n", |
4222 | scalar_outside_cost); |
4223 | dump_printf (MSG_NOTE, " Vector outside cost: %d\n", |
4224 | vec_outside_cost); |
4225 | dump_printf (MSG_NOTE, " prologue iterations: %d\n", |
4226 | peel_iters_prologue); |
4227 | dump_printf (MSG_NOTE, " epilogue iterations: %d\n", |
4228 | peel_iters_epilogue); |
4229 | } |
4230 | |
4231 | /* Calculate number of iterations required to make the vector version |
4232 | profitable, relative to the loop bodies only. The following condition |
4233 | must hold true: |
4234 | SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC |
4235 | where |
4236 | SIC = scalar iteration cost, VIC = vector iteration cost, |
4237 | VOC = vector outside cost, VF = vectorization factor, |
4238 | NPEEL = prologue iterations + epilogue iterations, |
4239 | SOC = scalar outside cost for run time cost model check. */ |
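  /* Illustrative numbers only: with SIC = 4, VIC = 6, VF = 4, NPEEL = 2,
     VOC = 30 and SOC = 0 the condition becomes
       4 * niters > 6 * ((niters - 2) / 4) + 30,
     which first holds at niters = 11, so roughly a dozen scalar iterations
     are needed before vectorization pays for its outside cost.  */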
4240 | |
4241 | int saving_per_viter = (scalar_single_iter_cost * assumed_vf |
4242 | - vec_inside_cost); |
4243 | if (saving_per_viter <= 0) |
4244 | { |
4245 | if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
4246 | warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd, |
4247 | "vectorization did not happen for a simd loop"); |
4248 | |
4249 | if (dump_enabled_p ()) |
4250 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4251 | "cost model: the vector iteration cost = %d " |
4252 | "divided by the scalar iteration cost = %d " |
4253 | "is greater or equal to the vectorization factor = %d" |
4254 | ".\n", |
4255 | vec_inside_cost, scalar_single_iter_cost, assumed_vf); |
4256 | *ret_min_profitable_niters = -1; |
4257 | *ret_min_profitable_estimate = -1; |
4258 | return; |
4259 | } |
4260 | |
4261 | /* ??? The "if" arm is written to handle all cases; see below for what |
4262 | we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
4263 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4264 | { |
4265 | /* Rewriting the condition above in terms of the number of |
4266 | vector iterations (vniters) rather than the number of |
4267 | scalar iterations (niters) gives: |
4268 | |
4269 | SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC |
4270 | |
4271 | <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC |
4272 | |
4273 | For integer N, X and Y when X > 0: |
4274 | |
4275 | N * X > Y <==> N >= (Y /[floor] X) + 1. */ |
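  /* For instance (values assumed): X = 3, Y = 7 gives N * 3 > 7, i.e.
     N >= 3, and (7 /[floor] 3) + 1 = 2 + 1 = 3 as well.  */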
4276 | int outside_overhead = (vec_outside_cost |
4277 | - scalar_single_iter_cost * peel_iters_prologue |
4278 | - scalar_single_iter_cost * peel_iters_epilogue |
4279 | - scalar_outside_cost); |
4280 | /* We're only interested in cases that require at least one |
4281 | vector iteration. */ |
4282 | int min_vec_niters = 1; |
4283 | if (outside_overhead > 0) |
4284 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
4285 | |
4286 | if (dump_enabled_p ()) |
4287 | dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n", |
4288 | min_vec_niters); |
4289 | |
4290 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4291 | { |
4292 | /* Now that we know the minimum number of vector iterations, |
4293 | find the minimum niters for which the scalar cost is larger: |
4294 | |
4295 | SIC * niters > VIC * vniters + VOC - SOC |
4296 | |
4297 | We know that the minimum niters is no more than |
4298 | vniters * VF + NPEEL, but it might be (and often is) less |
4299 | than that if a partial vector iteration is cheaper than the |
4300 | equivalent scalar code. */ |
4301 | int threshold = (vec_inside_cost * min_vec_niters |
4302 | + vec_outside_cost |
4303 | - scalar_outside_cost); |
4304 | if (threshold <= 0) |
4305 | min_profitable_iters = 1; |
4306 | else |
4307 | min_profitable_iters = threshold / scalar_single_iter_cost + 1; |
4308 | } |
4309 | else |
4310 | /* Convert the number of vector iterations into a number of |
4311 | scalar iterations. */ |
4312 | min_profitable_iters = (min_vec_niters * assumed_vf |
4313 | + peel_iters_prologue |
4314 | + peel_iters_epilogue); |
4315 | } |
4316 | else |
4317 | { |
4318 | min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) |
4319 | * assumed_vf |
4320 | - vec_inside_cost * peel_iters_prologue |
4321 | - vec_inside_cost * peel_iters_epilogue); |
4322 | if (min_profitable_iters <= 0) |
4323 | min_profitable_iters = 0; |
4324 | else |
4325 | { |
4326 | min_profitable_iters /= saving_per_viter; |
4327 | |
4328 | if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters) |
4329 | <= (((int) vec_inside_cost * min_profitable_iters) |
4330 | + (((int) vec_outside_cost - scalar_outside_cost) |
4331 | * assumed_vf))) |
4332 | min_profitable_iters++; |
4333 | } |
4334 | } |
4335 | |
4336 | if (dump_enabled_p ()) |
4337 | dump_printf (MSG_NOTE, |
4338 | " Calculated minimum iters for profitability: %d\n", |
4339 | min_profitable_iters); |
4340 | |
4341 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
4342 | && min_profitable_iters < (assumed_vf + peel_iters_prologue)) |
4343 | /* We want the vectorized loop to execute at least once. */ |
4344 | min_profitable_iters = assumed_vf + peel_iters_prologue; |
4345 | else if (min_profitable_iters < peel_iters_prologue) |
4346 | /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the |
4347 | vectorized loop executes at least once. */ |
4348 | min_profitable_iters = peel_iters_prologue; |
4349 | |
4350 | if (dump_enabled_p ()) |
4351 | dump_printf_loc (MSG_NOTE, vect_location, |
4352 | " Runtime profitability threshold = %d\n", |
4353 | min_profitable_iters); |
4354 | |
4355 | *ret_min_profitable_niters = min_profitable_iters; |
4356 | |
4357 | /* Calculate number of iterations required to make the vector version |
4358 | profitable, relative to the loop bodies only. |
4359 | |
4360 | Non-vectorized variant is SIC * niters and it must win over vector |
4361 | variant on the expected loop trip count. The following condition must hold true: |
4362 | SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */ |
4363 | |
4364 | if (vec_outside_cost <= 0) |
4365 | min_profitable_estimate = 0; |
4366 | /* ??? This "else if" arm is written to handle all cases; see below for |
4367 | what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
4368 | else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4369 | { |
4370 | /* This is a repeat of the code above, but with + SOC rather |
4371 | than - SOC. */ |
4372 | int outside_overhead = (vec_outside_cost |
4373 | - scalar_single_iter_cost * peel_iters_prologue |
4374 | - scalar_single_iter_cost * peel_iters_epilogue |
4375 | + scalar_outside_cost); |
4376 | int min_vec_niters = 1; |
4377 | if (outside_overhead > 0) |
4378 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
4379 | |
4380 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
4381 | { |
4382 | int threshold = (vec_inside_cost * min_vec_niters |
4383 | + vec_outside_cost |
4384 | + scalar_outside_cost); |
4385 | min_profitable_estimate = threshold / scalar_single_iter_cost + 1; |
4386 | } |
4387 | else |
4388 | min_profitable_estimate = (min_vec_niters * assumed_vf |
4389 | + peel_iters_prologue |
4390 | + peel_iters_epilogue); |
4391 | } |
4392 | else |
4393 | { |
4394 | min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) |
4395 | * assumed_vf |
4396 | - vec_inside_cost * peel_iters_prologue |
4397 | - vec_inside_cost * peel_iters_epilogue) |
4398 | / ((scalar_single_iter_cost * assumed_vf) |
4399 | - vec_inside_cost); |
4400 | } |
4401 | min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
4402 | if (dump_enabled_p ()) |
4403 | dump_printf_loc (MSG_NOTE, vect_location, |
4404 | " Static estimate profitability threshold = %d\n", |
4405 | min_profitable_estimate); |
4406 | |
4407 | *ret_min_profitable_estimate = min_profitable_estimate; |
4408 | } |
4409 | |
4410 | /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET |
4411 | vector elements (not bits) for a vector with NELT elements. */ |
4412 | static void |
4413 | calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, |
4414 | vec_perm_builder *sel) |
4415 | { |
4416 | /* The encoding is a single stepped pattern. Any wrap-around is handled |
4417 | by vec_perm_indices. */ |
4418 | sel->new_vector (nelt, 1, 3); |
4419 | for (unsigned int i = 0; i < 3; i++) |
4420 | sel->quick_push (i + offset); |
4421 | } |
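For example (sizes assumed for illustration), a shift by offset == 2 of a vector with nelt == 8 encodes as follows:

    /* The stepped pattern {2, 3, 4} with step 1 expands to the selector
       { 2, 3, 4, 5, 6, 7, 8, 9 }: result element i is taken from input
       element i + 2, and indices >= 8 refer to the permutation's second
       operand once the mask is wrapped in vec_perm_indices.  */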
4422 | |
4423 | /* Checks whether the target supports whole-vector shifts for vectors of mode |
4424 | MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ |
4425 | it supports vec_perm_const with masks for all necessary shift amounts. */ |
4426 | static bool |
4427 | have_whole_vector_shift (machine_mode mode) |
4428 | { |
4429 | if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) |
4430 | return true; |
4431 | |
4432 | /* Variable-length vectors should be handled via the optab. */ |
4433 | unsigned int nelt; |
4434 | if (!GET_MODE_NUNITS (mode).is_constant (&nelt)) |
4435 | return false; |
4436 | |
4437 | vec_perm_builder sel; |
4438 | vec_perm_indices indices; |
4439 | for (unsigned int i = nelt / 2; i >= 1; i /= 2) |
4440 | { |
4441 | calc_vec_perm_mask_for_shift (i, nelt, &sel); |
4442 | indices.new_vector (sel, 2, nelt); |
4443 | if (!can_vec_perm_const_p (mode, indices, false)) |
4444 | return false; |
4445 | } |
4446 | return true; |
4447 | } |
4448 | |
4449 | /* TODO: Close dependency between vect_model_*_cost and vectorizable_* |
4450 | functions. Design better to avoid maintenance issues. */ |
4451 | |
4452 | /* Function vect_model_reduction_cost. |
4453 | |
4454 | Models cost for a reduction operation, including the vector ops |
4455 | generated within the strip-mine loop in some cases, the initial |
4456 | definition before the loop, and the epilogue code that must be generated. */ |
4457 | |
4458 | static void |
4459 | vect_model_reduction_cost (loop_vec_info loop_vinfo, |
4460 | stmt_vec_info stmt_info, internal_fn reduc_fn, |
4461 | vect_reduction_type reduction_type, |
4462 | int ncopies, stmt_vector_for_cost *cost_vec) |
4463 | { |
4464 | int prologue_cost = 0, epilogue_cost = 0, inside_cost; |
4465 | enum tree_code code; |
4466 | optab optab; |
4467 | tree vectype; |
4468 | machine_mode mode; |
4469 | class loop *loop = NULL;
4470 | |
4471 | if (loop_vinfo) |
4472 | loop = LOOP_VINFO_LOOP (loop_vinfo);
4473 | |
4474 | /* Condition reductions generate two reductions in the loop. */ |
4475 | if (reduction_type == COND_REDUCTION) |
4476 | ncopies *= 2; |
4477 | |
4478 | vectype = STMT_VINFO_VECTYPE (stmt_info);
4479 | mode = TYPE_MODE (vectype);
4480 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
4481 | |
4482 | code = gimple_assign_rhs_code (orig_stmt_info->stmt); |
4483 | |
4484 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
4485 | /* No extra instructions are needed in the prologue. The loop body |
4486 | operations are costed in vectorizable_condition. */ |
4487 | inside_cost = 0; |
4488 | else if (reduction_type == FOLD_LEFT_REDUCTION) |
4489 | { |
4490 | /* No extra instructions needed in the prologue. */ |
4491 | prologue_cost = 0; |
4492 | |
4493 | if (reduc_fn != IFN_LAST) |
4494 | /* Count one reduction-like operation per vector. */ |
4495 | inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar, |
4496 | stmt_info, 0, vect_body); |
4497 | else |
4498 | { |
4499 | /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ |
4500 | unsigned int nelements = ncopies * vect_nunits_for_cost (vectype); |
4501 | inside_cost = record_stmt_cost (cost_vec, nelements, |
4502 | vec_to_scalar, stmt_info, 0, |
4503 | vect_body); |
4504 | inside_cost += record_stmt_cost (cost_vec, nelements, |
4505 | scalar_stmt, stmt_info, 0, |
4506 | vect_body); |
4507 | } |
4508 | } |
4509 | else |
4510 | { |
4511 | /* Add in cost for initial definition. |
4512 | For cond reduction we have four vectors: initial index, step, |
4513 | initial result of the data reduction, initial value of the index |
4514 | reduction. */ |
4515 | int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1; |
4516 | prologue_cost += record_stmt_cost (cost_vec, prologue_stmts, |
4517 | scalar_to_vec, stmt_info, 0, |
4518 | vect_prologue); |
4519 | } |
4520 | |
4521 | /* Determine cost of epilogue code. |
4522 | |
4523 | We have a reduction operator that will reduce the vector in one statement. |
4524 | Also requires scalar extract. */ |
4525 | |
4526 | if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info)) |
4527 | { |
4528 | if (reduc_fn != IFN_LAST) |
4529 | { |
4530 | if (reduction_type == COND_REDUCTION) |
4531 | { |
4532 | /* An EQ stmt and a COND_EXPR stmt. */
4533 | epilogue_cost += record_stmt_cost (cost_vec, 2, |
4534 | vector_stmt, stmt_info, 0, |
4535 | vect_epilogue); |
4536 | /* Reduction of the max index and a reduction of the found |
4537 | values. */ |
4538 | epilogue_cost += record_stmt_cost (cost_vec, 2, |
4539 | vec_to_scalar, stmt_info, 0, |
4540 | vect_epilogue); |
4541 | /* A broadcast of the max value. */ |
4542 | epilogue_cost += record_stmt_cost (cost_vec, 1, |
4543 | scalar_to_vec, stmt_info, 0, |
4544 | vect_epilogue); |
4545 | } |
4546 | else |
4547 | { |
4548 | epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, |
4549 | stmt_info, 0, vect_epilogue); |
4550 | epilogue_cost += record_stmt_cost (cost_vec, 1, |
4551 | vec_to_scalar, stmt_info, 0, |
4552 | vect_epilogue); |
4553 | } |
4554 | } |
4555 | else if (reduction_type == COND_REDUCTION) |
4556 | { |
4557 | unsigned estimated_nunits = vect_nunits_for_cost (vectype); |
4558 | /* Extraction of scalar elements. */ |
4559 | epilogue_cost += record_stmt_cost (cost_vec, |
4560 | 2 * estimated_nunits, |
4561 | vec_to_scalar, stmt_info, 0, |
4562 | vect_epilogue); |
4563 | /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ |
4564 | epilogue_cost += record_stmt_cost (cost_vec, |
4565 | 2 * estimated_nunits - 3, |
4566 | scalar_stmt, stmt_info, 0, |
4567 | vect_epilogue); |
4568 | } |
4569 | else if (reduction_type == EXTRACT_LAST_REDUCTION |
4570 | || reduction_type == FOLD_LEFT_REDUCTION) |
4571 | /* No extra instructions are needed in the epilogue. */
4572 | ; |
4573 | else |
4574 | { |
4575 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4576 | tree bitsize =
4577 | TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt)));
4578 | int element_bitsize = tree_to_uhwi (bitsize); |
4579 | int nelements = vec_size_in_bits / element_bitsize; |
4580 | |
4581 | if (code == COND_EXPR) |
4582 | code = MAX_EXPR; |
4583 | |
4584 | optab = optab_for_tree_code (code, vectype, optab_default); |
4585 | |
4586 | /* We have a whole vector shift available. */ |
4587 | if (optab != unknown_optab |
4588 | && VECTOR_MODE_P (mode) |
4589 | && optab_handler (optab, mode) != CODE_FOR_nothing |
4590 | && have_whole_vector_shift (mode)) |
4591 | { |
4592 | /* Final reduction via vector shifts and the reduction operator. |
4593 | Also requires scalar extract. */ |
4594 | epilogue_cost += record_stmt_cost (cost_vec, |
4595 | exact_log2 (nelements) * 2, |
4596 | vector_stmt, stmt_info, 0, |
4597 | vect_epilogue); |
4598 | epilogue_cost += record_stmt_cost (cost_vec, 1, |
4599 | vec_to_scalar, stmt_info, 0, |
4600 | vect_epilogue); |
4601 | } |
4602 | else |
4603 | /* Use extracts and reduction op for final reduction. For N |
4604 | elements, we have N extracts and N-1 reduction ops. */ |
4605 | epilogue_cost += record_stmt_cost (cost_vec, |
4606 | nelements + nelements - 1, |
4607 | vector_stmt, stmt_info, 0, |
4608 | vect_epilogue); |
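| /* (For example, purely as an illustration: with an 8-element vector the |
| whole-vector-shift scheme above costs exact_log2 (8) * 2 = 6 vector |
| stmts plus one extract, whereas this fallback costs 8 + 7 = 15.) */ |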
4609 | } |
4610 | } |
4611 | |
4612 | if (dump_enabled_p ()) |
4613 | dump_printf (MSG_NOTE, |
4614 | "vect_model_reduction_cost: inside_cost = %d, " |
4615 | "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost, |
4616 | prologue_cost, epilogue_cost); |
4617 | } |
4618 | |
4619 | |
4620 | |
4621 | /* Function get_initial_def_for_reduction |
4622 | |
4623 | Input: |
4624 | STMT_VINFO - a stmt that performs a reduction operation in the loop. |
4625 | INIT_VAL - the initial value of the reduction variable |
4626 | |
4627 | Output: |
4628 | ADJUSTMENT_DEF - a tree that holds a value to be added to the final result |
4629 | of the reduction (used for adjusting the epilog - see below). |
4630 | Return a vector variable, initialized according to the operation that |
4631 | STMT_VINFO performs. This vector will be used as the initial value |
4632 | of the vector of partial results. |
4633 | |
4634 | Option1 (adjust in epilog): Initialize the vector as follows: |
4635 | add/bit or/xor: [0,0,...,0,0] |
4636 | mult/bit and: [1,1,...,1,1] |
4637 | min/max/cond_expr: [init_val,init_val,..,init_val,init_val] |
4638 | and when necessary (e.g. add/mult case) let the caller know |
4639 | that it needs to adjust the result by init_val. |
4640 | |
4641 | Option2: Initialize the vector as follows: |
4642 | add/bit or/xor: [init_val,0,0,...,0] |
4643 | mult/bit and: [init_val,1,1,...,1] |
4644 | min/max/cond_expr: [init_val,init_val,...,init_val] |
4645 | and no adjustments are needed. |
4646 | |
4647 | For example, for the following code: |
4648 | |
4649 | s = init_val; |
4650 | for (i=0;i<n;i++) |
4651 | s = s + a[i]; |
4652 | |
4653 | STMT_VINFO is 's = s + a[i]', and the reduction variable is 's'. |
4654 | For a vector of 4 units, we want to return either [0,0,0,init_val], |
4655 | or [0,0,0,0] and let the caller know that it needs to adjust |
4656 | the result at the end by 'init_val'. |
4657 | |
4658 | FORNOW, we use Option1 (the 'adjust in epilog' scheme) when ADJUSTMENT_DEF |
4659 | is not NULL, because its initialization vector is simpler (the same |
4660 | element in all entries), and Option2 otherwise. |
4661 | |
4662 | A cost model should help decide between these two schemes. */ |
4663 | |
4664 | static tree |
4665 | get_initial_def_for_reduction (loop_vec_info loop_vinfo, |
4666 | stmt_vec_info stmt_vinfo, |
4667 | enum tree_code code, tree init_val, |
4668 | tree *adjustment_def) |
4669 | { |
4670 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
4671 | tree scalar_type = TREE_TYPE (init_val); |
4672 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
4673 | tree def_for_init; |
4674 | tree init_def; |
4675 | REAL_VALUE_TYPE real_init_val = dconst0; |
4676 | int int_init_val = 0; |
4677 | gimple_seq stmts = NULL; |
4678 | |
4679 | gcc_assert (vectype); |
4680 | |
4681 | gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) |
4682 | || SCALAR_FLOAT_TYPE_P (scalar_type)); |
4683 | |
4684 | gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo) |
4685 | || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father); |
4686 | |
4687 | /* ADJUSTMENT_DEF is NULL when called from |
4688 | vect_create_epilog_for_reduction to vectorize double reduction. */ |
4689 | if (adjustment_def) |
4690 | *adjustment_def = NULL; |
4691 | |
4692 | switch (code) |
4693 | { |
4694 | case WIDEN_SUM_EXPR: |
4695 | case DOT_PROD_EXPR: |
4696 | case SAD_EXPR: |
4697 | case PLUS_EXPR: |
4698 | case MINUS_EXPR: |
4699 | case BIT_IOR_EXPR: |
4700 | case BIT_XOR_EXPR: |
4701 | case MULT_EXPR: |
4702 | case BIT_AND_EXPR: |
4703 | { |
4704 | if (code == MULT_EXPR) |
4705 | { |
4706 | real_init_val = dconst1; |
4707 | int_init_val = 1; |
4708 | } |
4709 | |
4710 | if (code == BIT_AND_EXPR) |
4711 | int_init_val = -1; |
4712 | |
4713 | if (SCALAR_FLOAT_TYPE_P (scalar_type)) |
4714 | def_for_init = build_real (scalar_type, real_init_val); |
4715 | else |
4716 | def_for_init = build_int_cst (scalar_type, int_init_val); |
4717 | |
4718 | if (adjustment_def || operand_equal_p (def_for_init, init_val, 0)) |
4719 | { |
4720 | /* Option1: the first element is '0' or '1' as well. */ |
4721 | if (!operand_equal_p (def_for_init, init_val, 0)) |
4722 | *adjustment_def = init_val; |
4723 | init_def = gimple_build_vector_from_val (&stmts, vectype, |
4724 | def_for_init); |
4725 | } |
4726 | else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) |
4727 | { |
4728 | /* Option2 (variable length): the first element is INIT_VAL. */ |
4729 | init_def = gimple_build_vector_from_val (&stmts, vectype, |
4730 | def_for_init); |
4731 | init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, |
4732 | vectype, init_def, init_val); |
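| /* (CFN_VEC_SHL_INSERT is expected to shift the splat of DEF_FOR_INIT up |
| by one lane and insert INIT_VAL into lane 0, so e.g. a multiplication |
| reduction gets {init_val, 1, 1, ...} without having to know the |
| runtime vector length.) */ |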
4733 | } |
4734 | else |
4735 | { |
4736 | /* Option2: the first element is INIT_VAL. */ |
4737 | tree_vector_builder elts (vectype, 1, 2); |
4738 | elts.quick_push (init_val); |
4739 | elts.quick_push (def_for_init); |
4740 | init_def = gimple_build_vector (&stmts, &elts); |
4741 | } |
4742 | } |
4743 | break; |
4744 | |
4745 | case MIN_EXPR: |
4746 | case MAX_EXPR: |
4747 | case COND_EXPR: |
4748 | { |
4749 | init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); |
4750 | init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); |
4751 | } |
4752 | break; |
4753 | |
4754 | default: |
4755 | gcc_unreachable (); |
4756 | } |
4757 | |
4758 | if (stmts) |
4759 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
4760 | return init_def; |
4761 | } |
4762 | |
4763 | /* Get at the initial defs for the reduction PHIs in SLP_NODE. |
4764 | NUMBER_OF_VECTORS is the number of vector defs to create. |
4765 | If NEUTRAL_OP is nonnull, introducing extra elements of that |
4766 | value will not change the result. */ |
4767 | |
4768 | static void |
4769 | get_initial_defs_for_reduction (vec_info *vinfo, |
4770 | slp_tree slp_node, |
4771 | vec<tree> *vec_oprnds, |
4772 | unsigned int number_of_vectors, |
4773 | bool reduc_chain, tree neutral_op) |
4774 | { |
4775 | vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
4776 | stmt_vec_info stmt_vinfo = stmts[0]; |
4777 | unsigned HOST_WIDE_INT nunits; |
4778 | unsigned j, number_of_places_left_in_vector; |
4779 | tree vector_type; |
4780 | unsigned int group_size = stmts.length (); |
4781 | unsigned int i; |
4782 | class loop *loop; |
4783 | |
4784 | vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); |
4785 | |
4786 | gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); |
4787 | |
4788 | loop = (gimple_bb (stmt_vinfo->stmt))->loop_father; |
4789 | gcc_assert (loop); |
4790 | edge pe = loop_preheader_edge (loop); |
4791 | |
4792 | gcc_assert (!reduc_chain || neutral_op); |
4793 | |
4794 | /* NUMBER_OF_COPIES is the number of times we need to use the same values in |
4795 | created vectors. It is greater than 1 if unrolling is performed. |
4796 | |
4797 | For example, we have two scalar operands, s1 and s2 (e.g., group of |
4798 | strided accesses of size two), while NUNITS is four (i.e., four scalars |
4799 | of this type can be packed in a vector). The output vector will contain |
4800 | two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES |
4801 | will be 2). |
4802 | |
4803 | If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several |
4804 | vectors containing the operands. |
4805 | |
4806 | For example, NUNITS is four as before, and the group size is 8 |
4807 | (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and |
4808 | {s5, s6, s7, s8}. */ |
4809 | |
4810 | if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) |
4811 | nunits = group_size; |
4812 | |
4813 | number_of_places_left_in_vector = nunits; |
4814 | bool constant_p = true; |
4815 | tree_vector_builder elts (vector_type, nunits, 1); |
4816 | elts.quick_grow (nunits); |
4817 | gimple_seq ctor_seq = NULL; |
4818 | for (j = 0; j < nunits * number_of_vectors; ++j) |
4819 | { |
4820 | tree op; |
4821 | i = j % group_size; |
4822 | stmt_vinfo = stmts[i]; |
4823 | |
4824 | /* Get the def before the loop. In reduction chain we have only |
4825 | one initial value. Else we have as many as PHIs in the group. */ |
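| /* (In the test below, (vec_oprnds->length () + 1) * nunits |
| - number_of_places_left_in_vector is the number of elements already |
| placed; once it reaches GROUP_SIZE every PHI has contributed its |
| initial value and the remaining lanes can be padded with NEUTRAL_OP.) */ |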
4826 | if (reduc_chain) |
4827 | op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); |
4828 | else if (((vec_oprnds->length () + 1) * nunits |
4829 | - number_of_places_left_in_vector >= group_size) |
4830 | && neutral_op) |
4831 | op = neutral_op; |
4832 | else |
4833 | op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); |
4834 | |
4835 | /* Create 'vect_ = {op0,op1,...,opn}'. */ |
4836 | number_of_places_left_in_vector--; |
4837 | elts[nunits - number_of_places_left_in_vector - 1] = op; |
4838 | if (!CONSTANT_CLASS_P (op)) |
4839 | constant_p = false; |
4840 | |
4841 | if (number_of_places_left_in_vector == 0) |
4842 | { |
4843 | tree init; |
4844 | if (constant_p && !neutral_op |
4845 | ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) |
4846 | : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) |
4847 | /* Build the vector directly from ELTS. */ |
4848 | init = gimple_build_vector (&ctor_seq, &elts); |
4849 | else if (neutral_op) |
4850 | { |
4851 | /* Build a vector of the neutral value and shift the |
4852 | other elements into place. */ |
4853 | init = gimple_build_vector_from_val (&ctor_seq, vector_type, |
4854 | neutral_op); |
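| /* (Lanes that already hold the neutral value after the splat need no |
| shift-insert, so the loop below starts from the last non-neutral |
| element and works towards lane 0.) */ |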
4855 | int k = nunits; |
4856 | while (k > 0 && elts[k - 1] == neutral_op) |
4857 | k -= 1; |
4858 | while (k > 0) |
4859 | { |
4860 | k -= 1; |
4861 | init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, |
4862 | vector_type, init, elts[k]); |
4863 | } |
4864 | } |
4865 | else |
4866 | { |
4867 | /* First time round, duplicate ELTS to fill the |
4868 | required number of vectors. */ |
4869 | duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts, |
4870 | number_of_vectors, *vec_oprnds); |
4871 | break; |
4872 | } |
4873 | vec_oprnds->quick_push (init); |
4874 | |
4875 | number_of_places_left_in_vector = nunits; |
4876 | elts.new_vector (vector_type, nunits, 1); |
4877 | elts.quick_grow (nunits); |
4878 | constant_p = true; |
4879 | } |
4880 | } |
4881 | if (ctor_seq != NULL) |
4882 | gsi_insert_seq_on_edge_immediate (pe, ctor_seq); |
4883 | } |
4884 | |
4885 | /* For a statement STMT_INFO taking part in a reduction operation return |
4886 | the stmt_vec_info the meta information is stored on. */ |
4887 | |
4888 | stmt_vec_info |
4889 | info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) |
4890 | { |
4891 | stmt_info = vect_orig_stmt (stmt_info); |
4892 | gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); |
4893 | if (!is_a <gphi *> (stmt_info->stmt) |
4894 | || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) |
4895 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
4896 | gphi *phi = as_a <gphi *> (stmt_info->stmt); |
4897 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
4898 | { |
4899 | if (gimple_phi_num_args (phi) == 1) |
4900 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
4901 | } |
4902 | else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
4903 | { |
4904 | edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father); |
4905 | stmt_vec_info info |
4906 | = vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe)); |
4907 | if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) |
4908 | stmt_info = info; |
4909 | } |
4910 | return stmt_info; |
4911 | } |
4912 | |
4913 | /* Function vect_create_epilog_for_reduction |
4914 | |
4915 | Create code at the loop-epilog to finalize the result of a reduction |
4916 | computation. |
4917 | |
4918 | STMT_INFO is the scalar reduction stmt that is being vectorized. |
4919 | SLP_NODE is an SLP node containing a group of reduction statements. The |
4920 | first one in this group is STMT_INFO. |
4921 | SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE |
4922 | REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi |
4923 | (counting from 0) |
4924 | |
4925 | This function: |
4926 | 1. Completes the reduction def-use cycles. |
4927 | 2. "Reduces" each vector of partial results VECT_DEFS into a single result, |
4928 | by calling the function specified by REDUC_FN if available, or by |
4929 | other means (whole-vector shifts or a scalar loop). |
4930 | The function also creates a new phi node at the loop exit to preserve |
4931 | loop-closed form, as illustrated below. |
4932 | |
4933 | The flow at the entry to this function: |
4934 | |
4935 | loop: |
4936 | vec_def = phi <vec_init, null> # REDUCTION_PHI |
4937 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
4938 | s_loop = scalar_stmt # (scalar) STMT_INFO |
4939 | loop_exit: |
4940 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
4941 | use <s_out0> |
4942 | use <s_out0> |
4943 | |
4944 | The above is transformed by this function into: |
4945 | |
4946 | loop: |
4947 | vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI |
4948 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
4949 | s_loop = scalar_stmt # (scalar) STMT_INFO |
4950 | loop_exit: |
4951 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
4952 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
4953 | v_out2 = reduce <v_out1> |
4954 | s_out3 = extract_field <v_out2, 0> |
4955 | s_out4 = adjust_result <s_out3> |
4956 | use <s_out4> |
4957 | use <s_out4> |
4958 | */ |
4959 | |
4960 | static void |
4961 | vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, |
4962 | stmt_vec_info stmt_info, |
4963 | slp_tree slp_node, |
4964 | slp_instance slp_node_instance) |
4965 | { |
4966 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
4967 | gcc_assert (reduc_info->is_reduc_info); |
4968 | /* For double reductions we need to get at the inner loop reduction |
4969 | stmt which has the meta info attached. Our stmt_info is that of the |
4970 | loop-closed PHI of the inner loop which we remember as |
4971 | def for the reduction PHI generation. */ |
4972 | bool double_reduc = false; |
4973 | stmt_vec_info rdef_info = stmt_info; |
4974 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
4975 | { |
4976 | gcc_assert (!slp_node); |
4977 | double_reduc = true; |
4978 | stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def |
4979 | (stmt_info->stmt, 0)); |
4980 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
4981 | } |
4982 | gphi *reduc_def_stmt |
4983 | = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); |
4984 | enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); |
4985 | internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); |
4986 | tree vectype; |
4987 | machine_mode mode; |
4988 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; |
4989 | basic_block exit_bb; |
4990 | tree scalar_dest; |
4991 | tree scalar_type; |
4992 | gimple *new_phi = NULL, *phi; |
4993 | gimple_stmt_iterator exit_gsi; |
4994 | tree new_temp = NULL_TREE, new_name, new_scalar_dest; |
4995 | gimple *epilog_stmt = NULL; |
4996 | gimple *exit_phi; |
4997 | tree bitsize; |
4998 | tree def; |
4999 | tree orig_name, scalar_result; |
5000 | imm_use_iterator imm_iter, phi_imm_iter; |
5001 | use_operand_p use_p, phi_use_p; |
5002 | gimple *use_stmt; |
5003 | bool nested_in_vect_loop = false; |
5004 | auto_vec<gimple *> new_phis; |
5005 | int j, i; |
5006 | auto_vec<tree> scalar_results; |
5007 | unsigned int group_size = 1, k; |
5008 | auto_vec<gimple *> phis; |
5009 | bool slp_reduc = false; |
5010 | bool direct_slp_reduc; |
5011 | tree new_phi_result; |
5012 | tree induction_index = NULL_TREE; |
5013 | |
5014 | if (slp_node) |
5015 | group_size = SLP_TREE_LANES (slp_node); |
5016 | |
5017 | if (nested_in_vect_loop_p (loop, stmt_info)) |
5018 | { |
5019 | outer_loop = loop; |
5020 | loop = loop->inner; |
5021 | nested_in_vect_loop = true; |
5022 | gcc_assert (!slp_node); |
5023 | } |
5024 | gcc_assert (!nested_in_vect_loop || double_reduc); |
5025 | |
5026 | vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); |
5027 | gcc_assert (vectype); |
5028 | mode = TYPE_MODE (vectype); |
5029 | |
5030 | tree initial_def = NULL; |
5031 | tree induc_val = NULL_TREE; |
5032 | tree adjustment_def = NULL; |
5033 | if (slp_node) |
5034 | ; |
5035 | else |
5036 | { |
5037 | /* Get at the scalar def before the loop, that defines the initial value |
5038 | of the reduction variable. */ |
5039 | initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, |
5040 | loop_preheader_edge (loop)); |
5041 | /* Optimize: for induction condition reduction, if we can't use zero |
5042 | for induc_val, use initial_def. */ |
5043 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
5044 | induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); |
5045 | else if (double_reduc) |
5046 | ; |
5047 | else if (nested_in_vect_loop) |
5048 | ; |
5049 | else |
5050 | adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); |
5051 | } |
5052 | |
5053 | unsigned vec_num; |
5054 | int ncopies; |
5055 | if (slp_node) |
5056 | { |
5057 | vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length (); |
5058 | ncopies = 1; |
5059 | } |
5060 | else |
5061 | { |
5062 | stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt); |
5063 | vec_num = 1; |
5064 | ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length (); |
5065 | } |
5066 | |
5067 | /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) |
5068 | which is updated with the current index of the loop for every match of |
5069 | the original loop's cond_expr (VEC_STMT). This results in a vector |
5070 | containing the last time the condition passed for that vector lane. |
5071 | The first match will be a 1 to allow 0 to be used for non-matching |
5072 | indexes. If there are no matches at all then the vector will be all |
5073 | zeroes. |
5074 | |
5075 | PR92772: This algorithm is broken for architectures that support |
5076 | masked vectors, but do not provide fold_extract_last. */ |
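| /* (Illustration, assuming a 4-lane vector and two vector iterations: the |
| index IV holds {1,2,3,4} then {5,6,7,8}; a lane of INDEX_COND_EXPR ends |
| up holding the IV value of the last iteration whose condition matched |
| in that lane, or 0 if it never matched.) */ |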
5077 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) |
5078 | { |
5079 | auto_vec<std::pair<tree, bool>, 2> ccompares; |
5080 | stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); |
5081 | cond_info = vect_stmt_to_vectorize (cond_info); |
5082 | while (cond_info != reduc_info) |
5083 | { |
5084 | if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) |
5085 | { |
5086 | gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0]; |
5087 | gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); |
5088 | ccompares.safe_push |
5089 | (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)), |
5090 | STMT_VINFO_REDUC_IDX (cond_info) == 2)); |
5091 | } |
5092 | cond_info |
5093 | = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, |
5094 | 1 + STMT_VINFO_REDUC_IDX |
5095 | (cond_info))); |
5096 | cond_info = vect_stmt_to_vectorize (cond_info); |
5097 | } |
5098 | gcc_assert (ccompares.length () != 0); |
5099 | |
5100 | tree indx_before_incr, indx_after_incr; |
5101 | poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); |
5102 | int scalar_precision |
5103 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); |
5104 | tree cr_index_scalar_type = make_unsigned_type (scalar_precision); |
5105 | tree cr_index_vector_type = get_related_vectype_for_scalar_type |
5106 | (TYPE_MODE (vectype), cr_index_scalar_type, |
5107 | TYPE_VECTOR_SUBPARTS (vectype)); |
5108 | |
5109 | /* First we create a simple vector induction variable which starts |
5110 | with the values {1,2,3,...} (SERIES_VECT) and increments by the |
5111 | vector size (STEP). */ |
5112 | |
5113 | /* Create a {1,2,3,...} vector. */ |
5114 | tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); |
5115 | |
5116 | /* Create a vector of the step value. */ |
5117 | tree step = build_int_cst (cr_index_scalar_type, nunits_out); |
5118 | tree vec_step = build_vector_from_val (cr_index_vector_type, step); |
5119 | |
5120 | /* Create an induction variable. */ |
5121 | gimple_stmt_iterator incr_gsi; |
5122 | bool insert_after; |
5123 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
5124 | create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi, |
5125 | insert_after, &indx_before_incr, &indx_after_incr); |
5126 | |
5127 | /* Next create a new phi node vector (NEW_PHI_TREE) which starts |
5128 | filled with zeros (VEC_ZERO). */ |
5129 | |
5130 | /* Create a vector of 0s. */ |
5131 | tree zero = build_zero_cst (cr_index_scalar_type); |
5132 | tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); |
5133 | |
5134 | /* Create a vector phi node. */ |
5135 | tree new_phi_tree = make_ssa_name (cr_index_vector_type); |
5136 | new_phi = create_phi_node (new_phi_tree, loop->header); |
5137 | add_phi_arg (as_a <gphi *> (new_phi), vec_zero, |
5138 | loop_preheader_edge (loop), UNKNOWN_LOCATION); |
5139 | |
5140 | /* Now take the condition from the loop's original cond_exprs |
5141 | and produce a new cond_expr (INDEX_COND_EXPR) which for |
5142 | every match uses values from the induction variable |
5143 | (INDEX_BEFORE_INCR) otherwise uses values from the phi node |
5144 | (NEW_PHI_TREE). |
5145 | Finally, we update the phi (NEW_PHI_TREE) to take the value of |
5146 | the new cond_expr (INDEX_COND_EXPR). */ |
5147 | gimple_seq stmts = NULL; |
5148 | for (int i = ccompares.length () - 1; i != -1; --i) |
5149 | { |
5150 | tree ccompare = ccompares[i].first; |
5151 | if (ccompares[i].second) |
5152 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, |
5153 | cr_index_vector_type, |
5154 | ccompare, |
5155 | indx_before_incr, new_phi_tree); |
5156 | else |
5157 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, |
5158 | cr_index_vector_type, |
5159 | ccompare, |
5160 | new_phi_tree, indx_before_incr); |
5161 | } |
5162 | gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); |
5163 | |
5164 | /* Update the phi with the vec cond. */ |
5165 | induction_index = new_phi_tree; |
5166 | add_phi_arg (as_a <gphi *> (new_phi), induction_index, |
5167 | loop_latch_edge (loop), UNKNOWN_LOCATION); |
5168 | } |
5169 | |
5170 | /* 2. Create epilog code. |
5171 | The reduction epilog code operates across the elements of the vector |
5172 | of partial results computed by the vectorized loop. |
5173 | The reduction epilog code consists of: |
5174 | |
5175 | step 1: compute the scalar result in a vector (v_out2) |
5176 | step 2: extract the scalar result (s_out3) from the vector (v_out2) |
5177 | step 3: adjust the scalar result (s_out3) if needed. |
5178 | |
5179 | Step 1 can be accomplished using one of the following three schemes: |
5180 | (scheme 1) using reduc_fn, if available. |
5181 | (scheme 2) using whole-vector shifts, if available. |
5182 | (scheme 3) using a scalar loop. In this case steps 1+2 above are |
5183 | combined. |
5184 | |
5185 | The overall epilog code looks like this: |
5186 | |
5187 | s_out0 = phi <s_loop> # original EXIT_PHI |
5188 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
5189 | v_out2 = reduce <v_out1> # step 1 |
5190 | s_out3 = extract_field <v_out2, 0> # step 2 |
5191 | s_out4 = adjust_result <s_out3> # step 3 |
5192 | |
5193 | (step 3 is optional, and steps 1 and 2 may be combined). |
5194 | Lastly, the uses of s_out0 are replaced by s_out4. */ |
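| /* (For instance, for a 4-element PLUS reduction scheme 2 performs two |
| rounds of "shift the whole vector and accumulate": first by two |
| elements, then by one, after which element 0 holds the final sum and |
| a single extract_field finishes step 2.) */ |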
5195 | |
5196 | |
5197 | /* 2.1 Create new loop-exit-phis to preserve loop-closed form: |
5198 | v_out1 = phi <VECT_DEF> |
5199 | Store them in NEW_PHIS. */ |
5200 | if (double_reduc) |
5201 | loop = outer_loop; |
5202 | exit_bb = single_exit (loop)->dest; |
5203 | new_phis.create (slp_node ? vec_num : ncopies); |
5204 | for (unsigned i = 0; i < vec_num; i++) |
5205 | { |
5206 | if (slp_node) |
5207 | def = vect_get_slp_vect_def (slp_node, i); |
5208 | else |
5209 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); |
5210 | for (j = 0; j < ncopies; j++) |
5211 | { |
5212 | tree new_def = copy_ssa_name (def); |
5213 | phi = create_phi_node (new_def, exit_bb); |
5214 | if (j == 0) |
5215 | new_phis.quick_push (phi); |
5216 | else |
5217 | { |
5218 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); |
5219 | new_phis.quick_push (phi); |
5220 | } |
5221 | |
5222 | SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); |
5223 | } |
5224 | } |
5225 | |
5226 | exit_gsi = gsi_after_labels (exit_bb); |
5227 | |
5228 | /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 |
5229 | (i.e. when reduc_fn is not available) and in the final adjustment |
5230 | code (if needed). Also get the original scalar reduction variable as |
5231 | defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it |
5232 | represents a reduction pattern), the tree-code and scalar-def are |
5233 | taken from the original stmt that the pattern-stmt (STMT) replaces. |
5234 | Otherwise (it is a regular reduction) - the tree-code and scalar-def |
5235 | are taken from STMT. */ |
5236 | |
5237 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
5238 | if (orig_stmt_info != stmt_info) |
5239 | { |
5240 | /* Reduction pattern */ |
5241 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
5242 | gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); |
5243 | } |
5244 | |
5245 | scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt); |
5246 | scalar_type = TREE_TYPE (scalar_dest); |
5247 | scalar_results.create (group_size); |
5248 | new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); |
5249 | bitsize = TYPE_SIZE (scalar_type); |
5250 | |
5251 | /* SLP reduction without reduction chain, e.g., |
5252 | # a1 = phi <a2, a0> |
5253 | # b1 = phi <b2, b0> |
5254 | a2 = operation (a1) |
5255 | b2 = operation (b1) */ |
5256 | slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
5257 | |
5258 | /* True if we should implement SLP_REDUC using native reduction operations |
5259 | instead of scalar operations. */ |
5260 | direct_slp_reduc = (reduc_fn != IFN_LAST |
5261 | && slp_reduc |
5262 | && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ()); |
5263 | |
5264 | /* In case of reduction chain, e.g., |
5265 | # a1 = phi <a3, a0> |
5266 | a2 = operation (a1) |
5267 | a3 = operation (a2), |
5268 | |
5269 | we may end up with more than one vector result. Here we reduce them to |
5270 | one vector. */ |
5271 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) |
5272 | { |
5273 | gimple_seq stmts = NULL; |
5274 | tree first_vect = PHI_RESULT (new_phis[0]); |
5275 | first_vect = gimple_convert (&stmts, vectype, first_vect); |
5276 | for (k = 1; k < new_phis.length (); k++) |
5277 | { |
5278 | gimple *next_phi = new_phis[k]; |
5279 | tree second_vect = PHI_RESULT (next_phi); |
5280 | second_vect = gimple_convert (&stmts, vectype, second_vect); |
5281 | first_vect = gimple_build (&stmts, code, vectype, |
5282 | first_vect, second_vect); |
5283 | } |
5284 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5285 | |
5286 | new_phi_result = first_vect; |
5287 | new_phis.truncate (0); |
5288 | new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); |
5289 | } |
5290 | /* Likewise if we couldn't use a single defuse cycle. */ |
5291 | else if (ncopies > 1) |
5292 | { |
5293 | gimple_seq stmts = NULL; |
5294 | tree first_vect = PHI_RESULT (new_phis[0]); |
5295 | first_vect = gimple_convert (&stmts, vectype, first_vect); |
5296 | for (int k = 1; k < ncopies; ++k) |
5297 | { |
5298 | tree second_vect = PHI_RESULT (new_phis[k]); |
5299 | second_vect = gimple_convert (&stmts, vectype, second_vect); |
5300 | first_vect = gimple_build (&stmts, code, vectype, |
5301 | first_vect, second_vect); |
5302 | } |
5303 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5304 | new_phi_result = first_vect; |
5305 | new_phis.truncate (0); |
5306 | new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); |
5307 | } |
5308 | else |
5309 | new_phi_result = PHI_RESULT (new_phis[0]); |
5310 | |
5311 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION |
5312 | && reduc_fn != IFN_LAST) |
5313 | { |
5314 | /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing |
5315 | various data values where the condition matched and another vector |
5316 | (INDUCTION_INDEX) containing all the indexes of those matches. We |
5317 | need to extract the last matching index (which will be the index with |
5318 | highest value) and use this to index into the data vector. |
5319 | For the case where there were no matches, the data vector will contain |
5320 | all default values and the index vector will be all zeros. */ |
5321 | |
5322 | /* Get various versions of the type of the vector of indexes. */ |
5323 | tree index_vec_type = TREE_TYPE (induction_index); |
5324 | gcc_checking_assert (TYPE_UNSIGNED (index_vec_type)); |
5325 | tree index_scalar_type = TREE_TYPE (index_vec_type); |
5326 | tree index_vec_cmp_type = truth_type_for (index_vec_type); |
5327 | |
5328 | /* Get an unsigned integer version of the type of the data vector. */ |
5329 | int scalar_precision |
5330 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); |
5331 | tree scalar_type_unsigned = make_unsigned_type (scalar_precision); |
5332 | tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned, |
5333 | vectype); |
5334 | |
5335 | /* First we need to create a vector (ZERO_VEC) of zeros and another |
5336 | vector (MAX_INDEX_VEC) filled with the last matching index, which we |
5337 | can create using a MAX reduction and then expanding. |
5338 | In the case where the loop never made any matches, the max index will |
5339 | be zero. */ |
5340 | |
5341 | /* Vector of {0, 0, 0,...}. */ |
5342 | tree zero_vec = build_zero_cst (vectype); |
5343 | |
5344 | gimple_seq stmts = NULL; |
5345 | new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); |
5346 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5347 | |
5348 | /* Find maximum value from the vector of found indexes. */ |
5349 | tree max_index = make_ssa_name (index_scalar_type); |
5350 | gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
5351 | 1, induction_index); |
5352 | gimple_call_set_lhs (max_index_stmt, max_index); |
5353 | gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT); |
5354 | |
5355 | /* Vector of {max_index, max_index, max_index,...}. */ |
5356 | tree max_index_vec = make_ssa_name (index_vec_type); |
5357 | tree max_index_vec_rhs = build_vector_from_val (index_vec_type, |
5358 | max_index); |
5359 | gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec, |
5360 | max_index_vec_rhs); |
5361 | gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT); |
5362 | |
5363 | /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes |
5364 | with the vector (INDUCTION_INDEX) of found indexes, choosing values |
5365 | from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC) |
5366 | otherwise. Only one value should match, resulting in a vector |
5367 | (VEC_COND) with one data value and the rest zeros. |
5368 | In the case where the loop never made any matches, every index will |
5369 | match, resulting in a vector with all data values (which will all be |
5370 | the default value). */ |
5371 | |
5372 | /* Compare the max index vector to the vector of found indexes to find |
5373 | the position of the max value. */ |
5374 | tree vec_compare = make_ssa_name (index_vec_cmp_type); |
5375 | gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR, |
5376 | induction_index, |
5377 | max_index_vec); |
5378 | gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT); |
5379 | |
5380 | /* Use the compare to choose either values from the data vector or |
5381 | zero. */ |
5382 | tree vec_cond = make_ssa_name (vectype); |
5383 | gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR, |
5384 | vec_compare, new_phi_result, |
5385 | zero_vec); |
5386 | gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT); |
5387 | |
5388 | /* Finally we need to extract the data value from the vector (VEC_COND) |
5389 | into a scalar (MATCHED_DATA_REDUC). Logically we want to do an OR |
5390 | reduction, but because this doesn't exist, we can use a MAX reduction |
5391 | instead. The data value might be signed or a float so we need to cast |
5392 | it first. |
5393 | In the case where the loop never made any matches, the data values are |
5394 | all identical, and so will reduce down correctly. */ |
5395 | |
5396 | /* Make the matched data values unsigned. */ |
5397 | tree vec_cond_cast = make_ssa_name (vectype_unsigned); |
5398 | tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned, |
5399 | vec_cond); |
5400 | gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast, |
5401 | VIEW_CONVERT_EXPR, |
5402 | vec_cond_cast_rhs); |
5403 | gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT); |
5404 | |
5405 | /* Reduce down to a scalar value. */ |
5406 | tree data_reduc = make_ssa_name (scalar_type_unsigned); |
5407 | gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
5408 | 1, vec_cond_cast); |
5409 | gimple_call_set_lhs (data_reduc_stmt, data_reduc); |
5410 | gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT); |
5411 | |
5412 | /* Convert the reduced value back to the result type and set as the |
5413 | result. */ |
5414 | stmts = NULL; |
5415 | new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type, |
5416 | data_reduc); |
5417 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5418 | scalar_results.safe_push (new_temp); |
5419 | } |
5420 | else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION |
5421 | && reduc_fn == IFN_LAST) |
5422 | { |
5423 | /* Condition reduction without supported IFN_REDUC_MAX. Generate |
5424 | idx = 0; |
5425 | idx_val = induction_index[0]; |
5426 | val = data_reduc[0]; |
5427 | for (idx = 0, val = init, i = 0; i < nelts; ++i) |
5428 | if (induction_index[i] > idx_val) |
5429 | val = data_reduc[i], idx_val = induction_index[i]; |
5430 | return val; */ |
5431 | |
5432 | tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); |
5433 | tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); |
5434 | unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); |
5435 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); |
5436 | /* Enforced by vectorizable_reduction, which ensures we have target |
5437 | support before allowing a conditional reduction on variable-length |
5438 | vectors. */ |
5439 | unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant (); |
5440 | tree idx_val = NULL_TREE, val = NULL_TREE; |
5441 | for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) |
5442 | { |
5443 | tree old_idx_val = idx_val; |
5444 | tree old_val = val; |
5445 | idx_val = make_ssa_name (idx_eltype); |
5446 | epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF, |
5447 | build3 (BIT_FIELD_REF, idx_eltype, |
5448 | induction_index, |
5449 | bitsize_int (el_size), |
5450 | bitsize_int (off))); |
5451 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5452 | val = make_ssa_name (data_eltype); |
5453 | epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF, |
5454 | build3 (BIT_FIELD_REF, |
5455 | data_eltype, |
5456 | new_phi_result, |
5457 | bitsize_int (el_size), |
5458 | bitsize_int (off))); |
5459 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5460 | if (off != 0) |
5461 | { |
5462 | tree new_idx_val = idx_val; |
5463 | if (off != v_size - el_size) |
5464 | { |
5465 | new_idx_val = make_ssa_name (idx_eltype); |
5466 | epilog_stmt = gimple_build_assign (new_idx_val, |
5467 | MAX_EXPR, idx_val, |
5468 | old_idx_val); |
5469 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5470 | } |
5471 | tree new_val = make_ssa_name (data_eltype); |
5472 | epilog_stmt = gimple_build_assign (new_val, |
5473 | COND_EXPR, |
5474 | build2 (GT_EXPR, |
5475 | boolean_type_node, |
5476 | idx_val, |
5477 | old_idx_val), |
5478 | val, old_val); |
5479 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5480 | idx_val = new_idx_val; |
5481 | val = new_val; |
5482 | } |
5483 | } |
5484 | /* Convert the reduced value back to the result type and set as the |
5485 | result. */ |
5486 | gimple_seq stmts = NULL; |
5487 | val = gimple_convert (&stmts, scalar_type, val); |
5488 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5489 | scalar_results.safe_push (val); |
5490 | } |
5491 | |
5492 | /* 2.3 Create the reduction code, using one of the three schemes described |
5493 | above. In SLP we simply need to extract all the elements from the |
5494 | vector (without reducing them), so we use scalar shifts. */ |
5495 | else if (reduc_fn != IFN_LAST && !slp_reduc) |
5496 | { |
5497 | tree tmp; |
5498 | tree vec_elem_type; |
5499 | |
5500 | /* Case 1: Create: |
5501 | v_out2 = reduc_expr <v_out1> */ |
5502 | |
5503 | if (dump_enabled_p ()) |
5504 | dump_printf_loc (MSG_NOTE, vect_location, |
5505 | "Reduce using direct vector reduction.\n"); |
5506 | |
5507 | gimple_seq stmts = NULL; |
5508 | new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); |
5509 | vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); |
5510 | new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn), |
5511 | vec_elem_type, new_phi_result); |
5512 | new_temp = gimple_convert (&stmts, scalar_type, new_temp); |
5513 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
5514 | |
5515 | if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
5516 | && induc_val) |
5517 | { |
5518 | /* Earlier we set the initial value to be a vector of induc_val |
5519 | values. Check the result and if it is induc_val then replace |
5520 | with the original initial value, unless induc_val is |
5521 | the same as initial_def already. */ |
5522 | tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, |
5523 | induc_val); |
5524 | |
5525 | tmp = make_ssa_name (new_scalar_dest); |
5526 | epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
5527 | initial_def, new_temp); |
5528 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5529 | new_temp = tmp; |
5530 | } |
5531 | |
5532 | scalar_results.safe_push (new_temp); |
5533 | } |
5534 | else if (direct_slp_reduc) |
5535 | { |
5536 | /* Here we create one vector for each of the REDUC_GROUP_SIZE results, |
5537 | with the elements for other SLP statements replaced with the |
5538 | neutral value. We can then do a normal reduction on each vector. */ |
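| /* (Sketch, assuming GROUP_SIZE == 2 and interleaved lanes a0,b0,a1,b1,...: |
| the first iteration below selects the "a" lanes and fills the "b" lanes |
| with the neutral value before reducing; the second does the opposite, |
| yielding one scalar result per SLP statement.) */ |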
5539 | |
5540 | /* Enforced by vectorizable_reduction. */ |
5541 | gcc_assert (new_phis.length () == 1); |
5542 | gcc_assert (pow2p_hwi (group_size)); |
5543 | |
5544 | slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis; |
5545 | vec<stmt_vec_info> orig_phis |
5546 | = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node); |
5547 | gimple_seq seq = NULL; |
5548 | |
5549 | /* Build a vector {0, 1, 2, ...}, with the same number of elements |
5550 | and the same element size as VECTYPE. */ |
5551 | tree index = build_index_vector (vectype, 0, 1); |
5552 | tree index_type = TREE_TYPE (index); |
5553 | tree index_elt_type = TREE_TYPE (index_type); |
5554 | tree mask_type = truth_type_for (index_type); |
5555 | |
5556 | /* Create a vector that, for each element, identifies which of |
5557 | the REDUC_GROUP_SIZE results should use it. */ |
5558 | tree index_mask = build_int_cst (index_elt_type, group_size - 1); |
5559 | index = gimple_build (&seq, BIT_AND_EXPR, index_type, index, |
5560 | build_vector_from_val (index_type, index_mask)); |
5561 | |
5562 | /* Get a neutral vector value. This is simply a splat of the neutral |
5563 | scalar value if we have one, otherwise the initial scalar value |
5564 | is itself a neutral value. */ |
5565 | tree vector_identity = NULL_TREE; |
5566 | tree neutral_op = NULL_TREE; |
5567 | if (slp_node) |
5568 | { |
5569 | stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info); |
5570 | neutral_op |
5571 | = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, |
5572 | vectype, code, first != NULL); |
5573 | } |
5574 | if (neutral_op) |
5575 | vector_identity = gimple_build_vector_from_val (&seq, vectype, |
5576 | neutral_op); |
5577 | for (unsigned int i = 0; i < group_size; ++i) |
5578 | { |
5579 | /* If there's no universal neutral value, we can use the |
5580 | initial scalar value from the original PHI. This is used |
5581 | for MIN and MAX reduction, for example. */ |
5582 | if (!neutral_op) |
5583 | { |
5584 | tree scalar_value |
5585 | = PHI_ARG_DEF_FROM_EDGE (orig_phis[i]->stmt, |
5586 | loop_preheader_edge (loop)); |
5587 | scalar_value = gimple_convert (&seq, TREE_TYPE (vectype), |
5588 | scalar_value); |
5589 | vector_identity = gimple_build_vector_from_val (&seq, vectype, |
5590 | scalar_value); |
5591 | } |
5592 | |
5593 | /* Calculate the equivalent of: |
5594 | |
5595 | sel[j] = (index[j] == i); |
5596 | |
5597 | which selects the elements of NEW_PHI_RESULT that should |
5598 | be included in the result. */ |
5599 | tree compare_val = build_int_cst (index_elt_type, i); |
5600 | compare_val = build_vector_from_val (index_type, compare_val); |
5601 | tree sel = gimple_build (&seq, EQ_EXPR, mask_type, |
5602 | index, compare_val); |
5603 | |
5604 | /* Calculate the equivalent of: |
5605 | |
5606 | vec = sel ? new_phi_result : vector_identity; |
5607 | |
5608 | VEC is now suitable for a full vector reduction. */ |
5609 | tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype, |
5610 | sel, new_phi_result, vector_identity); |
5611 | |
5612 | /* Do the reduction and convert it to the appropriate type. */ |
5613 | tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn), |
5614 | TREE_TYPE (vectype), vec); |
5615 | scalar = gimple_convert (&seq, scalar_type, scalar); |
5616 | scalar_results.safe_push (scalar); |
5617 | } |
5618 | gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT); |
5619 | } |
5620 | else |
5621 | { |
5622 | bool reduce_with_shift; |
5623 | tree vec_temp; |
5624 | |
5625 | gcc_assert (slp_reduc || new_phis.length () == 1); |
5626 | |
5627 | /* See if the target wants to do the final (shift) reduction |
5628 | in a vector mode of smaller size and first reduce upper/lower |
5629 | halves against each other. */ |
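/* For example, a target with 512-bit vectors may return a 128-bit mode
here, so that the shift-based reduction below operates on the narrower
mode after the accumulator has been folded in half repeatedly.  */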
5630 | enum machine_mode mode1 = mode; |
5631 | tree stype = TREE_TYPE (vectype); |
5632 | unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); |
5633 | unsigned nunits1 = nunits; |
5634 | if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode |
5635 | && new_phis.length () == 1) |
5636 | { |
5637 | nunits1 = GET_MODE_NUNITS (mode1).to_constant (); |
5638 | /* For SLP reductions we have to make sure lanes match up, but |
5639 | since we're doing individual element final reduction, reducing |
5640 | the vector width here is even more important. |
5641 | ??? We could also separate lanes with permutes; for the common |
5642 | case of a power-of-two group size, odd/even extracts would work. */ |
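/* E.g. with NUNITS1 == 4 and GROUP_SIZE == 8 this raises NUNITS1 to 8,
keeping the reduced vector width a multiple of the group size so the
SLP lanes stay aligned.  */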
5643 | if (slp_reduc && nunits != nunits1) |
5644 | { |
5645 | nunits1 = least_common_multiple (nunits1, group_size); |
5646 | gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits); |
5647 | } |
5648 | } |
5649 | if (!slp_reduc |
5650 | && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) |
5651 | nunits1 = GET_MODE_NUNITS (mode1).to_constant (); |
5652 | |
5653 | tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), |
5654 | stype, nunits1); |
5655 | reduce_with_shift = have_whole_vector_shift (mode1); |
5656 | if (!VECTOR_MODE_P (mode1)) |
5657 | reduce_with_shift = false; |
5658 | else |
5659 | { |
5660 | optab optab = optab_for_tree_code (code, vectype1, optab_default); |
5661 | if (optab_handler (optab, mode1) == CODE_FOR_nothing) |
5662 | reduce_with_shift = false; |
5663 | } |
5664 | |
5665 | /* First reduce the vector to the vector size we should do the |
5666 | shift reduction on, by repeatedly combining upper and lower halves. */ |
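/* E.g. a V8SI accumulator is split into two V4SI halves that are
combined with CODE; this repeats until the vector has NUNITS1
elements.  */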
5667 | new_temp = new_phi_result; |
5668 | while (nunits > nunits1) |
5669 | { |
5670 | nunits /= 2; |
5671 | vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), |
5672 | stype, nunits); |
5673 | unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)); |
5674 | |
5675 | /* The target has to make sure we support lowpart/highpart |
5676 | extraction, either via a direct vector extract or through |
5677 | punning to an integer mode. */ |
5678 | tree dst1, dst2; |
5679 | if (convert_optab_handler (vec_extract_optab, |
5680 | TYPE_MODE (TREE_TYPE (new_temp)), |
5681 | TYPE_MODE (vectype1)) |
5682 | != CODE_FOR_nothing) |
5683 | { |
5684 | /* Extract sub-vectors directly once vec_extract becomes |
5685 | a conversion optab. */ |
5686 | dst1 = make_ssa_name (vectype1); |
5687 | epilog_stmt |
5688 | = gimple_build_assign (dst1, BIT_FIELD_REF, |
5689 | build3 (BIT_FIELD_REF, vectype1, |
5690 | new_temp, TYPE_SIZE (vectype1), |
5691 | bitsize_int (0))); |
5692 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5693 | dst2 = make_ssa_name (vectype1); |
5694 | epilog_stmt |
5695 | = gimple_build_assign (dst2, BIT_FIELD_REF, |
5696 | build3 (BIT_FIELD_REF, vectype1, |
5697 | new_temp, TYPE_SIZE (vectype1), |
5698 | bitsize_int (bitsize))); |
5699 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5700 | } |
5701 | else |
5702 | { |
5703 | /* Extract via punning to appropriately sized integer mode |
5704 | vector. */ |
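/* I.e. view NEW_TEMP as a two-element vector of BITSIZE-bit integers,
extract each half with a BIT_FIELD_REF and view-convert the halves back
to VECTYPE1.  */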
5705 | tree eltype = build_nonstandard_integer_type (bitsize, 1); |
5706 | tree etype = build_vector_type (eltype, 2); |
5707 | gcc_assert (convert_optab_handler (vec_extract_optab, |
5708 | TYPE_MODE (etype), |
5709 | TYPE_MODE (eltype)) |
5710 | != CODE_FOR_nothing); |
5711 | tree tem = make_ssa_name (etype); |
5712 | epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR, |
5713 | build1 (VIEW_CONVERT_EXPR, |
5714 | etype, new_temp)); |
5715 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5716 | new_temp = tem; |
5717 | tem = make_ssa_name (eltype); |
5718 | epilog_stmt |
5719 | = gimple_build_assign (tem, BIT_FIELD_REF, |
5720 | build3 (BIT_FIELD_REF, eltype, |
5721 | new_temp, TYPE_SIZE (eltype), |
5722 | bitsize_int (0))); |
5723 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5724 | dst1 = make_ssa_name (vectype1); |
5725 | epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR, |
5726 | build1 (VIEW_CONVERT_EXPR, |
5727 | vectype1, tem)); |
5728 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5729 | tem = make_ssa_name (eltype); |
5730 | epilog_stmt |
5731 | = gimple_build_assign (tem, BIT_FIELD_REF, |
5732 | build3 (BIT_FIELD_REF, eltype, |
5733 | new_temp, TYPE_SIZE (eltype), |
5734 | bitsize_int (bitsize))); |
5735 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5736 | dst2 = make_ssa_name (vectype1); |
5737 | epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, |
5738 | build1 (VIEW_CONVERT_EXPR, |
5739 | vectype1, tem)); |
5740 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5741 | } |
5742 | |
5743 | new_temp = make_ssa_name (vectype1); |
5744 | epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2); |
5745 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5746 | new_phis[0] = epilog_stmt; |
5747 | } |
5748 | |
5749 | if (reduce_with_shift && !slp_reduc) |
5750 | { |
5751 | int element_bitsize = tree_to_uhwi (bitsize); |
5752 | /* Enforced by vectorizable_reduction, which disallows SLP reductions |
5753 | for variable-length vectors and also requires direct target support |
5754 | for loop reductions. */ |
5755 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); |
5756 | int nelements = vec_size_in_bits / element_bitsize; |
5757 | vec_perm_builder sel; |
5758 | vec_perm_indices indices; |
5759 | |
5760 | int elt_offset; |
5761 | |
5762 | tree zero_vec = build_zero_cst (vectype1); |
5763 | /* Case 2: Create: |
5764 | for (offset = nelements/2; offset >= 1; offset/=2) |
5765 | { |