File: | build/gcc/tree-vect-loop.c |
Warning: | line 5989, column 20 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Loop Vectorization | ||||||||
2 | Copyright (C) 2003-2021 Free Software Foundation, Inc. | ||||||||
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> and | ||||||||
4 | Ira Rosen <irar@il.ibm.com> | ||||||||
5 | |||||||||
6 | This file is part of GCC. | ||||||||
7 | |||||||||
8 | GCC is free software; you can redistribute it and/or modify it under | ||||||||
9 | the terms of the GNU General Public License as published by the Free | ||||||||
10 | Software Foundation; either version 3, or (at your option) any later | ||||||||
11 | version. | ||||||||
12 | |||||||||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | ||||||||
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||||||||
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||||||||
16 | for more details. | ||||||||
17 | |||||||||
18 | You should have received a copy of the GNU General Public License | ||||||||
19 | along with GCC; see the file COPYING3. If not see | ||||||||
20 | <http://www.gnu.org/licenses/>. */ | ||||||||
21 | |||||||||
22 | #include "config.h" | ||||||||
23 | #include "system.h" | ||||||||
24 | #include "coretypes.h" | ||||||||
25 | #include "backend.h" | ||||||||
26 | #include "target.h" | ||||||||
27 | #include "rtl.h" | ||||||||
28 | #include "tree.h" | ||||||||
29 | #include "gimple.h" | ||||||||
30 | #include "cfghooks.h" | ||||||||
31 | #include "tree-pass.h" | ||||||||
32 | #include "ssa.h" | ||||||||
33 | #include "optabs-tree.h" | ||||||||
34 | #include "diagnostic-core.h" | ||||||||
35 | #include "fold-const.h" | ||||||||
36 | #include "stor-layout.h" | ||||||||
37 | #include "cfganal.h" | ||||||||
38 | #include "gimplify.h" | ||||||||
39 | #include "gimple-iterator.h" | ||||||||
40 | #include "gimplify-me.h" | ||||||||
41 | #include "tree-ssa-loop-ivopts.h" | ||||||||
42 | #include "tree-ssa-loop-manip.h" | ||||||||
43 | #include "tree-ssa-loop-niter.h" | ||||||||
44 | #include "tree-ssa-loop.h" | ||||||||
45 | #include "cfgloop.h" | ||||||||
46 | #include "tree-scalar-evolution.h" | ||||||||
47 | #include "tree-vectorizer.h" | ||||||||
48 | #include "gimple-fold.h" | ||||||||
49 | #include "cgraph.h" | ||||||||
50 | #include "tree-cfg.h" | ||||||||
51 | #include "tree-if-conv.h" | ||||||||
52 | #include "internal-fn.h" | ||||||||
53 | #include "tree-vector-builder.h" | ||||||||
54 | #include "vec-perm-indices.h" | ||||||||
55 | #include "tree-eh.h" | ||||||||
56 | |||||||||
57 | /* Loop Vectorization Pass. | ||||||||
58 | |||||||||
59 | This pass tries to vectorize loops. | ||||||||
60 | |||||||||
61 | For example, the vectorizer transforms the following simple loop: | ||||||||
62 | |||||||||
63 | short a[N]; short b[N]; short c[N]; int i; | ||||||||
64 | |||||||||
65 | for (i=0; i<N; i++){ | ||||||||
66 | a[i] = b[i] + c[i]; | ||||||||
67 | } | ||||||||
68 | |||||||||
69 | as if it was manually vectorized by rewriting the source code into: | ||||||||
70 | |||||||||
71 | typedef int __attribute__((mode(V8HI))) v8hi; | ||||||||
72 | short a[N]; short b[N]; short c[N]; int i; | ||||||||
73 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; | ||||||||
74 | v8hi va, vb, vc; | ||||||||
75 | |||||||||
76 | for (i=0; i<N/8; i++){ | ||||||||
77 | vb = pb[i]; | ||||||||
78 | vc = pc[i]; | ||||||||
79 | va = vb + vc; | ||||||||
80 | pa[i] = va; | ||||||||
81 | } | ||||||||
82 | |||||||||
83 | The main entry to this pass is vectorize_loops(), in which | ||||||||
84 | the vectorizer applies a set of analyses on a given set of loops, | ||||||||
85 | followed by the actual vectorization transformation for the loops that | ||||||||
86 | had successfully passed the analysis phase. | ||||||||
87 | Throughout this pass we make a distinction between two types of | ||||||||
88 | data: scalars (which are represented by SSA_NAMES), and memory references | ||||||||
89 | ("data-refs"). These two types of data require different handling both | ||||||||
90 | during analysis and transformation. The types of data-refs that the | ||||||||
91 | vectorizer currently supports are ARRAY_REFS which base is an array DECL | ||||||||
92 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer | ||||||||
93 | accesses are required to have a simple (consecutive) access pattern. | ||||||||
94 | |||||||||
95 | Analysis phase: | ||||||||
96 | =============== | ||||||||
97 | The driver for the analysis phase is vect_analyze_loop(). | ||||||||
98 | It applies a set of analyses, some of which rely on the scalar evolution | ||||||||
99 | analyzer (scev) developed by Sebastian Pop. | ||||||||
100 | |||||||||
101 | During the analysis phase the vectorizer records some information | ||||||||
102 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the | ||||||||
103 | loop, as well as general information about the loop as a whole, which is | ||||||||
104 | recorded in a "loop_vec_info" struct attached to each loop. | ||||||||
105 | |||||||||
106 | Transformation phase: | ||||||||
107 | ===================== | ||||||||
108 | The loop transformation phase scans all the stmts in the loop, and | ||||||||
109 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in | ||||||||
110 | the loop that needs to be vectorized. It inserts the vector code sequence | ||||||||
111 | just before the scalar stmt S, and records a pointer to the vector code | ||||||||
112 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct | ||||||||
113 | attached to S). This pointer will be used for the vectorization of following | ||||||||
114 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; | ||||||||
115 | otherwise, we rely on dead code elimination for removing it. | ||||||||
116 | |||||||||
117 | For example, say stmt S1 was vectorized into stmt VS1: | ||||||||
118 | |||||||||
119 | VS1: vb = px[i]; | ||||||||
120 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | ||||||||
121 | S2: a = b; | ||||||||
122 | |||||||||
123 | To vectorize stmt S2, the vectorizer first finds the stmt that defines | ||||||||
124 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the | ||||||||
125 | vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The | ||||||||
126 | resulting sequence would be: | ||||||||
127 | |||||||||
128 | VS1: vb = px[i]; | ||||||||
129 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 | ||||||||
130 | VS2: va = vb; | ||||||||
131 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 | ||||||||
132 | |||||||||
133 | Operands that are not SSA_NAMEs, are data-refs that appear in | ||||||||
134 | load/store operations (like 'x[i]' in S1), and are handled differently. | ||||||||
135 | |||||||||
136 | Target modeling: | ||||||||
137 | ================= | ||||||||
138 | Currently the only target specific information that is used is the | ||||||||
139 | size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". | ||||||||
140 | Targets that can support different sizes of vectors, for now will need | ||||||||
141 | to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More | ||||||||
142 | flexibility will be added in the future. | ||||||||
143 | |||||||||
144 | Since we only vectorize operations which vector form can be | ||||||||
145 | expressed using existing tree codes, to verify that an operation is | ||||||||
146 | supported, the vectorizer checks the relevant optab at the relevant | ||||||||
147 | machine_mode (e.g, optab_handler (add_optab, V8HImode)). If | ||||||||
148 | the value found is CODE_FOR_nothing, then there's no target support, and | ||||||||
149 | we can't vectorize the stmt. | ||||||||
150 | |||||||||
151 | For additional information on this project see: | ||||||||
152 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html | ||||||||
153 | */ | ||||||||
154 | |||||||||
155 | static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); | ||||||||
156 | static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, | ||||||||
157 | bool *, bool *); | ||||||||
158 | |||||||||
159 | /* Subroutine of vect_determine_vf_for_stmt that handles only one | ||||||||
160 | statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE | ||||||||
161 | may already be set for general statements (not just data refs). */ | ||||||||
162 | |||||||||
163 | static opt_result | ||||||||
164 | vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, | ||||||||
165 | bool vectype_maybe_set_p, | ||||||||
166 | poly_uint64 *vf) | ||||||||
167 | { | ||||||||
168 | gimple *stmt = stmt_info->stmt; | ||||||||
169 | |||||||||
170 | if ((!STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) | ||||||||
171 | && !STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live) | ||||||||
172 | || gimple_clobber_p (stmt)) | ||||||||
173 | { | ||||||||
174 | if (dump_enabled_p ()) | ||||||||
175 | dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); | ||||||||
176 | return opt_result::success (); | ||||||||
177 | } | ||||||||
178 | |||||||||
179 | tree stmt_vectype, nunits_vectype; | ||||||||
180 | opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info, | ||||||||
181 | &stmt_vectype, | ||||||||
182 | &nunits_vectype); | ||||||||
183 | if (!res) | ||||||||
184 | return res; | ||||||||
185 | |||||||||
186 | if (stmt_vectype) | ||||||||
187 | { | ||||||||
188 | if (STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype) | ||||||||
189 | /* The only case when a vectype had been already set is for stmts | ||||||||
190 | that contain a data ref, or for "pattern-stmts" (stmts generated | ||||||||
191 | by the vectorizer to represent/replace a certain idiom). */ | ||||||||
192 | gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p ) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 194, __FUNCTION__), 0 : 0)) | ||||||||
193 | || vectype_maybe_set_p)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p ) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 194, __FUNCTION__), 0 : 0)) | ||||||||
194 | && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype)((void)(!((((stmt_info)->dr_aux.dr + 0) || vectype_maybe_set_p ) && (stmt_info)->vectype == stmt_vectype) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 194, __FUNCTION__), 0 : 0)); | ||||||||
195 | else | ||||||||
196 | STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype = stmt_vectype; | ||||||||
197 | } | ||||||||
198 | |||||||||
199 | if (nunits_vectype) | ||||||||
200 | vect_update_max_nunits (vf, nunits_vectype); | ||||||||
201 | |||||||||
202 | return opt_result::success (); | ||||||||
203 | } | ||||||||
204 | |||||||||
205 | /* Subroutine of vect_determine_vectorization_factor. Set the vector | ||||||||
206 | types of STMT_INFO and all attached pattern statements and update | ||||||||
207 | the vectorization factor VF accordingly. Return true on success | ||||||||
208 | or false if something prevented vectorization. */ | ||||||||
209 | |||||||||
210 | static opt_result | ||||||||
211 | vect_determine_vf_for_stmt (vec_info *vinfo, | ||||||||
212 | stmt_vec_info stmt_info, poly_uint64 *vf) | ||||||||
213 | { | ||||||||
214 | if (dump_enabled_p ()) | ||||||||
215 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", | ||||||||
216 | stmt_info->stmt); | ||||||||
217 | opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf); | ||||||||
218 | if (!res) | ||||||||
219 | return res; | ||||||||
220 | |||||||||
221 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p | ||||||||
222 | && STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt) | ||||||||
223 | { | ||||||||
224 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)(stmt_info)->pattern_def_seq; | ||||||||
225 | stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; | ||||||||
226 | |||||||||
227 | /* If a pattern statement has def stmts, analyze them too. */ | ||||||||
228 | for (gimple_stmt_iterator si = gsi_start (pattern_def_seq)gsi_start_1 (&(pattern_def_seq)); | ||||||||
229 | !gsi_end_p (si); gsi_next (&si)) | ||||||||
230 | { | ||||||||
231 | stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); | ||||||||
232 | if (dump_enabled_p ()) | ||||||||
233 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
234 | "==> examining pattern def stmt: %G", | ||||||||
235 | def_stmt_info->stmt); | ||||||||
236 | res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf); | ||||||||
237 | if (!res) | ||||||||
238 | return res; | ||||||||
239 | } | ||||||||
240 | |||||||||
241 | if (dump_enabled_p ()) | ||||||||
242 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
243 | "==> examining pattern statement: %G", | ||||||||
244 | stmt_info->stmt); | ||||||||
245 | res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf); | ||||||||
246 | if (!res) | ||||||||
247 | return res; | ||||||||
248 | } | ||||||||
249 | |||||||||
250 | return opt_result::success (); | ||||||||
251 | } | ||||||||
252 | |||||||||
253 | /* Function vect_determine_vectorization_factor | ||||||||
254 | |||||||||
255 | Determine the vectorization factor (VF). VF is the number of data elements | ||||||||
256 | that are operated upon in parallel in a single iteration of the vectorized | ||||||||
257 | loop. For example, when vectorizing a loop that operates on 4byte elements, | ||||||||
258 | on a target with vector size (VS) 16byte, the VF is set to 4, since 4 | ||||||||
259 | elements can fit in a single vector register. | ||||||||
260 | |||||||||
261 | We currently support vectorization of loops in which all types operated upon | ||||||||
262 | are of the same size. Therefore this function currently sets VF according to | ||||||||
263 | the size of the types operated upon, and fails if there are multiple sizes | ||||||||
264 | in the loop. | ||||||||
265 | |||||||||
266 | VF is also the factor by which the loop iterations are strip-mined, e.g.: | ||||||||
267 | original loop: | ||||||||
268 | for (i=0; i<N; i++){ | ||||||||
269 | a[i] = b[i] + c[i]; | ||||||||
270 | } | ||||||||
271 | |||||||||
272 | vectorized loop: | ||||||||
273 | for (i=0; i<N; i+=VF){ | ||||||||
274 | a[i:VF] = b[i:VF] + c[i:VF]; | ||||||||
275 | } | ||||||||
276 | */ | ||||||||
277 | |||||||||
278 | static opt_result | ||||||||
279 | vect_determine_vectorization_factor (loop_vec_info loop_vinfo) | ||||||||
280 | { | ||||||||
281 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
282 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; | ||||||||
283 | unsigned nbbs = loop->num_nodes; | ||||||||
284 | poly_uint64 vectorization_factor = 1; | ||||||||
285 | tree scalar_type = NULL_TREE(tree) nullptr; | ||||||||
286 | gphi *phi; | ||||||||
287 | tree vectype; | ||||||||
288 | stmt_vec_info stmt_info; | ||||||||
289 | unsigned i; | ||||||||
290 | |||||||||
291 | DUMP_VECT_SCOPE ("vect_determine_vectorization_factor")auto_dump_scope scope ("vect_determine_vectorization_factor", vect_location); | ||||||||
292 | |||||||||
293 | for (i = 0; i < nbbs; i++) | ||||||||
294 | { | ||||||||
295 | basic_block bb = bbs[i]; | ||||||||
296 | |||||||||
297 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); | ||||||||
298 | gsi_next (&si)) | ||||||||
299 | { | ||||||||
300 | phi = si.phi (); | ||||||||
301 | stmt_info = loop_vinfo->lookup_stmt (phi); | ||||||||
302 | if (dump_enabled_p ()) | ||||||||
303 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G", | ||||||||
304 | phi); | ||||||||
305 | |||||||||
306 | gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 306, __FUNCTION__), 0 : 0)); | ||||||||
307 | |||||||||
308 | if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) | ||||||||
309 | || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live) | ||||||||
310 | { | ||||||||
311 | gcc_assert (!STMT_VINFO_VECTYPE (stmt_info))((void)(!(!(stmt_info)->vectype) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 311, __FUNCTION__), 0 : 0)); | ||||||||
312 | scalar_type = TREE_TYPE (PHI_RESULT (phi))((contains_struct_check ((get_def_from_ptr (gimple_phi_result_ptr (phi))), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 312, __FUNCTION__))->typed.type); | ||||||||
313 | |||||||||
314 | if (dump_enabled_p ()) | ||||||||
315 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
316 | "get vectype for scalar type: %T\n", | ||||||||
317 | scalar_type); | ||||||||
318 | |||||||||
319 | vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); | ||||||||
320 | if (!vectype) | ||||||||
321 | return opt_result::failure_at (phi, | ||||||||
322 | "not vectorized: unsupported " | ||||||||
323 | "data-type %T\n", | ||||||||
324 | scalar_type); | ||||||||
325 | STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype = vectype; | ||||||||
326 | |||||||||
327 | if (dump_enabled_p ()) | ||||||||
328 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", | ||||||||
329 | vectype); | ||||||||
330 | |||||||||
331 | if (dump_enabled_p ()) | ||||||||
332 | { | ||||||||
333 | dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); | ||||||||
334 | dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); | ||||||||
335 | dump_printf (MSG_NOTE, "\n"); | ||||||||
336 | } | ||||||||
337 | |||||||||
338 | vect_update_max_nunits (&vectorization_factor, vectype); | ||||||||
339 | } | ||||||||
340 | } | ||||||||
341 | |||||||||
342 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); | ||||||||
343 | gsi_next (&si)) | ||||||||
344 | { | ||||||||
345 | if (is_gimple_debug (gsi_stmt (si))) | ||||||||
346 | continue; | ||||||||
347 | stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); | ||||||||
348 | opt_result res | ||||||||
349 | = vect_determine_vf_for_stmt (loop_vinfo, | ||||||||
350 | stmt_info, &vectorization_factor); | ||||||||
351 | if (!res) | ||||||||
352 | return res; | ||||||||
353 | } | ||||||||
354 | } | ||||||||
355 | |||||||||
356 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ | ||||||||
357 | if (dump_enabled_p ()) | ||||||||
358 | { | ||||||||
359 | dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); | ||||||||
360 | dump_dec (MSG_NOTE, vectorization_factor); | ||||||||
361 | dump_printf (MSG_NOTE, "\n"); | ||||||||
362 | } | ||||||||
363 | |||||||||
364 | if (known_le (vectorization_factor, 1U)(!maybe_lt (1U, vectorization_factor))) | ||||||||
365 | return opt_result::failure_at (vect_location, | ||||||||
366 | "not vectorized: unsupported data-type\n"); | ||||||||
367 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor; | ||||||||
368 | return opt_result::success (); | ||||||||
369 | } | ||||||||
370 | |||||||||
371 | |||||||||
372 | /* Function vect_is_simple_iv_evolution. | ||||||||
373 | |||||||||
374 | FORNOW: A simple evolution of an induction variables in the loop is | ||||||||
375 | considered a polynomial evolution. */ | ||||||||
376 | |||||||||
377 | static bool | ||||||||
378 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, | ||||||||
379 | tree * step) | ||||||||
380 | { | ||||||||
381 | tree init_expr; | ||||||||
382 | tree step_expr; | ||||||||
383 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); | ||||||||
384 | basic_block bb; | ||||||||
385 | |||||||||
386 | /* When there is no evolution in this loop, the evolution function | ||||||||
387 | is not "simple". */ | ||||||||
388 | if (evolution_part == NULL_TREE(tree) nullptr) | ||||||||
389 | return false; | ||||||||
390 | |||||||||
391 | /* When the evolution is a polynomial of degree >= 2 | ||||||||
392 | the evolution function is not "simple". */ | ||||||||
393 | if (tree_is_chrec (evolution_part)) | ||||||||
394 | return false; | ||||||||
395 | |||||||||
396 | step_expr = evolution_part; | ||||||||
397 | init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); | ||||||||
398 | |||||||||
399 | if (dump_enabled_p ()) | ||||||||
400 | dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n", | ||||||||
401 | step_expr, init_expr); | ||||||||
402 | |||||||||
403 | *init = init_expr; | ||||||||
404 | *step = step_expr; | ||||||||
405 | |||||||||
406 | if (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != INTEGER_CST | ||||||||
407 | && (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != SSA_NAME | ||||||||
408 | || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)(tree_check ((step_expr), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 408, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt)) | ||||||||
409 | && flow_bb_inside_loop_p (get_loop (cfun(cfun + 0), loop_nb), bb)) | ||||||||
410 | || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))(((enum tree_code) (((contains_struct_check ((step_expr), (TS_TYPED ), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 410, __FUNCTION__))->typed.type))->base.code) == ENUMERAL_TYPE || ((enum tree_code) (((contains_struct_check ((step_expr), ( TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 410, __FUNCTION__))->typed.type))->base.code) == BOOLEAN_TYPE || ((enum tree_code) (((contains_struct_check ((step_expr), ( TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 410, __FUNCTION__))->typed.type))->base.code) == INTEGER_TYPE ) | ||||||||
411 | && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))(((enum tree_code) (((contains_struct_check ((step_expr), (TS_TYPED ), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 411, __FUNCTION__))->typed.type))->base.code) == REAL_TYPE ) | ||||||||
412 | || !flag_associative_mathglobal_options.x_flag_associative_math))) | ||||||||
413 | && (TREE_CODE (step_expr)((enum tree_code) (step_expr)->base.code) != REAL_CST | ||||||||
414 | || !flag_associative_mathglobal_options.x_flag_associative_math)) | ||||||||
415 | { | ||||||||
416 | if (dump_enabled_p ()) | ||||||||
417 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
418 | "step unknown.\n"); | ||||||||
419 | return false; | ||||||||
420 | } | ||||||||
421 | |||||||||
422 | return true; | ||||||||
423 | } | ||||||||
424 | |||||||||
425 | /* Return true if PHI, described by STMT_INFO, is the inner PHI in | ||||||||
426 | what we are assuming is a double reduction. For example, given | ||||||||
427 | a structure like this: | ||||||||
428 | |||||||||
429 | outer1: | ||||||||
430 | x_1 = PHI <x_4(outer2), ...>; | ||||||||
431 | ... | ||||||||
432 | |||||||||
433 | inner: | ||||||||
434 | x_2 = PHI <x_1(outer1), ...>; | ||||||||
435 | ... | ||||||||
436 | x_3 = ...; | ||||||||
437 | ... | ||||||||
438 | |||||||||
439 | outer2: | ||||||||
440 | x_4 = PHI <x_3(inner)>; | ||||||||
441 | ... | ||||||||
442 | |||||||||
443 | outer loop analysis would treat x_1 as a double reduction phi and | ||||||||
444 | this function would then return true for x_2. */ | ||||||||
445 | |||||||||
446 | static bool | ||||||||
447 | vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi) | ||||||||
448 | { | ||||||||
449 | use_operand_p use_p; | ||||||||
450 | ssa_op_iter op_iter; | ||||||||
451 | FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)for ((use_p) = op_iter_init_phiuse (&(op_iter), phi, 0x01 ); !op_iter_done (&(op_iter)); (use_p) = op_iter_next_use (&(op_iter))) | ||||||||
452 | if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)get_use_from_ptr (use_p))) | ||||||||
453 | if (STMT_VINFO_DEF_TYPE (def_info)(def_info)->def_type == vect_double_reduction_def) | ||||||||
454 | return true; | ||||||||
455 | return false; | ||||||||
456 | } | ||||||||
457 | |||||||||
458 | /* Function vect_analyze_scalar_cycles_1. | ||||||||
459 | |||||||||
460 | Examine the cross iteration def-use cycles of scalar variables | ||||||||
461 | in LOOP. LOOP_VINFO represents the loop that is now being | ||||||||
462 | considered for vectorization (can be LOOP, or an outer-loop | ||||||||
463 | enclosing LOOP). */ | ||||||||
464 | |||||||||
465 | static void | ||||||||
466 | vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop) | ||||||||
467 | { | ||||||||
468 | basic_block bb = loop->header; | ||||||||
469 | tree init, step; | ||||||||
470 | auto_vec<stmt_vec_info, 64> worklist; | ||||||||
471 | gphi_iterator gsi; | ||||||||
472 | bool double_reduc, reduc_chain; | ||||||||
473 | |||||||||
474 | DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles")auto_dump_scope scope ("vect_analyze_scalar_cycles", vect_location ); | ||||||||
475 | |||||||||
476 | /* First - identify all inductions. Reduction detection assumes that all the | ||||||||
477 | inductions have been identified, therefore, this order must not be | ||||||||
478 | changed. */ | ||||||||
479 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | ||||||||
480 | { | ||||||||
481 | gphi *phi = gsi.phi (); | ||||||||
482 | tree access_fn = NULLnullptr; | ||||||||
483 | tree def = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); | ||||||||
484 | stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi); | ||||||||
485 | |||||||||
486 | if (dump_enabled_p ()) | ||||||||
487 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi); | ||||||||
488 | |||||||||
489 | /* Skip virtual phi's. The data dependences that are associated with | ||||||||
490 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | ||||||||
491 | if (virtual_operand_p (def)) | ||||||||
492 | continue; | ||||||||
493 | |||||||||
494 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_unknown_def_type; | ||||||||
495 | |||||||||
496 | /* Analyze the evolution function. */ | ||||||||
497 | access_fn = analyze_scalar_evolution (loop, def); | ||||||||
498 | if (access_fn) | ||||||||
499 | { | ||||||||
500 | STRIP_NOPS (access_fn)(access_fn) = tree_strip_nop_conversions ((const_cast<union tree_node *> (((access_fn))))); | ||||||||
501 | if (dump_enabled_p ()) | ||||||||
502 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
503 | "Access function of PHI: %T\n", access_fn); | ||||||||
504 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)(stmt_vinfo)->loop_phi_evolution_base_unchanged | ||||||||
505 | = initial_condition_in_loop_num (access_fn, loop->num); | ||||||||
506 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)(stmt_vinfo)->loop_phi_evolution_part | ||||||||
507 | = evolution_part_in_loop_num (access_fn, loop->num); | ||||||||
508 | } | ||||||||
509 | |||||||||
510 | if (!access_fn | ||||||||
511 | || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi) | ||||||||
512 | || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step) | ||||||||
513 | || (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop != loop | ||||||||
514 | && TREE_CODE (step)((enum tree_code) (step)->base.code) != INTEGER_CST)) | ||||||||
515 | { | ||||||||
516 | worklist.safe_push (stmt_vinfo); | ||||||||
517 | continue; | ||||||||
518 | } | ||||||||
519 | |||||||||
520 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)((void)(!((stmt_vinfo)->loop_phi_evolution_base_unchanged != (tree) nullptr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 521, __FUNCTION__), 0 : 0)) | ||||||||
521 | != NULL_TREE)((void)(!((stmt_vinfo)->loop_phi_evolution_base_unchanged != (tree) nullptr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 521, __FUNCTION__), 0 : 0)); | ||||||||
522 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE)((void)(!((stmt_vinfo)->loop_phi_evolution_part != (tree) nullptr ) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 522, __FUNCTION__), 0 : 0)); | ||||||||
523 | |||||||||
524 | if (dump_enabled_p ()) | ||||||||
525 | dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n"); | ||||||||
526 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_induction_def; | ||||||||
527 | } | ||||||||
528 | |||||||||
529 | |||||||||
530 | /* Second - identify all reductions and nested cycles. */ | ||||||||
531 | while (worklist.length () > 0) | ||||||||
532 | { | ||||||||
533 | stmt_vec_info stmt_vinfo = worklist.pop (); | ||||||||
534 | gphi *phi = as_a <gphi *> (stmt_vinfo->stmt); | ||||||||
535 | tree def = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); | ||||||||
536 | |||||||||
537 | if (dump_enabled_p ()) | ||||||||
538 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi); | ||||||||
539 | |||||||||
540 | gcc_assert (!virtual_operand_p (def)((void)(!(!virtual_operand_p (def) && (stmt_vinfo)-> def_type == vect_unknown_def_type) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 541, __FUNCTION__), 0 : 0)) | ||||||||
541 | && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type)((void)(!(!virtual_operand_p (def) && (stmt_vinfo)-> def_type == vect_unknown_def_type) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 541, __FUNCTION__), 0 : 0)); | ||||||||
542 | |||||||||
543 | stmt_vec_info reduc_stmt_info | ||||||||
544 | = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, | ||||||||
545 | &reduc_chain); | ||||||||
546 | if (reduc_stmt_info) | ||||||||
547 | { | ||||||||
548 | STMT_VINFO_REDUC_DEF (stmt_vinfo)(stmt_vinfo)->reduc_def = reduc_stmt_info; | ||||||||
549 | STMT_VINFO_REDUC_DEF (reduc_stmt_info)(reduc_stmt_info)->reduc_def = stmt_vinfo; | ||||||||
550 | if (double_reduc) | ||||||||
551 | { | ||||||||
552 | if (dump_enabled_p ()) | ||||||||
553 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
554 | "Detected double reduction.\n"); | ||||||||
555 | |||||||||
556 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_double_reduction_def; | ||||||||
557 | STMT_VINFO_DEF_TYPE (reduc_stmt_info)(reduc_stmt_info)->def_type = vect_double_reduction_def; | ||||||||
558 | } | ||||||||
559 | else | ||||||||
560 | { | ||||||||
561 | if (loop != LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop) | ||||||||
562 | { | ||||||||
563 | if (dump_enabled_p ()) | ||||||||
564 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
565 | "Detected vectorizable nested cycle.\n"); | ||||||||
566 | |||||||||
567 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_nested_cycle; | ||||||||
568 | } | ||||||||
569 | else | ||||||||
570 | { | ||||||||
571 | if (dump_enabled_p ()) | ||||||||
572 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
573 | "Detected reduction.\n"); | ||||||||
574 | |||||||||
575 | STMT_VINFO_DEF_TYPE (stmt_vinfo)(stmt_vinfo)->def_type = vect_reduction_def; | ||||||||
576 | STMT_VINFO_DEF_TYPE (reduc_stmt_info)(reduc_stmt_info)->def_type = vect_reduction_def; | ||||||||
577 | /* Store the reduction cycles for possible vectorization in | ||||||||
578 | loop-aware SLP if it was not detected as reduction | ||||||||
579 | chain. */ | ||||||||
580 | if (! reduc_chain) | ||||||||
581 | LOOP_VINFO_REDUCTIONS (loop_vinfo)(loop_vinfo)->reductions.safe_push | ||||||||
582 | (reduc_stmt_info); | ||||||||
583 | } | ||||||||
584 | } | ||||||||
585 | } | ||||||||
586 | else | ||||||||
587 | if (dump_enabled_p ()) | ||||||||
588 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
589 | "Unknown def-use cycle pattern.\n"); | ||||||||
590 | } | ||||||||
591 | } | ||||||||
592 | |||||||||
593 | |||||||||
594 | /* Function vect_analyze_scalar_cycles. | ||||||||
595 | |||||||||
596 | Examine the cross iteration def-use cycles of scalar variables, by | ||||||||
597 | analyzing the loop-header PHIs of scalar variables. Classify each | ||||||||
598 | cycle as one of the following: invariant, induction, reduction, unknown. | ||||||||
599 | We do that for the loop represented by LOOP_VINFO, and also to its | ||||||||
600 | inner-loop, if exists. | ||||||||
601 | Examples for scalar cycles: | ||||||||
602 | |||||||||
603 | Example1: reduction: | ||||||||
604 | |||||||||
605 | loop1: | ||||||||
606 | for (i=0; i<N; i++) | ||||||||
607 | sum += a[i]; | ||||||||
608 | |||||||||
609 | Example2: induction: | ||||||||
610 | |||||||||
611 | loop2: | ||||||||
612 | for (i=0; i<N; i++) | ||||||||
613 | a[i] = i; */ | ||||||||
614 | |||||||||
615 | static void | ||||||||
616 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) | ||||||||
617 | { | ||||||||
618 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
619 | |||||||||
620 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop); | ||||||||
621 | |||||||||
622 | /* When vectorizing an outer-loop, the inner-loop is executed sequentially. | ||||||||
623 | Reductions in such inner-loop therefore have different properties than | ||||||||
624 | the reductions in the nest that gets vectorized: | ||||||||
625 | 1. When vectorized, they are executed in the same order as in the original | ||||||||
626 | scalar loop, so we can't change the order of computation when | ||||||||
627 | vectorizing them. | ||||||||
628 | 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the | ||||||||
629 | current checks are too strict. */ | ||||||||
630 | |||||||||
631 | if (loop->inner) | ||||||||
632 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); | ||||||||
633 | } | ||||||||
634 | |||||||||
/* Transfer group and reduction information from STMT_INFO to its
   pattern stmt.  */

static void
vect_fixup_reduc_chain (stmt_vec_info stmt_info)
{
  /* FIRSTP is the pattern stmt of the chain head; the pattern stmts will
     become the new group members.  */
  stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
  stmt_vec_info stmtp;
  /* The pattern stmt must not be grouped yet, while the original head
     must be the start of an existing reduction group.  */
  gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
  /* Carry the group size over to the pattern head.  */
  REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
  /* Walk the original chain; for each member, link its pattern stmt into
     the parallel chain rooted at FIRSTP.  */
  do
    {
      stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)
			   == STMT_VINFO_DEF_TYPE (stmt_info));
      REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
      stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
      /* Point the pattern stmt at the pattern stmt of the next original
	 member (if any), mirroring the original NEXT links.  */
      if (stmt_info)
	REDUC_GROUP_NEXT_ELEMENT (stmtp)
	  = STMT_VINFO_RELATED_STMT (stmt_info);
    }
  while (stmt_info);
}
659 | |||||||||
/* Fixup scalar cycles that now have their stmts detected as patterns.  */

static void
vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
{
  stmt_vec_info first;
  unsigned i;

  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
    {
      /* Scan the chain; stop early (NEXT non-NULL) if a member disagrees
	 with the head about being a pattern, or if a member's pattern
	 stmt lost its reduction index.  */
      stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
      while (next)
	{
	  if ((STMT_VINFO_IN_PATTERN_P (next)
	       != STMT_VINFO_IN_PATTERN_P (first))
	      || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1)
	    break;
	  next = REDUC_GROUP_NEXT_ELEMENT (next);
	}
      /* If all reduction chain members are well-formed patterns adjust
	 the group to group the pattern stmts instead.  */
      if (! next
	  && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
	{
	  if (STMT_VINFO_IN_PATTERN_P (first))
	    {
	      vect_fixup_reduc_chain (first);
	      /* Replace the chain entry by its pattern-stmt head.  */
	      LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
		= STMT_VINFO_RELATED_STMT (first);
	    }
	}
      /* If not all stmt in the chain are patterns or if we failed
	 to update STMT_VINFO_REDUC_IDX dissolve the chain and handle
	 it as regular reduction instead.  */
      else
	{
	  stmt_vec_info vinfo = first;
	  stmt_vec_info last = NULL;
	  /* Clear all group links; LAST ends up as the chain tail.  */
	  while (vinfo)
	    {
	      next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
	      REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
	      REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
	      last = vinfo;
	      vinfo = next;
	    }
	  STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
	    = vect_internal_def;
	  /* The tail stmt carries the reduction; record it as a plain
	     reduction instead.  */
	  loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
	  /* Remove the dissolved chain; compensate I for the swap done
	     by unordered_remove so the swapped-in element is visited.  */
	  LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
	  --i;
	}
    }
}
714 | |||||||||
/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
   niter information holds in ASSUMPTIONS.

   Return the loop exit condition.  */


static gcond *
vect_get_loop_niters (class loop *loop, tree *assumptions,
		      tree *number_of_iterations, tree *number_of_iterationsm1)
{
  edge exit = single_exit (loop);
  class tree_niter_desc niter_desc;
  tree niter_assumptions, niter, may_be_zero;
  gcond *cond = get_loop_exit_condition (loop);

  /* Pessimistic defaults in case we bail out early below.  */
  *assumptions = boolean_true_node;
  *number_of_iterationsm1 = chrec_dont_know;
  *number_of_iterations = chrec_dont_know;
  DUMP_VECT_SCOPE ("get_loop_niters");

  /* Without a single exit we cannot analyze the iteration count.  */
  if (!exit)
    return cond;

  may_be_zero = NULL_TREE;
  if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
      || chrec_contains_undetermined (niter_desc.niter))
    return cond;

  niter_assumptions = niter_desc.assumptions;
  may_be_zero = niter_desc.may_be_zero;
  niter = niter_desc.niter;

  /* A statically-false MAY_BE_ZERO needs no handling.  */
  if (may_be_zero && integer_zerop (may_be_zero))
    may_be_zero = NULL_TREE;

  if (may_be_zero)
    {
      if (COMPARISON_CLASS_P (may_be_zero))
	{
	  /* Try to combine may_be_zero with assumptions, this can simplify
	     computation of niter expression.  */
	  if (niter_assumptions && !integer_nonzerop (niter_assumptions))
	    niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					     niter_assumptions,
					     fold_build1 (TRUTH_NOT_EXPR,
							  boolean_type_node,
							  may_be_zero));
	  else
	    /* Otherwise fold the zero-trip case into the niter expression
	       itself: MAY_BE_ZERO ? 0 : NITER.  */
	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
				 build_int_cst (TREE_TYPE (niter), 0),
				 rewrite_to_non_trapping_overflow (niter));

	  may_be_zero = NULL_TREE;
	}
      else if (integer_nonzerop (may_be_zero))
	{
	  /* The loop provably runs exactly once (zero latch iterations).  */
	  *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
	  *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
	  return cond;
	}
      else
	/* Non-comparison, non-constant MAY_BE_ZERO: give up.  */
	return cond;
    }

  *assumptions = niter_assumptions;
  *number_of_iterationsm1 = niter;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ???  For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niter && !chrec_contains_undetermined (niter))
    niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
			 build_int_cst (TREE_TYPE (niter), 1));
  *number_of_iterations = niter;

  return cond;
}
797 | |||||||||
798 | /* Function bb_in_loop_p | ||||||||
799 | |||||||||
800 | Used as predicate for dfs order traversal of the loop bbs. */ | ||||||||
801 | |||||||||
802 | static bool | ||||||||
803 | bb_in_loop_p (const_basic_block bb, const void *data) | ||||||||
804 | { | ||||||||
805 | const class loop *const loop = (const class loop *)data; | ||||||||
806 | if (flow_bb_inside_loop_p (loop, bb)) | ||||||||
807 | return true; | ||||||||
808 | return false; | ||||||||
809 | } | ||||||||
810 | |||||||||
811 | |||||||||
/* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
   stmt_vec_info structs for all the stmts in LOOP_IN.  */

_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
  : vec_info (vec_info::loop, init_cost (loop_in), shared),
    loop (loop_in),
    /* One slot per basic block of the loop; filled by the DFS below.  */
    bbs (XCNEWVEC (basic_block, loop->num_nodes)),
    num_itersm1 (NULL_TREE),
    num_iters (NULL_TREE),
    num_iters_unchanged (NULL_TREE),
    num_iters_assumptions (NULL_TREE),
    th (0),
    versioning_threshold (0),
    vectorization_factor (0),
    max_vectorization_factor (0),
    mask_skip_niters (NULL_TREE),
    rgroup_compare_type (NULL_TREE),
    simd_if_cond (NULL_TREE),
    unaligned_dr (NULL),
    peeling_for_alignment (0),
    ptr_mask (0),
    /* Both maps are created lazily on first use.  */
    ivexpr_map (NULL),
    scan_map (NULL),
    slp_unrolling_factor (1),
    single_scalar_iteration_cost (0),
    vec_outside_cost (0),
    vec_inside_cost (0),
    vectorizable (false),
    /* Partial vectors are allowed unless disabled by parameter.  */
    can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
    using_partial_vectors_p (false),
    epil_using_partial_vectors_p (false),
    peeling_for_gaps (false),
    peeling_for_niter (false),
    no_data_dependencies (false),
    has_mask_store (false),
    scalar_loop_scaling (profile_probability::uninitialized ()),
    scalar_loop (NULL),
    orig_loop_info (NULL)
{
  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would the same
     as reversed postorder traversal, so we are safe.  */

  unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
					  bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  /* Create stmt_vec_infos for every PHI and stmt in the loop, resetting
     their uids first.  */
  for (unsigned int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator si;

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *phi = gsi_stmt (si);
	  gimple_set_uid (phi, 0);
	  add_stmt (phi);
	}

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  gimple_set_uid (stmt, 0);
	  if (is_gimple_debug (stmt))
	    continue;
	  add_stmt (stmt);
	  /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
	     third argument is the #pragma omp simd if (x) condition, when 0,
	     loop shouldn't be vectorized, when non-zero constant, it should
	     be vectorized normally, otherwise versioned with vectorized loop
	     done if the condition is non-zero at runtime.  */
	  if (loop_in->simduid
	      && is_gimple_call (stmt)
	      && gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
	      && gimple_call_num_args (stmt) >= 3
	      && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	      && (loop_in->simduid
		  == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
	    {
	      tree arg = gimple_call_arg (stmt, 2);
	      if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
		simd_if_cond = arg;
	      else
		gcc_assert (integer_nonzerop (arg));
	    }
	}
    }

  epilogue_vinfos.create (6);
}
904 | |||||||||
905 | /* Free all levels of rgroup CONTROLS. */ | ||||||||
906 | |||||||||
907 | void | ||||||||
908 | release_vec_loop_controls (vec<rgroup_controls> *controls) | ||||||||
909 | { | ||||||||
910 | rgroup_controls *rgc; | ||||||||
911 | unsigned int i; | ||||||||
912 | FOR_EACH_VEC_ELT (*controls, i, rgc)for (i = 0; (*controls).iterate ((i), &(rgc)); ++(i)) | ||||||||
913 | rgc->controls.release (); | ||||||||
914 | controls->release (); | ||||||||
915 | } | ||||||||
916 | |||||||||
/* Free all memory used by the _loop_vec_info, as well as all the
   stmt_vec_info structs of all the stmts in the loop.  */

_loop_vec_info::~_loop_vec_info ()
{
  /* Basic-block array allocated in the constructor.  */
  free (bbs);

  /* Per-rgroup control vectors for masks and lengths.  */
  release_vec_loop_controls (&masks);
  release_vec_loop_controls (&lens);
  /* Lazily-created maps; delete of NULL is a no-op.  */
  delete ivexpr_map;
  delete scan_map;
  epilogue_vinfos.release ();

  /* Detach this info from the loop it described.  */
  loop->aux = NULL;
}
932 | |||||||||
/* Return an invariant or register for EXPR and emit necessary
   computations in the LOOP_VINFO loop preheader.  Results are cached
   in LOOP_VINFO's ivexpr_map so equal expressions are gimplified only
   once.  */

tree
cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
{
  /* Already a register or invariant - nothing to do.  */
  if (is_gimple_reg (expr)
      || is_gimple_min_invariant (expr))
    return expr;

  /* Create the cache lazily on first use.  */
  if (! loop_vinfo->ivexpr_map)
    loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
  tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
  if (! cached)
    {
      gimple_seq stmts = NULL;
      cached = force_gimple_operand (unshare_expr (expr),
				     &stmts, true, NULL_TREE);
      /* Any statements needed to compute the value go on the
	 preheader edge so they dominate the whole loop.  */
      if (stmts)
	{
	  edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
	  gsi_insert_seq_on_edge_immediate (e, stmts);
	}
    }
  return cached;
}
959 | |||||||||
960 | /* Return true if we can use CMP_TYPE as the comparison type to produce | ||||||||
961 | all masks required to mask LOOP_VINFO. */ | ||||||||
962 | |||||||||
963 | static bool | ||||||||
964 | can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type) | ||||||||
965 | { | ||||||||
966 | rgroup_controls *rgm; | ||||||||
967 | unsigned int i; | ||||||||
968 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)for (i = 0; ((loop_vinfo)->masks).iterate ((i), &(rgm) ); ++(i)) | ||||||||
969 | if (rgm->type != NULL_TREE(tree) nullptr | ||||||||
970 | && !direct_internal_fn_supported_p (IFN_WHILE_ULT, | ||||||||
971 | cmp_type, rgm->type, | ||||||||
972 | OPTIMIZE_FOR_SPEED)) | ||||||||
973 | return false; | ||||||||
974 | return true; | ||||||||
975 | } | ||||||||
976 | |||||||||
977 | /* Calculate the maximum number of scalars per iteration for every | ||||||||
978 | rgroup in LOOP_VINFO. */ | ||||||||
979 | |||||||||
980 | static unsigned int | ||||||||
981 | vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) | ||||||||
982 | { | ||||||||
983 | unsigned int res = 1; | ||||||||
984 | unsigned int i; | ||||||||
985 | rgroup_controls *rgm; | ||||||||
986 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)for (i = 0; ((loop_vinfo)->masks).iterate ((i), &(rgm) ); ++(i)) | ||||||||
987 | res = MAX (res, rgm->max_nscalars_per_iter)((res) > (rgm->max_nscalars_per_iter) ? (res) : (rgm-> max_nscalars_per_iter)); | ||||||||
988 | return res; | ||||||||
989 | } | ||||||||
990 | |||||||||
/* Calculate the minimum precision necessary to represent:

      MAX_NITERS * FACTOR

   as an unsigned integer, where MAX_NITERS is the maximum number of
   loop header iterations for the original scalar form of LOOP_VINFO.  */

static unsigned
vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Get the maximum number of iterations that is representable
     in the counter type.  */
  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;

  /* Get a more refined estimate for the number of iterations.  */
  widest_int max_back_edges;
  if (max_loop_iterations (loop, &max_back_edges))
    /* Header executions = latch (back edge) executions + 1.  */
    max_ni = wi::smin (max_ni, max_back_edges + 1);

  /* Work out how many bits we need to represent the limit.  */
  return wi::min_precision (max_ni * factor, UNSIGNED);
}
1016 | |||||||||
/* True if the loop needs peeling or partial vectors when vectorized.  */

static bool
vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
{
  unsigned HOST_WIDE_INT const_vf;
  HOST_WIDE_INT max_niter
    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));

  /* Use the cost-model threshold of this loop, falling back to the
     original loop's threshold when analyzing an epilogue.  */
  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
    th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
					    (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
    {
      /* Work out the (constant) number of iterations that need to be
	 peeled for reasons other than niters.  */
      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	peel_niter += 1;
      /* Peeling is needed unless the remaining iterations divide the
	 vectorization factor exactly.  */
      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
	return true;
    }
  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	   /* ??? When peeling for gaps but not alignment, we could
	      try to check whether the (variable) niters is known to be
	      VF * N + 1.  That's something of a niche case though.  */
	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
		< (unsigned) exact_log2 (const_vf))
	       /* In case of versioning, check if the maximum number of
		  iterations is greater than th.  If they are identical,
		  the epilogue is unnecessary.  */
	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
		   || ((unsigned HOST_WIDE_INT) max_niter
		       > (th / const_vf) * const_vf))))
    return true;

  return false;
}
1061 | |||||||||
1062 | /* Each statement in LOOP_VINFO can be masked where necessary. Check | ||||||||
1063 | whether we can actually generate the masks required. Return true if so, | ||||||||
1064 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */ | ||||||||
1065 | |||||||||
1066 | static bool | ||||||||
1067 | vect_verify_full_masking (loop_vec_info loop_vinfo) | ||||||||
1068 | { | ||||||||
1069 | unsigned int min_ni_width; | ||||||||
1070 | unsigned int max_nscalars_per_iter | ||||||||
1071 | = vect_get_max_nscalars_per_iter (loop_vinfo); | ||||||||
1072 | |||||||||
1073 | /* Use a normal loop if there are no statements that need masking. | ||||||||
1074 | This only happens in rare degenerate cases: it means that the loop | ||||||||
1075 | has no loads, no stores, and no live-out values. */ | ||||||||
1076 | if (LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks.is_empty ()) | ||||||||
1077 | return false; | ||||||||
1078 | |||||||||
1079 | /* Work out how many bits we need to represent the limit. */ | ||||||||
1080 | min_ni_width | ||||||||
1081 | = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter); | ||||||||
1082 | |||||||||
1083 | /* Find a scalar mode for which WHILE_ULT is supported. */ | ||||||||
1084 | opt_scalar_int_mode cmp_mode_iter; | ||||||||
1085 | tree cmp_type = NULL_TREE(tree) nullptr; | ||||||||
1086 | tree iv_type = NULL_TREE(tree) nullptr; | ||||||||
1087 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); | ||||||||
1088 | unsigned int iv_precision = UINT_MAX(2147483647 *2U +1U); | ||||||||
1089 | |||||||||
1090 | if (iv_limit != -1) | ||||||||
1091 | iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, | ||||||||
1092 | UNSIGNED); | ||||||||
1093 | |||||||||
1094 | FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)for (mode_iterator::start (&(cmp_mode_iter), MODE_INT); mode_iterator ::iterate_p (&(cmp_mode_iter)); mode_iterator::get_wider ( &(cmp_mode_iter))) | ||||||||
1095 | { | ||||||||
1096 | unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); | ||||||||
1097 | if (cmp_bits >= min_ni_width | ||||||||
1098 | && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) | ||||||||
1099 | { | ||||||||
1100 | tree this_type = build_nonstandard_integer_type (cmp_bits, true); | ||||||||
1101 | if (this_type | ||||||||
1102 | && can_produce_all_loop_masks_p (loop_vinfo, this_type)) | ||||||||
1103 | { | ||||||||
1104 | /* Although we could stop as soon as we find a valid mode, | ||||||||
1105 | there are at least two reasons why that's not always the | ||||||||
1106 | best choice: | ||||||||
1107 | |||||||||
1108 | - An IV that's Pmode or wider is more likely to be reusable | ||||||||
1109 | in address calculations than an IV that's narrower than | ||||||||
1110 | Pmode. | ||||||||
1111 | |||||||||
1112 | - Doing the comparison in IV_PRECISION or wider allows | ||||||||
1113 | a natural 0-based IV, whereas using a narrower comparison | ||||||||
1114 | type requires mitigations against wrap-around. | ||||||||
1115 | |||||||||
1116 | Conversely, if the IV limit is variable, doing the comparison | ||||||||
1117 | in a wider type than the original type can introduce | ||||||||
1118 | unnecessary extensions, so picking the widest valid mode | ||||||||
1119 | is not always a good choice either. | ||||||||
1120 | |||||||||
1121 | Here we prefer the first IV type that's Pmode or wider, | ||||||||
1122 | and the first comparison type that's IV_PRECISION or wider. | ||||||||
1123 | (The comparison type must be no wider than the IV type, | ||||||||
1124 | to avoid extensions in the vector loop.) | ||||||||
1125 | |||||||||
1126 | ??? We might want to try continuing beyond Pmode for ILP32 | ||||||||
1127 | targets if CMP_BITS < IV_PRECISION. */ | ||||||||
1128 | iv_type = this_type; | ||||||||
1129 | if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)((tree_class_check ((cmp_type), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1129, __FUNCTION__))->type_common.precision)) | ||||||||
1130 | cmp_type = this_type; | ||||||||
1131 | if (cmp_bits >= GET_MODE_BITSIZE (Pmode(global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode ( (scalar_int_mode::from_int) E_DImode)) : (scalar_int_mode ((scalar_int_mode ::from_int) E_SImode))))) | ||||||||
1132 | break; | ||||||||
1133 | } | ||||||||
1134 | } | ||||||||
1135 | } | ||||||||
1136 | |||||||||
1137 | if (!cmp_type) | ||||||||
1138 | return false; | ||||||||
1139 | |||||||||
1140 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = cmp_type; | ||||||||
1141 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type; | ||||||||
1142 | return true; | ||||||||
1143 | } | ||||||||
1144 | |||||||||
1145 | /* Check whether we can use vector access with length based on precison | ||||||||
1146 | comparison. So far, to keep it simple, we only allow the case that the | ||||||||
1147 | precision of the target supported length is larger than the precision | ||||||||
1148 | required by loop niters. */ | ||||||||
1149 | |||||||||
1150 | static bool | ||||||||
1151 | vect_verify_loop_lens (loop_vec_info loop_vinfo) | ||||||||
1152 | { | ||||||||
1153 | if (LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ()) | ||||||||
1154 | return false; | ||||||||
1155 | |||||||||
1156 | unsigned int max_nitems_per_iter = 1; | ||||||||
1157 | unsigned int i; | ||||||||
1158 | rgroup_controls *rgl; | ||||||||
1159 | /* Find the maximum number of items per iteration for every rgroup. */ | ||||||||
1160 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)for (i = 0; ((loop_vinfo)->lens).iterate ((i), &(rgl)) ; ++(i)) | ||||||||
1161 | { | ||||||||
1162 | unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor; | ||||||||
1163 | max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter)((max_nitems_per_iter) > (nitems_per_iter) ? (max_nitems_per_iter ) : (nitems_per_iter)); | ||||||||
1164 | } | ||||||||
1165 | |||||||||
1166 | /* Work out how many bits we need to represent the length limit. */ | ||||||||
1167 | unsigned int min_ni_prec | ||||||||
1168 | = vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter); | ||||||||
1169 | |||||||||
1170 | /* Now use the maximum of below precisions for one suitable IV type: | ||||||||
1171 | - the IV's natural precision | ||||||||
1172 | - the precision needed to hold: the maximum number of scalar | ||||||||
1173 | iterations multiplied by the scale factor (min_ni_prec above) | ||||||||
1174 | - the Pmode precision | ||||||||
1175 | |||||||||
1176 | If min_ni_prec is less than the precision of the current niters, | ||||||||
1177 | we perfer to still use the niters type. Prefer to use Pmode and | ||||||||
1178 | wider IV to avoid narrow conversions. */ | ||||||||
1179 | |||||||||
1180 | unsigned int ni_prec | ||||||||
1181 | = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)))((tree_class_check ((((contains_struct_check (((loop_vinfo)-> num_iters), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1181, __FUNCTION__))->typed.type)), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1181, __FUNCTION__))->type_common.precision); | ||||||||
1182 | min_ni_prec = MAX (min_ni_prec, ni_prec)((min_ni_prec) > (ni_prec) ? (min_ni_prec) : (ni_prec)); | ||||||||
1183 | min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode))((min_ni_prec) > (GET_MODE_BITSIZE ((global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode )) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode)) ))) ? (min_ni_prec) : (GET_MODE_BITSIZE ((global_options.x_ix86_pmode == PMODE_DI ? (scalar_int_mode ((scalar_int_mode::from_int) E_DImode )) : (scalar_int_mode ((scalar_int_mode::from_int) E_SImode)) )))); | ||||||||
1184 | |||||||||
1185 | tree iv_type = NULL_TREE(tree) nullptr; | ||||||||
1186 | opt_scalar_int_mode tmode_iter; | ||||||||
1187 | FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)for (mode_iterator::start (&(tmode_iter), MODE_INT); mode_iterator ::iterate_p (&(tmode_iter)); mode_iterator::get_wider (& (tmode_iter))) | ||||||||
1188 | { | ||||||||
1189 | scalar_mode tmode = tmode_iter.require (); | ||||||||
1190 | unsigned int tbits = GET_MODE_BITSIZE (tmode); | ||||||||
1191 | |||||||||
1192 | /* ??? Do we really want to construct one IV whose precision exceeds | ||||||||
1193 | BITS_PER_WORD? */ | ||||||||
1194 | if (tbits > BITS_PER_WORD((8) * (((global_options.x_ix86_isa_flags & (1UL << 1)) != 0) ? 8 : 4))) | ||||||||
1195 | break; | ||||||||
1196 | |||||||||
1197 | /* Find the first available standard integral type. */ | ||||||||
1198 | if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode)) | ||||||||
1199 | { | ||||||||
1200 | iv_type = build_nonstandard_integer_type (tbits, true); | ||||||||
1201 | break; | ||||||||
1202 | } | ||||||||
1203 | } | ||||||||
1204 | |||||||||
1205 | if (!iv_type) | ||||||||
1206 | { | ||||||||
1207 | if (dump_enabled_p ()) | ||||||||
1208 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1209 | "can't vectorize with length-based partial vectors" | ||||||||
1210 | " because there is no suitable iv type.\n"); | ||||||||
1211 | return false; | ||||||||
1212 | } | ||||||||
1213 | |||||||||
1214 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo)(loop_vinfo)->rgroup_compare_type = iv_type; | ||||||||
1215 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo)(loop_vinfo)->rgroup_iv_type = iv_type; | ||||||||
1216 | |||||||||
1217 | return true; | ||||||||
1218 | } | ||||||||
1219 | |||||||||
1220 | /* Calculate the cost of one scalar iteration of the loop. */ | ||||||||
1221 | static void | ||||||||
1222 | vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) | ||||||||
1223 | { | ||||||||
1224 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
1225 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; | ||||||||
1226 | int nbbs = loop->num_nodes, factor; | ||||||||
1227 | int innerloop_iters, i; | ||||||||
1228 | |||||||||
1229 | DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost")auto_dump_scope scope ("vect_compute_single_scalar_iteration_cost" , vect_location); | ||||||||
1230 | |||||||||
1231 | /* Gather costs for statements in the scalar loop. */ | ||||||||
1232 | |||||||||
1233 | /* FORNOW. */ | ||||||||
1234 | innerloop_iters = 1; | ||||||||
1235 | if (loop->inner) | ||||||||
1236 | innerloop_iters = 50; /* FIXME */ | ||||||||
1237 | |||||||||
1238 | for (i = 0; i < nbbs; i++) | ||||||||
1239 | { | ||||||||
1240 | gimple_stmt_iterator si; | ||||||||
1241 | basic_block bb = bbs[i]; | ||||||||
1242 | |||||||||
1243 | if (bb->loop_father == loop->inner) | ||||||||
1244 | factor = innerloop_iters; | ||||||||
1245 | else | ||||||||
1246 | factor = 1; | ||||||||
1247 | |||||||||
1248 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||||||||
1249 | { | ||||||||
1250 | gimple *stmt = gsi_stmt (si); | ||||||||
1251 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); | ||||||||
1252 | |||||||||
1253 | if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) | ||||||||
1254 | continue; | ||||||||
1255 | |||||||||
1256 | /* Skip stmts that are not vectorized inside the loop. */ | ||||||||
1257 | stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info); | ||||||||
1258 | if (!STMT_VINFO_RELEVANT_P (vstmt_info)((vstmt_info)->relevant != vect_unused_in_scope) | ||||||||
1259 | && (!STMT_VINFO_LIVE_P (vstmt_info)(vstmt_info)->live | ||||||||
1260 | || !VECTORIZABLE_CYCLE_DEF((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info )->def_type) == vect_double_reduction_def) || (((vstmt_info )->def_type) == vect_nested_cycle)) | ||||||||
1261 | (STMT_VINFO_DEF_TYPE (vstmt_info))((((vstmt_info)->def_type) == vect_reduction_def) || (((vstmt_info )->def_type) == vect_double_reduction_def) || (((vstmt_info )->def_type) == vect_nested_cycle)))) | ||||||||
1262 | continue; | ||||||||
1263 | |||||||||
1264 | vect_cost_for_stmt kind; | ||||||||
1265 | if (STMT_VINFO_DATA_REF (stmt_info)((stmt_info)->dr_aux.dr + 0)) | ||||||||
1266 | { | ||||||||
1267 | if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))(((stmt_info)->dr_aux.dr + 0))->is_read) | ||||||||
1268 | kind = scalar_load; | ||||||||
1269 | else | ||||||||
1270 | kind = scalar_store; | ||||||||
1271 | } | ||||||||
1272 | else if (vect_nop_conversion_p (stmt_info)) | ||||||||
1273 | continue; | ||||||||
1274 | else | ||||||||
1275 | kind = scalar_stmt; | ||||||||
1276 | |||||||||
1277 | record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->scalar_cost_vec, | ||||||||
1278 | factor, kind, stmt_info, 0, vect_prologue); | ||||||||
1279 | } | ||||||||
1280 | } | ||||||||
1281 | |||||||||
1282 | /* Now accumulate cost. */ | ||||||||
1283 | void *target_cost_data = init_cost (loop); | ||||||||
1284 | stmt_info_for_cost *si; | ||||||||
1285 | int j; | ||||||||
1286 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), & (si)); ++(j)) | ||||||||
1287 | j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), & (si)); ++(j)) | ||||||||
1288 | (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count, | ||||||||
1289 | si->kind, si->stmt_info, si->vectype, | ||||||||
1290 | si->misalign, vect_body); | ||||||||
1291 | unsigned dummy, body_cost = 0; | ||||||||
1292 | finish_cost (target_cost_data, &dummy, &body_cost, &dummy); | ||||||||
1293 | destroy_cost_data (target_cost_data); | ||||||||
1294 | LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->single_scalar_iteration_cost = body_cost; | ||||||||
1295 | } | ||||||||
1296 | |||||||||
1297 | |||||||||
1298 | /* Function vect_analyze_loop_form_1. | ||||||||
1299 | |||||||||
1300 | Verify that certain CFG restrictions hold, including: | ||||||||
1301 | - the loop has a pre-header | ||||||||
1302 | - the loop has a single entry and exit | ||||||||
1303 | - the loop exit condition is simple enough | ||||||||
1304 | - the number of iterations can be analyzed, i.e, a countable loop. The | ||||||||
1305 | niter could be analyzed under some assumptions. */ | ||||||||
1306 | |||||||||
1307 | opt_result | ||||||||
1308 | vect_analyze_loop_form_1 (class loop *loop, gcond **loop_cond, | ||||||||
1309 | tree *assumptions, tree *number_of_iterationsm1, | ||||||||
1310 | tree *number_of_iterations, gcond **inner_loop_cond) | ||||||||
1311 | { | ||||||||
1312 | DUMP_VECT_SCOPE ("vect_analyze_loop_form")auto_dump_scope scope ("vect_analyze_loop_form", vect_location ); | ||||||||
1313 | |||||||||
1314 | /* Different restrictions apply when we are considering an inner-most loop, | ||||||||
1315 | vs. an outer (nested) loop. | ||||||||
1316 | (FORNOW. May want to relax some of these restrictions in the future). */ | ||||||||
1317 | |||||||||
1318 | if (!loop->inner) | ||||||||
1319 | { | ||||||||
1320 | /* Inner-most loop. We currently require that the number of BBs is | ||||||||
1321 | exactly 2 (the header and latch). Vectorizable inner-most loops | ||||||||
1322 | look like this: | ||||||||
1323 | |||||||||
1324 | (pre-header) | ||||||||
1325 | | | ||||||||
1326 | header <--------+ | ||||||||
1327 | | | | | ||||||||
1328 | | +--> latch --+ | ||||||||
1329 | | | ||||||||
1330 | (exit-bb) */ | ||||||||
1331 | |||||||||
1332 | if (loop->num_nodes != 2) | ||||||||
1333 | return opt_result::failure_at (vect_location, | ||||||||
1334 | "not vectorized:" | ||||||||
1335 | " control flow in loop.\n"); | ||||||||
1336 | |||||||||
1337 | if (empty_block_p (loop->header)) | ||||||||
1338 | return opt_result::failure_at (vect_location, | ||||||||
1339 | "not vectorized: empty loop.\n"); | ||||||||
1340 | } | ||||||||
1341 | else | ||||||||
1342 | { | ||||||||
1343 | class loop *innerloop = loop->inner; | ||||||||
1344 | edge entryedge; | ||||||||
1345 | |||||||||
1346 | /* Nested loop. We currently require that the loop is doubly-nested, | ||||||||
1347 | contains a single inner loop, and the number of BBs is exactly 5. | ||||||||
1348 | Vectorizable outer-loops look like this: | ||||||||
1349 | |||||||||
1350 | (pre-header) | ||||||||
1351 | | | ||||||||
1352 | header <---+ | ||||||||
1353 | | | | ||||||||
1354 | inner-loop | | ||||||||
1355 | | | | ||||||||
1356 | tail ------+ | ||||||||
1357 | | | ||||||||
1358 | (exit-bb) | ||||||||
1359 | |||||||||
1360 | The inner-loop has the properties expected of inner-most loops | ||||||||
1361 | as described above. */ | ||||||||
1362 | |||||||||
1363 | if ((loop->inner)->inner || (loop->inner)->next) | ||||||||
1364 | return opt_result::failure_at (vect_location, | ||||||||
1365 | "not vectorized:" | ||||||||
1366 | " multiple nested loops.\n"); | ||||||||
1367 | |||||||||
1368 | if (loop->num_nodes != 5) | ||||||||
1369 | return opt_result::failure_at (vect_location, | ||||||||
1370 | "not vectorized:" | ||||||||
1371 | " control flow in loop.\n"); | ||||||||
1372 | |||||||||
1373 | entryedge = loop_preheader_edge (innerloop); | ||||||||
1374 | if (entryedge->src != loop->header | ||||||||
1375 | || !single_exit (innerloop) | ||||||||
1376 | || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)(*(loop->latch)->preds)[(0)]->src) | ||||||||
1377 | return opt_result::failure_at (vect_location, | ||||||||
1378 | "not vectorized:" | ||||||||
1379 | " unsupported outerloop form.\n"); | ||||||||
1380 | |||||||||
1381 | /* Analyze the inner-loop. */ | ||||||||
1382 | tree inner_niterm1, inner_niter, inner_assumptions; | ||||||||
1383 | opt_result res | ||||||||
1384 | = vect_analyze_loop_form_1 (loop->inner, inner_loop_cond, | ||||||||
1385 | &inner_assumptions, &inner_niterm1, | ||||||||
1386 | &inner_niter, NULLnullptr); | ||||||||
1387 | if (!res) | ||||||||
1388 | { | ||||||||
1389 | if (dump_enabled_p ()) | ||||||||
1390 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1391 | "not vectorized: Bad inner loop.\n"); | ||||||||
1392 | return res; | ||||||||
1393 | } | ||||||||
1394 | |||||||||
1395 | /* Don't support analyzing niter under assumptions for inner | ||||||||
1396 | loop. */ | ||||||||
1397 | if (!integer_onep (inner_assumptions)) | ||||||||
1398 | return opt_result::failure_at (vect_location, | ||||||||
1399 | "not vectorized: Bad inner loop.\n"); | ||||||||
1400 | |||||||||
1401 | if (!expr_invariant_in_loop_p (loop, inner_niter)) | ||||||||
1402 | return opt_result::failure_at (vect_location, | ||||||||
1403 | "not vectorized: inner-loop count not" | ||||||||
1404 | " invariant.\n"); | ||||||||
1405 | |||||||||
1406 | if (dump_enabled_p ()) | ||||||||
1407 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1408 | "Considering outer-loop vectorization.\n"); | ||||||||
1409 | } | ||||||||
1410 | |||||||||
1411 | if (!single_exit (loop)) | ||||||||
1412 | return opt_result::failure_at (vect_location, | ||||||||
1413 | "not vectorized: multiple exits.\n"); | ||||||||
1414 | if (EDGE_COUNT (loop->header->preds)vec_safe_length (loop->header->preds) != 2) | ||||||||
1415 | return opt_result::failure_at (vect_location, | ||||||||
1416 | "not vectorized:" | ||||||||
1417 | " too many incoming edges.\n"); | ||||||||
1418 | |||||||||
1419 | /* We assume that the loop exit condition is at the end of the loop. i.e, | ||||||||
1420 | that the loop is represented as a do-while (with a proper if-guard | ||||||||
1421 | before the loop if needed), where the loop header contains all the | ||||||||
1422 | executable statements, and the latch is empty. */ | ||||||||
1423 | if (!empty_block_p (loop->latch) | ||||||||
1424 | || !gimple_seq_empty_p (phi_nodes (loop->latch))) | ||||||||
1425 | return opt_result::failure_at (vect_location, | ||||||||
1426 | "not vectorized: latch block not empty.\n"); | ||||||||
1427 | |||||||||
1428 | /* Make sure the exit is not abnormal. */ | ||||||||
1429 | edge e = single_exit (loop); | ||||||||
1430 | if (e->flags & EDGE_ABNORMAL) | ||||||||
1431 | return opt_result::failure_at (vect_location, | ||||||||
1432 | "not vectorized:" | ||||||||
1433 | " abnormal loop exit edge.\n"); | ||||||||
1434 | |||||||||
1435 | *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations, | ||||||||
1436 | number_of_iterationsm1); | ||||||||
1437 | if (!*loop_cond) | ||||||||
1438 | return opt_result::failure_at | ||||||||
1439 | (vect_location, | ||||||||
1440 | "not vectorized: complicated exit condition.\n"); | ||||||||
1441 | |||||||||
1442 | if (integer_zerop (*assumptions) | ||||||||
1443 | || !*number_of_iterations | ||||||||
1444 | || chrec_contains_undetermined (*number_of_iterations)) | ||||||||
1445 | return opt_result::failure_at | ||||||||
1446 | (*loop_cond, | ||||||||
1447 | "not vectorized: number of iterations cannot be computed.\n"); | ||||||||
1448 | |||||||||
1449 | if (integer_zerop (*number_of_iterations)) | ||||||||
1450 | return opt_result::failure_at | ||||||||
1451 | (*loop_cond, | ||||||||
1452 | "not vectorized: number of iterations = 0.\n"); | ||||||||
1453 | |||||||||
1454 | return opt_result::success (); | ||||||||
1455 | } | ||||||||
1456 | |||||||||
1457 | /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */ | ||||||||
1458 | |||||||||
1459 | opt_loop_vec_info | ||||||||
1460 | vect_analyze_loop_form (class loop *loop, vec_info_shared *shared) | ||||||||
1461 | { | ||||||||
1462 | tree assumptions, number_of_iterations, number_of_iterationsm1; | ||||||||
1463 | gcond *loop_cond, *inner_loop_cond = NULLnullptr; | ||||||||
1464 | |||||||||
1465 | opt_result res | ||||||||
1466 | = vect_analyze_loop_form_1 (loop, &loop_cond, | ||||||||
1467 | &assumptions, &number_of_iterationsm1, | ||||||||
1468 | &number_of_iterations, &inner_loop_cond); | ||||||||
1469 | if (!res) | ||||||||
1470 | return opt_loop_vec_info::propagate_failure (res); | ||||||||
1471 | |||||||||
1472 | loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); | ||||||||
1473 | LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1 = number_of_iterationsm1; | ||||||||
1474 | LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters = number_of_iterations; | ||||||||
1475 | LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo)(loop_vinfo)->num_iters_unchanged = number_of_iterations; | ||||||||
1476 | if (!integer_onep (assumptions)) | ||||||||
1477 | { | ||||||||
1478 | /* We consider to vectorize this loop by versioning it under | ||||||||
1479 | some assumptions. In order to do this, we need to clear | ||||||||
1480 | existing information computed by scev and niter analyzer. */ | ||||||||
1481 | scev_reset_htab (); | ||||||||
1482 | free_numbers_of_iterations_estimates (loop); | ||||||||
1483 | /* Also set flag for this loop so that following scev and niter | ||||||||
1484 | analysis are done under the assumptions. */ | ||||||||
1485 | loop_constraint_set (loop, LOOP_C_FINITE(1 << 1)); | ||||||||
1486 | /* Also record the assumptions for versioning. */ | ||||||||
1487 | LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo)(loop_vinfo)->num_iters_assumptions = assumptions; | ||||||||
1488 | } | ||||||||
1489 | |||||||||
1490 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0)) | ||||||||
1491 | { | ||||||||
1492 | if (dump_enabled_p ()) | ||||||||
1493 | { | ||||||||
1494 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1495 | "Symbolic number of iterations is "); | ||||||||
1496 | dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations); | ||||||||
1497 | dump_printf (MSG_NOTE, "\n"); | ||||||||
1498 | } | ||||||||
1499 | } | ||||||||
1500 | |||||||||
1501 | stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (loop_cond); | ||||||||
1502 | STMT_VINFO_TYPE (loop_cond_info)(loop_cond_info)->type = loop_exit_ctrl_vec_info_type; | ||||||||
1503 | if (inner_loop_cond) | ||||||||
1504 | { | ||||||||
1505 | stmt_vec_info inner_loop_cond_info | ||||||||
1506 | = loop_vinfo->lookup_stmt (inner_loop_cond); | ||||||||
1507 | STMT_VINFO_TYPE (inner_loop_cond_info)(inner_loop_cond_info)->type = loop_exit_ctrl_vec_info_type; | ||||||||
1508 | } | ||||||||
1509 | |||||||||
1510 | gcc_assert (!loop->aux)((void)(!(!loop->aux) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1510, __FUNCTION__), 0 : 0)); | ||||||||
1511 | loop->aux = loop_vinfo; | ||||||||
1512 | return opt_loop_vec_info::success (loop_vinfo); | ||||||||
1513 | } | ||||||||
1514 | |||||||||
1515 | |||||||||
1516 | |||||||||
1517 | /* Scan the loop stmts and dependent on whether there are any (non-)SLP | ||||||||
1518 | statements update the vectorization factor. */ | ||||||||
1519 | |||||||||
1520 | static void | ||||||||
1521 | vect_update_vf_for_slp (loop_vec_info loop_vinfo) | ||||||||
1522 | { | ||||||||
1523 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
1524 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; | ||||||||
1525 | int nbbs = loop->num_nodes; | ||||||||
1526 | poly_uint64 vectorization_factor; | ||||||||
1527 | int i; | ||||||||
1528 | |||||||||
1529 | DUMP_VECT_SCOPE ("vect_update_vf_for_slp")auto_dump_scope scope ("vect_update_vf_for_slp", vect_location ); | ||||||||
1530 | |||||||||
1531 | vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; | ||||||||
1532 | gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1532, __FUNCTION__), 0 : 0)); | ||||||||
1533 | |||||||||
1534 | /* If all the stmts in the loop can be SLPed, we perform only SLP, and | ||||||||
1535 | vectorization factor of the loop is the unrolling factor required by | ||||||||
1536 | the SLP instances. If that unrolling factor is 1, we say, that we | ||||||||
1537 | perform pure SLP on loop - cross iteration parallelism is not | ||||||||
1538 | exploited. */ | ||||||||
1539 | bool only_slp_in_loop = true; | ||||||||
1540 | for (i = 0; i < nbbs; i++) | ||||||||
1541 | { | ||||||||
1542 | basic_block bb = bbs[i]; | ||||||||
1543 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); | ||||||||
1544 | gsi_next (&si)) | ||||||||
1545 | { | ||||||||
1546 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); | ||||||||
1547 | if (!stmt_info) | ||||||||
1548 | continue; | ||||||||
1549 | if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) | ||||||||
1550 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info )->def_type) == vect_double_reduction_def) || (((stmt_info )->def_type) == vect_nested_cycle))) | ||||||||
1551 | && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) | ||||||||
1552 | /* STMT needs both SLP and loop-based vectorization. */ | ||||||||
1553 | only_slp_in_loop = false; | ||||||||
1554 | } | ||||||||
1555 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); | ||||||||
1556 | gsi_next (&si)) | ||||||||
1557 | { | ||||||||
1558 | if (is_gimple_debug (gsi_stmt (si))) | ||||||||
1559 | continue; | ||||||||
1560 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); | ||||||||
1561 | stmt_info = vect_stmt_to_vectorize (stmt_info); | ||||||||
1562 | if ((STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope) | ||||||||
1563 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info )->def_type) == vect_double_reduction_def) || (((stmt_info )->def_type) == vect_nested_cycle))) | ||||||||
1564 | && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) | ||||||||
1565 | /* STMT needs both SLP and loop-based vectorization. */ | ||||||||
1566 | only_slp_in_loop = false; | ||||||||
1567 | } | ||||||||
1568 | } | ||||||||
1569 | |||||||||
1570 | if (only_slp_in_loop) | ||||||||
1571 | { | ||||||||
1572 | if (dump_enabled_p ()) | ||||||||
1573 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1574 | "Loop contains only SLP stmts\n"); | ||||||||
1575 | vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor; | ||||||||
1576 | } | ||||||||
1577 | else | ||||||||
1578 | { | ||||||||
1579 | if (dump_enabled_p ()) | ||||||||
1580 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1581 | "Loop contains SLP and non-SLP stmts\n"); | ||||||||
1582 | /* Both the vectorization factor and unroll factor have the form | ||||||||
1583 | GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, | ||||||||
1584 | so they must have a common multiple. */ | ||||||||
1585 | vectorization_factor | ||||||||
1586 | = force_common_multiple (vectorization_factor, | ||||||||
1587 | LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)(loop_vinfo)->slp_unrolling_factor); | ||||||||
1588 | } | ||||||||
1589 | |||||||||
1590 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = vectorization_factor; | ||||||||
1591 | if (dump_enabled_p ()) | ||||||||
1592 | { | ||||||||
1593 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1594 | "Updating vectorization factor to "); | ||||||||
1595 | dump_dec (MSG_NOTE, vectorization_factor); | ||||||||
1596 | dump_printf (MSG_NOTE, ".\n"); | ||||||||
1597 | } | ||||||||
1598 | } | ||||||||
1599 | |||||||||
1600 | /* Return true if STMT_INFO describes a double reduction phi and if | ||||||||
1601 | the other phi in the reduction is also relevant for vectorization. | ||||||||
1602 | This rejects cases such as: | ||||||||
1603 | |||||||||
1604 | outer1: | ||||||||
1605 | x_1 = PHI <x_3(outer2), ...>; | ||||||||
1606 | ... | ||||||||
1607 | |||||||||
1608 | inner: | ||||||||
1609 | x_2 = ...; | ||||||||
1610 | ... | ||||||||
1611 | |||||||||
1612 | outer2: | ||||||||
1613 | x_3 = PHI <x_2(inner)>; | ||||||||
1614 | |||||||||
1615 | if nothing in x_2 or elsewhere makes x_1 relevant. */ | ||||||||
1616 | |||||||||
1617 | static bool | ||||||||
1618 | vect_active_double_reduction_p (stmt_vec_info stmt_info) | ||||||||
1619 | { | ||||||||
1620 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_double_reduction_def) | ||||||||
1621 | return false; | ||||||||
1622 | |||||||||
1623 | return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info))(((stmt_info)->reduc_def)->relevant != vect_unused_in_scope ); | ||||||||
1624 | } | ||||||||
1625 | |||||||||
1626 | /* Function vect_analyze_loop_operations. | ||||||||
1627 | |||||||||
1628 | Scan the loop stmts and make sure they are all vectorizable. */ | ||||||||
1629 | |||||||||
1630 | static opt_result | ||||||||
1631 | vect_analyze_loop_operations (loop_vec_info loop_vinfo) | ||||||||
1632 | { | ||||||||
1633 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
1634 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs; | ||||||||
1635 | int nbbs = loop->num_nodes; | ||||||||
1636 | int i; | ||||||||
1637 | stmt_vec_info stmt_info; | ||||||||
1638 | bool need_to_vectorize = false; | ||||||||
1639 | bool ok; | ||||||||
1640 | |||||||||
1641 | DUMP_VECT_SCOPE ("vect_analyze_loop_operations")auto_dump_scope scope ("vect_analyze_loop_operations", vect_location ); | ||||||||
1642 | |||||||||
1643 | auto_vec<stmt_info_for_cost> cost_vec; | ||||||||
1644 | |||||||||
1645 | for (i = 0; i < nbbs; i++) | ||||||||
1646 | { | ||||||||
1647 | basic_block bb = bbs[i]; | ||||||||
1648 | |||||||||
1649 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); | ||||||||
1650 | gsi_next (&si)) | ||||||||
1651 | { | ||||||||
1652 | gphi *phi = si.phi (); | ||||||||
1653 | ok = true; | ||||||||
1654 | |||||||||
1655 | stmt_info = loop_vinfo->lookup_stmt (phi); | ||||||||
1656 | if (dump_enabled_p ()) | ||||||||
1657 | dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", phi); | ||||||||
1658 | if (virtual_operand_p (gimple_phi_result (phi))) | ||||||||
1659 | continue; | ||||||||
1660 | |||||||||
1661 | /* Inner-loop loop-closed exit phi in outer-loop vectorization | ||||||||
1662 | (i.e., a phi in the tail of the outer-loop). */ | ||||||||
1663 | if (! is_loop_header_bb_p (bb)) | ||||||||
1664 | { | ||||||||
1665 | /* FORNOW: we currently don't support the case that these phis | ||||||||
1666 | are not used in the outerloop (unless it is double reduction, | ||||||||
1667 | i.e., this phi is vect_reduction_def), cause this case | ||||||||
1668 | requires to actually do something here. */ | ||||||||
1669 | if (STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live | ||||||||
1670 | && !vect_active_double_reduction_p (stmt_info)) | ||||||||
1671 | return opt_result::failure_at (phi, | ||||||||
1672 | "Unsupported loop-closed phi" | ||||||||
1673 | " in outer-loop.\n"); | ||||||||
1674 | |||||||||
1675 | /* If PHI is used in the outer loop, we check that its operand | ||||||||
1676 | is defined in the inner loop. */ | ||||||||
1677 | if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)) | ||||||||
1678 | { | ||||||||
1679 | tree phi_op; | ||||||||
1680 | |||||||||
1681 | if (gimple_phi_num_args (phi) != 1) | ||||||||
1682 | return opt_result::failure_at (phi, "unsupported phi"); | ||||||||
1683 | |||||||||
1684 | phi_op = PHI_ARG_DEF (phi, 0)gimple_phi_arg_def ((phi), (0)); | ||||||||
1685 | stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); | ||||||||
1686 | if (!op_def_info) | ||||||||
1687 | return opt_result::failure_at (phi, "unsupported phi\n"); | ||||||||
1688 | |||||||||
1689 | if (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant != vect_used_in_outer | ||||||||
1690 | && (STMT_VINFO_RELEVANT (op_def_info)(op_def_info)->relevant | ||||||||
1691 | != vect_used_in_outer_by_reduction)) | ||||||||
1692 | return opt_result::failure_at (phi, "unsupported phi\n"); | ||||||||
1693 | |||||||||
1694 | if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_internal_def | ||||||||
1695 | || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type | ||||||||
1696 | == vect_double_reduction_def)) | ||||||||
1697 | && !vectorizable_lc_phi (loop_vinfo, | ||||||||
1698 | stmt_info, NULLnullptr, NULLnullptr)) | ||||||||
1699 | return opt_result::failure_at (phi, "unsupported phi\n"); | ||||||||
1700 | } | ||||||||
1701 | |||||||||
1702 | continue; | ||||||||
1703 | } | ||||||||
1704 | |||||||||
1705 | gcc_assert (stmt_info)((void)(!(stmt_info) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1705, __FUNCTION__), 0 : 0)); | ||||||||
1706 | |||||||||
1707 | if ((STMT_VINFO_RELEVANT (stmt_info)(stmt_info)->relevant == vect_used_in_scope | ||||||||
1708 | || STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live) | ||||||||
1709 | && STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type != vect_induction_def) | ||||||||
1710 | /* A scalar-dependence cycle that we don't support. */ | ||||||||
1711 | return opt_result::failure_at (phi, | ||||||||
1712 | "not vectorized:" | ||||||||
1713 | " scalar dependence cycle.\n"); | ||||||||
1714 | |||||||||
1715 | if (STMT_VINFO_RELEVANT_P (stmt_info)((stmt_info)->relevant != vect_unused_in_scope)) | ||||||||
1716 | { | ||||||||
1717 | need_to_vectorize = true; | ||||||||
1718 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_induction_def | ||||||||
1719 | && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) | ||||||||
1720 | ok = vectorizable_induction (loop_vinfo, | ||||||||
1721 | stmt_info, NULLnullptr, NULLnullptr, | ||||||||
1722 | &cost_vec); | ||||||||
1723 | else if ((STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def | ||||||||
1724 | || (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type | ||||||||
1725 | == vect_double_reduction_def) | ||||||||
1726 | || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle) | ||||||||
1727 | && ! PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) | ||||||||
1728 | ok = vectorizable_reduction (loop_vinfo, | ||||||||
1729 | stmt_info, NULLnullptr, NULLnullptr, &cost_vec); | ||||||||
1730 | } | ||||||||
1731 | |||||||||
1732 | /* SLP PHIs are tested by vect_slp_analyze_node_operations. */ | ||||||||
1733 | if (ok | ||||||||
1734 | && STMT_VINFO_LIVE_P (stmt_info)(stmt_info)->live | ||||||||
1735 | && !PURE_SLP_STMT (stmt_info)((stmt_info)->slp_type == pure_slp)) | ||||||||
1736 | ok = vectorizable_live_operation (loop_vinfo, | ||||||||
1737 | stmt_info, NULLnullptr, NULLnullptr, NULLnullptr, | ||||||||
1738 | -1, false, &cost_vec); | ||||||||
1739 | |||||||||
1740 | if (!ok) | ||||||||
1741 | return opt_result::failure_at (phi, | ||||||||
1742 | "not vectorized: relevant phi not " | ||||||||
1743 | "supported: %G", | ||||||||
1744 | static_cast <gimple *> (phi)); | ||||||||
1745 | } | ||||||||
1746 | |||||||||
1747 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); | ||||||||
1748 | gsi_next (&si)) | ||||||||
1749 | { | ||||||||
1750 | gimple *stmt = gsi_stmt (si); | ||||||||
1751 | if (!gimple_clobber_p (stmt) | ||||||||
1752 | && !is_gimple_debug (stmt)) | ||||||||
1753 | { | ||||||||
1754 | opt_result res | ||||||||
1755 | = vect_analyze_stmt (loop_vinfo, | ||||||||
1756 | loop_vinfo->lookup_stmt (stmt), | ||||||||
1757 | &need_to_vectorize, | ||||||||
1758 | NULLnullptr, NULLnullptr, &cost_vec); | ||||||||
1759 | if (!res) | ||||||||
1760 | return res; | ||||||||
1761 | } | ||||||||
1762 | } | ||||||||
1763 | } /* bbs */ | ||||||||
1764 | |||||||||
1765 | add_stmt_costs (loop_vinfo, loop_vinfo->target_cost_data, &cost_vec); | ||||||||
1766 | |||||||||
1767 | /* All operations in the loop are either irrelevant (deal with loop | ||||||||
1768 | control, or dead), or only used outside the loop and can be moved | ||||||||
1769 | out of the loop (e.g. invariants, inductions). The loop can be | ||||||||
1770 | optimized away by scalar optimizations. We're better off not | ||||||||
1771 | touching this loop. */ | ||||||||
1772 | if (!need_to_vectorize) | ||||||||
1773 | { | ||||||||
1774 | if (dump_enabled_p ()) | ||||||||
1775 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1776 | "All the computation can be taken out of the loop.\n"); | ||||||||
1777 | return opt_result::failure_at | ||||||||
1778 | (vect_location, | ||||||||
1779 | "not vectorized: redundant loop. no profit to vectorize.\n"); | ||||||||
1780 | } | ||||||||
1781 | |||||||||
1782 | return opt_result::success (); | ||||||||
1783 | } | ||||||||
1784 | |||||||||
1785 | /* Return true if we know that the iteration count is smaller than the | ||||||||
1786 | vectorization factor. Return false if it isn't, or if we can't be sure | ||||||||
1787 | either way. */ | ||||||||
1788 | |||||||||
1789 | static bool | ||||||||
1790 | vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) | ||||||||
1791 | { | ||||||||
1792 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); | ||||||||
1793 | |||||||||
1794 | HOST_WIDE_INTlong max_niter; | ||||||||
1795 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0)) | ||||||||
1796 | max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1796, __FUNCTION__)))); | ||||||||
1797 | else | ||||||||
1798 | max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop); | ||||||||
1799 | |||||||||
1800 | if (max_niter != -1 && (unsigned HOST_WIDE_INTlong) max_niter < assumed_vf) | ||||||||
1801 | return true; | ||||||||
1802 | |||||||||
1803 | return false; | ||||||||
1804 | } | ||||||||
1805 | |||||||||
1806 | /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it | ||||||||
1807 | is worthwhile to vectorize. Return 1 if definitely yes, 0 if | ||||||||
1808 | definitely no, or -1 if it's worth retrying. */ | ||||||||
1809 | |||||||||
1810 | static int | ||||||||
1811 | vect_analyze_loop_costing (loop_vec_info loop_vinfo) | ||||||||
1812 | { | ||||||||
1813 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
1814 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); | ||||||||
1815 | |||||||||
1816 | /* Only loops that can handle partially-populated vectors can have iteration | ||||||||
1817 | counts less than the vectorization factor. */ | ||||||||
1818 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
1819 | { | ||||||||
1820 | if (vect_known_niters_smaller_than_vf (loop_vinfo)) | ||||||||
1821 | { | ||||||||
1822 | if (dump_enabled_p ()) | ||||||||
1823 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1824 | "not vectorized: iteration count smaller than " | ||||||||
1825 | "vectorization factor.\n"); | ||||||||
1826 | return 0; | ||||||||
1827 | } | ||||||||
1828 | } | ||||||||
1829 | |||||||||
1830 | /* If using the "very cheap" model. reject cases in which we'd keep | ||||||||
1831 | a copy of the scalar code (even if we might be able to vectorize it). */ | ||||||||
1832 | if (flag_vect_cost_modelglobal_options.x_flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP | ||||||||
1833 | && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment | ||||||||
1834 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps | ||||||||
1835 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter)) | ||||||||
1836 | { | ||||||||
1837 | if (dump_enabled_p ()) | ||||||||
1838 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1839 | "some scalar iterations would need to be peeled\n"); | ||||||||
1840 | return 0; | ||||||||
1841 | } | ||||||||
1842 | |||||||||
1843 | int min_profitable_iters, min_profitable_estimate; | ||||||||
1844 | vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, | ||||||||
1845 | &min_profitable_estimate); | ||||||||
1846 | |||||||||
1847 | if (min_profitable_iters < 0) | ||||||||
1848 | { | ||||||||
1849 | if (dump_enabled_p ()) | ||||||||
1850 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1851 | "not vectorized: vectorization not profitable.\n"); | ||||||||
1852 | if (dump_enabled_p ()) | ||||||||
1853 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1854 | "not vectorized: vector version will never be " | ||||||||
1855 | "profitable.\n"); | ||||||||
1856 | return -1; | ||||||||
1857 | } | ||||||||
1858 | |||||||||
1859 | int min_scalar_loop_bound = (param_min_vect_loop_boundglobal_options.x_param_min_vect_loop_bound | ||||||||
1860 | * assumed_vf); | ||||||||
1861 | |||||||||
1862 | /* Use the cost model only if it is more conservative than user specified | ||||||||
1863 | threshold. */ | ||||||||
1864 | unsigned int th = (unsigned) MAX (min_scalar_loop_bound,((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound ) : (min_profitable_iters)) | ||||||||
1865 | min_profitable_iters)((min_scalar_loop_bound) > (min_profitable_iters) ? (min_scalar_loop_bound ) : (min_profitable_iters)); | ||||||||
1866 | |||||||||
1867 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = th; | ||||||||
1868 | |||||||||
1869 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) | ||||||||
1870 | && LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 1870, __FUNCTION__)))) < th) | ||||||||
1871 | { | ||||||||
1872 | if (dump_enabled_p ()) | ||||||||
1873 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1874 | "not vectorized: vectorization not profitable.\n"); | ||||||||
1875 | if (dump_enabled_p ()) | ||||||||
1876 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1877 | "not vectorized: iteration count smaller than user " | ||||||||
1878 | "specified loop bound parameter or minimum profitable " | ||||||||
1879 | "iterations (whichever is more conservative).\n"); | ||||||||
1880 | return 0; | ||||||||
1881 | } | ||||||||
1882 | |||||||||
1883 | /* The static profitablity threshold min_profitable_estimate includes | ||||||||
1884 | the cost of having to check at runtime whether the scalar loop | ||||||||
1885 | should be used instead. If it turns out that we don't need or want | ||||||||
1886 | such a check, the threshold we should use for the static estimate | ||||||||
1887 | is simply the point at which the vector loop becomes more profitable | ||||||||
1888 | than the scalar loop. */ | ||||||||
1889 | if (min_profitable_estimate > min_profitable_iters | ||||||||
1890 | && !LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) | ||||||||
1891 | && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter | ||||||||
1892 | && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment | ||||||||
1893 | && !vect_apply_runtime_profitability_check_p (loop_vinfo)) | ||||||||
1894 | { | ||||||||
1895 | if (dump_enabled_p ()) | ||||||||
1896 | dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime" | ||||||||
1897 | " choice between the scalar and vector loops\n"); | ||||||||
1898 | min_profitable_estimate = min_profitable_iters; | ||||||||
1899 | } | ||||||||
1900 | |||||||||
1901 | /* If the vector loop needs multiple iterations to be beneficial then | ||||||||
1902 | things are probably too close to call, and the conservative thing | ||||||||
1903 | would be to stick with the scalar code. */ | ||||||||
1904 | if (flag_vect_cost_modelglobal_options.x_flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP | ||||||||
1905 | && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) | ||||||||
1906 | { | ||||||||
1907 | if (dump_enabled_p ()) | ||||||||
1908 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1909 | "one iteration of the vector loop would be" | ||||||||
1910 | " more expensive than the equivalent number of" | ||||||||
1911 | " iterations of the scalar loop\n"); | ||||||||
1912 | return 0; | ||||||||
1913 | } | ||||||||
1914 | |||||||||
1915 | HOST_WIDE_INTlong estimated_niter; | ||||||||
1916 | |||||||||
1917 | /* If we are vectorizing an epilogue then we know the maximum number of | ||||||||
1918 | scalar iterations it will cover is at least one lower than the | ||||||||
1919 | vectorization factor of the main loop. */ | ||||||||
1920 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) | ||||||||
1921 | estimated_niter | ||||||||
1922 | = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info) - 1; | ||||||||
1923 | else | ||||||||
1924 | { | ||||||||
1925 | estimated_niter = estimated_stmt_executions_int (loop); | ||||||||
1926 | if (estimated_niter == -1) | ||||||||
1927 | estimated_niter = likely_max_stmt_executions_int (loop); | ||||||||
1928 | } | ||||||||
1929 | if (estimated_niter != -1 | ||||||||
1930 | && ((unsigned HOST_WIDE_INTlong) estimated_niter | ||||||||
1931 | < MAX (th, (unsigned) min_profitable_estimate)((th) > ((unsigned) min_profitable_estimate) ? (th) : ((unsigned ) min_profitable_estimate)))) | ||||||||
1932 | { | ||||||||
1933 | if (dump_enabled_p ()) | ||||||||
1934 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
1935 | "not vectorized: estimated iteration count too " | ||||||||
1936 | "small.\n"); | ||||||||
1937 | if (dump_enabled_p ()) | ||||||||
1938 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
1939 | "not vectorized: estimated iteration count smaller " | ||||||||
1940 | "than specified loop bound parameter or minimum " | ||||||||
1941 | "profitable iterations (whichever is more " | ||||||||
1942 | "conservative).\n"); | ||||||||
1943 | return -1; | ||||||||
1944 | } | ||||||||
1945 | |||||||||
1946 | return 1; | ||||||||
1947 | } | ||||||||
1948 | |||||||||
1949 | static opt_result | ||||||||
1950 | vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, | ||||||||
1951 | vec<data_reference_p> *datarefs, | ||||||||
1952 | unsigned int *n_stmts) | ||||||||
1953 | { | ||||||||
1954 | *n_stmts = 0; | ||||||||
1955 | for (unsigned i = 0; i < loop->num_nodes; i++) | ||||||||
1956 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); | ||||||||
1957 | !gsi_end_p (gsi); gsi_next (&gsi)) | ||||||||
1958 | { | ||||||||
1959 | gimple *stmt = gsi_stmt (gsi); | ||||||||
1960 | if (is_gimple_debug (stmt)) | ||||||||
1961 | continue; | ||||||||
1962 | ++(*n_stmts); | ||||||||
1963 | opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, | ||||||||
1964 | NULLnullptr, 0); | ||||||||
1965 | if (!res) | ||||||||
1966 | { | ||||||||
1967 | if (is_gimple_call (stmt) && loop->safelen) | ||||||||
1968 | { | ||||||||
1969 | tree fndecl = gimple_call_fndecl (stmt), op; | ||||||||
1970 | if (fndecl != NULL_TREE(tree) nullptr) | ||||||||
1971 | { | ||||||||
1972 | cgraph_node *node = cgraph_node::get (fndecl); | ||||||||
1973 | if (node != NULLnullptr && node->simd_clones != NULLnullptr) | ||||||||
1974 | { | ||||||||
1975 | unsigned int j, n = gimple_call_num_args (stmt); | ||||||||
1976 | for (j = 0; j < n; j++) | ||||||||
1977 | { | ||||||||
1978 | op = gimple_call_arg (stmt, j); | ||||||||
1979 | if (DECL_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code)) ] == tcc_declaration) | ||||||||
1980 | || (REFERENCE_CLASS_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code)) ] == tcc_reference) | ||||||||
1981 | && get_base_address (op))) | ||||||||
1982 | break; | ||||||||
1983 | } | ||||||||
1984 | op = gimple_call_lhs (stmt); | ||||||||
1985 | /* Ignore #pragma omp declare simd functions | ||||||||
1986 | if they don't have data references in the | ||||||||
1987 | call stmt itself. */ | ||||||||
1988 | if (j == n | ||||||||
1989 | && !(op | ||||||||
1990 | && (DECL_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code)) ] == tcc_declaration) | ||||||||
1991 | || (REFERENCE_CLASS_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code)) ] == tcc_reference) | ||||||||
1992 | && get_base_address (op))))) | ||||||||
1993 | continue; | ||||||||
1994 | } | ||||||||
1995 | } | ||||||||
1996 | } | ||||||||
1997 | return res; | ||||||||
1998 | } | ||||||||
1999 | /* If dependence analysis will give up due to the limit on the | ||||||||
2000 | number of datarefs stop here and fail fatally. */ | ||||||||
2001 | if (datarefs->length () | ||||||||
2002 | > (unsigned)param_loop_max_datarefs_for_datadepsglobal_options.x_param_loop_max_datarefs_for_datadeps) | ||||||||
2003 | return opt_result::failure_at (stmt, "exceeded param " | ||||||||
2004 | "loop-max-datarefs-for-datadeps\n"); | ||||||||
2005 | } | ||||||||
2006 | return opt_result::success (); | ||||||||
2007 | } | ||||||||
2008 | |||||||||
2009 | /* Look for SLP-only access groups and turn each individual access into its own | ||||||||
2010 | group. */ | ||||||||
2011 | static void | ||||||||
2012 | vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) | ||||||||
2013 | { | ||||||||
2014 | unsigned int i; | ||||||||
2015 | struct data_reference *dr; | ||||||||
2016 | |||||||||
2017 | DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups")auto_dump_scope scope ("vect_dissolve_slp_only_groups", vect_location ); | ||||||||
2018 | |||||||||
2019 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs; | ||||||||
2020 | FOR_EACH_VEC_ELT (datarefs, i, dr)for (i = 0; (datarefs).iterate ((i), &(dr)); ++(i)) | ||||||||
2021 | { | ||||||||
2022 | gcc_assert (DR_REF (dr))((void)(!((dr)->ref) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2022, __FUNCTION__), 0 : 0)); | ||||||||
2023 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)(dr)->stmt); | ||||||||
2024 | |||||||||
2025 | /* Check if the load is a part of an interleaving chain. */ | ||||||||
2026 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)((stmt_info)->dr_aux.dr && (((void)(!((stmt_info)-> dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2026, __FUNCTION__), 0 : 0)), (stmt_info)->first_element ))) | ||||||||
2027 | { | ||||||||
2028 | stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info)(((void)(!((stmt_info)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2028, __FUNCTION__), 0 : 0)), (stmt_info)->first_element ); | ||||||||
2029 | unsigned int group_size = DR_GROUP_SIZE (first_element)(((void)(!((first_element)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2029, __FUNCTION__), 0 : 0)), (first_element)->size); | ||||||||
2030 | |||||||||
2031 | /* Check if SLP-only groups. */ | ||||||||
2032 | if (!STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type | ||||||||
2033 | && STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p) | ||||||||
2034 | { | ||||||||
2035 | /* Dissolve the group. */ | ||||||||
2036 | STMT_VINFO_SLP_VECT_ONLY (first_element)(first_element)->slp_vect_only_p = false; | ||||||||
2037 | |||||||||
2038 | stmt_vec_info vinfo = first_element; | ||||||||
2039 | while (vinfo) | ||||||||
2040 | { | ||||||||
2041 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2041, __FUNCTION__), 0 : 0)), (vinfo)->next_element); | ||||||||
2042 | DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2042, __FUNCTION__), 0 : 0)), (vinfo)->first_element) = vinfo; | ||||||||
2043 | DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2043, __FUNCTION__), 0 : 0)), (vinfo)->next_element) = NULLnullptr; | ||||||||
2044 | DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2044, __FUNCTION__), 0 : 0)), (vinfo)->size) = 1; | ||||||||
2045 | if (STMT_VINFO_STRIDED_P (first_element)(first_element)->strided_p) | ||||||||
2046 | DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2046, __FUNCTION__), 0 : 0)), (vinfo)->gap) = 0; | ||||||||
2047 | else | ||||||||
2048 | DR_GROUP_GAP (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2048, __FUNCTION__), 0 : 0)), (vinfo)->gap) = group_size - 1; | ||||||||
2049 | vinfo = next; | ||||||||
2050 | } | ||||||||
2051 | } | ||||||||
2052 | } | ||||||||
2053 | } | ||||||||
2054 | } | ||||||||
2055 | |||||||||
2056 | /* Determine if operating on full vectors for LOOP_VINFO might leave | ||||||||
2057 | some scalar iterations still to do. If so, decide how we should | ||||||||
2058 | handle those scalar iterations. The possibilities are: | ||||||||
2059 | |||||||||
2060 | (1) Make LOOP_VINFO operate on partial vectors instead of full vectors. | ||||||||
2061 | In this case: | ||||||||
2062 | |||||||||
2063 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == true | ||||||||
2064 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false | ||||||||
2065 | LOOP_VINFO_PEELING_FOR_NITER == false | ||||||||
2066 | |||||||||
2067 | (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop | ||||||||
2068 | to handle the remaining scalar iterations. In this case: | ||||||||
2069 | |||||||||
2070 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == false | ||||||||
2071 | LOOP_VINFO_PEELING_FOR_NITER == true | ||||||||
2072 | |||||||||
2073 | There are two choices: | ||||||||
2074 | |||||||||
2075 | (2a) Consider vectorizing the epilogue loop at the same VF as the | ||||||||
2076 | main loop, but using partial vectors instead of full vectors. | ||||||||
2077 | In this case: | ||||||||
2078 | |||||||||
2079 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true | ||||||||
2080 | |||||||||
2081 | (2b) Consider vectorizing the epilogue loop at lower VFs only. | ||||||||
2082 | In this case: | ||||||||
2083 | |||||||||
2084 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false | ||||||||
2085 | |||||||||
2086 | When FOR_EPILOGUE_P is true, make this determination based on the | ||||||||
2087 | assumption that LOOP_VINFO is an epilogue loop, otherwise make it | ||||||||
2088 | based on the assumption that LOOP_VINFO is the main loop. The caller | ||||||||
2089 | has made sure that the number of iterations is set appropriately for | ||||||||
2090 | this value of FOR_EPILOGUE_P. */ | ||||||||
2091 | |||||||||
2092 | opt_result | ||||||||
2093 | vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo, | ||||||||
2094 | bool for_epilogue_p) | ||||||||
2095 | { | ||||||||
2096 | /* Determine whether there would be any scalar iterations left over. */ | ||||||||
2097 | bool need_peeling_or_partial_vectors_p | ||||||||
2098 | = vect_need_peeling_or_partial_vectors_p (loop_vinfo); | ||||||||
2099 | |||||||||
2100 | /* Decide whether to vectorize the loop with partial vectors. */ | ||||||||
2101 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = false; | ||||||||
2102 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = false; | ||||||||
2103 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p | ||||||||
2104 | && need_peeling_or_partial_vectors_p) | ||||||||
2105 | { | ||||||||
2106 | /* For partial-vector-usage=1, try to push the handling of partial | ||||||||
2107 | vectors to the epilogue, with the main loop continuing to operate | ||||||||
2108 | on full vectors. | ||||||||
2109 | |||||||||
2110 | ??? We could then end up failing to use partial vectors if we | ||||||||
2111 | decide to peel iterations into a prologue, and if the main loop | ||||||||
2112 | then ends up processing fewer than VF iterations. */ | ||||||||
2113 | if (param_vect_partial_vector_usageglobal_options.x_param_vect_partial_vector_usage == 1 | ||||||||
2114 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr) | ||||||||
2115 | && !vect_known_niters_smaller_than_vf (loop_vinfo)) | ||||||||
2116 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p = true; | ||||||||
2117 | else | ||||||||
2118 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p = true; | ||||||||
2119 | } | ||||||||
2120 | |||||||||
2121 | if (dump_enabled_p ()) | ||||||||
2122 | { | ||||||||
2123 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
2124 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2125 | "operating on partial vectors%s.\n", | ||||||||
2126 | for_epilogue_p ? " for epilogue loop" : ""); | ||||||||
2127 | else | ||||||||
2128 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2129 | "operating only on full vectors%s.\n", | ||||||||
2130 | for_epilogue_p ? " for epilogue loop" : ""); | ||||||||
2131 | } | ||||||||
2132 | |||||||||
2133 | if (for_epilogue_p) | ||||||||
2134 | { | ||||||||
2135 | loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info; | ||||||||
2136 | gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2136, __FUNCTION__), 0 : 0)); | ||||||||
2137 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
2138 | gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor , (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2139, __FUNCTION__), 0 : 0)) | ||||||||
2139 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))((void)(!((!maybe_le ((orig_loop_vinfo)->vectorization_factor , (loop_vinfo)->vectorization_factor))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2139, __FUNCTION__), 0 : 0)); | ||||||||
2140 | } | ||||||||
2141 | |||||||||
2142 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) | ||||||||
2143 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
2144 | { | ||||||||
2145 | /* Check that the loop processes at least one full vector. */ | ||||||||
2146 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; | ||||||||
2147 | tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo)(loop_vinfo)->num_iters; | ||||||||
2148 | if (known_lt (wi::to_widest (scalar_niters), vf)(!maybe_le (vf, wi::to_widest (scalar_niters)))) | ||||||||
2149 | return opt_result::failure_at (vect_location, | ||||||||
2150 | "loop does not have enough iterations" | ||||||||
2151 | " to support vectorization.\n"); | ||||||||
2152 | |||||||||
2153 | /* If we need to peel an extra epilogue iteration to handle data | ||||||||
2154 | accesses with gaps, check that there are enough scalar iterations | ||||||||
2155 | available. | ||||||||
2156 | |||||||||
2157 | The check above is redundant with this one when peeling for gaps, | ||||||||
2158 | but the distinction is useful for diagnostics. */ | ||||||||
2159 | tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo)(loop_vinfo)->num_itersm1; | ||||||||
2160 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps | ||||||||
2161 | && known_lt (wi::to_widest (scalar_nitersm1), vf)(!maybe_le (vf, wi::to_widest (scalar_nitersm1)))) | ||||||||
2162 | return opt_result::failure_at (vect_location, | ||||||||
2163 | "loop does not have enough iterations" | ||||||||
2164 | " to support peeling for gaps.\n"); | ||||||||
2165 | } | ||||||||
2166 | |||||||||
2167 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter | ||||||||
2168 | = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p | ||||||||
2169 | && need_peeling_or_partial_vectors_p); | ||||||||
2170 | |||||||||
2171 | return opt_result::success (); | ||||||||
2172 | } | ||||||||
2173 | |||||||||
2174 | /* Function vect_analyze_loop_2. | ||||||||
2175 | |||||||||
2176 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | ||||||||
2177 | for it. The different analyses will record information in the | ||||||||
2178 | loop_vec_info struct. */ | ||||||||
2179 | static opt_result | ||||||||
2180 | vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) | ||||||||
2181 | { | ||||||||
2182 | opt_result ok = opt_result::success (); | ||||||||
2183 | int res; | ||||||||
2184 | unsigned int max_vf = MAX_VECTORIZATION_FACTOR2147483647; | ||||||||
2185 | poly_uint64 min_vf = 2; | ||||||||
2186 | loop_vec_info orig_loop_vinfo = NULLnullptr; | ||||||||
2187 | |||||||||
2188 | /* If we are dealing with an epilogue then orig_loop_vinfo points to the | ||||||||
2189 | loop_vec_info of the first vectorized loop. */ | ||||||||
2190 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) | ||||||||
2191 | orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info; | ||||||||
2192 | else | ||||||||
2193 | orig_loop_vinfo = loop_vinfo; | ||||||||
2194 | gcc_assert (orig_loop_vinfo)((void)(!(orig_loop_vinfo) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2194, __FUNCTION__), 0 : 0)); | ||||||||
2195 | |||||||||
2196 | /* The first group of checks is independent of the vector size. */ | ||||||||
2197 | fatal = true; | ||||||||
2198 | |||||||||
2199 | if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond | ||||||||
2200 | && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)(loop_vinfo)->simd_if_cond)) | ||||||||
2201 | return opt_result::failure_at (vect_location, | ||||||||
2202 | "not vectorized: simd if(0)\n"); | ||||||||
2203 | |||||||||
2204 | /* Find all data references in the loop (which correspond to vdefs/vuses) | ||||||||
2205 | and analyze their evolution in the loop. */ | ||||||||
2206 | |||||||||
2207 | loop_p loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
2208 | |||||||||
2209 | /* Gather the data references and count stmts in the loop. */ | ||||||||
2210 | if (!LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs.exists ()) | ||||||||
2211 | { | ||||||||
2212 | opt_result res | ||||||||
2213 | = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs, | ||||||||
2214 | &LOOP_VINFO_DATAREFS (loop_vinfo)(loop_vinfo)->shared->datarefs, | ||||||||
2215 | n_stmts); | ||||||||
2216 | if (!res) | ||||||||
2217 | { | ||||||||
2218 | if (dump_enabled_p ()) | ||||||||
2219 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2220 | "not vectorized: loop contains function " | ||||||||
2221 | "calls or data references that cannot " | ||||||||
2222 | "be analyzed\n"); | ||||||||
2223 | return res; | ||||||||
2224 | } | ||||||||
2225 | loop_vinfo->shared->save_datarefs (); | ||||||||
2226 | } | ||||||||
2227 | else | ||||||||
2228 | loop_vinfo->shared->check_datarefs (); | ||||||||
2229 | |||||||||
2230 | /* Analyze the data references and also adjust the minimal | ||||||||
2231 | vectorization factor according to the loads and stores. */ | ||||||||
2232 | |||||||||
2233 | ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); | ||||||||
2234 | if (!ok) | ||||||||
2235 | { | ||||||||
2236 | if (dump_enabled_p ()) | ||||||||
2237 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2238 | "bad data references.\n"); | ||||||||
2239 | return ok; | ||||||||
2240 | } | ||||||||
2241 | |||||||||
2242 | /* Classify all cross-iteration scalar data-flow cycles. | ||||||||
2243 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ | ||||||||
2244 | vect_analyze_scalar_cycles (loop_vinfo); | ||||||||
2245 | |||||||||
2246 | vect_pattern_recog (loop_vinfo); | ||||||||
2247 | |||||||||
2248 | vect_fixup_scalar_cycles_with_patterns (loop_vinfo); | ||||||||
2249 | |||||||||
2250 | /* Analyze the access patterns of the data-refs in the loop (consecutive, | ||||||||
2251 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ | ||||||||
2252 | |||||||||
2253 | ok = vect_analyze_data_ref_accesses (loop_vinfo, NULLnullptr); | ||||||||
2254 | if (!ok) | ||||||||
2255 | { | ||||||||
2256 | if (dump_enabled_p ()) | ||||||||
2257 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2258 | "bad data access.\n"); | ||||||||
2259 | return ok; | ||||||||
2260 | } | ||||||||
2261 | |||||||||
2262 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ | ||||||||
2263 | |||||||||
2264 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); | ||||||||
2265 | if (!ok) | ||||||||
2266 | { | ||||||||
2267 | if (dump_enabled_p ()) | ||||||||
2268 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2269 | "unexpected pattern.\n"); | ||||||||
2270 | return ok; | ||||||||
2271 | } | ||||||||
2272 | |||||||||
2273 | /* While the rest of the analysis below depends on it in some way. */ | ||||||||
2274 | fatal = false; | ||||||||
2275 | |||||||||
2276 | /* Analyze data dependences between the data-refs in the loop | ||||||||
2277 | and adjust the maximum vectorization factor according to | ||||||||
2278 | the dependences. | ||||||||
2279 | FORNOW: fail at the first data dependence that we encounter. */ | ||||||||
2280 | |||||||||
2281 | ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); | ||||||||
2282 | if (!ok) | ||||||||
2283 | { | ||||||||
2284 | if (dump_enabled_p ()) | ||||||||
2285 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2286 | "bad data dependence.\n"); | ||||||||
2287 | return ok; | ||||||||
2288 | } | ||||||||
2289 | if (max_vf != MAX_VECTORIZATION_FACTOR2147483647 | ||||||||
2290 | && maybe_lt (max_vf, min_vf)) | ||||||||
2291 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); | ||||||||
2292 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)(loop_vinfo)->max_vectorization_factor = max_vf; | ||||||||
2293 | |||||||||
2294 | ok = vect_determine_vectorization_factor (loop_vinfo); | ||||||||
2295 | if (!ok) | ||||||||
2296 | { | ||||||||
2297 | if (dump_enabled_p ()) | ||||||||
2298 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2299 | "can't determine vectorization factor.\n"); | ||||||||
2300 | return ok; | ||||||||
2301 | } | ||||||||
2302 | if (max_vf != MAX_VECTORIZATION_FACTOR2147483647 | ||||||||
2303 | && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor)) | ||||||||
2304 | return opt_result::failure_at (vect_location, "bad data dependence.\n"); | ||||||||
2305 | |||||||||
2306 | /* Compute the scalar iteration cost. */ | ||||||||
2307 | vect_compute_single_scalar_iteration_cost (loop_vinfo); | ||||||||
2308 | |||||||||
2309 | poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; | ||||||||
2310 | |||||||||
2311 | /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ | ||||||||
2312 | ok = vect_analyze_slp (loop_vinfo, *n_stmts); | ||||||||
2313 | if (!ok) | ||||||||
2314 | return ok; | ||||||||
2315 | |||||||||
2316 | /* If there are any SLP instances mark them as pure_slp. */ | ||||||||
2317 | bool slp = vect_make_slp_decision (loop_vinfo); | ||||||||
2318 | if (slp) | ||||||||
2319 | { | ||||||||
2320 | /* Find stmts that need to be both vectorized and SLPed. */ | ||||||||
2321 | vect_detect_hybrid_slp (loop_vinfo); | ||||||||
2322 | |||||||||
2323 | /* Update the vectorization factor based on the SLP decision. */ | ||||||||
2324 | vect_update_vf_for_slp (loop_vinfo); | ||||||||
2325 | |||||||||
2326 | /* Optimize the SLP graph with the vectorization factor fixed. */ | ||||||||
2327 | vect_optimize_slp (loop_vinfo); | ||||||||
2328 | |||||||||
2329 | /* Gather the loads reachable from the SLP graph entries. */ | ||||||||
2330 | vect_gather_slp_loads (loop_vinfo); | ||||||||
2331 | } | ||||||||
2332 | |||||||||
2333 | bool saved_can_use_partial_vectors_p | ||||||||
2334 | = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p; | ||||||||
2335 | |||||||||
2336 | /* We don't expect to have to roll back to anything other than an empty | ||||||||
2337 | set of rgroups. */ | ||||||||
2338 | gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())((void)(!((loop_vinfo)->masks.is_empty ()) ? fancy_abort ( "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2338, __FUNCTION__), 0 : 0)); | ||||||||
2339 | |||||||||
2340 | /* This is the point where we can re-start analysis with SLP forced off. */ | ||||||||
2341 | start_over: | ||||||||
2342 | |||||||||
2343 | /* Now the vectorization factor is final. */ | ||||||||
2344 | poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; | ||||||||
2345 | gcc_assert (known_ne (vectorization_factor, 0U))((void)(!((!maybe_eq (vectorization_factor, 0U))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2345, __FUNCTION__), 0 : 0)); | ||||||||
2346 | |||||||||
2347 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) && dump_enabled_p ()) | ||||||||
2348 | { | ||||||||
2349 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2350 | "vectorization_factor = "); | ||||||||
2351 | dump_dec (MSG_NOTE, vectorization_factor); | ||||||||
2352 | dump_printf (MSG_NOTE, ", niters = %wd\n", | ||||||||
2353 | LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2353, __FUNCTION__))))); | ||||||||
2354 | } | ||||||||
2355 | |||||||||
2356 | /* Analyze the alignment of the data-refs in the loop. | ||||||||
2357 | Fail if a data reference is found that cannot be vectorized. */ | ||||||||
2358 | |||||||||
2359 | ok = vect_analyze_data_refs_alignment (loop_vinfo); | ||||||||
2360 | if (!ok) | ||||||||
2361 | { | ||||||||
2362 | if (dump_enabled_p ()) | ||||||||
2363 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2364 | "bad data alignment.\n"); | ||||||||
2365 | return ok; | ||||||||
2366 | } | ||||||||
2367 | |||||||||
2368 | /* Prune the list of ddrs to be tested at run-time by versioning for alias. | ||||||||
2369 | It is important to call pruning after vect_analyze_data_ref_accesses, | ||||||||
2370 | since we use grouping information gathered by interleaving analysis. */ | ||||||||
2371 | ok = vect_prune_runtime_alias_test_list (loop_vinfo); | ||||||||
2372 | if (!ok) | ||||||||
2373 | return ok; | ||||||||
2374 | |||||||||
2375 | /* Do not invoke vect_enhance_data_refs_alignment for epilogue | ||||||||
2376 | vectorization, since we do not want to add extra peeling or | ||||||||
2377 | add versioning for alignment. */ | ||||||||
2378 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) | ||||||||
2379 | /* This pass will decide on using loop versioning and/or loop peeling in | ||||||||
2380 | order to enhance the alignment of data references in the loop. */ | ||||||||
2381 | ok = vect_enhance_data_refs_alignment (loop_vinfo); | ||||||||
2382 | if (!ok) | ||||||||
2383 | return ok; | ||||||||
2384 | |||||||||
2385 | if (slp) | ||||||||
2386 | { | ||||||||
2387 | /* Analyze operations in the SLP instances. Note this may | ||||||||
2388 | remove unsupported SLP instances which makes the above | ||||||||
2389 | SLP kind detection invalid. */ | ||||||||
2390 | unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.length (); | ||||||||
2391 | vect_slp_analyze_operations (loop_vinfo); | ||||||||
2392 | if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.length () != old_size) | ||||||||
2393 | { | ||||||||
2394 | ok = opt_result::failure_at (vect_location, | ||||||||
2395 | "unsupported SLP instances\n"); | ||||||||
2396 | goto again; | ||||||||
2397 | } | ||||||||
2398 | |||||||||
2399 | /* Check whether any load in ALL SLP instances is possibly permuted. */ | ||||||||
2400 | slp_tree load_node, slp_root; | ||||||||
2401 | unsigned i, x; | ||||||||
2402 | slp_instance instance; | ||||||||
2403 | bool can_use_lanes = true; | ||||||||
2404 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)for (x = 0; ((loop_vinfo)->slp_instances).iterate ((x), & (instance)); ++(x)) | ||||||||
2405 | { | ||||||||
2406 | slp_root = SLP_INSTANCE_TREE (instance)(instance)->root; | ||||||||
2407 | int group_size = SLP_TREE_LANES (slp_root)(slp_root)->lanes; | ||||||||
2408 | tree vectype = SLP_TREE_VECTYPE (slp_root)(slp_root)->vectype; | ||||||||
2409 | bool loads_permuted = false; | ||||||||
2410 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)for (i = 0; ((instance)->loads).iterate ((i), &(load_node )); ++(i)) | ||||||||
2411 | { | ||||||||
2412 | if (!SLP_TREE_LOAD_PERMUTATION (load_node)(load_node)->load_permutation.exists ()) | ||||||||
2413 | continue; | ||||||||
2414 | unsigned j; | ||||||||
2415 | stmt_vec_info load_info; | ||||||||
2416 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)for (j = 0; ((load_node)->stmts).iterate ((j), &(load_info )); ++(j)) | ||||||||
2417 | if (SLP_TREE_LOAD_PERMUTATION (load_node)(load_node)->load_permutation[j] != j) | ||||||||
2418 | { | ||||||||
2419 | loads_permuted = true; | ||||||||
2420 | break; | ||||||||
2421 | } | ||||||||
2422 | } | ||||||||
2423 | |||||||||
2424 | /* If the loads and stores can be handled with load/store-lane | ||||||||
2425 | instructions record it and move on to the next instance. */ | ||||||||
2426 | if (loads_permuted | ||||||||
2427 | && SLP_INSTANCE_KIND (instance)(instance)->kind == slp_inst_kind_store | ||||||||
2428 | && vect_store_lanes_supported (vectype, group_size, false)) | ||||||||
2429 | { | ||||||||
2430 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)for (i = 0; ((instance)->loads).iterate ((i), &(load_node )); ++(i)) | ||||||||
2431 | { | ||||||||
2432 | stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT(((void)(!(((load_node)->stmts[0])->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2433, __FUNCTION__), 0 : 0)), ((load_node)->stmts[0])-> first_element) | ||||||||
2433 | (SLP_TREE_SCALAR_STMTS (load_node)[0])(((void)(!(((load_node)->stmts[0])->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2433, __FUNCTION__), 0 : 0)), ((load_node)->stmts[0])-> first_element); | ||||||||
2434 | /* Use SLP for strided accesses (or if we can't | ||||||||
2435 | load-lanes). */ | ||||||||
2436 | if (STMT_VINFO_STRIDED_P (stmt_vinfo)(stmt_vinfo)->strided_p | ||||||||
2437 | || ! vect_load_lanes_supported | ||||||||
2438 | (STMT_VINFO_VECTYPE (stmt_vinfo)(stmt_vinfo)->vectype, | ||||||||
2439 | DR_GROUP_SIZE (stmt_vinfo)(((void)(!((stmt_vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2439, __FUNCTION__), 0 : 0)), (stmt_vinfo)->size), false)) | ||||||||
2440 | break; | ||||||||
2441 | } | ||||||||
2442 | |||||||||
2443 | can_use_lanes | ||||||||
2444 | = can_use_lanes && i == SLP_INSTANCE_LOADS (instance)(instance)->loads.length (); | ||||||||
2445 | |||||||||
2446 | if (can_use_lanes && dump_enabled_p ()) | ||||||||
2447 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2448 | "SLP instance %p can use load/store-lanes\n", | ||||||||
2449 | instance); | ||||||||
2450 | } | ||||||||
2451 | else | ||||||||
2452 | { | ||||||||
2453 | can_use_lanes = false; | ||||||||
2454 | break; | ||||||||
2455 | } | ||||||||
2456 | } | ||||||||
2457 | |||||||||
2458 | /* If all SLP instances can use load/store-lanes abort SLP and try again | ||||||||
2459 | with SLP disabled. */ | ||||||||
2460 | if (can_use_lanes) | ||||||||
2461 | { | ||||||||
2462 | ok = opt_result::failure_at (vect_location, | ||||||||
2463 | "Built SLP cancelled: can use " | ||||||||
2464 | "load/store-lanes\n"); | ||||||||
2465 | if (dump_enabled_p ()) | ||||||||
2466 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2467 | "Built SLP cancelled: all SLP instances support " | ||||||||
2468 | "load/store-lanes\n"); | ||||||||
2469 | goto again; | ||||||||
2470 | } | ||||||||
2471 | } | ||||||||
2472 | |||||||||
2473 | /* Dissolve SLP-only groups. */ | ||||||||
2474 | vect_dissolve_slp_only_groups (loop_vinfo); | ||||||||
2475 | |||||||||
2476 | /* Scan all the remaining operations in the loop that are not subject | ||||||||
2477 | to SLP and make sure they are vectorizable. */ | ||||||||
2478 | ok = vect_analyze_loop_operations (loop_vinfo); | ||||||||
2479 | if (!ok) | ||||||||
2480 | { | ||||||||
2481 | if (dump_enabled_p ()) | ||||||||
2482 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2483 | "bad operation or unsupported loop bound.\n"); | ||||||||
2484 | return ok; | ||||||||
2485 | } | ||||||||
2486 | |||||||||
2487 | /* For now, we don't expect to mix both masking and length approaches for one | ||||||||
2488 | loop, disable it if both are recorded. */ | ||||||||
2489 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p | ||||||||
2490 | && !LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks.is_empty () | ||||||||
2491 | && !LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens.is_empty ()) | ||||||||
2492 | { | ||||||||
2493 | if (dump_enabled_p ()) | ||||||||
2494 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2495 | "can't vectorize a loop with partial vectors" | ||||||||
2496 | " because we don't expect to mix different" | ||||||||
2497 | " approaches with partial vectors for the" | ||||||||
2498 | " same loop.\n"); | ||||||||
2499 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p = false; | ||||||||
2500 | } | ||||||||
2501 | |||||||||
2502 | /* If we still have the option of using partial vectors, | ||||||||
2503 | check whether we can generate the necessary loop controls. */ | ||||||||
2504 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p | ||||||||
2505 | && !vect_verify_full_masking (loop_vinfo) | ||||||||
2506 | && !vect_verify_loop_lens (loop_vinfo)) | ||||||||
2507 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p = false; | ||||||||
2508 | |||||||||
2509 | /* If we're vectorizing an epilogue loop, the vectorized loop either needs | ||||||||
2510 | to be able to handle fewer than VF scalars, or needs to have a lower VF | ||||||||
2511 | than the main loop. */ | ||||||||
2512 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr) | ||||||||
2513 | && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p | ||||||||
2514 | && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),maybe_le ((orig_loop_vinfo)->vectorization_factor, (loop_vinfo )->vectorization_factor) | ||||||||
2515 | LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))maybe_le ((orig_loop_vinfo)->vectorization_factor, (loop_vinfo )->vectorization_factor)) | ||||||||
2516 | return opt_result::failure_at (vect_location, | ||||||||
2517 | "Vectorization factor too high for" | ||||||||
2518 | " epilogue loop.\n"); | ||||||||
2519 | |||||||||
2520 | /* Decide whether this loop_vinfo should use partial vectors or peeling, | ||||||||
2521 | assuming that the loop will be used as a main loop. We will redo | ||||||||
2522 | this analysis later if we instead decide to use the loop as an | ||||||||
2523 | epilogue loop. */ | ||||||||
2524 | ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false); | ||||||||
2525 | if (!ok) | ||||||||
2526 | return ok; | ||||||||
2527 | |||||||||
2528 | /* Check the costings of the loop make vectorizing worthwhile. */ | ||||||||
2529 | res = vect_analyze_loop_costing (loop_vinfo); | ||||||||
2530 | if (res < 0) | ||||||||
2531 | { | ||||||||
2532 | ok = opt_result::failure_at (vect_location, | ||||||||
2533 | "Loop costings may not be worthwhile.\n"); | ||||||||
2534 | goto again; | ||||||||
2535 | } | ||||||||
2536 | if (!res) | ||||||||
2537 | return opt_result::failure_at (vect_location, | ||||||||
2538 | "Loop costings not worthwhile.\n"); | ||||||||
2539 | |||||||||
2540 | /* If an epilogue loop is required make sure we can create one. */ | ||||||||
2541 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps | ||||||||
2542 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter) | ||||||||
2543 | { | ||||||||
2544 | if (dump_enabled_p ()) | ||||||||
2545 | dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); | ||||||||
2546 | if (!vect_can_advance_ivs_p (loop_vinfo) | ||||||||
2547 | || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop, | ||||||||
2548 | single_exit (LOOP_VINFO_LOOP(loop_vinfo)->loop | ||||||||
2549 | (loop_vinfo)(loop_vinfo)->loop))) | ||||||||
2550 | { | ||||||||
2551 | ok = opt_result::failure_at (vect_location, | ||||||||
2552 | "not vectorized: can't create required " | ||||||||
2553 | "epilog loop\n"); | ||||||||
2554 | goto again; | ||||||||
2555 | } | ||||||||
2556 | } | ||||||||
2557 | |||||||||
2558 | /* During peeling, we need to check if number of loop iterations is | ||||||||
2559 | enough for both peeled prolog loop and vector loop. This check | ||||||||
2560 | can be merged along with threshold check of loop versioning, so | ||||||||
2561 | increase threshold for this case if necessary. | ||||||||
2562 | |||||||||
2563 | If we are analyzing an epilogue we still want to check what its | ||||||||
2564 | versioning threshold would be. If we decide to vectorize the epilogues we | ||||||||
2565 | will want to use the lowest versioning threshold of all epilogues and main | ||||||||
2566 | loop. This will enable us to enter a vectorized epilogue even when | ||||||||
2567 | versioning the loop. We can't simply check whether the epilogue requires | ||||||||
2568 | versioning though since we may have skipped some versioning checks when | ||||||||
2569 | analyzing the epilogue. For instance, checks for alias versioning will be | ||||||||
2570 | skipped when dealing with epilogues as we assume we already checked them | ||||||||
2571 | for the main loop. So instead we always check the 'orig_loop_vinfo'. */ | ||||||||
2572 | if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)(((orig_loop_vinfo)->may_misalign_stmts.length () > 0) || ((orig_loop_vinfo)->comp_alias_ddrs.length () > 0 || ( orig_loop_vinfo)->check_unequal_addrs.length () > 0 || ( orig_loop_vinfo)->lower_bounds.length () > 0) || ((orig_loop_vinfo )->num_iters_assumptions) || ((orig_loop_vinfo)->simd_if_cond ))) | ||||||||
2573 | { | ||||||||
2574 | poly_uint64 niters_th = 0; | ||||||||
2575 | unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th; | ||||||||
2576 | |||||||||
2577 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) | ||||||||
2578 | { | ||||||||
2579 | /* Niters for peeled prolog loop. */ | ||||||||
2580 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0) | ||||||||
2581 | { | ||||||||
2582 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo)(loop_vinfo)->unaligned_dr; | ||||||||
2583 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt)(dr_info->stmt)->vectype; | ||||||||
2584 | niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1; | ||||||||
2585 | } | ||||||||
2586 | else | ||||||||
2587 | niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment; | ||||||||
2588 | } | ||||||||
2589 | |||||||||
2590 | /* Niters for at least one iteration of vectorized loop. */ | ||||||||
2591 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
2592 | niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor; | ||||||||
2593 | /* One additional iteration because of peeling for gap. */ | ||||||||
2594 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps) | ||||||||
2595 | niters_th += 1; | ||||||||
2596 | |||||||||
2597 | /* Use the same condition as vect_transform_loop to decide when to use | ||||||||
2598 | the cost to determine a versioning threshold. */ | ||||||||
2599 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) | ||||||||
2600 | && ordered_p (th, niters_th)) | ||||||||
2601 | niters_th = ordered_max (poly_uint64 (th), niters_th); | ||||||||
2602 | |||||||||
2603 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold = niters_th; | ||||||||
2604 | } | ||||||||
2605 | |||||||||
2606 | gcc_assert (known_eq (vectorization_factor,((void)(!((!maybe_ne (vectorization_factor, (loop_vinfo)-> vectorization_factor))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2607, __FUNCTION__), 0 : 0)) | ||||||||
2607 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)))((void)(!((!maybe_ne (vectorization_factor, (loop_vinfo)-> vectorization_factor))) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2607, __FUNCTION__), 0 : 0)); | ||||||||
2608 | |||||||||
2609 | /* Ok to vectorize! */ | ||||||||
2610 | return opt_result::success (); | ||||||||
2611 | |||||||||
2612 | again: | ||||||||
2613 | /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ | ||||||||
2614 | gcc_assert (!ok)((void)(!(!ok) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2614, __FUNCTION__), 0 : 0)); | ||||||||
2615 | |||||||||
2616 | /* Try again with SLP forced off but if we didn't do any SLP there is | ||||||||
2617 | no point in re-trying. */ | ||||||||
2618 | if (!slp) | ||||||||
2619 | return ok; | ||||||||
2620 | |||||||||
2621 | /* If there are reduction chains re-trying will fail anyway. */ | ||||||||
2622 | if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)(loop_vinfo)->reduction_chains.is_empty ()) | ||||||||
2623 | return ok; | ||||||||
2624 | |||||||||
2625 | /* Likewise if the grouped loads or stores in the SLP cannot be handled | ||||||||
2626 | via interleaving or lane instructions. */ | ||||||||
2627 | slp_instance instance; | ||||||||
2628 | slp_tree node; | ||||||||
2629 | unsigned i, j; | ||||||||
2630 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)for (i = 0; ((loop_vinfo)->slp_instances).iterate ((i), & (instance)); ++(i)) | ||||||||
2631 | { | ||||||||
2632 | stmt_vec_info vinfo; | ||||||||
2633 | vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))((instance)->root)->stmts[0]; | ||||||||
2634 | if (! STMT_VINFO_GROUPED_ACCESS (vinfo)((vinfo)->dr_aux.dr && (((void)(!((vinfo)->dr_aux .dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2634, __FUNCTION__), 0 : 0)), (vinfo)->first_element))) | ||||||||
2635 | continue; | ||||||||
2636 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2636, __FUNCTION__), 0 : 0)), (vinfo)->first_element); | ||||||||
2637 | unsigned int size = DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2637, __FUNCTION__), 0 : 0)), (vinfo)->size); | ||||||||
2638 | tree vectype = STMT_VINFO_VECTYPE (vinfo)(vinfo)->vectype; | ||||||||
2639 | if (! vect_store_lanes_supported (vectype, size, false) | ||||||||
2640 | && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), 1U)) | ||||||||
2641 | && ! vect_grouped_store_supported (vectype, size)) | ||||||||
2642 | return opt_result::failure_at (vinfo->stmt, | ||||||||
2643 | "unsupported grouped store\n"); | ||||||||
2644 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)for (j = 0; ((instance)->loads).iterate ((j), &(node)) ; ++(j)) | ||||||||
2645 | { | ||||||||
2646 | vinfo = SLP_TREE_SCALAR_STMTS (node)(node)->stmts[0]; | ||||||||
2647 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2647, __FUNCTION__), 0 : 0)), (vinfo)->first_element); | ||||||||
2648 | bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2648, __FUNCTION__), 0 : 0)), (vinfo)->next_element); | ||||||||
2649 | size = DR_GROUP_SIZE (vinfo)(((void)(!((vinfo)->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2649, __FUNCTION__), 0 : 0)), (vinfo)->size); | ||||||||
2650 | vectype = STMT_VINFO_VECTYPE (vinfo)(vinfo)->vectype; | ||||||||
2651 | if (! vect_load_lanes_supported (vectype, size, false) | ||||||||
2652 | && ! vect_grouped_load_supported (vectype, single_element_p, | ||||||||
2653 | size)) | ||||||||
2654 | return opt_result::failure_at (vinfo->stmt, | ||||||||
2655 | "unsupported grouped load\n"); | ||||||||
2656 | } | ||||||||
2657 | } | ||||||||
2658 | |||||||||
2659 | if (dump_enabled_p ()) | ||||||||
2660 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2661 | "re-trying with SLP disabled\n"); | ||||||||
2662 | |||||||||
2663 | /* Roll back state appropriately. No SLP this time. */ | ||||||||
2664 | slp = false; | ||||||||
2665 | /* Restore vectorization factor as it were without SLP. */ | ||||||||
2666 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)(loop_vinfo)->vectorization_factor = saved_vectorization_factor; | ||||||||
2667 | /* Free the SLP instances. */ | ||||||||
2668 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)for (j = 0; ((loop_vinfo)->slp_instances).iterate ((j), & (instance)); ++(j)) | ||||||||
2669 | vect_free_slp_instance (instance); | ||||||||
2670 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo)(loop_vinfo)->slp_instances.release (); | ||||||||
2671 | /* Reset SLP type to loop_vect on all stmts. */ | ||||||||
2672 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->num_nodes; ++i) | ||||||||
2673 | { | ||||||||
2674 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)(loop_vinfo)->bbs[i]; | ||||||||
2675 | for (gimple_stmt_iterator si = gsi_start_phis (bb); | ||||||||
2676 | !gsi_end_p (si); gsi_next (&si)) | ||||||||
2677 | { | ||||||||
2678 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); | ||||||||
2679 | STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type = loop_vect; | ||||||||
2680 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_reduction_def | ||||||||
2681 | || STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def) | ||||||||
2682 | { | ||||||||
2683 | /* vectorizable_reduction adjusts reduction stmt def-types, | ||||||||
2684 | restore them to that of the PHI. */ | ||||||||
2685 | STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info))((stmt_info)->reduc_def)->def_type | ||||||||
2686 | = STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type; | ||||||||
2687 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize(vect_stmt_to_vectorize ((stmt_info)->reduc_def))->def_type | ||||||||
2688 | (STMT_VINFO_REDUC_DEF (stmt_info)))(vect_stmt_to_vectorize ((stmt_info)->reduc_def))->def_type | ||||||||
2689 | = STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type; | ||||||||
2690 | } | ||||||||
2691 | } | ||||||||
2692 | for (gimple_stmt_iterator si = gsi_start_bb (bb); | ||||||||
2693 | !gsi_end_p (si); gsi_next (&si)) | ||||||||
2694 | { | ||||||||
2695 | if (is_gimple_debug (gsi_stmt (si))) | ||||||||
2696 | continue; | ||||||||
2697 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); | ||||||||
2698 | STMT_SLP_TYPE (stmt_info)(stmt_info)->slp_type = loop_vect; | ||||||||
2699 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p) | ||||||||
2700 | { | ||||||||
2701 | stmt_vec_info pattern_stmt_info | ||||||||
2702 | = STMT_VINFO_RELATED_STMT (stmt_info)(stmt_info)->related_stmt; | ||||||||
2703 | if (STMT_VINFO_SLP_VECT_ONLY (pattern_stmt_info)(pattern_stmt_info)->slp_vect_only_p) | ||||||||
2704 | STMT_VINFO_IN_PATTERN_P (stmt_info)(stmt_info)->in_pattern_p = false; | ||||||||
2705 | |||||||||
2706 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)(stmt_info)->pattern_def_seq; | ||||||||
2707 | STMT_SLP_TYPE (pattern_stmt_info)(pattern_stmt_info)->slp_type = loop_vect; | ||||||||
2708 | for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq)gsi_start_1 (&(pattern_def_seq)); | ||||||||
2709 | !gsi_end_p (pi); gsi_next (&pi)) | ||||||||
2710 | STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))(loop_vinfo->lookup_stmt (gsi_stmt (pi)))->slp_type | ||||||||
2711 | = loop_vect; | ||||||||
2712 | } | ||||||||
2713 | } | ||||||||
2714 | } | ||||||||
2715 | /* Free optimized alias test DDRS. */ | ||||||||
2716 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.truncate (0); | ||||||||
2717 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.release (); | ||||||||
2718 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.release (); | ||||||||
2719 | /* Reset target cost data. */ | ||||||||
2720 | destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)(loop_vinfo)->target_cost_data); | ||||||||
2721 | LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)(loop_vinfo)->target_cost_data | ||||||||
2722 | = init_cost (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop); | ||||||||
2723 | /* Reset accumulated rgroup information. */ | ||||||||
2724 | release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo)(loop_vinfo)->masks); | ||||||||
2725 | release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo)(loop_vinfo)->lens); | ||||||||
2726 | /* Reset assorted flags. */ | ||||||||
2727 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)(loop_vinfo)->peeling_for_niter = false; | ||||||||
2728 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps = false; | ||||||||
2729 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)(loop_vinfo)->th = 0; | ||||||||
2730 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold = 0; | ||||||||
2731 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->can_use_partial_vectors_p | ||||||||
2732 | = saved_can_use_partial_vectors_p; | ||||||||
2733 | |||||||||
2734 | goto start_over; | ||||||||
2735 | } | ||||||||
2736 | |||||||||
2737 | /* Return true if vectorizing a loop using NEW_LOOP_VINFO appears | ||||||||
2738 | to be better than vectorizing it using OLD_LOOP_VINFO. Assume that | ||||||||
2739 | OLD_LOOP_VINFO is better unless something specifically indicates | ||||||||
2740 | otherwise. | ||||||||
2741 | |||||||||
2742 | Note that this deliberately isn't a partial order. */ | ||||||||
2743 | |||||||||
2744 | static bool | ||||||||
2745 | vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, | ||||||||
2746 | loop_vec_info old_loop_vinfo) | ||||||||
2747 | { | ||||||||
2748 | struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo)(new_loop_vinfo)->loop; | ||||||||
2749 | gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop)((void)(!((old_loop_vinfo)->loop == loop) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2749, __FUNCTION__), 0 : 0)); | ||||||||
2750 | |||||||||
2751 | poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo)(new_loop_vinfo)->vectorization_factor; | ||||||||
2752 | poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo)(old_loop_vinfo)->vectorization_factor; | ||||||||
2753 | |||||||||
2754 | /* Always prefer a VF of loop->simdlen over any other VF. */ | ||||||||
2755 | if (loop->simdlen) | ||||||||
2756 | { | ||||||||
2757 | bool new_simdlen_p = known_eq (new_vf, loop->simdlen)(!maybe_ne (new_vf, loop->simdlen)); | ||||||||
2758 | bool old_simdlen_p = known_eq (old_vf, loop->simdlen)(!maybe_ne (old_vf, loop->simdlen)); | ||||||||
2759 | if (new_simdlen_p != old_simdlen_p) | ||||||||
2760 | return new_simdlen_p; | ||||||||
2761 | } | ||||||||
2762 | |||||||||
2763 | /* Limit the VFs to what is likely to be the maximum number of iterations, | ||||||||
2764 | to handle cases in which at least one loop_vinfo is fully-masked. */ | ||||||||
2765 | HOST_WIDE_INTlong estimated_max_niter = likely_max_stmt_executions_int (loop); | ||||||||
2766 | if (estimated_max_niter != -1) | ||||||||
2767 | { | ||||||||
2768 | if (known_le (estimated_max_niter, new_vf)(!maybe_lt (new_vf, estimated_max_niter))) | ||||||||
2769 | new_vf = estimated_max_niter; | ||||||||
2770 | if (known_le (estimated_max_niter, old_vf)(!maybe_lt (old_vf, estimated_max_niter))) | ||||||||
2771 | old_vf = estimated_max_niter; | ||||||||
2772 | } | ||||||||
2773 | |||||||||
2774 | /* Check whether the (fractional) cost per scalar iteration is lower | ||||||||
2775 | or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf. */ | ||||||||
2776 | poly_int64 rel_new = new_loop_vinfo->vec_inside_cost * old_vf; | ||||||||
2777 | poly_int64 rel_old = old_loop_vinfo->vec_inside_cost * new_vf; | ||||||||
2778 | |||||||||
2779 | HOST_WIDE_INTlong est_rel_new_min | ||||||||
2780 | = estimated_poly_value (rel_new, POLY_VALUE_MIN); | ||||||||
2781 | HOST_WIDE_INTlong est_rel_new_max | ||||||||
2782 | = estimated_poly_value (rel_new, POLY_VALUE_MAX); | ||||||||
2783 | |||||||||
2784 | HOST_WIDE_INTlong est_rel_old_min | ||||||||
2785 | = estimated_poly_value (rel_old, POLY_VALUE_MIN); | ||||||||
2786 | HOST_WIDE_INTlong est_rel_old_max | ||||||||
2787 | = estimated_poly_value (rel_old, POLY_VALUE_MAX); | ||||||||
2788 | |||||||||
2789 | /* Check first if we can make out an unambigous total order from the minimum | ||||||||
2790 | and maximum estimates. */ | ||||||||
2791 | if (est_rel_new_min < est_rel_old_min | ||||||||
2792 | && est_rel_new_max < est_rel_old_max) | ||||||||
2793 | return true; | ||||||||
2794 | else if (est_rel_old_min < est_rel_new_min | ||||||||
2795 | && est_rel_old_max < est_rel_new_max) | ||||||||
2796 | return false; | ||||||||
2797 | /* When old_loop_vinfo uses a variable vectorization factor, | ||||||||
2798 | we know that it has a lower cost for at least one runtime VF. | ||||||||
2799 | However, we don't know how likely that VF is. | ||||||||
2800 | |||||||||
2801 | One option would be to compare the costs for the estimated VFs. | ||||||||
2802 | The problem is that that can put too much pressure on the cost | ||||||||
2803 | model. E.g. if the estimated VF is also the lowest possible VF, | ||||||||
2804 | and if old_loop_vinfo is 1 unit worse than new_loop_vinfo | ||||||||
2805 | for the estimated VF, we'd then choose new_loop_vinfo even | ||||||||
2806 | though (a) new_loop_vinfo might not actually be better than | ||||||||
2807 | old_loop_vinfo for that VF and (b) it would be significantly | ||||||||
2808 | worse at larger VFs. | ||||||||
2809 | |||||||||
2810 | Here we go for a hacky compromise: pick new_loop_vinfo if it is | ||||||||
2811 | no more expensive than old_loop_vinfo even after doubling the | ||||||||
2812 | estimated old_loop_vinfo VF. For all but trivial loops, this | ||||||||
2813 | ensures that we only pick new_loop_vinfo if it is significantly | ||||||||
2814 | better than old_loop_vinfo at the estimated VF. */ | ||||||||
2815 | |||||||||
2816 | if (est_rel_old_min != est_rel_new_min | ||||||||
2817 | || est_rel_old_max != est_rel_new_max) | ||||||||
2818 | { | ||||||||
2819 | HOST_WIDE_INTlong est_rel_new_likely | ||||||||
2820 | = estimated_poly_value (rel_new, POLY_VALUE_LIKELY); | ||||||||
2821 | HOST_WIDE_INTlong est_rel_old_likely | ||||||||
2822 | = estimated_poly_value (rel_old, POLY_VALUE_LIKELY); | ||||||||
2823 | |||||||||
2824 | return est_rel_new_likely * 2 <= est_rel_old_likely; | ||||||||
2825 | } | ||||||||
2826 | |||||||||
2827 | /* If there's nothing to choose between the loop bodies, see whether | ||||||||
2828 | there's a difference in the prologue and epilogue costs. */ | ||||||||
2829 | if (new_loop_vinfo->vec_outside_cost != old_loop_vinfo->vec_outside_cost) | ||||||||
2830 | return new_loop_vinfo->vec_outside_cost < old_loop_vinfo->vec_outside_cost; | ||||||||
2831 | |||||||||
2832 | return false; | ||||||||
2833 | } | ||||||||
2834 | |||||||||
2835 | /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return | ||||||||
2836 | true if we should. */ | ||||||||
2837 | |||||||||
2838 | static bool | ||||||||
2839 | vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, | ||||||||
2840 | loop_vec_info old_loop_vinfo) | ||||||||
2841 | { | ||||||||
2842 | if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) | ||||||||
2843 | return false; | ||||||||
2844 | |||||||||
2845 | if (dump_enabled_p ()) | ||||||||
2846 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2847 | "***** Preferring vector mode %s to vector mode %s\n", | ||||||||
2848 | GET_MODE_NAME (new_loop_vinfo->vector_mode)mode_name[new_loop_vinfo->vector_mode], | ||||||||
2849 | GET_MODE_NAME (old_loop_vinfo->vector_mode)mode_name[old_loop_vinfo->vector_mode]); | ||||||||
2850 | return true; | ||||||||
2851 | } | ||||||||
2852 | |||||||||
2853 | /* If LOOP_VINFO is already a main loop, return it unmodified. Otherwise | ||||||||
2854 | try to reanalyze it as a main loop. Return the loop_vinfo on success | ||||||||
2855 | and null on failure. */ | ||||||||
2856 | |||||||||
2857 | static loop_vec_info | ||||||||
2858 | vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts) | ||||||||
2859 | { | ||||||||
2860 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr)) | ||||||||
2861 | return loop_vinfo; | ||||||||
2862 | |||||||||
2863 | if (dump_enabled_p ()) | ||||||||
2864 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2865 | "***** Reanalyzing as a main loop with vector mode %s\n", | ||||||||
2866 | GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]); | ||||||||
2867 | |||||||||
2868 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
2869 | vec_info_shared *shared = loop_vinfo->shared; | ||||||||
2870 | opt_loop_vec_info main_loop_vinfo = vect_analyze_loop_form (loop, shared); | ||||||||
2871 | gcc_assert (main_loop_vinfo)((void)(!(main_loop_vinfo) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2871, __FUNCTION__), 0 : 0)); | ||||||||
2872 | |||||||||
2873 | main_loop_vinfo->vector_mode = loop_vinfo->vector_mode; | ||||||||
2874 | |||||||||
2875 | bool fatal = false; | ||||||||
2876 | bool res = vect_analyze_loop_2 (main_loop_vinfo, fatal, n_stmts); | ||||||||
2877 | loop->aux = NULLnullptr; | ||||||||
2878 | if (!res) | ||||||||
2879 | { | ||||||||
2880 | if (dump_enabled_p ()) | ||||||||
2881 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2882 | "***** Failed to analyze main loop with vector" | ||||||||
2883 | " mode %s\n", | ||||||||
2884 | GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]); | ||||||||
2885 | delete main_loop_vinfo; | ||||||||
2886 | return NULLnullptr; | ||||||||
2887 | } | ||||||||
2888 | LOOP_VINFO_VECTORIZABLE_P (main_loop_vinfo)(main_loop_vinfo)->vectorizable = 1; | ||||||||
2889 | return main_loop_vinfo; | ||||||||
2890 | } | ||||||||
2891 | |||||||||
2892 | /* Function vect_analyze_loop. | ||||||||
2893 | |||||||||
2894 | Apply a set of analyses on LOOP, and create a loop_vec_info struct | ||||||||
2895 | for it. The different analyses will record information in the | ||||||||
2896 | loop_vec_info struct. */ | ||||||||
2897 | opt_loop_vec_info | ||||||||
2898 | vect_analyze_loop (class loop *loop, vec_info_shared *shared) | ||||||||
2899 | { | ||||||||
2900 | auto_vector_modes vector_modes; | ||||||||
2901 | |||||||||
2902 | /* Autodetect first vector size we try. */ | ||||||||
2903 | unsigned int autovec_flags | ||||||||
2904 | = targetm.vectorize.autovectorize_vector_modes (&vector_modes, | ||||||||
2905 | loop->simdlen != 0); | ||||||||
2906 | unsigned int mode_i = 0; | ||||||||
2907 | |||||||||
2908 | DUMP_VECT_SCOPE ("analyze_loop_nest")auto_dump_scope scope ("analyze_loop_nest", vect_location); | ||||||||
2909 | |||||||||
2910 | if (loop_outer (loop) | ||||||||
2911 | && loop_vec_info_for_loop (loop_outer (loop)) | ||||||||
2912 | && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))(loop_vec_info_for_loop (loop_outer (loop)))->vectorizable) | ||||||||
2913 | return opt_loop_vec_info::failure_at (vect_location, | ||||||||
2914 | "outer-loop already vectorized.\n"); | ||||||||
2915 | |||||||||
2916 | if (!find_loop_nest (loop, &shared->loop_nest)) | ||||||||
2917 | return opt_loop_vec_info::failure_at | ||||||||
2918 | (vect_location, | ||||||||
2919 | "not vectorized: loop nest containing two or more consecutive inner" | ||||||||
2920 | " loops cannot be vectorized\n"); | ||||||||
2921 | |||||||||
2922 | unsigned n_stmts = 0; | ||||||||
2923 | machine_mode autodetected_vector_mode = VOIDmode((void) 0, E_VOIDmode); | ||||||||
2924 | opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); | ||||||||
2925 | machine_mode next_vector_mode = VOIDmode((void) 0, E_VOIDmode); | ||||||||
2926 | poly_uint64 lowest_th = 0; | ||||||||
2927 | unsigned vectorized_loops = 0; | ||||||||
2928 | bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS) | ||||||||
2929 | && !unlimited_cost_model (loop)); | ||||||||
2930 | |||||||||
2931 | bool vect_epilogues = false; | ||||||||
2932 | opt_result res = opt_result::success (); | ||||||||
2933 | unsigned HOST_WIDE_INTlong simdlen = loop->simdlen; | ||||||||
2934 | while (1) | ||||||||
2935 | { | ||||||||
2936 | /* Check the CFG characteristics of the loop (nesting, entry/exit). */ | ||||||||
2937 | opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared); | ||||||||
2938 | if (!loop_vinfo) | ||||||||
2939 | { | ||||||||
2940 | if (dump_enabled_p ()) | ||||||||
2941 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
2942 | "bad loop form.\n"); | ||||||||
2943 | gcc_checking_assert (first_loop_vinfo == NULL)((void)(!(first_loop_vinfo == nullptr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 2943, __FUNCTION__), 0 : 0)); | ||||||||
2944 | return loop_vinfo; | ||||||||
2945 | } | ||||||||
2946 | loop_vinfo->vector_mode = next_vector_mode; | ||||||||
2947 | |||||||||
2948 | bool fatal = false; | ||||||||
2949 | |||||||||
2950 | /* When pick_lowest_cost_p is true, we should in principle iterate | ||||||||
2951 | over all the loop_vec_infos that LOOP_VINFO could replace and | ||||||||
2952 | try to vectorize LOOP_VINFO under the same conditions. | ||||||||
2953 | E.g. when trying to replace an epilogue loop, we should vectorize | ||||||||
2954 | LOOP_VINFO as an epilogue loop with the same VF limit. When trying | ||||||||
2955 | to replace the main loop, we should vectorize LOOP_VINFO as a main | ||||||||
2956 | loop too. | ||||||||
2957 | |||||||||
2958 | However, autovectorize_vector_modes is usually sorted as follows: | ||||||||
2959 | |||||||||
2960 | - Modes that naturally produce lower VFs usually follow modes that | ||||||||
2961 | naturally produce higher VFs. | ||||||||
2962 | |||||||||
2963 | - When modes naturally produce the same VF, maskable modes | ||||||||
2964 | usually follow unmaskable ones, so that the maskable mode | ||||||||
2965 | can be used to vectorize the epilogue of the unmaskable mode. | ||||||||
2966 | |||||||||
2967 | This order is preferred because it leads to the maximum | ||||||||
2968 | epilogue vectorization opportunities. Targets should only use | ||||||||
2969 | a different order if they want to make wide modes available while | ||||||||
2970 | disparaging them relative to earlier, smaller modes. The assumption | ||||||||
2971 | in that case is that the wider modes are more expensive in some | ||||||||
2972 | way that isn't reflected directly in the costs. | ||||||||
2973 | |||||||||
2974 | There should therefore be few interesting cases in which | ||||||||
2975 | LOOP_VINFO fails when treated as an epilogue loop, succeeds when | ||||||||
2976 | treated as a standalone loop, and ends up being genuinely cheaper | ||||||||
2977 | than FIRST_LOOP_VINFO. */ | ||||||||
2978 | if (vect_epilogues) | ||||||||
2979 | LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info = first_loop_vinfo; | ||||||||
2980 | |||||||||
2981 | res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); | ||||||||
2982 | if (mode_i == 0) | ||||||||
2983 | autodetected_vector_mode = loop_vinfo->vector_mode; | ||||||||
2984 | if (dump_enabled_p ()) | ||||||||
2985 | { | ||||||||
2986 | if (res) | ||||||||
2987 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2988 | "***** Analysis succeeded with vector mode %s\n", | ||||||||
2989 | GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]); | ||||||||
2990 | else | ||||||||
2991 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
2992 | "***** Analysis failed with vector mode %s\n", | ||||||||
2993 | GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]); | ||||||||
2994 | } | ||||||||
2995 | |||||||||
2996 | loop->aux = NULLnullptr; | ||||||||
2997 | |||||||||
2998 | if (!fatal) | ||||||||
2999 | while (mode_i < vector_modes.length () | ||||||||
3000 | && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i])) | ||||||||
3001 | { | ||||||||
3002 | if (dump_enabled_p ()) | ||||||||
3003 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3004 | "***** The result for vector mode %s would" | ||||||||
3005 | " be the same\n", | ||||||||
3006 | GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]]); | ||||||||
3007 | mode_i += 1; | ||||||||
3008 | } | ||||||||
3009 | |||||||||
3010 | if (res) | ||||||||
3011 | { | ||||||||
3012 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)(loop_vinfo)->vectorizable = 1; | ||||||||
3013 | vectorized_loops++; | ||||||||
3014 | |||||||||
3015 | /* Once we hit the desired simdlen for the first time, | ||||||||
3016 | discard any previous attempts. */ | ||||||||
3017 | if (simdlen | ||||||||
3018 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)(!maybe_ne ((loop_vinfo)->vectorization_factor, simdlen))) | ||||||||
3019 | { | ||||||||
3020 | delete first_loop_vinfo; | ||||||||
3021 | first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); | ||||||||
3022 | LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)(loop_vinfo)->orig_loop_info = NULLnullptr; | ||||||||
3023 | simdlen = 0; | ||||||||
3024 | } | ||||||||
3025 | else if (pick_lowest_cost_p && first_loop_vinfo) | ||||||||
3026 | { | ||||||||
3027 | /* Keep trying to roll back vectorization attempts while the | ||||||||
3028 | loop_vec_infos they produced were worse than this one. */ | ||||||||
3029 | vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos; | ||||||||
3030 | while (!vinfos.is_empty () | ||||||||
3031 | && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) | ||||||||
3032 | { | ||||||||
3033 | gcc_assert (vect_epilogues)((void)(!(vect_epilogues) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3033, __FUNCTION__), 0 : 0)); | ||||||||
3034 | delete vinfos.pop (); | ||||||||
3035 | } | ||||||||
3036 | if (vinfos.is_empty () | ||||||||
3037 | && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) | ||||||||
3038 | { | ||||||||
3039 | loop_vec_info main_loop_vinfo | ||||||||
3040 | = vect_reanalyze_as_main_loop (loop_vinfo, &n_stmts); | ||||||||
3041 | if (main_loop_vinfo == loop_vinfo) | ||||||||
3042 | { | ||||||||
3043 | delete first_loop_vinfo; | ||||||||
3044 | first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); | ||||||||
3045 | } | ||||||||
3046 | else if (main_loop_vinfo | ||||||||
3047 | && vect_joust_loop_vinfos (main_loop_vinfo, | ||||||||
3048 | first_loop_vinfo)) | ||||||||
3049 | { | ||||||||
3050 | delete first_loop_vinfo; | ||||||||
3051 | first_loop_vinfo = opt_loop_vec_info::success (NULLnullptr); | ||||||||
3052 | delete loop_vinfo; | ||||||||
3053 | loop_vinfo | ||||||||
3054 | = opt_loop_vec_info::success (main_loop_vinfo); | ||||||||
3055 | } | ||||||||
3056 | else | ||||||||
3057 | delete main_loop_vinfo; | ||||||||
3058 | } | ||||||||
3059 | } | ||||||||
3060 | |||||||||
3061 | if (first_loop_vinfo == NULLnullptr) | ||||||||
3062 | { | ||||||||
3063 | first_loop_vinfo = loop_vinfo; | ||||||||
3064 | lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo)(first_loop_vinfo)->versioning_threshold; | ||||||||
3065 | } | ||||||||
3066 | else if (vect_epilogues | ||||||||
3067 | /* For now only allow one epilogue loop. */ | ||||||||
3068 | && first_loop_vinfo->epilogue_vinfos.is_empty ()) | ||||||||
3069 | { | ||||||||
3070 | first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); | ||||||||
3071 | poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo)(loop_vinfo)->versioning_threshold; | ||||||||
3072 | gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)((void)(!(!(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((loop_vinfo)->comp_alias_ddrs.length () > 0 || ( loop_vinfo)->check_unequal_addrs.length () > 0 || (loop_vinfo )->lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) || maybe_ne (lowest_th, 0U)) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3073, __FUNCTION__), 0 : 0)) | ||||||||
3073 | || maybe_ne (lowest_th, 0U))((void)(!(!(((loop_vinfo)->may_misalign_stmts.length () > 0) || ((loop_vinfo)->comp_alias_ddrs.length () > 0 || ( loop_vinfo)->check_unequal_addrs.length () > 0 || (loop_vinfo )->lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond)) || maybe_ne (lowest_th, 0U)) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3073, __FUNCTION__), 0 : 0)); | ||||||||
3074 | /* Keep track of the known smallest versioning | ||||||||
3075 | threshold. */ | ||||||||
3076 | if (ordered_p (lowest_th, th)) | ||||||||
3077 | lowest_th = ordered_min (lowest_th, th); | ||||||||
3078 | } | ||||||||
3079 | else | ||||||||
3080 | { | ||||||||
3081 | delete loop_vinfo; | ||||||||
3082 | loop_vinfo = opt_loop_vec_info::success (NULLnullptr); | ||||||||
3083 | } | ||||||||
3084 | |||||||||
3085 | /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is | ||||||||
3086 | enabled, SIMDUID is not set, it is the innermost loop and we have | ||||||||
3087 | either already found the loop's SIMDLEN or there was no SIMDLEN to | ||||||||
3088 | begin with. | ||||||||
3089 | TODO: Enable epilogue vectorization for loops with SIMDUID set. */ | ||||||||
3090 | vect_epilogues = (!simdlen | ||||||||
3091 | && loop->inner == NULLnullptr | ||||||||
3092 | && param_vect_epilogues_nomaskglobal_options.x_param_vect_epilogues_nomask | ||||||||
3093 | && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)(first_loop_vinfo)->peeling_for_niter | ||||||||
3094 | && !loop->simduid | ||||||||
3095 | /* For now only allow one epilogue loop, but allow | ||||||||
3096 | pick_lowest_cost_p to replace it. */ | ||||||||
3097 | && (first_loop_vinfo->epilogue_vinfos.is_empty () | ||||||||
3098 | || pick_lowest_cost_p)); | ||||||||
3099 | |||||||||
3100 | /* Commit to first_loop_vinfo if we have no reason to try | ||||||||
3101 | alternatives. */ | ||||||||
3102 | if (!simdlen && !vect_epilogues && !pick_lowest_cost_p) | ||||||||
3103 | break; | ||||||||
3104 | } | ||||||||
3105 | else | ||||||||
3106 | { | ||||||||
3107 | delete loop_vinfo; | ||||||||
3108 | loop_vinfo = opt_loop_vec_info::success (NULLnullptr); | ||||||||
3109 | if (fatal) | ||||||||
3110 | { | ||||||||
3111 | gcc_checking_assert (first_loop_vinfo == NULL)((void)(!(first_loop_vinfo == nullptr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3111, __FUNCTION__), 0 : 0)); | ||||||||
3112 | break; | ||||||||
3113 | } | ||||||||
3114 | } | ||||||||
3115 | |||||||||
3116 | /* Handle the case that the original loop can use partial | ||||||||
3117 | vectorization, but want to only adopt it for the epilogue. | ||||||||
3118 | The retry should be in the same mode as original. */ | ||||||||
3119 | if (vect_epilogues | ||||||||
3120 | && loop_vinfo | ||||||||
3121 | && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->epil_using_partial_vectors_p) | ||||||||
3122 | { | ||||||||
3123 | gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)((void)(!((loop_vinfo)->can_use_partial_vectors_p && !(loop_vinfo)->using_partial_vectors_p) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3124, __FUNCTION__), 0 : 0)) | ||||||||
3124 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))((void)(!((loop_vinfo)->can_use_partial_vectors_p && !(loop_vinfo)->using_partial_vectors_p) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3124, __FUNCTION__), 0 : 0)); | ||||||||
3125 | if (dump_enabled_p ()) | ||||||||
3126 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3127 | "***** Re-trying analysis with same vector mode" | ||||||||
3128 | " %s for epilogue with partial vectors.\n", | ||||||||
3129 | GET_MODE_NAME (loop_vinfo->vector_mode)mode_name[loop_vinfo->vector_mode]); | ||||||||
3130 | continue; | ||||||||
3131 | } | ||||||||
3132 | |||||||||
3133 | if (mode_i < vector_modes.length () | ||||||||
3134 | && VECTOR_MODE_P (autodetected_vector_mode)(((enum mode_class) mode_class[autodetected_vector_mode]) == MODE_VECTOR_BOOL || ((enum mode_class) mode_class[autodetected_vector_mode]) == MODE_VECTOR_INT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_FLOAT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_FRACT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_UFRACT || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_ACCUM || ((enum mode_class) mode_class[autodetected_vector_mode ]) == MODE_VECTOR_UACCUM) | ||||||||
3135 | && (related_vector_mode (vector_modes[mode_i], | ||||||||
3136 | GET_MODE_INNER (autodetected_vector_mode)(mode_to_inner (autodetected_vector_mode))) | ||||||||
3137 | == autodetected_vector_mode) | ||||||||
3138 | && (related_vector_mode (autodetected_vector_mode, | ||||||||
3139 | GET_MODE_INNER (vector_modes[mode_i])(mode_to_inner (vector_modes[mode_i]))) | ||||||||
3140 | == vector_modes[mode_i])) | ||||||||
3141 | { | ||||||||
3142 | if (dump_enabled_p ()) | ||||||||
3143 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3144 | "***** Skipping vector mode %s, which would" | ||||||||
3145 | " repeat the analysis for %s\n", | ||||||||
3146 | GET_MODE_NAME (vector_modes[mode_i])mode_name[vector_modes[mode_i]], | ||||||||
3147 | GET_MODE_NAME (autodetected_vector_mode)mode_name[autodetected_vector_mode]); | ||||||||
3148 | mode_i += 1; | ||||||||
3149 | } | ||||||||
3150 | |||||||||
3151 | if (mode_i == vector_modes.length () | ||||||||
3152 | || autodetected_vector_mode == VOIDmode((void) 0, E_VOIDmode)) | ||||||||
3153 | break; | ||||||||
3154 | |||||||||
3155 | /* Try the next biggest vector size. */ | ||||||||
3156 | next_vector_mode = vector_modes[mode_i++]; | ||||||||
3157 | if (dump_enabled_p ()) | ||||||||
3158 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3159 | "***** Re-trying analysis with vector mode %s\n", | ||||||||
3160 | GET_MODE_NAME (next_vector_mode)mode_name[next_vector_mode]); | ||||||||
3161 | } | ||||||||
3162 | |||||||||
3163 | if (first_loop_vinfo) | ||||||||
3164 | { | ||||||||
3165 | loop->aux = (loop_vec_info) first_loop_vinfo; | ||||||||
3166 | if (dump_enabled_p ()) | ||||||||
3167 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3168 | "***** Choosing vector mode %s\n", | ||||||||
3169 | GET_MODE_NAME (first_loop_vinfo->vector_mode)mode_name[first_loop_vinfo->vector_mode]); | ||||||||
3170 | LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo)(first_loop_vinfo)->versioning_threshold = lowest_th; | ||||||||
3171 | return first_loop_vinfo; | ||||||||
3172 | } | ||||||||
3173 | |||||||||
3174 | return opt_loop_vec_info::propagate_failure (res); | ||||||||
3175 | } | ||||||||
3176 | |||||||||
3177 | /* Return true if there is an in-order reduction function for CODE, storing | ||||||||
3178 | it in *REDUC_FN if so. */ | ||||||||
3179 | |||||||||
3180 | static bool | ||||||||
3181 | fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn) | ||||||||
3182 | { | ||||||||
3183 | switch (code) | ||||||||
3184 | { | ||||||||
3185 | case PLUS_EXPR: | ||||||||
3186 | *reduc_fn = IFN_FOLD_LEFT_PLUS; | ||||||||
3187 | return true; | ||||||||
3188 | |||||||||
3189 | default: | ||||||||
3190 | return false; | ||||||||
3191 | } | ||||||||
3192 | } | ||||||||
3193 | |||||||||
3194 | /* Function reduction_fn_for_scalar_code | ||||||||
3195 | |||||||||
3196 | Input: | ||||||||
3197 | CODE - tree_code of a reduction operations. | ||||||||
3198 | |||||||||
3199 | Output: | ||||||||
3200 | REDUC_FN - the corresponding internal function to be used to reduce the | ||||||||
3201 | vector of partial results into a single scalar result, or IFN_LAST | ||||||||
3202 | if the operation is a supported reduction operation, but does not have | ||||||||
3203 | such an internal function. | ||||||||
3204 | |||||||||
3205 | Return FALSE if CODE currently cannot be vectorized as reduction. */ | ||||||||
3206 | |||||||||
3207 | static bool | ||||||||
3208 | reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) | ||||||||
3209 | { | ||||||||
3210 | switch (code) | ||||||||
3211 | { | ||||||||
3212 | case MAX_EXPR: | ||||||||
3213 | *reduc_fn = IFN_REDUC_MAX; | ||||||||
3214 | return true; | ||||||||
3215 | |||||||||
3216 | case MIN_EXPR: | ||||||||
3217 | *reduc_fn = IFN_REDUC_MIN; | ||||||||
3218 | return true; | ||||||||
3219 | |||||||||
3220 | case PLUS_EXPR: | ||||||||
3221 | *reduc_fn = IFN_REDUC_PLUS; | ||||||||
3222 | return true; | ||||||||
3223 | |||||||||
3224 | case BIT_AND_EXPR: | ||||||||
3225 | *reduc_fn = IFN_REDUC_AND; | ||||||||
3226 | return true; | ||||||||
3227 | |||||||||
3228 | case BIT_IOR_EXPR: | ||||||||
3229 | *reduc_fn = IFN_REDUC_IOR; | ||||||||
3230 | return true; | ||||||||
3231 | |||||||||
3232 | case BIT_XOR_EXPR: | ||||||||
3233 | *reduc_fn = IFN_REDUC_XOR; | ||||||||
3234 | return true; | ||||||||
3235 | |||||||||
3236 | case MULT_EXPR: | ||||||||
3237 | case MINUS_EXPR: | ||||||||
3238 | *reduc_fn = IFN_LAST; | ||||||||
3239 | return true; | ||||||||
3240 | |||||||||
3241 | default: | ||||||||
3242 | return false; | ||||||||
3243 | } | ||||||||
3244 | } | ||||||||
3245 | |||||||||
3246 | /* If there is a neutral value X such that SLP reduction NODE would not | ||||||||
3247 | be affected by the introduction of additional X elements, return that X, | ||||||||
3248 | otherwise return null. CODE is the code of the reduction and VECTOR_TYPE | ||||||||
3249 | is the vector type that would hold element X. REDUC_CHAIN is true if | ||||||||
3250 | the SLP statements perform a single reduction, false if each statement | ||||||||
3251 | performs an independent reduction. */ | ||||||||
3252 | |||||||||
3253 | static tree | ||||||||
3254 | neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, | ||||||||
3255 | tree_code code, bool reduc_chain) | ||||||||
3256 | { | ||||||||
3257 | vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node)(slp_node)->stmts; | ||||||||
3258 | stmt_vec_info stmt_vinfo = stmts[0]; | ||||||||
3259 | tree scalar_type = TREE_TYPE (vector_type)((contains_struct_check ((vector_type), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3259, __FUNCTION__))->typed.type); | ||||||||
3260 | class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; | ||||||||
3261 | gcc_assert (loop)((void)(!(loop) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3261, __FUNCTION__), 0 : 0)); | ||||||||
3262 | |||||||||
3263 | switch (code) | ||||||||
3264 | { | ||||||||
3265 | case WIDEN_SUM_EXPR: | ||||||||
3266 | case DOT_PROD_EXPR: | ||||||||
3267 | case SAD_EXPR: | ||||||||
3268 | case PLUS_EXPR: | ||||||||
3269 | case MINUS_EXPR: | ||||||||
3270 | case BIT_IOR_EXPR: | ||||||||
3271 | case BIT_XOR_EXPR: | ||||||||
3272 | return build_zero_cst (scalar_type); | ||||||||
3273 | |||||||||
3274 | case MULT_EXPR: | ||||||||
3275 | return build_one_cst (scalar_type); | ||||||||
3276 | |||||||||
3277 | case BIT_AND_EXPR: | ||||||||
3278 | return build_all_ones_cst (scalar_type); | ||||||||
3279 | |||||||||
3280 | case MAX_EXPR: | ||||||||
3281 | case MIN_EXPR: | ||||||||
3282 | /* For MIN/MAX the initial values are neutral. A reduction chain | ||||||||
3283 | has only a single initial value, so that value is neutral for | ||||||||
3284 | all statements. */ | ||||||||
3285 | if (reduc_chain) | ||||||||
3286 | return PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,gimple_phi_arg_def (((stmt_vinfo->stmt)), ((loop_preheader_edge (loop))->dest_idx)) | ||||||||
3287 | loop_preheader_edge (loop))gimple_phi_arg_def (((stmt_vinfo->stmt)), ((loop_preheader_edge (loop))->dest_idx)); | ||||||||
3288 | return NULL_TREE(tree) nullptr; | ||||||||
3289 | |||||||||
3290 | default: | ||||||||
3291 | return NULL_TREE(tree) nullptr; | ||||||||
3292 | } | ||||||||
3293 | } | ||||||||
3294 | |||||||||
3295 | /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement | ||||||||
3296 | STMT is printed with a message MSG. */ | ||||||||
3297 | |||||||||
3298 | static void | ||||||||
3299 | report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) | ||||||||
3300 | { | ||||||||
3301 | dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); | ||||||||
3302 | } | ||||||||
3303 | |||||||||
3304 | /* Return true if we need an in-order reduction for operation CODE | ||||||||
3305 | on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer | ||||||||
3306 | overflow must wrap. */ | ||||||||
3307 | |||||||||
3308 | bool | ||||||||
3309 | needs_fold_left_reduction_p (tree type, tree_code code) | ||||||||
3310 | { | ||||||||
3311 | /* CHECKME: check for !flag_finite_math_only too? */ | ||||||||
3312 | if (SCALAR_FLOAT_TYPE_P (type)(((enum tree_code) (type)->base.code) == REAL_TYPE)) | ||||||||
3313 | switch (code) | ||||||||
3314 | { | ||||||||
3315 | case MIN_EXPR: | ||||||||
3316 | case MAX_EXPR: | ||||||||
3317 | return false; | ||||||||
3318 | |||||||||
3319 | default: | ||||||||
3320 | return !flag_associative_mathglobal_options.x_flag_associative_math; | ||||||||
3321 | } | ||||||||
3322 | |||||||||
3323 | if (INTEGRAL_TYPE_P (type)(((enum tree_code) (type)->base.code) == ENUMERAL_TYPE || ( (enum tree_code) (type)->base.code) == BOOLEAN_TYPE || ((enum tree_code) (type)->base.code) == INTEGER_TYPE)) | ||||||||
3324 | { | ||||||||
3325 | if (!operation_no_trapping_overflow (type, code)) | ||||||||
3326 | return true; | ||||||||
3327 | return false; | ||||||||
3328 | } | ||||||||
3329 | |||||||||
3330 | if (SAT_FIXED_POINT_TYPE_P (type)(((enum tree_code) (type)->base.code) == FIXED_POINT_TYPE && ((tree_not_check4 ((type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3330, __FUNCTION__, (RECORD_TYPE), (UNION_TYPE), (QUAL_UNION_TYPE ), (ARRAY_TYPE)))->base.u.bits.saturating_flag))) | ||||||||
3331 | return true; | ||||||||
3332 | |||||||||
3333 | return false; | ||||||||
3334 | } | ||||||||
3335 | |||||||||
3336 | /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and | ||||||||
3337 | has a handled computation expression. Store the main reduction | ||||||||
3338 | operation in *CODE. */ | ||||||||
3339 | |||||||||
3340 | static bool | ||||||||
3341 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, | ||||||||
3342 | tree loop_arg, enum tree_code *code, | ||||||||
3343 | vec<std::pair<ssa_op_iter, use_operand_p> > &path) | ||||||||
3344 | { | ||||||||
3345 | auto_bitmap visited; | ||||||||
3346 | tree lookfor = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); | ||||||||
3347 | ssa_op_iter curri; | ||||||||
3348 | use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE0x01); | ||||||||
3349 | while (USE_FROM_PTR (curr)get_use_from_ptr (curr) != loop_arg) | ||||||||
3350 | curr = op_iter_next_use (&curri); | ||||||||
3351 | curri.i = curri.numops; | ||||||||
3352 | do | ||||||||
3353 | { | ||||||||
3354 | path.safe_push (std::make_pair (curri, curr)); | ||||||||
3355 | tree use = USE_FROM_PTR (curr)get_use_from_ptr (curr); | ||||||||
3356 | if (use == lookfor) | ||||||||
3357 | break; | ||||||||
3358 | gimple *def = SSA_NAME_DEF_STMT (use)(tree_check ((use), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3358, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt; | ||||||||
3359 | if (gimple_nop_p (def) | ||||||||
3360 | || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) | ||||||||
3361 | { | ||||||||
3362 | pop: | ||||||||
3363 | do | ||||||||
3364 | { | ||||||||
3365 | std::pair<ssa_op_iter, use_operand_p> x = path.pop (); | ||||||||
3366 | curri = x.first; | ||||||||
3367 | curr = x.second; | ||||||||
3368 | do | ||||||||
3369 | curr = op_iter_next_use (&curri); | ||||||||
3370 | /* Skip already visited or non-SSA operands (from iterating | ||||||||
3371 | over PHI args). */ | ||||||||
3372 | while (curr != NULL_USE_OPERAND_P((use_operand_p)nullptr) | ||||||||
3373 | && (TREE_CODE (USE_FROM_PTR (curr))((enum tree_code) (get_use_from_ptr (curr))->base.code) != SSA_NAME | ||||||||
3374 | || ! bitmap_set_bit (visited, | ||||||||
3375 | SSA_NAME_VERSION(tree_check ((get_use_from_ptr (curr)), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3376, __FUNCTION__, (SSA_NAME)))->base.u.version | ||||||||
3376 | (USE_FROM_PTR (curr))(tree_check ((get_use_from_ptr (curr)), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3376, __FUNCTION__, (SSA_NAME)))->base.u.version))); | ||||||||
3377 | } | ||||||||
3378 | while (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr) && ! path.is_empty ()); | ||||||||
3379 | if (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr)) | ||||||||
3380 | break; | ||||||||
3381 | } | ||||||||
3382 | else | ||||||||
3383 | { | ||||||||
3384 | if (gimple_code (def) == GIMPLE_PHI) | ||||||||
3385 | curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE0x01); | ||||||||
3386 | else | ||||||||
3387 | curr = op_iter_init_use (&curri, def, SSA_OP_USE0x01); | ||||||||
3388 | while (curr != NULL_USE_OPERAND_P((use_operand_p)nullptr) | ||||||||
3389 | && (TREE_CODE (USE_FROM_PTR (curr))((enum tree_code) (get_use_from_ptr (curr))->base.code) != SSA_NAME | ||||||||
3390 | || ! bitmap_set_bit (visited, | ||||||||
3391 | SSA_NAME_VERSION(tree_check ((get_use_from_ptr (curr)), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3392, __FUNCTION__, (SSA_NAME)))->base.u.version | ||||||||
3392 | (USE_FROM_PTR (curr))(tree_check ((get_use_from_ptr (curr)), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3392, __FUNCTION__, (SSA_NAME)))->base.u.version))) | ||||||||
3393 | curr = op_iter_next_use (&curri); | ||||||||
3394 | if (curr == NULL_USE_OPERAND_P((use_operand_p)nullptr)) | ||||||||
3395 | goto pop; | ||||||||
3396 | } | ||||||||
3397 | } | ||||||||
3398 | while (1); | ||||||||
3399 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||||||||
3400 | { | ||||||||
3401 | dump_printf_loc (MSG_NOTE, loc, "reduction path: "); | ||||||||
3402 | unsigned i; | ||||||||
3403 | std::pair<ssa_op_iter, use_operand_p> *x; | ||||||||
3404 | FOR_EACH_VEC_ELT (path, i, x)for (i = 0; (path).iterate ((i), &(x)); ++(i)) | ||||||||
3405 | dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second)get_use_from_ptr (x->second)); | ||||||||
3406 | dump_printf (MSG_NOTE, "\n"); | ||||||||
3407 | } | ||||||||
3408 | |||||||||
3409 | /* Check whether the reduction path detected is valid. */ | ||||||||
3410 | bool fail = path.length () == 0; | ||||||||
3411 | bool neg = false; | ||||||||
3412 | int sign = -1; | ||||||||
3413 | *code = ERROR_MARK; | ||||||||
3414 | for (unsigned i = 1; i < path.length (); ++i) | ||||||||
3415 | { | ||||||||
3416 | gimple *use_stmt = USE_STMT (path[i].second)(path[i].second)->loc.stmt; | ||||||||
3417 | tree op = USE_FROM_PTR (path[i].second)get_use_from_ptr (path[i].second); | ||||||||
3418 | if (! is_gimple_assign (use_stmt) | ||||||||
3419 | /* The following make sure we can compute the operand index | ||||||||
3420 | easily plus it mostly disallows chaining via COND_EXPR condition | ||||||||
3421 | operands. */ | ||||||||
3422 | || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use | ||||||||
3423 | && (gimple_num_ops (use_stmt) <= 2 | ||||||||
3424 | || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use) | ||||||||
3425 | && (gimple_num_ops (use_stmt) <= 3 | ||||||||
3426 | || gimple_assign_rhs3_ptr (use_stmt) != path[i].second->use))) | ||||||||
3427 | { | ||||||||
3428 | fail = true; | ||||||||
3429 | break; | ||||||||
3430 | } | ||||||||
3431 | /* Check there's only a single stmt the op is used on. For the | ||||||||
3432 | not value-changing tail and the last stmt allow out-of-loop uses. | ||||||||
3433 | ??? We could relax this and handle arbitrary live stmts by | ||||||||
3434 | forcing a scalar epilogue for example. */ | ||||||||
3435 | imm_use_iterator imm_iter; | ||||||||
3436 | gimple *op_use_stmt; | ||||||||
3437 | unsigned cnt = 0; | ||||||||
3438 | FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)for (struct auto_end_imm_use_stmt_traverse auto_end_imm_use_stmt_traverse ((((op_use_stmt) = first_imm_use_stmt (&(imm_iter), (op) )), &(imm_iter))); !end_imm_use_stmt_p (&(imm_iter)); (void) ((op_use_stmt) = next_imm_use_stmt (&(imm_iter))) ) | ||||||||
3439 | if (!is_gimple_debug (op_use_stmt) | ||||||||
3440 | && (*code != ERROR_MARK | ||||||||
3441 | || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) | ||||||||
3442 | { | ||||||||
3443 | /* We want to allow x + x but not x < 1 ? x : 2. */ | ||||||||
3444 | if (is_gimple_assign (op_use_stmt) | ||||||||
3445 | && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR) | ||||||||
3446 | { | ||||||||
3447 | use_operand_p use_p; | ||||||||
3448 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)for ((use_p) = first_imm_use_on_stmt (&(imm_iter)); !end_imm_use_on_stmt_p (&(imm_iter)); (void) ((use_p) = next_imm_use_on_stmt (& (imm_iter)))) | ||||||||
3449 | cnt++; | ||||||||
3450 | } | ||||||||
3451 | else | ||||||||
3452 | cnt++; | ||||||||
3453 | } | ||||||||
3454 | if (cnt != 1) | ||||||||
3455 | { | ||||||||
3456 | fail = true; | ||||||||
3457 | break; | ||||||||
3458 | } | ||||||||
3459 | tree_code use_code = gimple_assign_rhs_code (use_stmt); | ||||||||
3460 | if (use_code == MINUS_EXPR) | ||||||||
3461 | { | ||||||||
3462 | use_code = PLUS_EXPR; | ||||||||
3463 | /* Track whether we negate the reduction value each iteration. */ | ||||||||
3464 | if (gimple_assign_rhs2 (use_stmt) == op) | ||||||||
3465 | neg = ! neg; | ||||||||
3466 | } | ||||||||
3467 | if (CONVERT_EXPR_CODE_P (use_code)((use_code) == NOP_EXPR || (use_code) == CONVERT_EXPR) | ||||||||
3468 | && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt))((contains_struct_check ((gimple_assign_lhs (use_stmt)), (TS_TYPED ), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3468, __FUNCTION__))->typed.type), | ||||||||
3469 | TREE_TYPE (gimple_assign_rhs1 (use_stmt))((contains_struct_check ((gimple_assign_rhs1 (use_stmt)), (TS_TYPED ), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3469, __FUNCTION__))->typed.type))) | ||||||||
3470 | ; | ||||||||
3471 | else if (*code == ERROR_MARK) | ||||||||
3472 | { | ||||||||
3473 | *code = use_code; | ||||||||
3474 | sign = TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)))((signop) ((tree_class_check ((((contains_struct_check ((gimple_assign_lhs (use_stmt)), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3474, __FUNCTION__))->typed.type)), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3474, __FUNCTION__))->base.u.bits.unsigned_flag)); | ||||||||
3475 | } | ||||||||
3476 | else if (use_code != *code) | ||||||||
3477 | { | ||||||||
3478 | fail = true; | ||||||||
3479 | break; | ||||||||
3480 | } | ||||||||
3481 | else if ((use_code == MIN_EXPR | ||||||||
3482 | || use_code == MAX_EXPR) | ||||||||
3483 | && sign != TYPE_SIGN (TREE_TYPE (gimple_assign_lhs (use_stmt)))((signop) ((tree_class_check ((((contains_struct_check ((gimple_assign_lhs (use_stmt)), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3483, __FUNCTION__))->typed.type)), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3483, __FUNCTION__))->base.u.bits.unsigned_flag))) | ||||||||
3484 | { | ||||||||
3485 | fail = true; | ||||||||
3486 | break; | ||||||||
3487 | } | ||||||||
3488 | } | ||||||||
3489 | return ! fail && ! neg && *code != ERROR_MARK; | ||||||||
3490 | } | ||||||||
3491 | |||||||||
3492 | bool | ||||||||
3493 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, | ||||||||
3494 | tree loop_arg, enum tree_code code) | ||||||||
3495 | { | ||||||||
3496 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; | ||||||||
3497 | enum tree_code code_; | ||||||||
3498 | return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) | ||||||||
3499 | && code_ == code); | ||||||||
3500 | } | ||||||||
3501 | |||||||||
3502 | |||||||||
3503 | |||||||||
3504 | /* Function vect_is_simple_reduction | ||||||||
3505 | |||||||||
3506 | (1) Detect a cross-iteration def-use cycle that represents a simple | ||||||||
3507 | reduction computation. We look for the following pattern: | ||||||||
3508 | |||||||||
3509 | loop_header: | ||||||||
3510 | a1 = phi < a0, a2 > | ||||||||
3511 | a3 = ... | ||||||||
3512 | a2 = operation (a3, a1) | ||||||||
3513 | |||||||||
3514 | or | ||||||||
3515 | |||||||||
3516 | a3 = ... | ||||||||
3517 | loop_header: | ||||||||
3518 | a1 = phi < a0, a2 > | ||||||||
3519 | a2 = operation (a3, a1) | ||||||||
3520 | |||||||||
3521 | such that: | ||||||||
3522 | 1. operation is commutative and associative and it is safe to | ||||||||
3523 | change the order of the computation | ||||||||
3524 | 2. no uses for a2 in the loop (a2 is used out of the loop) | ||||||||
3525 | 3. no uses of a1 in the loop besides the reduction operation | ||||||||
3526 | 4. no uses of a1 outside the loop. | ||||||||
3527 | |||||||||
3528 | Conditions 1,4 are tested here. | ||||||||
3529 | Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. | ||||||||
3530 | |||||||||
3531 | (2) Detect a cross-iteration def-use cycle in nested loops, i.e., | ||||||||
3532 | nested cycles. | ||||||||
3533 | |||||||||
3534 | (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double | ||||||||
3535 | reductions: | ||||||||
3536 | |||||||||
3537 | a1 = phi < a0, a2 > | ||||||||
3538 | inner loop (def of a3) | ||||||||
3539 | a2 = phi < a3 > | ||||||||
3540 | |||||||||
3541 | (4) Detect condition expressions, ie: | ||||||||
3542 | for (int i = 0; i < N; i++) | ||||||||
3543 | if (a[i] < val) | ||||||||
3544 | ret_val = a[i]; | ||||||||
3545 | |||||||||
3546 | */ | ||||||||
3547 | |||||||||
3548 | static stmt_vec_info | ||||||||
3549 | vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, | ||||||||
3550 | bool *double_reduc, bool *reduc_chain_p) | ||||||||
3551 | { | ||||||||
3552 | gphi *phi = as_a <gphi *> (phi_info->stmt); | ||||||||
3553 | gimple *phi_use_stmt = NULLnullptr; | ||||||||
3554 | imm_use_iterator imm_iter; | ||||||||
3555 | use_operand_p use_p; | ||||||||
3556 | |||||||||
3557 | *double_reduc = false; | ||||||||
3558 | *reduc_chain_p = false; | ||||||||
3559 | STMT_VINFO_REDUC_TYPE (phi_info)(phi_info)->reduc_type = TREE_CODE_REDUCTION; | ||||||||
3560 | |||||||||
3561 | tree phi_name = PHI_RESULT (phi)get_def_from_ptr (gimple_phi_result_ptr (phi)); | ||||||||
3562 | /* ??? If there are no uses of the PHI result the inner loop reduction | ||||||||
3563 | won't be detected as possibly double-reduction by vectorizable_reduction | ||||||||
3564 | because that tries to walk the PHI arg from the preheader edge which | ||||||||
3565 | can be constant. See PR60382. */ | ||||||||
3566 | if (has_zero_uses (phi_name)) | ||||||||
3567 | return NULLnullptr; | ||||||||
3568 | class loop *loop = (gimple_bb (phi))->loop_father; | ||||||||
3569 | unsigned nphi_def_loop_uses = 0; | ||||||||
3570 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)for ((use_p) = first_readonly_imm_use (&(imm_iter), (phi_name )); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p ) = next_readonly_imm_use (&(imm_iter)))) | ||||||||
3571 | { | ||||||||
3572 | gimple *use_stmt = USE_STMT (use_p)(use_p)->loc.stmt; | ||||||||
3573 | if (is_gimple_debug (use_stmt)) | ||||||||
3574 | continue; | ||||||||
3575 | |||||||||
3576 | if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | ||||||||
3577 | { | ||||||||
3578 | if (dump_enabled_p ()) | ||||||||
3579 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
3580 | "intermediate value used outside loop.\n"); | ||||||||
3581 | |||||||||
3582 | return NULLnullptr; | ||||||||
3583 | } | ||||||||
3584 | |||||||||
3585 | nphi_def_loop_uses++; | ||||||||
3586 | phi_use_stmt = use_stmt; | ||||||||
3587 | } | ||||||||
3588 | |||||||||
3589 | tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop))gimple_phi_arg_def (((phi)), ((loop_latch_edge (loop))->dest_idx )); | ||||||||
3590 | if (TREE_CODE (latch_def)((enum tree_code) (latch_def)->base.code) != SSA_NAME) | ||||||||
3591 | { | ||||||||
3592 | if (dump_enabled_p ()) | ||||||||
3593 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
3594 | "reduction: not ssa_name: %T\n", latch_def); | ||||||||
3595 | return NULLnullptr; | ||||||||
3596 | } | ||||||||
3597 | |||||||||
3598 | stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); | ||||||||
3599 | if (!def_stmt_info | ||||||||
3600 | || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) | ||||||||
3601 | return NULLnullptr; | ||||||||
3602 | |||||||||
3603 | bool nested_in_vect_loop | ||||||||
3604 | = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info)(loop_info)->loop, loop); | ||||||||
3605 | unsigned nlatch_def_loop_uses = 0; | ||||||||
3606 | auto_vec<gphi *, 3> lcphis; | ||||||||
3607 | bool inner_loop_of_double_reduc = false; | ||||||||
3608 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)for ((use_p) = first_readonly_imm_use (&(imm_iter), (latch_def )); !end_readonly_imm_use_p (&(imm_iter)); (void) ((use_p ) = next_readonly_imm_use (&(imm_iter)))) | ||||||||
3609 | { | ||||||||
3610 | gimple *use_stmt = USE_STMT (use_p)(use_p)->loc.stmt; | ||||||||
3611 | if (is_gimple_debug (use_stmt)) | ||||||||
3612 | continue; | ||||||||
3613 | if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | ||||||||
3614 | nlatch_def_loop_uses++; | ||||||||
3615 | else | ||||||||
3616 | { | ||||||||
3617 | /* We can have more than one loop-closed PHI. */ | ||||||||
3618 | lcphis.safe_push (as_a <gphi *> (use_stmt)); | ||||||||
3619 | if (nested_in_vect_loop | ||||||||
3620 | && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))(loop_info->lookup_stmt (use_stmt))->def_type | ||||||||
3621 | == vect_double_reduction_def)) | ||||||||
3622 | inner_loop_of_double_reduc = true; | ||||||||
3623 | } | ||||||||
3624 | } | ||||||||
3625 | |||||||||
3626 | /* If we are vectorizing an inner reduction we are executing that | ||||||||
3627 | in the original order only in case we are not dealing with a | ||||||||
3628 | double reduction. */ | ||||||||
3629 | if (nested_in_vect_loop && !inner_loop_of_double_reduc) | ||||||||
3630 | { | ||||||||
3631 | if (dump_enabled_p ()) | ||||||||
3632 | report_vect_op (MSG_NOTE, def_stmt_info->stmt, | ||||||||
3633 | "detected nested cycle: "); | ||||||||
3634 | return def_stmt_info; | ||||||||
3635 | } | ||||||||
3636 | |||||||||
3637 | /* If this isn't a nested cycle or if the nested cycle reduction value | ||||||||
3638 | is used ouside of the inner loop we cannot handle uses of the reduction | ||||||||
3639 | value. */ | ||||||||
3640 | if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) | ||||||||
3641 | { | ||||||||
3642 | if (dump_enabled_p ()) | ||||||||
3643 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
3644 | "reduction used in loop.\n"); | ||||||||
3645 | return NULLnullptr; | ||||||||
3646 | } | ||||||||
3647 | |||||||||
3648 | /* If DEF_STMT is a phi node itself, we expect it to have a single argument | ||||||||
3649 | defined in the inner loop. */ | ||||||||
3650 | if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) | ||||||||
3651 | { | ||||||||
3652 | tree op1 = PHI_ARG_DEF (def_stmt, 0)gimple_phi_arg_def ((def_stmt), (0)); | ||||||||
3653 | if (gimple_phi_num_args (def_stmt) != 1 | ||||||||
3654 | || TREE_CODE (op1)((enum tree_code) (op1)->base.code) != SSA_NAME) | ||||||||
3655 | { | ||||||||
3656 | if (dump_enabled_p ()) | ||||||||
3657 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
3658 | "unsupported phi node definition.\n"); | ||||||||
3659 | |||||||||
3660 | return NULLnullptr; | ||||||||
3661 | } | ||||||||
3662 | |||||||||
3663 | gimple *def1 = SSA_NAME_DEF_STMT (op1)(tree_check ((op1), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3663, __FUNCTION__, (SSA_NAME)))->ssa_name.def_stmt; | ||||||||
3664 | if (gimple_bb (def1) | ||||||||
3665 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||||||||
3666 | && loop->inner | ||||||||
3667 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) | ||||||||
3668 | && is_gimple_assign (def1) | ||||||||
3669 | && is_a <gphi *> (phi_use_stmt) | ||||||||
3670 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) | ||||||||
3671 | { | ||||||||
3672 | if (dump_enabled_p ()) | ||||||||
3673 | report_vect_op (MSG_NOTE, def_stmt, | ||||||||
3674 | "detected double reduction: "); | ||||||||
3675 | |||||||||
3676 | *double_reduc = true; | ||||||||
3677 | return def_stmt_info; | ||||||||
3678 | } | ||||||||
3679 | |||||||||
3680 | return NULLnullptr; | ||||||||
3681 | } | ||||||||
3682 | |||||||||
3683 | /* Look for the expression computing latch_def from then loop PHI result. */ | ||||||||
3684 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; | ||||||||
3685 | enum tree_code code; | ||||||||
3686 | if (check_reduction_path (vect_location, loop, phi, latch_def, &code, | ||||||||
3687 | path)) | ||||||||
3688 | { | ||||||||
3689 | STMT_VINFO_REDUC_CODE (phi_info)(phi_info)->reduc_code = code; | ||||||||
3690 | if (code == COND_EXPR && !nested_in_vect_loop) | ||||||||
3691 | STMT_VINFO_REDUC_TYPE (phi_info)(phi_info)->reduc_type = COND_REDUCTION; | ||||||||
3692 | |||||||||
3693 | /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP | ||||||||
3694 | reduction chain for which the additional restriction is that | ||||||||
3695 | all operations in the chain are the same. */ | ||||||||
3696 | auto_vec<stmt_vec_info, 8> reduc_chain; | ||||||||
3697 | unsigned i; | ||||||||
3698 | bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR; | ||||||||
3699 | for (i = path.length () - 1; i >= 1; --i) | ||||||||
3700 | { | ||||||||
3701 | gimple *stmt = USE_STMT (path[i].second)(path[i].second)->loc.stmt; | ||||||||
3702 | stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); | ||||||||
3703 | STMT_VINFO_REDUC_IDX (stmt_info)(stmt_info)->reduc_idx | ||||||||
3704 | = path[i].second->use - gimple_assign_rhs1_ptr (stmt); | ||||||||
3705 | enum tree_code stmt_code = gimple_assign_rhs_code (stmt); | ||||||||
3706 | bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)((stmt_code) == NOP_EXPR || (stmt_code) == CONVERT_EXPR) | ||||||||
3707 | && (i == 1 || i == path.length () - 1)); | ||||||||
3708 | if ((stmt_code != code && !leading_conversion) | ||||||||
3709 | /* We can only handle the final value in epilogue | ||||||||
3710 | generation for reduction chains. */ | ||||||||
3711 | || (i != 1 && !has_single_use (gimple_assign_lhs (stmt)))) | ||||||||
3712 | is_slp_reduc = false; | ||||||||
3713 | /* For reduction chains we support a trailing/leading | ||||||||
3714 | conversions. We do not store those in the actual chain. */ | ||||||||
3715 | if (leading_conversion) | ||||||||
3716 | continue; | ||||||||
3717 | reduc_chain.safe_push (stmt_info); | ||||||||
3718 | } | ||||||||
3719 | if (is_slp_reduc && reduc_chain.length () > 1) | ||||||||
3720 | { | ||||||||
3721 | for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) | ||||||||
3722 | { | ||||||||
3723 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i])(((void)(!(!(reduc_chain[i])->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3723, __FUNCTION__), 0 : 0)), (reduc_chain[i])->first_element ) = reduc_chain[0]; | ||||||||
3724 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i])(((void)(!(!(reduc_chain[i])->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3724, __FUNCTION__), 0 : 0)), (reduc_chain[i])->next_element ) = reduc_chain[i+1]; | ||||||||
3725 | } | ||||||||
3726 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ())(((void)(!(!(reduc_chain.last ())->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3726, __FUNCTION__), 0 : 0)), (reduc_chain.last ())->first_element ) = reduc_chain[0]; | ||||||||
3727 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ())(((void)(!(!(reduc_chain.last ())->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3727, __FUNCTION__), 0 : 0)), (reduc_chain.last ())->next_element ) = NULLnullptr; | ||||||||
3728 | |||||||||
3729 | /* Save the chain for further analysis in SLP detection. */ | ||||||||
3730 | LOOP_VINFO_REDUCTION_CHAINS (loop_info)(loop_info)->reduction_chains.safe_push (reduc_chain[0]); | ||||||||
3731 | REDUC_GROUP_SIZE (reduc_chain[0])(((void)(!(!(reduc_chain[0])->dr_aux.dr) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3731, __FUNCTION__), 0 : 0)), (reduc_chain[0])->size) = reduc_chain.length (); | ||||||||
3732 | |||||||||
3733 | *reduc_chain_p = true; | ||||||||
3734 | if (dump_enabled_p ()) | ||||||||
3735 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3736 | "reduction: detected reduction chain\n"); | ||||||||
3737 | } | ||||||||
3738 | else if (dump_enabled_p ()) | ||||||||
3739 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3740 | "reduction: detected reduction\n"); | ||||||||
3741 | |||||||||
3742 | return def_stmt_info; | ||||||||
3743 | } | ||||||||
3744 | |||||||||
3745 | if (dump_enabled_p ()) | ||||||||
3746 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3747 | "reduction: unknown pattern\n"); | ||||||||
3748 | |||||||||
3749 | return NULLnullptr; | ||||||||
3750 | } | ||||||||
3751 | |||||||||
3752 | /* Estimate the number of peeled epilogue iterations for LOOP_VINFO. | ||||||||
3753 | PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, | ||||||||
3754 | or -1 if not known. */ | ||||||||
3755 | |||||||||
3756 | static int | ||||||||
3757 | vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) | ||||||||
3758 | { | ||||||||
3759 | int assumed_vf = vect_vf_for_cost (loop_vinfo); | ||||||||
3760 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) || peel_iters_prologue == -1) | ||||||||
3761 | { | ||||||||
3762 | if (dump_enabled_p ()) | ||||||||
3763 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
3764 | "cost model: epilogue peel iters set to vf/2 " | ||||||||
3765 | "because loop iterations are unknown .\n"); | ||||||||
3766 | return assumed_vf / 2; | ||||||||
3767 | } | ||||||||
3768 | else | ||||||||
3769 | { | ||||||||
3770 | int niters = LOOP_VINFO_INT_NITERS (loop_vinfo)(((unsigned long) (*tree_int_cst_elt_check (((loop_vinfo)-> num_iters), (0), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 3770, __FUNCTION__)))); | ||||||||
3771 | peel_iters_prologue = MIN (niters, peel_iters_prologue)((niters) < (peel_iters_prologue) ? (niters) : (peel_iters_prologue )); | ||||||||
3772 | int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; | ||||||||
3773 | /* If we need to peel for gaps, but no peeling is required, we have to | ||||||||
3774 | peel VF iterations. */ | ||||||||
3775 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps && !peel_iters_epilogue) | ||||||||
3776 | peel_iters_epilogue = assumed_vf; | ||||||||
3777 | return peel_iters_epilogue; | ||||||||
3778 | } | ||||||||
3779 | } | ||||||||
3780 | |||||||||
3781 | /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ | ||||||||
3782 | int | ||||||||
3783 | vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, | ||||||||
3784 | int *peel_iters_epilogue, | ||||||||
3785 | stmt_vector_for_cost *scalar_cost_vec, | ||||||||
3786 | stmt_vector_for_cost *prologue_cost_vec, | ||||||||
3787 | stmt_vector_for_cost *epilogue_cost_vec) | ||||||||
3788 | { | ||||||||
3789 | int retval = 0; | ||||||||
3790 | |||||||||
3791 | *peel_iters_epilogue | ||||||||
3792 | = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); | ||||||||
3793 | |||||||||
3794 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0)) | ||||||||
3795 | { | ||||||||
3796 | /* If peeled iterations are known but number of scalar loop | ||||||||
3797 | iterations are unknown, count a taken branch per peeled loop. */ | ||||||||
3798 | if (peel_iters_prologue > 0) | ||||||||
3799 | retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, | ||||||||
3800 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3801 | if (*peel_iters_epilogue > 0) | ||||||||
3802 | retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, | ||||||||
3803 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_epilogue); | ||||||||
3804 | } | ||||||||
3805 | |||||||||
3806 | stmt_info_for_cost *si; | ||||||||
3807 | int j; | ||||||||
3808 | if (peel_iters_prologue) | ||||||||
3809 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)for (j = 0; (*scalar_cost_vec).iterate ((j), &(si)); ++(j )) | ||||||||
3810 | retval += record_stmt_cost (prologue_cost_vec, | ||||||||
3811 | si->count * peel_iters_prologue, | ||||||||
3812 | si->kind, si->stmt_info, si->misalign, | ||||||||
3813 | vect_prologue); | ||||||||
3814 | if (*peel_iters_epilogue) | ||||||||
3815 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)for (j = 0; (*scalar_cost_vec).iterate ((j), &(si)); ++(j )) | ||||||||
3816 | retval += record_stmt_cost (epilogue_cost_vec, | ||||||||
3817 | si->count * *peel_iters_epilogue, | ||||||||
3818 | si->kind, si->stmt_info, si->misalign, | ||||||||
3819 | vect_epilogue); | ||||||||
3820 | |||||||||
3821 | return retval; | ||||||||
3822 | } | ||||||||
3823 | |||||||||
/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
   of iterations for vectorization.  -1 value means loop vectorization
   is not profitable.  This returned value may be used for dynamic
   profitability check.

   *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
   for static check against estimated number of iterations.  */
3838 | static void | ||||||||
3839 | vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, | ||||||||
3840 | int *ret_min_profitable_niters, | ||||||||
3841 | int *ret_min_profitable_estimate) | ||||||||
3842 | { | ||||||||
3843 | int min_profitable_iters; | ||||||||
3844 | int min_profitable_estimate; | ||||||||
3845 | int peel_iters_prologue; | ||||||||
3846 | int peel_iters_epilogue; | ||||||||
3847 | unsigned vec_inside_cost = 0; | ||||||||
3848 | int vec_outside_cost = 0; | ||||||||
3849 | unsigned vec_prologue_cost = 0; | ||||||||
3850 | unsigned vec_epilogue_cost = 0; | ||||||||
3851 | int scalar_single_iter_cost = 0; | ||||||||
3852 | int scalar_outside_cost = 0; | ||||||||
3853 | int assumed_vf = vect_vf_for_cost (loop_vinfo); | ||||||||
3854 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment; | ||||||||
3855 | void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)(loop_vinfo)->target_cost_data; | ||||||||
3856 | |||||||||
3857 | /* Cost model disabled. */ | ||||||||
3858 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop)) | ||||||||
3859 | { | ||||||||
3860 | if (dump_enabled_p ()) | ||||||||
3861 | dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); | ||||||||
3862 | *ret_min_profitable_niters = 0; | ||||||||
3863 | *ret_min_profitable_estimate = 0; | ||||||||
3864 | return; | ||||||||
3865 | } | ||||||||
3866 | |||||||||
3867 | /* Requires loop versioning tests to handle misalignment. */ | ||||||||
3868 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)((loop_vinfo)->may_misalign_stmts.length () > 0)) | ||||||||
3869 | { | ||||||||
3870 | /* FIXME: Make cost depend on complexity of individual check. */ | ||||||||
3871 | unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)(loop_vinfo)->may_misalign_stmts.length (); | ||||||||
3872 | (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt, | ||||||||
3873 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3874 | if (dump_enabled_p ()) | ||||||||
3875 | dump_printf (MSG_NOTE, | ||||||||
3876 | "cost model: Adding cost of checks for loop " | ||||||||
3877 | "versioning to treat misalignment.\n"); | ||||||||
3878 | } | ||||||||
3879 | |||||||||
3880 | /* Requires loop versioning with alias checks. */ | ||||||||
3881 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)((loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0)) | ||||||||
3882 | { | ||||||||
3883 | /* FIXME: Make cost depend on complexity of individual check. */ | ||||||||
3884 | unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo)(loop_vinfo)->comp_alias_ddrs.length (); | ||||||||
3885 | (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt, | ||||||||
3886 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3887 | len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo)(loop_vinfo)->check_unequal_addrs.length (); | ||||||||
3888 | if (len) | ||||||||
3889 | /* Count LEN - 1 ANDs and LEN comparisons. */ | ||||||||
3890 | (void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1, | ||||||||
3891 | scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3892 | len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds.length (); | ||||||||
3893 | if (len) | ||||||||
3894 | { | ||||||||
3895 | /* Count LEN - 1 ANDs and LEN comparisons. */ | ||||||||
3896 | unsigned int nstmts = len * 2 - 1; | ||||||||
3897 | /* +1 for each bias that needs adding. */ | ||||||||
3898 | for (unsigned int i = 0; i < len; ++i) | ||||||||
3899 | if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)(loop_vinfo)->lower_bounds[i].unsigned_p) | ||||||||
3900 | nstmts += 1; | ||||||||
3901 | (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts, | ||||||||
3902 | scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3903 | } | ||||||||
3904 | if (dump_enabled_p ()) | ||||||||
3905 | dump_printf (MSG_NOTE, | ||||||||
3906 | "cost model: Adding cost of checks for loop " | ||||||||
3907 | "versioning aliasing.\n"); | ||||||||
3908 | } | ||||||||
3909 | |||||||||
3910 | /* Requires loop versioning with niter checks. */ | ||||||||
3911 | if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)((loop_vinfo)->num_iters_assumptions)) | ||||||||
3912 | { | ||||||||
3913 | /* FIXME: Make cost depend on complexity of individual check. */ | ||||||||
3914 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt, | ||||||||
3915 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3916 | if (dump_enabled_p ()) | ||||||||
3917 | dump_printf (MSG_NOTE, | ||||||||
3918 | "cost model: Adding cost of checks for loop " | ||||||||
3919 | "versioning niters.\n"); | ||||||||
3920 | } | ||||||||
3921 | |||||||||
3922 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond))) | ||||||||
3923 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, | ||||||||
3924 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
3925 | |||||||||
3926 | /* Count statements in scalar loop. Using this as scalar cost for a single | ||||||||
3927 | iteration for now. | ||||||||
3928 | |||||||||
3929 | TODO: Add outer loop support. | ||||||||
3930 | |||||||||
3931 | TODO: Consider assigning different costs to different scalar | ||||||||
3932 | statements. */ | ||||||||
3933 | |||||||||
3934 | scalar_single_iter_cost | ||||||||
3935 | = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)(loop_vinfo)->single_scalar_iteration_cost; | ||||||||
3936 | |||||||||
3937 | /* Add additional cost for the peeled instructions in prologue and epilogue | ||||||||
3938 | loop. (For fully-masked loops there will be no peeling.) | ||||||||
3939 | |||||||||
3940 | FORNOW: If we don't know the value of peel_iters for prologue or epilogue | ||||||||
3941 | at compile-time - we assume it's vf/2 (the worst would be vf-1). | ||||||||
3942 | |||||||||
3943 | TODO: Build an expression that represents peel_iters for prologue and | ||||||||
3944 | epilogue to be used in a run-time test. */ | ||||||||
3945 | |||||||||
3946 | bool prologue_need_br_taken_cost = false; | ||||||||
3947 | bool prologue_need_br_not_taken_cost = false; | ||||||||
3948 | |||||||||
3949 | /* Calculate peel_iters_prologue. */ | ||||||||
3950 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) | ||||||||
3951 | peel_iters_prologue = 0; | ||||||||
3952 | else if (npeel < 0) | ||||||||
3953 | { | ||||||||
3954 | peel_iters_prologue = assumed_vf / 2; | ||||||||
3955 | if (dump_enabled_p ()) | ||||||||
3956 | dump_printf (MSG_NOTE, "cost model: " | ||||||||
3957 | "prologue peel iters set to vf/2.\n"); | ||||||||
3958 | |||||||||
3959 | /* If peeled iterations are unknown, count a taken branch and a not taken | ||||||||
3960 | branch per peeled loop. Even if scalar loop iterations are known, | ||||||||
3961 | vector iterations are not known since peeled prologue iterations are | ||||||||
3962 | not known. Hence guards remain the same. */ | ||||||||
3963 | prologue_need_br_taken_cost = true; | ||||||||
3964 | prologue_need_br_not_taken_cost = true; | ||||||||
3965 | } | ||||||||
3966 | else | ||||||||
3967 | { | ||||||||
3968 | peel_iters_prologue = npeel; | ||||||||
3969 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) && peel_iters_prologue > 0) | ||||||||
3970 | /* If peeled iterations are known but number of scalar loop | ||||||||
3971 | iterations are unknown, count a taken branch per peeled loop. */ | ||||||||
3972 | prologue_need_br_taken_cost = true; | ||||||||
3973 | } | ||||||||
3974 | |||||||||
3975 | bool epilogue_need_br_taken_cost = false; | ||||||||
3976 | bool epilogue_need_br_not_taken_cost = false; | ||||||||
3977 | |||||||||
3978 | /* Calculate peel_iters_epilogue. */ | ||||||||
3979 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
3980 | /* We need to peel exactly one iteration for gaps. */ | ||||||||
3981 | peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)(loop_vinfo)->peeling_for_gaps ? 1 : 0; | ||||||||
3982 | else if (npeel < 0) | ||||||||
3983 | { | ||||||||
3984 | /* If peeling for alignment is unknown, loop bound of main loop | ||||||||
3985 | becomes unknown. */ | ||||||||
3986 | peel_iters_epilogue = assumed_vf / 2; | ||||||||
3987 | if (dump_enabled_p ()) | ||||||||
3988 | dump_printf (MSG_NOTE, "cost model: " | ||||||||
3989 | "epilogue peel iters set to vf/2 because " | ||||||||
3990 | "peeling for alignment is unknown.\n"); | ||||||||
3991 | |||||||||
3992 | /* See the same reason above in peel_iters_prologue calculation. */ | ||||||||
3993 | epilogue_need_br_taken_cost = true; | ||||||||
3994 | epilogue_need_br_not_taken_cost = true; | ||||||||
3995 | } | ||||||||
3996 | else | ||||||||
3997 | { | ||||||||
3998 | peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, npeel); | ||||||||
3999 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) && peel_iters_epilogue > 0) | ||||||||
4000 | /* If peeled iterations are known but number of scalar loop | ||||||||
4001 | iterations are unknown, count a taken branch per peeled loop. */ | ||||||||
4002 | epilogue_need_br_taken_cost = true; | ||||||||
4003 | } | ||||||||
4004 | |||||||||
4005 | stmt_info_for_cost *si; | ||||||||
4006 | int j; | ||||||||
4007 | /* Add costs associated with peel_iters_prologue. */ | ||||||||
4008 | if (peel_iters_prologue) | ||||||||
4009 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), & (si)); ++(j)) | ||||||||
4010 | { | ||||||||
4011 | (void) add_stmt_cost (loop_vinfo, target_cost_data, | ||||||||
4012 | si->count * peel_iters_prologue, si->kind, | ||||||||
4013 | si->stmt_info, si->vectype, si->misalign, | ||||||||
4014 | vect_prologue); | ||||||||
4015 | } | ||||||||
4016 | |||||||||
4017 | /* Add costs associated with peel_iters_epilogue. */ | ||||||||
4018 | if (peel_iters_epilogue) | ||||||||
4019 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)for (j = 0; ((loop_vinfo)->scalar_cost_vec).iterate ((j), & (si)); ++(j)) | ||||||||
4020 | { | ||||||||
4021 | (void) add_stmt_cost (loop_vinfo, target_cost_data, | ||||||||
4022 | si->count * peel_iters_epilogue, si->kind, | ||||||||
4023 | si->stmt_info, si->vectype, si->misalign, | ||||||||
4024 | vect_epilogue); | ||||||||
4025 | } | ||||||||
4026 | |||||||||
4027 | /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ | ||||||||
4028 | |||||||||
4029 | if (prologue_need_br_taken_cost) | ||||||||
4030 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, | ||||||||
4031 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
4032 | |||||||||
4033 | if (prologue_need_br_not_taken_cost) | ||||||||
4034 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, | ||||||||
4035 | cond_branch_not_taken, NULLnullptr, NULL_TREE(tree) nullptr, 0, | ||||||||
4036 | vect_prologue); | ||||||||
4037 | |||||||||
4038 | if (epilogue_need_br_taken_cost) | ||||||||
4039 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken, | ||||||||
4040 | NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_epilogue); | ||||||||
4041 | |||||||||
4042 | if (epilogue_need_br_not_taken_cost) | ||||||||
4043 | (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, | ||||||||
4044 | cond_branch_not_taken, NULLnullptr, NULL_TREE(tree) nullptr, 0, | ||||||||
4045 | vect_epilogue); | ||||||||
4046 | |||||||||
4047 | /* Take care of special costs for rgroup controls of partial vectors. */ | ||||||||
4048 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo )->masks.is_empty ())) | ||||||||
4049 | { | ||||||||
4050 | /* Calculate how many masks we need to generate. */ | ||||||||
4051 | unsigned int num_masks = 0; | ||||||||
4052 | rgroup_controls *rgm; | ||||||||
4053 | unsigned int num_vectors_m1; | ||||||||
4054 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)for (num_vectors_m1 = 0; ((loop_vinfo)->masks).iterate ((num_vectors_m1 ), &(rgm)); ++(num_vectors_m1)) | ||||||||
4055 | if (rgm->type) | ||||||||
4056 | num_masks += num_vectors_m1 + 1; | ||||||||
4057 | gcc_assert (num_masks > 0)((void)(!(num_masks > 0) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4057, __FUNCTION__), 0 : 0)); | ||||||||
4058 | |||||||||
4059 | /* In the worst case, we need to generate each mask in the prologue | ||||||||
4060 | and in the loop body. One of the loop body mask instructions | ||||||||
4061 | replaces the comparison in the scalar loop, and since we don't | ||||||||
4062 | count the scalar comparison against the scalar body, we shouldn't | ||||||||
4063 | count that vector instruction against the vector body either. | ||||||||
4064 | |||||||||
4065 | Sometimes we can use unpacks instead of generating prologue | ||||||||
4066 | masks and sometimes the prologue mask will fold to a constant, | ||||||||
4067 | so the actual prologue cost might be smaller. However, it's | ||||||||
4068 | simpler and safer to use the worst-case cost; if this ends up | ||||||||
4069 | being the tie-breaker between vectorizing or not, then it's | ||||||||
4070 | probably better not to vectorize. */ | ||||||||
4071 | (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks, | ||||||||
4072 | vector_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
4073 | (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1, | ||||||||
4074 | vector_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_body); | ||||||||
4075 | } | ||||||||
4076 | else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)((loop_vinfo)->using_partial_vectors_p && !(loop_vinfo )->lens.is_empty ())) | ||||||||
4077 | { | ||||||||
4078 | /* Referring to the functions vect_set_loop_condition_partial_vectors | ||||||||
4079 | and vect_set_loop_controls_directly, we need to generate each | ||||||||
4080 | length in the prologue and in the loop body if required. Although | ||||||||
4081 | there are some possible optimizations, we consider the worst case | ||||||||
4082 | here. */ | ||||||||
4083 | |||||||||
4084 | bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0); | ||||||||
4085 | bool need_iterate_p | ||||||||
4086 | = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)((loop_vinfo)->orig_loop_info != nullptr) | ||||||||
4087 | && !vect_known_niters_smaller_than_vf (loop_vinfo)); | ||||||||
4088 | |||||||||
4089 | /* Calculate how many statements to be added. */ | ||||||||
4090 | unsigned int prologue_stmts = 0; | ||||||||
4091 | unsigned int body_stmts = 0; | ||||||||
4092 | |||||||||
4093 | rgroup_controls *rgc; | ||||||||
4094 | unsigned int num_vectors_m1; | ||||||||
4095 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)for (num_vectors_m1 = 0; ((loop_vinfo)->lens).iterate ((num_vectors_m1 ), &(rgc)); ++(num_vectors_m1)) | ||||||||
4096 | if (rgc->type) | ||||||||
4097 | { | ||||||||
4098 | /* May need one SHIFT for nitems_total computation. */ | ||||||||
4099 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; | ||||||||
4100 | if (nitems != 1 && !niters_known_p) | ||||||||
4101 | prologue_stmts += 1; | ||||||||
4102 | |||||||||
4103 | /* May need one MAX and one MINUS for wrap around. */ | ||||||||
4104 | if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc)) | ||||||||
4105 | prologue_stmts += 2; | ||||||||
4106 | |||||||||
4107 | /* Need one MAX and one MINUS for each batch limit excepting for | ||||||||
4108 | the 1st one. */ | ||||||||
4109 | prologue_stmts += num_vectors_m1 * 2; | ||||||||
4110 | |||||||||
4111 | unsigned int num_vectors = num_vectors_m1 + 1; | ||||||||
4112 | |||||||||
4113 | /* Need to set up lengths in prologue, only one MIN required | ||||||||
4114 | for each since start index is zero. */ | ||||||||
4115 | prologue_stmts += num_vectors; | ||||||||
4116 | |||||||||
4117 | /* Each may need two MINs and one MINUS to update lengths in body | ||||||||
4118 | for next iteration. */ | ||||||||
4119 | if (need_iterate_p) | ||||||||
4120 | body_stmts += 3 * num_vectors; | ||||||||
4121 | } | ||||||||
4122 | |||||||||
4123 | (void) add_stmt_cost (loop_vinfo, target_cost_data, prologue_stmts, | ||||||||
4124 | scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_prologue); | ||||||||
4125 | (void) add_stmt_cost (loop_vinfo, target_cost_data, body_stmts, | ||||||||
4126 | scalar_stmt, NULLnullptr, NULL_TREE(tree) nullptr, 0, vect_body); | ||||||||
4127 | } | ||||||||
4128 | |||||||||
4129 | /* FORNOW: The scalar outside cost is incremented in one of the | ||||||||
4130 | following ways: | ||||||||
4131 | |||||||||
4132 | 1. The vectorizer checks for alignment and aliasing and generates | ||||||||
4133 | a condition that allows dynamic vectorization. A cost model | ||||||||
4134 | check is ANDED with the versioning condition. Hence scalar code | ||||||||
4135 | path now has the added cost of the versioning check. | ||||||||
4136 | |||||||||
4137 | if (cost > th & versioning_check) | ||||||||
4138 | jmp to vector code | ||||||||
4139 | |||||||||
4140 | Hence run-time scalar is incremented by not-taken branch cost. | ||||||||
4141 | |||||||||
4142 | 2. The vectorizer then checks if a prologue is required. If the | ||||||||
4143 | cost model check was not done before during versioning, it has to | ||||||||
4144 | be done before the prologue check. | ||||||||
4145 | |||||||||
4146 | if (cost <= th) | ||||||||
4147 | prologue = scalar_iters | ||||||||
4148 | if (prologue == 0) | ||||||||
4149 | jmp to vector code | ||||||||
4150 | else | ||||||||
4151 | execute prologue | ||||||||
4152 | if (prologue == num_iters) | ||||||||
4153 | go to exit | ||||||||
4154 | |||||||||
4155 | Hence the run-time scalar cost is incremented by a taken branch, | ||||||||
4156 | plus a not-taken branch, plus a taken branch cost. | ||||||||
4157 | |||||||||
4158 | 3. The vectorizer then checks if an epilogue is required. If the | ||||||||
4159 | cost model check was not done before during prologue check, it | ||||||||
4160 | has to be done with the epilogue check. | ||||||||
4161 | |||||||||
4162 | if (prologue == 0) | ||||||||
4163 | jmp to vector code | ||||||||
4164 | else | ||||||||
4165 | execute prologue | ||||||||
4166 | if (prologue == num_iters) | ||||||||
4167 | go to exit | ||||||||
4168 | vector code: | ||||||||
4169 | if ((cost <= th) | (scalar_iters-prologue-epilogue == 0)) | ||||||||
4170 | jmp to epilogue | ||||||||
4171 | |||||||||
4172 | Hence the run-time scalar cost should be incremented by 2 taken | ||||||||
4173 | branches. | ||||||||
4174 | |||||||||
4175 | TODO: The back end may reorder the BBS's differently and reverse | ||||||||
4176 | conditions/branch directions. Change the estimates below to | ||||||||
4177 | something more reasonable. */ | ||||||||
4178 | |||||||||
4179 | /* If the number of iterations is known and we do not do versioning, we can | ||||||||
4180 | decide whether to vectorize at compile time. Hence the scalar version | ||||||||
4181 | do not carry cost model guard costs. */ | ||||||||
4182 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)(tree_fits_shwi_p ((loop_vinfo)->num_iters) && tree_to_shwi ((loop_vinfo)->num_iters) > 0) | ||||||||
4183 | || LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond))) | ||||||||
4184 | { | ||||||||
4185 | /* Cost model check occurs at versioning. */ | ||||||||
4186 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)(((loop_vinfo)->may_misalign_stmts.length () > 0) || (( loop_vinfo)->comp_alias_ddrs.length () > 0 || (loop_vinfo )->check_unequal_addrs.length () > 0 || (loop_vinfo)-> lower_bounds.length () > 0) || ((loop_vinfo)->num_iters_assumptions ) || ((loop_vinfo)->simd_if_cond))) | ||||||||
4187 | scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken); | ||||||||
4188 | else | ||||||||
4189 | { | ||||||||
4190 | /* Cost model check occurs at prologue generation. */ | ||||||||
4191 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)(loop_vinfo)->peeling_for_alignment < 0) | ||||||||
4192 | scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) | ||||||||
4193 | + vect_get_stmt_cost (cond_branch_not_taken); | ||||||||
4194 | /* Cost model check occurs at epilogue generation. */ | ||||||||
4195 | else | ||||||||
4196 | scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken); | ||||||||
4197 | } | ||||||||
4198 | } | ||||||||
4199 | |||||||||
4200 | /* Complete the target-specific cost calculations. */ | ||||||||
4201 | finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)(loop_vinfo)->target_cost_data, &vec_prologue_cost, | ||||||||
4202 | &vec_inside_cost, &vec_epilogue_cost); | ||||||||
4203 | |||||||||
4204 | vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); | ||||||||
4205 | |||||||||
4206 | /* Stash the costs so that we can compare two loop_vec_infos. */ | ||||||||
4207 | loop_vinfo->vec_inside_cost = vec_inside_cost; | ||||||||
4208 | loop_vinfo->vec_outside_cost = vec_outside_cost; | ||||||||
4209 | |||||||||
4210 | if (dump_enabled_p ()) | ||||||||
4211 | { | ||||||||
4212 | dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); | ||||||||
4213 | dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n", | ||||||||
4214 | vec_inside_cost); | ||||||||
4215 | dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", | ||||||||
4216 | vec_prologue_cost); | ||||||||
4217 | dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", | ||||||||
4218 | vec_epilogue_cost); | ||||||||
4219 | dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n", | ||||||||
4220 | scalar_single_iter_cost); | ||||||||
4221 | dump_printf (MSG_NOTE, " Scalar outside cost: %d\n", | ||||||||
4222 | scalar_outside_cost); | ||||||||
4223 | dump_printf (MSG_NOTE, " Vector outside cost: %d\n", | ||||||||
4224 | vec_outside_cost); | ||||||||
4225 | dump_printf (MSG_NOTE, " prologue iterations: %d\n", | ||||||||
4226 | peel_iters_prologue); | ||||||||
4227 | dump_printf (MSG_NOTE, " epilogue iterations: %d\n", | ||||||||
4228 | peel_iters_epilogue); | ||||||||
4229 | } | ||||||||
4230 | |||||||||
4231 | /* Calculate number of iterations required to make the vector version | ||||||||
4232 | profitable, relative to the loop bodies only. The following condition | ||||||||
4233 | must hold true: | ||||||||
4234 | SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC | ||||||||
4235 | where | ||||||||
4236 | SIC = scalar iteration cost, VIC = vector iteration cost, | ||||||||
4237 | VOC = vector outside cost, VF = vectorization factor, | ||||||||
4238 | NPEEL = prologue iterations + epilogue iterations, | ||||||||
4239 | SOC = scalar outside cost for run time cost model check. */ | ||||||||
4240 | |||||||||
4241 | int saving_per_viter = (scalar_single_iter_cost * assumed_vf | ||||||||
4242 | - vec_inside_cost); | ||||||||
4243 | if (saving_per_viter <= 0) | ||||||||
4244 | { | ||||||||
4245 | if (LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop->force_vectorize) | ||||||||
4246 | warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd, | ||||||||
4247 | "vectorization did not happen for a simd loop"); | ||||||||
4248 | |||||||||
4249 | if (dump_enabled_p ()) | ||||||||
4250 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | ||||||||
4251 | "cost model: the vector iteration cost = %d " | ||||||||
4252 | "divided by the scalar iteration cost = %d " | ||||||||
4253 | "is greater or equal to the vectorization factor = %d" | ||||||||
4254 | ".\n", | ||||||||
4255 | vec_inside_cost, scalar_single_iter_cost, assumed_vf); | ||||||||
4256 | *ret_min_profitable_niters = -1; | ||||||||
4257 | *ret_min_profitable_estimate = -1; | ||||||||
4258 | return; | ||||||||
4259 | } | ||||||||
4260 | |||||||||
4261 | /* ??? The "if" arm is written to handle all cases; see below for what | ||||||||
4262 | we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ | ||||||||
4263 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
4264 | { | ||||||||
4265 | /* Rewriting the condition above in terms of the number of | ||||||||
4266 | vector iterations (vniters) rather than the number of | ||||||||
4267 | scalar iterations (niters) gives: | ||||||||
4268 | |||||||||
4269 | SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC | ||||||||
4270 | |||||||||
4271 | <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC | ||||||||
4272 | |||||||||
4273 | For integer N, X and Y when X > 0: | ||||||||
4274 | |||||||||
4275 | N * X > Y <==> N >= (Y /[floor] X) + 1. */ | ||||||||
4276 | int outside_overhead = (vec_outside_cost | ||||||||
4277 | - scalar_single_iter_cost * peel_iters_prologue | ||||||||
4278 | - scalar_single_iter_cost * peel_iters_epilogue | ||||||||
4279 | - scalar_outside_cost); | ||||||||
4280 | /* We're only interested in cases that require at least one | ||||||||
4281 | vector iteration. */ | ||||||||
4282 | int min_vec_niters = 1; | ||||||||
4283 | if (outside_overhead > 0) | ||||||||
4284 | min_vec_niters = outside_overhead / saving_per_viter + 1; | ||||||||
4285 | |||||||||
4286 | if (dump_enabled_p ()) | ||||||||
4287 | dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n", | ||||||||
4288 | min_vec_niters); | ||||||||
4289 | |||||||||
4290 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
4291 | { | ||||||||
4292 | /* Now that we know the minimum number of vector iterations, | ||||||||
4293 | find the minimum niters for which the scalar cost is larger: | ||||||||
4294 | |||||||||
4295 | SIC * niters > VIC * vniters + VOC - SOC | ||||||||
4296 | |||||||||
4297 | We know that the minimum niters is no more than | ||||||||
4298 | vniters * VF + NPEEL, but it might be (and often is) less | ||||||||
4299 | than that if a partial vector iteration is cheaper than the | ||||||||
4300 | equivalent scalar code. */ | ||||||||
4301 | int threshold = (vec_inside_cost * min_vec_niters | ||||||||
4302 | + vec_outside_cost | ||||||||
4303 | - scalar_outside_cost); | ||||||||
4304 | if (threshold <= 0) | ||||||||
4305 | min_profitable_iters = 1; | ||||||||
4306 | else | ||||||||
4307 | min_profitable_iters = threshold / scalar_single_iter_cost + 1; | ||||||||
4308 | } | ||||||||
4309 | else | ||||||||
4310 | /* Convert the number of vector iterations into a number of | ||||||||
4311 | scalar iterations. */ | ||||||||
4312 | min_profitable_iters = (min_vec_niters * assumed_vf | ||||||||
4313 | + peel_iters_prologue | ||||||||
4314 | + peel_iters_epilogue); | ||||||||
4315 | } | ||||||||
4316 | else | ||||||||
4317 | { | ||||||||
4318 | min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) | ||||||||
4319 | * assumed_vf | ||||||||
4320 | - vec_inside_cost * peel_iters_prologue | ||||||||
4321 | - vec_inside_cost * peel_iters_epilogue); | ||||||||
4322 | if (min_profitable_iters <= 0) | ||||||||
4323 | min_profitable_iters = 0; | ||||||||
4324 | else | ||||||||
4325 | { | ||||||||
4326 | min_profitable_iters /= saving_per_viter; | ||||||||
4327 | |||||||||
4328 | if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters) | ||||||||
4329 | <= (((int) vec_inside_cost * min_profitable_iters) | ||||||||
4330 | + (((int) vec_outside_cost - scalar_outside_cost) | ||||||||
4331 | * assumed_vf))) | ||||||||
4332 | min_profitable_iters++; | ||||||||
4333 | } | ||||||||
4334 | } | ||||||||
4335 | |||||||||
4336 | if (dump_enabled_p ()) | ||||||||
4337 | dump_printf (MSG_NOTE, | ||||||||
4338 | " Calculated minimum iters for profitability: %d\n", | ||||||||
4339 | min_profitable_iters); | ||||||||
4340 | |||||||||
4341 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p | ||||||||
4342 | && min_profitable_iters < (assumed_vf + peel_iters_prologue)) | ||||||||
4343 | /* We want the vectorized loop to execute at least once. */ | ||||||||
4344 | min_profitable_iters = assumed_vf + peel_iters_prologue; | ||||||||
4345 | else if (min_profitable_iters < peel_iters_prologue) | ||||||||
4346 | /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the | ||||||||
4347 | vectorized loop executes at least once. */ | ||||||||
4348 | min_profitable_iters = peel_iters_prologue; | ||||||||
4349 | |||||||||
4350 | if (dump_enabled_p ()) | ||||||||
4351 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
4352 | " Runtime profitability threshold = %d\n", | ||||||||
4353 | min_profitable_iters); | ||||||||
4354 | |||||||||
4355 | *ret_min_profitable_niters = min_profitable_iters; | ||||||||
4356 | |||||||||
4357 | /* Calculate number of iterations required to make the vector version | ||||||||
4358 | profitable, relative to the loop bodies only. | ||||||||
4359 | |||||||||
4360 | Non-vectorized variant is SIC * niters and it must win over vector | ||||||||
4361 | variant on the expected loop trip count. The following condition must hold true: | ||||||||
4362 | SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */ | ||||||||
4363 | |||||||||
4364 | if (vec_outside_cost <= 0) | ||||||||
4365 | min_profitable_estimate = 0; | ||||||||
4366 | /* ??? This "else if" arm is written to handle all cases; see below for | ||||||||
4367 | what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ | ||||||||
4368 | else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
4369 | { | ||||||||
4370 | /* This is a repeat of the code above, but with + SOC rather | ||||||||
4371 | than - SOC. */ | ||||||||
4372 | int outside_overhead = (vec_outside_cost | ||||||||
4373 | - scalar_single_iter_cost * peel_iters_prologue | ||||||||
4374 | - scalar_single_iter_cost * peel_iters_epilogue | ||||||||
4375 | + scalar_outside_cost); | ||||||||
4376 | int min_vec_niters = 1; | ||||||||
4377 | if (outside_overhead > 0) | ||||||||
4378 | min_vec_niters = outside_overhead / saving_per_viter + 1; | ||||||||
4379 | |||||||||
4380 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)(loop_vinfo)->using_partial_vectors_p) | ||||||||
4381 | { | ||||||||
4382 | int threshold = (vec_inside_cost * min_vec_niters | ||||||||
4383 | + vec_outside_cost | ||||||||
4384 | + scalar_outside_cost); | ||||||||
4385 | min_profitable_estimate = threshold / scalar_single_iter_cost + 1; | ||||||||
4386 | } | ||||||||
4387 | else | ||||||||
4388 | min_profitable_estimate = (min_vec_niters * assumed_vf | ||||||||
4389 | + peel_iters_prologue | ||||||||
4390 | + peel_iters_epilogue); | ||||||||
4391 | } | ||||||||
4392 | else | ||||||||
4393 | { | ||||||||
4394 | min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) | ||||||||
4395 | * assumed_vf | ||||||||
4396 | - vec_inside_cost * peel_iters_prologue | ||||||||
4397 | - vec_inside_cost * peel_iters_epilogue) | ||||||||
4398 | / ((scalar_single_iter_cost * assumed_vf) | ||||||||
4399 | - vec_inside_cost); | ||||||||
4400 | } | ||||||||
4401 | min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters)((min_profitable_estimate) > (min_profitable_iters) ? (min_profitable_estimate ) : (min_profitable_iters)); | ||||||||
4402 | if (dump_enabled_p ()) | ||||||||
4403 | dump_printf_loc (MSG_NOTE, vect_location, | ||||||||
4404 | " Static estimate profitability threshold = %d\n", | ||||||||
4405 | min_profitable_estimate); | ||||||||
4406 | |||||||||
4407 | *ret_min_profitable_estimate = min_profitable_estimate; | ||||||||
4408 | } | ||||||||
4409 | |||||||||
4410 | /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET | ||||||||
4411 | vector elements (not bits) for a vector with NELT elements. */ | ||||||||
4412 | static void | ||||||||
4413 | calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, | ||||||||
4414 | vec_perm_builder *sel) | ||||||||
4415 | { | ||||||||
4416 | /* The encoding is a single stepped pattern. Any wrap-around is handled | ||||||||
4417 | by vec_perm_indices. */ | ||||||||
4418 | sel->new_vector (nelt, 1, 3); | ||||||||
4419 | for (unsigned int i = 0; i < 3; i++) | ||||||||
4420 | sel->quick_push (i + offset); | ||||||||
4421 | } | ||||||||
4422 | |||||||||
4423 | /* Checks whether the target supports whole-vector shifts for vectors of mode | ||||||||
4424 | MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ | ||||||||
4425 | it supports vec_perm_const with masks for all necessary shift amounts. */ | ||||||||
4426 | static bool | ||||||||
4427 | have_whole_vector_shift (machine_mode mode) | ||||||||
4428 | { | ||||||||
4429 | if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) | ||||||||
4430 | return true; | ||||||||
4431 | |||||||||
4432 | /* Variable-length vectors should be handled via the optab. */ | ||||||||
4433 | unsigned int nelt; | ||||||||
4434 | if (!GET_MODE_NUNITS (mode).is_constant (&nelt)) | ||||||||
4435 | return false; | ||||||||
4436 | |||||||||
4437 | vec_perm_builder sel; | ||||||||
4438 | vec_perm_indices indices; | ||||||||
4439 | for (unsigned int i = nelt / 2; i >= 1; i /= 2) | ||||||||
4440 | { | ||||||||
4441 | calc_vec_perm_mask_for_shift (i, nelt, &sel); | ||||||||
4442 | indices.new_vector (sel, 2, nelt); | ||||||||
4443 | if (!can_vec_perm_const_p (mode, indices, false)) | ||||||||
4444 | return false; | ||||||||
4445 | } | ||||||||
4446 | return true; | ||||||||
4447 | } | ||||||||
4448 | |||||||||
4449 | /* TODO: Close dependency between vect_model_*_cost and vectorizable_* | ||||||||
4450 | functions. Design better to avoid maintenance issues. */ | ||||||||
4451 | |||||||||
4452 | /* Function vect_model_reduction_cost. | ||||||||
4453 | |||||||||
4454 | Models cost for a reduction operation, including the vector ops | ||||||||
4455 | generated within the strip-mine loop in some cases, the initial | ||||||||
4456 | definition before the loop, and the epilogue code that must be generated. */ | ||||||||
4457 | |||||||||
4458 | static void | ||||||||
4459 | vect_model_reduction_cost (loop_vec_info loop_vinfo, | ||||||||
4460 | stmt_vec_info stmt_info, internal_fn reduc_fn, | ||||||||
4461 | vect_reduction_type reduction_type, | ||||||||
4462 | int ncopies, stmt_vector_for_cost *cost_vec) | ||||||||
4463 | { | ||||||||
4464 | int prologue_cost = 0, epilogue_cost = 0, inside_cost; | ||||||||
4465 | enum tree_code code; | ||||||||
4466 | optab optab; | ||||||||
4467 | tree vectype; | ||||||||
4468 | machine_mode mode; | ||||||||
4469 | class loop *loop = NULLnullptr; | ||||||||
4470 | |||||||||
4471 | if (loop_vinfo) | ||||||||
4472 | loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
4473 | |||||||||
4474 | /* Condition reductions generate two reductions in the loop. */ | ||||||||
4475 | if (reduction_type == COND_REDUCTION) | ||||||||
4476 | ncopies *= 2; | ||||||||
4477 | |||||||||
4478 | vectype = STMT_VINFO_VECTYPE (stmt_info)(stmt_info)->vectype; | ||||||||
4479 | mode = TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4479, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode); | ||||||||
4480 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); | ||||||||
4481 | |||||||||
4482 | code = gimple_assign_rhs_code (orig_stmt_info->stmt); | ||||||||
4483 | |||||||||
4484 | if (reduction_type == EXTRACT_LAST_REDUCTION) | ||||||||
4485 | /* No extra instructions are needed in the prologue. The loop body | ||||||||
4486 | operations are costed in vectorizable_condition. */ | ||||||||
4487 | inside_cost = 0; | ||||||||
4488 | else if (reduction_type == FOLD_LEFT_REDUCTION) | ||||||||
4489 | { | ||||||||
4490 | /* No extra instructions needed in the prologue. */ | ||||||||
4491 | prologue_cost = 0; | ||||||||
4492 | |||||||||
4493 | if (reduc_fn != IFN_LAST) | ||||||||
4494 | /* Count one reduction-like operation per vector. */ | ||||||||
4495 | inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar, | ||||||||
4496 | stmt_info, 0, vect_body); | ||||||||
4497 | else | ||||||||
4498 | { | ||||||||
4499 | /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ | ||||||||
4500 | unsigned int nelements = ncopies * vect_nunits_for_cost (vectype); | ||||||||
4501 | inside_cost = record_stmt_cost (cost_vec, nelements, | ||||||||
4502 | vec_to_scalar, stmt_info, 0, | ||||||||
4503 | vect_body); | ||||||||
4504 | inside_cost += record_stmt_cost (cost_vec, nelements, | ||||||||
4505 | scalar_stmt, stmt_info, 0, | ||||||||
4506 | vect_body); | ||||||||
4507 | } | ||||||||
4508 | } | ||||||||
4509 | else | ||||||||
4510 | { | ||||||||
4511 | /* Add in cost for initial definition. | ||||||||
4512 | For cond reduction we have four vectors: initial index, step, | ||||||||
4513 | initial result of the data reduction, initial value of the index | ||||||||
4514 | reduction. */ | ||||||||
4515 | int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1; | ||||||||
4516 | prologue_cost += record_stmt_cost (cost_vec, prologue_stmts, | ||||||||
4517 | scalar_to_vec, stmt_info, 0, | ||||||||
4518 | vect_prologue); | ||||||||
4519 | } | ||||||||
4520 | |||||||||
4521 | /* Determine cost of epilogue code. | ||||||||
4522 | |||||||||
4523 | We have a reduction operator that will reduce the vector in one statement. | ||||||||
4524 | Also requires scalar extract. */ | ||||||||
4525 | |||||||||
4526 | if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info)) | ||||||||
4527 | { | ||||||||
4528 | if (reduc_fn != IFN_LAST) | ||||||||
4529 | { | ||||||||
4530 | if (reduction_type == COND_REDUCTION) | ||||||||
4531 | { | ||||||||
4532 | /* An EQ stmt and an COND_EXPR stmt. */ | ||||||||
4533 | epilogue_cost += record_stmt_cost (cost_vec, 2, | ||||||||
4534 | vector_stmt, stmt_info, 0, | ||||||||
4535 | vect_epilogue); | ||||||||
4536 | /* Reduction of the max index and a reduction of the found | ||||||||
4537 | values. */ | ||||||||
4538 | epilogue_cost += record_stmt_cost (cost_vec, 2, | ||||||||
4539 | vec_to_scalar, stmt_info, 0, | ||||||||
4540 | vect_epilogue); | ||||||||
4541 | /* A broadcast of the max value. */ | ||||||||
4542 | epilogue_cost += record_stmt_cost (cost_vec, 1, | ||||||||
4543 | scalar_to_vec, stmt_info, 0, | ||||||||
4544 | vect_epilogue); | ||||||||
4545 | } | ||||||||
4546 | else | ||||||||
4547 | { | ||||||||
4548 | epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, | ||||||||
4549 | stmt_info, 0, vect_epilogue); | ||||||||
4550 | epilogue_cost += record_stmt_cost (cost_vec, 1, | ||||||||
4551 | vec_to_scalar, stmt_info, 0, | ||||||||
4552 | vect_epilogue); | ||||||||
4553 | } | ||||||||
4554 | } | ||||||||
4555 | else if (reduction_type == COND_REDUCTION) | ||||||||
4556 | { | ||||||||
4557 | unsigned estimated_nunits = vect_nunits_for_cost (vectype); | ||||||||
4558 | /* Extraction of scalar elements. */ | ||||||||
4559 | epilogue_cost += record_stmt_cost (cost_vec, | ||||||||
4560 | 2 * estimated_nunits, | ||||||||
4561 | vec_to_scalar, stmt_info, 0, | ||||||||
4562 | vect_epilogue); | ||||||||
4563 | /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ | ||||||||
4564 | epilogue_cost += record_stmt_cost (cost_vec, | ||||||||
4565 | 2 * estimated_nunits - 3, | ||||||||
4566 | scalar_stmt, stmt_info, 0, | ||||||||
4567 | vect_epilogue); | ||||||||
4568 | } | ||||||||
4569 | else if (reduction_type == EXTRACT_LAST_REDUCTION | ||||||||
4570 | || reduction_type == FOLD_LEFT_REDUCTION) | ||||||||
4571 | /* No extra instructions need in the epilogue. */ | ||||||||
4572 | ; | ||||||||
4573 | else | ||||||||
4574 | { | ||||||||
4575 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)((tree_class_check ((vectype), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4575, __FUNCTION__))->type_common.size)); | ||||||||
4576 | tree bitsize = | ||||||||
4577 | TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt)))((tree_class_check ((((contains_struct_check ((gimple_assign_lhs (orig_stmt_info->stmt)), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4577, __FUNCTION__))->typed.type)), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4577, __FUNCTION__))->type_common.size); | ||||||||
4578 | int element_bitsize = tree_to_uhwi (bitsize); | ||||||||
4579 | int nelements = vec_size_in_bits / element_bitsize; | ||||||||
4580 | |||||||||
4581 | if (code == COND_EXPR) | ||||||||
4582 | code = MAX_EXPR; | ||||||||
4583 | |||||||||
4584 | optab = optab_for_tree_code (code, vectype, optab_default); | ||||||||
4585 | |||||||||
4586 | /* We have a whole vector shift available. */ | ||||||||
4587 | if (optab != unknown_optab | ||||||||
4588 | && VECTOR_MODE_P (mode)(((enum mode_class) mode_class[mode]) == MODE_VECTOR_BOOL || ( (enum mode_class) mode_class[mode]) == MODE_VECTOR_INT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_FLOAT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_FRACT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_UFRACT || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_ACCUM || ((enum mode_class) mode_class[mode]) == MODE_VECTOR_UACCUM) | ||||||||
4589 | && optab_handler (optab, mode) != CODE_FOR_nothing | ||||||||
4590 | && have_whole_vector_shift (mode)) | ||||||||
4591 | { | ||||||||
4592 | /* Final reduction via vector shifts and the reduction operator. | ||||||||
4593 | Also requires scalar extract. */ | ||||||||
4594 | epilogue_cost += record_stmt_cost (cost_vec, | ||||||||
4595 | exact_log2 (nelements) * 2, | ||||||||
4596 | vector_stmt, stmt_info, 0, | ||||||||
4597 | vect_epilogue); | ||||||||
4598 | epilogue_cost += record_stmt_cost (cost_vec, 1, | ||||||||
4599 | vec_to_scalar, stmt_info, 0, | ||||||||
4600 | vect_epilogue); | ||||||||
4601 | } | ||||||||
4602 | else | ||||||||
4603 | /* Use extracts and reduction op for final reduction. For N | ||||||||
4604 | elements, we have N extracts and N-1 reduction ops. */ | ||||||||
4605 | epilogue_cost += record_stmt_cost (cost_vec, | ||||||||
4606 | nelements + nelements - 1, | ||||||||
4607 | vector_stmt, stmt_info, 0, | ||||||||
4608 | vect_epilogue); | ||||||||
4609 | } | ||||||||
4610 | } | ||||||||
4611 | |||||||||
4612 | if (dump_enabled_p ()) | ||||||||
4613 | dump_printf (MSG_NOTE, | ||||||||
4614 | "vect_model_reduction_cost: inside_cost = %d, " | ||||||||
4615 | "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost, | ||||||||
4616 | prologue_cost, epilogue_cost); | ||||||||
4617 | } | ||||||||
4618 | |||||||||
4619 | |||||||||
4620 | |||||||||
4621 | /* Function get_initial_def_for_reduction | ||||||||
4622 | |||||||||
4623 | Input: | ||||||||
4624 | STMT_VINFO - a stmt that performs a reduction operation in the loop. | ||||||||
4625 | INIT_VAL - the initial value of the reduction variable | ||||||||
4626 | |||||||||
4627 | Output: | ||||||||
4628 | ADJUSTMENT_DEF - a tree that holds a value to be added to the final result | ||||||||
4629 | of the reduction (used for adjusting the epilog - see below). | ||||||||
4630 | Return a vector variable, initialized according to the operation that | ||||||||
4631 | STMT_VINFO performs. This vector will be used as the initial value | ||||||||
4632 | of the vector of partial results. | ||||||||
4633 | |||||||||
4634 | Option1 (adjust in epilog): Initialize the vector as follows: | ||||||||
4635 | add/bit or/xor: [0,0,...,0,0] | ||||||||
4636 | mult/bit and: [1,1,...,1,1] | ||||||||
4637 | min/max/cond_expr: [init_val,init_val,..,init_val,init_val] | ||||||||
4638 | and when necessary (e.g. add/mult case) let the caller know | ||||||||
4639 | that it needs to adjust the result by init_val. | ||||||||
4640 | |||||||||
4641 | Option2: Initialize the vector as follows: | ||||||||
4642 | add/bit or/xor: [init_val,0,0,...,0] | ||||||||
4643 | mult/bit and: [init_val,1,1,...,1] | ||||||||
4644 | min/max/cond_expr: [init_val,init_val,...,init_val] | ||||||||
4645 | and no adjustments are needed. | ||||||||
4646 | |||||||||
4647 | For example, for the following code: | ||||||||
4648 | |||||||||
4649 | s = init_val; | ||||||||
4650 | for (i=0;i<n;i++) | ||||||||
4651 | s = s + a[i]; | ||||||||
4652 | |||||||||
4653 | STMT_VINFO is 's = s + a[i]', and the reduction variable is 's'. | ||||||||
4654 | For a vector of 4 units, we want to return either [0,0,0,init_val], | ||||||||
4655 | or [0,0,0,0] and let the caller know that it needs to adjust | ||||||||
4656 | the result at the end by 'init_val'. | ||||||||
4657 | |||||||||
4658 | FORNOW, we are using the 'adjust in epilog' scheme, because this way the | ||||||||
4659 | initialization vector is simpler (same element in all entries), if | ||||||||
4660 | ADJUSTMENT_DEF is not NULL, and Option2 otherwise. | ||||||||
4661 | |||||||||
4662 | A cost model should help decide between these two schemes. */ | ||||||||
4663 | |||||||||
4664 | static tree | ||||||||
4665 | get_initial_def_for_reduction (loop_vec_info loop_vinfo, | ||||||||
4666 | stmt_vec_info stmt_vinfo, | ||||||||
4667 | enum tree_code code, tree init_val, | ||||||||
4668 | tree *adjustment_def) | ||||||||
4669 | { | ||||||||
4670 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop; | ||||||||
4671 | tree scalar_type = TREE_TYPE (init_val)((contains_struct_check ((init_val), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4671, __FUNCTION__))->typed.type); | ||||||||
4672 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); | ||||||||
4673 | tree def_for_init; | ||||||||
4674 | tree init_def; | ||||||||
4675 | REAL_VALUE_TYPEstruct real_value real_init_val = dconst0; | ||||||||
4676 | int int_init_val = 0; | ||||||||
4677 | gimple_seq stmts = NULLnullptr; | ||||||||
4678 | |||||||||
4679 | gcc_assert (vectype)((void)(!(vectype) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4679, __FUNCTION__), 0 : 0)); | ||||||||
4680 | |||||||||
4681 | gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)((void)(!((((enum tree_code) (scalar_type)->base.code) == POINTER_TYPE || ((enum tree_code) (scalar_type)->base.code) == REFERENCE_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == ENUMERAL_TYPE || ((enum tree_code) (scalar_type)->base.code) == BOOLEAN_TYPE || ((enum tree_code) (scalar_type)->base.code) == INTEGER_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == REAL_TYPE )) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4682, __FUNCTION__), 0 : 0)) | ||||||||
4682 | || SCALAR_FLOAT_TYPE_P (scalar_type))((void)(!((((enum tree_code) (scalar_type)->base.code) == POINTER_TYPE || ((enum tree_code) (scalar_type)->base.code) == REFERENCE_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == ENUMERAL_TYPE || ((enum tree_code) (scalar_type)->base.code) == BOOLEAN_TYPE || ((enum tree_code) (scalar_type)->base.code) == INTEGER_TYPE ) || (((enum tree_code) (scalar_type)->base.code) == REAL_TYPE )) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4682, __FUNCTION__), 0 : 0)); | ||||||||
4683 | |||||||||
4684 | gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo)((void)(!(nested_in_vect_loop_p (loop, stmt_vinfo) || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4685, __FUNCTION__), 0 : 0)) | ||||||||
4685 | || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father)((void)(!(nested_in_vect_loop_p (loop, stmt_vinfo) || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4685, __FUNCTION__), 0 : 0)); | ||||||||
4686 | |||||||||
4687 | /* ADJUSTMENT_DEF is NULL when called from | ||||||||
4688 | vect_create_epilog_for_reduction to vectorize double reduction. */ | ||||||||
4689 | if (adjustment_def) | ||||||||
4690 | *adjustment_def = NULLnullptr; | ||||||||
4691 | |||||||||
4692 | switch (code) | ||||||||
4693 | { | ||||||||
4694 | case WIDEN_SUM_EXPR: | ||||||||
4695 | case DOT_PROD_EXPR: | ||||||||
4696 | case SAD_EXPR: | ||||||||
4697 | case PLUS_EXPR: | ||||||||
4698 | case MINUS_EXPR: | ||||||||
4699 | case BIT_IOR_EXPR: | ||||||||
4700 | case BIT_XOR_EXPR: | ||||||||
4701 | case MULT_EXPR: | ||||||||
4702 | case BIT_AND_EXPR: | ||||||||
4703 | { | ||||||||
4704 | if (code == MULT_EXPR) | ||||||||
4705 | { | ||||||||
4706 | real_init_val = dconst1; | ||||||||
4707 | int_init_val = 1; | ||||||||
4708 | } | ||||||||
4709 | |||||||||
4710 | if (code == BIT_AND_EXPR) | ||||||||
4711 | int_init_val = -1; | ||||||||
4712 | |||||||||
4713 | if (SCALAR_FLOAT_TYPE_P (scalar_type)(((enum tree_code) (scalar_type)->base.code) == REAL_TYPE)) | ||||||||
4714 | def_for_init = build_real (scalar_type, real_init_val); | ||||||||
4715 | else | ||||||||
4716 | def_for_init = build_int_cst (scalar_type, int_init_val); | ||||||||
4717 | |||||||||
4718 | if (adjustment_def || operand_equal_p (def_for_init, init_val, 0)) | ||||||||
4719 | { | ||||||||
4720 | /* Option1: the first element is '0' or '1' as well. */ | ||||||||
4721 | if (!operand_equal_p (def_for_init, init_val, 0)) | ||||||||
4722 | *adjustment_def = init_val; | ||||||||
4723 | init_def = gimple_build_vector_from_val (&stmts, vectype, | ||||||||
4724 | def_for_init); | ||||||||
4725 | } | ||||||||
4726 | else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) | ||||||||
4727 | { | ||||||||
4728 | /* Option2 (variable length): the first element is INIT_VAL. */ | ||||||||
4729 | init_def = gimple_build_vector_from_val (&stmts, vectype, | ||||||||
4730 | def_for_init); | ||||||||
4731 | init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, | ||||||||
4732 | vectype, init_def, init_val); | ||||||||
4733 | } | ||||||||
4734 | else | ||||||||
4735 | { | ||||||||
4736 | /* Option2: the first element is INIT_VAL. */ | ||||||||
4737 | tree_vector_builder elts (vectype, 1, 2); | ||||||||
4738 | elts.quick_push (init_val); | ||||||||
4739 | elts.quick_push (def_for_init); | ||||||||
4740 | init_def = gimple_build_vector (&stmts, &elts); | ||||||||
4741 | } | ||||||||
4742 | } | ||||||||
4743 | break; | ||||||||
4744 | |||||||||
4745 | case MIN_EXPR: | ||||||||
4746 | case MAX_EXPR: | ||||||||
4747 | case COND_EXPR: | ||||||||
4748 | { | ||||||||
4749 | init_val = gimple_convert (&stmts, TREE_TYPE (vectype)((contains_struct_check ((vectype), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4749, __FUNCTION__))->typed.type), init_val); | ||||||||
4750 | init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); | ||||||||
4751 | } | ||||||||
4752 | break; | ||||||||
4753 | |||||||||
4754 | default: | ||||||||
4755 | gcc_unreachable ()(fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4755, __FUNCTION__)); | ||||||||
4756 | } | ||||||||
4757 | |||||||||
4758 | if (stmts) | ||||||||
4759 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); | ||||||||
4760 | return init_def; | ||||||||
4761 | } | ||||||||
4762 | |||||||||
4763 | /* Get at the initial defs for the reduction PHIs in SLP_NODE. | ||||||||
4764 | NUMBER_OF_VECTORS is the number of vector defs to create. | ||||||||
4765 | If NEUTRAL_OP is nonnull, introducing extra elements of that | ||||||||
4766 | value will not change the result. */ | ||||||||
4767 | |||||||||
4768 | static void | ||||||||
4769 | get_initial_defs_for_reduction (vec_info *vinfo, | ||||||||
4770 | slp_tree slp_node, | ||||||||
4771 | vec<tree> *vec_oprnds, | ||||||||
4772 | unsigned int number_of_vectors, | ||||||||
4773 | bool reduc_chain, tree neutral_op) | ||||||||
4774 | { | ||||||||
4775 | vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node)(slp_node)->stmts; | ||||||||
4776 | stmt_vec_info stmt_vinfo = stmts[0]; | ||||||||
4777 | unsigned HOST_WIDE_INTlong nunits; | ||||||||
4778 | unsigned j, number_of_places_left_in_vector; | ||||||||
4779 | tree vector_type; | ||||||||
4780 | unsigned int group_size = stmts.length (); | ||||||||
4781 | unsigned int i; | ||||||||
4782 | class loop *loop; | ||||||||
4783 | |||||||||
4784 | vector_type = STMT_VINFO_VECTYPE (stmt_vinfo)(stmt_vinfo)->vectype; | ||||||||
4785 | |||||||||
4786 | gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)((void)(!((stmt_vinfo)->def_type == vect_reduction_def) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4786, __FUNCTION__), 0 : 0)); | ||||||||
4787 | |||||||||
4788 | loop = (gimple_bb (stmt_vinfo->stmt))->loop_father; | ||||||||
4789 | gcc_assert (loop)((void)(!(loop) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4789, __FUNCTION__), 0 : 0)); | ||||||||
4790 | edge pe = loop_preheader_edge (loop); | ||||||||
4791 | |||||||||
4792 | gcc_assert (!reduc_chain || neutral_op)((void)(!(!reduc_chain || neutral_op) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4792, __FUNCTION__), 0 : 0)); | ||||||||
4793 | |||||||||
4794 | /* NUMBER_OF_COPIES is the number of times we need to use the same values in | ||||||||
4795 | created vectors. It is greater than 1 if unrolling is performed. | ||||||||
4796 | |||||||||
4797 | For example, we have two scalar operands, s1 and s2 (e.g., group of | ||||||||
4798 | strided accesses of size two), while NUNITS is four (i.e., four scalars | ||||||||
4799 | of this type can be packed in a vector). The output vector will contain | ||||||||
4800 | two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES | ||||||||
4801 | will be 2). | ||||||||
4802 | |||||||||
4803 | If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several | ||||||||
4804 | vectors containing the operands. | ||||||||
4805 | |||||||||
4806 | For example, NUNITS is four as before, and the group size is 8 | ||||||||
4807 | (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and | ||||||||
4808 | {s5, s6, s7, s8}. */ | ||||||||
4809 | |||||||||
4810 | if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) | ||||||||
4811 | nunits = group_size; | ||||||||
4812 | |||||||||
4813 | number_of_places_left_in_vector = nunits; | ||||||||
4814 | bool constant_p = true; | ||||||||
4815 | tree_vector_builder elts (vector_type, nunits, 1); | ||||||||
4816 | elts.quick_grow (nunits); | ||||||||
4817 | gimple_seq ctor_seq = NULLnullptr; | ||||||||
4818 | for (j = 0; j < nunits * number_of_vectors; ++j) | ||||||||
4819 | { | ||||||||
4820 | tree op; | ||||||||
4821 | i = j % group_size; | ||||||||
4822 | stmt_vinfo = stmts[i]; | ||||||||
4823 | |||||||||
4824 | /* Get the def before the loop. In reduction chain we have only | ||||||||
4825 | one initial value. Else we have as many as PHIs in the group. */ | ||||||||
4826 | if (reduc_chain) | ||||||||
4827 | op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe)gimple_phi_arg_def (((stmt_vinfo->stmt)), ((pe)->dest_idx )); | ||||||||
4828 | else if (((vec_oprnds->length () + 1) * nunits | ||||||||
4829 | - number_of_places_left_in_vector >= group_size) | ||||||||
4830 | && neutral_op) | ||||||||
4831 | op = neutral_op; | ||||||||
4832 | else | ||||||||
4833 | op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe)gimple_phi_arg_def (((stmt_vinfo->stmt)), ((pe)->dest_idx )); | ||||||||
4834 | |||||||||
4835 | /* Create 'vect_ = {op0,op1,...,opn}'. */ | ||||||||
4836 | number_of_places_left_in_vector--; | ||||||||
4837 | elts[nunits - number_of_places_left_in_vector - 1] = op; | ||||||||
4838 | if (!CONSTANT_CLASS_P (op)(tree_code_type[(int) (((enum tree_code) (op)->base.code)) ] == tcc_constant)) | ||||||||
4839 | constant_p = false; | ||||||||
4840 | |||||||||
4841 | if (number_of_places_left_in_vector == 0) | ||||||||
4842 | { | ||||||||
4843 | tree init; | ||||||||
4844 | if (constant_p && !neutral_op | ||||||||
4845 | ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) | ||||||||
4846 | : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)(!maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type), nunits))) | ||||||||
4847 | /* Build the vector directly from ELTS. */ | ||||||||
4848 | init = gimple_build_vector (&ctor_seq, &elts); | ||||||||
4849 | else if (neutral_op) | ||||||||
4850 | { | ||||||||
4851 | /* Build a vector of the neutral value and shift the | ||||||||
4852 | other elements into place. */ | ||||||||
4853 | init = gimple_build_vector_from_val (&ctor_seq, vector_type, | ||||||||
4854 | neutral_op); | ||||||||
4855 | int k = nunits; | ||||||||
4856 | while (k > 0 && elts[k - 1] == neutral_op) | ||||||||
4857 | k -= 1; | ||||||||
4858 | while (k > 0) | ||||||||
4859 | { | ||||||||
4860 | k -= 1; | ||||||||
4861 | init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, | ||||||||
4862 | vector_type, init, elts[k]); | ||||||||
4863 | } | ||||||||
4864 | } | ||||||||
4865 | else | ||||||||
4866 | { | ||||||||
4867 | /* First time round, duplicate ELTS to fill the | ||||||||
4868 | required number of vectors. */ | ||||||||
4869 | duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts, | ||||||||
4870 | number_of_vectors, *vec_oprnds); | ||||||||
4871 | break; | ||||||||
4872 | } | ||||||||
4873 | vec_oprnds->quick_push (init); | ||||||||
4874 | |||||||||
4875 | number_of_places_left_in_vector = nunits; | ||||||||
4876 | elts.new_vector (vector_type, nunits, 1); | ||||||||
4877 | elts.quick_grow (nunits); | ||||||||
4878 | constant_p = true; | ||||||||
4879 | } | ||||||||
4880 | } | ||||||||
4881 | if (ctor_seq != NULLnullptr) | ||||||||
4882 | gsi_insert_seq_on_edge_immediate (pe, ctor_seq); | ||||||||
4883 | } | ||||||||
4884 | |||||||||
4885 | /* For a statement STMT_INFO taking part in a reduction operation return | ||||||||
4886 | the stmt_vec_info the meta information is stored on. */ | ||||||||
4887 | |||||||||
4888 | stmt_vec_info | ||||||||
4889 | info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) | ||||||||
4890 | { | ||||||||
4891 | stmt_info = vect_orig_stmt (stmt_info); | ||||||||
4892 | gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info))((void)(!((stmt_info)->reduc_def) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4892, __FUNCTION__), 0 : 0)); | ||||||||
4893 | if (!is_a <gphi *> (stmt_info->stmt) | ||||||||
4894 | || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))((((stmt_info)->def_type) == vect_reduction_def) || (((stmt_info )->def_type) == vect_double_reduction_def) || (((stmt_info )->def_type) == vect_nested_cycle))) | ||||||||
4895 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def; | ||||||||
4896 | gphi *phi = as_a <gphi *> (stmt_info->stmt); | ||||||||
4897 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def) | ||||||||
4898 | { | ||||||||
4899 | if (gimple_phi_num_args (phi) == 1) | ||||||||
4900 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info)(stmt_info)->reduc_def; | ||||||||
4901 | } | ||||||||
4902 | else if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_nested_cycle) | ||||||||
4903 | { | ||||||||
4904 | edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father); | ||||||||
4905 | stmt_vec_info info | ||||||||
4906 | = vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe)gimple_phi_arg_def (((phi)), ((pe)->dest_idx))); | ||||||||
4907 | if (info && STMT_VINFO_DEF_TYPE (info)(info)->def_type == vect_double_reduction_def) | ||||||||
4908 | stmt_info = info; | ||||||||
4909 | } | ||||||||
4910 | return stmt_info; | ||||||||
4911 | } | ||||||||
4912 | |||||||||
4913 | /* Function vect_create_epilog_for_reduction | ||||||||
4914 | |||||||||
4915 | Create code at the loop-epilog to finalize the result of a reduction | ||||||||
4916 | computation. | ||||||||
4917 | |||||||||
4918 | STMT_INFO is the scalar reduction stmt that is being vectorized. | ||||||||
4919 | SLP_NODE is an SLP node containing a group of reduction statements. The | ||||||||
4920 | first one in this group is STMT_INFO. | ||||||||
4921 | SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE | ||||||||
4922 | REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi | ||||||||
4923 | (counting from 0) | ||||||||
4924 | |||||||||
4925 | This function: | ||||||||
4926 | 1. Completes the reduction def-use cycles. | ||||||||
4927 | 2. "Reduces" each vector of partial results VECT_DEFS into a single result, | ||||||||
4928 | by calling the function specified by REDUC_FN if available, or by | ||||||||
4929 | other means (whole-vector shifts or a scalar loop). | ||||||||
4930 | The function also creates a new phi node at the loop exit to preserve | ||||||||
4931 | loop-closed form, as illustrated below. | ||||||||
4932 | |||||||||
4933 | The flow at the entry to this function: | ||||||||
4934 | |||||||||
4935 | loop: | ||||||||
4936 | vec_def = phi <vec_init, null> # REDUCTION_PHI | ||||||||
4937 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO | ||||||||
4938 | s_loop = scalar_stmt # (scalar) STMT_INFO | ||||||||
4939 | loop_exit: | ||||||||
4940 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI | ||||||||
4941 | use <s_out0> | ||||||||
4942 | use <s_out0> | ||||||||
4943 | |||||||||
4944 | The above is transformed by this function into: | ||||||||
4945 | |||||||||
4946 | loop: | ||||||||
4947 | vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI | ||||||||
4948 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO | ||||||||
4949 | s_loop = scalar_stmt # (scalar) STMT_INFO | ||||||||
4950 | loop_exit: | ||||||||
4951 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI | ||||||||
4952 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI | ||||||||
4953 | v_out2 = reduce <v_out1> | ||||||||
4954 | s_out3 = extract_field <v_out2, 0> | ||||||||
4955 | s_out4 = adjust_result <s_out3> | ||||||||
4956 | use <s_out4> | ||||||||
4957 | use <s_out4> | ||||||||
4958 | */ | ||||||||
4959 | |||||||||
4960 | static void | ||||||||
4961 | vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, | ||||||||
4962 | stmt_vec_info stmt_info, | ||||||||
4963 | slp_tree slp_node, | ||||||||
4964 | slp_instance slp_node_instance) | ||||||||
4965 | { | ||||||||
4966 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); | ||||||||
4967 | gcc_assert (reduc_info->is_reduc_info)((void)(!(reduc_info->is_reduc_info) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4967, __FUNCTION__), 0 : 0)); | ||||||||
| |||||||||
4968 | /* For double reductions we need to get at the inner loop reduction | ||||||||
4969 | stmt which has the meta info attached. Our stmt_info is that of the | ||||||||
4970 | loop-closed PHI of the inner loop which we remember as | ||||||||
4971 | def for the reduction PHI generation. */ | ||||||||
4972 | bool double_reduc = false; | ||||||||
4973 | stmt_vec_info rdef_info = stmt_info; | ||||||||
4974 | if (STMT_VINFO_DEF_TYPE (stmt_info)(stmt_info)->def_type == vect_double_reduction_def) | ||||||||
4975 | { | ||||||||
4976 | gcc_assert (!slp_node)((void)(!(!slp_node) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 4976, __FUNCTION__), 0 : 0)); | ||||||||
4977 | double_reduc = true; | ||||||||
4978 | stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def | ||||||||
4979 | (stmt_info->stmt, 0)); | ||||||||
4980 | stmt_info = vect_stmt_to_vectorize (stmt_info); | ||||||||
4981 | } | ||||||||
4982 | gphi *reduc_def_stmt | ||||||||
4983 | = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))(vect_orig_stmt (stmt_info))->reduc_def->stmt); | ||||||||
4984 | enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info)(reduc_info)->reduc_code; | ||||||||
4985 | internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info)(reduc_info)->reduc_fn; | ||||||||
4986 | tree vectype; | ||||||||
4987 | machine_mode mode; | ||||||||
4988 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo)(loop_vinfo)->loop, *outer_loop = NULLnullptr; | ||||||||
4989 | basic_block exit_bb; | ||||||||
4990 | tree scalar_dest; | ||||||||
4991 | tree scalar_type; | ||||||||
4992 | gimple *new_phi = NULLnullptr, *phi; | ||||||||
4993 | gimple_stmt_iterator exit_gsi; | ||||||||
4994 | tree new_temp = NULL_TREE(tree) nullptr, new_name, new_scalar_dest; | ||||||||
4995 | gimple *epilog_stmt = NULLnullptr; | ||||||||
4996 | gimple *exit_phi; | ||||||||
4997 | tree bitsize; | ||||||||
4998 | tree def; | ||||||||
4999 | tree orig_name, scalar_result; | ||||||||
5000 | imm_use_iterator imm_iter, phi_imm_iter; | ||||||||
5001 | use_operand_p use_p, phi_use_p; | ||||||||
5002 | gimple *use_stmt; | ||||||||
5003 | bool nested_in_vect_loop = false; | ||||||||
5004 | auto_vec<gimple *> new_phis; | ||||||||
5005 | int j, i; | ||||||||
5006 | auto_vec<tree> scalar_results; | ||||||||
5007 | unsigned int group_size = 1, k; | ||||||||
5008 | auto_vec<gimple *> phis; | ||||||||
5009 | bool slp_reduc = false; | ||||||||
5010 | bool direct_slp_reduc; | ||||||||
5011 | tree new_phi_result; | ||||||||
5012 | tree induction_index = NULL_TREE(tree) nullptr; | ||||||||
5013 | |||||||||
5014 | if (slp_node) | ||||||||
5015 | group_size = SLP_TREE_LANES (slp_node)(slp_node)->lanes; | ||||||||
5016 | |||||||||
5017 | if (nested_in_vect_loop_p (loop, stmt_info)) | ||||||||
5018 | { | ||||||||
5019 | outer_loop = loop; | ||||||||
5020 | loop = loop->inner; | ||||||||
5021 | nested_in_vect_loop = true; | ||||||||
5022 | gcc_assert (!slp_node)((void)(!(!slp_node) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5022, __FUNCTION__), 0 : 0)); | ||||||||
5023 | } | ||||||||
5024 | gcc_assert (!nested_in_vect_loop || double_reduc)((void)(!(!nested_in_vect_loop || double_reduc) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5024, __FUNCTION__), 0 : 0)); | ||||||||
5025 | |||||||||
5026 | vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info)(reduc_info)->reduc_vectype; | ||||||||
5027 | gcc_assert (vectype)((void)(!(vectype) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5027, __FUNCTION__), 0 : 0)); | ||||||||
5028 | mode = TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5028, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode); | ||||||||
5029 | |||||||||
5030 | tree initial_def = NULLnullptr; | ||||||||
5031 | tree induc_val = NULL_TREE(tree) nullptr; | ||||||||
5032 | tree adjustment_def = NULLnullptr; | ||||||||
5033 | if (slp_node
| ||||||||
5034 | ; | ||||||||
5035 | else | ||||||||
5036 | { | ||||||||
5037 | /* Get at the scalar def before the loop, that defines the initial value | ||||||||
5038 | of the reduction variable. */ | ||||||||
5039 | initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,gimple_phi_arg_def (((reduc_def_stmt)), ((loop_preheader_edge (loop))->dest_idx)) | ||||||||
5040 | loop_preheader_edge (loop))gimple_phi_arg_def (((reduc_def_stmt)), ((loop_preheader_edge (loop))->dest_idx)); | ||||||||
5041 | /* Optimize: for induction condition reduction, if we can't use zero | ||||||||
5042 | for induc_val, use initial_def. */ | ||||||||
5043 | if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type == INTEGER_INDUC_COND_REDUCTION) | ||||||||
5044 | induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info)(reduc_info)->induc_cond_initial_val; | ||||||||
5045 | else if (double_reduc
| ||||||||
5046 | ; | ||||||||
5047 | else if (nested_in_vect_loop
| ||||||||
5048 | ; | ||||||||
5049 | else | ||||||||
5050 | adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)(reduc_info)->reduc_epilogue_adjustment; | ||||||||
5051 | } | ||||||||
5052 | |||||||||
5053 | unsigned vec_num; | ||||||||
5054 | int ncopies; | ||||||||
5055 | if (slp_node
| ||||||||
5056 | { | ||||||||
5057 | vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)(slp_node_instance->reduc_phis)->vec_stmts.length (); | ||||||||
5058 | ncopies = 1; | ||||||||
5059 | } | ||||||||
5060 | else | ||||||||
5061 | { | ||||||||
5062 | stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt); | ||||||||
5063 | vec_num = 1; | ||||||||
5064 | ncopies = STMT_VINFO_VEC_STMTS (reduc_info)(reduc_info)->vec_stmts.length (); | ||||||||
5065 | } | ||||||||
5066 | |||||||||
5067 | /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) | ||||||||
5068 | which is updated with the current index of the loop for every match of | ||||||||
5069 | the original loop's cond_expr (VEC_STMT). This results in a vector | ||||||||
5070 | containing the last time the condition passed for that vector lane. | ||||||||
5071 | The first match will be a 1 to allow 0 to be used for non-matching | ||||||||
5072 | indexes. If there are no matches at all then the vector will be all | ||||||||
5073 | zeroes. | ||||||||
5074 | |||||||||
5075 | PR92772: This algorithm is broken for architectures that support | ||||||||
5076 | masked vectors, but do not provide fold_extract_last. */ | ||||||||
5077 | if (STMT_VINFO_REDUC_TYPE (reduc_info)(reduc_info)->reduc_type == COND_REDUCTION) | ||||||||
5078 | { | ||||||||
5079 | auto_vec<std::pair<tree, bool>, 2> ccompares; | ||||||||
5080 | stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info)(reduc_info)->reduc_def; | ||||||||
5081 | cond_info = vect_stmt_to_vectorize (cond_info); | ||||||||
5082 | while (cond_info != reduc_info) | ||||||||
5083 | { | ||||||||
5084 | if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) | ||||||||
5085 | { | ||||||||
5086 | gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)(cond_info)->vec_stmts[0]; | ||||||||
5087 | gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR)((void)(!(gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5087, __FUNCTION__), 0 : 0)); | ||||||||
5088 | ccompares.safe_push | ||||||||
5089 | (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)), | ||||||||
5090 | STMT_VINFO_REDUC_IDX (cond_info)(cond_info)->reduc_idx == 2)); | ||||||||
5091 | } | ||||||||
5092 | cond_info | ||||||||
5093 | = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, | ||||||||
5094 | 1 + STMT_VINFO_REDUC_IDX(cond_info)->reduc_idx | ||||||||
5095 | (cond_info)(cond_info)->reduc_idx)); | ||||||||
5096 | cond_info = vect_stmt_to_vectorize (cond_info); | ||||||||
5097 | } | ||||||||
5098 | gcc_assert (ccompares.length () != 0)((void)(!(ccompares.length () != 0) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5098, __FUNCTION__), 0 : 0)); | ||||||||
5099 | |||||||||
5100 | tree indx_before_incr, indx_after_incr; | ||||||||
5101 | poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); | ||||||||
5102 | int scalar_precision | ||||||||
5103 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))(as_a <scalar_mode> ((tree_class_check ((((contains_struct_check ((vectype), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5103, __FUNCTION__))->typed.type)), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5103, __FUNCTION__))->type_common.mode))); | ||||||||
5104 | tree cr_index_scalar_type = make_unsigned_type (scalar_precision); | ||||||||
5105 | tree cr_index_vector_type = get_related_vectype_for_scalar_type | ||||||||
5106 | (TYPE_MODE (vectype)((((enum tree_code) ((tree_class_check ((vectype), (tcc_type) , "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5106, __FUNCTION__)))->base.code) == VECTOR_TYPE) ? vector_type_mode (vectype) : (vectype)->type_common.mode), cr_index_scalar_type, | ||||||||
5107 | TYPE_VECTOR_SUBPARTS (vectype)); | ||||||||
5108 | |||||||||
5109 | /* First we create a simple vector induction variable which starts | ||||||||
5110 | with the values {1,2,3,...} (SERIES_VECT) and increments by the | ||||||||
5111 | vector size (STEP). */ | ||||||||
5112 | |||||||||
5113 | /* Create a {1,2,3,...} vector. */ | ||||||||
5114 | tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); | ||||||||
5115 | |||||||||
5116 | /* Create a vector of the step value. */ | ||||||||
5117 | tree step = build_int_cst (cr_index_scalar_type, nunits_out); | ||||||||
5118 | tree vec_step = build_vector_from_val (cr_index_vector_type, step); | ||||||||
5119 | |||||||||
5120 | /* Create an induction variable. */ | ||||||||
5121 | gimple_stmt_iterator incr_gsi; | ||||||||
5122 | bool insert_after; | ||||||||
5123 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); | ||||||||
5124 | create_iv (series_vect, vec_step, NULL_TREE(tree) nullptr, loop, &incr_gsi, | ||||||||
5125 | insert_after, &indx_before_incr, &indx_after_incr); | ||||||||
5126 | |||||||||
5127 | /* Next create a new phi node vector (NEW_PHI_TREE) which starts | ||||||||
5128 | filled with zeros (VEC_ZERO). */ | ||||||||
5129 | |||||||||
5130 | /* Create a vector of 0s. */ | ||||||||
5131 | tree zero = build_zero_cst (cr_index_scalar_type); | ||||||||
5132 | tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); | ||||||||
5133 | |||||||||
5134 | /* Create a vector phi node. */ | ||||||||
5135 | tree new_phi_tree = make_ssa_name (cr_index_vector_type); | ||||||||
5136 | new_phi = create_phi_node (new_phi_tree, loop->header); | ||||||||
5137 | add_phi_arg (as_a <gphi *> (new_phi), vec_zero, | ||||||||
5138 | loop_preheader_edge (loop), UNKNOWN_LOCATION((location_t) 0)); | ||||||||
5139 | |||||||||
5140 | /* Now take the condition from the loops original cond_exprs | ||||||||
5141 | and produce a new cond_exprs (INDEX_COND_EXPR) which for | ||||||||
5142 | every match uses values from the induction variable | ||||||||
5143 | (INDEX_BEFORE_INCR) otherwise uses values from the phi node | ||||||||
5144 | (NEW_PHI_TREE). | ||||||||
5145 | Finally, we update the phi (NEW_PHI_TREE) to take the value of | ||||||||
5146 | the new cond_expr (INDEX_COND_EXPR). */ | ||||||||
5147 | gimple_seq stmts = NULLnullptr; | ||||||||
5148 | for (int i = ccompares.length () - 1; i != -1; --i) | ||||||||
5149 | { | ||||||||
5150 | tree ccompare = ccompares[i].first; | ||||||||
5151 | if (ccompares[i].second) | ||||||||
5152 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, | ||||||||
5153 | cr_index_vector_type, | ||||||||
5154 | ccompare, | ||||||||
5155 | indx_before_incr, new_phi_tree); | ||||||||
5156 | else | ||||||||
5157 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, | ||||||||
5158 | cr_index_vector_type, | ||||||||
5159 | ccompare, | ||||||||
5160 | new_phi_tree, indx_before_incr); | ||||||||
5161 | } | ||||||||
5162 | gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); | ||||||||
5163 | |||||||||
5164 | /* Update the phi with the vec cond. */ | ||||||||
5165 | induction_index = new_phi_tree; | ||||||||
5166 | add_phi_arg (as_a <gphi *> (new_phi), induction_index, | ||||||||
5167 | loop_latch_edge (loop), UNKNOWN_LOCATION((location_t) 0)); | ||||||||
5168 | } | ||||||||
5169 | |||||||||
5170 | /* 2. Create epilog code. | ||||||||
5171 | The reduction epilog code operates across the elements of the vector | ||||||||
5172 | of partial results computed by the vectorized loop. | ||||||||
5173 | The reduction epilog code consists of: | ||||||||
5174 | |||||||||
5175 | step 1: compute the scalar result in a vector (v_out2) | ||||||||
5176 | step 2: extract the scalar result (s_out3) from the vector (v_out2) | ||||||||
5177 | step 3: adjust the scalar result (s_out3) if needed. | ||||||||
5178 | |||||||||
5179 | Step 1 can be accomplished using one the following three schemes: | ||||||||
5180 | (scheme 1) using reduc_fn, if available. | ||||||||
5181 | (scheme 2) using whole-vector shifts, if available. | ||||||||
5182 | (scheme 3) using a scalar loop. In this case steps 1+2 above are | ||||||||
5183 | combined. | ||||||||
5184 | |||||||||
5185 | The overall epilog code looks like this: | ||||||||
5186 | |||||||||
5187 | s_out0 = phi <s_loop> # original EXIT_PHI | ||||||||
5188 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI | ||||||||
5189 | v_out2 = reduce <v_out1> # step 1 | ||||||||
5190 | s_out3 = extract_field <v_out2, 0> # step 2 | ||||||||
5191 | s_out4 = adjust_result <s_out3> # step 3 | ||||||||
5192 | |||||||||
5193 | (step 3 is optional, and steps 1 and 2 may be combined). | ||||||||
5194 | Lastly, the uses of s_out0 are replaced by s_out4. */ | ||||||||
5195 | |||||||||
5196 | |||||||||
5197 | /* 2.1 Create new loop-exit-phis to preserve loop-closed form: | ||||||||
5198 | v_out1 = phi <VECT_DEF> | ||||||||
5199 | Store them in NEW_PHIS. */ | ||||||||
5200 | if (double_reduc
| ||||||||
5201 | loop = outer_loop; | ||||||||
5202 | exit_bb = single_exit (loop)->dest; | ||||||||
5203 | new_phis.create (slp_node
| ||||||||
5204 | for (unsigned i = 0; i < vec_num; i++) | ||||||||
5205 | { | ||||||||
5206 | if (slp_node
| ||||||||
5207 | def = vect_get_slp_vect_def (slp_node, i); | ||||||||
5208 | else | ||||||||
5209 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)(rdef_info)->vec_stmts[0]); | ||||||||
5210 | for (j = 0; j < ncopies; j++) | ||||||||
5211 | { | ||||||||
5212 | tree new_def = copy_ssa_name (def); | ||||||||
5213 | phi = create_phi_node (new_def, exit_bb); | ||||||||
5214 | if (j == 0) | ||||||||
5215 | new_phis.quick_push (phi); | ||||||||
5216 | else | ||||||||
5217 | { | ||||||||
5218 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)(rdef_info)->vec_stmts[j]); | ||||||||
5219 | new_phis.quick_push (phi); | ||||||||
5220 | } | ||||||||
5221 | |||||||||
5222 | SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def)set_ssa_use_from_ptr (gimple_phi_arg_imm_use_ptr (((phi)), (( single_exit (loop)->dest_idx))), (def)); | ||||||||
5223 | } | ||||||||
5224 | } | ||||||||
5225 | |||||||||
5226 | exit_gsi = gsi_after_labels (exit_bb); | ||||||||
5227 | |||||||||
5228 | /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 | ||||||||
5229 | (i.e. when reduc_fn is not available) and in the final adjustment | ||||||||
5230 | code (if needed). Also get the original scalar reduction variable as | ||||||||
5231 | defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it | ||||||||
5232 | represents a reduction pattern), the tree-code and scalar-def are | ||||||||
5233 | taken from the original stmt that the pattern-stmt (STMT) replaces. | ||||||||
5234 | Otherwise (it is a regular reduction) - the tree-code and scalar-def | ||||||||
5235 | are taken from STMT. */ | ||||||||
5236 | |||||||||
5237 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); | ||||||||
5238 | if (orig_stmt_info
| ||||||||
5239 | { | ||||||||
5240 | /* Reduction pattern */ | ||||||||
5241 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info))((void)(!((orig_stmt_info)->in_pattern_p) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5241, __FUNCTION__), 0 : 0)); | ||||||||
5242 | gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info)((void)(!((orig_stmt_info)->related_stmt == stmt_info) ? fancy_abort ("/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5242, __FUNCTION__), 0 : 0)); | ||||||||
5243 | } | ||||||||
5244 | |||||||||
5245 | scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt); | ||||||||
5246 | scalar_type = TREE_TYPE (scalar_dest)((contains_struct_check ((scalar_dest), (TS_TYPED), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5246, __FUNCTION__))->typed.type); | ||||||||
5247 | scalar_results.create (group_size); | ||||||||
5248 | new_scalar_dest = vect_create_destination_var (scalar_dest, NULLnullptr); | ||||||||
5249 | bitsize = TYPE_SIZE (scalar_type)((tree_class_check ((scalar_type), (tcc_type), "/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang-static-analyzer/build/gcc/tree-vect-loop.c" , 5249, __FUNCTION__))->type_common.size); | ||||||||
5250 | |||||||||
5251 | /* SLP reduction without reduction chain, e.g., | ||||||||
5252 | # a1 = phi <a2, a0> | ||||||||
5253 | # b1 = phi <b2, b0> | ||||||||
5254 | a2 = operation (a1) | ||||||||
5255 | b2 = operation (b1) */ | ||||||||
5256 | slp_reduc = (slp_node
, 5256, __FUNCTION__), 0 : 0)), (stmt_info)->first_element )); | ||||||||
5257 | |||||||||
5258 | /* True if we should implement SLP_REDUC using native reduction operations | ||||||||
5259 | instead of scalar operations. */ | ||||||||
5260 | direct_slp_reduc = (reduc_fn != IFN_LAST | ||||||||
5261 | && slp_reduc
|
33.1 | 'slp_reduc' is false |
38.3 | 'reduc_fn' is not equal to IFN_LAST |
38.3 | 'reduc_fn' is not equal to IFN_LAST |
38.4 | 'slp_reduc' is false |
38.4 | 'slp_reduc' is false |