LCOV - code coverage report
Current view: top level - gcc - vec-perm-indices.c (source / functions) Hit Total Coverage
Test: gcc.info Lines: 114 125 91.2 %
Date: 2020-04-04 11:58:09 Functions: 9 10 90.0 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* A representation of vector permutation indices.
       2                 :            :    Copyright (C) 2017-2020 Free Software Foundation, Inc.
       3                 :            : 
       4                 :            : This file is part of GCC.
       5                 :            : 
       6                 :            : GCC is free software; you can redistribute it and/or modify it under
       7                 :            : the terms of the GNU General Public License as published by the Free
       8                 :            : Software Foundation; either version 3, or (at your option) any later
       9                 :            : version.
      10                 :            : 
      11                 :            : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12                 :            : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13                 :            : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14                 :            : for more details.
      15                 :            : 
      16                 :            : You should have received a copy of the GNU General Public License
      17                 :            : along with GCC; see the file COPYING3.  If not see
      18                 :            : <http://www.gnu.org/licenses/>.  */
      19                 :            : 
      20                 :            : #include "config.h"
      21                 :            : #include "system.h"
      22                 :            : #include "coretypes.h"
      23                 :            : #include "vec-perm-indices.h"
      24                 :            : #include "tree.h"
      25                 :            : #include "fold-const.h"
      26                 :            : #include "tree-vector-builder.h"
      27                 :            : #include "backend.h"
      28                 :            : #include "rtl.h"
      29                 :            : #include "memmodel.h"
      30                 :            : #include "emit-rtl.h"
      31                 :            : #include "selftest.h"
      32                 :            : #include "rtx-vector-builder.h"
      33                 :            : 
      34                 :            : /* Switch to a new permutation vector that selects between NINPUTS vector
      35                 :            :    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
      36                 :            :    new permutation vector from ELEMENTS, clamping each one to be in range.  */
      37                 :            : 
      38                 :            : void
      39                 :     745507 : vec_perm_indices::new_vector (const vec_perm_builder &elements,
      40                 :            :                               unsigned int ninputs,
      41                 :            :                               poly_uint64 nelts_per_input)
      42                 :            : {
      43                 :     745507 :   m_ninputs = ninputs;
      44                 :     745507 :   m_nelts_per_input = nelts_per_input;
      45                 :            :   /* If the vector has a constant number of elements, expand the
      46                 :            :      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
      47                 :            :      wrap halfway if there is only one vector input, and we want
      48                 :            :      the wrapped form to be the canonical one.
      49                 :            : 
      50                 :            :      If the vector has a variable number of elements, just copy
      51                 :            :      the encoding.  In that case the unwrapped form is canonical
      52                 :            :      and there is no way of representing the wrapped form.  */
      53                 :     745507 :   poly_uint64 full_nelts = elements.full_nelts ();
      54                 :     745507 :   unsigned HOST_WIDE_INT copy_nelts;
      55                 :     745507 :   if (full_nelts.is_constant (&copy_nelts))
      56                 :     745507 :     m_encoding.new_vector (full_nelts, copy_nelts, 1);
      57                 :            :   else
      58                 :            :     {
      59                 :            :       copy_nelts = elements.encoded_nelts ();
      60                 :            :       m_encoding.new_vector (full_nelts, elements.npatterns (),
      61                 :            :                              elements.nelts_per_pattern ());
      62                 :            :     }
      63                 :     745507 :   unsigned int npatterns = m_encoding.npatterns ();
      64                 :    4751090 :   for (unsigned int i = 0; i < npatterns; ++i)
      65                 :    4005720 :     m_encoding.quick_push (clamp (elements.elt (i)));
      66                 :            :   /* Use the fact that:
      67                 :            : 
      68                 :            :         (a + b) % c == ((a % c) + (b % c)) % c
      69                 :            : 
      70                 :            :      to simplify the clamping of variable-length vectors.  */
      71                 :     745507 :   for (unsigned int i = npatterns; i < copy_nelts; ++i)
      72                 :            :     {
      73                 :          0 :       element_type step = clamp (elements.elt (i)
      74                 :          0 :                                  - elements.elt (i - npatterns));
      75                 :          0 :       m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
      76                 :            :     }
      77                 :     745507 :   m_encoding.finalize ();
      78                 :     745507 : }
      79                 :            : 
      80                 :            : /* Switch to a new permutation vector that selects the same input elements
      81                 :            :    as ORIG, but with each element split into FACTOR pieces.  For example,
      82                 :            :    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
      83                 :            :    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
      84                 :            : 
      85                 :            : void
      86                 :      37113 : vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
      87                 :            :                                        unsigned int factor)
      88                 :            : {
      89                 :      37113 :   m_ninputs = orig.m_ninputs;
      90                 :      37113 :   m_nelts_per_input = orig.m_nelts_per_input * factor;
      91                 :      37113 :   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
      92                 :      37113 :                          orig.m_encoding.npatterns () * factor,
      93                 :            :                          orig.m_encoding.nelts_per_pattern ());
      94                 :      37113 :   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
      95                 :     152879 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
      96                 :            :     {
      97                 :     115766 :       element_type base = orig.m_encoding[i] * factor;
      98                 :     605022 :       for (unsigned int j = 0; j < factor; ++j)
      99                 :     489256 :         m_encoding.quick_push (base + j);
     100                 :            :     }
     101                 :      37113 :   m_encoding.finalize ();
     102                 :      37113 : }
     103                 :            : 
     104                 :            : /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
     105                 :            :    the values of the permutation vector but it doesn't change the way that
     106                 :            :    the elements are encoded.  */
     107                 :            : 
     108                 :            : void
     109                 :       9293 : vec_perm_indices::rotate_inputs (int delta)
     110                 :            : {
     111                 :       9293 :   element_type element_delta = delta * m_nelts_per_input;
     112                 :      96098 :   for (unsigned int i = 0; i < m_encoding.length (); ++i)
     113                 :      38756 :     m_encoding[i] = clamp (m_encoding[i] + element_delta);
     114                 :       9293 : }
     115                 :            : 
     116                 :            : /* Return true if index OUT_BASE + I * OUT_STEP selects input
     117                 :            :    element IN_BASE + I * IN_STEP.  For example, the call to test
     118                 :            :    whether a permute reverses a vector of N elements would be:
     119                 :            : 
     120                 :            :      series_p (0, 1, N - 1, -1)
     121                 :            : 
     122                 :            :    which would return true for { N - 1, N - 2, N - 3, ... }.
     123                 :            :    The calls to test for an interleaving of elements starting
     124                 :            :    at N1 and N2 would be:
     125                 :            : 
     126                 :            :      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
     127                 :            : 
     128                 :            :    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.  */
     129                 :            : 
     130                 :            : bool
     131                 :    1379430 : vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
     132                 :            :                             element_type in_base, element_type in_step) const
     133                 :            : {
     134                 :            :   /* Check the base value.  */
     135                 :    1379430 :   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
     136                 :            :     return false;
     137                 :            : 
     138                 :     206416 :   element_type full_nelts = m_encoding.full_nelts ();
     139                 :     206416 :   unsigned int npatterns = m_encoding.npatterns ();
     140                 :            : 
     141                 :            :   /* Calculate which multiple of OUT_STEP elements we need to get
     142                 :            :      back to the same pattern.  */
     143                 :     206416 :   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
     144                 :            : 
     145                 :            :   /* Check the steps.  */
     146                 :     206416 :   in_step = clamp (in_step);
     147                 :     206416 :   out_base += out_step;
     148                 :     206416 :   unsigned int limit = 0;
     149                 :     345688 :   for (;;)
     150                 :            :     {
     151                 :            :       /* Succeed if we've checked all the elements in the vector.  */
     152                 :     276052 :       if (known_ge (out_base, full_nelts))
     153                 :     206416 :         return true;
     154                 :            : 
     155                 :     274646 :       if (out_base >= npatterns)
     156                 :            :         {
     157                 :            :           /* We've got to the end of the "foreground" values.  Check
     158                 :            :              2 elements from each pattern in the "background" values.  */
     159                 :     123777 :           if (limit == 0)
     160                 :     100724 :             limit = out_base + cycle_length * 2;
     161                 :      23053 :           else if (out_base >= limit)
     162                 :            :             return true;
     163                 :            :         }
     164                 :            : 
     165                 :     265230 :       element_type v0 = m_encoding.elt (out_base - out_step);
     166                 :     265230 :       element_type v1 = m_encoding.elt (out_base);
     167                 :     272555 :       if (maybe_ne (clamp (v1 - v0), in_step))
     168                 :            :         return false;
     169                 :            : 
     170                 :      69636 :       out_base += out_step;
     171                 :      69636 :     }
     172                 :            :   return true;
     173                 :            : }
     174                 :            : 
     175                 :            : /* Return true if all elements of the permutation vector are in the range
     176                 :            :    [START, START + SIZE).  */
     177                 :            : 
     178                 :            : bool
     179                 :    1068710 : vec_perm_indices::all_in_range_p (element_type start, element_type size) const
     180                 :            : {
     181                 :            :   /* Check the first two elements of each pattern.  */
     182                 :    1068710 :   unsigned int npatterns = m_encoding.npatterns ();
     183                 :    1068710 :   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
     184                 :    1068710 :   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
     185                 :    2517080 :   for (unsigned int i = 0; i < base_nelts; ++i)
     186                 :    3719140 :     if (!known_in_range_p (m_encoding[i], start, size))
     187                 :            :       return false;
     188                 :            : 
     189                 :            :   /* For stepped encodings, check the full range of the series.  */
     190                 :     246303 :   if (nelts_per_pattern == 3)
     191                 :            :     {
     192                 :     193188 :       element_type limit = input_nelts ();
     193                 :            : 
     194                 :            :       /* The number of elements in each pattern beyond the first two
     195                 :            :          that we checked above.  */
     196                 :     193188 :       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
     197                 :     193188 :                                          npatterns) - 2;
     198                 :     331816 :       for (unsigned int i = 0; i < npatterns; ++i)
     199                 :            :         {
     200                 :            :           /* BASE1 has been checked but BASE2 hasn't.   */
     201                 :     262802 :           element_type base1 = m_encoding[i + npatterns];
     202                 :     262802 :           element_type base2 = m_encoding[i + base_nelts];
     203                 :            : 
     204                 :            :           /* The step to add to get from BASE1 to each subsequent value.  */
     205                 :     262802 :           element_type step = clamp (base2 - base1);
     206                 :            : 
     207                 :            :           /* STEP has no inherent sign, so a value near LIMIT can
     208                 :            :              act as a negative step.  The series is in range if it
     209                 :            :              is in range according to one of the two interpretations.
     210                 :            : 
     211                 :            :              Since we're dealing with clamped values, ELEMENT_TYPE is
     212                 :            :              wide enough for overflow not to be a problem.  */
     213                 :     262802 :           element_type headroom_down = base1 - start;
     214                 :     262802 :           element_type headroom_up = size - headroom_down - 1;
     215                 :     262802 :           HOST_WIDE_INT diff;
     216                 :     262802 :           if ((!step.is_constant (&diff)
     217                 :     262802 :                || maybe_lt (headroom_up, diff * step_nelts))
     218                 :     124281 :               && (!(limit - step).is_constant (&diff)
     219                 :     124281 :                   || maybe_lt (headroom_down, diff * step_nelts)))
     220                 :    1068710 :             return false;
     221                 :            :         }
     222                 :            :     }
     223                 :            :   return true;
     224                 :            : }
     225                 :            : 
     226                 :            : /* Try to read the contents of VECTOR_CST CST as a constant permutation
     227                 :            :    vector.  Return true and add the elements to BUILDER on success,
     228                 :            :    otherwise return false without modifying BUILDER.  */
     229                 :            : 
     230                 :            : bool
     231                 :     676734 : tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
     232                 :            : {
     233                 :     676734 :   unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
     234                 :    3308080 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
     235                 :    2631350 :     if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
     236                 :            :       return false;
     237                 :            : 
     238                 :    1353470 :   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
     239                 :     676734 :                        VECTOR_CST_NPATTERNS (cst),
     240                 :     676734 :                        VECTOR_CST_NELTS_PER_PATTERN (cst));
     241                 :    3308080 :   for (unsigned int i = 0; i < encoded_nelts; ++i)
     242                 :    2631350 :     builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
     243                 :            :   return true;
     244                 :            : }
     245                 :            : 
     246                 :            : /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
     247                 :            : 
     248                 :            : tree
     249                 :      31670 : vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
     250                 :            : {
     251                 :      31670 :   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
     252                 :      31670 :   tree_vector_builder sel (type, indices.encoding ().npatterns (),
     253                 :      31670 :                            indices.encoding ().nelts_per_pattern ());
     254                 :      31670 :   unsigned int encoded_nelts = sel.encoded_nelts ();
     255                 :     151179 :   for (unsigned int i = 0; i < encoded_nelts; i++)
     256                 :     119509 :     sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
     257                 :      31670 :   return sel.build ();
     258                 :            : }
     259                 :            : 
     260                 :            : /* Return a CONST_VECTOR of mode MODE that contains the elements of
     261                 :            :    INDICES.  */
     262                 :            : 
     263                 :            : rtx
     264                 :          0 : vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
     265                 :            : {
     266                 :          0 :   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
     267                 :            :               && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
     268                 :          0 :   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
     269                 :          0 :                           indices.encoding ().nelts_per_pattern ());
     270                 :          0 :   unsigned int encoded_nelts = sel.encoded_nelts ();
     271                 :          0 :   for (unsigned int i = 0; i < encoded_nelts; i++)
     272                 :          0 :     sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
     273                 :          0 :   return sel.build ();
     274                 :            : }
     275                 :            : 
     276                 :            : #if CHECKING_P
     277                 :            : 
     278                 :            : namespace selftest {
     279                 :            : 
     280                 :            : /* Test a 12-element vector.  */
     281                 :            : 
     282                 :            : static void
     283                 :          2 : test_vec_perm_12 (void)
     284                 :            : {
     285                 :          2 :   vec_perm_builder builder (12, 12, 1);
     286                 :         10 :   for (unsigned int i = 0; i < 4; ++i)
     287                 :            :     {
     288                 :          8 :       builder.quick_push (i * 5);
     289                 :          8 :       builder.quick_push (3 + i);
     290                 :          8 :       builder.quick_push (2 + 3 * i);
     291                 :            :     }
     292                 :          4 :   vec_perm_indices indices (builder, 1, 12);
     293                 :          2 :   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
     294                 :          2 :   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
     295                 :          2 :   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
     296                 :          2 :   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
     297                 :          2 :   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
     298                 :            : 
     299                 :          2 :   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
     300                 :          2 :   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
     301                 :            : 
     302                 :          2 :   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
     303                 :          2 :   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
     304                 :            : 
     305                 :          2 :   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
     306                 :          2 :   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
     307                 :            : 
     308                 :          2 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
     309                 :          2 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
     310                 :          2 :   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
     311                 :          2 : }
     312                 :            : 
     313                 :            : /* Run selftests for this file.  */
     314                 :            : 
     315                 :            : void
     316                 :          2 : vec_perm_indices_c_tests ()
     317                 :            : {
     318                 :          2 :   test_vec_perm_12 ();
     319                 :          2 : }
     320                 :            : 
     321                 :            : } // namespace selftest
     322                 :            : 
     323                 :            : #endif

Generated by: LCOV version 1.0

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto --enable-host-shared. GCC test suite is run with the built compiler.