LCOV - code coverage report
Current view: top level - gcc - modulo-sched.c (source / functions) Hit Total Coverage
Test: gcc.info Lines: 133 1405 9.5 %
Date: 2020-04-04 11:58:09 Functions: 8 63 12.7 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* Swing Modulo Scheduling implementation.
       2                 :            :    Copyright (C) 2004-2020 Free Software Foundation, Inc.
       3                 :            :    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
       4                 :            : 
       5                 :            : This file is part of GCC.
       6                 :            : 
       7                 :            : GCC is free software; you can redistribute it and/or modify it under
       8                 :            : the terms of the GNU General Public License as published by the Free
       9                 :            : Software Foundation; either version 3, or (at your option) any later
      10                 :            : version.
      11                 :            : 
      12                 :            : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :            : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :            : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :            : for more details.
      16                 :            : 
      17                 :            : You should have received a copy of the GNU General Public License
      18                 :            : along with GCC; see the file COPYING3.  If not see
      19                 :            : <http://www.gnu.org/licenses/>.  */
      20                 :            : 
      21                 :            : 
      22                 :            : #include "config.h"
      23                 :            : #include "system.h"
      24                 :            : #include "coretypes.h"
      25                 :            : #include "backend.h"
      26                 :            : #include "target.h"
      27                 :            : #include "rtl.h"
      28                 :            : #include "tree.h"
      29                 :            : #include "cfghooks.h"
      30                 :            : #include "df.h"
      31                 :            : #include "memmodel.h"
      32                 :            : #include "optabs.h"
      33                 :            : #include "regs.h"
      34                 :            : #include "emit-rtl.h"
      35                 :            : #include "gcov-io.h"
      36                 :            : #include "profile.h"
      37                 :            : #include "insn-attr.h"
      38                 :            : #include "cfgrtl.h"
      39                 :            : #include "sched-int.h"
      40                 :            : #include "cfgloop.h"
      41                 :            : #include "expr.h"
      42                 :            : #include "ddg.h"
      43                 :            : #include "tree-pass.h"
      44                 :            : #include "dbgcnt.h"
      45                 :            : #include "loop-unroll.h"
      46                 :            : 
      47                 :            : #ifdef INSN_SCHEDULING
      48                 :            : 
      49                 :            : /* This file contains the implementation of the Swing Modulo Scheduler,
      50                 :            :    described in the following references:
      51                 :            :    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
      52                 :            :        Lifetime--sensitive modulo scheduling in a production environment.
      53                 :            :        IEEE Trans. on Comps., 50(3), March 2001
      54                 :            :    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
      55                 :            :        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
      56                 :            :        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
      57                 :            : 
      58                 :            :    The basic structure is:
      59                 :            :    1. Build a data-dependence graph (DDG) for each loop.
      60                 :            :    2. Use the DDG to order the insns of a loop (not in topological order
      61                 :            :       necessarily, but rather) trying to place each insn after all its
      62                 :            :       predecessors _or_ after all its successors.
      63                 :            :    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
      64                 :            :    4. Use the ordering to perform list-scheduling of the loop:
      65                 :            :       1. Set II = MII.  We will try to schedule the loop within II cycles.
      66                 :            :       2. Try to schedule the insns one by one according to the ordering.
      67                 :            :          For each insn compute an interval of cycles by considering already-
      68                 :            :          scheduled preds and succs (and associated latencies); try to place
      69                 :            :          the insn in the cycles of this window checking for potential
      70                 :            :          resource conflicts (using the DFA interface).
      71                 :            :          Note: this is different from the cycle-scheduling of schedule_insns;
      72                 :            :          here the insns are not scheduled monotonically top-down (nor bottom-
      73                 :            :          up).
      74                 :            :       3. If failed in scheduling all insns - bump II++ and try again, unless
      75                 :            :          II reaches an upper bound MaxII, in which case report failure.
      76                 :            :    5. If we succeeded in scheduling the loop within II cycles, we now
      77                 :            :       generate prolog and epilog, decrease the counter of the loop, and
      78                 :            :       perform modulo variable expansion for live ranges that span more than
      79                 :            :       II cycles (i.e. use register copies to prevent a def from overwriting
      80                 :            :       itself before reaching the use).
      81                 :            : 
      82                 :            :     SMS works with countable loops (1) whose control part can be easily
      83                 :            :     decoupled from the rest of the loop and (2) whose loop count can
      84                 :            :     be easily adjusted.  This is because we peel a constant number of
      85                 :            :     iterations into a prologue and epilogue for which we want to avoid
      86                 :            :     emitting the control part, and a kernel which is to iterate that
      87                 :            :     constant number of iterations less than the original loop.  So the
      88                 :            :     control part should be a set of insns clearly identified and having
      89                 :            :     its own iv, not otherwise used in the loop (at-least for now), which
      90                 :            :     initializes a register before the loop to the number of iterations.
      91                 :            :     Currently SMS relies on the do-loop pattern to recognize such loops,
      92                 :            :     where (1) the control part comprises of all insns defining and/or
      93                 :            :     using a certain 'count' register and (2) the loop count can be
      94                 :            :     adjusted by modifying this register prior to the loop.
      95                 :            :     TODO: Rely on cfgloop analysis instead.  */
      96                 :            : 
      97                 :            : /* This page defines partial-schedule structures and functions for
      98                 :            :    modulo scheduling.  */
      99                 :            : 
     100                 :            : typedef struct partial_schedule *partial_schedule_ptr;
     101                 :            : typedef struct ps_insn *ps_insn_ptr;
     102                 :            : 
     103                 :            : /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
     104                 :            : #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
     105                 :            : 
     106                 :            : /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
     107                 :            : #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
     108                 :            : 
     109                 :            : /* Perform signed modulo, always returning a non-negative value.  */
     110                 :            : #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
     111                 :            : 
     112                 :            : /* The number of different iterations the nodes in ps span, assuming
     113                 :            :    the stage boundaries are placed efficiently.  */
     114                 :            : #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
     115                 :            :                          + 1 + ii - 1) / ii)
     116                 :            : /* The stage count of ps.  */
     117                 :            : #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
     118                 :            : 
     119                 :            : /* A single instruction in the partial schedule.  */
     120                 :            : struct ps_insn
     121                 :            : {
     122                 :            :   /* Identifies the instruction to be scheduled.  Values smaller than
     123                 :            :      the ddg's num_nodes refer directly to ddg nodes.  A value of
     124                 :            :      X - num_nodes refers to register move X.  */
     125                 :            :   int id;
     126                 :            : 
     127                 :            :   /* The (absolute) cycle in which the PS instruction is scheduled.
     128                 :            :      Same as SCHED_TIME (node).  */
     129                 :            :   int cycle;
     130                 :            : 
     131                 :            :   /* The next/prev PS_INSN in the same row.  */
     132                 :            :   ps_insn_ptr next_in_row,
     133                 :            :               prev_in_row;
     134                 :            : 
     135                 :            : };
     136                 :            : 
     137                 :            : /* Information about a register move that has been added to a partial
     138                 :            :    schedule.  */
     139                 :            : struct ps_reg_move_info
     140                 :            : {
     141                 :            :   /* The source of the move is defined by the ps_insn with id DEF.
     142                 :            :      The destination is used by the ps_insns with the ids in USES.  */
     143                 :            :   int def;
     144                 :            :   sbitmap uses;
     145                 :            : 
     146                 :            :   /* The original form of USES' instructions used OLD_REG, but they
     147                 :            :      should now use NEW_REG.  */
     148                 :            :   rtx old_reg;
     149                 :            :   rtx new_reg;
     150                 :            : 
     151                 :            :   /* The number of consecutive stages that the move occupies.  */
     152                 :            :   int num_consecutive_stages;
     153                 :            : 
     154                 :            :   /* An instruction that sets NEW_REG to the correct value.  The first
     155                 :            :      move associated with DEF will have an rhs of OLD_REG; later moves
     156                 :            :      use the result of the previous move.  */
     157                 :            :   rtx_insn *insn;
     158                 :            : };
     159                 :            : 
     160                 :            : /* Holds the partial schedule as an array of II rows.  Each entry of the
     161                 :            :    array points to a linked list of PS_INSNs, which represents the
     162                 :            :    instructions that are scheduled for that row.  */
     163                 :            : struct partial_schedule
     164                 :            : {
     165                 :            :   int ii;       /* Number of rows in the partial schedule.  */
     166                 :            :   int history;  /* Threshold for conflict checking using DFA.  */
     167                 :            : 
     168                 :            :   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
     169                 :            :   ps_insn_ptr *rows;
     170                 :            : 
     171                 :            :   /* All the moves added for this partial schedule.  Index X has
     172                 :            :      a ps_insn id of X + g->num_nodes.  */
     173                 :            :   vec<ps_reg_move_info> reg_moves;
     174                 :            : 
     175                 :            :   /*  rows_length[i] holds the number of instructions in the row.
     176                 :            :       It is used only (as an optimization) to back off quickly from
     177                 :            :       trying to schedule a node in a full row; that is, to avoid running
     178                 :            :       through futile DFA state transitions.  */
     179                 :            :   int *rows_length;
     180                 :            :   
     181                 :            :   /* The earliest absolute cycle of an insn in the partial schedule.  */
     182                 :            :   int min_cycle;
     183                 :            : 
     184                 :            :   /* The latest absolute cycle of an insn in the partial schedule.  */
     185                 :            :   int max_cycle;
     186                 :            : 
     187                 :            :   ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
     188                 :            : 
     189                 :            :   int stage_count;  /* The stage count of the partial schedule.  */
     190                 :            : };
     191                 :            : 
     192                 :            : 
     193                 :            : static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
     194                 :            : static void free_partial_schedule (partial_schedule_ptr);
     195                 :            : static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
     196                 :            : void print_partial_schedule (partial_schedule_ptr, FILE *);
     197                 :            : static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
     198                 :            : static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
     199                 :            :                                                 int, int, sbitmap, sbitmap);
     200                 :            : static void rotate_partial_schedule (partial_schedule_ptr, int);
     201                 :            : void set_row_column_for_ps (partial_schedule_ptr);
     202                 :            : static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
     203                 :            : static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
     204                 :            : 
     205                 :            : 
     206                 :            : /* This page defines constants and structures for the modulo scheduling
     207                 :            :    driver.  */
     208                 :            : 
     209                 :            : static int sms_order_nodes (ddg_ptr, int, int *, int *);
     210                 :            : static void set_node_sched_params (ddg_ptr);
     211                 :            : static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
     212                 :            : static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
     213                 :            : static void generate_prolog_epilog (partial_schedule_ptr, class loop *,
     214                 :            :                                     rtx, rtx);
     215                 :            : static int calculate_stage_count (partial_schedule_ptr, int);
     216                 :            : static void calculate_must_precede_follow (ddg_node_ptr, int, int,
     217                 :            :                                            int, int, sbitmap, sbitmap, sbitmap);
     218                 :            : static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, 
     219                 :            :                              sbitmap, int, int *, int *, int *);
     220                 :            : static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
     221                 :            :                                           sbitmap, int *, sbitmap, sbitmap);
     222                 :            : static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
     223                 :            : 
     224                 :            : #define NODE_ASAP(node) ((node)->aux.count)
     225                 :            : 
     226                 :            : #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
     227                 :            : #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
     228                 :            : #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
     229                 :            : #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
     230                 :            : #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
     231                 :            : 
     232                 :            : /* The scheduling parameters held for each node.  */
     233                 :            : typedef struct node_sched_params
     234                 :            : {
     235                 :            :   int time;     /* The absolute scheduling cycle.  */
     236                 :            : 
     237                 :            :   int row;    /* Holds time % ii.  */
     238                 :            :   int stage;  /* Holds time / ii.  */
     239                 :            : 
     240                 :            :   /* The column of a node inside the ps.  If nodes u, v are on the same row,
     241                 :            :      u will precede v if column (u) < column (v).  */
     242                 :            :   int column;
     243                 :            : } *node_sched_params_ptr;
     244                 :            : 
     245                 :            : /* The following three functions are copied from the current scheduler
     246                 :            :    code in order to use sched_analyze() for computing the dependencies.
     247                 :            :    They are used when initializing the sched_info structure.  */
     248                 :            : static const char *
     249                 :          0 : sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
     250                 :            : {
     251                 :          0 :   static char tmp[80];
     252                 :            : 
     253                 :          0 :   sprintf (tmp, "i%4d", INSN_UID (insn));
     254                 :          0 :   return tmp;
     255                 :            : }
     256                 :            : 
     257                 :            : static void
     258                 :          0 : compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
     259                 :            :                                regset used ATTRIBUTE_UNUSED)
     260                 :            : {
     261                 :          0 : }
     262                 :            : 
     263                 :            : static struct common_sched_info_def sms_common_sched_info;
     264                 :            : 
     265                 :            : static struct sched_deps_info_def sms_sched_deps_info =
     266                 :            :   {
     267                 :            :     compute_jump_reg_dependencies,
     268                 :            :     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     269                 :            :     NULL,
     270                 :            :     0, 0, 0
     271                 :            :   };
     272                 :            : 
     273                 :            : static struct haifa_sched_info sms_sched_info =
     274                 :            : {
     275                 :            :   NULL,
     276                 :            :   NULL,
     277                 :            :   NULL,
     278                 :            :   NULL,
     279                 :            :   NULL,
     280                 :            :   sms_print_insn,
     281                 :            :   NULL,
     282                 :            :   NULL, /* insn_finishes_block_p */
     283                 :            :   NULL, NULL,
     284                 :            :   NULL, NULL,
     285                 :            :   0, 0,
     286                 :            : 
     287                 :            :   NULL, NULL, NULL, NULL,
     288                 :            :   NULL, NULL,
     289                 :            :   0
     290                 :            : };
     291                 :            : 
     292                 :            : /* Partial schedule instruction ID in PS is a register move.  Return
     293                 :            :    information about it.  */
     294                 :            : static struct ps_reg_move_info *
     295                 :          0 : ps_reg_move (partial_schedule_ptr ps, int id)
     296                 :            : {
     297                 :          0 :   gcc_checking_assert (id >= ps->g->num_nodes);
     298                 :          0 :   return &ps->reg_moves[id - ps->g->num_nodes];
     299                 :            : }
     300                 :            : 
     301                 :            : /* Return the rtl instruction that is being scheduled by partial schedule
     302                 :            :    instruction ID, which belongs to schedule PS.  */
     303                 :            : static rtx_insn *
     304                 :          0 : ps_rtl_insn (partial_schedule_ptr ps, int id)
     305                 :            : {
     306                 :          0 :   if (id < ps->g->num_nodes)
     307                 :          0 :     return ps->g->nodes[id].insn;
     308                 :            :   else
     309                 :          0 :     return ps_reg_move (ps, id)->insn;
     310                 :            : }
     311                 :            : 
     312                 :            : /* Partial schedule instruction ID, which belongs to PS, occurred in
     313                 :            :    the original (unscheduled) loop.  Return the first instruction
     314                 :            :    in the loop that was associated with ps_rtl_insn (PS, ID).
     315                 :            :    If the instruction had some notes before it, this is the first
     316                 :            :    of those notes.  */
     317                 :            : static rtx_insn *
     318                 :          0 : ps_first_note (partial_schedule_ptr ps, int id)
     319                 :            : {
     320                 :          0 :   gcc_assert (id < ps->g->num_nodes);
     321                 :          0 :   return ps->g->nodes[id].first_note;
     322                 :            : }
     323                 :            : 
     324                 :            : /* Return the number of consecutive stages that are occupied by
     325                 :            :    partial schedule instruction ID in PS.  */
     326                 :            : static int
     327                 :          0 : ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
     328                 :            : {
     329                 :          0 :   if (id < ps->g->num_nodes)
     330                 :            :     return 1;
     331                 :            :   else
     332                 :          0 :     return ps_reg_move (ps, id)->num_consecutive_stages;
     333                 :            : }
     334                 :            : 
     335                 :            : /* Given HEAD and TAIL which are the first and last insns in a loop;
     336                 :            :    return the register which controls the loop.  Return zero if it has
     337                 :            :    more than one occurrence in the loop besides the control part or the
     338                 :            :    do-loop pattern is not of the form we expect.  */
     339                 :            : static rtx
     340                 :         97 : doloop_register_get (rtx_insn *head, rtx_insn *tail)
     341                 :            : {
     342                 :         97 :   rtx reg, condition;
     343                 :         97 :   rtx_insn *insn, *first_insn_not_to_check;
     344                 :            : 
     345                 :         97 :   if (!JUMP_P (tail))
     346                 :            :     return NULL_RTX;
     347                 :            : 
     348                 :         97 :   if (!targetm.code_for_doloop_end)
     349                 :            :     return NULL_RTX;
     350                 :            : 
     351                 :            :   /* TODO: Free SMS's dependence on doloop_condition_get.  */
     352                 :          0 :   condition = doloop_condition_get (tail);
     353                 :          0 :   if (! condition)
     354                 :            :     return NULL_RTX;
     355                 :            : 
     356                 :          0 :   if (REG_P (XEXP (condition, 0)))
     357                 :            :     reg = XEXP (condition, 0);
     358                 :          0 :   else if (GET_CODE (XEXP (condition, 0)) == PLUS
     359                 :          0 :            && REG_P (XEXP (XEXP (condition, 0), 0)))
     360                 :            :     reg = XEXP (XEXP (condition, 0), 0);
     361                 :            :   else
     362                 :          0 :     gcc_unreachable ();
     363                 :            : 
     364                 :            :   /* Check that the COUNT_REG has no other occurrences in the loop
     365                 :            :      until the decrement.  We assume the control part consists of
     366                 :            :      either a single (parallel) branch-on-count or a (non-parallel)
     367                 :            :      branch immediately preceded by a single (decrement) insn.  */
     368                 :          0 :   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
     369                 :          0 :                              : prev_nondebug_insn (tail));
     370                 :            : 
     371                 :          0 :   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
     372                 :          0 :     if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
     373                 :            :       {
     374                 :          0 :         if (dump_file)
     375                 :            :         {
     376                 :          0 :           fprintf (dump_file, "SMS count_reg found ");
     377                 :          0 :           print_rtl_single (dump_file, reg);
     378                 :          0 :           fprintf (dump_file, " outside control in insn:\n");
     379                 :          0 :           print_rtl_single (dump_file, insn);
     380                 :            :         }
     381                 :            : 
     382                 :          0 :         return NULL_RTX;
     383                 :            :       }
     384                 :            : 
     385                 :            :   return reg;
     386                 :            : }
     387                 :            : 
     388                 :            : /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
     389                 :            :    that the number of iterations is a compile-time constant.  If so,
     390                 :            :    return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
     391                 :            :    this constant.  Otherwise return 0.  */
     392                 :            : static rtx_insn *
     393                 :          0 : const_iteration_count (rtx count_reg, basic_block pre_header,
     394                 :            :                        int64_t * count)
     395                 :            : {
     396                 :          0 :   rtx_insn *insn;
     397                 :          0 :   rtx_insn *head, *tail;
     398                 :            : 
     399                 :          0 :   if (! pre_header)
     400                 :            :     return NULL;
     401                 :            : 
     402                 :          0 :   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
     403                 :            : 
     404                 :          0 :   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
     405                 :          0 :     if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
     406                 :          0 :         rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
     407                 :            :       {
     408                 :          0 :         rtx pat = single_set (insn);
     409                 :            : 
     410                 :          0 :         if (CONST_INT_P (SET_SRC (pat)))
     411                 :            :           {
     412                 :          0 :             *count = INTVAL (SET_SRC (pat));
     413                 :          0 :             return insn;
     414                 :            :           }
     415                 :            : 
     416                 :            :         return NULL;
     417                 :            :       }
     418                 :            : 
     419                 :            :   return NULL;
     420                 :            : }
     421                 :            : 
     422                 :            : /* A very simple resource-based lower bound on the initiation interval.
     423                 :            :    ??? Improve the accuracy of this bound by considering the
     424                 :            :    utilization of various units.  */
     425                 :            : static int
     426                 :          0 : res_MII (ddg_ptr g)
     427                 :            : {
     428                 :          0 :   if (targetm.sched.sms_res_mii)
     429                 :          0 :     return targetm.sched.sms_res_mii (g);
     430                 :            : 
     431                 :          0 :   return g->num_nodes / issue_rate;
     432                 :            : }
     433                 :            : 
     434                 :            : 
     435                 :            : /* A vector that contains the sched data for each ps_insn.  */
     436                 :            : static vec<node_sched_params> node_sched_param_vec;
     437                 :            : 
     438                 :            : /* Allocate sched_params for each node and initialize it.  */
     439                 :            : static void
     440                 :          0 : set_node_sched_params (ddg_ptr g)
     441                 :            : {
     442                 :          0 :   node_sched_param_vec.truncate (0);
     443                 :          0 :   node_sched_param_vec.safe_grow_cleared (g->num_nodes);
     444                 :          0 : }
     445                 :            : 
     446                 :            : /* Make sure that node_sched_param_vec has an entry for every move in PS.  */
     447                 :            : static void
     448                 :          0 : extend_node_sched_params (partial_schedule_ptr ps)
     449                 :            : {
     450                 :          0 :   node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
     451                 :          0 :                                           + ps->reg_moves.length ());
     452                 :          0 : }
     453                 :            : 
     454                 :            : /* Update the sched_params (time, row and stage) for node U using the II,
     455                 :            :    the CYCLE of U and MIN_CYCLE.
     456                 :            :    We're not simply taking the following
     457                 :            :    SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
     458                 :            :    because the stages may not be aligned on cycle 0.  */
     459                 :            : static void
     460                 :          0 : update_node_sched_params (int u, int ii, int cycle, int min_cycle)
     461                 :            : {
     462                 :          0 :   int sc_until_cycle_zero;
     463                 :          0 :   int stage;
     464                 :            : 
     465                 :          0 :   SCHED_TIME (u) = cycle;
     466                 :          0 :   SCHED_ROW (u) = SMODULO (cycle, ii);
     467                 :            : 
     468                 :            :   /* The calculation of stage count is done adding the number
     469                 :            :      of stages before cycle zero and after cycle zero.  */
     470                 :          0 :   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
     471                 :            : 
     472                 :          0 :   if (SCHED_TIME (u) < 0)
     473                 :            :     {
     474                 :          0 :       stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
     475                 :          0 :       SCHED_STAGE (u) = sc_until_cycle_zero - stage;
     476                 :            :     }
     477                 :            :   else
     478                 :            :     {
     479                 :          0 :       stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
     480                 :          0 :       SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
     481                 :            :     }
     482                 :          0 : }
     483                 :            : 
     484                 :            : static void
     485                 :          0 : print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
     486                 :            : {
     487                 :          0 :   int i;
     488                 :            : 
     489                 :          0 :   if (! file)
     490                 :            :     return;
     491                 :          0 :   for (i = 0; i < num_nodes; i++)
     492                 :            :     {
     493                 :          0 :       node_sched_params_ptr nsp = SCHED_PARAMS (i);
     494                 :            : 
     495                 :          0 :       fprintf (file, "Node = %d; INSN = %d\n", i,
     496                 :          0 :                INSN_UID (ps_rtl_insn (ps, i)));
     497                 :          0 :       fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
     498                 :          0 :       fprintf (file, " time = %d:\n", nsp->time);
     499                 :          0 :       fprintf (file, " stage = %d:\n", nsp->stage);
     500                 :            :     }
     501                 :            : }
     502                 :            : 
     503                 :            : /* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
     504                 :            : static void
     505                 :          0 : set_columns_for_row (partial_schedule_ptr ps, int row)
     506                 :            : {
     507                 :          0 :   ps_insn_ptr cur_insn;
     508                 :          0 :   int column;
     509                 :            : 
     510                 :          0 :   column = 0;
     511                 :          0 :   for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
     512                 :          0 :     SCHED_COLUMN (cur_insn->id) = column++;
     513                 :          0 : }
     514                 :            : 
     515                 :            : /* Set SCHED_COLUMN for each instruction in PS.  */
     516                 :            : static void
     517                 :          0 : set_columns_for_ps (partial_schedule_ptr ps)
     518                 :            : {
     519                 :          0 :   int row;
     520                 :            : 
     521                 :          0 :   for (row = 0; row < ps->ii; row++)
     522                 :          0 :     set_columns_for_row (ps, row);
     523                 :          0 : }
     524                 :            : 
     525                 :            : /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
     526                 :            :    Its single predecessor has already been scheduled, as has its
     527                 :            :    ddg node successors.  (The move may have also another move as its
     528                 :            :    successor, in which case that successor will be scheduled later.)
     529                 :            : 
     530                 :            :    The move is part of a chain that satisfies register dependencies
     531                 :            :    between a producing ddg node and various consuming ddg nodes.
     532                 :            :    If some of these dependencies have a distance of 1 (meaning that
     533                 :            :    the use is upward-exposed) then DISTANCE1_USES is nonnull and
     534                 :            :    contains the set of uses with distance-1 dependencies.
     535                 :            :    DISTANCE1_USES is null otherwise.
     536                 :            : 
     537                 :            :    MUST_FOLLOW is a scratch bitmap that is big enough to hold
     538                 :            :    all current ps_insn ids.
     539                 :            : 
     540                 :            :    Return true on success.  */
     541                 :            : static bool
     542                 :          0 : schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
     543                 :            :                    sbitmap distance1_uses, sbitmap must_follow)
     544                 :            : {
     545                 :          0 :   unsigned int u;
     546                 :          0 :   int this_time, this_distance, this_start, this_end, this_latency;
     547                 :          0 :   int start, end, c, ii;
     548                 :          0 :   sbitmap_iterator sbi;
     549                 :          0 :   ps_reg_move_info *move;
     550                 :          0 :   rtx_insn *this_insn;
     551                 :          0 :   ps_insn_ptr psi;
     552                 :            : 
     553                 :          0 :   move = ps_reg_move (ps, i_reg_move);
     554                 :          0 :   ii = ps->ii;
     555                 :          0 :   if (dump_file)
     556                 :            :     {
     557                 :          0 :       fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
     558                 :          0 :                ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
     559                 :            :                PS_MIN_CYCLE (ps));
     560                 :          0 :       print_rtl_single (dump_file, move->insn);
     561                 :          0 :       fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
     562                 :          0 :       fprintf (dump_file, "=========== =========== =====\n");
     563                 :            :     }
     564                 :            : 
     565                 :          0 :   start = INT_MIN;
     566                 :          0 :   end = INT_MAX;
     567                 :            : 
     568                 :            :   /* For dependencies of distance 1 between a producer ddg node A
     569                 :            :      and consumer ddg node B, we have a chain of dependencies:
     570                 :            : 
     571                 :            :         A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
     572                 :            : 
     573                 :            :      where Mi is the ith move.  For dependencies of distance 0 between
     574                 :            :      a producer ddg node A and consumer ddg node C, we have a chain of
     575                 :            :      dependencies:
     576                 :            : 
     577                 :            :         A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
     578                 :            : 
     579                 :            :      where Mi' occupies the same position as Mi but occurs a stage later.
     580                 :            :      We can only schedule each move once, so if we have both types of
     581                 :            :      chain, we model the second as:
     582                 :            : 
     583                 :            :         A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
     584                 :            : 
     585                 :            :      First handle the dependencies between the previously-scheduled
     586                 :            :      predecessor and the move.  */
     587                 :          0 :   this_insn = ps_rtl_insn (ps, move->def);
     588                 :          0 :   this_latency = insn_latency (this_insn, move->insn);
     589                 :          0 :   this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
     590                 :          0 :   this_time = SCHED_TIME (move->def) - this_distance * ii;
     591                 :          0 :   this_start = this_time + this_latency;
     592                 :          0 :   this_end = this_time + ii;
     593                 :          0 :   if (dump_file)
     594                 :          0 :     fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
     595                 :          0 :              this_start, this_end, SCHED_TIME (move->def),
     596                 :          0 :              INSN_UID (this_insn), this_latency, this_distance,
     597                 :          0 :              INSN_UID (move->insn));
     598                 :            : 
     599                 :          0 :   if (start < this_start)
     600                 :            :     start = this_start;
     601                 :          0 :   if (end > this_end)
     602                 :          0 :     end = this_end;
     603                 :            : 
     604                 :            :   /* Handle the dependencies between the move and previously-scheduled
     605                 :            :      successors.  */
     606                 :          0 :   EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
     607                 :            :     {
     608                 :          0 :       this_insn = ps_rtl_insn (ps, u);
     609                 :          0 :       this_latency = insn_latency (move->insn, this_insn);
     610                 :          0 :       if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
     611                 :            :         this_distance = -1;
     612                 :            :       else
     613                 :            :         this_distance = 0;
     614                 :          0 :       this_time = SCHED_TIME (u) + this_distance * ii;
     615                 :          0 :       this_start = this_time - ii;
     616                 :          0 :       this_end = this_time - this_latency;
     617                 :          0 :       if (dump_file)
     618                 :          0 :         fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
     619                 :          0 :                  this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
     620                 :          0 :                  this_latency, this_distance, INSN_UID (this_insn));
     621                 :            : 
     622                 :          0 :       if (start < this_start)
     623                 :            :         start = this_start;
     624                 :          0 :       if (end > this_end)
     625                 :            :         end = this_end;
     626                 :            :     }
     627                 :            : 
     628                 :          0 :   if (dump_file)
     629                 :            :     {
     630                 :          0 :       fprintf (dump_file, "----------- ----------- -----\n");
     631                 :          0 :       fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
     632                 :            :     }
     633                 :            : 
     634                 :          0 :   bitmap_clear (must_follow);
     635                 :          0 :   bitmap_set_bit (must_follow, move->def);
     636                 :            : 
     637                 :          0 :   start = MAX (start, end - (ii - 1));
     638                 :          0 :   for (c = end; c >= start; c--)
     639                 :            :     {
     640                 :          0 :       psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
     641                 :            :                                          move->uses, must_follow);
     642                 :          0 :       if (psi)
     643                 :            :         {
     644                 :          0 :           update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
     645                 :          0 :           if (dump_file)
     646                 :          0 :             fprintf (dump_file, "\nScheduled register move INSN %d at"
     647                 :          0 :                      " time %d, row %d\n\n", INSN_UID (move->insn), c,
     648                 :          0 :                      SCHED_ROW (i_reg_move));
     649                 :          0 :           return true;
     650                 :            :         }
     651                 :            :     }
     652                 :            : 
     653                 :          0 :   if (dump_file)
     654                 :          0 :     fprintf (dump_file, "\nNo available slot\n\n");
     655                 :            : 
     656                 :            :   return false;
     657                 :            : }
     658                 :            : 
     659                 :            : /*
     660                 :            :    Breaking intra-loop register anti-dependences:
     661                 :            :    Each intra-loop register anti-dependence implies a cross-iteration true
     662                 :            :    dependence of distance 1. Therefore, we can remove such false dependencies
     663                 :            :    and figure out if the partial schedule broke them by checking if (for a
     664                 :            :    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
     665                 :            :    if so generate a register move.   The number of such moves is equal to:
     666                 :            :               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
     667                 :            :    nreg_moves = ----------------------------------- + 1 - {   dependence.
     668                 :            :                             ii                          { 1 if not.
     669                 :            : */
     670                 :            : static bool
     671                 :          0 : schedule_reg_moves (partial_schedule_ptr ps)
     672                 :            : {
     673                 :          0 :   ddg_ptr g = ps->g;
     674                 :          0 :   int ii = ps->ii;
     675                 :          0 :   int i;
     676                 :            : 
     677                 :          0 :   for (i = 0; i < g->num_nodes; i++)
     678                 :            :     {
     679                 :          0 :       ddg_node_ptr u = &g->nodes[i];
     680                 :          0 :       ddg_edge_ptr e;
     681                 :          0 :       int nreg_moves = 0, i_reg_move;
     682                 :          0 :       rtx prev_reg, old_reg;
     683                 :          0 :       int first_move;
     684                 :          0 :       int distances[2];
     685                 :          0 :       sbitmap distance1_uses;
     686                 :          0 :       rtx set = single_set (u->insn);
     687                 :            :       
     688                 :            :       /* Skip instructions that do not set a register.  */
     689                 :          0 :       if (set && !REG_P (SET_DEST (set)))
     690                 :          0 :         continue;
     691                 :            : 
     692                 :            :       /* Compute the number of reg_moves needed for u, by looking at life
     693                 :            :          ranges started at u (excluding self-loops).  */
     694                 :          0 :       distances[0] = distances[1] = false;
     695                 :          0 :       for (e = u->out; e; e = e->next_out)
     696                 :          0 :         if (e->type == TRUE_DEP && e->dest != e->src)
     697                 :            :           {
     698                 :          0 :             int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
     699                 :          0 :                                 - SCHED_TIME (e->src->cuid)) / ii;
     700                 :            : 
     701                 :          0 :             if (e->distance == 1)
     702                 :          0 :               nreg_moves4e = (SCHED_TIME (e->dest->cuid)
     703                 :          0 :                               - SCHED_TIME (e->src->cuid) + ii) / ii;
     704                 :            : 
     705                 :            :             /* If dest precedes src in the schedule of the kernel, then dest
     706                 :            :                will read before src writes and we can save one reg_copy.  */
     707                 :          0 :             if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
     708                 :          0 :                 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
     709                 :          0 :               nreg_moves4e--;
     710                 :            : 
     711                 :          0 :             if (nreg_moves4e >= 1)
     712                 :            :               {
     713                 :            :                 /* !single_set instructions are not supported yet and
     714                 :            :                    thus we do not except to encounter them in the loop
     715                 :            :                    except from the doloop part.  For the latter case
     716                 :            :                    we assume no regmoves are generated as the doloop
     717                 :            :                    instructions are tied to the branch with an edge.  */
     718                 :          0 :                 gcc_assert (set);
     719                 :            :                 /* If the instruction contains auto-inc register then
     720                 :            :                    validate that the regmov is being generated for the
     721                 :            :                    target regsiter rather then the inc'ed register.     */
     722                 :          0 :                 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
     723                 :            :               }
     724                 :            :             
     725                 :          0 :             if (nreg_moves4e)
     726                 :            :               {
     727                 :          0 :                 gcc_assert (e->distance < 2);
     728                 :          0 :                 distances[e->distance] = true;
     729                 :            :               }
     730                 :          0 :             nreg_moves = MAX (nreg_moves, nreg_moves4e);
     731                 :            :           }
     732                 :            : 
     733                 :          0 :       if (nreg_moves == 0)
     734                 :          0 :         continue;
     735                 :            : 
     736                 :            :       /* Create NREG_MOVES register moves.  */
     737                 :          0 :       first_move = ps->reg_moves.length ();
     738                 :          0 :       ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
     739                 :          0 :       extend_node_sched_params (ps);
     740                 :            : 
     741                 :            :       /* Record the moves associated with this node.  */
     742                 :          0 :       first_move += ps->g->num_nodes;
     743                 :            : 
     744                 :            :       /* Generate each move.  */
     745                 :          0 :       old_reg = prev_reg = SET_DEST (set);
     746                 :          0 :       if (HARD_REGISTER_P (old_reg))
     747                 :          0 :         return false;
     748                 :            : 
     749                 :          0 :       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
     750                 :            :         {
     751                 :          0 :           ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
     752                 :            : 
     753                 :          0 :           move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
     754                 :          0 :           move->uses = sbitmap_alloc (first_move + nreg_moves);
     755                 :          0 :           move->old_reg = old_reg;
     756                 :          0 :           move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
     757                 :          0 :           move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
     758                 :          0 :           move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
     759                 :          0 :           bitmap_clear (move->uses);
     760                 :            : 
     761                 :          0 :           prev_reg = move->new_reg;
     762                 :            :         }
     763                 :            : 
     764                 :          0 :       distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
     765                 :            : 
     766                 :          0 :       if (distance1_uses)
     767                 :          0 :         bitmap_clear (distance1_uses);
     768                 :            : 
     769                 :            :       /* Every use of the register defined by node may require a different
     770                 :            :          copy of this register, depending on the time the use is scheduled.
     771                 :            :          Record which uses require which move results.  */
     772                 :          0 :       for (e = u->out; e; e = e->next_out)
     773                 :          0 :         if (e->type == TRUE_DEP && e->dest != e->src)
     774                 :            :           {
     775                 :          0 :             int dest_copy = (SCHED_TIME (e->dest->cuid)
     776                 :          0 :                              - SCHED_TIME (e->src->cuid)) / ii;
     777                 :            : 
     778                 :          0 :             if (e->distance == 1)
     779                 :          0 :               dest_copy = (SCHED_TIME (e->dest->cuid)
     780                 :          0 :                            - SCHED_TIME (e->src->cuid) + ii) / ii;
     781                 :            : 
     782                 :          0 :             if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
     783                 :          0 :                 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
     784                 :          0 :               dest_copy--;
     785                 :            : 
     786                 :          0 :             if (dest_copy)
     787                 :            :               {
     788                 :          0 :                 ps_reg_move_info *move;
     789                 :            : 
     790                 :          0 :                 move = ps_reg_move (ps, first_move + dest_copy - 1);
     791                 :          0 :                 bitmap_set_bit (move->uses, e->dest->cuid);
     792                 :          0 :                 if (e->distance == 1)
     793                 :          0 :                   bitmap_set_bit (distance1_uses, e->dest->cuid);
     794                 :            :               }
     795                 :            :           }
     796                 :            : 
     797                 :          0 :       auto_sbitmap must_follow (first_move + nreg_moves);
     798                 :          0 :       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
     799                 :          0 :         if (!schedule_reg_move (ps, first_move + i_reg_move,
     800                 :            :                                 distance1_uses, must_follow))
     801                 :            :           break;
     802                 :          0 :       if (distance1_uses)
     803                 :          0 :         sbitmap_free (distance1_uses);
     804                 :          0 :       if (i_reg_move < nreg_moves)
     805                 :          0 :         return false;
     806                 :            :     }
     807                 :            :   return true;
     808                 :            : }
     809                 :            : 
     810                 :            : /* Emit the moves associated with PS.  Apply the substitutions
     811                 :            :    associated with them.  */
     812                 :            : static void
     813                 :          0 : apply_reg_moves (partial_schedule_ptr ps)
     814                 :            : {
     815                 :          0 :   ps_reg_move_info *move;
     816                 :          0 :   int i;
     817                 :            : 
     818                 :          0 :   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
     819                 :            :     {
     820                 :          0 :       unsigned int i_use;
     821                 :          0 :       sbitmap_iterator sbi;
     822                 :            : 
     823                 :          0 :       EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
     824                 :            :         {
     825                 :          0 :           replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
     826                 :          0 :           df_insn_rescan (ps->g->nodes[i_use].insn);
     827                 :            :         }
     828                 :            :     }
     829                 :          0 : }
     830                 :            : 
     831                 :            : /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
     832                 :            :    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
     833                 :            :    will move to cycle zero.  */
     834                 :            : static void
     835                 :          0 : reset_sched_times (partial_schedule_ptr ps, int amount)
     836                 :            : {
     837                 :          0 :   int row;
     838                 :          0 :   int ii = ps->ii;
     839                 :          0 :   ps_insn_ptr crr_insn;
     840                 :            : 
     841                 :          0 :   for (row = 0; row < ii; row++)
     842                 :          0 :     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
     843                 :            :       {
     844                 :          0 :         int u = crr_insn->id;
     845                 :          0 :         int normalized_time = SCHED_TIME (u) - amount;
     846                 :          0 :         int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
     847                 :            : 
     848                 :          0 :         if (dump_file)
     849                 :            :           {
     850                 :            :             /* Print the scheduling times after the rotation.  */
     851                 :          0 :             rtx_insn *insn = ps_rtl_insn (ps, u);
     852                 :            : 
     853                 :          0 :             fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
     854                 :            :                      "crr_insn->cycle=%d, min_cycle=%d", u,
     855                 :          0 :                      INSN_UID (insn), normalized_time, new_min_cycle);
     856                 :          0 :             if (JUMP_P (insn))
     857                 :          0 :               fprintf (dump_file, " (branch)");
     858                 :          0 :             fprintf (dump_file, "\n");
     859                 :            :           }
     860                 :            :         
     861                 :          0 :         gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
     862                 :          0 :         gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
     863                 :            : 
     864                 :          0 :         crr_insn->cycle = normalized_time;
     865                 :          0 :         update_node_sched_params (u, ii, normalized_time, new_min_cycle);
     866                 :            :       }
     867                 :          0 : }
     868                 :            :  
     869                 :            : /* Permute the insns according to their order in PS, from row 0 to
     870                 :            :    row ii-1, and position them right before LAST.  This schedules
     871                 :            :    the insns of the loop kernel.  */
     872                 :            : static void
     873                 :          0 : permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
     874                 :            : {
     875                 :          0 :   int ii = ps->ii;
     876                 :          0 :   int row;
     877                 :          0 :   ps_insn_ptr ps_ij;
     878                 :            : 
     879                 :          0 :   for (row = 0; row < ii ; row++)
     880                 :          0 :     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
     881                 :            :       {
     882                 :          0 :         rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
     883                 :            : 
     884                 :          0 :         if (PREV_INSN (last) != insn)
     885                 :            :           {
     886                 :          0 :             if (ps_ij->id < ps->g->num_nodes)
     887                 :          0 :               reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
     888                 :            :                                   PREV_INSN (last));
     889                 :            :             else
     890                 :          0 :               add_insn_before (insn, last, NULL);
     891                 :            :           }
     892                 :            :       }
     893                 :          0 : }
     894                 :            : 
     895                 :            : /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
     896                 :            :    respectively only if cycle C falls on the border of the scheduling
     897                 :            :    window boundaries marked by START and END cycles.  STEP is the
     898                 :            :    direction of the window.  */
     899                 :            : static inline void
     900                 :          0 : set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
     901                 :            :                          sbitmap *tmp_precede, sbitmap must_precede, int c,
     902                 :            :                          int start, int end, int step)
     903                 :            : {
     904                 :          0 :   *tmp_precede = NULL;
     905                 :          0 :   *tmp_follow = NULL;
     906                 :            : 
     907                 :          0 :   if (c == start)
     908                 :            :     {
     909                 :          0 :       if (step == 1)
     910                 :            :         *tmp_precede = must_precede;
     911                 :            :       else                      /* step == -1.  */
     912                 :          0 :         *tmp_follow = must_follow;
     913                 :            :     }
     914                 :          0 :   if (c == end - step)
     915                 :            :     {
     916                 :          0 :       if (step == 1)
     917                 :            :         *tmp_follow = must_follow;
     918                 :            :       else                      /* step == -1.  */
     919                 :          0 :         *tmp_precede = must_precede;
     920                 :            :     }
     921                 :            : 
     922                 :            : }
     923                 :            : 
     924                 :            : /* Return True if the branch can be moved to row ii-1 while
     925                 :            :    normalizing the partial schedule PS to start from cycle zero and thus
     926                 :            :    optimize the SC.  Otherwise return False.  */
     927                 :            : static bool
     928                 :          0 : optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
     929                 :            : {
     930                 :          0 :   int amount = PS_MIN_CYCLE (ps);
     931                 :          0 :   int start, end, step;
     932                 :          0 :   int ii = ps->ii;
     933                 :          0 :   bool ok = false;
     934                 :          0 :   int stage_count, stage_count_curr;
     935                 :            : 
     936                 :            :   /* Compare the SC after normalization and SC after bringing the branch
     937                 :            :      to row ii-1.  If they are equal just bail out.  */
     938                 :          0 :   stage_count = calculate_stage_count (ps, amount);
     939                 :          0 :   stage_count_curr =
     940                 :          0 :     calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
     941                 :            : 
     942                 :          0 :   if (stage_count == stage_count_curr)
     943                 :            :     {
     944                 :          0 :       if (dump_file)
     945                 :          0 :         fprintf (dump_file, "SMS SC already optimized.\n");
     946                 :            : 
     947                 :          0 :       return false;
     948                 :            :     }
     949                 :            : 
     950                 :          0 :   if (dump_file)
     951                 :            :     {
     952                 :          0 :       fprintf (dump_file, "SMS Trying to optimize branch location\n");
     953                 :          0 :       fprintf (dump_file, "SMS partial schedule before trial:\n");
     954                 :          0 :       print_partial_schedule (ps, dump_file);
     955                 :            :     }
     956                 :            : 
     957                 :            :   /* First, normalize the partial scheduling.  */
     958                 :          0 :   reset_sched_times (ps, amount);
     959                 :          0 :   rotate_partial_schedule (ps, amount);
     960                 :          0 :   if (dump_file)
     961                 :            :     {
     962                 :          0 :       fprintf (dump_file,
     963                 :            :                "SMS partial schedule after normalization (ii, %d, SC %d):\n",
     964                 :            :                ii, stage_count);
     965                 :          0 :       print_partial_schedule (ps, dump_file);
     966                 :            :     }
     967                 :            : 
     968                 :          0 :   if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
     969                 :            :     return true;
     970                 :            : 
     971                 :          0 :   auto_sbitmap sched_nodes (g->num_nodes);
     972                 :          0 :   bitmap_ones (sched_nodes);
     973                 :            : 
     974                 :            :   /* Calculate the new placement of the branch.  It should be in row
     975                 :            :      ii-1 and fall into it's scheduling window.  */
     976                 :          0 :   if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
     977                 :            :                         &step, &end) == 0)
     978                 :            :     {
     979                 :          0 :       bool success;
     980                 :          0 :       ps_insn_ptr next_ps_i;
     981                 :          0 :       int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
     982                 :          0 :       int row = SMODULO (branch_cycle, ps->ii);
     983                 :          0 :       int num_splits = 0;
     984                 :          0 :       sbitmap tmp_precede, tmp_follow;
     985                 :          0 :       int min_cycle, c;
     986                 :            : 
     987                 :          0 :       if (dump_file)
     988                 :          0 :         fprintf (dump_file, "\nTrying to schedule node %d "
     989                 :            :                  "INSN = %d  in (%d .. %d) step %d\n",
     990                 :            :                  g->closing_branch->cuid,
     991                 :          0 :                  (INSN_UID (g->closing_branch->insn)), start, end, step);
     992                 :            : 
     993                 :          0 :       gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
     994                 :          0 :       if (step == 1)
     995                 :            :         {
     996                 :          0 :           c = start + ii - SMODULO (start, ii) - 1;
     997                 :          0 :           gcc_assert (c >= start);
     998                 :          0 :           if (c >= end)
     999                 :            :             {
    1000                 :          0 :               if (dump_file)
    1001                 :          0 :                 fprintf (dump_file,
    1002                 :            :                          "SMS failed to schedule branch at cycle: %d\n", c);
    1003                 :          0 :               return false;
    1004                 :            :             }
    1005                 :            :         }
    1006                 :            :       else
    1007                 :            :         {
    1008                 :          0 :           c = start - SMODULO (start, ii) - 1;
    1009                 :          0 :           gcc_assert (c <= start);
    1010                 :            : 
    1011                 :          0 :           if (c <= end)
    1012                 :            :             {
    1013                 :          0 :               if (dump_file)
    1014                 :          0 :                 fprintf (dump_file,
    1015                 :            :                          "SMS failed to schedule branch at cycle: %d\n", c);
    1016                 :          0 :               return false;
    1017                 :            :             }
    1018                 :            :         }
    1019                 :            : 
    1020                 :          0 :       auto_sbitmap must_precede (g->num_nodes);
    1021                 :          0 :       auto_sbitmap must_follow (g->num_nodes);
    1022                 :            : 
    1023                 :            :       /* Try to schedule the branch is it's new cycle.  */
    1024                 :          0 :       calculate_must_precede_follow (g->closing_branch, start, end,
    1025                 :            :                                      step, ii, sched_nodes,
    1026                 :            :                                      must_precede, must_follow);
    1027                 :            : 
    1028                 :          0 :       set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
    1029                 :            :                                must_precede, c, start, end, step);
    1030                 :            : 
    1031                 :            :       /* Find the element in the partial schedule related to the closing
    1032                 :            :          branch so we can remove it from it's current cycle.  */
    1033                 :          0 :       for (next_ps_i = ps->rows[row];
    1034                 :          0 :            next_ps_i; next_ps_i = next_ps_i->next_in_row)
    1035                 :          0 :         if (next_ps_i->id == g->closing_branch->cuid)
    1036                 :            :           break;
    1037                 :            : 
    1038                 :          0 :       min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
    1039                 :          0 :       remove_node_from_ps (ps, next_ps_i);
    1040                 :          0 :       success =
    1041                 :          0 :         try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
    1042                 :            :                                       sched_nodes, &num_splits,
    1043                 :            :                                       tmp_precede, tmp_follow);
    1044                 :          0 :       gcc_assert (num_splits == 0);
    1045                 :          0 :       if (!success)
    1046                 :            :         {
    1047                 :          0 :           if (dump_file)
    1048                 :          0 :             fprintf (dump_file,
    1049                 :            :                      "SMS failed to schedule branch at cycle: %d, "
    1050                 :            :                      "bringing it back to cycle %d\n", c, branch_cycle);
    1051                 :            : 
    1052                 :            :           /* The branch was failed to be placed in row ii - 1.
    1053                 :            :              Put it back in it's original place in the partial
    1054                 :            :              schedualing.  */
    1055                 :          0 :           set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
    1056                 :            :                                    must_precede, branch_cycle, start, end,
    1057                 :            :                                    step);
    1058                 :          0 :           success =
    1059                 :          0 :             try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
    1060                 :            :                                           branch_cycle, sched_nodes,
    1061                 :            :                                           &num_splits, tmp_precede,
    1062                 :            :                                           tmp_follow);
    1063                 :          0 :           gcc_assert (success && (num_splits == 0));
    1064                 :            :           ok = false;
    1065                 :            :         }
    1066                 :            :       else
    1067                 :            :         {
    1068                 :            :           /* The branch is placed in row ii - 1.  */
    1069                 :          0 :           if (dump_file)
    1070                 :          0 :             fprintf (dump_file,
    1071                 :            :                      "SMS success in moving branch to cycle %d\n", c);
    1072                 :            : 
    1073                 :          0 :           update_node_sched_params (g->closing_branch->cuid, ii, c,
    1074                 :            :                                     PS_MIN_CYCLE (ps));
    1075                 :          0 :           ok = true;
    1076                 :            :         }
    1077                 :            : 
    1078                 :            :       /* This might have been added to a new first stage.  */
    1079                 :          0 :       if (PS_MIN_CYCLE (ps) < min_cycle)
    1080                 :          0 :         reset_sched_times (ps, 0);
    1081                 :            :     }
    1082                 :            : 
    1083                 :            :   return ok;
    1084                 :            : }
    1085                 :            : 
    1086                 :            : static void
    1087                 :          0 : duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
    1088                 :            :                            int to_stage, rtx count_reg)
    1089                 :            : {
    1090                 :          0 :   int row;
    1091                 :          0 :   ps_insn_ptr ps_ij;
    1092                 :            : 
    1093                 :          0 :   for (row = 0; row < ps->ii; row++)
    1094                 :          0 :     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
    1095                 :            :       {
    1096                 :          0 :         int u = ps_ij->id;
    1097                 :          0 :         int first_u, last_u;
    1098                 :          0 :         rtx_insn *u_insn;
    1099                 :            : 
    1100                 :            :         /* Do not duplicate any insn which refers to count_reg as it
    1101                 :            :            belongs to the control part.
    1102                 :            :            The closing branch is scheduled as well and thus should
    1103                 :            :            be ignored.
    1104                 :            :            TODO: This should be done by analyzing the control part of
    1105                 :            :            the loop.  */
    1106                 :          0 :         u_insn = ps_rtl_insn (ps, u);
    1107                 :          0 :         if (reg_mentioned_p (count_reg, u_insn)
    1108                 :          0 :             || JUMP_P (u_insn))
    1109                 :          0 :           continue;
    1110                 :            : 
    1111                 :          0 :         first_u = SCHED_STAGE (u);
    1112                 :          0 :         last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
    1113                 :          0 :         if (from_stage <= last_u && to_stage >= first_u)
    1114                 :            :           {
    1115                 :          0 :             if (u < ps->g->num_nodes)
    1116                 :          0 :               duplicate_insn_chain (ps_first_note (ps, u), u_insn);
    1117                 :            :             else
    1118                 :          0 :               emit_insn (copy_rtx (PATTERN (u_insn)));
    1119                 :            :           }
    1120                 :            :       }
    1121                 :          0 : }
    1122                 :            : 
    1123                 :            : 
    1124                 :            : /* Generate the instructions (including reg_moves) for prolog & epilog.  */
    1125                 :            : static void
    1126                 :          0 : generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
    1127                 :            :                         rtx count_reg, rtx count_init)
    1128                 :            : {
    1129                 :          0 :   int i;
    1130                 :          0 :   int last_stage = PS_STAGE_COUNT (ps) - 1;
    1131                 :          0 :   edge e;
    1132                 :            : 
    1133                 :            :   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
    1134                 :          0 :   start_sequence ();
    1135                 :            : 
    1136                 :          0 :   if (!count_init)
    1137                 :            :     {
    1138                 :            :       /* Generate instructions at the beginning of the prolog to
    1139                 :            :          adjust the loop count by STAGE_COUNT.  If loop count is constant
    1140                 :            :          (count_init), this constant is adjusted by STAGE_COUNT in
    1141                 :            :          generate_prolog_epilog function.  */
    1142                 :          0 :       rtx sub_reg = NULL_RTX;
    1143                 :            : 
    1144                 :          0 :       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
    1145                 :            :                                      gen_int_mode (last_stage,
    1146                 :          0 :                                                    GET_MODE (count_reg)),
    1147                 :            :                                      count_reg, 1, OPTAB_DIRECT);
    1148                 :          0 :       gcc_assert (REG_P (sub_reg));
    1149                 :          0 :       if (REGNO (sub_reg) != REGNO (count_reg))
    1150                 :          0 :         emit_move_insn (count_reg, sub_reg);
    1151                 :            :     }
    1152                 :            : 
    1153                 :          0 :   for (i = 0; i < last_stage; i++)
    1154                 :          0 :     duplicate_insns_of_cycles (ps, 0, i, count_reg);
    1155                 :            : 
    1156                 :            :   /* Put the prolog on the entry edge.  */
    1157                 :          0 :   e = loop_preheader_edge (loop);
    1158                 :          0 :   split_edge_and_insert (e, get_insns ());
    1159                 :          0 :   if (!flag_resched_modulo_sched)
    1160                 :          0 :     e->dest->flags |= BB_DISABLE_SCHEDULE;
    1161                 :            : 
    1162                 :          0 :   end_sequence ();
    1163                 :            : 
    1164                 :            :   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
    1165                 :          0 :   start_sequence ();
    1166                 :            : 
    1167                 :          0 :   for (i = 0; i < last_stage; i++)
    1168                 :          0 :     duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
    1169                 :            : 
    1170                 :            :   /* Put the epilogue on the exit edge.  */
    1171                 :          0 :   gcc_assert (single_exit (loop));
    1172                 :          0 :   e = single_exit (loop);
    1173                 :          0 :   split_edge_and_insert (e, get_insns ());
    1174                 :          0 :   if (!flag_resched_modulo_sched)
    1175                 :          0 :     e->dest->flags |= BB_DISABLE_SCHEDULE;
    1176                 :            : 
    1177                 :          0 :   end_sequence ();
    1178                 :          0 : }
    1179                 :            : 
    1180                 :            : /* Mark LOOP as software pipelined so the later
    1181                 :            :    scheduling passes don't touch it.  */
    1182                 :            : static void
    1183                 :          0 : mark_loop_unsched (class loop *loop)
    1184                 :            : {
    1185                 :          0 :   unsigned i;
    1186                 :          0 :   basic_block *bbs = get_loop_body (loop);
    1187                 :            : 
    1188                 :          0 :   for (i = 0; i < loop->num_nodes; i++)
    1189                 :          0 :     bbs[i]->flags |= BB_DISABLE_SCHEDULE;
    1190                 :            : 
    1191                 :          0 :   free (bbs);
    1192                 :          0 : }
    1193                 :            : 
    1194                 :            : /* Return true if all the BBs of the loop are empty except the
    1195                 :            :    loop header.  */
    1196                 :            : static bool
    1197                 :        251 : loop_single_full_bb_p (class loop *loop)
    1198                 :            : {
    1199                 :        251 :   unsigned i;
    1200                 :        251 :   basic_block *bbs = get_loop_body (loop);
    1201                 :            : 
    1202                 :        503 :   for (i = 0; i < loop->num_nodes ; i++)
    1203                 :            :     {
    1204                 :        310 :       rtx_insn *head, *tail;
    1205                 :        310 :       bool empty_bb = true;
    1206                 :            : 
    1207                 :        310 :       if (bbs[i] == loop->header)
    1208                 :        251 :         continue;
    1209                 :            : 
    1210                 :            :       /* Make sure that basic blocks other than the header
    1211                 :            :          have only notes labels or jumps.  */
    1212                 :         59 :       get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
    1213                 :         66 :       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
    1214                 :            :         {
    1215                 :         65 :           if (NOTE_P (head) || LABEL_P (head)
    1216                 :         64 :               || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
    1217                 :          7 :             continue;
    1218                 :            :           empty_bb = false;
    1219                 :            :           break;
    1220                 :            :         }
    1221                 :            : 
    1222                 :         59 :       if (! empty_bb)
    1223                 :            :         {
    1224                 :         58 :           free (bbs);
    1225                 :         58 :           return false;
    1226                 :            :         }
    1227                 :            :     }
    1228                 :        193 :   free (bbs);
    1229                 :        193 :   return true;
    1230                 :            : }
    1231                 :            : 
    1232                 :            : /* Dump file:line from INSN's location info to dump_file.  */
    1233                 :            : 
    1234                 :            : static void
    1235                 :         22 : dump_insn_location (rtx_insn *insn)
    1236                 :            : {
    1237                 :         44 :   if (dump_file && INSN_HAS_LOCATION (insn))
    1238                 :            :     {
    1239                 :         19 :       expanded_location xloc = insn_location (insn);
    1240                 :         19 :       fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
    1241                 :            :     }
    1242                 :         22 : }
    1243                 :            : 
    1244                 :            : /* A simple loop from SMS point of view; it is a loop that is composed of
    1245                 :            :    either a single basic block or two BBs - a header and a latch.  */
    1246                 :            : #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )               \
    1247                 :            :                                   && (EDGE_COUNT (loop->latch->preds) == 1) \
    1248                 :            :                                   && (EDGE_COUNT (loop->latch->succs) == 1))
    1249                 :            : 
    1250                 :            : /* Return true if the loop is in its canonical form and false if not.
    1251                 :            :    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
    1252                 :            : static bool
    1253                 :        197 : loop_canon_p (class loop *loop)
    1254                 :            : {
    1255                 :            : 
    1256                 :        364 :   if (loop->inner || !loop_outer (loop))
    1257                 :            :   {
    1258                 :         30 :     if (dump_file)
    1259                 :          3 :       fprintf (dump_file, "SMS loop inner or !loop_outer\n");
    1260                 :         30 :     return false;
    1261                 :            :   }
    1262                 :            : 
    1263                 :        167 :   if (!single_exit (loop))
    1264                 :            :     {
    1265                 :         12 :       if (dump_file)
    1266                 :            :         {
    1267                 :          3 :           rtx_insn *insn = BB_END (loop->header);
    1268                 :            : 
    1269                 :          3 :           fprintf (dump_file, "SMS loop many exits");
    1270                 :          3 :           dump_insn_location (insn);
    1271                 :          3 :           fprintf (dump_file, "\n");
    1272                 :            :         }
    1273                 :         12 :       return false;
    1274                 :            :     }
    1275                 :            : 
    1276                 :        155 :   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    1277                 :            :     {
    1278                 :          0 :       if (dump_file)
    1279                 :            :         {
    1280                 :          0 :           rtx_insn *insn = BB_END (loop->header);
    1281                 :            : 
    1282                 :          0 :           fprintf (dump_file, "SMS loop many BBs.");
    1283                 :          0 :           dump_insn_location (insn);
    1284                 :          0 :           fprintf (dump_file, "\n");
    1285                 :            :         }
    1286                 :          0 :       return false;
    1287                 :            :     }
    1288                 :            : 
    1289                 :            :     return true;
    1290                 :            : }
    1291                 :            : 
    1292                 :            : /* If there are more than one entry for the loop,
    1293                 :            :    make it one by splitting the first entry edge and
    1294                 :            :    redirecting the others to the new BB.  */
    1295                 :            : static void
    1296                 :          0 : canon_loop (class loop *loop)
    1297                 :            : {
    1298                 :          0 :   edge e;
    1299                 :          0 :   edge_iterator i;
    1300                 :            : 
    1301                 :            :   /* Avoid annoying special cases of edges going to exit
    1302                 :            :      block.  */
    1303                 :          0 :   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    1304                 :          0 :     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
    1305                 :          0 :       split_edge (e);
    1306                 :            : 
    1307                 :          0 :   if (loop->latch == loop->header
    1308                 :          0 :       || EDGE_COUNT (loop->latch->succs) > 1)
    1309                 :            :     {
    1310                 :          0 :       FOR_EACH_EDGE (e, i, loop->header->preds)
    1311                 :          0 :         if (e->src == loop->latch)
    1312                 :            :           break;
    1313                 :          0 :       split_edge (e);
    1314                 :            :     }
    1315                 :          0 : }
    1316                 :            : 
    1317                 :            : /* Setup infos.  */
    1318                 :            : static void
    1319                 :        140 : setup_sched_infos (void)
    1320                 :            : {
    1321                 :        140 :   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
    1322                 :            :           sizeof (sms_common_sched_info));
    1323                 :        140 :   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
    1324                 :        140 :   common_sched_info = &sms_common_sched_info;
    1325                 :            : 
    1326                 :        140 :   sched_deps_info = &sms_sched_deps_info;
    1327                 :        140 :   current_sched_info = &sms_sched_info;
    1328                 :          0 : }
    1329                 :            : 
    1330                 :            : /* Probability in % that the sms-ed loop rolls enough so that optimized
    1331                 :            :    version may be entered.  Just a guess.  */
    1332                 :            : #define PROB_SMS_ENOUGH_ITERATIONS 80
    1333                 :            : 
    1334                 :            : /* Main entry point, perform SMS scheduling on the loops of the function
    1335                 :            :    that consist of single basic blocks.  */
    1336                 :            : static void
    1337                 :        162 : sms_schedule (void)
    1338                 :            : {
    1339                 :        162 :   rtx_insn *insn;
    1340                 :        162 :   ddg_ptr *g_arr, g;
    1341                 :        162 :   int * node_order;
    1342                 :        162 :   int maxii, max_asap;
    1343                 :        162 :   partial_schedule_ptr ps;
    1344                 :        162 :   basic_block bb = NULL;
    1345                 :        162 :   class loop *loop;
    1346                 :        162 :   basic_block condition_bb = NULL;
    1347                 :        162 :   edge latch_edge;
    1348                 :        162 :   HOST_WIDE_INT trip_count, max_trip_count;
    1349                 :            : 
    1350                 :        162 :   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
    1351                 :            :                        | LOOPS_HAVE_RECORDED_EXITS);
    1352                 :        324 :   if (number_of_loops (cfun) <= 1)
    1353                 :            :     {
    1354                 :         22 :       loop_optimizer_finalize ();
    1355                 :         22 :       return;  /* There are no loops to schedule.  */
    1356                 :            :     }
    1357                 :            : 
    1358                 :            :   /* Initialize issue_rate.  */
    1359                 :        140 :   if (targetm.sched.issue_rate)
    1360                 :            :     {
    1361                 :        140 :       int temp = reload_completed;
    1362                 :            : 
    1363                 :        140 :       reload_completed = 1;
    1364                 :        140 :       issue_rate = targetm.sched.issue_rate ();
    1365                 :        140 :       reload_completed = temp;
    1366                 :            :     }
    1367                 :            :   else
    1368                 :          0 :     issue_rate = 1;
    1369                 :            : 
    1370                 :            :   /* Initialize the scheduler.  */
    1371                 :        140 :   setup_sched_infos ();
    1372                 :        140 :   haifa_sched_init ();
    1373                 :            : 
    1374                 :            :   /* Allocate memory to hold the DDG array one entry for each loop.
    1375                 :            :      We use loop->num as index into this array.  */
    1376                 :        280 :   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
    1377                 :            : 
    1378                 :        140 :   if (dump_file)
    1379                 :            :   {
    1380                 :         12 :     fprintf (dump_file, "\n\nSMS analysis phase\n");
    1381                 :         12 :     fprintf (dump_file, "===================\n\n");
    1382                 :            :   }
    1383                 :            : 
    1384                 :            :   /* Build DDGs for all the relevant loops and hold them in G_ARR
    1385                 :            :      indexed by the loop index.  */
    1386                 :        337 :   FOR_EACH_LOOP (loop, 0)
    1387                 :            :     {
    1388                 :        197 :       rtx_insn *head, *tail;
    1389                 :        197 :       rtx count_reg;
    1390                 :            : 
    1391                 :            :       /* For debugging.  */
    1392                 :        197 :       if (dbg_cnt (sms_sched_loop) == false)
    1393                 :            :         {
    1394                 :          0 :           if (dump_file)
    1395                 :          0 :             fprintf (dump_file, "SMS reached max limit... \n");
    1396                 :            : 
    1397                 :          0 :           break;
    1398                 :            :         }
    1399                 :            : 
    1400                 :        197 :       if (dump_file)
    1401                 :            :         {
    1402                 :         19 :           rtx_insn *insn = BB_END (loop->header);
    1403                 :            : 
    1404                 :         19 :           fprintf (dump_file, "SMS loop num: %d", loop->num);
    1405                 :         19 :           dump_insn_location (insn);
    1406                 :         19 :           fprintf (dump_file, "\n");
    1407                 :            :         }
    1408                 :            : 
    1409                 :        197 :       if (! loop_canon_p (loop))
    1410                 :        197 :         continue;
    1411                 :            : 
    1412                 :        155 :       if (! loop_single_full_bb_p (loop))
    1413                 :            :       {
    1414                 :         58 :         if (dump_file)
    1415                 :          0 :           fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
    1416                 :         58 :         continue;
    1417                 :            :       }
    1418                 :            : 
    1419                 :         97 :       bb = loop->header;
    1420                 :            : 
    1421                 :         97 :       get_ebb_head_tail (bb, bb, &head, &tail);
    1422                 :         97 :       latch_edge = loop_latch_edge (loop);
    1423                 :         97 :       gcc_assert (single_exit (loop));
    1424                 :         97 :       trip_count = get_estimated_loop_iterations_int (loop);
    1425                 :         97 :       max_trip_count = get_max_loop_iterations_int (loop);
    1426                 :            : 
    1427                 :            :       /* Perform SMS only on loops that their average count is above threshold.  */
    1428                 :            : 
    1429                 :         97 :       if ( latch_edge->count () > profile_count::zero ()
    1430                 :        291 :           && (latch_edge->count()
    1431                 :        194 :               < single_exit (loop)->count ().apply_scale
    1432                 :         97 :                                  (param_sms_loop_average_count_threshold, 1)))
    1433                 :            :         {
    1434                 :          0 :           if (dump_file)
    1435                 :            :             {
    1436                 :          0 :               dump_insn_location (tail);
    1437                 :          0 :               fprintf (dump_file, "\nSMS single-bb-loop\n");
    1438                 :          0 :               if (profile_info && flag_branch_probabilities)
    1439                 :            :                 {
    1440                 :          0 :                   fprintf (dump_file, "SMS loop-count ");
    1441                 :          0 :                   fprintf (dump_file, "%" PRId64,
    1442                 :          0 :                            (int64_t) bb->count.to_gcov_type ());
    1443                 :          0 :                   fprintf (dump_file, "\n");
    1444                 :          0 :                   fprintf (dump_file, "SMS trip-count ");
    1445                 :          0 :                   fprintf (dump_file, "%" PRId64 "max %" PRId64,
    1446                 :            :                            (int64_t) trip_count, (int64_t) max_trip_count);
    1447                 :          0 :                   fprintf (dump_file, "\n");
    1448                 :            :                 }
    1449                 :            :             }
    1450                 :          0 :           continue;
    1451                 :            :         }
    1452                 :            : 
    1453                 :            :       /* Make sure this is a doloop.  */
    1454                 :         97 :       if ( !(count_reg = doloop_register_get (head, tail)))
    1455                 :            :       {
    1456                 :         97 :         if (dump_file)
    1457                 :         13 :           fprintf (dump_file, "SMS doloop_register_get failed\n");
    1458                 :         97 :         continue;
    1459                 :            :       }
    1460                 :            : 
    1461                 :            :       /* Don't handle BBs with calls or barriers
    1462                 :            :          or !single_set with the exception of instructions that include
    1463                 :            :          count_reg---these instructions are part of the control part
    1464                 :            :          that do-loop recognizes.
    1465                 :            :          ??? Should handle insns defining subregs.  */
    1466                 :          0 :      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
    1467                 :            :       {
    1468                 :          0 :          rtx set;
    1469                 :            : 
    1470                 :          0 :         if (CALL_P (insn)
    1471                 :          0 :             || BARRIER_P (insn)
    1472                 :          0 :             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
    1473                 :          0 :                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
    1474                 :          0 :                 && !reg_mentioned_p (count_reg, insn))
    1475                 :          0 :             || (INSN_P (insn) && (set = single_set (insn))
    1476                 :          0 :                 && GET_CODE (SET_DEST (set)) == SUBREG))
    1477                 :            :         break;
    1478                 :            :       }
    1479                 :            : 
    1480                 :          0 :       if (insn != NEXT_INSN (tail))
    1481                 :            :         {
    1482                 :          0 :           if (dump_file)
    1483                 :            :             {
    1484                 :          0 :               if (CALL_P (insn))
    1485                 :          0 :                 fprintf (dump_file, "SMS loop-with-call\n");
    1486                 :          0 :               else if (BARRIER_P (insn))
    1487                 :          0 :                 fprintf (dump_file, "SMS loop-with-barrier\n");
    1488                 :          0 :               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
    1489                 :          0 :                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
    1490                 :          0 :                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
    1491                 :            :               else
    1492                 :          0 :                fprintf (dump_file, "SMS loop with subreg in lhs\n");
    1493                 :          0 :               print_rtl_single (dump_file, insn);
    1494                 :            :             }
    1495                 :            : 
    1496                 :          0 :           continue;
    1497                 :            :         }
    1498                 :            : 
    1499                 :            :       /* Always schedule the closing branch with the rest of the
    1500                 :            :          instructions. The branch is rotated to be in row ii-1 at the
    1501                 :            :          end of the scheduling procedure to make sure it's the last
    1502                 :            :          instruction in the iteration.  */
    1503                 :          0 :       if (! (g = create_ddg (bb, 1)))
    1504                 :            :         {
    1505                 :          0 :           if (dump_file)
    1506                 :          0 :             fprintf (dump_file, "SMS create_ddg failed\n");
    1507                 :          0 :           continue;
    1508                 :            :         }
    1509                 :            : 
    1510                 :          0 :       g_arr[loop->num] = g;
    1511                 :          0 :       if (dump_file)
    1512                 :          0 :         fprintf (dump_file, "...OK\n");
    1513                 :            : 
    1514                 :            :     }
    1515                 :        140 :   if (dump_file)
    1516                 :            :   {
    1517                 :         12 :     fprintf (dump_file, "\nSMS transformation phase\n");
    1518                 :         12 :     fprintf (dump_file, "=========================\n\n");
    1519                 :            :   }
    1520                 :            : 
    1521                 :            :   /* We don't want to perform SMS on new loops - created by versioning.  */
    1522                 :        337 :   FOR_EACH_LOOP (loop, 0)
    1523                 :            :     {
    1524                 :        197 :       rtx_insn *head, *tail;
    1525                 :        197 :       rtx count_reg;
    1526                 :        197 :       rtx_insn *count_init;
    1527                 :        197 :       int mii, rec_mii, stage_count, min_cycle;
    1528                 :        197 :       int64_t loop_count = 0;
    1529                 :        197 :       bool opt_sc_p;
    1530                 :            : 
    1531                 :        197 :       if (! (g = g_arr[loop->num]))
    1532                 :        197 :         continue;
    1533                 :            : 
    1534                 :          0 :       if (dump_file)
    1535                 :            :         {
    1536                 :          0 :           rtx_insn *insn = BB_END (loop->header);
    1537                 :            : 
    1538                 :          0 :           fprintf (dump_file, "SMS loop num: %d", loop->num);
    1539                 :          0 :           dump_insn_location (insn);
    1540                 :          0 :           fprintf (dump_file, "\n");
    1541                 :            : 
    1542                 :          0 :           print_ddg (dump_file, g);
    1543                 :            :         }
    1544                 :            : 
    1545                 :          0 :       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
    1546                 :            : 
    1547                 :          0 :       latch_edge = loop_latch_edge (loop);
    1548                 :          0 :       gcc_assert (single_exit (loop));
    1549                 :          0 :       trip_count = get_estimated_loop_iterations_int (loop);
    1550                 :          0 :       max_trip_count = get_max_loop_iterations_int (loop);
    1551                 :            : 
    1552                 :          0 :       if (dump_file)
    1553                 :            :         {
    1554                 :          0 :           dump_insn_location (tail);
    1555                 :          0 :           fprintf (dump_file, "\nSMS single-bb-loop\n");
    1556                 :          0 :           if (profile_info && flag_branch_probabilities)
    1557                 :            :             {
    1558                 :          0 :               fprintf (dump_file, "SMS loop-count ");
    1559                 :          0 :               fprintf (dump_file, "%" PRId64,
    1560                 :          0 :                        (int64_t) bb->count.to_gcov_type ());
    1561                 :          0 :               fprintf (dump_file, "\n");
    1562                 :            :             }
    1563                 :          0 :           fprintf (dump_file, "SMS doloop\n");
    1564                 :          0 :           fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
    1565                 :          0 :           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
    1566                 :          0 :           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
    1567                 :            :         }
    1568                 :            : 
    1569                 :            : 
    1570                 :            :       /* In case of th loop have doloop register it gets special
    1571                 :            :          handling.  */
    1572                 :          0 :       count_init = NULL;
    1573                 :          0 :       if ((count_reg = doloop_register_get (head, tail)))
    1574                 :            :         {
    1575                 :          0 :           basic_block pre_header;
    1576                 :            : 
    1577                 :          0 :           pre_header = loop_preheader_edge (loop)->src;
    1578                 :          0 :           count_init = const_iteration_count (count_reg, pre_header,
    1579                 :            :                                               &loop_count);
    1580                 :            :         }
    1581                 :          0 :       gcc_assert (count_reg);
    1582                 :            : 
    1583                 :          0 :       if (dump_file && count_init)
    1584                 :            :         {
    1585                 :          0 :           fprintf (dump_file, "SMS const-doloop ");
    1586                 :          0 :           fprintf (dump_file, "%" PRId64,
    1587                 :            :                      loop_count);
    1588                 :          0 :           fprintf (dump_file, "\n");
    1589                 :            :         }
    1590                 :            : 
    1591                 :          0 :       node_order = XNEWVEC (int, g->num_nodes);
    1592                 :            : 
    1593                 :          0 :       mii = 1; /* Need to pass some estimate of mii.  */
    1594                 :          0 :       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
    1595                 :          0 :       mii = MAX (res_MII (g), rec_mii);
    1596                 :          0 :       mii = MAX (mii, 1);
    1597                 :          0 :       maxii = MAX (max_asap, param_sms_max_ii_factor * mii);
    1598                 :            : 
    1599                 :          0 :       if (dump_file)
    1600                 :          0 :         fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
    1601                 :            :                  rec_mii, mii, maxii);
    1602                 :            : 
    1603                 :          0 :       for (;;)
    1604                 :            :         {
    1605                 :          0 :           set_node_sched_params (g);
    1606                 :            : 
    1607                 :          0 :           stage_count = 0;
    1608                 :          0 :           opt_sc_p = false;
    1609                 :          0 :           ps = sms_schedule_by_order (g, mii, maxii, node_order);
    1610                 :            : 
    1611                 :          0 :           if (ps)
    1612                 :            :             {
    1613                 :            :               /* Try to achieve optimized SC by normalizing the partial
    1614                 :            :                  schedule (having the cycles start from cycle zero).
    1615                 :            :                  The branch location must be placed in row ii-1 in the
    1616                 :            :                  final scheduling.      If failed, shift all instructions to
    1617                 :            :                  position the branch in row ii-1.  */
    1618                 :          0 :               opt_sc_p = optimize_sc (ps, g);
    1619                 :          0 :               if (opt_sc_p)
    1620                 :          0 :                 stage_count = calculate_stage_count (ps, 0);
    1621                 :            :               else
    1622                 :            :                 {
    1623                 :            :                   /* Bring the branch to cycle ii-1.  */
    1624                 :          0 :                   int amount = (SCHED_TIME (g->closing_branch->cuid)
    1625                 :          0 :                                 - (ps->ii - 1));
    1626                 :            : 
    1627                 :          0 :                   if (dump_file)
    1628                 :          0 :                     fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
    1629                 :            : 
    1630                 :          0 :                   stage_count = calculate_stage_count (ps, amount);
    1631                 :            :                 }
    1632                 :            : 
    1633                 :          0 :               gcc_assert (stage_count >= 1);
    1634                 :            :             }
    1635                 :            : 
    1636                 :            :           /* The default value of param_sms_min_sc is 2 as stage count of
    1637                 :            :              1 means that there is no interleaving between iterations thus
    1638                 :            :              we let the scheduling passes do the job in this case.  */
    1639                 :          0 :           if (stage_count < param_sms_min_sc
    1640                 :          0 :               || (count_init && (loop_count <= stage_count))
    1641                 :          0 :               || (max_trip_count >= 0 && max_trip_count <= stage_count)
    1642                 :          0 :               || (trip_count >= 0 && trip_count <= stage_count))
    1643                 :            :             {
    1644                 :          0 :               if (dump_file)
    1645                 :            :                 {
    1646                 :          0 :                   fprintf (dump_file, "SMS failed... \n");
    1647                 :          0 :                   fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
    1648                 :            :                            " loop-count=", stage_count);
    1649                 :          0 :                   fprintf (dump_file, "%" PRId64, loop_count);
    1650                 :          0 :                   fprintf (dump_file, ", trip-count=");
    1651                 :          0 :                   fprintf (dump_file, "%" PRId64 "max %" PRId64,
    1652                 :            :                            (int64_t) trip_count, (int64_t) max_trip_count);
    1653                 :          0 :                   fprintf (dump_file, ")\n");
    1654                 :            :                 }
    1655                 :            :               break;
    1656                 :            :             }
    1657                 :            : 
    1658                 :          0 :           if (!opt_sc_p)
    1659                 :            :             {
    1660                 :            :               /* Rotate the partial schedule to have the branch in row ii-1.  */
    1661                 :          0 :               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
    1662                 :            :               
    1663                 :          0 :               reset_sched_times (ps, amount);
    1664                 :          0 :               rotate_partial_schedule (ps, amount);
    1665                 :            :             }
    1666                 :            :           
    1667                 :          0 :           set_columns_for_ps (ps);
    1668                 :            : 
    1669                 :          0 :           min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
    1670                 :          0 :           if (!schedule_reg_moves (ps))
    1671                 :            :             {
    1672                 :          0 :               mii = ps->ii + 1;
    1673                 :          0 :               free_partial_schedule (ps);
    1674                 :          0 :               continue;
    1675                 :            :             }
    1676                 :            : 
    1677                 :            :           /* Moves that handle incoming values might have been added
    1678                 :            :              to a new first stage.  Bump the stage count if so.
    1679                 :            : 
    1680                 :            :              ??? Perhaps we could consider rotating the schedule here
    1681                 :            :              instead?  */
    1682                 :          0 :           if (PS_MIN_CYCLE (ps) < min_cycle)
    1683                 :            :             {
    1684                 :          0 :               reset_sched_times (ps, 0);
    1685                 :          0 :               stage_count++;
    1686                 :            :             }
    1687                 :            : 
    1688                 :            :           /* The stage count should now be correct without rotation.  */
    1689                 :          0 :           gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
    1690                 :          0 :           PS_STAGE_COUNT (ps) = stage_count;
    1691                 :            : 
    1692                 :          0 :           canon_loop (loop);
    1693                 :            : 
    1694                 :          0 :           if (dump_file)
    1695                 :            :             {
    1696                 :          0 :               dump_insn_location (tail);
    1697                 :          0 :               fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
    1698                 :            :                        ps->ii, stage_count);
    1699                 :          0 :               print_partial_schedule (ps, dump_file);
    1700                 :            :             }
    1701                 :            :  
    1702                 :            :           /* case the BCT count is not known , Do loop-versioning */
    1703                 :          0 :           if (count_reg && ! count_init)
    1704                 :            :             {
    1705                 :          0 :               rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
    1706                 :            :                                          gen_int_mode (stage_count,
    1707                 :            :                                                        GET_MODE (count_reg)));
    1708                 :          0 :               profile_probability prob = profile_probability::guessed_always ()
    1709                 :          0 :                                 .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
    1710                 :            : 
    1711                 :          0 :               loop_version (loop, comp_rtx, &condition_bb,
    1712                 :            :                             prob, prob.invert (),
    1713                 :            :                             prob, prob.invert (), true);
    1714                 :            :              }
    1715                 :            : 
    1716                 :            :           /* Set new iteration count of loop kernel.  */
    1717                 :          0 :           if (count_reg && count_init)
    1718                 :          0 :             SET_SRC (single_set (count_init)) = GEN_INT (loop_count
    1719                 :            :                                                      - stage_count + 1);
    1720                 :            : 
    1721                 :            :           /* Now apply the scheduled kernel to the RTL of the loop.  */
    1722                 :          0 :           permute_partial_schedule (ps, g->closing_branch->first_note);
    1723                 :            : 
    1724                 :            :           /* Mark this loop as software pipelined so the later
    1725                 :            :              scheduling passes don't touch it.  */
    1726                 :          0 :           if (! flag_resched_modulo_sched)
    1727                 :          0 :             mark_loop_unsched (loop);
    1728                 :            :           
    1729                 :            :           /* The life-info is not valid any more.  */
    1730                 :          0 :           df_set_bb_dirty (g->bb);
    1731                 :            : 
    1732                 :          0 :           apply_reg_moves (ps);
    1733                 :          0 :           if (dump_file)
    1734                 :          0 :             print_node_sched_params (dump_file, g->num_nodes, ps);
    1735                 :            :           /* Generate prolog and epilog.  */
    1736                 :          0 :           generate_prolog_epilog (ps, loop, count_reg, count_init);
    1737                 :          0 :           break;
    1738                 :          0 :         }
    1739                 :            : 
    1740                 :          0 :       free_partial_schedule (ps);
    1741                 :          0 :       node_sched_param_vec.release ();
    1742                 :          0 :       free (node_order);
    1743                 :          0 :       free_ddg (g);
    1744                 :            :     }
    1745                 :            : 
    1746                 :        140 :   free (g_arr);
    1747                 :            : 
    1748                 :            :   /* Release scheduler data, needed until now because of DFA.  */
    1749                 :        140 :   haifa_sched_finish ();
    1750                 :        140 :   loop_optimizer_finalize ();
    1751                 :            : }
    1752                 :            : 
    1753                 :            : /* The SMS scheduling algorithm itself
    1754                 :            :    -----------------------------------
    1755                 :            :    Input: 'O' an ordered list of insns of a loop.
    1756                 :            :    Output: A scheduling of the loop - kernel, prolog, and epilogue.
    1757                 :            : 
    1758                 :            :    'Q' is the empty Set
    1759                 :            :    'PS' is the partial schedule; it holds the currently scheduled nodes with
    1760                 :            :         their cycle/slot.
    1761                 :            :    'PSP' previously scheduled predecessors.
    1762                 :            :    'PSS' previously scheduled successors.
    1763                 :            :    't(u)' the cycle where u is scheduled.
    1764                 :            :    'l(u)' is the latency of u.
    1765                 :            :    'd(v,u)' is the dependence distance from v to u.
    1766                 :            :    'ASAP(u)' the earliest time at which u could be scheduled as computed in
    1767                 :            :              the node ordering phase.
    1768                 :            :    'check_hardware_resources_conflicts(u, PS, c)'
    1769                 :            :                              run a trace around cycle/slot through DFA model
    1770                 :            :                              to check resource conflicts involving instruction u
    1771                 :            :                              at cycle c given the partial schedule PS.
    1772                 :            :    'add_to_partial_schedule_at_time(u, PS, c)'
    1773                 :            :                              Add the node/instruction u to the partial schedule
    1774                 :            :                              PS at time c.
    1775                 :            :    'calculate_register_pressure(PS)'
    1776                 :            :                              Given a schedule of instructions, calculate the register
    1777                 :            :                              pressure it implies.  One implementation could be the
    1778                 :            :                              maximum number of overlapping live ranges.
    1779                 :            :    'maxRP' The maximum allowed register pressure, it is usually derived from the number
    1780                 :            :            registers available in the hardware.
    1781                 :            : 
    1782                 :            :    1. II = MII.
    1783                 :            :    2. PS = empty list
    1784                 :            :    3. for each node u in O in pre-computed order
    1785                 :            :    4.   if (PSP(u) != Q && PSS(u) == Q) then
    1786                 :            :    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
    1787                 :            :    6.     start = Early_start; end = Early_start + II - 1; step = 1
    1788                 :            :    11.  else if (PSP(u) == Q && PSS(u) != Q) then
    1789                 :            :    12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
    1790                 :            :    13.     start = Late_start; end = Late_start - II + 1; step = -1
    1791                 :            :    14.  else if (PSP(u) != Q && PSS(u) != Q) then
    1792                 :            :    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
    1793                 :            :    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
    1794                 :            :    17.     start = Early_start;
    1795                 :            :    18.     end = min(Early_start + II - 1 , Late_start);
    1796                 :            :    19.     step = 1
    1797                 :            :    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
    1798                 :            :    21.    start = ASAP(u); end = start + II - 1; step = 1
    1799                 :            :    22.  endif
    1800                 :            : 
    1801                 :            :    23.  success = false
    1802                 :            :    24.  for (c = start ; c != end ; c += step)
    1803                 :            :    25.     if check_hardware_resources_conflicts(u, PS, c) then
    1804                 :            :    26.       add_to_partial_schedule_at_time(u, PS, c)
    1805                 :            :    27.       success = true
    1806                 :            :    28.       break
    1807                 :            :    29.     endif
    1808                 :            :    30.  endfor
    1809                 :            :    31.  if (success == false) then
    1810                 :            :    32.    II = II + 1
    1811                 :            :    33.    if (II > maxII) then
    1812                 :            :    34.       finish - failed to schedule
    1813                 :            :    35.   endif
    1814                 :            :    36.    goto 2.
    1815                 :            :    37.  endif
    1816                 :            :    38. endfor
    1817                 :            :    39. if (calculate_register_pressure(PS) > maxRP) then
    1818                 :            :    40.    goto 32.
    1819                 :            :    41. endif
    1820                 :            :    42. compute epilogue & prologue
    1821                 :            :    43. finish - succeeded to schedule
    1822                 :            : 
    1823                 :            :    ??? The algorithm restricts the scheduling window to II cycles.
    1824                 :            :    In rare cases, it may be better to allow windows of II+1 cycles.
    1825                 :            :    The window would then start and end on the same row, but with
    1826                 :            :    different "must precede" and "must follow" requirements.  */
    1827                 :            : 
    1828                 :            : /* A threshold for the number of repeated unsuccessful attempts to insert
    1829                 :            :    an empty row, before we flush the partial schedule and start over.  */
    1830                 :            : #define MAX_SPLIT_NUM 10
    1831                 :            : /* Given the partial schedule PS, this function calculates and returns the
    1832                 :            :    cycles in which we can schedule the node with the given index I.
    1833                 :            :    NOTE: Here we do the backtracking in SMS, in some special cases. We have
    1834                 :            :    noticed that there are several cases in which we fail    to SMS the loop
    1835                 :            :    because the sched window of a node is empty    due to tight data-deps. In
    1836                 :            :    such cases we want to unschedule    some of the predecessors/successors
    1837                 :            :    until we get non-empty    scheduling window.  It returns -1 if the
    1838                 :            :    scheduling window is empty and zero otherwise.  */
    1839                 :            : 
    1840                 :            : static int
    1841                 :          0 : get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
    1842                 :            :                   sbitmap sched_nodes, int ii, int *start_p, int *step_p,
    1843                 :            :                   int *end_p)
    1844                 :            : {
    1845                 :          0 :   int start, step, end;
    1846                 :          0 :   int early_start, late_start;
    1847                 :          0 :   ddg_edge_ptr e;
    1848                 :          0 :   auto_sbitmap psp (ps->g->num_nodes);
    1849                 :          0 :   auto_sbitmap pss (ps->g->num_nodes);
    1850                 :          0 :   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
    1851                 :          0 :   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
    1852                 :          0 :   int psp_not_empty;
    1853                 :          0 :   int pss_not_empty;
    1854                 :          0 :   int count_preds;
    1855                 :          0 :   int count_succs;
    1856                 :            : 
    1857                 :            :   /* 1. compute sched window for u (start, end, step).  */
    1858                 :          0 :   bitmap_clear (psp);
    1859                 :          0 :   bitmap_clear (pss);
    1860                 :          0 :   psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
    1861                 :          0 :   pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
    1862                 :            : 
    1863                 :            :   /* We first compute a forward range (start <= end), then decide whether
    1864                 :            :      to reverse it.  */
    1865                 :          0 :   early_start = INT_MIN;
    1866                 :          0 :   late_start = INT_MAX;
    1867                 :          0 :   start = INT_MIN;
    1868                 :          0 :   end = INT_MAX;
    1869                 :          0 :   step = 1;
    1870                 :            : 
    1871                 :          0 :   count_preds = 0;
    1872                 :          0 :   count_succs = 0;
    1873                 :            : 
    1874                 :          0 :   if (dump_file && (psp_not_empty || pss_not_empty))
    1875                 :            :     {
    1876                 :          0 :       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
    1877                 :          0 :                "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
    1878                 :          0 :       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
    1879                 :            :                "start", "early start", "late start", "end", "time");
    1880                 :          0 :       fprintf (dump_file, "=========== =========== =========== ==========="
    1881                 :            :                " =====\n");
    1882                 :            :     }
    1883                 :            :   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
    1884                 :          0 :   if (psp_not_empty)
    1885                 :          0 :     for (e = u_node->in; e != 0; e = e->next_in)
    1886                 :            :       {
    1887                 :          0 :         int v = e->src->cuid;
    1888                 :            : 
    1889                 :          0 :         if (bitmap_bit_p (sched_nodes, v))
    1890                 :            :           {
    1891                 :          0 :             int p_st = SCHED_TIME (v);
    1892                 :          0 :             int earliest = p_st + e->latency - (e->distance * ii);
    1893                 :          0 :             int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
    1894                 :            : 
    1895                 :          0 :             if (dump_file)
    1896                 :            :               {
    1897                 :          0 :                 fprintf (dump_file, "%11s %11d %11s %11d %5d",
    1898                 :            :                          "", earliest, "", latest, p_st);
    1899                 :          0 :                 print_ddg_edge (dump_file, e);
    1900                 :          0 :                 fprintf (dump_file, "\n");
    1901                 :            :               }
    1902                 :            : 
    1903                 :          0 :             early_start = MAX (early_start, earliest);
    1904                 :          0 :             end = MIN (end, latest);
    1905                 :            : 
    1906                 :          0 :             if (e->type == TRUE_DEP && e->data_type == REG_DEP)
    1907                 :          0 :               count_preds++;
    1908                 :            :           }
    1909                 :            :       }
    1910                 :            : 
    1911                 :            :   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
    1912                 :          0 :   if (pss_not_empty)
    1913                 :          0 :     for (e = u_node->out; e != 0; e = e->next_out)
    1914                 :            :       {
    1915                 :          0 :         int v = e->dest->cuid;
    1916                 :            : 
    1917                 :          0 :         if (bitmap_bit_p (sched_nodes, v))
    1918                 :            :           {
    1919                 :          0 :             int s_st = SCHED_TIME (v);
    1920                 :          0 :             int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
    1921                 :          0 :             int latest = s_st - e->latency + (e->distance * ii);
    1922                 :            : 
    1923                 :          0 :             if (dump_file)
    1924                 :            :               {
    1925                 :          0 :                 fprintf (dump_file, "%11d %11s %11d %11s %5d",
    1926                 :            :                          earliest, "", latest, "", s_st);
    1927                 :          0 :                 print_ddg_edge (dump_file, e);
    1928                 :          0 :                 fprintf (dump_file, "\n");
    1929                 :            :               }
    1930                 :            : 
    1931                 :          0 :             start = MAX (start, earliest);
    1932                 :          0 :             late_start = MIN (late_start, latest);
    1933                 :            : 
    1934                 :          0 :             if (e->type == TRUE_DEP && e->data_type == REG_DEP)
    1935                 :          0 :               count_succs++;
    1936                 :            :           }
    1937                 :            :       }
    1938                 :            : 
    1939                 :          0 :   if (dump_file && (psp_not_empty || pss_not_empty))
    1940                 :            :     {
    1941                 :          0 :       fprintf (dump_file, "----------- ----------- ----------- -----------"
    1942                 :            :                " -----\n");
    1943                 :          0 :       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
    1944                 :            :                start, early_start, late_start, end, "",
    1945                 :            :                "(max, max, min, min)");
    1946                 :            :     }
    1947                 :            : 
    1948                 :            :   /* Get a target scheduling window no bigger than ii.  */
    1949                 :          0 :   if (early_start == INT_MIN && late_start == INT_MAX)
    1950                 :          0 :     early_start = NODE_ASAP (u_node);
    1951                 :          0 :   else if (early_start == INT_MIN)
    1952                 :          0 :     early_start = late_start - (ii - 1);
    1953                 :          0 :   late_start = MIN (late_start, early_start + (ii - 1));
    1954                 :            : 
    1955                 :            :   /* Apply memory dependence limits.  */
    1956                 :          0 :   start = MAX (start, early_start);
    1957                 :          0 :   end = MIN (end, late_start);
    1958                 :            : 
    1959                 :          0 :   if (dump_file && (psp_not_empty || pss_not_empty))
    1960                 :          0 :     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
    1961                 :            :              "", start, end, "", "");
    1962                 :            : 
    1963                 :            :   /* If there are at least as many successors as predecessors, schedule the
    1964                 :            :      node close to its successors.  */
    1965                 :          0 :   if (pss_not_empty && count_succs >= count_preds)
    1966                 :            :     {
    1967                 :          0 :       std::swap (start, end);
    1968                 :          0 :       step = -1;
    1969                 :            :     }
    1970                 :            : 
    1971                 :            :   /* Now that we've finalized the window, make END an exclusive rather
    1972                 :            :      than an inclusive bound.  */
    1973                 :          0 :   end += step;
    1974                 :            : 
    1975                 :          0 :   *start_p = start;
    1976                 :          0 :   *step_p = step;
    1977                 :          0 :   *end_p = end;
    1978                 :            : 
    1979                 :          0 :   if ((start >= end && step == 1) || (start <= end && step == -1))
    1980                 :            :     {
    1981                 :          0 :       if (dump_file)
    1982                 :          0 :         fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
    1983                 :            :                  start, end, step);
    1984                 :          0 :       return -1;
    1985                 :            :     }
    1986                 :            : 
    1987                 :            :   return 0;
    1988                 :            : }
    1989                 :            : 
    1990                 :            : /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
    1991                 :            :    node currently been scheduled.  At the end of the calculation
    1992                 :            :    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
    1993                 :            :    U_NODE which are (1) already scheduled in the first/last row of
    1994                 :            :    U_NODE's scheduling window, (2) whose dependence inequality with U
    1995                 :            :    becomes an equality when U is scheduled in this same row, and (3)
    1996                 :            :    whose dependence latency is zero.
    1997                 :            : 
    1998                 :            :    The first and last rows are calculated using the following parameters:
    1999                 :            :    START/END rows - The cycles that begins/ends the traversal on the window;
    2000                 :            :    searching for an empty cycle to schedule U_NODE.
    2001                 :            :    STEP - The direction in which we traverse the window.
    2002                 :            :    II - The initiation interval.  */
    2003                 :            : 
    2004                 :            : static void
    2005                 :          0 : calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
    2006                 :            :                                int step, int ii, sbitmap sched_nodes,
    2007                 :            :                                sbitmap must_precede, sbitmap must_follow)
    2008                 :            : {
    2009                 :          0 :   ddg_edge_ptr e;
    2010                 :          0 :   int first_cycle_in_window, last_cycle_in_window;
    2011                 :            : 
    2012                 :          0 :   gcc_assert (must_precede && must_follow);
    2013                 :            : 
    2014                 :            :   /* Consider the following scheduling window:
    2015                 :            :      {first_cycle_in_window, first_cycle_in_window+1, ...,
    2016                 :            :      last_cycle_in_window}.  If step is 1 then the following will be
    2017                 :            :      the order we traverse the window: {start=first_cycle_in_window,
    2018                 :            :      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
    2019                 :            :      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
    2020                 :            :      end=first_cycle_in_window-1} if step is -1.  */
    2021                 :          0 :   first_cycle_in_window = (step == 1) ? start : end - step;
    2022                 :          0 :   last_cycle_in_window = (step == 1) ? end - step : start;
    2023                 :            : 
    2024                 :          0 :   bitmap_clear (must_precede);
    2025                 :          0 :   bitmap_clear (must_follow);
    2026                 :            : 
    2027                 :          0 :   if (dump_file)
    2028                 :          0 :     fprintf (dump_file, "\nmust_precede: ");
    2029                 :            : 
    2030                 :            :   /* Instead of checking if:
    2031                 :            :       (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
    2032                 :            :       && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
    2033                 :            :              first_cycle_in_window)
    2034                 :            :       && e->latency == 0
    2035                 :            :      we use the fact that latency is non-negative:
    2036                 :            :       SCHED_TIME (e->src) - (e->distance * ii) <=
    2037                 :            :       SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
    2038                 :            :       first_cycle_in_window
    2039                 :            :      and check only if
    2040                 :            :       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
    2041                 :          0 :   for (e = u_node->in; e != 0; e = e->next_in)
    2042                 :          0 :     if (bitmap_bit_p (sched_nodes, e->src->cuid)
    2043                 :          0 :         && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
    2044                 :            :              first_cycle_in_window))
    2045                 :            :       {
    2046                 :          0 :         if (dump_file)
    2047                 :          0 :           fprintf (dump_file, "%d ", e->src->cuid);
    2048                 :            : 
    2049                 :          0 :         bitmap_set_bit (must_precede, e->src->cuid);
    2050                 :            :       }
    2051                 :            : 
    2052                 :          0 :   if (dump_file)
    2053                 :          0 :     fprintf (dump_file, "\nmust_follow: ");
    2054                 :            : 
    2055                 :            :   /* Instead of checking if:
    2056                 :            :       (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
    2057                 :            :       && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
    2058                 :            :              last_cycle_in_window)
    2059                 :            :       && e->latency == 0
    2060                 :            :      we use the fact that latency is non-negative:
    2061                 :            :       SCHED_TIME (e->dest) + (e->distance * ii) >=
    2062                 :            :       SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
    2063                 :            :       last_cycle_in_window
    2064                 :            :      and check only if
    2065                 :            :       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
    2066                 :          0 :   for (e = u_node->out; e != 0; e = e->next_out)
    2067                 :          0 :     if (bitmap_bit_p (sched_nodes, e->dest->cuid)
    2068                 :          0 :         && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
    2069                 :            :              last_cycle_in_window))
    2070                 :            :       {
    2071                 :          0 :         if (dump_file)
    2072                 :          0 :           fprintf (dump_file, "%d ", e->dest->cuid);
    2073                 :            : 
    2074                 :          0 :         bitmap_set_bit (must_follow, e->dest->cuid);
    2075                 :            :       }
    2076                 :            : 
    2077                 :          0 :   if (dump_file)
    2078                 :          0 :     fprintf (dump_file, "\n");
    2079                 :          0 : }
    2080                 :            : 
    2081                 :            : /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
    2082                 :            :    parameters to decide if that's possible:
    2083                 :            :    PS - The partial schedule.
    2084                 :            :    U - The serial number of U_NODE.
    2085                 :            :    NUM_SPLITS - The number of row splits made so far.
    2086                 :            :    MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
    2087                 :            :    the first row of the scheduling window)
    2088                 :            :    MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
    2089                 :            :    last row of the scheduling window)  */
    2090                 :            : 
    2091                 :            : static bool
    2092                 :          0 : try_scheduling_node_in_cycle (partial_schedule_ptr ps,
    2093                 :            :                               int u, int cycle, sbitmap sched_nodes,
    2094                 :            :                               int *num_splits, sbitmap must_precede,
    2095                 :            :                               sbitmap must_follow)
    2096                 :            : {
    2097                 :          0 :   ps_insn_ptr psi;
    2098                 :          0 :   bool success = 0;
    2099                 :            : 
    2100                 :          0 :   verify_partial_schedule (ps, sched_nodes);
    2101                 :          0 :   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
    2102                 :          0 :   if (psi)
    2103                 :            :     {
    2104                 :          0 :       SCHED_TIME (u) = cycle;
    2105                 :          0 :       bitmap_set_bit (sched_nodes, u);
    2106                 :          0 :       success = 1;
    2107                 :          0 :       *num_splits = 0;
    2108                 :          0 :       if (dump_file)
    2109                 :          0 :         fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
    2110                 :            : 
    2111                 :            :     }
    2112                 :            : 
    2113                 :          0 :   return success;
    2114                 :            : }
    2115                 :            : 
    2116                 :            : /* This function implements the scheduling algorithm for SMS according to the
    2117                 :            :    above algorithm.  */
    2118                 :            : static partial_schedule_ptr
    2119                 :          0 : sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
    2120                 :            : {
    2121                 :          0 :   int ii = mii;
    2122                 :          0 :   int i, c, success, num_splits = 0;
    2123                 :          0 :   int flush_and_start_over = true;
    2124                 :          0 :   int num_nodes = g->num_nodes;
    2125                 :          0 :   int start, end, step; /* Place together into one struct?  */
    2126                 :          0 :   auto_sbitmap sched_nodes (num_nodes);
    2127                 :          0 :   auto_sbitmap must_precede (num_nodes);
    2128                 :          0 :   auto_sbitmap must_follow (num_nodes);
    2129                 :          0 :   auto_sbitmap tobe_scheduled (num_nodes);
    2130                 :            : 
    2131                 :            :   /* Value of param_sms_dfa_history is a limit on the number of cycles that
    2132                 :            :      resource conflicts can span.  ??? Should be provided by DFA, and be
    2133                 :            :      dependent on the type of insn scheduled.  Set to 0 by default to save
    2134                 :            :      compile time.  */
    2135                 :          0 :   partial_schedule_ptr ps = create_partial_schedule (ii, g,
    2136                 :            :                                                      param_sms_dfa_history);
    2137                 :            : 
    2138                 :          0 :   bitmap_ones (tobe_scheduled);
    2139                 :          0 :   bitmap_clear (sched_nodes);
    2140                 :            : 
    2141                 :          0 :   while (flush_and_start_over && (ii < maxii))
    2142                 :            :     {
    2143                 :            : 
    2144                 :          0 :       if (dump_file)
    2145                 :          0 :         fprintf (dump_file, "Starting with ii=%d\n", ii);
    2146                 :          0 :       flush_and_start_over = false;
    2147                 :          0 :       bitmap_clear (sched_nodes);
    2148                 :            : 
    2149                 :          0 :       for (i = 0; i < num_nodes; i++)
    2150                 :            :         {
    2151                 :          0 :           int u = nodes_order[i];
    2152                 :          0 :           ddg_node_ptr u_node = &ps->g->nodes[u];
    2153                 :          0 :           rtx_insn *insn = u_node->insn;
    2154                 :            : 
    2155                 :          0 :           gcc_checking_assert (NONDEBUG_INSN_P (insn));
    2156                 :            : 
    2157                 :          0 :           if (bitmap_bit_p (sched_nodes, u))
    2158                 :          0 :             continue;
    2159                 :            : 
    2160                 :            :           /* Try to get non-empty scheduling window.  */
    2161                 :          0 :          success = 0;
    2162                 :          0 :          if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
    2163                 :            :                                 &step, &end) == 0)
    2164                 :            :             {
    2165                 :          0 :               if (dump_file)
    2166                 :          0 :                 fprintf (dump_file, "\nTrying to schedule node %d "
    2167                 :            :                          "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
    2168                 :          0 :                         (g->nodes[u].insn)), start, end, step);
    2169                 :            : 
    2170                 :          0 :               gcc_assert ((step > 0 && start < end)
    2171                 :            :                           || (step < 0 && start > end));
    2172                 :            : 
    2173                 :          0 :               calculate_must_precede_follow (u_node, start, end, step, ii,
    2174                 :            :                                              sched_nodes, must_precede,
    2175                 :            :                                              must_follow);
    2176                 :            : 
    2177                 :          0 :               for (c = start; c != end; c += step)
    2178                 :            :                 {
    2179                 :          0 :                   sbitmap tmp_precede, tmp_follow;
    2180                 :            : 
    2181                 :          0 :                   set_must_precede_follow (&tmp_follow, must_follow, 
    2182                 :            :                                            &tmp_precede, must_precede, 
    2183                 :            :                                            c, start, end, step);
    2184                 :          0 :                   success =
    2185                 :          0 :                     try_scheduling_node_in_cycle (ps, u, c,
    2186                 :            :                                                   sched_nodes,
    2187                 :            :                                                   &num_splits, tmp_precede,
    2188                 :            :                                                   tmp_follow);
    2189                 :          0 :                   if (success)
    2190                 :            :                     break;
    2191                 :            :                 }
    2192                 :            : 
    2193                 :          0 :               verify_partial_schedule (ps, sched_nodes);
    2194                 :            :             }
    2195                 :          0 :             if (!success)
    2196                 :            :             {
    2197                 :          0 :               int split_row;
    2198                 :            : 
    2199                 :          0 :               if (ii++ == maxii)
    2200                 :            :                 break;
    2201                 :            : 
    2202                 :          0 :               if (num_splits >= MAX_SPLIT_NUM)
    2203                 :            :                 {
    2204                 :          0 :                   num_splits = 0;
    2205                 :          0 :                   flush_and_start_over = true;
    2206                 :          0 :                   verify_partial_schedule (ps, sched_nodes);
    2207                 :          0 :                   reset_partial_schedule (ps, ii);
    2208                 :          0 :                   verify_partial_schedule (ps, sched_nodes);
    2209                 :          0 :                   break;
    2210                 :            :                 }
    2211                 :            : 
    2212                 :          0 :               num_splits++;
    2213                 :            :               /* The scheduling window is exclusive of 'end'
    2214                 :            :                  whereas compute_split_window() expects an inclusive,
    2215                 :            :                  ordered range.  */
    2216                 :          0 :               if (step == 1)
    2217                 :          0 :                 split_row = compute_split_row (sched_nodes, start, end - 1,
    2218                 :            :                                                ps->ii, u_node);
    2219                 :            :               else
    2220                 :          0 :                 split_row = compute_split_row (sched_nodes, end + 1, start,
    2221                 :            :                                                ps->ii, u_node);
    2222                 :            : 
    2223                 :          0 :               ps_insert_empty_row (ps, split_row, sched_nodes);
    2224                 :          0 :               i--;              /* Go back and retry node i.  */
    2225                 :            : 
    2226                 :          0 :               if (dump_file)
    2227                 :          0 :                 fprintf (dump_file, "num_splits=%d\n", num_splits);
    2228                 :            :             }
    2229                 :            : 
    2230                 :            :           /* ??? If (success), check register pressure estimates.  */
    2231                 :            :         }                       /* Continue with next node.  */
    2232                 :            :     }                           /* While flush_and_start_over.  */
    2233                 :          0 :   if (ii >= maxii)
    2234                 :            :     {
    2235                 :          0 :       free_partial_schedule (ps);
    2236                 :          0 :       ps = NULL;
    2237                 :            :     }
    2238                 :            :   else
    2239                 :          0 :     gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
    2240                 :            : 
    2241                 :          0 :   return ps;
    2242                 :            : }
    2243                 :            : 
    2244                 :            : /* This function inserts a new empty row into PS at the position
    2245                 :            :    according to SPLITROW, keeping all already scheduled instructions
    2246                 :            :    intact and updating their SCHED_TIME and cycle accordingly.  */
    2247                 :            : static void
    2248                 :          0 : ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
    2249                 :            :                      sbitmap sched_nodes)
    2250                 :            : {
    2251                 :          0 :   ps_insn_ptr crr_insn;
    2252                 :          0 :   ps_insn_ptr *rows_new;
    2253                 :          0 :   int ii = ps->ii;
    2254                 :          0 :   int new_ii = ii + 1;
    2255                 :          0 :   int row;
    2256                 :          0 :   int *rows_length_new;
    2257                 :            : 
    2258                 :          0 :   verify_partial_schedule (ps, sched_nodes);
    2259                 :            : 
    2260                 :            :   /* We normalize sched_time and rotate ps to have only non-negative sched
    2261                 :            :      times, for simplicity of updating cycles after inserting new row.  */
    2262                 :          0 :   split_row -= ps->min_cycle;
    2263                 :          0 :   split_row = SMODULO (split_row, ii);
    2264                 :          0 :   if (dump_file)
    2265                 :          0 :     fprintf (dump_file, "split_row=%d\n", split_row);
    2266                 :            : 
    2267                 :          0 :   reset_sched_times (ps, PS_MIN_CYCLE (ps));
    2268                 :          0 :   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
    2269                 :            : 
    2270                 :          0 :   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
    2271                 :          0 :   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
    2272                 :          0 :   for (row = 0; row < split_row; row++)
    2273                 :            :     {
    2274                 :          0 :       rows_new[row] = ps->rows[row];
    2275                 :          0 :       rows_length_new[row] = ps->rows_length[row];
    2276                 :          0 :       ps->rows[row] = NULL;
    2277                 :          0 :       for (crr_insn = rows_new[row];
    2278                 :          0 :            crr_insn; crr_insn = crr_insn->next_in_row)
    2279                 :            :         {
    2280                 :          0 :           int u = crr_insn->id;
    2281                 :          0 :           int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
    2282                 :            : 
    2283                 :          0 :           SCHED_TIME (u) = new_time;
    2284                 :          0 :           crr_insn->cycle = new_time;
    2285                 :          0 :           SCHED_ROW (u) = new_time % new_ii;
    2286                 :          0 :           SCHED_STAGE (u) = new_time / new_ii;
    2287                 :            :         }
    2288                 :            : 
    2289                 :            :     }
    2290                 :            : 
    2291                 :          0 :   rows_new[split_row] = NULL;
    2292                 :            : 
    2293                 :          0 :   for (row = split_row; row < ii; row++)
    2294                 :            :     {
    2295                 :          0 :       rows_new[row + 1] = ps->rows[row];
    2296                 :          0 :       rows_length_new[row + 1] = ps->rows_length[row];
    2297                 :          0 :       ps->rows[row] = NULL;
    2298                 :          0 :       for (crr_insn = rows_new[row + 1];
    2299                 :          0 :            crr_insn; crr_insn = crr_insn->next_in_row)
    2300                 :            :         {
    2301                 :          0 :           int u = crr_insn->id;
    2302                 :          0 :           int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
    2303                 :            : 
    2304                 :          0 :           SCHED_TIME (u) = new_time;
    2305                 :          0 :           crr_insn->cycle = new_time;
    2306                 :          0 :           SCHED_ROW (u) = new_time % new_ii;
    2307                 :          0 :           SCHED_STAGE (u) = new_time / new_ii;
    2308                 :            :         }
    2309                 :            :     }
    2310                 :            : 
    2311                 :            :   /* Updating ps.  */
    2312                 :          0 :   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    2313                 :          0 :     + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
    2314                 :          0 :   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    2315                 :          0 :     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
    2316                 :          0 :   free (ps->rows);
    2317                 :          0 :   ps->rows = rows_new;
    2318                 :          0 :   free (ps->rows_length);
    2319                 :          0 :   ps->rows_length = rows_length_new;
    2320                 :          0 :   ps->ii = new_ii;
    2321                 :          0 :   gcc_assert (ps->min_cycle >= 0);
    2322                 :            : 
    2323                 :          0 :   verify_partial_schedule (ps, sched_nodes);
    2324                 :            : 
    2325                 :          0 :   if (dump_file)
    2326                 :          0 :     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
    2327                 :            :              ps->max_cycle);
    2328                 :          0 : }
    2329                 :            : 
    2330                 :            : /* Given U_NODE which is the node that failed to be scheduled; LOW and
    2331                 :            :    UP which are the boundaries of it's scheduling window; compute using
    2332                 :            :    SCHED_NODES and II a row in the partial schedule that can be split
    2333                 :            :    which will separate a critical predecessor from a critical successor
    2334                 :            :    thereby expanding the window, and return it.  */
    2335                 :            : static int
    2336                 :          0 : compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
    2337                 :            :                    ddg_node_ptr u_node)
    2338                 :            : {
    2339                 :          0 :   ddg_edge_ptr e;
    2340                 :          0 :   int lower = INT_MIN, upper = INT_MAX;
    2341                 :          0 :   int crit_pred = -1;
    2342                 :          0 :   int crit_succ = -1;
    2343                 :          0 :   int crit_cycle;
    2344                 :            : 
    2345                 :          0 :   for (e = u_node->in; e != 0; e = e->next_in)
    2346                 :            :     {
    2347                 :          0 :       int v = e->src->cuid;
    2348                 :            : 
    2349                 :          0 :       if (bitmap_bit_p (sched_nodes, v)
    2350                 :          0 :           && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
    2351                 :          0 :         if (SCHED_TIME (v) > lower)
    2352                 :            :           {
    2353                 :          0 :             crit_pred = v;
    2354                 :          0 :             lower = SCHED_TIME (v);
    2355                 :            :           }
    2356                 :            :     }
    2357                 :            : 
    2358                 :          0 :   if (crit_pred >= 0)
    2359                 :            :     {
    2360                 :          0 :       crit_cycle = SCHED_TIME (crit_pred) + 1;
    2361                 :          0 :       return SMODULO (crit_cycle, ii);
    2362                 :            :     }
    2363                 :            : 
    2364                 :          0 :   for (e = u_node->out; e != 0; e = e->next_out)
    2365                 :            :     {
    2366                 :          0 :       int v = e->dest->cuid;
    2367                 :            : 
    2368                 :          0 :       if (bitmap_bit_p (sched_nodes, v)
    2369                 :          0 :           && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
    2370                 :          0 :         if (SCHED_TIME (v) < upper)
    2371                 :            :           {
    2372                 :          0 :             crit_succ = v;
    2373                 :          0 :             upper = SCHED_TIME (v);
    2374                 :            :           }
    2375                 :            :     }
    2376                 :            : 
    2377                 :          0 :   if (crit_succ >= 0)
    2378                 :            :     {
    2379                 :          0 :       crit_cycle = SCHED_TIME (crit_succ);
    2380                 :          0 :       return SMODULO (crit_cycle, ii);
    2381                 :            :     }
    2382                 :            : 
    2383                 :          0 :   if (dump_file)
    2384                 :          0 :     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
    2385                 :            : 
    2386                 :          0 :   return SMODULO ((low + up + 1) / 2, ii);
    2387                 :            : }
    2388                 :            : 
    2389                 :            : static void
    2390                 :          0 : verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
    2391                 :            : {
    2392                 :          0 :   int row;
    2393                 :          0 :   ps_insn_ptr crr_insn;
    2394                 :            : 
    2395                 :          0 :   for (row = 0; row < ps->ii; row++)
    2396                 :            :     {
    2397                 :          0 :       int length = 0;
    2398                 :            :       
    2399                 :          0 :       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
    2400                 :            :         {
    2401                 :          0 :           int u = crr_insn->id;
    2402                 :            :           
    2403                 :          0 :           length++;
    2404                 :          0 :           gcc_assert (bitmap_bit_p (sched_nodes, u));
    2405                 :            :           /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
    2406                 :            :              popcount (sched_nodes) == number of insns in ps.  */
    2407                 :          0 :           gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
    2408                 :          0 :           gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
    2409                 :            :         }
    2410                 :            :       
    2411                 :          0 :       gcc_assert (ps->rows_length[row] == length);
    2412                 :            :     }
    2413                 :          0 : }
    2414                 :            : 
    2415                 :            : 
    2416                 :            : /* This page implements the algorithm for ordering the nodes of a DDG
    2417                 :            :    for modulo scheduling, activated through the
    2418                 :            :    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
    2419                 :            : 
    2420                 :            : #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
    2421                 :            : #define ASAP(x) (ORDER_PARAMS ((x))->asap)
    2422                 :            : #define ALAP(x) (ORDER_PARAMS ((x))->alap)
    2423                 :            : #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
    2424                 :            : #define MOB(x) (ALAP ((x)) - ASAP ((x)))
    2425                 :            : #define DEPTH(x) (ASAP ((x)))
    2426                 :            : 
    2427                 :            : typedef struct node_order_params * nopa;
    2428                 :            : 
    2429                 :            : static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
    2430                 :            : static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
    2431                 :            : static nopa  calculate_order_params (ddg_ptr, int, int *);
    2432                 :            : static int find_max_asap (ddg_ptr, sbitmap);
    2433                 :            : static int find_max_hv_min_mob (ddg_ptr, sbitmap);
    2434                 :            : static int find_max_dv_min_mob (ddg_ptr, sbitmap);
    2435                 :            : 
    2436                 :            : enum sms_direction {BOTTOMUP, TOPDOWN};
    2437                 :            : 
    2438                 :            : struct node_order_params
    2439                 :            : {
    2440                 :            :   int asap;
    2441                 :            :   int alap;
    2442                 :            :   int height;
    2443                 :            : };
    2444                 :            : 
    2445                 :            : /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
    2446                 :            : static void
    2447                 :          0 : check_nodes_order (int *node_order, int num_nodes)
    2448                 :            : {
    2449                 :          0 :   int i;
    2450                 :          0 :   auto_sbitmap tmp (num_nodes);
    2451                 :            : 
    2452                 :          0 :   bitmap_clear (tmp);
    2453                 :            : 
    2454                 :          0 :   if (dump_file)
    2455                 :          0 :     fprintf (dump_file, "SMS final nodes order: \n");
    2456                 :            : 
    2457                 :          0 :   for (i = 0; i < num_nodes; i++)
    2458                 :            :     {
    2459                 :          0 :       int u = node_order[i];
    2460                 :            : 
    2461                 :          0 :       if (dump_file)
    2462                 :          0 :         fprintf (dump_file, "%d ", u);
    2463                 :          0 :       gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
    2464                 :            : 
    2465                 :          0 :       bitmap_set_bit (tmp, u);
    2466                 :            :     }
    2467                 :            : 
    2468                 :          0 :   if (dump_file)
    2469                 :          0 :     fprintf (dump_file, "\n");
    2470                 :          0 : }
    2471                 :            : 
    2472                 :            : /* Order the nodes of G for scheduling and pass the result in
    2473                 :            :    NODE_ORDER.  Also set aux.count of each node to ASAP.
    2474                 :            :    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
    2475                 :            : static int
    2476                 :          0 : sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
    2477                 :            : {
    2478                 :          0 :   int i;
    2479                 :          0 :   int rec_mii = 0;
    2480                 :          0 :   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
    2481                 :            : 
    2482                 :          0 :   nopa nops = calculate_order_params (g, mii, pmax_asap);
    2483                 :            : 
    2484                 :          0 :   if (dump_file)
    2485                 :          0 :     print_sccs (dump_file, sccs, g);
    2486                 :            : 
    2487                 :          0 :   order_nodes_of_sccs (sccs, node_order);
    2488                 :            : 
    2489                 :          0 :   if (sccs->num_sccs > 0)
    2490                 :            :     /* First SCC has the largest recurrence_length.  */
    2491                 :          0 :     rec_mii = sccs->sccs[0]->recurrence_length;
    2492                 :            : 
    2493                 :            :   /* Save ASAP before destroying node_order_params.  */
    2494                 :          0 :   for (i = 0; i < g->num_nodes; i++)
    2495                 :            :     {
    2496                 :          0 :       ddg_node_ptr v = &g->nodes[i];
    2497                 :          0 :       v->aux.count = ASAP (v);
    2498                 :            :     }
    2499                 :            : 
    2500                 :          0 :   free (nops);
    2501                 :          0 :   free_ddg_all_sccs (sccs);
    2502                 :          0 :   check_nodes_order (node_order, g->num_nodes);
    2503                 :            : 
    2504                 :          0 :   return rec_mii;
    2505                 :            : }
    2506                 :            : 
    2507                 :            : static void
    2508                 :          0 : order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
    2509                 :            : {
    2510                 :          0 :   int i, pos = 0;
    2511                 :          0 :   ddg_ptr g = all_sccs->ddg;
    2512                 :          0 :   int num_nodes = g->num_nodes;
    2513                 :          0 :   auto_sbitmap prev_sccs (num_nodes);
    2514                 :          0 :   auto_sbitmap on_path (num_nodes);
    2515                 :          0 :   auto_sbitmap tmp (num_nodes);
    2516                 :          0 :   auto_sbitmap ones (num_nodes);
    2517                 :            : 
    2518                 :          0 :   bitmap_clear (prev_sccs);
    2519                 :          0 :   bitmap_ones (ones);
    2520                 :            : 
    2521                 :            :   /* Perform the node ordering starting from the SCC with the highest recMII.
    2522                 :            :      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
    2523                 :          0 :   for (i = 0; i < all_sccs->num_sccs; i++)
    2524                 :            :     {
    2525                 :          0 :       ddg_scc_ptr scc = all_sccs->sccs[i];
    2526                 :            : 
    2527                 :            :       /* Add nodes on paths from previous SCCs to the current SCC.  */
    2528                 :          0 :       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
    2529                 :          0 :       bitmap_ior (tmp, scc->nodes, on_path);
    2530                 :            : 
    2531                 :            :       /* Add nodes on paths from the current SCC to previous SCCs.  */
    2532                 :          0 :       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
    2533                 :          0 :       bitmap_ior (tmp, tmp, on_path);
    2534                 :            : 
    2535                 :            :       /* Remove nodes of previous SCCs from current extended SCC.  */
    2536                 :          0 :       bitmap_and_compl (tmp, tmp, prev_sccs);
    2537                 :            : 
    2538                 :          0 :       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    2539                 :            :       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    2540                 :            :     }
    2541                 :            : 
    2542                 :            :   /* Handle the remaining nodes that do not belong to any scc.  Each call
    2543                 :            :      to order_nodes_in_scc handles a single connected component.  */
    2544                 :          0 :   while (pos < g->num_nodes)
    2545                 :            :     {
    2546                 :          0 :       bitmap_and_compl (tmp, ones, prev_sccs);
    2547                 :          0 :       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    2548                 :            :     }
    2549                 :          0 : }
    2550                 :            : 
    2551                 :            : /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
    2552                 :            : static struct node_order_params *
    2553                 :          0 : calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
    2554                 :            : {
    2555                 :          0 :   int u;
    2556                 :          0 :   int max_asap;
    2557                 :          0 :   int num_nodes = g->num_nodes;
    2558                 :          0 :   ddg_edge_ptr e;
    2559                 :            :   /* Allocate a place to hold ordering params for each node in the DDG.  */
    2560                 :          0 :   nopa node_order_params_arr;
    2561                 :            : 
    2562                 :            :   /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
    2563                 :          0 :   node_order_params_arr = (nopa) xcalloc (num_nodes,
    2564                 :            :                                           sizeof (struct node_order_params));
    2565                 :            : 
    2566                 :            :   /* Set the aux pointer of each node to point to its order_params structure.  */
    2567                 :          0 :   for (u = 0; u < num_nodes; u++)
    2568                 :          0 :     g->nodes[u].aux.info = &node_order_params_arr[u];
    2569                 :            : 
    2570                 :            :   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
    2571                 :            :      calculate ASAP, ALAP, mobility, distance, and height for each node
    2572                 :            :      in the dependence (direct acyclic) graph.  */
    2573                 :            : 
    2574                 :            :   /* We assume that the nodes in the array are in topological order.  */
    2575                 :            : 
    2576                 :            :   max_asap = 0;
    2577                 :          0 :   for (u = 0; u < num_nodes; u++)
    2578                 :            :     {
    2579                 :          0 :       ddg_node_ptr u_node = &g->nodes[u];
    2580                 :            : 
    2581                 :          0 :       ASAP (u_node) = 0;
    2582                 :          0 :       for (e = u_node->in; e; e = e->next_in)
    2583                 :          0 :         if (e->distance == 0)
    2584                 :          0 :           ASAP (u_node) = MAX (ASAP (u_node),
    2585                 :            :                                ASAP (e->src) + e->latency);
    2586                 :          0 :       max_asap = MAX (max_asap, ASAP (u_node));
    2587                 :            :     }
    2588                 :            : 
    2589                 :          0 :   for (u = num_nodes - 1; u > -1; u--)
    2590                 :            :     {
    2591                 :          0 :       ddg_node_ptr u_node = &g->nodes[u];
    2592                 :            : 
    2593                 :          0 :       ALAP (u_node) = max_asap;
    2594                 :          0 :       HEIGHT (u_node) = 0;
    2595                 :          0 :       for (e = u_node->out; e; e = e->next_out)
    2596                 :          0 :         if (e->distance == 0)
    2597                 :            :           {
    2598                 :          0 :             ALAP (u_node) = MIN (ALAP (u_node),
    2599                 :            :                                  ALAP (e->dest) - e->latency);
    2600                 :          0 :             HEIGHT (u_node) = MAX (HEIGHT (u_node),
    2601                 :            :                                    HEIGHT (e->dest) + e->latency);
    2602                 :            :           }
    2603                 :            :     }
    2604                 :          0 :   if (dump_file)
    2605                 :            :   {
    2606                 :          0 :     fprintf (dump_file, "\nOrder params\n");
    2607                 :          0 :     for (u = 0; u < num_nodes; u++)
    2608                 :            :       {
    2609                 :          0 :         ddg_node_ptr u_node = &g->nodes[u];
    2610                 :            : 
    2611                 :          0 :         fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
    2612                 :          0 :                  ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
    2613                 :            :       }
    2614                 :            :   }
    2615                 :            : 
    2616                 :          0 :   *pmax_asap = max_asap;
    2617                 :          0 :   return node_order_params_arr;
    2618                 :            : }
    2619                 :            : 
    2620                 :            : static int
    2621                 :          0 : find_max_asap (ddg_ptr g, sbitmap nodes)
    2622                 :            : {
    2623                 :          0 :   unsigned int u = 0;
    2624                 :          0 :   int max_asap = -1;
    2625                 :          0 :   int result = -1;
    2626                 :          0 :   sbitmap_iterator sbi;
    2627                 :            : 
    2628                 :          0 :   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
    2629                 :            :     {
    2630                 :          0 :       ddg_node_ptr u_node = &g->nodes[u];
    2631                 :            : 
    2632                 :          0 :       if (max_asap < ASAP (u_node))
    2633                 :            :         {
    2634                 :          0 :           max_asap = ASAP (u_node);
    2635                 :          0 :           result = u;
    2636                 :            :         }
    2637                 :            :     }
    2638                 :          0 :   return result;
    2639                 :            : }
    2640                 :            : 
    2641                 :            : static int
    2642                 :          0 : find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
    2643                 :            : {
    2644                 :          0 :   unsigned int u = 0;
    2645                 :          0 :   int max_hv = -1;
    2646                 :          0 :   int min_mob = INT_MAX;
    2647                 :          0 :   int result = -1;
    2648                 :          0 :   sbitmap_iterator sbi;
    2649                 :            : 
    2650                 :          0 :   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
    2651                 :            :     {
    2652                 :          0 :       ddg_node_ptr u_node = &g->nodes[u];
    2653                 :            : 
    2654                 :          0 :       if (max_hv < HEIGHT (u_node))
    2655                 :            :         {
    2656                 :          0 :           max_hv = HEIGHT (u_node);
    2657                 :          0 :           min_mob = MOB (u_node);
    2658                 :          0 :           result = u;
    2659                 :            :         }
    2660                 :          0 :       else if ((max_hv == HEIGHT (u_node))
    2661                 :          0 :                && (min_mob > MOB (u_node)))
    2662                 :            :         {
    2663                 :          0 :           min_mob = MOB (u_node);
    2664                 :          0 :           result = u;
    2665                 :            :         }
    2666                 :            :     }
    2667                 :          0 :   return result;
    2668                 :            : }
    2669                 :            : 
    2670                 :            : static int
    2671                 :          0 : find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
    2672                 :            : {
    2673                 :          0 :   unsigned int u = 0;
    2674                 :          0 :   int max_dv = -1;
    2675                 :          0 :   int min_mob = INT_MAX;
    2676                 :          0 :   int result = -1;
    2677                 :          0 :   sbitmap_iterator sbi;
    2678                 :            : 
    2679                 :          0 :   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
    2680                 :            :     {
    2681                 :          0 :       ddg_node_ptr u_node = &g->nodes[u];
    2682                 :            : 
    2683                 :          0 :       if (max_dv < DEPTH (u_node))
    2684                 :            :         {
    2685                 :          0 :           max_dv = DEPTH (u_node);
    2686                 :          0 :           min_mob = MOB (u_node);
    2687                 :          0 :           result = u;
    2688                 :            :         }
    2689                 :          0 :       else if ((max_dv == DEPTH (u_node))
    2690                 :          0 :                && (min_mob > MOB (u_node)))
    2691                 :            :         {
    2692                 :          0 :           min_mob = MOB (u_node);
    2693                 :          0 :           result = u;
    2694                 :            :         }
    2695                 :            :     }
    2696                 :          0 :   return result;
    2697                 :            : }
    2698                 :            : 
    2699                 :            : /* Places the nodes of SCC into the NODE_ORDER array starting
    2700                 :            :    at position POS, according to the SMS ordering algorithm.
    2701                 :            :    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
    2702                 :            :    the NODE_ORDER array, starting from position zero.  */
    2703                 :            : static int
    2704                 :          0 : order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
    2705                 :            :                     int * node_order, int pos)
    2706                 :            : {
    2707                 :          0 :   enum sms_direction dir;
    2708                 :          0 :   int num_nodes = g->num_nodes;
    2709                 :          0 :   auto_sbitmap workset (num_nodes);
    2710                 :          0 :   auto_sbitmap tmp (num_nodes);
    2711                 :          0 :   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
    2712                 :          0 :   auto_sbitmap predecessors (num_nodes);
    2713                 :          0 :   auto_sbitmap successors (num_nodes);
    2714                 :            : 
    2715                 :          0 :   bitmap_clear (predecessors);
    2716                 :          0 :   find_predecessors (predecessors, g, nodes_ordered);
    2717                 :            : 
    2718                 :          0 :   bitmap_clear (successors);
    2719                 :          0 :   find_successors (successors, g, nodes_ordered);
    2720                 :            : 
    2721                 :          0 :   bitmap_clear (tmp);
    2722                 :          0 :   if (bitmap_and (tmp, predecessors, scc))
    2723                 :            :     {
    2724                 :          0 :       bitmap_copy (workset, tmp);
    2725                 :          0 :       dir = BOTTOMUP;
    2726                 :            :     }
    2727                 :          0 :   else if (bitmap_and (tmp, successors, scc))
    2728                 :            :     {
    2729                 :          0 :       bitmap_copy (workset, tmp);
    2730                 :          0 :       dir = TOPDOWN;
    2731                 :            :     }
    2732                 :            :   else
    2733                 :            :     {
    2734                 :          0 :       int u;
    2735                 :            : 
    2736                 :          0 :       bitmap_clear (workset);
    2737                 :          0 :       if ((u = find_max_asap (g, scc)) >= 0)
    2738                 :          0 :         bitmap_set_bit (workset, u);
    2739                 :            :       dir = BOTTOMUP;
    2740                 :            :     }
    2741                 :            : 
    2742                 :          0 :   bitmap_clear (zero_bitmap);
    2743                 :          0 :   while (!bitmap_equal_p (workset, zero_bitmap))
    2744                 :            :     {
    2745                 :          0 :       int v;
    2746                 :          0 :       ddg_node_ptr v_node;
    2747                 :          0 :       sbitmap v_node_preds;
    2748                 :          0 :       sbitmap v_node_succs;
    2749                 :            : 
    2750                 :          0 :       if (dir == TOPDOWN)
    2751                 :            :         {
    2752                 :          0 :           while (!bitmap_equal_p (workset, zero_bitmap))
    2753                 :            :             {
    2754                 :          0 :               v = find_max_hv_min_mob (g, workset);
    2755                 :          0 :               v_node = &g->nodes[v];
    2756                 :          0 :               node_order[pos++] = v;
    2757                 :          0 :               v_node_succs = NODE_SUCCESSORS (v_node);
    2758                 :          0 :               bitmap_and (tmp, v_node_succs, scc);
    2759                 :            : 
    2760                 :            :               /* Don't consider the already ordered successors again.  */
    2761                 :          0 :               bitmap_and_compl (tmp, tmp, nodes_ordered);
    2762                 :          0 :               bitmap_ior (workset, workset, tmp);
    2763                 :          0 :               bitmap_clear_bit (workset, v);
    2764                 :          0 :               bitmap_set_bit (nodes_ordered, v);
    2765                 :            :             }
    2766                 :          0 :           dir = BOTTOMUP;
    2767                 :          0 :           bitmap_clear (predecessors);
    2768                 :          0 :           find_predecessors (predecessors, g, nodes_ordered);
    2769                 :          0 :           bitmap_and (workset, predecessors, scc);
    2770                 :            :         }
    2771                 :            :       else
    2772                 :            :         {
    2773                 :          0 :           while (!bitmap_equal_p (workset, zero_bitmap))
    2774                 :            :             {
    2775                 :          0 :               v = find_max_dv_min_mob (g, workset);
    2776                 :          0 :               v_node = &g->nodes[v];
    2777                 :          0 :               node_order[pos++] = v;
    2778                 :          0 :               v_node_preds = NODE_PREDECESSORS (v_node);
    2779                 :          0 :               bitmap_and (tmp, v_node_preds, scc);
    2780                 :            : 
    2781                 :            :               /* Don't consider the already ordered predecessors again.  */
    2782                 :          0 :               bitmap_and_compl (tmp, tmp, nodes_ordered);
    2783                 :          0 :               bitmap_ior (workset, workset, tmp);
    2784                 :          0 :               bitmap_clear_bit (workset, v);
    2785                 :          0 :               bitmap_set_bit (nodes_ordered, v);
    2786                 :            :             }
    2787                 :          0 :           dir = TOPDOWN;
    2788                 :          0 :           bitmap_clear (successors);
    2789                 :          0 :           find_successors (successors, g, nodes_ordered);
    2790                 :          0 :           bitmap_and (workset, successors, scc);
    2791                 :            :         }
    2792                 :            :     }
    2793                 :          0 :   sbitmap_free (zero_bitmap);
    2794                 :          0 :   return pos;
    2795                 :            : }
    2796                 :            : 
    2797                 :            : 
    2798                 :            : /* This page contains functions for manipulating partial-schedules during
    2799                 :            :    modulo scheduling.  */
    2800                 :            : 
    2801                 :            : /* Create a partial schedule and allocate a memory to hold II rows.  */
    2802                 :            : 
    2803                 :            : static partial_schedule_ptr
    2804                 :          0 : create_partial_schedule (int ii, ddg_ptr g, int history)
    2805                 :            : {
    2806                 :          0 :   partial_schedule_ptr ps = XNEW (struct partial_schedule);
    2807                 :          0 :   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
    2808                 :          0 :   ps->rows_length = (int *) xcalloc (ii, sizeof (int));
    2809                 :          0 :   ps->reg_moves.create (0);
    2810                 :          0 :   ps->ii = ii;
    2811                 :          0 :   ps->history = history;
    2812                 :          0 :   ps->min_cycle = INT_MAX;
    2813                 :          0 :   ps->max_cycle = INT_MIN;
    2814                 :          0 :   ps->g = g;
    2815                 :            : 
    2816                 :          0 :   return ps;
    2817                 :            : }
    2818                 :            : 
    2819                 :            : /* Free the PS_INSNs in rows array of the given partial schedule.
    2820                 :            :    ??? Consider caching the PS_INSN's.  */
    2821                 :            : static void
    2822                 :          0 : free_ps_insns (partial_schedule_ptr ps)
    2823                 :            : {
    2824                 :          0 :   int i;
    2825                 :            : 
    2826                 :          0 :   for (i = 0; i < ps->ii; i++)
    2827                 :            :     {
    2828                 :          0 :       while (ps->rows[i])
    2829                 :            :         {
    2830                 :          0 :           ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
    2831                 :            : 
    2832                 :          0 :           free (ps->rows[i]);
    2833                 :          0 :           ps->rows[i] = ps_insn;
    2834                 :            :         }
    2835                 :          0 :       ps->rows[i] = NULL;
    2836                 :            :     }
    2837                 :          0 : }
    2838                 :            : 
    2839                 :            : /* Free all the memory allocated to the partial schedule.  */
    2840                 :            : 
    2841                 :            : static void
    2842                 :          0 : free_partial_schedule (partial_schedule_ptr ps)
    2843                 :            : {
    2844                 :          0 :   ps_reg_move_info *move;
    2845                 :          0 :   unsigned int i;
    2846                 :            : 
    2847                 :          0 :   if (!ps)
    2848                 :          0 :     return;
    2849                 :            : 
    2850                 :          0 :   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
    2851                 :          0 :     sbitmap_free (move->uses);
    2852                 :          0 :   ps->reg_moves.release ();
    2853                 :            : 
    2854                 :          0 :   free_ps_insns (ps);
    2855                 :          0 :   free (ps->rows);
    2856                 :          0 :   free (ps->rows_length);
    2857                 :          0 :   free (ps);
    2858                 :            : }
    2859                 :            : 
    2860                 :            : /* Clear the rows array with its PS_INSNs, and create a new one with
    2861                 :            :    NEW_II rows.  */
    2862                 :            : 
    2863                 :            : static void
    2864                 :          0 : reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
    2865                 :            : {
    2866                 :          0 :   if (!ps)
    2867                 :            :     return;
    2868                 :          0 :   free_ps_insns (ps);
    2869                 :          0 :   if (new_ii == ps->ii)
    2870                 :            :     return;
    2871                 :          0 :   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
    2872                 :            :                                                  * sizeof (ps_insn_ptr));
    2873                 :          0 :   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
    2874                 :          0 :   ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
    2875                 :          0 :   memset (ps->rows_length, 0, new_ii * sizeof (int));
    2876                 :          0 :   ps->ii = new_ii;
    2877                 :          0 :   ps->min_cycle = INT_MAX;
    2878                 :          0 :   ps->max_cycle = INT_MIN;
    2879                 :            : }
    2880                 :            : 
    2881                 :            : /* Prints the partial schedule as an ii rows array, for each rows
    2882                 :            :    print the ids of the insns in it.  */
    2883                 :            : void
    2884                 :          0 : print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
    2885                 :            : {
    2886                 :          0 :   int i;
    2887                 :            : 
    2888                 :          0 :   for (i = 0; i < ps->ii; i++)
    2889                 :            :     {
    2890                 :          0 :       ps_insn_ptr ps_i = ps->rows[i];
    2891                 :            : 
    2892                 :          0 :       fprintf (dump, "\n[ROW %d ]: ", i);
    2893                 :          0 :       while (ps_i)
    2894                 :            :         {
    2895                 :          0 :           rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
    2896                 :            : 
    2897                 :          0 :           if (JUMP_P (insn))
    2898                 :          0 :             fprintf (dump, "%d (branch), ", INSN_UID (insn));
    2899                 :            :           else
    2900                 :          0 :             fprintf (dump, "%d, ", INSN_UID (insn));
    2901                 :            :         
    2902                 :          0 :           ps_i = ps_i->next_in_row;
    2903                 :            :         }
    2904                 :            :     }
    2905                 :          0 : }
    2906                 :            : 
    2907                 :            : /* Creates an object of PS_INSN and initializes it to the given parameters.  */
    2908                 :            : static ps_insn_ptr
    2909                 :          0 : create_ps_insn (int id, int cycle)
    2910                 :            : {
    2911                 :          0 :   ps_insn_ptr ps_i = XNEW (struct ps_insn);
    2912                 :            : 
    2913                 :          0 :   ps_i->id = id;
    2914                 :          0 :   ps_i->next_in_row = NULL;
    2915                 :          0 :   ps_i->prev_in_row = NULL;
    2916                 :          0 :   ps_i->cycle = cycle;
    2917                 :            : 
    2918                 :          0 :   return ps_i;
    2919                 :            : }
    2920                 :            : 
    2921                 :            : 
    2922                 :            : /* Removes the given PS_INSN from the partial schedule.  */  
    2923                 :            : static void 
    2924                 :          0 : remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
    2925                 :            : {
    2926                 :          0 :   int row;
    2927                 :            : 
    2928                 :          0 :   gcc_assert (ps && ps_i);
    2929                 :            :   
    2930                 :          0 :   row = SMODULO (ps_i->cycle, ps->ii);
    2931                 :          0 :   if (! ps_i->prev_in_row)
    2932                 :            :     {
    2933                 :          0 :       gcc_assert (ps_i == ps->rows[row]);
    2934                 :          0 :       ps->rows[row] = ps_i->next_in_row;
    2935                 :          0 :       if (ps->rows[row])
    2936                 :          0 :         ps->rows[row]->prev_in_row = NULL;
    2937                 :            :     }
    2938                 :            :   else
    2939                 :            :     {
    2940                 :          0 :       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
    2941                 :          0 :       if (ps_i->next_in_row)
    2942                 :          0 :         ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
    2943                 :            :     }
    2944                 :            :    
    2945                 :          0 :   ps->rows_length[row] -= 1; 
    2946                 :          0 :   free (ps_i);
    2947                 :          0 :   return;
    2948                 :            : }
    2949                 :            : 
    2950                 :            : /* Unlike what literature describes for modulo scheduling (which focuses
    2951                 :            :    on VLIW machines) the order of the instructions inside a cycle is
    2952                 :            :    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
    2953                 :            :    where the current instruction should go relative to the already
    2954                 :            :    scheduled instructions in the given cycle.  Go over these
    2955                 :            :    instructions and find the first possible column to put it in.  */
    2956                 :            : static bool
    2957                 :          0 : ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
    2958                 :            :                      sbitmap must_precede, sbitmap must_follow)
    2959                 :            : {
    2960                 :          0 :   ps_insn_ptr next_ps_i;
    2961                 :          0 :   ps_insn_ptr first_must_follow = NULL;
    2962                 :          0 :   ps_insn_ptr last_must_precede = NULL;
    2963                 :          0 :   ps_insn_ptr last_in_row = NULL;
    2964                 :          0 :   int row;
    2965                 :            : 
    2966                 :          0 :   if (! ps_i)
    2967                 :            :     return false;
    2968                 :            : 
    2969                 :          0 :   row = SMODULO (ps_i->cycle, ps->ii);
    2970                 :            : 
    2971                 :            :   /* Find the first must follow and the last must precede
    2972                 :            :      and insert the node immediately after the must precede
    2973                 :            :      but make sure that it there is no must follow after it.  */
    2974                 :          0 :   for (next_ps_i = ps->rows[row];
    2975                 :          0 :        next_ps_i;
    2976                 :          0 :        next_ps_i = next_ps_i->next_in_row)
    2977                 :            :     {
    2978                 :          0 :       if (must_follow
    2979                 :          0 :           && bitmap_bit_p (must_follow, next_ps_i->id)
    2980                 :          0 :           && ! first_must_follow)
    2981                 :            :         first_must_follow = next_ps_i;
    2982                 :          0 :       if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
    2983                 :            :         {
    2984                 :            :           /* If we have already met a node that must follow, then
    2985                 :            :              there is no possible column.  */
    2986                 :          0 :           if (first_must_follow)
    2987                 :            :             return false;
    2988                 :            :           else
    2989                 :            :             last_must_precede = next_ps_i;
    2990                 :            :         }
    2991                 :            :       /* The closing branch must be the last in the row.  */
    2992                 :          0 :       if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
    2993                 :            :         return false;
    2994                 :            :              
    2995                 :          0 :        last_in_row = next_ps_i;
    2996                 :            :     }
    2997                 :            : 
    2998                 :            :   /* The closing branch is scheduled as well.  Make sure there is no
    2999                 :            :      dependent instruction after it as the branch should be the last
    3000                 :            :      instruction in the row.  */
    3001                 :          0 :   if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
    3002                 :            :     {
    3003                 :          0 :       if (first_must_follow)
    3004                 :            :         return false;
    3005                 :          0 :       if (last_in_row)
    3006                 :            :         {
    3007                 :            :           /* Make the branch the last in the row.  New instructions
    3008                 :            :              will be inserted at the beginning of the row or after the
    3009                 :            :              last must_precede instruction thus the branch is guaranteed
    3010                 :            :              to remain the last instruction in the row.  */
    3011                 :          0 :           last_in_row->next_in_row = ps_i;
    3012                 :          0 :           ps_i->prev_in_row = last_in_row;
    3013                 :          0 :           ps_i->next_in_row = NULL;
    3014                 :            :         }
    3015                 :            :       else
    3016                 :          0 :         ps->rows[row] = ps_i;
    3017                 :          0 :       return true;
    3018                 :            :     }
    3019                 :            :   
    3020                 :            :   /* Now insert the node after INSERT_AFTER_PSI.  */
    3021                 :            : 
    3022                 :          0 :   if (! last_must_precede)
    3023                 :            :     {
    3024                 :          0 :       ps_i->next_in_row = ps->rows[row];
    3025                 :          0 :       ps_i->prev_in_row = NULL;
    3026                 :          0 :       if (ps_i->next_in_row)
    3027                 :          0 :         ps_i->next_in_row->prev_in_row = ps_i;
    3028                 :          0 :       ps->rows[row] = ps_i;
    3029                 :            :     }
    3030                 :            :   else
    3031                 :            :     {
    3032                 :          0 :       ps_i->next_in_row = last_must_precede->next_in_row;
    3033                 :          0 :       last_must_precede->next_in_row = ps_i;
    3034                 :          0 :       ps_i->prev_in_row = last_must_precede;
    3035                 :          0 :       if (ps_i->next_in_row)
    3036                 :          0 :         ps_i->next_in_row->prev_in_row = ps_i;
    3037                 :            :     }
    3038                 :            : 
    3039                 :            :   return true;
    3040                 :            : }
    3041                 :            : 
    3042                 :            : /* Advances the PS_INSN one column in its current row; returns false
    3043                 :            :    in failure and true in success.  Bit N is set in MUST_FOLLOW if
    3044                 :            :    the node with cuid N must be come after the node pointed to by
    3045                 :            :    PS_I when scheduled in the same cycle.  */
    3046                 :            : static int
    3047                 :          0 : ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
    3048                 :            :                         sbitmap must_follow)
    3049                 :            : {
    3050                 :          0 :   ps_insn_ptr prev, next;
    3051                 :          0 :   int row;
    3052                 :            : 
    3053                 :          0 :   if (!ps || !ps_i)
    3054                 :            :     return false;
    3055                 :            : 
    3056                 :          0 :   row = SMODULO (ps_i->cycle, ps->ii);
    3057                 :            : 
    3058                 :          0 :   if (! ps_i->next_in_row)
    3059                 :            :     return false;
    3060                 :            : 
    3061                 :            :   /* Check if next_in_row is dependent on ps_i, both having same sched
    3062                 :            :      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
    3063                 :          0 :   if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
    3064                 :            :     return false;
    3065                 :            : 
    3066                 :            :   /* Advance PS_I over its next_in_row in the doubly linked list.  */
    3067                 :          0 :   prev = ps_i->prev_in_row;
    3068                 :          0 :   next = ps_i->next_in_row;
    3069                 :            : 
    3070                 :          0 :   if (ps_i == ps->rows[row])
    3071                 :          0 :     ps->rows[row] = next;
    3072                 :            : 
    3073                 :          0 :   ps_i->next_in_row = next->next_in_row;
    3074                 :            : 
    3075                 :          0 :   if (next->next_in_row)
    3076                 :          0 :     next->next_in_row->prev_in_row = ps_i;
    3077                 :            : 
    3078                 :          0 :   next->next_in_row = ps_i;
    3079                 :          0 :   ps_i->prev_in_row = next;
    3080                 :            : 
    3081                 :          0 :   next->prev_in_row = prev;
    3082                 :          0 :   if (prev)
    3083                 :          0 :     prev->next_in_row = next;
    3084                 :            : 
    3085                 :            :   return true;
    3086                 :            : }
    3087                 :            : 
    3088                 :            : /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
    3089                 :            :    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
    3090                 :            :    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
    3091                 :            :    before/after (respectively) the node pointed to by PS_I when scheduled
    3092                 :            :    in the same cycle.  */
    3093                 :            : static ps_insn_ptr
    3094                 :          0 : add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
    3095                 :            :                 sbitmap must_precede, sbitmap must_follow)
    3096                 :            : {
    3097                 :          0 :   ps_insn_ptr ps_i;
    3098                 :          0 :   int row = SMODULO (cycle, ps->ii);
    3099                 :            : 
    3100                 :          0 :   if (ps->rows_length[row] >= issue_rate)
    3101                 :            :     return NULL;
    3102                 :            : 
    3103                 :          0 :   ps_i = create_ps_insn (id, cycle);
    3104                 :            : 
    3105                 :            :   /* Finds and inserts PS_I according to MUST_FOLLOW and
    3106                 :            :      MUST_PRECEDE.  */
    3107                 :          0 :   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
    3108                 :            :     {
    3109                 :          0 :       free (ps_i);
    3110                 :          0 :       return NULL;
    3111                 :            :     }
    3112                 :            : 
    3113                 :          0 :   ps->rows_length[row] += 1;
    3114                 :          0 :   return ps_i;
    3115                 :            : }
    3116                 :            : 
    3117                 :            : /* Advance time one cycle.  Assumes DFA is being used.  */
    3118                 :            : static void
    3119                 :          0 : advance_one_cycle (void)
    3120                 :            : {
    3121                 :          0 :   if (targetm.sched.dfa_pre_cycle_insn)
    3122                 :          0 :     state_transition (curr_state,
    3123                 :            :                       targetm.sched.dfa_pre_cycle_insn ());
    3124                 :            : 
    3125                 :          0 :   state_transition (curr_state, NULL);
    3126                 :            : 
    3127                 :          0 :   if (targetm.sched.dfa_post_cycle_insn)
    3128                 :          0 :     state_transition (curr_state,
    3129                 :          0 :                       targetm.sched.dfa_post_cycle_insn ());
    3130                 :          0 : }
    3131                 :            : 
    3132                 :            : 
    3133                 :            : 
    3134                 :            : /* Checks if PS has resource conflicts according to DFA, starting from
    3135                 :            :    FROM cycle to TO cycle; returns true if there are conflicts and false
    3136                 :            :    if there are no conflicts.  Assumes DFA is being used.  */
    3137                 :            : static int
    3138                 :          0 : ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
    3139                 :            : {
    3140                 :          0 :   int cycle;
    3141                 :            : 
    3142                 :          0 :   state_reset (curr_state);
    3143                 :            : 
    3144                 :          0 :   for (cycle = from; cycle <= to; cycle++)
    3145                 :            :     {
    3146                 :          0 :       ps_insn_ptr crr_insn;
    3147                 :            :       /* Holds the remaining issue slots in the current row.  */
    3148                 :          0 :       int can_issue_more = issue_rate;
    3149                 :            : 
    3150                 :            :       /* Walk through the DFA for the current row.  */
    3151                 :          0 :       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
    3152                 :          0 :            crr_insn;
    3153                 :          0 :            crr_insn = crr_insn->next_in_row)
    3154                 :            :         {
    3155                 :          0 :           rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
    3156                 :            : 
    3157                 :            :           /* Check if there is room for the current insn.  */
    3158                 :          0 :           if (!can_issue_more || state_dead_lock_p (curr_state))
    3159                 :          0 :             return true;
    3160                 :            : 
    3161                 :            :           /* Update the DFA state and return with failure if the DFA found
    3162                 :            :              resource conflicts.  */
    3163                 :          0 :           if (state_transition (curr_state, insn) >= 0)
    3164                 :            :             return true;
    3165                 :            : 
    3166                 :          0 :           if (targetm.sched.variable_issue)
    3167                 :          0 :             can_issue_more =
    3168                 :          0 :               targetm.sched.variable_issue (sched_dump, sched_verbose,
    3169                 :            :                                             insn, can_issue_more);
    3170                 :            :           /* A naked CLOBBER or USE generates no instruction, so don't
    3171                 :            :              let them consume issue slots.  */
    3172                 :          0 :           else if (GET_CODE (PATTERN (insn)) != USE
    3173                 :          0 :                    && GET_CODE (PATTERN (insn)) != CLOBBER)
    3174                 :          0 :             can_issue_more--;
    3175                 :            :         }
    3176                 :            : 
    3177                 :            :       /* Advance the DFA to the next cycle.  */
    3178                 :          0 :       advance_one_cycle ();
    3179                 :            :     }
    3180                 :            :   return false;
    3181                 :            : }
    3182                 :            : 
    3183                 :            : /* Checks if the given node causes resource conflicts when added to PS at
    3184                 :            :    cycle C.  If not the node is added to PS and returned; otherwise zero
    3185                 :            :    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
    3186                 :            :    cuid N must be come before/after (respectively) the node pointed to by
    3187                 :            :    PS_I when scheduled in the same cycle.  */
    3188                 :            : ps_insn_ptr
    3189                 :          0 : ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
    3190                 :            :                              int c, sbitmap must_precede,
    3191                 :            :                              sbitmap must_follow)
    3192                 :            : {
    3193                 :          0 :   int i, first, amount, has_conflicts = 0;
    3194                 :          0 :   ps_insn_ptr ps_i;
    3195                 :            : 
    3196                 :            :   /* First add the node to the PS, if this succeeds check for
    3197                 :            :      conflicts, trying different issue slots in the same row.  */
    3198                 :          0 :   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
    3199                 :            :     return NULL; /* Failed to insert the node at the given cycle.  */
    3200                 :            : 
    3201                 :          0 :   while (1)
    3202                 :            :     {
    3203                 :          0 :       has_conflicts = ps_has_conflicts (ps, c, c);
    3204                 :          0 :       if (ps->history > 0 && !has_conflicts)
    3205                 :            :         {
    3206                 :            :           /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
    3207                 :            :              but not more than ii intervals.  */
    3208                 :          0 :           first = c - ps->history;
    3209                 :          0 :           amount = 2 * ps->history + 1;
    3210                 :          0 :           if (amount > ps->ii)
    3211                 :            :             amount = ps->ii;
    3212                 :          0 :           for (i = first; i < first + amount; i++)
    3213                 :            :             {
    3214                 :          0 :               has_conflicts = ps_has_conflicts (ps,
    3215                 :            :                                                 i - ps->history,
    3216                 :          0 :                                                 i + ps->history);
    3217                 :          0 :               if (has_conflicts)
    3218                 :            :                 break;
    3219                 :            :             }
    3220                 :            :         }
    3221                 :          0 :       if (!has_conflicts)
    3222                 :            :         break;
    3223                 :            :       /* Try different issue slots to find one that the given node can be
    3224                 :            :          scheduled in without conflicts.  */
    3225                 :          0 :       if (! ps_insn_advance_column (ps, ps_i, must_follow))
    3226                 :            :         break;
    3227                 :            :     }
    3228                 :            : 
    3229                 :          0 :   if (has_conflicts)
    3230                 :            :     {
    3231                 :          0 :       remove_node_from_ps (ps, ps_i);
    3232                 :          0 :       return NULL;
    3233                 :            :     }
    3234                 :            : 
    3235                 :          0 :   ps->min_cycle = MIN (ps->min_cycle, c);
    3236                 :          0 :   ps->max_cycle = MAX (ps->max_cycle, c);
    3237                 :          0 :   return ps_i;
    3238                 :            : }
    3239                 :            : 
    3240                 :            : /* Calculate the stage count of the partial schedule PS.  The calculation
    3241                 :            :    takes into account the rotation amount passed in ROTATION_AMOUNT.  */
    3242                 :            : int
    3243                 :          0 : calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
    3244                 :            : {
    3245                 :          0 :   int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
    3246                 :          0 :   int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
    3247                 :          0 :   int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
    3248                 :            : 
    3249                 :            :   /* The calculation of stage count is done adding the number of stages
    3250                 :            :      before cycle zero and after cycle zero.  */ 
    3251                 :          0 :   stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
    3252                 :            : 
    3253                 :          0 :   return stage_count;
    3254                 :            : }
    3255                 :            : 
    3256                 :            : /* Rotate the rows of PS such that insns scheduled at time
    3257                 :            :    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
    3258                 :            : void
    3259                 :          0 : rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
    3260                 :            : {
    3261                 :          0 :   int i, row, backward_rotates;
    3262                 :          0 :   int last_row = ps->ii - 1;
    3263                 :            : 
    3264                 :          0 :   if (start_cycle == 0)
    3265                 :            :     return;
    3266                 :            : 
    3267                 :          0 :   backward_rotates = SMODULO (start_cycle, ps->ii);
    3268                 :            : 
    3269                 :            :   /* Revisit later and optimize this into a single loop.  */
    3270                 :          0 :   for (i = 0; i < backward_rotates; i++)
    3271                 :            :     {
    3272                 :          0 :       ps_insn_ptr first_row = ps->rows[0];
    3273                 :          0 :       int first_row_length = ps->rows_length[0];
    3274                 :            : 
    3275                 :          0 :       for (row = 0; row < last_row; row++)
    3276                 :            :         {
    3277                 :          0 :           ps->rows[row] = ps->rows[row + 1];
    3278                 :          0 :           ps->rows_length[row] = ps->rows_length[row + 1]; 
    3279                 :            :         }
    3280                 :            : 
    3281                 :          0 :       ps->rows[last_row] = first_row;
    3282                 :          0 :       ps->rows_length[last_row] = first_row_length;
    3283                 :            :     }
    3284                 :            : 
    3285                 :          0 :   ps->max_cycle -= start_cycle;
    3286                 :          0 :   ps->min_cycle -= start_cycle;
    3287                 :            : }
    3288                 :            : 
    3289                 :            : #endif /* INSN_SCHEDULING */
    3290                 :            : 
    3291                 :            : /* Run instruction scheduler.  */
    3292                 :            : /* Perform SMS module scheduling.  */
    3293                 :            : 
    3294                 :            : namespace {
    3295                 :            : 
    3296                 :            : const pass_data pass_data_sms =
    3297                 :            : {
    3298                 :            :   RTL_PASS, /* type */
    3299                 :            :   "sms", /* name */
    3300                 :            :   OPTGROUP_NONE, /* optinfo_flags */
    3301                 :            :   TV_SMS, /* tv_id */
    3302                 :            :   0, /* properties_required */
    3303                 :            :   0, /* properties_provided */
    3304                 :            :   0, /* properties_destroyed */
    3305                 :            :   0, /* todo_flags_start */
    3306                 :            :   TODO_df_finish, /* todo_flags_finish */
    3307                 :            : };
    3308                 :            : 
    3309                 :            : class pass_sms : public rtl_opt_pass
    3310                 :            : {
    3311                 :            : public:
    3312                 :     200773 :   pass_sms (gcc::context *ctxt)
    3313                 :     401546 :     : rtl_opt_pass (pass_data_sms, ctxt)
    3314                 :            :   {}
    3315                 :            : 
    3316                 :            :   /* opt_pass methods: */
    3317                 :     944101 :   virtual bool gate (function *)
    3318                 :            : {
    3319                 :     944101 :   return (optimize > 0 && flag_modulo_sched);
    3320                 :            : }
    3321                 :            : 
    3322                 :            :   virtual unsigned int execute (function *);
    3323                 :            : 
    3324                 :            : }; // class pass_sms
    3325                 :            : 
    3326                 :            : unsigned int
    3327                 :        162 : pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
    3328                 :            : {
    3329                 :            : #ifdef INSN_SCHEDULING
    3330                 :        162 :   basic_block bb;
    3331                 :            : 
    3332                 :            :   /* Collect loop information to be used in SMS.  */
    3333                 :        162 :   cfg_layout_initialize (0);
    3334                 :        162 :   sms_schedule ();
    3335                 :            : 
    3336                 :            :   /* Update the life information, because we add pseudos.  */
    3337                 :        162 :   max_regno = max_reg_num ();
    3338                 :            : 
    3339                 :            :   /* Finalize layout changes.  */
    3340                 :       1112 :   FOR_EACH_BB_FN (bb, fun)
    3341                 :        950 :     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
    3342                 :        788 :       bb->aux = bb->next_bb;
    3343                 :        162 :   free_dominance_info (CDI_DOMINATORS);
    3344                 :        162 :   cfg_layout_finalize ();
    3345                 :            : #endif /* INSN_SCHEDULING */
    3346                 :        162 :   return 0;
    3347                 :            : }
    3348                 :            : 
    3349                 :            : } // anon namespace
    3350                 :            : 
    3351                 :            : rtl_opt_pass *
    3352                 :     200773 : make_pass_sms (gcc::context *ctxt)
    3353                 :            : {
    3354                 :     200773 :   return new pass_sms (ctxt);
    3355                 :            : }

Generated by: LCOV version 1.0

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto --enable-host-shared. GCC test suite is run with the built compiler.