| 1 | // Copyright (c) 2018 Google LLC. | 
|---|
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 4 | // you may not use this file except in compliance with the License. | 
|---|
| 5 | // You may obtain a copy of the License at | 
|---|
| 6 | // | 
|---|
| 7 | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 8 | // | 
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 12 | // See the License for the specific language governing permissions and | 
|---|
| 13 | // limitations under the License. | 
|---|
| 14 |  | 
|---|
| 15 | #ifndef SOURCE_OPT_LOOP_PEELING_H_ | 
|---|
| 16 | #define SOURCE_OPT_LOOP_PEELING_H_ | 
|---|
| 17 |  | 
|---|
| 18 | #include <algorithm> | 
|---|
| 19 | #include <limits> | 
|---|
| 20 | #include <memory> | 
|---|
| 21 | #include <tuple> | 
|---|
| 22 | #include <unordered_map> | 
|---|
| 23 | #include <unordered_set> | 
|---|
| 24 | #include <utility> | 
|---|
| 25 | #include <vector> | 
|---|
| 26 |  | 
|---|
| 27 | #include "source/opt/ir_context.h" | 
|---|
| 28 | #include "source/opt/loop_descriptor.h" | 
|---|
| 29 | #include "source/opt/loop_utils.h" | 
|---|
| 30 | #include "source/opt/pass.h" | 
|---|
| 31 | #include "source/opt/scalar_analysis.h" | 
|---|
| 32 |  | 
|---|
| 33 | namespace spvtools { | 
|---|
| 34 | namespace opt { | 
|---|
| 35 |  | 
|---|
| 36 | // Utility class to perform the peeling of a given loop. | 
|---|
| 37 | // The loop peeling transformation make a certain amount of a loop iterations to | 
|---|
| 38 | // be executed either before (peel before) or after (peel after) the transformed | 
|---|
| 39 | // loop. | 
|---|
| 40 | // | 
|---|
| 41 | // For peeling cases the transformation does the following steps: | 
|---|
| 42 | //   - It clones the loop and inserts the cloned loop before the original loop; | 
|---|
| 43 | //   - It connects all iterating values of the cloned loop with the | 
|---|
| 44 | //     corresponding original loop values so that the second loop starts with | 
|---|
| 45 | //     the appropriate values. | 
|---|
| 46 | //   - It inserts a new induction variable "i" is inserted into the cloned that | 
|---|
| 47 | //     starts with the value 0 and increment by step of one. | 
|---|
| 48 | // | 
|---|
| 49 | // The last step is specific to each case: | 
|---|
| 50 | //   - Peel before: the transformation is to peel the "N" first iterations. | 
|---|
| 51 | //     The exit condition of the cloned loop is changed so that the loop | 
|---|
| 52 | //     exits when "i < N" becomes false. The original loop is then protected to | 
|---|
| 53 | //     only execute if there is any iteration left to do. | 
|---|
| 54 | //   - Peel after: the transformation is to peel the "N" last iterations, | 
|---|
| 55 | //     then the exit condition of the cloned loop is changed so that the loop | 
|---|
| 56 | //     exits when "i + N < max_iteration" becomes false, where "max_iteration" | 
|---|
| 57 | //     is the upper bound of the loop. The cloned loop is then protected to | 
|---|
| 58 | //     only execute if there is any iteration left to do no covered by the | 
|---|
| 59 | //     second. | 
|---|
| 60 | // | 
|---|
| 61 | // To be peelable: | 
|---|
| 62 | //   - The loop must be in LCSSA form; | 
|---|
| 63 | //   - The loop must not contain any breaks; | 
|---|
| 64 | //   - The loop must not have any ambiguous iterators updates (see | 
|---|
| 65 | //     "CanPeelLoop"). | 
|---|
| 66 | // The method "CanPeelLoop" checks that those constrained are met. | 
|---|
| 67 | class LoopPeeling { | 
|---|
| 68 | public: | 
|---|
| 69 | // LoopPeeling constructor. | 
|---|
| 70 | // |loop| is the loop to peel. | 
|---|
| 71 | // |loop_iteration_count| is the instruction holding the |loop| iteration | 
|---|
| 72 | // count, must be invariant for |loop| and must be of an int 32 type (signed | 
|---|
| 73 | // or unsigned). | 
|---|
| 74 | // |canonical_induction_variable| is an induction variable that can be used to | 
|---|
| 75 | // count the number of iterations, must be of the same type as | 
|---|
| 76 | // |loop_iteration_count| and start at 0 and increase by step of one at each | 
|---|
| 77 | // iteration. The value nullptr is interpreted as no suitable variable exists | 
|---|
| 78 | // and one will be created. | 
|---|
| 79 | LoopPeeling(Loop* loop, Instruction* loop_iteration_count, | 
|---|
| 80 | Instruction* canonical_induction_variable = nullptr) | 
|---|
| 81 | : context_(loop->GetContext()), | 
|---|
| 82 | loop_utils_(loop->GetContext(), loop), | 
|---|
| 83 | loop_(loop), | 
|---|
| 84 | loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) | 
|---|
| 85 | ? loop_iteration_count | 
|---|
| 86 | : nullptr), | 
|---|
| 87 | int_type_(nullptr), | 
|---|
| 88 | original_loop_canonical_induction_variable_( | 
|---|
| 89 | canonical_induction_variable), | 
|---|
| 90 | canonical_induction_variable_(nullptr) { | 
|---|
| 91 | if (loop_iteration_count_) { | 
|---|
| 92 | int_type_ = context_->get_type_mgr() | 
|---|
| 93 | ->GetType(loop_iteration_count_->type_id()) | 
|---|
| 94 | ->AsInteger(); | 
|---|
| 95 | if (canonical_induction_variable_) { | 
|---|
| 96 | assert(canonical_induction_variable_->type_id() == | 
|---|
| 97 | loop_iteration_count_->type_id() && | 
|---|
| 98 | "loop_iteration_count and canonical_induction_variable do not " | 
|---|
| 99 | "have the same type"); | 
|---|
| 100 | } | 
|---|
| 101 | } | 
|---|
| 102 | GetIteratingExitValues(); | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | // Returns true if the loop can be peeled. | 
|---|
| 106 | // To be peelable, all operation involved in the update of the loop iterators | 
|---|
| 107 | // must not dominates the exit condition. This restriction is a work around to | 
|---|
| 108 | // not miss compile code like: | 
|---|
| 109 | // | 
|---|
| 110 | //   for (int i = 0; i + 1 < N; i++) {} | 
|---|
| 111 | //   for (int i = 0; ++i < N; i++) {} | 
|---|
| 112 | // | 
|---|
| 113 | // The increment will happen before the test on the exit condition leading to | 
|---|
| 114 | // very look-a-like code. | 
|---|
| 115 | // | 
|---|
| 116 | // This restriction will not apply if a loop rotate is applied before (i.e. | 
|---|
| 117 | // becomes a do-while loop). | 
|---|
| 118 | bool CanPeelLoop() const { | 
|---|
| 119 | CFG& cfg = *context_->cfg(); | 
|---|
| 120 |  | 
|---|
| 121 | if (!loop_iteration_count_) { | 
|---|
| 122 | return false; | 
|---|
| 123 | } | 
|---|
| 124 | if (!int_type_) { | 
|---|
| 125 | return false; | 
|---|
| 126 | } | 
|---|
| 127 | if (int_type_->width() != 32) { | 
|---|
| 128 | return false; | 
|---|
| 129 | } | 
|---|
| 130 | if (!loop_->IsLCSSA()) { | 
|---|
| 131 | return false; | 
|---|
| 132 | } | 
|---|
| 133 | if (!loop_->GetMergeBlock()) { | 
|---|
| 134 | return false; | 
|---|
| 135 | } | 
|---|
| 136 | if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { | 
|---|
| 137 | return false; | 
|---|
| 138 | } | 
|---|
| 139 | if (!IsConditionCheckSideEffectFree()) { | 
|---|
| 140 | return false; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), | 
|---|
| 144 | [](std::pair<uint32_t, Instruction*> it) { | 
|---|
| 145 | return it.second == nullptr; | 
|---|
| 146 | }); | 
|---|
| 147 | } | 
|---|
| 148 |  | 
|---|
| 149 | // Moves the execution of the |factor| first iterations of the loop into a | 
|---|
| 150 | // dedicated loop. | 
|---|
| 151 | void PeelBefore(uint32_t factor); | 
|---|
| 152 |  | 
|---|
| 153 | // Moves the execution of the |factor| last iterations of the loop into a | 
|---|
| 154 | // dedicated loop. | 
|---|
| 155 | void PeelAfter(uint32_t factor); | 
|---|
| 156 |  | 
|---|
| 157 | // Returns the cloned loop. | 
|---|
| 158 | Loop* GetClonedLoop() { return cloned_loop_; } | 
|---|
| 159 | // Returns the original loop. | 
|---|
| 160 | Loop* GetOriginalLoop() { return loop_; } | 
|---|
| 161 |  | 
|---|
| 162 | private: | 
|---|
| 163 | IRContext* context_; | 
|---|
| 164 | LoopUtils loop_utils_; | 
|---|
| 165 | // The original loop. | 
|---|
| 166 | Loop* loop_; | 
|---|
| 167 | // The initial |loop_| upper bound. | 
|---|
| 168 | Instruction* loop_iteration_count_; | 
|---|
| 169 | // The int type to use for the canonical_induction_variable_. | 
|---|
| 170 | analysis::Integer* int_type_; | 
|---|
| 171 | // The cloned loop. | 
|---|
| 172 | Loop* cloned_loop_; | 
|---|
| 173 | // This is set to true when the exit and back-edge branch instruction is the | 
|---|
| 174 | // same. | 
|---|
| 175 | bool do_while_form_; | 
|---|
| 176 | // The canonical induction variable from the original loop if it exists. | 
|---|
| 177 | Instruction* original_loop_canonical_induction_variable_; | 
|---|
| 178 | // The canonical induction variable of the cloned loop. The induction variable | 
|---|
| 179 | // is initialized to 0 and incremented by step of 1. | 
|---|
| 180 | Instruction* canonical_induction_variable_; | 
|---|
| 181 | // Map between loop iterators and exit values. Loop iterators | 
|---|
| 182 | std::unordered_map<uint32_t, Instruction*> exit_value_; | 
|---|
| 183 |  | 
|---|
| 184 | // Duplicate |loop_| and place the new loop before the cloned loop. Iterating | 
|---|
| 185 | // values from the cloned loop are then connected to the original loop as | 
|---|
| 186 | // initializer. | 
|---|
| 187 | void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); | 
|---|
| 188 |  | 
|---|
| 189 | // Insert the canonical induction variable into the first loop as a simplified | 
|---|
| 190 | // counter. | 
|---|
| 191 | void InsertCanonicalInductionVariable( | 
|---|
| 192 | LoopUtils::LoopCloningResult* clone_results); | 
|---|
| 193 |  | 
|---|
| 194 | // Fixes the exit condition of the before loop. The function calls | 
|---|
| 195 | // |condition_builder| to get the condition to use in the conditional branch | 
|---|
| 196 | // of the loop exit. The loop will be exited if the condition evaluate to | 
|---|
| 197 | // true. |condition_builder| takes an Instruction* that represent the | 
|---|
| 198 | // insertion point. | 
|---|
| 199 | void FixExitCondition( | 
|---|
| 200 | const std::function<uint32_t(Instruction*)>& condition_builder); | 
|---|
| 201 |  | 
|---|
| 202 | // Gathers all operations involved in the update of |iterator| into | 
|---|
| 203 | // |operations|. | 
|---|
| 204 | void GetIteratorUpdateOperations( | 
|---|
| 205 | const Loop* loop, Instruction* iterator, | 
|---|
| 206 | std::unordered_set<Instruction*>* operations); | 
|---|
| 207 |  | 
|---|
| 208 | // Gathers exiting iterator values. The function builds a map between each | 
|---|
| 209 | // iterating value in the loop (a phi instruction in the loop header) and its | 
|---|
| 210 | // SSA value when it exit the loop. If no exit value can be accurately found, | 
|---|
| 211 | // it is map to nullptr (see comment on CanPeelLoop). | 
|---|
| 212 | void GetIteratingExitValues(); | 
|---|
| 213 |  | 
|---|
| 214 | // Returns true if a for-loop has no instruction with effects before the | 
|---|
| 215 | // condition check. | 
|---|
| 216 | bool IsConditionCheckSideEffectFree() const; | 
|---|
| 217 |  | 
|---|
| 218 | // Creates a new basic block and insert it between |bb| and the predecessor of | 
|---|
| 219 | // |bb|. | 
|---|
| 220 | BasicBlock* CreateBlockBefore(BasicBlock* bb); | 
|---|
| 221 |  | 
|---|
| 222 | // Inserts code to only execute |loop| only if the given |condition| is true. | 
|---|
| 223 | // |if_merge| is a suitable basic block to be used by the if condition as | 
|---|
| 224 | // merge block. | 
|---|
| 225 | // The function returns the if block protecting the loop. | 
|---|
| 226 | BasicBlock* ProtectLoop(Loop* loop, Instruction* condition, | 
|---|
| 227 | BasicBlock* if_merge); | 
|---|
| 228 | }; | 
|---|
| 229 |  | 
|---|
| 230 | // Implements a loop peeling optimization. | 
|---|
| 231 | // For each loop, the pass will try to peel it if there is conditions that | 
|---|
| 232 | // are true for the "N" first or last iterations of the loop. | 
|---|
| 233 | // To avoid code size explosion, too large loops will not be peeled. | 
|---|
| 234 | class LoopPeelingPass : public Pass { | 
|---|
| 235 | public: | 
|---|
| 236 | // Describes the peeling direction. | 
|---|
| 237 | enum class PeelDirection { | 
|---|
| 238 | kNone,    // Cannot peel | 
|---|
| 239 | kBefore,  // Can peel before | 
|---|
| 240 | kAfter    // Can peel last | 
|---|
| 241 | }; | 
|---|
| 242 |  | 
|---|
| 243 | // Holds some statistics about peeled function. | 
|---|
| 244 | struct LoopPeelingStats { | 
|---|
| 245 | std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_; | 
|---|
| 246 | }; | 
|---|
| 247 |  | 
|---|
| 248 | LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {} | 
|---|
| 249 |  | 
|---|
| 250 | // Sets the loop peeling growth threshold. If the code size increase is above | 
|---|
| 251 | // |code_grow_threshold|, the loop will not be peeled. The code size is | 
|---|
| 252 | // measured in terms of SPIR-V instructions. | 
|---|
| 253 | static void SetLoopPeelingThreshold(size_t code_grow_threshold) { | 
|---|
| 254 | code_grow_threshold_ = code_grow_threshold; | 
|---|
| 255 | } | 
|---|
| 256 |  | 
|---|
| 257 | // Returns the loop peeling code growth threshold. | 
|---|
| 258 | static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; } | 
|---|
| 259 |  | 
|---|
| 260 | const char* name() const override { return "loop-peeling"; } | 
|---|
| 261 |  | 
|---|
| 262 | // Processes the given |module|. Returns Status::Failure if errors occur when | 
|---|
| 263 | // processing. Returns the corresponding Status::Success if processing is | 
|---|
| 264 | // succesful to indicate whether changes have been made to the modue. | 
|---|
| 265 | Pass::Status Process() override; | 
|---|
| 266 |  | 
|---|
| 267 | private: | 
|---|
| 268 | // Describes the peeling direction. | 
|---|
| 269 | enum class CmpOperator { | 
|---|
| 270 | kLT,  // less than | 
|---|
| 271 | kGT,  // greater than | 
|---|
| 272 | kLE,  // less than or equal | 
|---|
| 273 | kGE,  // greater than or equal | 
|---|
| 274 | }; | 
|---|
| 275 |  | 
|---|
| 276 | class LoopPeelingInfo { | 
|---|
| 277 | public: | 
|---|
| 278 | using Direction = std::pair<PeelDirection, uint32_t>; | 
|---|
| 279 |  | 
|---|
| 280 | LoopPeelingInfo(Loop* loop, size_t loop_max_iterations, | 
|---|
| 281 | ScalarEvolutionAnalysis* scev_analysis) | 
|---|
| 282 | : context_(loop->GetContext()), | 
|---|
| 283 | loop_(loop), | 
|---|
| 284 | scev_analysis_(scev_analysis), | 
|---|
| 285 | loop_max_iterations_(loop_max_iterations) {} | 
|---|
| 286 |  | 
|---|
| 287 | // Returns by how much and to which direction a loop should be peeled to | 
|---|
| 288 | // make the conditional branch of the basic block |bb| an unconditional | 
|---|
| 289 | // branch. If |bb|'s terminator is not a conditional branch or the condition | 
|---|
| 290 | // is not workable then it returns PeelDirection::kNone and a 0 factor. | 
|---|
| 291 | Direction GetPeelingInfo(BasicBlock* bb) const; | 
|---|
| 292 |  | 
|---|
| 293 | private: | 
|---|
| 294 | // Returns the id of the loop invariant operand of the conditional | 
|---|
| 295 | // expression |condition|. It returns if no operand is invariant. | 
|---|
| 296 | uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const; | 
|---|
| 297 | // Returns the id of the non loop invariant operand of the conditional | 
|---|
| 298 | // expression |condition|. It returns if all operands are invariant. | 
|---|
| 299 | uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const; | 
|---|
| 300 |  | 
|---|
| 301 | // Returns the value of |rec| at the first loop iteration. | 
|---|
| 302 | SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const; | 
|---|
| 303 | // Returns the value of |rec| at the given |iteration|. | 
|---|
| 304 | SExpression GetValueAtIteration(SERecurrentNode* rec, | 
|---|
| 305 | int64_t iteration) const; | 
|---|
| 306 | // Returns the value of |rec| at the last loop iteration. | 
|---|
| 307 | SExpression GetValueAtLastIteration(SERecurrentNode* rec) const; | 
|---|
| 308 |  | 
|---|
| 309 | bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, | 
|---|
| 310 | bool* result) const; | 
|---|
| 311 |  | 
|---|
| 312 | Direction HandleEquality(SExpression lhs, SExpression rhs) const; | 
|---|
| 313 | Direction HandleInequality(CmpOperator cmp_op, SExpression lhs, | 
|---|
| 314 | SERecurrentNode* rhs) const; | 
|---|
| 315 |  | 
|---|
| 316 | static Direction GetNoneDirection() { | 
|---|
| 317 | return Direction{LoopPeelingPass::PeelDirection::kNone, 0}; | 
|---|
| 318 | } | 
|---|
| 319 | IRContext* context_; | 
|---|
| 320 | Loop* loop_; | 
|---|
| 321 | ScalarEvolutionAnalysis* scev_analysis_; | 
|---|
| 322 | size_t loop_max_iterations_; | 
|---|
| 323 | }; | 
|---|
| 324 | // Peel profitable loops in |f|. | 
|---|
| 325 | bool ProcessFunction(Function* f); | 
|---|
| 326 | // Peel |loop| if profitable. | 
|---|
| 327 | std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size); | 
|---|
| 328 |  | 
|---|
| 329 | static size_t code_grow_threshold_; | 
|---|
| 330 | LoopPeelingStats* stats_; | 
|---|
| 331 | }; | 
|---|
| 332 |  | 
|---|
| 333 | }  // namespace opt | 
|---|
| 334 | }  // namespace spvtools | 
|---|
| 335 |  | 
|---|
| 336 | #endif  // SOURCE_OPT_LOOP_PEELING_H_ | 
|---|
| 337 |  | 
|---|