| 1 | // Copyright (c) 2018 Google LLC. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #ifndef SOURCE_OPT_LOOP_PEELING_H_ |
| 16 | #define SOURCE_OPT_LOOP_PEELING_H_ |
| 17 | |
| 18 | #include <algorithm> |
| 19 | #include <limits> |
| 20 | #include <memory> |
| 21 | #include <tuple> |
| 22 | #include <unordered_map> |
| 23 | #include <unordered_set> |
| 24 | #include <utility> |
| 25 | #include <vector> |
| 26 | |
| 27 | #include "source/opt/ir_context.h" |
| 28 | #include "source/opt/loop_descriptor.h" |
| 29 | #include "source/opt/loop_utils.h" |
| 30 | #include "source/opt/pass.h" |
| 31 | #include "source/opt/scalar_analysis.h" |
| 32 | |
| 33 | namespace spvtools { |
| 34 | namespace opt { |
| 35 | |
| 36 | // Utility class to perform the peeling of a given loop. |
| 37 | // The loop peeling transformation make a certain amount of a loop iterations to |
| 38 | // be executed either before (peel before) or after (peel after) the transformed |
| 39 | // loop. |
| 40 | // |
| 41 | // For peeling cases the transformation does the following steps: |
| 42 | // - It clones the loop and inserts the cloned loop before the original loop; |
| 43 | // - It connects all iterating values of the cloned loop with the |
| 44 | // corresponding original loop values so that the second loop starts with |
| 45 | // the appropriate values. |
| 46 | // - It inserts a new induction variable "i" is inserted into the cloned that |
| 47 | // starts with the value 0 and increment by step of one. |
| 48 | // |
| 49 | // The last step is specific to each case: |
| 50 | // - Peel before: the transformation is to peel the "N" first iterations. |
| 51 | // The exit condition of the cloned loop is changed so that the loop |
| 52 | // exits when "i < N" becomes false. The original loop is then protected to |
| 53 | // only execute if there is any iteration left to do. |
| 54 | // - Peel after: the transformation is to peel the "N" last iterations, |
| 55 | // then the exit condition of the cloned loop is changed so that the loop |
| 56 | // exits when "i + N < max_iteration" becomes false, where "max_iteration" |
| 57 | // is the upper bound of the loop. The cloned loop is then protected to |
| 58 | // only execute if there is any iteration left to do no covered by the |
| 59 | // second. |
| 60 | // |
| 61 | // To be peelable: |
| 62 | // - The loop must be in LCSSA form; |
| 63 | // - The loop must not contain any breaks; |
| 64 | // - The loop must not have any ambiguous iterators updates (see |
| 65 | // "CanPeelLoop"). |
| 66 | // The method "CanPeelLoop" checks that those constrained are met. |
| 67 | class LoopPeeling { |
| 68 | public: |
| 69 | // LoopPeeling constructor. |
| 70 | // |loop| is the loop to peel. |
| 71 | // |loop_iteration_count| is the instruction holding the |loop| iteration |
| 72 | // count, must be invariant for |loop| and must be of an int 32 type (signed |
| 73 | // or unsigned). |
| 74 | // |canonical_induction_variable| is an induction variable that can be used to |
| 75 | // count the number of iterations, must be of the same type as |
| 76 | // |loop_iteration_count| and start at 0 and increase by step of one at each |
| 77 | // iteration. The value nullptr is interpreted as no suitable variable exists |
| 78 | // and one will be created. |
| 79 | LoopPeeling(Loop* loop, Instruction* loop_iteration_count, |
| 80 | Instruction* canonical_induction_variable = nullptr) |
| 81 | : context_(loop->GetContext()), |
| 82 | loop_utils_(loop->GetContext(), loop), |
| 83 | loop_(loop), |
| 84 | loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) |
| 85 | ? loop_iteration_count |
| 86 | : nullptr), |
| 87 | int_type_(nullptr), |
| 88 | original_loop_canonical_induction_variable_( |
| 89 | canonical_induction_variable), |
| 90 | canonical_induction_variable_(nullptr) { |
| 91 | if (loop_iteration_count_) { |
| 92 | int_type_ = context_->get_type_mgr() |
| 93 | ->GetType(loop_iteration_count_->type_id()) |
| 94 | ->AsInteger(); |
| 95 | if (canonical_induction_variable_) { |
| 96 | assert(canonical_induction_variable_->type_id() == |
| 97 | loop_iteration_count_->type_id() && |
| 98 | "loop_iteration_count and canonical_induction_variable do not " |
| 99 | "have the same type" ); |
| 100 | } |
| 101 | } |
| 102 | GetIteratingExitValues(); |
| 103 | } |
| 104 | |
| 105 | // Returns true if the loop can be peeled. |
| 106 | // To be peelable, all operation involved in the update of the loop iterators |
| 107 | // must not dominates the exit condition. This restriction is a work around to |
| 108 | // not miss compile code like: |
| 109 | // |
| 110 | // for (int i = 0; i + 1 < N; i++) {} |
| 111 | // for (int i = 0; ++i < N; i++) {} |
| 112 | // |
| 113 | // The increment will happen before the test on the exit condition leading to |
| 114 | // very look-a-like code. |
| 115 | // |
| 116 | // This restriction will not apply if a loop rotate is applied before (i.e. |
| 117 | // becomes a do-while loop). |
| 118 | bool CanPeelLoop() const { |
| 119 | CFG& cfg = *context_->cfg(); |
| 120 | |
| 121 | if (!loop_iteration_count_) { |
| 122 | return false; |
| 123 | } |
| 124 | if (!int_type_) { |
| 125 | return false; |
| 126 | } |
| 127 | if (int_type_->width() != 32) { |
| 128 | return false; |
| 129 | } |
| 130 | if (!loop_->IsLCSSA()) { |
| 131 | return false; |
| 132 | } |
| 133 | if (!loop_->GetMergeBlock()) { |
| 134 | return false; |
| 135 | } |
| 136 | if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { |
| 137 | return false; |
| 138 | } |
| 139 | if (!IsConditionCheckSideEffectFree()) { |
| 140 | return false; |
| 141 | } |
| 142 | |
| 143 | return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), |
| 144 | [](std::pair<uint32_t, Instruction*> it) { |
| 145 | return it.second == nullptr; |
| 146 | }); |
| 147 | } |
| 148 | |
| 149 | // Moves the execution of the |factor| first iterations of the loop into a |
| 150 | // dedicated loop. |
| 151 | void PeelBefore(uint32_t factor); |
| 152 | |
| 153 | // Moves the execution of the |factor| last iterations of the loop into a |
| 154 | // dedicated loop. |
| 155 | void PeelAfter(uint32_t factor); |
| 156 | |
| 157 | // Returns the cloned loop. |
| 158 | Loop* GetClonedLoop() { return cloned_loop_; } |
| 159 | // Returns the original loop. |
| 160 | Loop* GetOriginalLoop() { return loop_; } |
| 161 | |
| 162 | private: |
| 163 | IRContext* context_; |
| 164 | LoopUtils loop_utils_; |
| 165 | // The original loop. |
| 166 | Loop* loop_; |
| 167 | // The initial |loop_| upper bound. |
| 168 | Instruction* loop_iteration_count_; |
| 169 | // The int type to use for the canonical_induction_variable_. |
| 170 | analysis::Integer* int_type_; |
| 171 | // The cloned loop. |
| 172 | Loop* cloned_loop_; |
| 173 | // This is set to true when the exit and back-edge branch instruction is the |
| 174 | // same. |
| 175 | bool do_while_form_; |
| 176 | // The canonical induction variable from the original loop if it exists. |
| 177 | Instruction* original_loop_canonical_induction_variable_; |
| 178 | // The canonical induction variable of the cloned loop. The induction variable |
| 179 | // is initialized to 0 and incremented by step of 1. |
| 180 | Instruction* canonical_induction_variable_; |
| 181 | // Map between loop iterators and exit values. Loop iterators |
| 182 | std::unordered_map<uint32_t, Instruction*> exit_value_; |
| 183 | |
| 184 | // Duplicate |loop_| and place the new loop before the cloned loop. Iterating |
| 185 | // values from the cloned loop are then connected to the original loop as |
| 186 | // initializer. |
| 187 | void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); |
| 188 | |
| 189 | // Insert the canonical induction variable into the first loop as a simplified |
| 190 | // counter. |
| 191 | void InsertCanonicalInductionVariable( |
| 192 | LoopUtils::LoopCloningResult* clone_results); |
| 193 | |
| 194 | // Fixes the exit condition of the before loop. The function calls |
| 195 | // |condition_builder| to get the condition to use in the conditional branch |
| 196 | // of the loop exit. The loop will be exited if the condition evaluate to |
| 197 | // true. |condition_builder| takes an Instruction* that represent the |
| 198 | // insertion point. |
| 199 | void FixExitCondition( |
| 200 | const std::function<uint32_t(Instruction*)>& condition_builder); |
| 201 | |
| 202 | // Gathers all operations involved in the update of |iterator| into |
| 203 | // |operations|. |
| 204 | void GetIteratorUpdateOperations( |
| 205 | const Loop* loop, Instruction* iterator, |
| 206 | std::unordered_set<Instruction*>* operations); |
| 207 | |
| 208 | // Gathers exiting iterator values. The function builds a map between each |
| 209 | // iterating value in the loop (a phi instruction in the loop header) and its |
| 210 | // SSA value when it exit the loop. If no exit value can be accurately found, |
| 211 | // it is map to nullptr (see comment on CanPeelLoop). |
| 212 | void GetIteratingExitValues(); |
| 213 | |
| 214 | // Returns true if a for-loop has no instruction with effects before the |
| 215 | // condition check. |
| 216 | bool IsConditionCheckSideEffectFree() const; |
| 217 | |
| 218 | // Creates a new basic block and insert it between |bb| and the predecessor of |
| 219 | // |bb|. |
| 220 | BasicBlock* CreateBlockBefore(BasicBlock* bb); |
| 221 | |
| 222 | // Inserts code to only execute |loop| only if the given |condition| is true. |
| 223 | // |if_merge| is a suitable basic block to be used by the if condition as |
| 224 | // merge block. |
| 225 | // The function returns the if block protecting the loop. |
| 226 | BasicBlock* ProtectLoop(Loop* loop, Instruction* condition, |
| 227 | BasicBlock* if_merge); |
| 228 | }; |
| 229 | |
| 230 | // Implements a loop peeling optimization. |
| 231 | // For each loop, the pass will try to peel it if there is conditions that |
| 232 | // are true for the "N" first or last iterations of the loop. |
| 233 | // To avoid code size explosion, too large loops will not be peeled. |
| 234 | class LoopPeelingPass : public Pass { |
| 235 | public: |
| 236 | // Describes the peeling direction. |
| 237 | enum class PeelDirection { |
| 238 | kNone, // Cannot peel |
| 239 | kBefore, // Can peel before |
| 240 | kAfter // Can peel last |
| 241 | }; |
| 242 | |
| 243 | // Holds some statistics about peeled function. |
| 244 | struct LoopPeelingStats { |
| 245 | std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_; |
| 246 | }; |
| 247 | |
| 248 | LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {} |
| 249 | |
| 250 | // Sets the loop peeling growth threshold. If the code size increase is above |
| 251 | // |code_grow_threshold|, the loop will not be peeled. The code size is |
| 252 | // measured in terms of SPIR-V instructions. |
| 253 | static void SetLoopPeelingThreshold(size_t code_grow_threshold) { |
| 254 | code_grow_threshold_ = code_grow_threshold; |
| 255 | } |
| 256 | |
| 257 | // Returns the loop peeling code growth threshold. |
| 258 | static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; } |
| 259 | |
| 260 | const char* name() const override { return "loop-peeling" ; } |
| 261 | |
| 262 | // Processes the given |module|. Returns Status::Failure if errors occur when |
| 263 | // processing. Returns the corresponding Status::Success if processing is |
| 264 | // succesful to indicate whether changes have been made to the modue. |
| 265 | Pass::Status Process() override; |
| 266 | |
| 267 | private: |
| 268 | // Describes the peeling direction. |
| 269 | enum class CmpOperator { |
| 270 | kLT, // less than |
| 271 | kGT, // greater than |
| 272 | kLE, // less than or equal |
| 273 | kGE, // greater than or equal |
| 274 | }; |
| 275 | |
| 276 | class LoopPeelingInfo { |
| 277 | public: |
| 278 | using Direction = std::pair<PeelDirection, uint32_t>; |
| 279 | |
| 280 | LoopPeelingInfo(Loop* loop, size_t loop_max_iterations, |
| 281 | ScalarEvolutionAnalysis* scev_analysis) |
| 282 | : context_(loop->GetContext()), |
| 283 | loop_(loop), |
| 284 | scev_analysis_(scev_analysis), |
| 285 | loop_max_iterations_(loop_max_iterations) {} |
| 286 | |
| 287 | // Returns by how much and to which direction a loop should be peeled to |
| 288 | // make the conditional branch of the basic block |bb| an unconditional |
| 289 | // branch. If |bb|'s terminator is not a conditional branch or the condition |
| 290 | // is not workable then it returns PeelDirection::kNone and a 0 factor. |
| 291 | Direction GetPeelingInfo(BasicBlock* bb) const; |
| 292 | |
| 293 | private: |
| 294 | // Returns the id of the loop invariant operand of the conditional |
| 295 | // expression |condition|. It returns if no operand is invariant. |
| 296 | uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const; |
| 297 | // Returns the id of the non loop invariant operand of the conditional |
| 298 | // expression |condition|. It returns if all operands are invariant. |
| 299 | uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const; |
| 300 | |
| 301 | // Returns the value of |rec| at the first loop iteration. |
| 302 | SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const; |
| 303 | // Returns the value of |rec| at the given |iteration|. |
| 304 | SExpression GetValueAtIteration(SERecurrentNode* rec, |
| 305 | int64_t iteration) const; |
| 306 | // Returns the value of |rec| at the last loop iteration. |
| 307 | SExpression GetValueAtLastIteration(SERecurrentNode* rec) const; |
| 308 | |
| 309 | bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, |
| 310 | bool* result) const; |
| 311 | |
| 312 | Direction HandleEquality(SExpression lhs, SExpression rhs) const; |
| 313 | Direction HandleInequality(CmpOperator cmp_op, SExpression lhs, |
| 314 | SERecurrentNode* rhs) const; |
| 315 | |
| 316 | static Direction GetNoneDirection() { |
| 317 | return Direction{LoopPeelingPass::PeelDirection::kNone, 0}; |
| 318 | } |
| 319 | IRContext* context_; |
| 320 | Loop* loop_; |
| 321 | ScalarEvolutionAnalysis* scev_analysis_; |
| 322 | size_t loop_max_iterations_; |
| 323 | }; |
| 324 | // Peel profitable loops in |f|. |
| 325 | bool ProcessFunction(Function* f); |
| 326 | // Peel |loop| if profitable. |
| 327 | std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size); |
| 328 | |
| 329 | static size_t code_grow_threshold_; |
| 330 | LoopPeelingStats* stats_; |
| 331 | }; |
| 332 | |
| 333 | } // namespace opt |
| 334 | } // namespace spvtools |
| 335 | |
| 336 | #endif // SOURCE_OPT_LOOP_PEELING_H_ |
| 337 | |