1 | // Copyright (c) 2018 Google LLC. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef SOURCE_OPT_LOOP_PEELING_H_ |
16 | #define SOURCE_OPT_LOOP_PEELING_H_ |
17 | |
18 | #include <algorithm> |
19 | #include <limits> |
20 | #include <memory> |
21 | #include <tuple> |
22 | #include <unordered_map> |
23 | #include <unordered_set> |
24 | #include <utility> |
25 | #include <vector> |
26 | |
27 | #include "source/opt/ir_context.h" |
28 | #include "source/opt/loop_descriptor.h" |
29 | #include "source/opt/loop_utils.h" |
30 | #include "source/opt/pass.h" |
31 | #include "source/opt/scalar_analysis.h" |
32 | |
33 | namespace spvtools { |
34 | namespace opt { |
35 | |
36 | // Utility class to perform the peeling of a given loop. |
37 | // The loop peeling transformation make a certain amount of a loop iterations to |
38 | // be executed either before (peel before) or after (peel after) the transformed |
39 | // loop. |
40 | // |
41 | // For peeling cases the transformation does the following steps: |
42 | // - It clones the loop and inserts the cloned loop before the original loop; |
43 | // - It connects all iterating values of the cloned loop with the |
44 | // corresponding original loop values so that the second loop starts with |
45 | // the appropriate values. |
46 | // - It inserts a new induction variable "i" is inserted into the cloned that |
47 | // starts with the value 0 and increment by step of one. |
48 | // |
49 | // The last step is specific to each case: |
50 | // - Peel before: the transformation is to peel the "N" first iterations. |
51 | // The exit condition of the cloned loop is changed so that the loop |
52 | // exits when "i < N" becomes false. The original loop is then protected to |
53 | // only execute if there is any iteration left to do. |
54 | // - Peel after: the transformation is to peel the "N" last iterations, |
55 | // then the exit condition of the cloned loop is changed so that the loop |
56 | // exits when "i + N < max_iteration" becomes false, where "max_iteration" |
57 | // is the upper bound of the loop. The cloned loop is then protected to |
58 | // only execute if there is any iteration left to do no covered by the |
59 | // second. |
60 | // |
61 | // To be peelable: |
62 | // - The loop must be in LCSSA form; |
63 | // - The loop must not contain any breaks; |
64 | // - The loop must not have any ambiguous iterators updates (see |
65 | // "CanPeelLoop"). |
66 | // The method "CanPeelLoop" checks that those constrained are met. |
67 | class LoopPeeling { |
68 | public: |
69 | // LoopPeeling constructor. |
70 | // |loop| is the loop to peel. |
71 | // |loop_iteration_count| is the instruction holding the |loop| iteration |
72 | // count, must be invariant for |loop| and must be of an int 32 type (signed |
73 | // or unsigned). |
74 | // |canonical_induction_variable| is an induction variable that can be used to |
75 | // count the number of iterations, must be of the same type as |
76 | // |loop_iteration_count| and start at 0 and increase by step of one at each |
77 | // iteration. The value nullptr is interpreted as no suitable variable exists |
78 | // and one will be created. |
79 | LoopPeeling(Loop* loop, Instruction* loop_iteration_count, |
80 | Instruction* canonical_induction_variable = nullptr) |
81 | : context_(loop->GetContext()), |
82 | loop_utils_(loop->GetContext(), loop), |
83 | loop_(loop), |
84 | loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) |
85 | ? loop_iteration_count |
86 | : nullptr), |
87 | int_type_(nullptr), |
88 | original_loop_canonical_induction_variable_( |
89 | canonical_induction_variable), |
90 | canonical_induction_variable_(nullptr) { |
91 | if (loop_iteration_count_) { |
92 | int_type_ = context_->get_type_mgr() |
93 | ->GetType(loop_iteration_count_->type_id()) |
94 | ->AsInteger(); |
95 | if (canonical_induction_variable_) { |
96 | assert(canonical_induction_variable_->type_id() == |
97 | loop_iteration_count_->type_id() && |
98 | "loop_iteration_count and canonical_induction_variable do not " |
99 | "have the same type" ); |
100 | } |
101 | } |
102 | GetIteratingExitValues(); |
103 | } |
104 | |
105 | // Returns true if the loop can be peeled. |
106 | // To be peelable, all operation involved in the update of the loop iterators |
107 | // must not dominates the exit condition. This restriction is a work around to |
108 | // not miss compile code like: |
109 | // |
110 | // for (int i = 0; i + 1 < N; i++) {} |
111 | // for (int i = 0; ++i < N; i++) {} |
112 | // |
113 | // The increment will happen before the test on the exit condition leading to |
114 | // very look-a-like code. |
115 | // |
116 | // This restriction will not apply if a loop rotate is applied before (i.e. |
117 | // becomes a do-while loop). |
118 | bool CanPeelLoop() const { |
119 | CFG& cfg = *context_->cfg(); |
120 | |
121 | if (!loop_iteration_count_) { |
122 | return false; |
123 | } |
124 | if (!int_type_) { |
125 | return false; |
126 | } |
127 | if (int_type_->width() != 32) { |
128 | return false; |
129 | } |
130 | if (!loop_->IsLCSSA()) { |
131 | return false; |
132 | } |
133 | if (!loop_->GetMergeBlock()) { |
134 | return false; |
135 | } |
136 | if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { |
137 | return false; |
138 | } |
139 | if (!IsConditionCheckSideEffectFree()) { |
140 | return false; |
141 | } |
142 | |
143 | return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), |
144 | [](std::pair<uint32_t, Instruction*> it) { |
145 | return it.second == nullptr; |
146 | }); |
147 | } |
148 | |
149 | // Moves the execution of the |factor| first iterations of the loop into a |
150 | // dedicated loop. |
151 | void PeelBefore(uint32_t factor); |
152 | |
153 | // Moves the execution of the |factor| last iterations of the loop into a |
154 | // dedicated loop. |
155 | void PeelAfter(uint32_t factor); |
156 | |
157 | // Returns the cloned loop. |
158 | Loop* GetClonedLoop() { return cloned_loop_; } |
159 | // Returns the original loop. |
160 | Loop* GetOriginalLoop() { return loop_; } |
161 | |
162 | private: |
163 | IRContext* context_; |
164 | LoopUtils loop_utils_; |
165 | // The original loop. |
166 | Loop* loop_; |
167 | // The initial |loop_| upper bound. |
168 | Instruction* loop_iteration_count_; |
169 | // The int type to use for the canonical_induction_variable_. |
170 | analysis::Integer* int_type_; |
171 | // The cloned loop. |
172 | Loop* cloned_loop_; |
173 | // This is set to true when the exit and back-edge branch instruction is the |
174 | // same. |
175 | bool do_while_form_; |
176 | // The canonical induction variable from the original loop if it exists. |
177 | Instruction* original_loop_canonical_induction_variable_; |
178 | // The canonical induction variable of the cloned loop. The induction variable |
179 | // is initialized to 0 and incremented by step of 1. |
180 | Instruction* canonical_induction_variable_; |
181 | // Map between loop iterators and exit values. Loop iterators |
182 | std::unordered_map<uint32_t, Instruction*> exit_value_; |
183 | |
184 | // Duplicate |loop_| and place the new loop before the cloned loop. Iterating |
185 | // values from the cloned loop are then connected to the original loop as |
186 | // initializer. |
187 | void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); |
188 | |
189 | // Insert the canonical induction variable into the first loop as a simplified |
190 | // counter. |
191 | void InsertCanonicalInductionVariable( |
192 | LoopUtils::LoopCloningResult* clone_results); |
193 | |
194 | // Fixes the exit condition of the before loop. The function calls |
195 | // |condition_builder| to get the condition to use in the conditional branch |
196 | // of the loop exit. The loop will be exited if the condition evaluate to |
197 | // true. |condition_builder| takes an Instruction* that represent the |
198 | // insertion point. |
199 | void FixExitCondition( |
200 | const std::function<uint32_t(Instruction*)>& condition_builder); |
201 | |
202 | // Gathers all operations involved in the update of |iterator| into |
203 | // |operations|. |
204 | void GetIteratorUpdateOperations( |
205 | const Loop* loop, Instruction* iterator, |
206 | std::unordered_set<Instruction*>* operations); |
207 | |
208 | // Gathers exiting iterator values. The function builds a map between each |
209 | // iterating value in the loop (a phi instruction in the loop header) and its |
210 | // SSA value when it exit the loop. If no exit value can be accurately found, |
211 | // it is map to nullptr (see comment on CanPeelLoop). |
212 | void GetIteratingExitValues(); |
213 | |
214 | // Returns true if a for-loop has no instruction with effects before the |
215 | // condition check. |
216 | bool IsConditionCheckSideEffectFree() const; |
217 | |
218 | // Creates a new basic block and insert it between |bb| and the predecessor of |
219 | // |bb|. |
220 | BasicBlock* CreateBlockBefore(BasicBlock* bb); |
221 | |
222 | // Inserts code to only execute |loop| only if the given |condition| is true. |
223 | // |if_merge| is a suitable basic block to be used by the if condition as |
224 | // merge block. |
225 | // The function returns the if block protecting the loop. |
226 | BasicBlock* ProtectLoop(Loop* loop, Instruction* condition, |
227 | BasicBlock* if_merge); |
228 | }; |
229 | |
230 | // Implements a loop peeling optimization. |
231 | // For each loop, the pass will try to peel it if there is conditions that |
232 | // are true for the "N" first or last iterations of the loop. |
233 | // To avoid code size explosion, too large loops will not be peeled. |
234 | class LoopPeelingPass : public Pass { |
235 | public: |
236 | // Describes the peeling direction. |
237 | enum class PeelDirection { |
238 | kNone, // Cannot peel |
239 | kBefore, // Can peel before |
240 | kAfter // Can peel last |
241 | }; |
242 | |
243 | // Holds some statistics about peeled function. |
244 | struct LoopPeelingStats { |
245 | std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_; |
246 | }; |
247 | |
248 | LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {} |
249 | |
250 | // Sets the loop peeling growth threshold. If the code size increase is above |
251 | // |code_grow_threshold|, the loop will not be peeled. The code size is |
252 | // measured in terms of SPIR-V instructions. |
253 | static void SetLoopPeelingThreshold(size_t code_grow_threshold) { |
254 | code_grow_threshold_ = code_grow_threshold; |
255 | } |
256 | |
257 | // Returns the loop peeling code growth threshold. |
258 | static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; } |
259 | |
260 | const char* name() const override { return "loop-peeling" ; } |
261 | |
262 | // Processes the given |module|. Returns Status::Failure if errors occur when |
263 | // processing. Returns the corresponding Status::Success if processing is |
264 | // succesful to indicate whether changes have been made to the modue. |
265 | Pass::Status Process() override; |
266 | |
267 | private: |
268 | // Describes the peeling direction. |
269 | enum class CmpOperator { |
270 | kLT, // less than |
271 | kGT, // greater than |
272 | kLE, // less than or equal |
273 | kGE, // greater than or equal |
274 | }; |
275 | |
276 | class LoopPeelingInfo { |
277 | public: |
278 | using Direction = std::pair<PeelDirection, uint32_t>; |
279 | |
280 | LoopPeelingInfo(Loop* loop, size_t loop_max_iterations, |
281 | ScalarEvolutionAnalysis* scev_analysis) |
282 | : context_(loop->GetContext()), |
283 | loop_(loop), |
284 | scev_analysis_(scev_analysis), |
285 | loop_max_iterations_(loop_max_iterations) {} |
286 | |
287 | // Returns by how much and to which direction a loop should be peeled to |
288 | // make the conditional branch of the basic block |bb| an unconditional |
289 | // branch. If |bb|'s terminator is not a conditional branch or the condition |
290 | // is not workable then it returns PeelDirection::kNone and a 0 factor. |
291 | Direction GetPeelingInfo(BasicBlock* bb) const; |
292 | |
293 | private: |
294 | // Returns the id of the loop invariant operand of the conditional |
295 | // expression |condition|. It returns if no operand is invariant. |
296 | uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const; |
297 | // Returns the id of the non loop invariant operand of the conditional |
298 | // expression |condition|. It returns if all operands are invariant. |
299 | uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const; |
300 | |
301 | // Returns the value of |rec| at the first loop iteration. |
302 | SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const; |
303 | // Returns the value of |rec| at the given |iteration|. |
304 | SExpression GetValueAtIteration(SERecurrentNode* rec, |
305 | int64_t iteration) const; |
306 | // Returns the value of |rec| at the last loop iteration. |
307 | SExpression GetValueAtLastIteration(SERecurrentNode* rec) const; |
308 | |
309 | bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, |
310 | bool* result) const; |
311 | |
312 | Direction HandleEquality(SExpression lhs, SExpression rhs) const; |
313 | Direction HandleInequality(CmpOperator cmp_op, SExpression lhs, |
314 | SERecurrentNode* rhs) const; |
315 | |
316 | static Direction GetNoneDirection() { |
317 | return Direction{LoopPeelingPass::PeelDirection::kNone, 0}; |
318 | } |
319 | IRContext* context_; |
320 | Loop* loop_; |
321 | ScalarEvolutionAnalysis* scev_analysis_; |
322 | size_t loop_max_iterations_; |
323 | }; |
324 | // Peel profitable loops in |f|. |
325 | bool ProcessFunction(Function* f); |
326 | // Peel |loop| if profitable. |
327 | std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size); |
328 | |
329 | static size_t code_grow_threshold_; |
330 | LoopPeelingStats* stats_; |
331 | }; |
332 | |
333 | } // namespace opt |
334 | } // namespace spvtools |
335 | |
336 | #endif // SOURCE_OPT_LOOP_PEELING_H_ |
337 | |