1// Copyright (c) 2018 Google LLC.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef SOURCE_OPT_LOOP_PEELING_H_
16#define SOURCE_OPT_LOOP_PEELING_H_
17
18#include <algorithm>
19#include <limits>
20#include <memory>
21#include <tuple>
22#include <unordered_map>
23#include <unordered_set>
24#include <utility>
25#include <vector>
26
27#include "source/opt/ir_context.h"
28#include "source/opt/loop_descriptor.h"
29#include "source/opt/loop_utils.h"
30#include "source/opt/pass.h"
31#include "source/opt/scalar_analysis.h"
32
33namespace spvtools {
34namespace opt {
35
36// Utility class to perform the peeling of a given loop.
37// The loop peeling transformation make a certain amount of a loop iterations to
38// be executed either before (peel before) or after (peel after) the transformed
39// loop.
40//
41// For peeling cases the transformation does the following steps:
42// - It clones the loop and inserts the cloned loop before the original loop;
43// - It connects all iterating values of the cloned loop with the
44// corresponding original loop values so that the second loop starts with
45// the appropriate values.
46// - It inserts a new induction variable "i" is inserted into the cloned that
47// starts with the value 0 and increment by step of one.
48//
49// The last step is specific to each case:
50// - Peel before: the transformation is to peel the "N" first iterations.
51// The exit condition of the cloned loop is changed so that the loop
52// exits when "i < N" becomes false. The original loop is then protected to
53// only execute if there is any iteration left to do.
54// - Peel after: the transformation is to peel the "N" last iterations,
55// then the exit condition of the cloned loop is changed so that the loop
56// exits when "i + N < max_iteration" becomes false, where "max_iteration"
57// is the upper bound of the loop. The cloned loop is then protected to
58// only execute if there is any iteration left to do no covered by the
59// second.
60//
61// To be peelable:
62// - The loop must be in LCSSA form;
63// - The loop must not contain any breaks;
64// - The loop must not have any ambiguous iterators updates (see
65// "CanPeelLoop").
66// The method "CanPeelLoop" checks that those constrained are met.
67class LoopPeeling {
68 public:
69 // LoopPeeling constructor.
70 // |loop| is the loop to peel.
71 // |loop_iteration_count| is the instruction holding the |loop| iteration
72 // count, must be invariant for |loop| and must be of an int 32 type (signed
73 // or unsigned).
74 // |canonical_induction_variable| is an induction variable that can be used to
75 // count the number of iterations, must be of the same type as
76 // |loop_iteration_count| and start at 0 and increase by step of one at each
77 // iteration. The value nullptr is interpreted as no suitable variable exists
78 // and one will be created.
79 LoopPeeling(Loop* loop, Instruction* loop_iteration_count,
80 Instruction* canonical_induction_variable = nullptr)
81 : context_(loop->GetContext()),
82 loop_utils_(loop->GetContext(), loop),
83 loop_(loop),
84 loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
85 ? loop_iteration_count
86 : nullptr),
87 int_type_(nullptr),
88 original_loop_canonical_induction_variable_(
89 canonical_induction_variable),
90 canonical_induction_variable_(nullptr) {
91 if (loop_iteration_count_) {
92 int_type_ = context_->get_type_mgr()
93 ->GetType(loop_iteration_count_->type_id())
94 ->AsInteger();
95 if (canonical_induction_variable_) {
96 assert(canonical_induction_variable_->type_id() ==
97 loop_iteration_count_->type_id() &&
98 "loop_iteration_count and canonical_induction_variable do not "
99 "have the same type");
100 }
101 }
102 GetIteratingExitValues();
103 }
104
105 // Returns true if the loop can be peeled.
106 // To be peelable, all operation involved in the update of the loop iterators
107 // must not dominates the exit condition. This restriction is a work around to
108 // not miss compile code like:
109 //
110 // for (int i = 0; i + 1 < N; i++) {}
111 // for (int i = 0; ++i < N; i++) {}
112 //
113 // The increment will happen before the test on the exit condition leading to
114 // very look-a-like code.
115 //
116 // This restriction will not apply if a loop rotate is applied before (i.e.
117 // becomes a do-while loop).
118 bool CanPeelLoop() const {
119 CFG& cfg = *context_->cfg();
120
121 if (!loop_iteration_count_) {
122 return false;
123 }
124 if (!int_type_) {
125 return false;
126 }
127 if (int_type_->width() != 32) {
128 return false;
129 }
130 if (!loop_->IsLCSSA()) {
131 return false;
132 }
133 if (!loop_->GetMergeBlock()) {
134 return false;
135 }
136 if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
137 return false;
138 }
139 if (!IsConditionCheckSideEffectFree()) {
140 return false;
141 }
142
143 return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
144 [](std::pair<uint32_t, Instruction*> it) {
145 return it.second == nullptr;
146 });
147 }
148
149 // Moves the execution of the |factor| first iterations of the loop into a
150 // dedicated loop.
151 void PeelBefore(uint32_t factor);
152
153 // Moves the execution of the |factor| last iterations of the loop into a
154 // dedicated loop.
155 void PeelAfter(uint32_t factor);
156
157 // Returns the cloned loop.
158 Loop* GetClonedLoop() { return cloned_loop_; }
159 // Returns the original loop.
160 Loop* GetOriginalLoop() { return loop_; }
161
162 private:
163 IRContext* context_;
164 LoopUtils loop_utils_;
165 // The original loop.
166 Loop* loop_;
167 // The initial |loop_| upper bound.
168 Instruction* loop_iteration_count_;
169 // The int type to use for the canonical_induction_variable_.
170 analysis::Integer* int_type_;
171 // The cloned loop.
172 Loop* cloned_loop_;
173 // This is set to true when the exit and back-edge branch instruction is the
174 // same.
175 bool do_while_form_;
176 // The canonical induction variable from the original loop if it exists.
177 Instruction* original_loop_canonical_induction_variable_;
178 // The canonical induction variable of the cloned loop. The induction variable
179 // is initialized to 0 and incremented by step of 1.
180 Instruction* canonical_induction_variable_;
181 // Map between loop iterators and exit values. Loop iterators
182 std::unordered_map<uint32_t, Instruction*> exit_value_;
183
184 // Duplicate |loop_| and place the new loop before the cloned loop. Iterating
185 // values from the cloned loop are then connected to the original loop as
186 // initializer.
187 void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
188
189 // Insert the canonical induction variable into the first loop as a simplified
190 // counter.
191 void InsertCanonicalInductionVariable(
192 LoopUtils::LoopCloningResult* clone_results);
193
194 // Fixes the exit condition of the before loop. The function calls
195 // |condition_builder| to get the condition to use in the conditional branch
196 // of the loop exit. The loop will be exited if the condition evaluate to
197 // true. |condition_builder| takes an Instruction* that represent the
198 // insertion point.
199 void FixExitCondition(
200 const std::function<uint32_t(Instruction*)>& condition_builder);
201
202 // Gathers all operations involved in the update of |iterator| into
203 // |operations|.
204 void GetIteratorUpdateOperations(
205 const Loop* loop, Instruction* iterator,
206 std::unordered_set<Instruction*>* operations);
207
208 // Gathers exiting iterator values. The function builds a map between each
209 // iterating value in the loop (a phi instruction in the loop header) and its
210 // SSA value when it exit the loop. If no exit value can be accurately found,
211 // it is map to nullptr (see comment on CanPeelLoop).
212 void GetIteratingExitValues();
213
214 // Returns true if a for-loop has no instruction with effects before the
215 // condition check.
216 bool IsConditionCheckSideEffectFree() const;
217
218 // Creates a new basic block and insert it between |bb| and the predecessor of
219 // |bb|.
220 BasicBlock* CreateBlockBefore(BasicBlock* bb);
221
222 // Inserts code to only execute |loop| only if the given |condition| is true.
223 // |if_merge| is a suitable basic block to be used by the if condition as
224 // merge block.
225 // The function returns the if block protecting the loop.
226 BasicBlock* ProtectLoop(Loop* loop, Instruction* condition,
227 BasicBlock* if_merge);
228};
229
230// Implements a loop peeling optimization.
231// For each loop, the pass will try to peel it if there is conditions that
232// are true for the "N" first or last iterations of the loop.
233// To avoid code size explosion, too large loops will not be peeled.
234class LoopPeelingPass : public Pass {
235 public:
236 // Describes the peeling direction.
237 enum class PeelDirection {
238 kNone, // Cannot peel
239 kBefore, // Can peel before
240 kAfter // Can peel last
241 };
242
243 // Holds some statistics about peeled function.
244 struct LoopPeelingStats {
245 std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_;
246 };
247
248 LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {}
249
250 // Sets the loop peeling growth threshold. If the code size increase is above
251 // |code_grow_threshold|, the loop will not be peeled. The code size is
252 // measured in terms of SPIR-V instructions.
253 static void SetLoopPeelingThreshold(size_t code_grow_threshold) {
254 code_grow_threshold_ = code_grow_threshold;
255 }
256
257 // Returns the loop peeling code growth threshold.
258 static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; }
259
260 const char* name() const override { return "loop-peeling"; }
261
262 // Processes the given |module|. Returns Status::Failure if errors occur when
263 // processing. Returns the corresponding Status::Success if processing is
264 // succesful to indicate whether changes have been made to the modue.
265 Pass::Status Process() override;
266
267 private:
268 // Describes the peeling direction.
269 enum class CmpOperator {
270 kLT, // less than
271 kGT, // greater than
272 kLE, // less than or equal
273 kGE, // greater than or equal
274 };
275
276 class LoopPeelingInfo {
277 public:
278 using Direction = std::pair<PeelDirection, uint32_t>;
279
280 LoopPeelingInfo(Loop* loop, size_t loop_max_iterations,
281 ScalarEvolutionAnalysis* scev_analysis)
282 : context_(loop->GetContext()),
283 loop_(loop),
284 scev_analysis_(scev_analysis),
285 loop_max_iterations_(loop_max_iterations) {}
286
287 // Returns by how much and to which direction a loop should be peeled to
288 // make the conditional branch of the basic block |bb| an unconditional
289 // branch. If |bb|'s terminator is not a conditional branch or the condition
290 // is not workable then it returns PeelDirection::kNone and a 0 factor.
291 Direction GetPeelingInfo(BasicBlock* bb) const;
292
293 private:
294 // Returns the id of the loop invariant operand of the conditional
295 // expression |condition|. It returns if no operand is invariant.
296 uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const;
297 // Returns the id of the non loop invariant operand of the conditional
298 // expression |condition|. It returns if all operands are invariant.
299 uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const;
300
301 // Returns the value of |rec| at the first loop iteration.
302 SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const;
303 // Returns the value of |rec| at the given |iteration|.
304 SExpression GetValueAtIteration(SERecurrentNode* rec,
305 int64_t iteration) const;
306 // Returns the value of |rec| at the last loop iteration.
307 SExpression GetValueAtLastIteration(SERecurrentNode* rec) const;
308
309 bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs,
310 bool* result) const;
311
312 Direction HandleEquality(SExpression lhs, SExpression rhs) const;
313 Direction HandleInequality(CmpOperator cmp_op, SExpression lhs,
314 SERecurrentNode* rhs) const;
315
316 static Direction GetNoneDirection() {
317 return Direction{LoopPeelingPass::PeelDirection::kNone, 0};
318 }
319 IRContext* context_;
320 Loop* loop_;
321 ScalarEvolutionAnalysis* scev_analysis_;
322 size_t loop_max_iterations_;
323 };
324 // Peel profitable loops in |f|.
325 bool ProcessFunction(Function* f);
326 // Peel |loop| if profitable.
327 std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size);
328
329 static size_t code_grow_threshold_;
330 LoopPeelingStats* stats_;
331};
332
333} // namespace opt
334} // namespace spvtools
335
336#endif // SOURCE_OPT_LOOP_PEELING_H_
337