1 | // Copyright (c) 2018 Google LLC. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "source/opt/loop_fission.h" |
16 | |
17 | #include <set> |
18 | |
19 | #include "source/opt/register_pressure.h" |
20 | |
21 | // Implement loop fission with an optional parameter to split only |
22 | // if the register pressure in a given loop meets a certain criteria. This is |
23 | // controlled via the constructors of LoopFissionPass. |
24 | // |
// 1 - Build a list of loops to be split, these are innermost loops (loops
// without child loops themselves) which meet the register pressure criteria, as
// determined by the ShouldSplitLoop method of LoopFissionPass.
28 | // |
// 2 - For each loop in the list, group each instruction into a set of related
// instructions by recursively traversing each instruction's users and
// operands. We stop if we encounter an instruction we have seen before or an
// instruction which we don't consider relevant (e.g. OpLoopMerge). We then
// merge these groups into two different sets, one for the first loop and one
// for the second.
35 | // |
// 3 - We then run CanPerformSplit to check that it would be legal to split a
// loop using those two sets. We check that we haven't altered the relative
// order in which loads/stores appear in the binary and that we aren't breaking
// any dependency between loads/stores by splitting them into two loops. We also
// check that none of the OpBranch instructions are dependent on a load as we
// leave control flow structure intact and move only instructions in the body so
// we want to avoid any loads with side effects or aliasing.
43 | // |
// 4 - We then split the loop by calling SplitLoop. This function clones the
// loop, attaches it to the preheader and connects the new loop's merge block
// to the current loop header block. We then use the two sets built in step 2 to
// remove instructions from each loop. If an instruction appears in the first
// set it is removed from the second loop and vice versa.
49 | // |
50 | // 5 - If the multiple split passes flag is set we check if each of the loops |
51 | // still meet the register pressure criteria. If they do then we add them to the |
52 | // list of loops to be split (created in step one) to allow for loops to be |
53 | // split multiple times. |
54 | // |
55 | |
56 | namespace spvtools { |
57 | namespace opt { |
58 | |
59 | class LoopFissionImpl { |
60 | public: |
61 | LoopFissionImpl(IRContext* context, Loop* loop) |
62 | : context_(context), loop_(loop), load_used_in_condition_(false) {} |
63 | |
64 | // Group each instruction in the loop into sets of instructions related by |
65 | // their usedef chains. An instruction which uses another will appear in the |
66 | // same set. Then merge those sets into just two sets. Returns false if there |
67 | // was one or less sets created. |
68 | bool GroupInstructionsByUseDef(); |
69 | |
70 | // Check if the sets built by GroupInstructionsByUseDef violate any data |
71 | // dependence rules. |
72 | bool CanPerformSplit(); |
73 | |
74 | // Split the loop and return a pointer to the new loop. |
75 | Loop* SplitLoop(); |
76 | |
77 | // Checks if |inst| is safe to move. We can only move instructions which don't |
78 | // have any side effects and OpLoads and OpStores. |
79 | bool MovableInstruction(const Instruction& inst) const; |
80 | |
81 | private: |
82 | // Traverse the def use chain of |inst| and add the users and uses of |inst| |
83 | // which are in the same loop to the |returned_set|. |
84 | void TraverseUseDef(Instruction* inst, std::set<Instruction*>* returned_set, |
85 | bool ignore_phi_users = false, bool report_loads = false); |
86 | |
87 | // We group the instructions in the block into two different groups, the |
88 | // instructions to be kept in the original loop and the ones to be cloned into |
89 | // the new loop. As the cloned loop is attached to the preheader it will be |
90 | // the first loop and the second loop will be the original. |
91 | std::set<Instruction*> cloned_loop_instructions_; |
92 | std::set<Instruction*> original_loop_instructions_; |
93 | |
94 | // We need a set of all the instructions to be seen so we can break any |
95 | // recursion and also so we can ignore certain instructions by preemptively |
96 | // adding them to this set. |
97 | std::set<Instruction*> seen_instructions_; |
98 | |
99 | // A map of instructions to their relative position in the function. |
100 | std::map<Instruction*, size_t> instruction_order_; |
101 | |
102 | IRContext* context_; |
103 | |
104 | Loop* loop_; |
105 | |
106 | // This is set to true by TraverseUseDef when traversing the instructions |
107 | // related to the loop condition and any if conditions should any of those |
108 | // instructions be a load. |
109 | bool load_used_in_condition_; |
110 | }; |
111 | |
112 | bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const { |
113 | return inst.opcode() == SpvOp::SpvOpLoad || |
114 | inst.opcode() == SpvOp::SpvOpStore || |
115 | inst.opcode() == SpvOp::SpvOpSelectionMerge || |
116 | inst.opcode() == SpvOp::SpvOpPhi || inst.IsOpcodeCodeMotionSafe(); |
117 | } |
118 | |
119 | void LoopFissionImpl::TraverseUseDef(Instruction* inst, |
120 | std::set<Instruction*>* returned_set, |
121 | bool ignore_phi_users, bool report_loads) { |
122 | assert(returned_set && "Set to be returned cannot be null." ); |
123 | |
124 | analysis::DefUseManager* def_use = context_->get_def_use_mgr(); |
125 | std::set<Instruction*>& inst_set = *returned_set; |
126 | |
127 | // We create this functor to traverse the use def chain to build the |
128 | // grouping of related instructions. The lambda captures the std::function |
129 | // to allow it to recurse. |
130 | std::function<void(Instruction*)> traverser_functor; |
131 | traverser_functor = [this, def_use, &inst_set, &traverser_functor, |
132 | ignore_phi_users, report_loads](Instruction* user) { |
133 | // If we've seen the instruction before or it is not inside the loop end the |
134 | // traversal. |
135 | if (!user || seen_instructions_.count(user) != 0 || |
136 | !context_->get_instr_block(user) || |
137 | !loop_->IsInsideLoop(context_->get_instr_block(user))) { |
138 | return; |
139 | } |
140 | |
141 | // Don't include labels or loop merge instructions in the instruction sets. |
142 | // Including them would mean we group instructions related only by using the |
143 | // same labels (i.e phis). We already preempt the inclusion of |
144 | // OpSelectionMerge by adding related instructions to the seen_instructions_ |
145 | // set. |
146 | if (user->opcode() == SpvOp::SpvOpLoopMerge || |
147 | user->opcode() == SpvOp::SpvOpLabel) |
148 | return; |
149 | |
150 | // If the |report_loads| flag is set, set the class field |
151 | // load_used_in_condition_ to false. This is used to check that none of the |
152 | // condition checks in the loop rely on loads. |
153 | if (user->opcode() == SpvOp::SpvOpLoad && report_loads) { |
154 | load_used_in_condition_ = true; |
155 | } |
156 | |
157 | // Add the instruction to the set of instructions already seen, this breaks |
158 | // recursion and allows us to ignore certain instructions. |
159 | seen_instructions_.insert(user); |
160 | |
161 | inst_set.insert(user); |
162 | |
163 | // Wrapper functor to traverse the operands of each instruction. |
164 | auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) { |
165 | traverser_functor(def_use->GetDef(*id)); |
166 | }; |
167 | user->ForEachInOperand(traverse_operand); |
168 | |
169 | // For the first traversal we want to ignore the users of the phi. |
170 | if (ignore_phi_users && user->opcode() == SpvOp::SpvOpPhi) return; |
171 | |
172 | // Traverse each user with this lambda. |
173 | def_use->ForEachUser(user, traverser_functor); |
174 | |
175 | // Wrapper functor for the use traversal. |
176 | auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) { |
177 | traverser_functor(use); |
178 | }; |
179 | def_use->ForEachUse(user, traverse_use); |
180 | |
181 | }; |
182 | |
183 | // We start the traversal of the use def graph by invoking the above |
184 | // lambda with the |inst| parameter. |
185 | traverser_functor(inst); |
186 | } |
187 | |
188 | bool LoopFissionImpl::GroupInstructionsByUseDef() { |
189 | std::vector<std::set<Instruction*>> sets{}; |
190 | |
191 | // We want to ignore all the instructions stemming from the loop condition |
192 | // instruction. |
193 | BasicBlock* condition_block = loop_->FindConditionBlock(); |
194 | |
195 | if (!condition_block) return false; |
196 | Instruction* condition = &*condition_block->tail(); |
197 | |
198 | // We iterate over the blocks via iterating over all the blocks in the |
199 | // function, we do this so we are iterating in the same order which the blocks |
200 | // appear in the binary. |
201 | Function& function = *loop_->GetHeaderBlock()->GetParent(); |
202 | |
203 | // Create a temporary set to ignore certain groups of instructions within the |
204 | // loop. We don't want any instructions related to control flow to be removed |
205 | // from either loop only instructions within the control flow bodies. |
206 | std::set<Instruction*> instructions_to_ignore{}; |
207 | TraverseUseDef(condition, &instructions_to_ignore, true, true); |
208 | |
209 | // Traverse control flow instructions to ensure they are added to the |
210 | // seen_instructions_ set and will be ignored when it it called with actual |
211 | // sets. |
212 | for (BasicBlock& block : function) { |
213 | if (!loop_->IsInsideLoop(block.id())) continue; |
214 | |
215 | for (Instruction& inst : block) { |
216 | // Ignore all instructions related to control flow. |
217 | if (inst.opcode() == SpvOp::SpvOpSelectionMerge || inst.IsBranch()) { |
218 | TraverseUseDef(&inst, &instructions_to_ignore, true, true); |
219 | } |
220 | } |
221 | } |
222 | |
223 | // Traverse the instructions and generate the sets, automatically ignoring any |
224 | // instructions in instructions_to_ignore. |
225 | for (BasicBlock& block : function) { |
226 | if (!loop_->IsInsideLoop(block.id()) || |
227 | loop_->GetHeaderBlock()->id() == block.id()) |
228 | continue; |
229 | |
230 | for (Instruction& inst : block) { |
231 | // Record the order that each load/store is seen. |
232 | if (inst.opcode() == SpvOp::SpvOpLoad || |
233 | inst.opcode() == SpvOp::SpvOpStore) { |
234 | instruction_order_[&inst] = instruction_order_.size(); |
235 | } |
236 | |
237 | // Ignore instructions already seen in a traversal. |
238 | if (seen_instructions_.count(&inst) != 0) { |
239 | continue; |
240 | } |
241 | |
242 | // Build the set. |
243 | std::set<Instruction*> inst_set{}; |
244 | TraverseUseDef(&inst, &inst_set); |
245 | if (!inst_set.empty()) sets.push_back(std::move(inst_set)); |
246 | } |
247 | } |
248 | |
249 | // If we have one or zero sets return false to indicate that due to |
250 | // insufficient instructions we couldn't split the loop into two groups and |
251 | // thus the loop can't be split any further. |
252 | if (sets.size() < 2) { |
253 | return false; |
254 | } |
255 | |
256 | // Merge the loop sets into two different sets. In CanPerformSplit we will |
257 | // validate that we don't break the relative ordering of loads/stores by doing |
258 | // this. |
259 | for (size_t index = 0; index < sets.size() / 2; ++index) { |
260 | cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end()); |
261 | } |
262 | for (size_t index = sets.size() / 2; index < sets.size(); ++index) { |
263 | original_loop_instructions_.insert(sets[index].begin(), sets[index].end()); |
264 | } |
265 | |
266 | return true; |
267 | } |
268 | |
269 | bool LoopFissionImpl::CanPerformSplit() { |
270 | // Return false if any of the condition instructions in the loop depend on a |
271 | // load. |
272 | if (load_used_in_condition_) { |
273 | return false; |
274 | } |
275 | |
276 | // Build a list of all parent loops of this loop. Loop dependence analysis |
277 | // needs this structure. |
278 | std::vector<const Loop*> loops; |
279 | Loop* parent_loop = loop_; |
280 | while (parent_loop) { |
281 | loops.push_back(parent_loop); |
282 | parent_loop = parent_loop->GetParent(); |
283 | } |
284 | |
285 | LoopDependenceAnalysis analysis{context_, loops}; |
286 | |
287 | // A list of all the stores in the cloned loop. |
288 | std::vector<Instruction*> set_one_stores{}; |
289 | |
290 | // A list of all the loads in the cloned loop. |
291 | std::vector<Instruction*> set_one_loads{}; |
292 | |
293 | // Populate the above lists. |
294 | for (Instruction* inst : cloned_loop_instructions_) { |
295 | if (inst->opcode() == SpvOp::SpvOpStore) { |
296 | set_one_stores.push_back(inst); |
297 | } else if (inst->opcode() == SpvOp::SpvOpLoad) { |
298 | set_one_loads.push_back(inst); |
299 | } |
300 | |
301 | // If we find any instruction which we can't move (such as a barrier), |
302 | // return false. |
303 | if (!MovableInstruction(*inst)) return false; |
304 | } |
305 | |
306 | // We need to calculate the depth of the loop to create the loop dependency |
307 | // distance vectors. |
308 | const size_t loop_depth = loop_->GetDepth(); |
309 | |
310 | // Check the dependencies between loads in the cloned loop and stores in the |
311 | // original and vice versa. |
312 | for (Instruction* inst : original_loop_instructions_) { |
313 | // If we find any instruction which we can't move (such as a barrier), |
314 | // return false. |
315 | if (!MovableInstruction(*inst)) return false; |
316 | |
317 | // Look at the dependency between the loads in the original and stores in |
318 | // the cloned loops. |
319 | if (inst->opcode() == SpvOp::SpvOpLoad) { |
320 | for (Instruction* store : set_one_stores) { |
321 | DistanceVector vec{loop_depth}; |
322 | |
323 | // If the store actually should appear after the load, return false. |
324 | // This means the store has been placed in the wrong grouping. |
325 | if (instruction_order_[store] > instruction_order_[inst]) { |
326 | return false; |
327 | } |
328 | // If not independent check the distance vector. |
329 | if (!analysis.GetDependence(store, inst, &vec)) { |
330 | for (DistanceEntry& entry : vec.GetEntries()) { |
331 | // A distance greater than zero means that the store in the cloned |
332 | // loop has a dependency on the load in the original loop. |
333 | if (entry.distance > 0) return false; |
334 | } |
335 | } |
336 | } |
337 | } else if (inst->opcode() == SpvOp::SpvOpStore) { |
338 | for (Instruction* load : set_one_loads) { |
339 | DistanceVector vec{loop_depth}; |
340 | |
341 | // If the load actually should appear after the store, return false. |
342 | if (instruction_order_[load] > instruction_order_[inst]) { |
343 | return false; |
344 | } |
345 | |
346 | // If not independent check the distance vector. |
347 | if (!analysis.GetDependence(inst, load, &vec)) { |
348 | for (DistanceEntry& entry : vec.GetEntries()) { |
349 | // A distance less than zero means the load in the cloned loop is |
350 | // dependent on the store instruction in the original loop. |
351 | if (entry.distance < 0) return false; |
352 | } |
353 | } |
354 | } |
355 | } |
356 | } |
357 | return true; |
358 | } |
359 | |
360 | Loop* LoopFissionImpl::SplitLoop() { |
361 | // Clone the loop. |
362 | LoopUtils util{context_, loop_}; |
363 | LoopUtils::LoopCloningResult clone_results; |
364 | Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results); |
365 | |
366 | // Update the OpLoopMerge in the cloned loop. |
367 | cloned_loop->UpdateLoopMergeInst(); |
368 | |
369 | // Add the loop_ to the module. |
370 | // TODO(1841): Handle failure to create pre-header. |
371 | Function::iterator it = |
372 | util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id()); |
373 | util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(), |
374 | clone_results.cloned_bb_.end(), ++it); |
375 | loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock()); |
376 | |
377 | std::vector<Instruction*> instructions_to_kill{}; |
378 | |
379 | // Kill all the instructions which should appear in the cloned loop but not in |
380 | // the original loop. |
381 | for (uint32_t id : loop_->GetBlocks()) { |
382 | BasicBlock* block = context_->cfg()->block(id); |
383 | |
384 | for (Instruction& inst : *block) { |
385 | // If the instruction appears in the cloned loop instruction group, kill |
386 | // it. |
387 | if (cloned_loop_instructions_.count(&inst) == 1 && |
388 | original_loop_instructions_.count(&inst) == 0) { |
389 | instructions_to_kill.push_back(&inst); |
390 | if (inst.opcode() == SpvOp::SpvOpPhi) { |
391 | context_->ReplaceAllUsesWith( |
392 | inst.result_id(), clone_results.value_map_[inst.result_id()]); |
393 | } |
394 | } |
395 | } |
396 | } |
397 | |
398 | // Kill all instructions which should appear in the original loop and not in |
399 | // the cloned loop. |
400 | for (uint32_t id : cloned_loop->GetBlocks()) { |
401 | BasicBlock* block = context_->cfg()->block(id); |
402 | for (Instruction& inst : *block) { |
403 | Instruction* old_inst = clone_results.ptr_map_[&inst]; |
404 | // If the instruction belongs to the original loop instruction group, kill |
405 | // it. |
406 | if (cloned_loop_instructions_.count(old_inst) == 0 && |
407 | original_loop_instructions_.count(old_inst) == 1) { |
408 | instructions_to_kill.push_back(&inst); |
409 | } |
410 | } |
411 | } |
412 | |
413 | for (Instruction* i : instructions_to_kill) { |
414 | context_->KillInst(i); |
415 | } |
416 | |
417 | return cloned_loop; |
418 | } |
419 | |
420 | LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split, |
421 | bool split_multiple_times) |
422 | : split_multiple_times_(split_multiple_times) { |
423 | // Split if the number of registers in the loop exceeds |
424 | // |register_threshold_to_split|. |
425 | split_criteria_ = |
426 | [register_threshold_to_split]( |
427 | const RegisterLiveness::RegionRegisterLiveness& liveness) { |
428 | return liveness.used_registers_ > register_threshold_to_split; |
429 | }; |
430 | } |
431 | |
432 | LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) { |
433 | // Split by default. |
434 | split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) { |
435 | return true; |
436 | }; |
437 | } |
438 | |
439 | bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) { |
440 | LivenessAnalysis* analysis = c->GetLivenessAnalysis(); |
441 | |
442 | RegisterLiveness::RegionRegisterLiveness liveness{}; |
443 | |
444 | Function* function = loop.GetHeaderBlock()->GetParent(); |
445 | analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness); |
446 | |
447 | return split_criteria_(liveness); |
448 | } |
449 | |
450 | Pass::Status LoopFissionPass::Process() { |
451 | bool changed = false; |
452 | |
453 | for (Function& f : *context()->module()) { |
454 | // We collect all the inner most loops in the function and run the loop |
455 | // splitting util on each. The reason we do this is to allow us to iterate |
456 | // over each, as creating new loops will invalidate the the loop iterator. |
457 | std::vector<Loop*> inner_most_loops{}; |
458 | LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f); |
459 | for (Loop& loop : loop_descriptor) { |
460 | if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) { |
461 | inner_most_loops.push_back(&loop); |
462 | } |
463 | } |
464 | |
465 | // List of new loops which meet the criteria to be split again. |
466 | std::vector<Loop*> new_loops_to_split{}; |
467 | |
468 | while (!inner_most_loops.empty()) { |
469 | for (Loop* loop : inner_most_loops) { |
470 | LoopFissionImpl impl{context(), loop}; |
471 | |
472 | // Group the instructions in the loop into two different sets of related |
473 | // instructions. If we can't group the instructions into the two sets |
474 | // then we can't split the loop any further. |
475 | if (!impl.GroupInstructionsByUseDef()) { |
476 | continue; |
477 | } |
478 | |
479 | if (impl.CanPerformSplit()) { |
480 | Loop* second_loop = impl.SplitLoop(); |
481 | changed = true; |
482 | context()->InvalidateAnalysesExceptFor( |
483 | IRContext::kAnalysisLoopAnalysis); |
484 | |
485 | // If the newly created loop meets the criteria to be split, split it |
486 | // again. |
487 | if (ShouldSplitLoop(*second_loop, context())) |
488 | new_loops_to_split.push_back(second_loop); |
489 | |
490 | // If the original loop (now split) still meets the criteria to be |
491 | // split, split it again. |
492 | if (ShouldSplitLoop(*loop, context())) |
493 | new_loops_to_split.push_back(loop); |
494 | } |
495 | } |
496 | |
497 | // If the split multiple times flag has been set add the new loops which |
498 | // meet the splitting criteria into the list of loops to be split on the |
499 | // next iteration. |
500 | if (split_multiple_times_) { |
501 | inner_most_loops = std::move(new_loops_to_split); |
502 | } else { |
503 | break; |
504 | } |
505 | } |
506 | } |
507 | |
508 | return changed ? Pass::Status::SuccessWithChange |
509 | : Pass::Status::SuccessWithoutChange; |
510 | } |
511 | |
512 | } // namespace opt |
513 | } // namespace spvtools |
514 | |