1// Copyright (c) 2018 Google LLC.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "source/opt/loop_unroller.h"
16
17#include <limits>
18#include <map>
19#include <memory>
20#include <unordered_map>
21#include <utility>
22#include <vector>
23
24#include "source/opt/ir_builder.h"
25#include "source/opt/loop_utils.h"
26
27// Implements loop util unrolling functionality for fully and partially
28// unrolling loops. Given a factor it will duplicate the loop that many times,
29// appending each one to the end of the old loop and removing backedges, to
30// create a new unrolled loop.
31//
32// 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
33// loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
34// validate that a given loop can be unrolled. That method (along with the
35// constructor of loop) checks that the IR is in the expected canonicalised
36// format.
37//
38// 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
39// perform the unrolling. This implements helper methods to copy the loop basic
40// blocks and remap the ids of instructions used inside them.
41//
42// 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
43// actually performs the loop duplication. It does this by creating a
44// LoopUnrollState object and then copying the loop as given by the factor
45// parameter. The LoopUnrollState object retains the state of the unroller
46// between the loop body copies as each iteration needs information on the last
47// to adjust the phi induction variable, adjust the OpLoopMerge instruction in
48// the main loop header, and change the previous continue block to point to the
49// new header and the new continue block to the main loop header.
50//
51// 4 - If the loop is to be fully unrolled then it is simply closed after step
52// 3, with the OpLoopMerge being deleted, the backedge removed, and the
53// condition blocks folded.
54//
55// 5 - If it is being partially unrolled: if the unrolling factor leaves the
56// loop with an even number of bodies with respect to the number of loop
57// iterations then step 3 is all that is needed. If it is uneven then we need to
58// duplicate the loop completely and unroll the duplicated loop to cover the
59// residual part and adjust the first loop to cover only the "even" part. For
60// instance if you request an unroll factor of 3 on a loop with 10 iterations
61// then copying the body three times would leave you with three bodies in the
62// loop
63// where the loop still iterates over each 4 times. So we make two loops one
64// iterating once then a second loop of three iterating 3 times.
65
66namespace spvtools {
67namespace opt {
68namespace {
69
70// Loop control constant value for DontUnroll flag.
71static const uint32_t kLoopControlDontUnrollIndex = 2;
72
73// Operand index of the loop control parameter of the OpLoopMerge.
74static const uint32_t kLoopControlIndex = 2;
75
76// This utility class encapsulates some of the state we need to maintain between
77// loop unrolls. Specifically it maintains key blocks and the induction variable
78// in the current loop duplication step and the blocks from the previous one.
79// This is because each step of the unroll needs to use data from both the
80// preceding step and the original loop.
81struct LoopUnrollState {
82 LoopUnrollState()
83 : previous_phi_(nullptr),
84 previous_latch_block_(nullptr),
85 previous_condition_block_(nullptr),
86 new_phi(nullptr),
87 new_continue_block(nullptr),
88 new_condition_block(nullptr),
89 new_header_block(nullptr) {}
90
91 // Initialize from the loop descriptor class.
92 LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
93 BasicBlock* condition, std::vector<Instruction*>&& phis)
94 : previous_phi_(induction),
95 previous_latch_block_(latch_block),
96 previous_condition_block_(condition),
97 new_phi(nullptr),
98 new_continue_block(nullptr),
99 new_condition_block(nullptr),
100 new_header_block(nullptr) {
101 previous_phis_ = std::move(phis);
102 }
103
104 // Swap the state so that the new nodes are now the previous nodes.
105 void NextIterationState() {
106 previous_phi_ = new_phi;
107 previous_latch_block_ = new_latch_block;
108 previous_condition_block_ = new_condition_block;
109 previous_phis_ = std::move(new_phis_);
110
111 // Clear new nodes.
112 new_phi = nullptr;
113 new_continue_block = nullptr;
114 new_condition_block = nullptr;
115 new_header_block = nullptr;
116 new_latch_block = nullptr;
117
118 // Clear new block/instruction maps.
119 new_blocks.clear();
120 new_inst.clear();
121 ids_to_new_inst.clear();
122 }
123
124 // The induction variable from the immediately preceding loop body.
125 Instruction* previous_phi_;
126
127 // All the phi nodes from the previous loop iteration.
128 std::vector<Instruction*> previous_phis_;
129
130 std::vector<Instruction*> new_phis_;
131
132 // The previous latch block. The backedge will be removed from this and
133 // added to the new latch block.
134 BasicBlock* previous_latch_block_;
135
136 // The previous condition block. This may be folded to flatten the loop.
137 BasicBlock* previous_condition_block_;
138
139 // The new induction variable.
140 Instruction* new_phi;
141
142 // The new continue block.
143 BasicBlock* new_continue_block;
144
145 // The new condition block.
146 BasicBlock* new_condition_block;
147
148 // The new header block.
149 BasicBlock* new_header_block;
150
151 // The new latch block.
152 BasicBlock* new_latch_block;
153
154 // A mapping of new block ids to the original blocks which they were copied
155 // from.
156 std::unordered_map<uint32_t, BasicBlock*> new_blocks;
157
158 // A mapping of the original instruction ids to the instruction ids to their
159 // copies.
160 std::unordered_map<uint32_t, uint32_t> new_inst;
161
162 std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
163};
164
165// This class implements the actual unrolling. It uses a LoopUnrollState to
166// maintain the state of the unrolling inbetween steps.
167class LoopUnrollerUtilsImpl {
168 public:
169 using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
170
171 LoopUnrollerUtilsImpl(IRContext* c, Function* function)
172 : context_(c),
173 function_(*function),
174 loop_condition_block_(nullptr),
175 loop_induction_variable_(nullptr),
176 number_of_loop_iterations_(0),
177 loop_step_value_(0),
178 loop_init_value_(0) {}
179
180 // Unroll the |loop| by given |factor| by copying the whole body |factor|
181 // times. The resulting basicblock structure will remain a loop.
182 void PartiallyUnroll(Loop*, size_t factor);
183
184 // If partially unrolling the |loop| would leave the loop with too many bodies
185 // for its number of iterations then this method should be used. This method
186 // will duplicate the |loop| completely, making the duplicated loop the
187 // successor of the original's merge block. The original loop will have its
188 // condition changed to loop over the residual part and the duplicate will be
189 // partially unrolled. The resulting structure will be two loops.
190 void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
191
192 // Fully unroll the |loop| by copying the full body by the total number of
193 // loop iterations, folding all conditions, and removing the backedge from the
194 // continue block to the header.
195 void FullyUnroll(Loop* loop);
196
197 // Get the ID of the variable in the |phi| paired with |label|.
198 uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
199
200 // Close the loop by removing the OpLoopMerge from the |loop| header block and
201 // making the backedge point to the merge block.
202 void CloseUnrolledLoop(Loop* loop);
203
204 // Remove the OpConditionalBranch instruction inside |conditional_block| used
205 // to branch to either exit or continue the loop and replace it with an
206 // unconditional OpBranch to block |new_target|.
207 void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
208
209 // Add all blocks_to_add_ to function_ at the |insert_point|.
210 void AddBlocksToFunction(const BasicBlock* insert_point);
211
212 // Duplicates the |old_loop|, cloning each body and remaping the ids without
213 // removing instructions or changing relative structure. Result will be stored
214 // in |new_loop|.
215 void DuplicateLoop(Loop* old_loop, Loop* new_loop);
216
217 inline size_t GetLoopIterationCount() const {
218 return number_of_loop_iterations_;
219 }
220
221 // Extracts the initial state information from the |loop|.
222 void Init(Loop* loop);
223
224 // Replace the uses of each induction variable outside the loop with the final
225 // value of the induction variable before the loop exit. To reflect the proper
226 // state of a fully unrolled loop.
227 void ReplaceInductionUseWithFinalValue(Loop* loop);
228
229 // Remove all the instructions in the invalidated_instructions_ vector.
230 void RemoveDeadInstructions();
231
232 // Replace any use of induction variables outwith the loop with the final
233 // value of the induction variable in the unrolled loop.
234 void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
235
236 // Set the LoopControl operand of the OpLoopMerge instruction to be
237 // DontUnroll.
238 void MarkLoopControlAsDontUnroll(Loop* loop) const;
239
240 private:
241 // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
242 // to old
243 // ids. |loop| is used to identify special loop blocks (header, continue,
244 // ect).
245 void AssignNewResultIds(BasicBlock* basic_block);
246
247 // Using the map built by AssignNewResultIds, replace the uses in |inst|
248 // by the id that the use maps to.
249 void RemapOperands(Instruction* inst);
250
251 // Using the map built by AssignNewResultIds, for each instruction in
252 // |basic_block| use
253 // that map to substitute the IDs used by instructions (in the operands) with
254 // the new ids.
255 void RemapOperands(BasicBlock* basic_block);
256
257 // Copy the whole body of the loop, all blocks dominated by the |loop| header
258 // and not dominated by the |loop| merge. The copied body will be linked to by
259 // the old |loop| continue block and the new body will link to the |loop|
260 // header via the new continue block. |eliminate_conditions| is used to decide
261 // whether or not to fold all the condition blocks other than the last one.
262 void CopyBody(Loop* loop, bool eliminate_conditions);
263
264 // Copy a given |block_to_copy| in the |loop| and record the mapping of the
265 // old/new ids. |preserve_instructions| determines whether or not the method
266 // will modify (other than result_id) instructions which are copied.
267 void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
268 bool preserve_instructions);
269
270 // The actual implementation of the unroll step. Unrolls |loop| by given
271 // |factor| by copying the body by |factor| times. Also propagates the
272 // induction variable value throughout the copies.
273 void Unroll(Loop* loop, size_t factor);
274
275 // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
276 // as computed by the method ComputeLoopOrderedBlocks.
277 void ComputeLoopOrderedBlocks(Loop* loop);
278
279 // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
280 // the parent exists.
281 void AddBlocksToLoop(Loop* loop) const;
282
283 // After the partially unroll step the phi instructions in the header block
284 // will be in an illegal format. This function makes the phis legal by making
285 // the edge from the latch block come from the new latch block and the value
286 // to be the actual value of the phi at that point.
287 void LinkLastPhisToStart(Loop* loop) const;
288
289 // A pointer to the IRContext. Used to add/remove instructions and for usedef
290 // chains.
291 IRContext* context_;
292
293 // A reference the function the loop is within.
294 Function& function_;
295
296 // A list of basic blocks to be added to the loop at the end of an unroll
297 // step.
298 BasicBlockListTy blocks_to_add_;
299
300 // List of instructions which are now dead and can be removed.
301 std::vector<Instruction*> invalidated_instructions_;
302
303 // Maintains the current state of the transform between calls to unroll.
304 LoopUnrollState state_;
305
306 // An ordered list containing the loop basic blocks.
307 std::vector<BasicBlock*> loop_blocks_inorder_;
308
309 // The block containing the condition check which contains a conditional
310 // branch to the merge and continue block.
311 BasicBlock* loop_condition_block_;
312
313 // The induction variable of the loop.
314 Instruction* loop_induction_variable_;
315
316 // Phis used in the loop need to be remapped to use the actual result values
317 // and then be remapped at the end.
318 std::vector<Instruction*> loop_phi_instructions_;
319
320 // The number of loop iterations that the loop would preform pre-unroll.
321 size_t number_of_loop_iterations_;
322
323 // The amount that the loop steps each iteration.
324 int64_t loop_step_value_;
325
326 // The value the loop starts stepping from.
327 int64_t loop_init_value_;
328};
329
330/*
331 * Static helper functions.
332 */
333
334// Retrieve the index of the OpPhi instruction |phi| which corresponds to the
335// incoming |block| id.
336static uint32_t GetPhiIndexFromLabel(const BasicBlock* block,
337 const Instruction* phi) {
338 for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
339 if (block->id() == phi->GetSingleWordInOperand(i)) {
340 return i;
341 }
342 }
343 assert(false && "Could not find operand in instruction.");
344 return 0;
345}
346
347void LoopUnrollerUtilsImpl::Init(Loop* loop) {
348 loop_condition_block_ = loop->FindConditionBlock();
349
350 // When we reinit the second loop during PartiallyUnrollResidualFactor we need
351 // to use the cached value from the duplicate step as the dominator tree
352 // basded solution, loop->FindConditionBlock, requires all the nodes to be
353 // connected up with the correct branches. They won't be at this point.
354 if (!loop_condition_block_) {
355 loop_condition_block_ = state_.new_condition_block;
356 }
357 assert(loop_condition_block_);
358
359 loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
360 assert(loop_induction_variable_);
361
362 bool found = loop->FindNumberOfIterations(
363 loop_induction_variable_, &*loop_condition_block_->ctail(),
364 &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
365 (void)found; // To silence unused variable warning on release builds.
366 assert(found);
367
368 // Blocks are stored in an unordered set of ids in the loop class, we need to
369 // create the dominator ordered list.
370 ComputeLoopOrderedBlocks(loop);
371}
372
373// This function is used to partially unroll the loop when the factor provided
374// would normally lead to an illegal optimization. Instead of just unrolling the
375// loop it creates two loops and unrolls one and adjusts the condition on the
376// other. The end result being that the new loop pair iterates over the correct
377// number of bodies.
378void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
379 size_t factor) {
380 // TODO(1841): Handle id overflow.
381 std::unique_ptr<Instruction> new_label{new Instruction(
382 context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
383 std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
384
385 // Save the id of the block before we move it.
386 uint32_t new_merge_id = new_exit_bb->id();
387
388 // Add the block the list of blocks to add, we want this merge block to be
389 // right at the start of the new blocks.
390 blocks_to_add_.push_back(std::move(new_exit_bb));
391 BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
392 Instruction& original_conditional_branch = *loop_condition_block_->tail();
393 // Duplicate the loop, providing access to the blocks of both loops.
394 // This is a naked new due to the VS2013 requirement of not having unique
395 // pointers in vectors, as it will be inserted into a vector with
396 // loop_descriptor.AddLoop.
397 std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
398
399 // Clear the basic blocks of the new loop.
400 new_loop->ClearBlocks();
401
402 DuplicateLoop(loop, new_loop.get());
403
404 // Add the blocks to the function.
405 AddBlocksToFunction(loop->GetMergeBlock());
406 blocks_to_add_.clear();
407
408 // Create a new merge block for the first loop.
409 InstructionBuilder builder{context_, new_exit_bb_raw};
410 // Make the first loop branch to the second.
411 builder.AddBranch(new_loop->GetHeaderBlock()->id());
412
413 loop_condition_block_ = state_.new_condition_block;
414 loop_induction_variable_ = state_.new_phi;
415 // Unroll the new loop by the factor with the usual -1 to account for the
416 // existing block iteration.
417 Unroll(new_loop.get(), factor);
418
419 LinkLastPhisToStart(new_loop.get());
420 AddBlocksToLoop(new_loop.get());
421
422 // Add the new merge block to the back of the list of blocks to be added. It
423 // needs to be the last block added to maintain dominator order in the binary.
424 blocks_to_add_.push_back(
425 std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
426
427 // Add the blocks to the function.
428 AddBlocksToFunction(loop->GetMergeBlock());
429
430 // Reset the usedef analysis.
431 context_->InvalidateAnalysesExceptFor(
432 IRContext::Analysis::kAnalysisLoopAnalysis);
433 analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
434
435 // The loop condition.
436 Instruction* condition_check = def_use_manager->GetDef(
437 original_conditional_branch.GetSingleWordOperand(0));
438
439 // This should have been checked by the LoopUtils::CanPerformUnroll function
440 // before entering this.
441 assert(loop->IsSupportedCondition(condition_check->opcode()));
442
443 // We need to account for the initial body when calculating the remainder.
444 int64_t remainder = Loop::GetResidualConditionValue(
445 condition_check->opcode(), loop_init_value_, loop_step_value_,
446 number_of_loop_iterations_, factor);
447
448 assert(remainder > std::numeric_limits<int32_t>::min() &&
449 remainder < std::numeric_limits<int32_t>::max());
450
451 Instruction* new_constant = nullptr;
452
453 // If the remainder is negative then we add a signed constant, otherwise just
454 // add an unsigned constant.
455 if (remainder < 0) {
456 new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
457 } else {
458 new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
459 }
460
461 uint32_t constant_id = new_constant->result_id();
462
463 // Update the condition check.
464 condition_check->SetInOperand(1, {constant_id});
465
466 // Update the next phi node. The phi will have a constant value coming in from
467 // the preheader block. For the duplicated loop we need to update the constant
468 // to be the amount of iterations covered by the first loop and the incoming
469 // block to be the first loops new merge block.
470 std::vector<Instruction*> new_inductions;
471 new_loop->GetInductionVariables(new_inductions);
472
473 std::vector<Instruction*> old_inductions;
474 loop->GetInductionVariables(old_inductions);
475 for (size_t index = 0; index < new_inductions.size(); ++index) {
476 Instruction* new_induction = new_inductions[index];
477 Instruction* old_induction = old_inductions[index];
478 // Get the index of the loop initalizer, the value coming in from the
479 // preheader.
480 uint32_t initalizer_index =
481 GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
482
483 // Replace the second loop initalizer with the phi from the first
484 new_induction->SetInOperand(initalizer_index - 1,
485 {old_induction->result_id()});
486 new_induction->SetInOperand(initalizer_index, {new_merge_id});
487
488 // If the use of the first loop induction variable is outside of the loop
489 // then replace that use with the second loop induction variable.
490 uint32_t second_loop_induction = new_induction->result_id();
491 auto replace_use_outside_of_loop = [loop, second_loop_induction](
492 Instruction* user,
493 uint32_t operand_index) {
494 if (!loop->IsInsideLoop(user)) {
495 user->SetOperand(operand_index, {second_loop_induction});
496 }
497 };
498
499 context_->get_def_use_mgr()->ForEachUse(old_induction,
500 replace_use_outside_of_loop);
501 }
502
503 context_->InvalidateAnalysesExceptFor(
504 IRContext::Analysis::kAnalysisLoopAnalysis);
505
506 context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
507
508 LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
509
510 loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
511
512 RemoveDeadInstructions();
513}
514
515// Mark this loop as DontUnroll as it will already be unrolled and it may not
516// be safe to unroll a previously partially unrolled loop.
517void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
518 Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
519 assert(loop_merge_inst &&
520 "Loop merge instruction could not be found after entering unroller "
521 "(should have exited before this)");
522 loop_merge_inst->SetInOperand(kLoopControlIndex,
523 {kLoopControlDontUnrollIndex});
524}
525
526// Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
527// backedge intact. This will leave the loop with |factor| number of bodies
528// after accounting for the initial body.
529void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
530 // If we unroll a loop partially it will not be safe to unroll it further.
531 // This is due to the current method of calculating the number of loop
532 // iterations.
533 MarkLoopControlAsDontUnroll(loop);
534
535 std::vector<Instruction*> inductions;
536 loop->GetInductionVariables(inductions);
537 state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
538 loop_condition_block_, std::move(inductions)};
539 for (size_t i = 0; i < factor - 1; ++i) {
540 CopyBody(loop, true);
541 }
542}
543
544void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
545 // Remove the dead instructions.
546 for (Instruction* inst : invalidated_instructions_) {
547 context_->KillInst(inst);
548 }
549}
550
551void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
552 context_->InvalidateAnalysesExceptFor(
553 IRContext::Analysis::kAnalysisLoopAnalysis |
554 IRContext::Analysis::kAnalysisDefUse |
555 IRContext::Analysis::kAnalysisInstrToBlockMapping);
556
557 std::vector<Instruction*> inductions;
558 loop->GetInductionVariables(inductions);
559
560 for (size_t index = 0; index < inductions.size(); ++index) {
561 uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
562 state_.previous_latch_block_->id());
563 context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
564 invalidated_instructions_.push_back(inductions[index]);
565 }
566}
567
568// Fully unroll the loop by partially unrolling it by the number of loop
569// iterations minus one for the body already accounted for.
570void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
571 // We unroll the loop by number of iterations in the loop.
572 Unroll(loop, number_of_loop_iterations_);
573
574 // The first condition block is preserved until now so it can be copied.
575 FoldConditionBlock(loop_condition_block_, 1);
576
577 // Delete the OpLoopMerge and remove the backedge to the header.
578 CloseUnrolledLoop(loop);
579
580 // Mark the loop for later deletion. This allows us to preserve the loop
581 // iterators but still disregard dead loops.
582 loop->MarkLoopForRemoval();
583
584 // If the loop has a parent add the new blocks to the parent.
585 if (loop->GetParent()) {
586 AddBlocksToLoop(loop->GetParent());
587 }
588
589 // Add the blocks to the function.
590 AddBlocksToFunction(loop->GetMergeBlock());
591
592 ReplaceInductionUseWithFinalValue(loop);
593
594 RemoveDeadInstructions();
595 // Invalidate all analyses.
596 context_->InvalidateAnalysesExceptFor(
597 IRContext::Analysis::kAnalysisLoopAnalysis |
598 IRContext::Analysis::kAnalysisDefUse);
599}
600
601// Copy a given basic block, give it a new result_id, and store the new block
602// and the id mapping in the state. |preserve_instructions| is used to determine
603// whether or not this function should edit instructions other than the
604// |result_id|.
605void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
606 bool preserve_instructions) {
607 // Clone the block exactly, including the IDs.
608 BasicBlock* basic_block = itr->Clone(context_);
609 basic_block->SetParent(itr->GetParent());
610
611 // Assign each result a new unique ID and keep a mapping of the old ids to
612 // the new ones.
613 AssignNewResultIds(basic_block);
614
615 // If this is the continue block we are copying.
616 if (itr == loop->GetContinueBlock()) {
617 // Make the OpLoopMerge point to this block for the continue.
618 if (!preserve_instructions) {
619 Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
620 merge_inst->SetInOperand(1, {basic_block->id()});
621 context_->UpdateDefUse(merge_inst);
622 }
623
624 state_.new_continue_block = basic_block;
625 }
626
627 // If this is the header block we are copying.
628 if (itr == loop->GetHeaderBlock()) {
629 state_.new_header_block = basic_block;
630
631 if (!preserve_instructions) {
632 // Remove the loop merge instruction if it exists.
633 Instruction* merge_inst = basic_block->GetLoopMergeInst();
634 if (merge_inst) invalidated_instructions_.push_back(merge_inst);
635 }
636 }
637
638 // If this is the latch block being copied, record it in the state.
639 if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
640
641 // If this is the condition block we are copying.
642 if (itr == loop_condition_block_) {
643 state_.new_condition_block = basic_block;
644 }
645
646 // Add this block to the list of blocks to add to the function at the end of
647 // the unrolling process.
648 blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
649
650 // Keep tracking the old block via a map.
651 state_.new_blocks[itr->id()] = basic_block;
652}
653
654void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
655 // Copy each basic block in the loop, give them new ids, and save state
656 // information.
657 for (const BasicBlock* itr : loop_blocks_inorder_) {
658 CopyBasicBlock(loop, itr, false);
659 }
660
661 // Set the previous latch block to point to the new header.
662 Instruction* latch_branch = state_.previous_latch_block_->terminator();
663 latch_branch->SetInOperand(0, {state_.new_header_block->id()});
664 context_->UpdateDefUse(latch_branch);
665
666 // As the algorithm copies the original loop blocks exactly, the tail of the
667 // latch block on iterations after the first one will be a branch to the new
668 // header and not the actual loop header. The last continue block in the loop
669 // should always be a backedge to the global header.
670 Instruction* new_latch_branch = state_.new_latch_block->terminator();
671 new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
672 context_->AnalyzeUses(new_latch_branch);
673
674 std::vector<Instruction*> inductions;
675 loop->GetInductionVariables(inductions);
676 for (size_t index = 0; index < inductions.size(); ++index) {
677 Instruction* master_copy = inductions[index];
678
679 assert(master_copy->result_id() != 0);
680 Instruction* induction_clone =
681 state_.ids_to_new_inst[state_.new_inst[master_copy->result_id()]];
682
683 state_.new_phis_.push_back(induction_clone);
684 assert(induction_clone->result_id() != 0);
685
686 if (!state_.previous_phis_.empty()) {
687 state_.new_inst[master_copy->result_id()] = GetPhiDefID(
688 state_.previous_phis_[index], state_.previous_latch_block_->id());
689 } else {
690 // Do not replace the first phi block ids.
691 state_.new_inst[master_copy->result_id()] = master_copy->result_id();
692 }
693 }
694
695 if (eliminate_conditions &&
696 state_.new_condition_block != loop_condition_block_) {
697 FoldConditionBlock(state_.new_condition_block, 1);
698 }
699
700 // Only reference to the header block is the backedge in the latch block,
701 // don't change this.
702 state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
703
704 for (auto& pair : state_.new_blocks) {
705 RemapOperands(pair.second);
706 }
707
708 for (Instruction* dead_phi : state_.new_phis_)
709 invalidated_instructions_.push_back(dead_phi);
710
711 // Swap the state so the new is now the previous.
712 state_.NextIterationState();
713}
714
715uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
716 uint32_t label) const {
717 for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
718 if (phi->GetSingleWordOperand(operand) == label) {
719 return phi->GetSingleWordOperand(operand - 1);
720 }
721 }
722 assert(false && "Could not find a phi index matching the provided label");
723 return 0;
724}
725
726void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
727 uint32_t operand_label) {
728 // Remove the old conditional branch to the merge and continue blocks.
729 Instruction& old_branch = *condition_block->tail();
730 uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
731
732 context_->KillInst(&old_branch);
733 // Add the new unconditional branch to the merge block.
734 InstructionBuilder builder(
735 context_, condition_block,
736 IRContext::Analysis::kAnalysisDefUse |
737 IRContext::Analysis::kAnalysisInstrToBlockMapping);
738 builder.AddBranch(new_target);
739}
740
741void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
742 // Remove the OpLoopMerge instruction from the function.
743 Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
744 invalidated_instructions_.push_back(merge_inst);
745
746 // Remove the final backedge to the header and make it point instead to the
747 // merge block.
748 Instruction* latch_instruction = state_.previous_latch_block_->terminator();
749 latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
750 context_->UpdateDefUse(latch_instruction);
751
752 // Remove all induction variables as the phis will now be invalid. Replace all
753 // uses with the constant initializer value (all uses of phis will be in
754 // the first iteration with the subsequent phis already having been removed).
755 std::vector<Instruction*> inductions;
756 loop->GetInductionVariables(inductions);
757
758 // We can use the state instruction mechanism to replace all internal loop
759 // values within the first loop trip (as the subsequent ones will be updated
760 // by the copy function) with the value coming in from the preheader and then
761 // use context ReplaceAllUsesWith for the uses outside the loop with the final
762 // trip phi value.
763 state_.new_inst.clear();
764 for (Instruction* induction : inductions) {
765 uint32_t initalizer_id =
766 GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
767
768 state_.new_inst[induction->result_id()] = initalizer_id;
769 }
770
771 for (BasicBlock* block : loop_blocks_inorder_) {
772 RemapOperands(block);
773 }
774
775 // Rewrite the last phis, since they may still reference the original phi.
776 for (Instruction* last_phi : state_.previous_phis_) {
777 RemapOperands(last_phi);
778 }
779}
780
781// Uses the first loop to create a copy of the loop with new IDs.
782void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
783 std::vector<BasicBlock*> new_block_order;
784
785 // Copy every block in the old loop.
786 for (const BasicBlock* itr : loop_blocks_inorder_) {
787 CopyBasicBlock(old_loop, itr, true);
788 new_block_order.push_back(blocks_to_add_.back().get());
789 }
790
791 // Clone the merge block, give it a new id and record it in the state.
792 BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
793 new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
794 AssignNewResultIds(new_merge);
795 state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
796
797 // Remap the operands of every instruction in the loop to point to the new
798 // copies.
799 for (auto& pair : state_.new_blocks) {
800 RemapOperands(pair.second);
801 }
802
803 loop_blocks_inorder_ = std::move(new_block_order);
804
805 AddBlocksToLoop(new_loop);
806
807 new_loop->SetHeaderBlock(state_.new_header_block);
808 new_loop->SetContinueBlock(state_.new_continue_block);
809 new_loop->SetLatchBlock(state_.new_latch_block);
810 new_loop->SetMergeBlock(new_merge);
811}
812
813// Whenever the utility copies a block it stores it in a tempory buffer, this
814// function adds the buffer into the Function. The blocks will be inserted
815// after the block |insert_point|.
816void LoopUnrollerUtilsImpl::AddBlocksToFunction(
817 const BasicBlock* insert_point) {
818 for (auto basic_block_iterator = function_.begin();
819 basic_block_iterator != function_.end(); ++basic_block_iterator) {
820 if (basic_block_iterator->id() == insert_point->id()) {
821 basic_block_iterator.InsertBefore(&blocks_to_add_);
822 return;
823 }
824 }
825
826 assert(
827 false &&
828 "Could not add basic blocks to function as insert point was not found.");
829}
830
831// Assign all result_ids in |basic_block| instructions to new IDs and preserve
832// the mapping of new ids to old ones.
833void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
834 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
835
836 // Label instructions aren't covered by normal traversal of the
837 // instructions.
838 // TODO(1841): Handle id overflow.
839 uint32_t new_label_id = context_->TakeNextId();
840
841 // Assign a new id to the label.
842 state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
843 basic_block->GetLabelInst()->SetResultId(new_label_id);
844 def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
845
846 for (Instruction& inst : *basic_block) {
847 uint32_t old_id = inst.result_id();
848
849 // Ignore stores etc.
850 if (old_id == 0) {
851 continue;
852 }
853
854 // Give the instruction a new id.
855 // TODO(1841): Handle id overflow.
856 inst.SetResultId(context_->TakeNextId());
857 def_use_mgr->AnalyzeInstDef(&inst);
858
859 // Save the mapping of old_id -> new_id.
860 state_.new_inst[old_id] = inst.result_id();
861 // Check if this instruction is the induction variable.
862 if (loop_induction_variable_->result_id() == old_id) {
863 // Save a pointer to the new copy of it.
864 state_.new_phi = &inst;
865 }
866 state_.ids_to_new_inst[inst.result_id()] = &inst;
867 }
868}
869
870void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
871 auto remap_operands_to_new_ids = [this](uint32_t* id) {
872 auto itr = state_.new_inst.find(*id);
873
874 if (itr != state_.new_inst.end()) {
875 *id = itr->second;
876 }
877 };
878
879 inst->ForEachInId(remap_operands_to_new_ids);
880 context_->AnalyzeUses(inst);
881}
882
883void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
884 for (Instruction& inst : *basic_block) {
885 RemapOperands(&inst);
886 }
887}
888
889// Generate the ordered list of basic blocks in the |loop| and cache it for
890// later use.
891void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
892 loop_blocks_inorder_.clear();
893 loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
894}
895
896// Adds the blocks_to_add_ to both the loop and to the parent.
897void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
898 // Add the blocks to this loop.
899 for (auto& block_itr : blocks_to_add_) {
900 loop->AddBasicBlock(block_itr.get());
901 }
902
903 // Add the blocks to the parent as well.
904 if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
905}
906
907void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
908 std::vector<Instruction*> inductions;
909 loop->GetInductionVariables(inductions);
910
911 for (size_t i = 0; i < inductions.size(); ++i) {
912 Instruction* last_phi_in_block = state_.previous_phis_[i];
913
914 uint32_t phi_index =
915 GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
916 uint32_t phi_variable =
917 last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
918 uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
919
920 Instruction* phi = inductions[i];
921 phi->SetInOperand(phi_index - 1, {phi_variable});
922 phi->SetInOperand(phi_index, {phi_label});
923 }
924}
925
926// Duplicate the |loop| body |factor| number of times while keeping the loop
927// backedge intact.
928void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
929 Unroll(loop, factor);
930 LinkLastPhisToStart(loop);
931 AddBlocksToLoop(loop);
932 AddBlocksToFunction(loop->GetMergeBlock());
933 RemoveDeadInstructions();
934}
935
936/*
937 * End LoopUtilsImpl.
938 */
939
940} // namespace
941
942/*
943 *
944 * Begin Utils.
945 *
946 * */
947
948bool LoopUtils::CanPerformUnroll() {
949 // The loop is expected to be in structured order.
950 if (!loop_->GetHeaderBlock()->GetMergeInst()) {
951 return false;
952 }
953
954 // Find check the loop has a condition we can find and evaluate.
955 const BasicBlock* condition = loop_->FindConditionBlock();
956 if (!condition) return false;
957
958 // Check that we can find and process the induction variable.
959 const Instruction* induction = loop_->FindConditionVariable(condition);
960 if (!induction || induction->opcode() != SpvOpPhi) return false;
961
962 // Check that we can find the number of loop iterations.
963 if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
964 return false;
965
966 // Make sure the latch block is a unconditional branch to the header
967 // block.
968 const Instruction& branch = *loop_->GetLatchBlock()->ctail();
969 bool branching_assumption =
970 branch.opcode() == SpvOpBranch &&
971 branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
972 if (!branching_assumption) {
973 return false;
974 }
975
976 std::vector<Instruction*> inductions;
977 loop_->GetInductionVariables(inductions);
978
979 // Ban breaks within the loop.
980 const std::vector<uint32_t>& merge_block_preds =
981 context_->cfg()->preds(loop_->GetMergeBlock()->id());
982 if (merge_block_preds.size() != 1) {
983 return false;
984 }
985
986 // Ban continues within the loop.
987 const std::vector<uint32_t>& continue_block_preds =
988 context_->cfg()->preds(loop_->GetContinueBlock()->id());
989 if (continue_block_preds.size() != 1) {
990 return false;
991 }
992
993 // Ban returns in the loop.
994 // Iterate over all the blocks within the loop and check that none of them
995 // exit the loop.
996 for (uint32_t label_id : loop_->GetBlocks()) {
997 const BasicBlock* block = context_->cfg()->block(label_id);
998 if (block->ctail()->opcode() == SpvOp::SpvOpKill ||
999 block->ctail()->opcode() == SpvOp::SpvOpReturn ||
1000 block->ctail()->opcode() == SpvOp::SpvOpReturnValue) {
1001 return false;
1002 }
1003 }
1004 // Can only unroll inner loops.
1005 if (!loop_->AreAllChildrenMarkedForRemoval()) {
1006 return false;
1007 }
1008
1009 return true;
1010}
1011
1012bool LoopUtils::PartiallyUnroll(size_t factor) {
1013 if (factor == 1 || !CanPerformUnroll()) return false;
1014
1015 // Create the unroller utility.
1016 LoopUnrollerUtilsImpl unroller{context_,
1017 loop_->GetHeaderBlock()->GetParent()};
1018 unroller.Init(loop_);
1019
1020 // If the unrolling factor is larger than or the same size as the loop just
1021 // fully unroll the loop.
1022 if (factor >= unroller.GetLoopIterationCount()) {
1023 unroller.FullyUnroll(loop_);
1024 return true;
1025 }
1026
1027 // If the loop unrolling factor is an residual number of iterations we need to
1028 // let run the loop for the residual part then let it branch into the unrolled
1029 // remaining part. We add one when calucating the remainder to take into
1030 // account the one iteration already in the loop.
1031 if (unroller.GetLoopIterationCount() % factor != 0) {
1032 unroller.PartiallyUnrollResidualFactor(loop_, factor);
1033 } else {
1034 unroller.PartiallyUnroll(loop_, factor);
1035 }
1036
1037 return true;
1038}
1039
1040bool LoopUtils::FullyUnroll() {
1041 if (!CanPerformUnroll()) return false;
1042
1043 std::vector<Instruction*> inductions;
1044 loop_->GetInductionVariables(inductions);
1045
1046 LoopUnrollerUtilsImpl unroller{context_,
1047 loop_->GetHeaderBlock()->GetParent()};
1048
1049 unroller.Init(loop_);
1050 unroller.FullyUnroll(loop_);
1051
1052 return true;
1053}
1054
1055void LoopUtils::Finalize() {
1056 // Clean up the loop descriptor to preserve the analysis.
1057
1058 LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
1059 LD->PostModificationCleanup();
1060}
1061
1062/*
1063 *
1064 * Begin Pass.
1065 *
1066 */
1067
1068Pass::Status LoopUnroller::Process() {
1069 bool changed = false;
1070 for (Function& f : *context()->module()) {
1071 LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
1072 for (Loop& loop : *LD) {
1073 LoopUtils loop_utils{context(), &loop};
1074 if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
1075 continue;
1076 }
1077
1078 if (fully_unroll_) {
1079 loop_utils.FullyUnroll();
1080 } else {
1081 loop_utils.PartiallyUnroll(unroll_factor_);
1082 }
1083 changed = true;
1084 }
1085 LD->PostModificationCleanup();
1086 }
1087
1088 return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
1089}
1090
1091} // namespace opt
1092} // namespace spvtools
1093