1 | // Copyright (c) 2018 Google LLC. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include <algorithm> |
16 | #include <memory> |
17 | #include <unordered_map> |
18 | #include <unordered_set> |
19 | #include <utility> |
20 | #include <vector> |
21 | |
22 | #include "source/cfa.h" |
23 | #include "source/opt/cfg.h" |
24 | #include "source/opt/ir_builder.h" |
25 | #include "source/opt/ir_context.h" |
26 | #include "source/opt/loop_descriptor.h" |
27 | #include "source/opt/loop_utils.h" |
28 | |
29 | namespace spvtools { |
30 | namespace opt { |
31 | |
32 | namespace { |
// Return true if |bb| is dominated by at least one block in |exits|.
34 | static inline bool DominatesAnExit(BasicBlock* bb, |
35 | const std::unordered_set<BasicBlock*>& exits, |
36 | const DominatorTree& dom_tree) { |
37 | for (BasicBlock* e_bb : exits) |
38 | if (dom_tree.Dominates(bb, e_bb)) return true; |
39 | return false; |
40 | } |
41 | |
// Utility class to rewrite out-of-loop uses of an in-loop definition in terms
// of phi instructions to achieve an LCSSA form.
// For a given definition, the class user registers phi instructions using that
// definition in all loop exit blocks by which the definition escapes.
// Then, when rewriting a use of the definition, the rewriter walks the
// paths from the use to the loop exits. At each step, it inserts a phi
// instruction to merge the incoming values according to the exit blocks'
// definitions.
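//
// A minimal sketch of the result (illustrative SPIR-V, ids hypothetical):
// given an in-loop definition %def and an out-of-loop user, the rewriter
// leaves something like:
//   %exit = OpLabel                      ; loop exit block
//   %phi  = OpPhi %type %def %loop_bb   ; closes %def at the exit
//   ...
//   %use  = OpIAdd %type %phi %x        ; out-of-loop use reads the phi
// so no instruction outside the loop references %def directly.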
49 | class LCSSARewriter { |
50 | public: |
51 | LCSSARewriter(IRContext* context, const DominatorTree& dom_tree, |
52 | const std::unordered_set<BasicBlock*>& exit_bb, |
53 | BasicBlock* merge_block) |
54 | : context_(context), |
55 | cfg_(context_->cfg()), |
56 | dom_tree_(dom_tree), |
57 | exit_bb_(exit_bb), |
58 | merge_block_id_(merge_block ? merge_block->id() : 0) {} |
59 | |
60 | struct UseRewriter { |
61 | explicit UseRewriter(LCSSARewriter* base, const Instruction& def_insn) |
62 | : base_(base), def_insn_(def_insn) {} |
    // Rewrites the use of |def_insn_| by the instruction |user| at the index
    // |operand_index| in terms of phi instructions. This recursively builds
    // new phi instructions from |user| to the loop exit blocks' phis. The use
    // of |def_insn_| in |user| is replaced by the relevant phi instruction at
    // the end of the operation.
    // It is assumed that |user| does not dominate any of the loop exit basic
    // blocks. This operation does not update the def/use manager; instead it
    // records what needs to be updated. The actual update is performed by
    // UpdateManagers.
72 | void RewriteUse(BasicBlock* bb, Instruction* user, uint32_t operand_index) { |
      assert((user->opcode() != SpvOpPhi || bb != GetParent(user)) &&
             "The root basic block must be the incoming edge if |user| is a "
             "phi instruction");
      assert((user->opcode() == SpvOpPhi || bb == GetParent(user)) &&
             "The root basic block must be the instruction parent if |user| "
             "is not a phi instruction");
81 | |
82 | Instruction* new_def = GetOrBuildIncoming(bb->id()); |
83 | |
84 | user->SetOperand(operand_index, {new_def->result_id()}); |
85 | rewritten_.insert(user); |
86 | } |
87 | |
88 | // In-place update of some managers (avoid full invalidation). |
89 | inline void UpdateManagers() { |
90 | analysis::DefUseManager* def_use_mgr = base_->context_->get_def_use_mgr(); |
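      // Two passes are required: a rewritten instruction may use a phi that
      // was itself created during the rewrite, so every new definition must
      // be registered before any use is analyzed.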
91 | // Register all new definitions. |
92 | for (Instruction* insn : rewritten_) { |
93 | def_use_mgr->AnalyzeInstDef(insn); |
94 | } |
95 | // Register all new uses. |
96 | for (Instruction* insn : rewritten_) { |
97 | def_use_mgr->AnalyzeInstUse(insn); |
98 | } |
99 | } |
100 | |
101 | private: |
102 | // Return the basic block that |instr| belongs to. |
103 | BasicBlock* GetParent(Instruction* instr) { |
104 | return base_->context_->get_instr_block(instr); |
105 | } |
106 | |
    // Builds a phi instruction for the basic block |bb|. The function assumes
    // that |defining_blocks| contains the list of basic blocks that define
    // the usable value for each predecessor of |bb|.
110 | inline Instruction* CreatePhiInstruction( |
111 | BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) { |
112 | std::vector<uint32_t> incomings; |
113 | const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id()); |
114 | assert(bb_preds.size() == defining_blocks.size()); |
115 | for (size_t i = 0; i < bb_preds.size(); i++) { |
116 | incomings.push_back( |
117 | GetOrBuildIncoming(defining_blocks[i])->result_id()); |
118 | incomings.push_back(bb_preds[i]); |
119 | } |
120 | InstructionBuilder builder(base_->context_, &*bb->begin(), |
121 | IRContext::kAnalysisInstrToBlockMapping); |
122 | Instruction* incoming_phi = |
123 | builder.AddPhi(def_insn_.type_id(), incomings); |
124 | |
125 | rewritten_.insert(incoming_phi); |
126 | return incoming_phi; |
127 | } |
128 | |
129 | // Builds a phi instruction for the basic block |bb|, all incoming values |
130 | // will be |value|. |
131 | inline Instruction* CreatePhiInstruction(BasicBlock* bb, |
132 | const Instruction& value) { |
133 | std::vector<uint32_t> incomings; |
134 | const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id()); |
135 | for (size_t i = 0; i < bb_preds.size(); i++) { |
136 | incomings.push_back(value.result_id()); |
137 | incomings.push_back(bb_preds[i]); |
138 | } |
139 | InstructionBuilder builder(base_->context_, &*bb->begin(), |
140 | IRContext::kAnalysisInstrToBlockMapping); |
141 | Instruction* incoming_phi = |
142 | builder.AddPhi(def_insn_.type_id(), incomings); |
143 | |
144 | rewritten_.insert(incoming_phi); |
145 | return incoming_phi; |
146 | } |
147 | |
148 | // Return the new def to use for the basic block |bb_id|. |
149 | // If |bb_id| does not have a suitable def to use then we: |
150 | // - return the common def used by all predecessors; |
151 | // - if there is no common def, then we build a new phi instr at the |
152 | // beginning of |bb_id| and return this new instruction. |
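    // For example (hypothetical ids): if both predecessors of block 8 see
    // the value defined by exit block 4, block 8 simply reuses that def; if
    // one path sees block 4's def and the other block 5's, a two-way phi is
    // built at the top of block 8.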
153 | Instruction* GetOrBuildIncoming(uint32_t bb_id) { |
      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");
155 | |
156 | Instruction*& incoming_phi = bb_to_phi_[bb_id]; |
157 | if (incoming_phi) { |
158 | return incoming_phi; |
159 | } |
160 | |
161 | BasicBlock* bb = &*base_->cfg_->block(bb_id); |
162 | // If this is an exit basic block, look if there already is an eligible |
163 | // phi instruction. An eligible phi has |def_insn_| as all incoming |
164 | // values. |
165 | if (base_->exit_bb_.count(bb)) { |
166 | // Look if there is an eligible phi in this block. |
167 | if (!bb->WhileEachPhiInst([&incoming_phi, this](Instruction* phi) { |
168 | for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { |
169 | if (phi->GetSingleWordInOperand(i) != def_insn_.result_id()) |
170 | return true; |
171 | } |
172 | incoming_phi = phi; |
173 | rewritten_.insert(incoming_phi); |
174 | return false; |
175 | })) { |
176 | return incoming_phi; |
177 | } |
178 | incoming_phi = CreatePhiInstruction(bb, def_insn_); |
179 | return incoming_phi; |
180 | } |
181 | |
182 | // Get the block that defines the value to use for each predecessor. |
183 | // If the vector has 1 value, then it means that this block does not need |
184 | // to build a phi instruction unless |bb_id| is the loop merge block. |
185 | const std::vector<uint32_t>& defining_blocks = |
186 | base_->GetDefiningBlocks(bb_id); |
187 | |
      // Special case for structured loops: the merge block might not be part
      // of the exit block set. To maintain the structured properties, it
      // eases later transformations if the merge block also holds a phi
      // instruction like the exit blocks do.
192 | if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) { |
193 | if (defining_blocks.size() > 1) { |
194 | incoming_phi = CreatePhiInstruction(bb, defining_blocks); |
195 | } else { |
196 | assert(bb_id == base_->merge_block_id_); |
197 | incoming_phi = |
198 | CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0])); |
199 | } |
200 | } else { |
201 | incoming_phi = GetOrBuildIncoming(defining_blocks[0]); |
202 | } |
203 | |
204 | return incoming_phi; |
205 | } |
206 | |
207 | LCSSARewriter* base_; |
208 | const Instruction& def_insn_; |
209 | std::unordered_map<uint32_t, Instruction*> bb_to_phi_; |
210 | std::unordered_set<Instruction*> rewritten_; |
211 | }; |
212 | |
213 | private: |
  // Return the blocks that define the value to use for each predecessor path
  // reaching |bb_id|:
  //   - if a loop exit block dominates |bb_id|, the vector holds only that
  //     block;
  //   - if all predecessors agree on a single defining block, the vector is
  //     reduced to that block (no phi is needed);
  //   - otherwise the vector holds, in predecessor order, the block whose
  //     definition flows through each incoming edge (a phi must be built).
  // Results are cached in |bb_to_defining_blocks_|.
219 | const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) { |
    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
221 | std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id]; |
222 | |
223 | if (defining_blocks.size()) return defining_blocks; |
224 | |
    // Check if one of the loop exit basic blocks dominates |bb_id|.
226 | for (const BasicBlock* e_bb : exit_bb_) { |
227 | if (dom_tree_.Dominates(e_bb->id(), bb_id)) { |
228 | defining_blocks.push_back(e_bb->id()); |
229 | return defining_blocks; |
230 | } |
231 | } |
232 | |
    // Process the predecessors: they will return their suitable blocks.
    // If they are all the same, this basic block is dominated by a common
    // block, so we won't need to build a phi instruction.
236 | for (uint32_t pred_id : cfg_->preds(bb_id)) { |
237 | const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id); |
238 | if (pred_blocks.size() == 1) |
239 | defining_blocks.push_back(pred_blocks[0]); |
240 | else |
241 | defining_blocks.push_back(pred_id); |
242 | } |
243 | assert(defining_blocks.size()); |
244 | if (std::all_of(defining_blocks.begin(), defining_blocks.end(), |
245 | [&defining_blocks](uint32_t id) { |
246 | return id == defining_blocks[0]; |
247 | })) { |
248 | // No need for a phi. |
249 | defining_blocks.resize(1); |
250 | } |
251 | |
252 | return defining_blocks; |
253 | } |
254 | |
255 | IRContext* context_; |
256 | CFG* cfg_; |
257 | const DominatorTree& dom_tree_; |
258 | const std::unordered_set<BasicBlock*>& exit_bb_; |
259 | uint32_t merge_block_id_; |
  // This map represents the set of known paths. For each key, the vector
  // represents the set of blocks holding the definition to be used to build
  // the phi instruction.
  // If the vector is empty, then the path is not known yet and must be built.
  // If the vector has 1 value, then the value defined by that basic block
  // should be used.
  // If the vector has more than 1 value, then a phi node must be created; the
  // basic block ordering is the same as the predecessor ordering.
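  // For example (hypothetical ids): if block 12 has predecessors {10, 11},
  // where the value reaching 10 is defined in exit block 5 and the one
  // reaching 11 in exit block 6, the entry for 12 is {5, 6} and a two-way
  // phi is built at the top of block 12.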
268 | std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_; |
269 | }; |
270 | |
// Make the set |blocks| closed SSA. The set is closed SSA if all the uses
// outside the set are phi instructions in the exit basic block set (held by
// |lcssa_rewriter|).
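// For instance (hypothetical ids): if a block in |blocks| defines %v and an
// out-of-loop instruction computes %u = OpIMul %t %v %x, after this pass %u
// reads a phi created at a loop exit (or a phi merging exit values) instead
// of %v directly.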
274 | inline void MakeSetClosedSSA(IRContext* context, Function* function, |
275 | const std::unordered_set<uint32_t>& blocks, |
276 | const std::unordered_set<BasicBlock*>& exit_bb, |
277 | LCSSARewriter* lcssa_rewriter) { |
278 | CFG& cfg = *context->cfg(); |
279 | DominatorTree& dom_tree = |
280 | context->GetDominatorAnalysis(function)->GetDomTree(); |
281 | analysis::DefUseManager* def_use_manager = context->get_def_use_mgr(); |
282 | |
283 | for (uint32_t bb_id : blocks) { |
284 | BasicBlock* bb = cfg.block(bb_id); |
285 | // If bb does not dominate an exit block, then it cannot have escaping defs. |
286 | if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue; |
287 | for (Instruction& inst : *bb) { |
288 | LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst); |
289 | def_use_manager->ForEachUse( |
290 | &inst, [&blocks, &rewriter, &exit_bb, context]( |
291 | Instruction* use, uint32_t operand_index) { |
292 | BasicBlock* use_parent = context->get_instr_block(use); |
293 | assert(use_parent); |
294 | if (blocks.count(use_parent->id())) return; |
295 | |
296 | if (use->opcode() == SpvOpPhi) { |
297 | // If the use is a Phi instruction and the incoming block is |
298 | // coming from the loop, then that's consistent with LCSSA form. |
299 | if (exit_bb.count(use_parent)) { |
300 | return; |
301 | } else { |
302 | // That's not an exit block, but the user is a phi instruction. |
303 | // Consider the incoming branch only. |
304 | use_parent = context->get_instr_block( |
305 | use->GetSingleWordOperand(operand_index + 1)); |
306 | } |
307 | } |
308 | // Rewrite the use. Note that this call does not invalidate the |
309 | // def/use manager. So this operation is safe. |
310 | rewriter.RewriteUse(use_parent, use, operand_index); |
311 | }); |
312 | rewriter.UpdateManagers(); |
313 | } |
314 | } |
315 | } |
316 | |
317 | } // namespace |
318 | |
319 | void LoopUtils::CreateLoopDedicatedExits() { |
320 | Function* function = loop_->GetHeaderBlock()->GetParent(); |
321 | LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function); |
322 | CFG& cfg = *context_->cfg(); |
323 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
324 | |
325 | const IRContext::Analysis PreservedAnalyses = |
326 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping; |
327 | |
  // Gathers the set of basic blocks that are not in this loop and have at
  // least one predecessor in the loop and one not in the loop.
330 | std::unordered_set<uint32_t> exit_bb_set; |
331 | loop_->GetExitBlocks(&exit_bb_set); |
332 | |
333 | std::unordered_set<BasicBlock*> new_loop_exits; |
334 | bool made_change = false; |
  // For each block, we create a new one that gathers all branches from
  // the loop and falls into the block.
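  // Sketch (block names hypothetical): if exit block %e has predecessors
  // %in_loop and %outside, we insert a fresh block %dedicated so that
  //   %in_loop -> %dedicated -> %e   and   %outside -> %e,
  // making %dedicated an exit whose predecessors are all inside the loop.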
337 | for (uint32_t non_dedicate_id : exit_bb_set) { |
338 | BasicBlock* non_dedicate = cfg.block(non_dedicate_id); |
339 | const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id); |
340 | // Ignore the block if all the predecessors are in the loop. |
341 | if (std::all_of(bb_pred.begin(), bb_pred.end(), |
342 | [this](uint32_t id) { return loop_->IsInsideLoop(id); })) { |
343 | new_loop_exits.insert(non_dedicate); |
344 | continue; |
345 | } |
346 | |
347 | made_change = true; |
348 | Function::iterator insert_pt = function->begin(); |
349 | for (; insert_pt != function->end() && &*insert_pt != non_dedicate; |
350 | ++insert_pt) { |
351 | } |
    assert(insert_pt != function->end() && "Basic Block not found");
353 | |
    // Create the dedicated exit basic block.
355 | // TODO(1841): Handle id overflow. |
356 | BasicBlock& exit = *insert_pt.InsertBefore(std::unique_ptr<BasicBlock>( |
357 | new BasicBlock(std::unique_ptr<Instruction>(new Instruction( |
358 | context_, SpvOpLabel, 0, context_->TakeNextId(), {}))))); |
359 | exit.SetParent(function); |
360 | |
    // Redirect in-loop predecessors to the |exit| block.
362 | for (uint32_t exit_pred_id : bb_pred) { |
363 | if (loop_->IsInsideLoop(exit_pred_id)) { |
364 | BasicBlock* pred_block = cfg.block(exit_pred_id); |
365 | pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) { |
366 | if (*id == non_dedicate->id()) *id = exit.id(); |
367 | }); |
368 | // Update the CFG. |
369 | // |non_dedicate|'s predecessor list will be updated at the end of the |
370 | // loop. |
371 | cfg.RegisterBlock(pred_block); |
372 | } |
373 | } |
374 | |
    // Register the label with the def/use manager; this is required for the
    // phi patching.
376 | def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst()); |
377 | context_->set_instr_block(exit.GetLabelInst(), &exit); |
378 | |
379 | InstructionBuilder builder(context_, &exit, PreservedAnalyses); |
    // Now jump from our dedicated basic block to the old exit.
    // We also reset the insert point so all instructions are inserted before
    // the branch.
383 | builder.SetInsertPoint(builder.AddBranch(non_dedicate->id())); |
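    // Each phi in |non_dedicate| is split: incoming values coming from inside
    // the loop move to a new phi in |exit|, while the original phi keeps the
    // out-of-loop values plus one incoming from |exit| carrying the new phi's
    // result.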
384 | non_dedicate->ForEachPhiInst( |
385 | [&builder, &exit, def_use_mgr, this](Instruction* phi) { |
386 | // New phi operands for this instruction. |
387 | std::vector<uint32_t> new_phi_op; |
388 | // Phi operands for the dedicated exit block. |
389 | std::vector<uint32_t> exit_phi_op; |
390 | for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { |
391 | uint32_t def_id = phi->GetSingleWordInOperand(i); |
392 | uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1); |
393 | if (loop_->IsInsideLoop(incoming_id)) { |
394 | exit_phi_op.push_back(def_id); |
395 | exit_phi_op.push_back(incoming_id); |
396 | } else { |
397 | new_phi_op.push_back(def_id); |
398 | new_phi_op.push_back(incoming_id); |
399 | } |
400 | } |
401 | |
          // Build the new phi instruction for the dedicated exit block.
403 | Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op); |
404 | // Build the new incoming branch. |
405 | new_phi_op.push_back(exit_phi->result_id()); |
406 | new_phi_op.push_back(exit.id()); |
407 | // Rewrite operands. |
408 | uint32_t idx = 0; |
409 | for (; idx < new_phi_op.size(); idx++) |
410 | phi->SetInOperand(idx, {new_phi_op[idx]}); |
411 | // Remove extra operands, from last to first (more efficient). |
412 | for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--) |
413 | phi->RemoveInOperand(j); |
414 | // Update the def/use manager for this |phi|. |
415 | def_use_mgr->AnalyzeInstUse(phi); |
416 | }); |
417 | // Update the CFG. |
418 | cfg.RegisterBlock(&exit); |
419 | cfg.RemoveNonExistingEdges(non_dedicate->id()); |
420 | new_loop_exits.insert(&exit); |
421 | // If non_dedicate is in a loop, add the new dedicated exit in that loop. |
422 | if (Loop* parent_loop = loop_desc[non_dedicate]) |
423 | parent_loop->AddBasicBlock(&exit); |
424 | } |
425 | |
426 | if (new_loop_exits.size() == 1) { |
427 | loop_->SetMergeBlock(*new_loop_exits.begin()); |
428 | } |
429 | |
430 | if (made_change) { |
431 | context_->InvalidateAnalysesExceptFor( |
432 | PreservedAnalyses | IRContext::kAnalysisCFG | |
433 | IRContext::Analysis::kAnalysisLoopAnalysis); |
434 | } |
435 | } |
436 | |
437 | void LoopUtils::MakeLoopClosedSSA() { |
438 | CreateLoopDedicatedExits(); |
439 | |
440 | Function* function = loop_->GetHeaderBlock()->GetParent(); |
441 | CFG& cfg = *context_->cfg(); |
442 | DominatorTree& dom_tree = |
443 | context_->GetDominatorAnalysis(function)->GetDomTree(); |
444 | |
445 | std::unordered_set<BasicBlock*> exit_bb; |
446 | { |
447 | std::unordered_set<uint32_t> exit_bb_id; |
448 | loop_->GetExitBlocks(&exit_bb_id); |
449 | for (uint32_t bb_id : exit_bb_id) { |
450 | exit_bb.insert(cfg.block(bb_id)); |
451 | } |
452 | } |
453 | |
454 | LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb, |
455 | loop_->GetMergeBlock()); |
456 | MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb, |
457 | &lcssa_rewriter); |
458 | |
459 | // Make sure all defs post-dominated by the merge block have their last use no |
460 | // further than the merge block. |
461 | if (loop_->GetMergeBlock()) { |
462 | std::unordered_set<uint32_t> merging_bb_id; |
463 | loop_->GetMergingBlocks(&merging_bb_id); |
464 | merging_bb_id.erase(loop_->GetMergeBlock()->id()); |
465 | // Reset the exit set, now only the merge block is the exit. |
466 | exit_bb.clear(); |
467 | exit_bb.insert(loop_->GetMergeBlock()); |
468 | // LCSSARewriter is reusable here only because it forces the creation of a |
469 | // phi instruction in the merge block. |
470 | MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb, |
471 | &lcssa_rewriter); |
472 | } |
473 | |
474 | context_->InvalidateAnalysesExceptFor( |
475 | IRContext::Analysis::kAnalysisCFG | |
476 | IRContext::Analysis::kAnalysisDominatorAnalysis | |
477 | IRContext::Analysis::kAnalysisLoopAnalysis); |
478 | } |
479 | |
480 | Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const { |
481 | // Compute the structured order of the loop basic blocks and store it in the |
482 | // vector ordered_loop_blocks. |
483 | std::vector<BasicBlock*> ordered_loop_blocks; |
484 | loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks); |
485 | |
486 | // Clone the loop. |
487 | return CloneLoop(cloning_result, ordered_loop_blocks); |
488 | } |
489 | |
490 | Loop* LoopUtils::CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result) { |
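  // Sketch of the resulting control flow (assuming the rewires below all
  // succeed): the pre-header now branches to the cloned loop, whose fresh
  // merge block branches straight to the original header:
  //   preheader -> [cloned loop] -> new_exit_bb -> [original loop]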
491 | // Clone the loop. |
492 | Loop* new_loop = CloneLoop(cloning_result); |
493 | |
494 | // Create a new exit block/label for the new loop. |
495 | // TODO(1841): Handle id overflow. |
496 | std::unique_ptr<Instruction> new_label{new Instruction( |
497 | context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})}; |
498 | std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))}; |
499 | new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent()); |
500 | |
501 | // Create an unconditional branch to the header block. |
502 | InstructionBuilder builder{context_, new_exit_bb.get()}; |
503 | builder.AddBranch(loop_->GetHeaderBlock()->id()); |
504 | |
  // Save the ids of the old and new merge blocks.
506 | const uint32_t old_merge_block = loop_->GetMergeBlock()->id(); |
507 | const uint32_t new_merge_block = new_exit_bb->id(); |
508 | |
509 | // Replace the uses of the old merge block in the new loop with the new merge |
510 | // block. |
511 | for (std::unique_ptr<BasicBlock>& basic_block : cloning_result->cloned_bb_) { |
512 | for (Instruction& inst : *basic_block) { |
513 | // For each operand in each instruction check if it is using the old merge |
514 | // block and change it to be the new merge block. |
515 | auto replace_merge_use = [old_merge_block, |
516 | new_merge_block](uint32_t* id) { |
517 | if (*id == old_merge_block) *id = new_merge_block; |
518 | }; |
519 | inst.ForEachInOperand(replace_merge_use); |
520 | } |
521 | } |
522 | |
  const uint32_t old_header = loop_->GetHeaderBlock()->id();
  const uint32_t new_header = new_loop->GetHeaderBlock()->id();
525 | analysis::DefUseManager* def_use = context_->get_def_use_mgr(); |
526 | |
527 | def_use->ForEachUse(old_header, |
528 | [new_header, this](Instruction* inst, uint32_t operand) { |
529 | if (!this->loop_->IsInsideLoop(inst)) |
530 | inst->SetOperand(operand, {new_header}); |
531 | }); |
532 | |
533 | // TODO(1841): Handle failure to create pre-header. |
534 | def_use->ForEachUse( |
535 | loop_->GetOrCreatePreHeaderBlock()->id(), |
536 | [new_merge_block, this](Instruction* inst, uint32_t operand) { |
537 | if (this->loop_->IsInsideLoop(inst)) |
          inst->SetOperand(operand, {new_merge_block});
      });
541 | new_loop->SetMergeBlock(new_exit_bb.get()); |
542 | |
543 | new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock()); |
544 | |
545 | // Add the new block into the cloned instructions. |
546 | cloning_result->cloned_bb_.push_back(std::move(new_exit_bb)); |
547 | |
548 | return new_loop; |
549 | } |
550 | |
551 | Loop* LoopUtils::CloneLoop( |
552 | LoopCloningResult* cloning_result, |
553 | const std::vector<BasicBlock*>& ordered_loop_blocks) const { |
554 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
555 | |
556 | std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(context_); |
557 | |
558 | CFG& cfg = *context_->cfg(); |
559 | |
560 | // Clone and place blocks in a SPIR-V compliant order (dominators first). |
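  // The clone proceeds in two passes: first every block and result id is
  // duplicated and recorded in |cloning_result| (the defs), then all operands
  // are remapped through |value_map_| (the uses). Two passes are needed
  // because an operand may reference an id cloned later in the structured
  // order (e.g. a back-edge block id used by a header phi).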
561 | for (BasicBlock* old_bb : ordered_loop_blocks) { |
562 | // For each basic block in the loop, we clone it and register the mapping |
563 | // between old and new ids. |
564 | BasicBlock* new_bb = old_bb->Clone(context_); |
565 | new_bb->SetParent(&function_); |
566 | // TODO(1841): Handle id overflow. |
567 | new_bb->GetLabelInst()->SetResultId(context_->TakeNextId()); |
568 | def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst()); |
569 | context_->set_instr_block(new_bb->GetLabelInst(), new_bb); |
570 | cloning_result->cloned_bb_.emplace_back(new_bb); |
571 | |
572 | cloning_result->old_to_new_bb_[old_bb->id()] = new_bb; |
573 | cloning_result->new_to_old_bb_[new_bb->id()] = old_bb; |
574 | cloning_result->value_map_[old_bb->id()] = new_bb->id(); |
575 | |
576 | if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb); |
577 | |
578 | for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin(); |
579 | new_inst != new_bb->end(); ++new_inst, ++old_inst) { |
580 | cloning_result->ptr_map_[&*new_inst] = &*old_inst; |
581 | if (new_inst->HasResultId()) { |
582 | // TODO(1841): Handle id overflow. |
583 | new_inst->SetResultId(context_->TakeNextId()); |
584 | cloning_result->value_map_[old_inst->result_id()] = |
585 | new_inst->result_id(); |
586 | |
587 | // Only look at the defs for now, uses are not updated yet. |
588 | def_use_mgr->AnalyzeInstDef(&*new_inst); |
589 | } |
590 | } |
591 | } |
592 | |
  // All instructions (including all labels) have been cloned; now remap
  // instruction operand ids to the new ones.
595 | for (std::unique_ptr<BasicBlock>& bb_ref : cloning_result->cloned_bb_) { |
596 | BasicBlock* bb = bb_ref.get(); |
597 | |
598 | for (Instruction& insn : *bb) { |
599 | insn.ForEachInId([cloning_result](uint32_t* old_id) { |
600 | // If the operand is defined in the loop, remap the id. |
601 | auto id_it = cloning_result->value_map_.find(*old_id); |
602 | if (id_it != cloning_result->value_map_.end()) { |
603 | *old_id = id_it->second; |
604 | } |
605 | }); |
      // Only look at what the instruction uses. All defs are registered, so
      // everything should be fine now.
608 | def_use_mgr->AnalyzeInstUse(&insn); |
609 | context_->set_instr_block(&insn, bb); |
610 | } |
611 | cfg.RegisterBlock(bb); |
612 | } |
613 | |
614 | PopulateLoopNest(new_loop.get(), *cloning_result); |
615 | |
616 | return new_loop.release(); |
617 | } |
618 | |
619 | void LoopUtils::PopulateLoopNest( |
620 | Loop* new_loop, const LoopCloningResult& cloning_result) const { |
621 | std::unordered_map<Loop*, Loop*> loop_mapping; |
622 | loop_mapping[loop_] = new_loop; |
623 | |
624 | if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop); |
625 | PopulateLoopDesc(new_loop, loop_, cloning_result); |
626 | |
627 | for (Loop& sub_loop : |
628 | make_range(++TreeDFIterator<Loop>(loop_), TreeDFIterator<Loop>())) { |
629 | Loop* cloned = new Loop(context_); |
630 | if (Loop* parent = loop_mapping[sub_loop.GetParent()]) |
631 | parent->AddNestedLoop(cloned); |
632 | loop_mapping[&sub_loop] = cloned; |
633 | PopulateLoopDesc(cloned, &sub_loop, cloning_result); |
634 | } |
635 | |
636 | loop_desc_->AddLoopNest(std::unique_ptr<Loop>(new_loop)); |
637 | } |
638 | |
// Populates |new_loop|'s descriptor according to |old_loop|'s.
640 | void LoopUtils::PopulateLoopDesc( |
641 | Loop* new_loop, Loop* old_loop, |
642 | const LoopCloningResult& cloning_result) const { |
643 | for (uint32_t bb_id : old_loop->GetBlocks()) { |
644 | BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id); |
645 | new_loop->AddBasicBlock(bb); |
646 | } |
647 | new_loop->SetHeaderBlock( |
648 | cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id())); |
649 | if (old_loop->GetLatchBlock()) |
650 | new_loop->SetLatchBlock( |
651 | cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id())); |
652 | if (old_loop->GetContinueBlock()) |
653 | new_loop->SetContinueBlock( |
654 | cloning_result.old_to_new_bb_.at(old_loop->GetContinueBlock()->id())); |
655 | if (old_loop->GetMergeBlock()) { |
656 | auto it = |
657 | cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id()); |
658 | BasicBlock* bb = it != cloning_result.old_to_new_bb_.end() |
659 | ? it->second |
660 | : old_loop->GetMergeBlock(); |
661 | new_loop->SetMergeBlock(bb); |
662 | } |
663 | if (old_loop->GetPreHeaderBlock()) { |
664 | auto it = |
665 | cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id()); |
666 | if (it != cloning_result.old_to_new_bb_.end()) { |
667 | new_loop->SetPreHeaderBlock(it->second); |
668 | } |
669 | } |
670 | } |
671 | |
// Gather some metrics about the given loop (the region of interest).
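// For instance (hypothetical block): a block containing OpLabel, two OpIAdd
// and an OpBranch contributes 3 to |roi_size_|, since labels, nops and phi
// instructions are not counted.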
673 | void CodeMetrics::Analyze(const Loop& loop) { |
674 | CFG& cfg = *loop.GetContext()->cfg(); |
675 | |
676 | roi_size_ = 0; |
677 | block_sizes_.clear(); |
678 | |
679 | for (uint32_t id : loop.GetBlocks()) { |
680 | const BasicBlock* bb = cfg.block(id); |
681 | size_t bb_size = 0; |
682 | bb->ForEachInst([&bb_size](const Instruction* insn) { |
683 | if (insn->opcode() == SpvOpLabel) return; |
684 | if (insn->IsNop()) return; |
685 | if (insn->opcode() == SpvOpPhi) return; |
686 | bb_size++; |
687 | }); |
688 | block_sizes_[bb->id()] = bb_size; |
689 | roi_size_ += bb_size; |
690 | } |
691 | } |
692 | |
693 | } // namespace opt |
694 | } // namespace spvtools |
695 | |