1 | // Copyright (c) 2018 Google LLC. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "source/opt/loop_unswitch_pass.h" |
16 | |
17 | #include <functional> |
18 | #include <list> |
19 | #include <memory> |
20 | #include <type_traits> |
21 | #include <unordered_map> |
22 | #include <unordered_set> |
23 | #include <utility> |
24 | #include <vector> |
25 | |
26 | #include "source/opt/basic_block.h" |
27 | #include "source/opt/dominator_tree.h" |
28 | #include "source/opt/fold.h" |
29 | #include "source/opt/function.h" |
30 | #include "source/opt/instruction.h" |
31 | #include "source/opt/ir_builder.h" |
32 | #include "source/opt/ir_context.h" |
33 | #include "source/opt/loop_descriptor.h" |
34 | |
35 | #include "source/opt/loop_utils.h" |
36 | |
37 | namespace spvtools { |
38 | namespace opt { |
39 | namespace { |
40 | |
41 | static const uint32_t kTypePointerStorageClassInIdx = 0; |
42 | |
43 | } // anonymous namespace |
44 | |
45 | namespace { |
46 | |
47 | // This class handle the unswitch procedure for a given loop. |
48 | // The unswitch will not happen if: |
49 | // - The loop has any instruction that will prevent it; |
50 | // - The loop invariant condition is not uniform. |
51 | class LoopUnswitch { |
52 | public: |
53 | LoopUnswitch(IRContext* context, Function* function, Loop* loop, |
54 | LoopDescriptor* loop_desc) |
55 | : function_(function), |
56 | loop_(loop), |
57 | loop_desc_(*loop_desc), |
58 | context_(context), |
59 | switch_block_(nullptr) {} |
60 | |
61 | // Returns true if the loop can be unswitched. |
62 | // Can be unswitch if: |
63 | // - The loop has no instructions that prevents it (such as barrier); |
64 | // - The loop has one conditional branch or switch that do not depends on the |
65 | // loop; |
66 | // - The loop invariant condition is uniform; |
67 | bool CanUnswitchLoop() { |
68 | if (switch_block_) return true; |
69 | if (loop_->IsSafeToClone()) return false; |
70 | |
71 | CFG& cfg = *context_->cfg(); |
72 | |
73 | for (uint32_t bb_id : loop_->GetBlocks()) { |
74 | BasicBlock* bb = cfg.block(bb_id); |
75 | if (loop_->GetLatchBlock() == bb) { |
76 | continue; |
77 | } |
78 | |
79 | if (bb->terminator()->IsBranch() && |
80 | bb->terminator()->opcode() != SpvOpBranch) { |
81 | if (IsConditionNonConstantLoopInvariant(bb->terminator())) { |
82 | switch_block_ = bb; |
83 | break; |
84 | } |
85 | } |
86 | } |
87 | |
88 | return switch_block_; |
89 | } |
90 | |
91 | // Return the iterator to the basic block |bb|. |
92 | Function::iterator FindBasicBlockPosition(BasicBlock* bb_to_find) { |
93 | Function::iterator it = function_->FindBlock(bb_to_find->id()); |
94 | assert(it != function_->end() && "Basic Block not found" ); |
95 | return it; |
96 | } |
97 | |
98 | // Creates a new basic block and insert it into the function |fn| at the |
99 | // position |ip|. This function preserves the def/use and instr to block |
100 | // managers. |
101 | BasicBlock* CreateBasicBlock(Function::iterator ip) { |
102 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
103 | |
104 | // TODO(1841): Handle id overflow. |
105 | BasicBlock* bb = &*ip.InsertBefore(std::unique_ptr<BasicBlock>( |
106 | new BasicBlock(std::unique_ptr<Instruction>(new Instruction( |
107 | context_, SpvOpLabel, 0, context_->TakeNextId(), {}))))); |
108 | bb->SetParent(function_); |
109 | def_use_mgr->AnalyzeInstDef(bb->GetLabelInst()); |
110 | context_->set_instr_block(bb->GetLabelInst(), bb); |
111 | |
112 | return bb; |
113 | } |
114 | |
115 | Instruction* GetValueForDefaultPathForSwitch(Instruction* switch_inst) { |
116 | assert(switch_inst->opcode() == SpvOpSwitch && |
117 | "The given instructoin must be an OpSwitch." ); |
118 | |
119 | // Find a value that can be used to select the default path. |
120 | // If none are possible, then it will just use 0. The value does not matter |
121 | // because this path will never be taken becaues the new switch outside of |
122 | // the loop cannot select this path either. |
123 | std::vector<uint32_t> existing_values; |
124 | for (uint32_t i = 2; i < switch_inst->NumInOperands(); i += 2) { |
125 | existing_values.push_back(switch_inst->GetSingleWordInOperand(i)); |
126 | } |
127 | std::sort(existing_values.begin(), existing_values.end()); |
128 | uint32_t value_for_default_path = 0; |
129 | if (existing_values.size() < std::numeric_limits<uint32_t>::max()) { |
130 | for (value_for_default_path = 0; |
131 | value_for_default_path < existing_values.size(); |
132 | value_for_default_path++) { |
133 | if (existing_values[value_for_default_path] != value_for_default_path) { |
134 | break; |
135 | } |
136 | } |
137 | } |
138 | InstructionBuilder builder( |
139 | context_, static_cast<Instruction*>(nullptr), |
140 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); |
141 | return builder.GetUintConstant(value_for_default_path); |
142 | } |
143 | |
144 | // Unswitches |loop_|. |
145 | void PerformUnswitch() { |
146 | assert(CanUnswitchLoop() && |
147 | "Cannot unswitch if there is not constant condition" ); |
148 | assert(loop_->GetPreHeaderBlock() && "This loop has no pre-header block" ); |
149 | assert(loop_->IsLCSSA() && "This loop is not in LCSSA form" ); |
150 | |
151 | CFG& cfg = *context_->cfg(); |
152 | DominatorTree* dom_tree = |
153 | &context_->GetDominatorAnalysis(function_)->GetDomTree(); |
154 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
155 | LoopUtils loop_utils(context_, loop_); |
156 | |
157 | ////////////////////////////////////////////////////////////////////////////// |
158 | // Step 1: Create the if merge block for structured modules. |
159 | // To do so, the |loop_| merge block will become the if's one and we |
160 | // create a merge for the loop. This will limit the amount of duplicated |
161 | // code the structured control flow imposes. |
162 | // For non structured program, the new loop will be connected to |
163 | // the old loop's exit blocks. |
164 | ////////////////////////////////////////////////////////////////////////////// |
165 | |
166 | // Get the merge block if it exists. |
167 | BasicBlock* if_merge_block = loop_->GetMergeBlock(); |
168 | // The merge block is only created if the loop has a unique exit block. We |
169 | // have this guarantee for structured loops, for compute loop it will |
170 | // trivially help maintain both a structured-like form and LCSAA. |
171 | BasicBlock* loop_merge_block = |
172 | if_merge_block |
173 | ? CreateBasicBlock(FindBasicBlockPosition(if_merge_block)) |
174 | : nullptr; |
175 | if (loop_merge_block) { |
176 | // Add the instruction and update managers. |
177 | InstructionBuilder builder( |
178 | context_, loop_merge_block, |
179 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); |
180 | builder.AddBranch(if_merge_block->id()); |
181 | builder.SetInsertPoint(&*loop_merge_block->begin()); |
182 | cfg.RegisterBlock(loop_merge_block); |
183 | def_use_mgr->AnalyzeInstDef(loop_merge_block->GetLabelInst()); |
184 | // Update CFG. |
185 | if_merge_block->ForEachPhiInst( |
186 | [loop_merge_block, &builder, this](Instruction* phi) { |
187 | Instruction* cloned = phi->Clone(context_); |
188 | cloned->SetResultId(TakeNextId()); |
189 | builder.AddInstruction(std::unique_ptr<Instruction>(cloned)); |
190 | phi->SetInOperand(0, {cloned->result_id()}); |
191 | phi->SetInOperand(1, {loop_merge_block->id()}); |
192 | for (uint32_t j = phi->NumInOperands() - 1; j > 1; j--) |
193 | phi->RemoveInOperand(j); |
194 | }); |
195 | // Copy the predecessor list (will get invalidated otherwise). |
196 | std::vector<uint32_t> preds = cfg.preds(if_merge_block->id()); |
197 | for (uint32_t pid : preds) { |
198 | if (pid == loop_merge_block->id()) continue; |
199 | BasicBlock* p_bb = cfg.block(pid); |
200 | p_bb->ForEachSuccessorLabel( |
201 | [if_merge_block, loop_merge_block](uint32_t* id) { |
202 | if (*id == if_merge_block->id()) *id = loop_merge_block->id(); |
203 | }); |
204 | cfg.AddEdge(pid, loop_merge_block->id()); |
205 | } |
206 | cfg.RemoveNonExistingEdges(if_merge_block->id()); |
207 | // Update loop descriptor. |
208 | if (Loop* ploop = loop_->GetParent()) { |
209 | ploop->AddBasicBlock(loop_merge_block); |
210 | loop_desc_.SetBasicBlockToLoop(loop_merge_block->id(), ploop); |
211 | } |
212 | // Update the dominator tree. |
213 | DominatorTreeNode* loop_merge_dtn = |
214 | dom_tree->GetOrInsertNode(loop_merge_block); |
215 | DominatorTreeNode* if_merge_block_dtn = |
216 | dom_tree->GetOrInsertNode(if_merge_block); |
217 | loop_merge_dtn->parent_ = if_merge_block_dtn->parent_; |
218 | loop_merge_dtn->children_.push_back(if_merge_block_dtn); |
219 | loop_merge_dtn->parent_->children_.push_back(loop_merge_dtn); |
220 | if_merge_block_dtn->parent_->children_.erase(std::find( |
221 | if_merge_block_dtn->parent_->children_.begin(), |
222 | if_merge_block_dtn->parent_->children_.end(), if_merge_block_dtn)); |
223 | |
224 | loop_->SetMergeBlock(loop_merge_block); |
225 | } |
226 | |
227 | //////////////////////////////////////////////////////////////////////////// |
228 | // Step 2: Build a new preheader for |loop_|, use the old one |
229 | // for the invariant branch. |
230 | //////////////////////////////////////////////////////////////////////////// |
231 | |
232 | BasicBlock* if_block = loop_->GetPreHeaderBlock(); |
233 | // If this preheader is the parent loop header, |
234 | // we need to create a dedicated block for the if. |
235 | BasicBlock* = |
236 | CreateBasicBlock(++FindBasicBlockPosition(if_block)); |
237 | InstructionBuilder( |
238 | context_, loop_pre_header, |
239 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) |
240 | .AddBranch(loop_->GetHeaderBlock()->id()); |
241 | |
242 | if_block->tail()->SetInOperand(0, {loop_pre_header->id()}); |
243 | |
244 | // Update loop descriptor. |
245 | if (Loop* ploop = loop_desc_[if_block]) { |
246 | ploop->AddBasicBlock(loop_pre_header); |
247 | loop_desc_.SetBasicBlockToLoop(loop_pre_header->id(), ploop); |
248 | } |
249 | |
250 | // Update the CFG. |
251 | cfg.RegisterBlock(loop_pre_header); |
252 | def_use_mgr->AnalyzeInstDef(loop_pre_header->GetLabelInst()); |
253 | cfg.AddEdge(if_block->id(), loop_pre_header->id()); |
254 | cfg.RemoveNonExistingEdges(loop_->GetHeaderBlock()->id()); |
255 | |
256 | loop_->GetHeaderBlock()->ForEachPhiInst( |
257 | [loop_pre_header, if_block](Instruction* phi) { |
258 | phi->ForEachInId([loop_pre_header, if_block](uint32_t* id) { |
259 | if (*id == if_block->id()) { |
260 | *id = loop_pre_header->id(); |
261 | } |
262 | }); |
263 | }); |
264 | loop_->SetPreHeaderBlock(loop_pre_header); |
265 | |
266 | // Update the dominator tree. |
267 | DominatorTreeNode* = |
268 | dom_tree->GetOrInsertNode(loop_pre_header); |
269 | DominatorTreeNode* if_block_dtn = dom_tree->GetTreeNode(if_block); |
270 | loop_pre_header_dtn->parent_ = if_block_dtn; |
271 | assert( |
272 | if_block_dtn->children_.size() == 1 && |
273 | "A loop preheader should only have the header block as a child in the " |
274 | "dominator tree" ); |
275 | loop_pre_header_dtn->children_.push_back(if_block_dtn->children_[0]); |
276 | if_block_dtn->children_.clear(); |
277 | if_block_dtn->children_.push_back(loop_pre_header_dtn); |
278 | |
279 | // Make domination queries valid. |
280 | dom_tree->ResetDFNumbering(); |
281 | |
282 | // Compute an ordered list of basic block to clone: loop blocks + pre-header |
283 | // + merge block. |
284 | loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks_, true, true); |
285 | |
286 | ///////////////////////////// |
287 | // Do the actual unswitch: // |
288 | // - Clone the loop // |
289 | // - Connect exits // |
290 | // - Specialize the loop // |
291 | ///////////////////////////// |
292 | |
293 | Instruction* iv_condition = &*switch_block_->tail(); |
294 | SpvOp iv_opcode = iv_condition->opcode(); |
295 | Instruction* condition = |
296 | def_use_mgr->GetDef(iv_condition->GetOperand(0).words[0]); |
297 | |
298 | analysis::ConstantManager* cst_mgr = context_->get_constant_mgr(); |
299 | const analysis::Type* cond_type = |
300 | context_->get_type_mgr()->GetType(condition->type_id()); |
301 | |
302 | // Build the list of value for which we need to clone and specialize the |
303 | // loop. |
304 | std::vector<std::pair<Instruction*, BasicBlock*>> constant_branch; |
305 | // Special case for the original loop |
306 | Instruction* original_loop_constant_value; |
307 | if (iv_opcode == SpvOpBranchConditional) { |
308 | constant_branch.emplace_back( |
309 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {0})), |
310 | nullptr); |
311 | original_loop_constant_value = |
312 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {1})); |
313 | } else { |
314 | // We are looking to take the default branch, so we can't provide a |
315 | // specific value. |
316 | original_loop_constant_value = |
317 | GetValueForDefaultPathForSwitch(iv_condition); |
318 | |
319 | for (uint32_t i = 2; i < iv_condition->NumInOperands(); i += 2) { |
320 | constant_branch.emplace_back( |
321 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant( |
322 | cond_type, iv_condition->GetInOperand(i).words)), |
323 | nullptr); |
324 | } |
325 | } |
326 | |
327 | // Get the loop landing pads. |
328 | std::unordered_set<uint32_t> if_merging_blocks; |
329 | std::function<bool(uint32_t)> is_from_original_loop; |
330 | if (loop_->GetHeaderBlock()->GetLoopMergeInst()) { |
331 | if_merging_blocks.insert(if_merge_block->id()); |
332 | is_from_original_loop = [this](uint32_t id) { |
333 | return loop_->IsInsideLoop(id) || loop_->GetMergeBlock()->id() == id; |
334 | }; |
335 | } else { |
336 | loop_->GetExitBlocks(&if_merging_blocks); |
337 | is_from_original_loop = [this](uint32_t id) { |
338 | return loop_->IsInsideLoop(id); |
339 | }; |
340 | } |
341 | |
342 | for (auto& specialisation_pair : constant_branch) { |
343 | Instruction* specialisation_value = specialisation_pair.first; |
344 | ////////////////////////////////////////////////////////// |
345 | // Step 3: Duplicate |loop_|. |
346 | ////////////////////////////////////////////////////////// |
347 | LoopUtils::LoopCloningResult clone_result; |
348 | |
349 | Loop* cloned_loop = |
350 | loop_utils.CloneLoop(&clone_result, ordered_loop_blocks_); |
351 | specialisation_pair.second = cloned_loop->GetPreHeaderBlock(); |
352 | |
353 | //////////////////////////////////// |
354 | // Step 4: Specialize the loop. // |
355 | //////////////////////////////////// |
356 | |
357 | { |
358 | SpecializeLoop(cloned_loop, condition, specialisation_value); |
359 | |
360 | /////////////////////////////////////////////////////////// |
361 | // Step 5: Connect convergent edges to the landing pads. // |
362 | /////////////////////////////////////////////////////////// |
363 | |
364 | for (uint32_t merge_bb_id : if_merging_blocks) { |
365 | BasicBlock* merge = context_->cfg()->block(merge_bb_id); |
366 | // We are in LCSSA so we only care about phi instructions. |
367 | merge->ForEachPhiInst( |
368 | [is_from_original_loop, &clone_result](Instruction* phi) { |
369 | uint32_t num_in_operands = phi->NumInOperands(); |
370 | for (uint32_t i = 0; i < num_in_operands; i += 2) { |
371 | uint32_t pred = phi->GetSingleWordInOperand(i + 1); |
372 | if (is_from_original_loop(pred)) { |
373 | pred = clone_result.value_map_.at(pred); |
374 | uint32_t incoming_value_id = phi->GetSingleWordInOperand(i); |
375 | // Not all the incoming values are coming from the loop. |
376 | ValueMapTy::iterator new_value = |
377 | clone_result.value_map_.find(incoming_value_id); |
378 | if (new_value != clone_result.value_map_.end()) { |
379 | incoming_value_id = new_value->second; |
380 | } |
381 | phi->AddOperand({SPV_OPERAND_TYPE_ID, {incoming_value_id}}); |
382 | phi->AddOperand({SPV_OPERAND_TYPE_ID, {pred}}); |
383 | } |
384 | } |
385 | }); |
386 | } |
387 | } |
388 | function_->AddBasicBlocks(clone_result.cloned_bb_.begin(), |
389 | clone_result.cloned_bb_.end(), |
390 | ++FindBasicBlockPosition(if_block)); |
391 | } |
392 | |
393 | // Specialize the existing loop. |
394 | SpecializeLoop(loop_, condition, original_loop_constant_value); |
395 | BasicBlock* original_loop_target = loop_->GetPreHeaderBlock(); |
396 | |
397 | ///////////////////////////////////// |
398 | // Finally: connect the new loops. // |
399 | ///////////////////////////////////// |
400 | |
401 | // Delete the old jump |
402 | context_->KillInst(&*if_block->tail()); |
403 | InstructionBuilder builder(context_, if_block); |
404 | if (iv_opcode == SpvOpBranchConditional) { |
405 | assert(constant_branch.size() == 1); |
406 | builder.AddConditionalBranch( |
407 | condition->result_id(), original_loop_target->id(), |
408 | constant_branch[0].second->id(), |
409 | if_merge_block ? if_merge_block->id() : kInvalidId); |
410 | } else { |
411 | std::vector<std::pair<Operand::OperandData, uint32_t>> targets; |
412 | for (auto& t : constant_branch) { |
413 | targets.emplace_back(t.first->GetInOperand(0).words, t.second->id()); |
414 | } |
415 | |
416 | builder.AddSwitch(condition->result_id(), original_loop_target->id(), |
417 | targets, |
418 | if_merge_block ? if_merge_block->id() : kInvalidId); |
419 | } |
420 | |
421 | switch_block_ = nullptr; |
422 | ordered_loop_blocks_.clear(); |
423 | |
424 | context_->InvalidateAnalysesExceptFor( |
425 | IRContext::Analysis::kAnalysisLoopAnalysis); |
426 | } |
427 | |
428 | private: |
429 | using ValueMapTy = std::unordered_map<uint32_t, uint32_t>; |
430 | using BlockMapTy = std::unordered_map<uint32_t, BasicBlock*>; |
431 | |
432 | Function* function_; |
433 | Loop* loop_; |
434 | LoopDescriptor& loop_desc_; |
435 | IRContext* context_; |
436 | |
437 | BasicBlock* switch_block_; |
438 | // Map between instructions and if they are dynamically uniform. |
439 | std::unordered_map<uint32_t, bool> dynamically_uniform_; |
440 | // The loop basic blocks in structured order. |
441 | std::vector<BasicBlock*> ordered_loop_blocks_; |
442 | |
443 | // Returns the next usable id for the context. |
444 | uint32_t TakeNextId() { |
445 | // TODO(1841): Handle id overflow. |
446 | return context_->TakeNextId(); |
447 | } |
448 | |
449 | // Simplifies |loop| assuming the instruction |to_version_insn| takes the |
450 | // value |cst_value|. |block_range| is an iterator range returning the loop |
451 | // basic blocks in a structured order (dominator first). |
452 | // The function will ignore basic blocks returned by |block_range| if they |
453 | // does not belong to the loop. |
454 | // The set |dead_blocks| will contain all the dead basic blocks. |
455 | // |
456 | // Requirements: |
457 | // - |loop| must be in the LCSSA form; |
458 | // - |cst_value| must be constant. |
459 | void SpecializeLoop(Loop* loop, Instruction* to_version_insn, |
460 | Instruction* cst_value) { |
461 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
462 | |
463 | std::function<bool(uint32_t)> ignore_node; |
464 | ignore_node = [loop](uint32_t bb_id) { return !loop->IsInsideLoop(bb_id); }; |
465 | |
466 | std::vector<std::pair<Instruction*, uint32_t>> use_list; |
467 | def_use_mgr->ForEachUse(to_version_insn, |
468 | [&use_list, &ignore_node, this]( |
469 | Instruction* inst, uint32_t operand_index) { |
470 | BasicBlock* bb = context_->get_instr_block(inst); |
471 | |
472 | if (!bb || ignore_node(bb->id())) { |
473 | // Out of the loop, the specialization does not |
474 | // apply any more. |
475 | return; |
476 | } |
477 | use_list.emplace_back(inst, operand_index); |
478 | }); |
479 | |
480 | // First pass: inject the specialized value into the loop (and only the |
481 | // loop). |
482 | for (auto use : use_list) { |
483 | Instruction* inst = use.first; |
484 | uint32_t operand_index = use.second; |
485 | |
486 | // To also handle switch, cst_value can be nullptr: this case |
487 | // means that we are looking to branch to the default target of |
488 | // the switch. We don't actually know its value so we don't touch |
489 | // it if it not a switch. |
490 | assert(cst_value && "We do not have a value to use." ); |
491 | inst->SetOperand(operand_index, {cst_value->result_id()}); |
492 | def_use_mgr->AnalyzeInstUse(inst); |
493 | } |
494 | } |
495 | |
496 | // Returns true if |var| is dynamically uniform. |
497 | // Note: this is currently approximated as uniform. |
498 | bool IsDynamicallyUniform(Instruction* var, const BasicBlock* entry, |
499 | const DominatorTree& post_dom_tree) { |
500 | assert(post_dom_tree.IsPostDominator()); |
501 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
502 | |
503 | auto it = dynamically_uniform_.find(var->result_id()); |
504 | |
505 | if (it != dynamically_uniform_.end()) return it->second; |
506 | |
507 | analysis::DecorationManager* dec_mgr = context_->get_decoration_mgr(); |
508 | |
509 | bool& is_uniform = dynamically_uniform_[var->result_id()]; |
510 | is_uniform = false; |
511 | |
512 | dec_mgr->WhileEachDecoration(var->result_id(), SpvDecorationUniform, |
513 | [&is_uniform](const Instruction&) { |
514 | is_uniform = true; |
515 | return false; |
516 | }); |
517 | if (is_uniform) { |
518 | return is_uniform; |
519 | } |
520 | |
521 | BasicBlock* parent = context_->get_instr_block(var); |
522 | if (!parent) { |
523 | return is_uniform = true; |
524 | } |
525 | |
526 | if (!post_dom_tree.Dominates(parent->id(), entry->id())) { |
527 | return is_uniform = false; |
528 | } |
529 | if (var->opcode() == SpvOpLoad) { |
530 | const uint32_t PtrTypeId = |
531 | def_use_mgr->GetDef(var->GetSingleWordInOperand(0))->type_id(); |
532 | const Instruction* PtrTypeInst = def_use_mgr->GetDef(PtrTypeId); |
533 | uint32_t storage_class = |
534 | PtrTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx); |
535 | if (storage_class != SpvStorageClassUniform && |
536 | storage_class != SpvStorageClassUniformConstant) { |
537 | return is_uniform = false; |
538 | } |
539 | } else { |
540 | if (!context_->IsCombinatorInstruction(var)) { |
541 | return is_uniform = false; |
542 | } |
543 | } |
544 | |
545 | return is_uniform = var->WhileEachInId([entry, &post_dom_tree, |
546 | this](const uint32_t* id) { |
547 | return IsDynamicallyUniform(context_->get_def_use_mgr()->GetDef(*id), |
548 | entry, post_dom_tree); |
549 | }); |
550 | } |
551 | |
552 | // Returns true if |insn| is not a constant, but is loop invariant and |
553 | // dynamically uniform. |
554 | bool IsConditionNonConstantLoopInvariant(Instruction* insn) { |
555 | assert(insn->IsBranch()); |
556 | assert(insn->opcode() != SpvOpBranch); |
557 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
558 | |
559 | Instruction* condition = def_use_mgr->GetDef(insn->GetOperand(0).words[0]); |
560 | if (condition->IsConstant()) { |
561 | return false; |
562 | } |
563 | |
564 | if (loop_->IsInsideLoop(condition)) { |
565 | return false; |
566 | } |
567 | |
568 | return IsDynamicallyUniform( |
569 | condition, function_->entry().get(), |
570 | context_->GetPostDominatorAnalysis(function_)->GetDomTree()); |
571 | } |
572 | }; |
573 | |
574 | } // namespace |
575 | |
576 | Pass::Status LoopUnswitchPass::Process() { |
577 | bool modified = false; |
578 | Module* module = context()->module(); |
579 | |
580 | // Process each function in the module |
581 | for (Function& f : *module) { |
582 | modified |= ProcessFunction(&f); |
583 | } |
584 | |
585 | return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; |
586 | } |
587 | |
588 | bool LoopUnswitchPass::ProcessFunction(Function* f) { |
589 | bool modified = false; |
590 | std::unordered_set<Loop*> processed_loop; |
591 | |
592 | LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f); |
593 | |
594 | bool loop_changed = true; |
595 | while (loop_changed) { |
596 | loop_changed = false; |
597 | for (Loop& loop : |
598 | make_range(++TreeDFIterator<Loop>(loop_descriptor.GetDummyRootLoop()), |
599 | TreeDFIterator<Loop>())) { |
600 | if (processed_loop.count(&loop)) continue; |
601 | processed_loop.insert(&loop); |
602 | |
603 | LoopUnswitch unswitcher(context(), f, &loop, &loop_descriptor); |
604 | while (unswitcher.CanUnswitchLoop()) { |
605 | if (!loop.IsLCSSA()) { |
606 | LoopUtils(context(), &loop).MakeLoopClosedSSA(); |
607 | } |
608 | modified = true; |
609 | loop_changed = true; |
610 | unswitcher.PerformUnswitch(); |
611 | } |
612 | if (loop_changed) break; |
613 | } |
614 | } |
615 | |
616 | return modified; |
617 | } |
618 | |
619 | } // namespace opt |
620 | } // namespace spvtools |
621 | |