| 1 | // Copyright (c) 2018 Google LLC. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "source/opt/loop_unswitch_pass.h" |
| 16 | |
| 17 | #include <functional> |
| 18 | #include <list> |
| 19 | #include <memory> |
| 20 | #include <type_traits> |
| 21 | #include <unordered_map> |
| 22 | #include <unordered_set> |
| 23 | #include <utility> |
| 24 | #include <vector> |
| 25 | |
| 26 | #include "source/opt/basic_block.h" |
| 27 | #include "source/opt/dominator_tree.h" |
| 28 | #include "source/opt/fold.h" |
| 29 | #include "source/opt/function.h" |
| 30 | #include "source/opt/instruction.h" |
| 31 | #include "source/opt/ir_builder.h" |
| 32 | #include "source/opt/ir_context.h" |
| 33 | #include "source/opt/loop_descriptor.h" |
| 34 | |
| 35 | #include "source/opt/loop_utils.h" |
| 36 | |
| 37 | namespace spvtools { |
| 38 | namespace opt { |
| 39 | namespace { |
| 40 | |
| 41 | static const uint32_t kTypePointerStorageClassInIdx = 0; |
| 42 | |
| 43 | } // anonymous namespace |
| 44 | |
| 45 | namespace { |
| 46 | |
| 47 | // This class handle the unswitch procedure for a given loop. |
| 48 | // The unswitch will not happen if: |
| 49 | // - The loop has any instruction that will prevent it; |
| 50 | // - The loop invariant condition is not uniform. |
| 51 | class LoopUnswitch { |
| 52 | public: |
| 53 | LoopUnswitch(IRContext* context, Function* function, Loop* loop, |
| 54 | LoopDescriptor* loop_desc) |
| 55 | : function_(function), |
| 56 | loop_(loop), |
| 57 | loop_desc_(*loop_desc), |
| 58 | context_(context), |
| 59 | switch_block_(nullptr) {} |
| 60 | |
| 61 | // Returns true if the loop can be unswitched. |
| 62 | // Can be unswitch if: |
| 63 | // - The loop has no instructions that prevents it (such as barrier); |
| 64 | // - The loop has one conditional branch or switch that do not depends on the |
| 65 | // loop; |
| 66 | // - The loop invariant condition is uniform; |
| 67 | bool CanUnswitchLoop() { |
| 68 | if (switch_block_) return true; |
| 69 | if (loop_->IsSafeToClone()) return false; |
| 70 | |
| 71 | CFG& cfg = *context_->cfg(); |
| 72 | |
| 73 | for (uint32_t bb_id : loop_->GetBlocks()) { |
| 74 | BasicBlock* bb = cfg.block(bb_id); |
| 75 | if (loop_->GetLatchBlock() == bb) { |
| 76 | continue; |
| 77 | } |
| 78 | |
| 79 | if (bb->terminator()->IsBranch() && |
| 80 | bb->terminator()->opcode() != SpvOpBranch) { |
| 81 | if (IsConditionNonConstantLoopInvariant(bb->terminator())) { |
| 82 | switch_block_ = bb; |
| 83 | break; |
| 84 | } |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | return switch_block_; |
| 89 | } |
| 90 | |
| 91 | // Return the iterator to the basic block |bb|. |
| 92 | Function::iterator FindBasicBlockPosition(BasicBlock* bb_to_find) { |
| 93 | Function::iterator it = function_->FindBlock(bb_to_find->id()); |
| 94 | assert(it != function_->end() && "Basic Block not found" ); |
| 95 | return it; |
| 96 | } |
| 97 | |
| 98 | // Creates a new basic block and insert it into the function |fn| at the |
| 99 | // position |ip|. This function preserves the def/use and instr to block |
| 100 | // managers. |
| 101 | BasicBlock* CreateBasicBlock(Function::iterator ip) { |
| 102 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
| 103 | |
| 104 | // TODO(1841): Handle id overflow. |
| 105 | BasicBlock* bb = &*ip.InsertBefore(std::unique_ptr<BasicBlock>( |
| 106 | new BasicBlock(std::unique_ptr<Instruction>(new Instruction( |
| 107 | context_, SpvOpLabel, 0, context_->TakeNextId(), {}))))); |
| 108 | bb->SetParent(function_); |
| 109 | def_use_mgr->AnalyzeInstDef(bb->GetLabelInst()); |
| 110 | context_->set_instr_block(bb->GetLabelInst(), bb); |
| 111 | |
| 112 | return bb; |
| 113 | } |
| 114 | |
| 115 | Instruction* GetValueForDefaultPathForSwitch(Instruction* switch_inst) { |
| 116 | assert(switch_inst->opcode() == SpvOpSwitch && |
| 117 | "The given instructoin must be an OpSwitch." ); |
| 118 | |
| 119 | // Find a value that can be used to select the default path. |
| 120 | // If none are possible, then it will just use 0. The value does not matter |
| 121 | // because this path will never be taken becaues the new switch outside of |
| 122 | // the loop cannot select this path either. |
| 123 | std::vector<uint32_t> existing_values; |
| 124 | for (uint32_t i = 2; i < switch_inst->NumInOperands(); i += 2) { |
| 125 | existing_values.push_back(switch_inst->GetSingleWordInOperand(i)); |
| 126 | } |
| 127 | std::sort(existing_values.begin(), existing_values.end()); |
| 128 | uint32_t value_for_default_path = 0; |
| 129 | if (existing_values.size() < std::numeric_limits<uint32_t>::max()) { |
| 130 | for (value_for_default_path = 0; |
| 131 | value_for_default_path < existing_values.size(); |
| 132 | value_for_default_path++) { |
| 133 | if (existing_values[value_for_default_path] != value_for_default_path) { |
| 134 | break; |
| 135 | } |
| 136 | } |
| 137 | } |
| 138 | InstructionBuilder builder( |
| 139 | context_, static_cast<Instruction*>(nullptr), |
| 140 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); |
| 141 | return builder.GetUintConstant(value_for_default_path); |
| 142 | } |
| 143 | |
| 144 | // Unswitches |loop_|. |
| 145 | void PerformUnswitch() { |
| 146 | assert(CanUnswitchLoop() && |
| 147 | "Cannot unswitch if there is not constant condition" ); |
| 148 | assert(loop_->GetPreHeaderBlock() && "This loop has no pre-header block" ); |
| 149 | assert(loop_->IsLCSSA() && "This loop is not in LCSSA form" ); |
| 150 | |
| 151 | CFG& cfg = *context_->cfg(); |
| 152 | DominatorTree* dom_tree = |
| 153 | &context_->GetDominatorAnalysis(function_)->GetDomTree(); |
| 154 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
| 155 | LoopUtils loop_utils(context_, loop_); |
| 156 | |
| 157 | ////////////////////////////////////////////////////////////////////////////// |
| 158 | // Step 1: Create the if merge block for structured modules. |
| 159 | // To do so, the |loop_| merge block will become the if's one and we |
| 160 | // create a merge for the loop. This will limit the amount of duplicated |
| 161 | // code the structured control flow imposes. |
| 162 | // For non structured program, the new loop will be connected to |
| 163 | // the old loop's exit blocks. |
| 164 | ////////////////////////////////////////////////////////////////////////////// |
| 165 | |
| 166 | // Get the merge block if it exists. |
| 167 | BasicBlock* if_merge_block = loop_->GetMergeBlock(); |
| 168 | // The merge block is only created if the loop has a unique exit block. We |
| 169 | // have this guarantee for structured loops, for compute loop it will |
| 170 | // trivially help maintain both a structured-like form and LCSAA. |
| 171 | BasicBlock* loop_merge_block = |
| 172 | if_merge_block |
| 173 | ? CreateBasicBlock(FindBasicBlockPosition(if_merge_block)) |
| 174 | : nullptr; |
| 175 | if (loop_merge_block) { |
| 176 | // Add the instruction and update managers. |
| 177 | InstructionBuilder builder( |
| 178 | context_, loop_merge_block, |
| 179 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); |
| 180 | builder.AddBranch(if_merge_block->id()); |
| 181 | builder.SetInsertPoint(&*loop_merge_block->begin()); |
| 182 | cfg.RegisterBlock(loop_merge_block); |
| 183 | def_use_mgr->AnalyzeInstDef(loop_merge_block->GetLabelInst()); |
| 184 | // Update CFG. |
| 185 | if_merge_block->ForEachPhiInst( |
| 186 | [loop_merge_block, &builder, this](Instruction* phi) { |
| 187 | Instruction* cloned = phi->Clone(context_); |
| 188 | cloned->SetResultId(TakeNextId()); |
| 189 | builder.AddInstruction(std::unique_ptr<Instruction>(cloned)); |
| 190 | phi->SetInOperand(0, {cloned->result_id()}); |
| 191 | phi->SetInOperand(1, {loop_merge_block->id()}); |
| 192 | for (uint32_t j = phi->NumInOperands() - 1; j > 1; j--) |
| 193 | phi->RemoveInOperand(j); |
| 194 | }); |
| 195 | // Copy the predecessor list (will get invalidated otherwise). |
| 196 | std::vector<uint32_t> preds = cfg.preds(if_merge_block->id()); |
| 197 | for (uint32_t pid : preds) { |
| 198 | if (pid == loop_merge_block->id()) continue; |
| 199 | BasicBlock* p_bb = cfg.block(pid); |
| 200 | p_bb->ForEachSuccessorLabel( |
| 201 | [if_merge_block, loop_merge_block](uint32_t* id) { |
| 202 | if (*id == if_merge_block->id()) *id = loop_merge_block->id(); |
| 203 | }); |
| 204 | cfg.AddEdge(pid, loop_merge_block->id()); |
| 205 | } |
| 206 | cfg.RemoveNonExistingEdges(if_merge_block->id()); |
| 207 | // Update loop descriptor. |
| 208 | if (Loop* ploop = loop_->GetParent()) { |
| 209 | ploop->AddBasicBlock(loop_merge_block); |
| 210 | loop_desc_.SetBasicBlockToLoop(loop_merge_block->id(), ploop); |
| 211 | } |
| 212 | // Update the dominator tree. |
| 213 | DominatorTreeNode* loop_merge_dtn = |
| 214 | dom_tree->GetOrInsertNode(loop_merge_block); |
| 215 | DominatorTreeNode* if_merge_block_dtn = |
| 216 | dom_tree->GetOrInsertNode(if_merge_block); |
| 217 | loop_merge_dtn->parent_ = if_merge_block_dtn->parent_; |
| 218 | loop_merge_dtn->children_.push_back(if_merge_block_dtn); |
| 219 | loop_merge_dtn->parent_->children_.push_back(loop_merge_dtn); |
| 220 | if_merge_block_dtn->parent_->children_.erase(std::find( |
| 221 | if_merge_block_dtn->parent_->children_.begin(), |
| 222 | if_merge_block_dtn->parent_->children_.end(), if_merge_block_dtn)); |
| 223 | |
| 224 | loop_->SetMergeBlock(loop_merge_block); |
| 225 | } |
| 226 | |
| 227 | //////////////////////////////////////////////////////////////////////////// |
| 228 | // Step 2: Build a new preheader for |loop_|, use the old one |
| 229 | // for the invariant branch. |
| 230 | //////////////////////////////////////////////////////////////////////////// |
| 231 | |
| 232 | BasicBlock* if_block = loop_->GetPreHeaderBlock(); |
| 233 | // If this preheader is the parent loop header, |
| 234 | // we need to create a dedicated block for the if. |
| 235 | BasicBlock* = |
| 236 | CreateBasicBlock(++FindBasicBlockPosition(if_block)); |
| 237 | InstructionBuilder( |
| 238 | context_, loop_pre_header, |
| 239 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) |
| 240 | .AddBranch(loop_->GetHeaderBlock()->id()); |
| 241 | |
| 242 | if_block->tail()->SetInOperand(0, {loop_pre_header->id()}); |
| 243 | |
| 244 | // Update loop descriptor. |
| 245 | if (Loop* ploop = loop_desc_[if_block]) { |
| 246 | ploop->AddBasicBlock(loop_pre_header); |
| 247 | loop_desc_.SetBasicBlockToLoop(loop_pre_header->id(), ploop); |
| 248 | } |
| 249 | |
| 250 | // Update the CFG. |
| 251 | cfg.RegisterBlock(loop_pre_header); |
| 252 | def_use_mgr->AnalyzeInstDef(loop_pre_header->GetLabelInst()); |
| 253 | cfg.AddEdge(if_block->id(), loop_pre_header->id()); |
| 254 | cfg.RemoveNonExistingEdges(loop_->GetHeaderBlock()->id()); |
| 255 | |
| 256 | loop_->GetHeaderBlock()->ForEachPhiInst( |
| 257 | [loop_pre_header, if_block](Instruction* phi) { |
| 258 | phi->ForEachInId([loop_pre_header, if_block](uint32_t* id) { |
| 259 | if (*id == if_block->id()) { |
| 260 | *id = loop_pre_header->id(); |
| 261 | } |
| 262 | }); |
| 263 | }); |
| 264 | loop_->SetPreHeaderBlock(loop_pre_header); |
| 265 | |
| 266 | // Update the dominator tree. |
| 267 | DominatorTreeNode* = |
| 268 | dom_tree->GetOrInsertNode(loop_pre_header); |
| 269 | DominatorTreeNode* if_block_dtn = dom_tree->GetTreeNode(if_block); |
| 270 | loop_pre_header_dtn->parent_ = if_block_dtn; |
| 271 | assert( |
| 272 | if_block_dtn->children_.size() == 1 && |
| 273 | "A loop preheader should only have the header block as a child in the " |
| 274 | "dominator tree" ); |
| 275 | loop_pre_header_dtn->children_.push_back(if_block_dtn->children_[0]); |
| 276 | if_block_dtn->children_.clear(); |
| 277 | if_block_dtn->children_.push_back(loop_pre_header_dtn); |
| 278 | |
| 279 | // Make domination queries valid. |
| 280 | dom_tree->ResetDFNumbering(); |
| 281 | |
| 282 | // Compute an ordered list of basic block to clone: loop blocks + pre-header |
| 283 | // + merge block. |
| 284 | loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks_, true, true); |
| 285 | |
| 286 | ///////////////////////////// |
| 287 | // Do the actual unswitch: // |
| 288 | // - Clone the loop // |
| 289 | // - Connect exits // |
| 290 | // - Specialize the loop // |
| 291 | ///////////////////////////// |
| 292 | |
| 293 | Instruction* iv_condition = &*switch_block_->tail(); |
| 294 | SpvOp iv_opcode = iv_condition->opcode(); |
| 295 | Instruction* condition = |
| 296 | def_use_mgr->GetDef(iv_condition->GetOperand(0).words[0]); |
| 297 | |
| 298 | analysis::ConstantManager* cst_mgr = context_->get_constant_mgr(); |
| 299 | const analysis::Type* cond_type = |
| 300 | context_->get_type_mgr()->GetType(condition->type_id()); |
| 301 | |
| 302 | // Build the list of value for which we need to clone and specialize the |
| 303 | // loop. |
| 304 | std::vector<std::pair<Instruction*, BasicBlock*>> constant_branch; |
| 305 | // Special case for the original loop |
| 306 | Instruction* original_loop_constant_value; |
| 307 | if (iv_opcode == SpvOpBranchConditional) { |
| 308 | constant_branch.emplace_back( |
| 309 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {0})), |
| 310 | nullptr); |
| 311 | original_loop_constant_value = |
| 312 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {1})); |
| 313 | } else { |
| 314 | // We are looking to take the default branch, so we can't provide a |
| 315 | // specific value. |
| 316 | original_loop_constant_value = |
| 317 | GetValueForDefaultPathForSwitch(iv_condition); |
| 318 | |
| 319 | for (uint32_t i = 2; i < iv_condition->NumInOperands(); i += 2) { |
| 320 | constant_branch.emplace_back( |
| 321 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant( |
| 322 | cond_type, iv_condition->GetInOperand(i).words)), |
| 323 | nullptr); |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | // Get the loop landing pads. |
| 328 | std::unordered_set<uint32_t> if_merging_blocks; |
| 329 | std::function<bool(uint32_t)> is_from_original_loop; |
| 330 | if (loop_->GetHeaderBlock()->GetLoopMergeInst()) { |
| 331 | if_merging_blocks.insert(if_merge_block->id()); |
| 332 | is_from_original_loop = [this](uint32_t id) { |
| 333 | return loop_->IsInsideLoop(id) || loop_->GetMergeBlock()->id() == id; |
| 334 | }; |
| 335 | } else { |
| 336 | loop_->GetExitBlocks(&if_merging_blocks); |
| 337 | is_from_original_loop = [this](uint32_t id) { |
| 338 | return loop_->IsInsideLoop(id); |
| 339 | }; |
| 340 | } |
| 341 | |
| 342 | for (auto& specialisation_pair : constant_branch) { |
| 343 | Instruction* specialisation_value = specialisation_pair.first; |
| 344 | ////////////////////////////////////////////////////////// |
| 345 | // Step 3: Duplicate |loop_|. |
| 346 | ////////////////////////////////////////////////////////// |
| 347 | LoopUtils::LoopCloningResult clone_result; |
| 348 | |
| 349 | Loop* cloned_loop = |
| 350 | loop_utils.CloneLoop(&clone_result, ordered_loop_blocks_); |
| 351 | specialisation_pair.second = cloned_loop->GetPreHeaderBlock(); |
| 352 | |
| 353 | //////////////////////////////////// |
| 354 | // Step 4: Specialize the loop. // |
| 355 | //////////////////////////////////// |
| 356 | |
| 357 | { |
| 358 | SpecializeLoop(cloned_loop, condition, specialisation_value); |
| 359 | |
| 360 | /////////////////////////////////////////////////////////// |
| 361 | // Step 5: Connect convergent edges to the landing pads. // |
| 362 | /////////////////////////////////////////////////////////// |
| 363 | |
| 364 | for (uint32_t merge_bb_id : if_merging_blocks) { |
| 365 | BasicBlock* merge = context_->cfg()->block(merge_bb_id); |
| 366 | // We are in LCSSA so we only care about phi instructions. |
| 367 | merge->ForEachPhiInst( |
| 368 | [is_from_original_loop, &clone_result](Instruction* phi) { |
| 369 | uint32_t num_in_operands = phi->NumInOperands(); |
| 370 | for (uint32_t i = 0; i < num_in_operands; i += 2) { |
| 371 | uint32_t pred = phi->GetSingleWordInOperand(i + 1); |
| 372 | if (is_from_original_loop(pred)) { |
| 373 | pred = clone_result.value_map_.at(pred); |
| 374 | uint32_t incoming_value_id = phi->GetSingleWordInOperand(i); |
| 375 | // Not all the incoming values are coming from the loop. |
| 376 | ValueMapTy::iterator new_value = |
| 377 | clone_result.value_map_.find(incoming_value_id); |
| 378 | if (new_value != clone_result.value_map_.end()) { |
| 379 | incoming_value_id = new_value->second; |
| 380 | } |
| 381 | phi->AddOperand({SPV_OPERAND_TYPE_ID, {incoming_value_id}}); |
| 382 | phi->AddOperand({SPV_OPERAND_TYPE_ID, {pred}}); |
| 383 | } |
| 384 | } |
| 385 | }); |
| 386 | } |
| 387 | } |
| 388 | function_->AddBasicBlocks(clone_result.cloned_bb_.begin(), |
| 389 | clone_result.cloned_bb_.end(), |
| 390 | ++FindBasicBlockPosition(if_block)); |
| 391 | } |
| 392 | |
| 393 | // Specialize the existing loop. |
| 394 | SpecializeLoop(loop_, condition, original_loop_constant_value); |
| 395 | BasicBlock* original_loop_target = loop_->GetPreHeaderBlock(); |
| 396 | |
| 397 | ///////////////////////////////////// |
| 398 | // Finally: connect the new loops. // |
| 399 | ///////////////////////////////////// |
| 400 | |
| 401 | // Delete the old jump |
| 402 | context_->KillInst(&*if_block->tail()); |
| 403 | InstructionBuilder builder(context_, if_block); |
| 404 | if (iv_opcode == SpvOpBranchConditional) { |
| 405 | assert(constant_branch.size() == 1); |
| 406 | builder.AddConditionalBranch( |
| 407 | condition->result_id(), original_loop_target->id(), |
| 408 | constant_branch[0].second->id(), |
| 409 | if_merge_block ? if_merge_block->id() : kInvalidId); |
| 410 | } else { |
| 411 | std::vector<std::pair<Operand::OperandData, uint32_t>> targets; |
| 412 | for (auto& t : constant_branch) { |
| 413 | targets.emplace_back(t.first->GetInOperand(0).words, t.second->id()); |
| 414 | } |
| 415 | |
| 416 | builder.AddSwitch(condition->result_id(), original_loop_target->id(), |
| 417 | targets, |
| 418 | if_merge_block ? if_merge_block->id() : kInvalidId); |
| 419 | } |
| 420 | |
| 421 | switch_block_ = nullptr; |
| 422 | ordered_loop_blocks_.clear(); |
| 423 | |
| 424 | context_->InvalidateAnalysesExceptFor( |
| 425 | IRContext::Analysis::kAnalysisLoopAnalysis); |
| 426 | } |
| 427 | |
| 428 | private: |
| 429 | using ValueMapTy = std::unordered_map<uint32_t, uint32_t>; |
| 430 | using BlockMapTy = std::unordered_map<uint32_t, BasicBlock*>; |
| 431 | |
| 432 | Function* function_; |
| 433 | Loop* loop_; |
| 434 | LoopDescriptor& loop_desc_; |
| 435 | IRContext* context_; |
| 436 | |
| 437 | BasicBlock* switch_block_; |
| 438 | // Map between instructions and if they are dynamically uniform. |
| 439 | std::unordered_map<uint32_t, bool> dynamically_uniform_; |
| 440 | // The loop basic blocks in structured order. |
| 441 | std::vector<BasicBlock*> ordered_loop_blocks_; |
| 442 | |
| 443 | // Returns the next usable id for the context. |
| 444 | uint32_t TakeNextId() { |
| 445 | // TODO(1841): Handle id overflow. |
| 446 | return context_->TakeNextId(); |
| 447 | } |
| 448 | |
| 449 | // Simplifies |loop| assuming the instruction |to_version_insn| takes the |
| 450 | // value |cst_value|. |block_range| is an iterator range returning the loop |
| 451 | // basic blocks in a structured order (dominator first). |
| 452 | // The function will ignore basic blocks returned by |block_range| if they |
| 453 | // does not belong to the loop. |
| 454 | // The set |dead_blocks| will contain all the dead basic blocks. |
| 455 | // |
| 456 | // Requirements: |
| 457 | // - |loop| must be in the LCSSA form; |
| 458 | // - |cst_value| must be constant. |
| 459 | void SpecializeLoop(Loop* loop, Instruction* to_version_insn, |
| 460 | Instruction* cst_value) { |
| 461 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
| 462 | |
| 463 | std::function<bool(uint32_t)> ignore_node; |
| 464 | ignore_node = [loop](uint32_t bb_id) { return !loop->IsInsideLoop(bb_id); }; |
| 465 | |
| 466 | std::vector<std::pair<Instruction*, uint32_t>> use_list; |
| 467 | def_use_mgr->ForEachUse(to_version_insn, |
| 468 | [&use_list, &ignore_node, this]( |
| 469 | Instruction* inst, uint32_t operand_index) { |
| 470 | BasicBlock* bb = context_->get_instr_block(inst); |
| 471 | |
| 472 | if (!bb || ignore_node(bb->id())) { |
| 473 | // Out of the loop, the specialization does not |
| 474 | // apply any more. |
| 475 | return; |
| 476 | } |
| 477 | use_list.emplace_back(inst, operand_index); |
| 478 | }); |
| 479 | |
| 480 | // First pass: inject the specialized value into the loop (and only the |
| 481 | // loop). |
| 482 | for (auto use : use_list) { |
| 483 | Instruction* inst = use.first; |
| 484 | uint32_t operand_index = use.second; |
| 485 | |
| 486 | // To also handle switch, cst_value can be nullptr: this case |
| 487 | // means that we are looking to branch to the default target of |
| 488 | // the switch. We don't actually know its value so we don't touch |
| 489 | // it if it not a switch. |
| 490 | assert(cst_value && "We do not have a value to use." ); |
| 491 | inst->SetOperand(operand_index, {cst_value->result_id()}); |
| 492 | def_use_mgr->AnalyzeInstUse(inst); |
| 493 | } |
| 494 | } |
| 495 | |
| 496 | // Returns true if |var| is dynamically uniform. |
| 497 | // Note: this is currently approximated as uniform. |
| 498 | bool IsDynamicallyUniform(Instruction* var, const BasicBlock* entry, |
| 499 | const DominatorTree& post_dom_tree) { |
| 500 | assert(post_dom_tree.IsPostDominator()); |
| 501 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
| 502 | |
| 503 | auto it = dynamically_uniform_.find(var->result_id()); |
| 504 | |
| 505 | if (it != dynamically_uniform_.end()) return it->second; |
| 506 | |
| 507 | analysis::DecorationManager* dec_mgr = context_->get_decoration_mgr(); |
| 508 | |
| 509 | bool& is_uniform = dynamically_uniform_[var->result_id()]; |
| 510 | is_uniform = false; |
| 511 | |
| 512 | dec_mgr->WhileEachDecoration(var->result_id(), SpvDecorationUniform, |
| 513 | [&is_uniform](const Instruction&) { |
| 514 | is_uniform = true; |
| 515 | return false; |
| 516 | }); |
| 517 | if (is_uniform) { |
| 518 | return is_uniform; |
| 519 | } |
| 520 | |
| 521 | BasicBlock* parent = context_->get_instr_block(var); |
| 522 | if (!parent) { |
| 523 | return is_uniform = true; |
| 524 | } |
| 525 | |
| 526 | if (!post_dom_tree.Dominates(parent->id(), entry->id())) { |
| 527 | return is_uniform = false; |
| 528 | } |
| 529 | if (var->opcode() == SpvOpLoad) { |
| 530 | const uint32_t PtrTypeId = |
| 531 | def_use_mgr->GetDef(var->GetSingleWordInOperand(0))->type_id(); |
| 532 | const Instruction* PtrTypeInst = def_use_mgr->GetDef(PtrTypeId); |
| 533 | uint32_t storage_class = |
| 534 | PtrTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx); |
| 535 | if (storage_class != SpvStorageClassUniform && |
| 536 | storage_class != SpvStorageClassUniformConstant) { |
| 537 | return is_uniform = false; |
| 538 | } |
| 539 | } else { |
| 540 | if (!context_->IsCombinatorInstruction(var)) { |
| 541 | return is_uniform = false; |
| 542 | } |
| 543 | } |
| 544 | |
| 545 | return is_uniform = var->WhileEachInId([entry, &post_dom_tree, |
| 546 | this](const uint32_t* id) { |
| 547 | return IsDynamicallyUniform(context_->get_def_use_mgr()->GetDef(*id), |
| 548 | entry, post_dom_tree); |
| 549 | }); |
| 550 | } |
| 551 | |
| 552 | // Returns true if |insn| is not a constant, but is loop invariant and |
| 553 | // dynamically uniform. |
| 554 | bool IsConditionNonConstantLoopInvariant(Instruction* insn) { |
| 555 | assert(insn->IsBranch()); |
| 556 | assert(insn->opcode() != SpvOpBranch); |
| 557 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
| 558 | |
| 559 | Instruction* condition = def_use_mgr->GetDef(insn->GetOperand(0).words[0]); |
| 560 | if (condition->IsConstant()) { |
| 561 | return false; |
| 562 | } |
| 563 | |
| 564 | if (loop_->IsInsideLoop(condition)) { |
| 565 | return false; |
| 566 | } |
| 567 | |
| 568 | return IsDynamicallyUniform( |
| 569 | condition, function_->entry().get(), |
| 570 | context_->GetPostDominatorAnalysis(function_)->GetDomTree()); |
| 571 | } |
| 572 | }; |
| 573 | |
| 574 | } // namespace |
| 575 | |
| 576 | Pass::Status LoopUnswitchPass::Process() { |
| 577 | bool modified = false; |
| 578 | Module* module = context()->module(); |
| 579 | |
| 580 | // Process each function in the module |
| 581 | for (Function& f : *module) { |
| 582 | modified |= ProcessFunction(&f); |
| 583 | } |
| 584 | |
| 585 | return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; |
| 586 | } |
| 587 | |
| 588 | bool LoopUnswitchPass::ProcessFunction(Function* f) { |
| 589 | bool modified = false; |
| 590 | std::unordered_set<Loop*> processed_loop; |
| 591 | |
| 592 | LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f); |
| 593 | |
| 594 | bool loop_changed = true; |
| 595 | while (loop_changed) { |
| 596 | loop_changed = false; |
| 597 | for (Loop& loop : |
| 598 | make_range(++TreeDFIterator<Loop>(loop_descriptor.GetDummyRootLoop()), |
| 599 | TreeDFIterator<Loop>())) { |
| 600 | if (processed_loop.count(&loop)) continue; |
| 601 | processed_loop.insert(&loop); |
| 602 | |
| 603 | LoopUnswitch unswitcher(context(), f, &loop, &loop_descriptor); |
| 604 | while (unswitcher.CanUnswitchLoop()) { |
| 605 | if (!loop.IsLCSSA()) { |
| 606 | LoopUtils(context(), &loop).MakeLoopClosedSSA(); |
| 607 | } |
| 608 | modified = true; |
| 609 | loop_changed = true; |
| 610 | unswitcher.PerformUnswitch(); |
| 611 | } |
| 612 | if (loop_changed) break; |
| 613 | } |
| 614 | } |
| 615 | |
| 616 | return modified; |
| 617 | } |
| 618 | |
| 619 | } // namespace opt |
| 620 | } // namespace spvtools |
| 621 | |