| 1 | // Copyright (c) 2018 Google LLC. | 
|---|
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 4 | // you may not use this file except in compliance with the License. | 
|---|
| 5 | // You may obtain a copy of the License at | 
|---|
| 6 | // | 
|---|
| 7 | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 8 | // | 
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 12 | // See the License for the specific language governing permissions and | 
|---|
| 13 | // limitations under the License. | 
|---|
| 14 |  | 
|---|
| 15 | #include "source/opt/loop_unswitch_pass.h" | 
|---|
| 16 |  | 
|---|
| 17 | #include <functional> | 
|---|
| 18 | #include <list> | 
|---|
| 19 | #include <memory> | 
|---|
| 20 | #include <type_traits> | 
|---|
| 21 | #include <unordered_map> | 
|---|
| 22 | #include <unordered_set> | 
|---|
| 23 | #include <utility> | 
|---|
| 24 | #include <vector> | 
|---|
| 25 |  | 
|---|
| 26 | #include "source/opt/basic_block.h" | 
|---|
| 27 | #include "source/opt/dominator_tree.h" | 
|---|
| 28 | #include "source/opt/fold.h" | 
|---|
| 29 | #include "source/opt/function.h" | 
|---|
| 30 | #include "source/opt/instruction.h" | 
|---|
| 31 | #include "source/opt/ir_builder.h" | 
|---|
| 32 | #include "source/opt/ir_context.h" | 
|---|
| 33 | #include "source/opt/loop_descriptor.h" | 
|---|
| 34 |  | 
|---|
| 35 | #include "source/opt/loop_utils.h" | 
|---|
| 36 |  | 
|---|
| 37 | namespace spvtools { | 
|---|
| 38 | namespace opt { | 
|---|
| 39 | namespace { | 
|---|
| 40 |  | 
|---|
| 41 | static const uint32_t kTypePointerStorageClassInIdx = 0; | 
|---|
| 42 |  | 
|---|
| 43 | }  // anonymous namespace | 
|---|
| 44 |  | 
|---|
| 45 | namespace { | 
|---|
| 46 |  | 
|---|
| 47 | // This class handle the unswitch procedure for a given loop. | 
|---|
| 48 | // The unswitch will not happen if: | 
|---|
| 49 | //  - The loop has any instruction that will prevent it; | 
|---|
| 50 | //  - The loop invariant condition is not uniform. | 
|---|
| 51 | class LoopUnswitch { | 
|---|
| 52 | public: | 
|---|
| 53 | LoopUnswitch(IRContext* context, Function* function, Loop* loop, | 
|---|
| 54 | LoopDescriptor* loop_desc) | 
|---|
| 55 | : function_(function), | 
|---|
| 56 | loop_(loop), | 
|---|
| 57 | loop_desc_(*loop_desc), | 
|---|
| 58 | context_(context), | 
|---|
| 59 | switch_block_(nullptr) {} | 
|---|
| 60 |  | 
|---|
| 61 | // Returns true if the loop can be unswitched. | 
|---|
| 62 | // Can be unswitch if: | 
|---|
| 63 | //  - The loop has no instructions that prevents it (such as barrier); | 
|---|
| 64 | //  - The loop has one conditional branch or switch that do not depends on the | 
|---|
| 65 | //  loop; | 
|---|
| 66 | //  - The loop invariant condition is uniform; | 
|---|
| 67 | bool CanUnswitchLoop() { | 
|---|
| 68 | if (switch_block_) return true; | 
|---|
| 69 | if (loop_->IsSafeToClone()) return false; | 
|---|
| 70 |  | 
|---|
| 71 | CFG& cfg = *context_->cfg(); | 
|---|
| 72 |  | 
|---|
| 73 | for (uint32_t bb_id : loop_->GetBlocks()) { | 
|---|
| 74 | BasicBlock* bb = cfg.block(bb_id); | 
|---|
| 75 | if (loop_->GetLatchBlock() == bb) { | 
|---|
| 76 | continue; | 
|---|
| 77 | } | 
|---|
| 78 |  | 
|---|
| 79 | if (bb->terminator()->IsBranch() && | 
|---|
| 80 | bb->terminator()->opcode() != SpvOpBranch) { | 
|---|
| 81 | if (IsConditionNonConstantLoopInvariant(bb->terminator())) { | 
|---|
| 82 | switch_block_ = bb; | 
|---|
| 83 | break; | 
|---|
| 84 | } | 
|---|
| 85 | } | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | return switch_block_; | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | // Return the iterator to the basic block |bb|. | 
|---|
| 92 | Function::iterator FindBasicBlockPosition(BasicBlock* bb_to_find) { | 
|---|
| 93 | Function::iterator it = function_->FindBlock(bb_to_find->id()); | 
|---|
| 94 | assert(it != function_->end() && "Basic Block not found"); | 
|---|
| 95 | return it; | 
|---|
| 96 | } | 
|---|
| 97 |  | 
|---|
| 98 | // Creates a new basic block and insert it into the function |fn| at the | 
|---|
| 99 | // position |ip|. This function preserves the def/use and instr to block | 
|---|
| 100 | // managers. | 
|---|
| 101 | BasicBlock* CreateBasicBlock(Function::iterator ip) { | 
|---|
| 102 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|---|
| 103 |  | 
|---|
| 104 | // TODO(1841): Handle id overflow. | 
|---|
| 105 | BasicBlock* bb = &*ip.InsertBefore(std::unique_ptr<BasicBlock>( | 
|---|
| 106 | new BasicBlock(std::unique_ptr<Instruction>(new Instruction( | 
|---|
| 107 | context_, SpvOpLabel, 0, context_->TakeNextId(), {}))))); | 
|---|
| 108 | bb->SetParent(function_); | 
|---|
| 109 | def_use_mgr->AnalyzeInstDef(bb->GetLabelInst()); | 
|---|
| 110 | context_->set_instr_block(bb->GetLabelInst(), bb); | 
|---|
| 111 |  | 
|---|
| 112 | return bb; | 
|---|
| 113 | } | 
|---|
| 114 |  | 
|---|
| 115 | Instruction* GetValueForDefaultPathForSwitch(Instruction* switch_inst) { | 
|---|
| 116 | assert(switch_inst->opcode() == SpvOpSwitch && | 
|---|
| 117 | "The given instructoin must be an OpSwitch."); | 
|---|
| 118 |  | 
|---|
| 119 | // Find a value that can be used to select the default path. | 
|---|
| 120 | // If none are possible, then it will just use 0.  The value does not matter | 
|---|
| 121 | // because this path will never be taken becaues the new switch outside of | 
|---|
| 122 | // the loop cannot select this path either. | 
|---|
| 123 | std::vector<uint32_t> existing_values; | 
|---|
| 124 | for (uint32_t i = 2; i < switch_inst->NumInOperands(); i += 2) { | 
|---|
| 125 | existing_values.push_back(switch_inst->GetSingleWordInOperand(i)); | 
|---|
| 126 | } | 
|---|
| 127 | std::sort(existing_values.begin(), existing_values.end()); | 
|---|
| 128 | uint32_t value_for_default_path = 0; | 
|---|
| 129 | if (existing_values.size() < std::numeric_limits<uint32_t>::max()) { | 
|---|
| 130 | for (value_for_default_path = 0; | 
|---|
| 131 | value_for_default_path < existing_values.size(); | 
|---|
| 132 | value_for_default_path++) { | 
|---|
| 133 | if (existing_values[value_for_default_path] != value_for_default_path) { | 
|---|
| 134 | break; | 
|---|
| 135 | } | 
|---|
| 136 | } | 
|---|
| 137 | } | 
|---|
| 138 | InstructionBuilder builder( | 
|---|
| 139 | context_, static_cast<Instruction*>(nullptr), | 
|---|
| 140 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|---|
| 141 | return builder.GetUintConstant(value_for_default_path); | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | // Unswitches |loop_|. | 
|---|
| 145 | void PerformUnswitch() { | 
|---|
| 146 | assert(CanUnswitchLoop() && | 
|---|
| 147 | "Cannot unswitch if there is not constant condition"); | 
|---|
| 148 | assert(loop_->GetPreHeaderBlock() && "This loop has no pre-header block"); | 
|---|
| 149 | assert(loop_->IsLCSSA() && "This loop is not in LCSSA form"); | 
|---|
| 150 |  | 
|---|
| 151 | CFG& cfg = *context_->cfg(); | 
|---|
| 152 | DominatorTree* dom_tree = | 
|---|
| 153 | &context_->GetDominatorAnalysis(function_)->GetDomTree(); | 
|---|
| 154 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|---|
| 155 | LoopUtils loop_utils(context_, loop_); | 
|---|
| 156 |  | 
|---|
| 157 | ////////////////////////////////////////////////////////////////////////////// | 
|---|
| 158 | // Step 1: Create the if merge block for structured modules. | 
|---|
| 159 | //    To do so, the |loop_| merge block will become the if's one and we | 
|---|
| 160 | //    create a merge for the loop. This will limit the amount of duplicated | 
|---|
| 161 | //    code the structured control flow imposes. | 
|---|
| 162 | //    For non structured program, the new loop will be connected to | 
|---|
| 163 | //    the old loop's exit blocks. | 
|---|
| 164 | ////////////////////////////////////////////////////////////////////////////// | 
|---|
| 165 |  | 
|---|
| 166 | // Get the merge block if it exists. | 
|---|
| 167 | BasicBlock* if_merge_block = loop_->GetMergeBlock(); | 
|---|
| 168 | // The merge block is only created if the loop has a unique exit block. We | 
|---|
| 169 | // have this guarantee for structured loops, for compute loop it will | 
|---|
| 170 | // trivially help maintain both a structured-like form and LCSAA. | 
|---|
| 171 | BasicBlock* loop_merge_block = | 
|---|
| 172 | if_merge_block | 
|---|
| 173 | ? CreateBasicBlock(FindBasicBlockPosition(if_merge_block)) | 
|---|
| 174 | : nullptr; | 
|---|
| 175 | if (loop_merge_block) { | 
|---|
| 176 | // Add the instruction and update managers. | 
|---|
| 177 | InstructionBuilder builder( | 
|---|
| 178 | context_, loop_merge_block, | 
|---|
| 179 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); | 
|---|
| 180 | builder.AddBranch(if_merge_block->id()); | 
|---|
| 181 | builder.SetInsertPoint(&*loop_merge_block->begin()); | 
|---|
| 182 | cfg.RegisterBlock(loop_merge_block); | 
|---|
| 183 | def_use_mgr->AnalyzeInstDef(loop_merge_block->GetLabelInst()); | 
|---|
| 184 | // Update CFG. | 
|---|
| 185 | if_merge_block->ForEachPhiInst( | 
|---|
| 186 | [loop_merge_block, &builder, this](Instruction* phi) { | 
|---|
| 187 | Instruction* cloned = phi->Clone(context_); | 
|---|
| 188 | cloned->SetResultId(TakeNextId()); | 
|---|
| 189 | builder.AddInstruction(std::unique_ptr<Instruction>(cloned)); | 
|---|
| 190 | phi->SetInOperand(0, {cloned->result_id()}); | 
|---|
| 191 | phi->SetInOperand(1, {loop_merge_block->id()}); | 
|---|
| 192 | for (uint32_t j = phi->NumInOperands() - 1; j > 1; j--) | 
|---|
| 193 | phi->RemoveInOperand(j); | 
|---|
| 194 | }); | 
|---|
| 195 | // Copy the predecessor list (will get invalidated otherwise). | 
|---|
| 196 | std::vector<uint32_t> preds = cfg.preds(if_merge_block->id()); | 
|---|
| 197 | for (uint32_t pid : preds) { | 
|---|
| 198 | if (pid == loop_merge_block->id()) continue; | 
|---|
| 199 | BasicBlock* p_bb = cfg.block(pid); | 
|---|
| 200 | p_bb->ForEachSuccessorLabel( | 
|---|
| 201 | [if_merge_block, loop_merge_block](uint32_t* id) { | 
|---|
| 202 | if (*id == if_merge_block->id()) *id = loop_merge_block->id(); | 
|---|
| 203 | }); | 
|---|
| 204 | cfg.AddEdge(pid, loop_merge_block->id()); | 
|---|
| 205 | } | 
|---|
| 206 | cfg.RemoveNonExistingEdges(if_merge_block->id()); | 
|---|
| 207 | // Update loop descriptor. | 
|---|
| 208 | if (Loop* ploop = loop_->GetParent()) { | 
|---|
| 209 | ploop->AddBasicBlock(loop_merge_block); | 
|---|
| 210 | loop_desc_.SetBasicBlockToLoop(loop_merge_block->id(), ploop); | 
|---|
| 211 | } | 
|---|
| 212 | // Update the dominator tree. | 
|---|
| 213 | DominatorTreeNode* loop_merge_dtn = | 
|---|
| 214 | dom_tree->GetOrInsertNode(loop_merge_block); | 
|---|
| 215 | DominatorTreeNode* if_merge_block_dtn = | 
|---|
| 216 | dom_tree->GetOrInsertNode(if_merge_block); | 
|---|
| 217 | loop_merge_dtn->parent_ = if_merge_block_dtn->parent_; | 
|---|
| 218 | loop_merge_dtn->children_.push_back(if_merge_block_dtn); | 
|---|
| 219 | loop_merge_dtn->parent_->children_.push_back(loop_merge_dtn); | 
|---|
| 220 | if_merge_block_dtn->parent_->children_.erase(std::find( | 
|---|
| 221 | if_merge_block_dtn->parent_->children_.begin(), | 
|---|
| 222 | if_merge_block_dtn->parent_->children_.end(), if_merge_block_dtn)); | 
|---|
| 223 |  | 
|---|
| 224 | loop_->SetMergeBlock(loop_merge_block); | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|
| 227 | //////////////////////////////////////////////////////////////////////////// | 
|---|
| 228 | // Step 2: Build a new preheader for |loop_|, use the old one | 
|---|
| 229 | //         for the invariant branch. | 
|---|
| 230 | //////////////////////////////////////////////////////////////////////////// | 
|---|
| 231 |  | 
|---|
| 232 | BasicBlock* if_block = loop_->GetPreHeaderBlock(); | 
|---|
| 233 | // If this preheader is the parent loop header, | 
|---|
| 234 | // we need to create a dedicated block for the if. | 
|---|
| 235 | BasicBlock*  = | 
|---|
| 236 | CreateBasicBlock(++FindBasicBlockPosition(if_block)); | 
|---|
| 237 | InstructionBuilder( | 
|---|
| 238 | context_, loop_pre_header, | 
|---|
| 239 | IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) | 
|---|
| 240 | .AddBranch(loop_->GetHeaderBlock()->id()); | 
|---|
| 241 |  | 
|---|
| 242 | if_block->tail()->SetInOperand(0, {loop_pre_header->id()}); | 
|---|
| 243 |  | 
|---|
| 244 | // Update loop descriptor. | 
|---|
| 245 | if (Loop* ploop = loop_desc_[if_block]) { | 
|---|
| 246 | ploop->AddBasicBlock(loop_pre_header); | 
|---|
| 247 | loop_desc_.SetBasicBlockToLoop(loop_pre_header->id(), ploop); | 
|---|
| 248 | } | 
|---|
| 249 |  | 
|---|
| 250 | // Update the CFG. | 
|---|
| 251 | cfg.RegisterBlock(loop_pre_header); | 
|---|
| 252 | def_use_mgr->AnalyzeInstDef(loop_pre_header->GetLabelInst()); | 
|---|
| 253 | cfg.AddEdge(if_block->id(), loop_pre_header->id()); | 
|---|
| 254 | cfg.RemoveNonExistingEdges(loop_->GetHeaderBlock()->id()); | 
|---|
| 255 |  | 
|---|
| 256 | loop_->GetHeaderBlock()->ForEachPhiInst( | 
|---|
| 257 | [loop_pre_header, if_block](Instruction* phi) { | 
|---|
| 258 | phi->ForEachInId([loop_pre_header, if_block](uint32_t* id) { | 
|---|
| 259 | if (*id == if_block->id()) { | 
|---|
| 260 | *id = loop_pre_header->id(); | 
|---|
| 261 | } | 
|---|
| 262 | }); | 
|---|
| 263 | }); | 
|---|
| 264 | loop_->SetPreHeaderBlock(loop_pre_header); | 
|---|
| 265 |  | 
|---|
| 266 | // Update the dominator tree. | 
|---|
| 267 | DominatorTreeNode*  = | 
|---|
| 268 | dom_tree->GetOrInsertNode(loop_pre_header); | 
|---|
| 269 | DominatorTreeNode* if_block_dtn = dom_tree->GetTreeNode(if_block); | 
|---|
| 270 | loop_pre_header_dtn->parent_ = if_block_dtn; | 
|---|
| 271 | assert( | 
|---|
| 272 | if_block_dtn->children_.size() == 1 && | 
|---|
| 273 | "A loop preheader should only have the header block as a child in the " | 
|---|
| 274 | "dominator tree"); | 
|---|
| 275 | loop_pre_header_dtn->children_.push_back(if_block_dtn->children_[0]); | 
|---|
| 276 | if_block_dtn->children_.clear(); | 
|---|
| 277 | if_block_dtn->children_.push_back(loop_pre_header_dtn); | 
|---|
| 278 |  | 
|---|
| 279 | // Make domination queries valid. | 
|---|
| 280 | dom_tree->ResetDFNumbering(); | 
|---|
| 281 |  | 
|---|
| 282 | // Compute an ordered list of basic block to clone: loop blocks + pre-header | 
|---|
| 283 | // + merge block. | 
|---|
| 284 | loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks_, true, true); | 
|---|
| 285 |  | 
|---|
| 286 | ///////////////////////////// | 
|---|
| 287 | // Do the actual unswitch: // | 
|---|
| 288 | //   - Clone the loop      // | 
|---|
| 289 | //   - Connect exits       // | 
|---|
| 290 | //   - Specialize the loop // | 
|---|
| 291 | ///////////////////////////// | 
|---|
| 292 |  | 
|---|
| 293 | Instruction* iv_condition = &*switch_block_->tail(); | 
|---|
| 294 | SpvOp iv_opcode = iv_condition->opcode(); | 
|---|
| 295 | Instruction* condition = | 
|---|
| 296 | def_use_mgr->GetDef(iv_condition->GetOperand(0).words[0]); | 
|---|
| 297 |  | 
|---|
| 298 | analysis::ConstantManager* cst_mgr = context_->get_constant_mgr(); | 
|---|
| 299 | const analysis::Type* cond_type = | 
|---|
| 300 | context_->get_type_mgr()->GetType(condition->type_id()); | 
|---|
| 301 |  | 
|---|
| 302 | // Build the list of value for which we need to clone and specialize the | 
|---|
| 303 | // loop. | 
|---|
| 304 | std::vector<std::pair<Instruction*, BasicBlock*>> constant_branch; | 
|---|
| 305 | // Special case for the original loop | 
|---|
| 306 | Instruction* original_loop_constant_value; | 
|---|
| 307 | if (iv_opcode == SpvOpBranchConditional) { | 
|---|
| 308 | constant_branch.emplace_back( | 
|---|
| 309 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {0})), | 
|---|
| 310 | nullptr); | 
|---|
| 311 | original_loop_constant_value = | 
|---|
| 312 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant(cond_type, {1})); | 
|---|
| 313 | } else { | 
|---|
| 314 | // We are looking to take the default branch, so we can't provide a | 
|---|
| 315 | // specific value. | 
|---|
| 316 | original_loop_constant_value = | 
|---|
| 317 | GetValueForDefaultPathForSwitch(iv_condition); | 
|---|
| 318 |  | 
|---|
| 319 | for (uint32_t i = 2; i < iv_condition->NumInOperands(); i += 2) { | 
|---|
| 320 | constant_branch.emplace_back( | 
|---|
| 321 | cst_mgr->GetDefiningInstruction(cst_mgr->GetConstant( | 
|---|
| 322 | cond_type, iv_condition->GetInOperand(i).words)), | 
|---|
| 323 | nullptr); | 
|---|
| 324 | } | 
|---|
| 325 | } | 
|---|
| 326 |  | 
|---|
| 327 | // Get the loop landing pads. | 
|---|
| 328 | std::unordered_set<uint32_t> if_merging_blocks; | 
|---|
| 329 | std::function<bool(uint32_t)> is_from_original_loop; | 
|---|
| 330 | if (loop_->GetHeaderBlock()->GetLoopMergeInst()) { | 
|---|
| 331 | if_merging_blocks.insert(if_merge_block->id()); | 
|---|
| 332 | is_from_original_loop = [this](uint32_t id) { | 
|---|
| 333 | return loop_->IsInsideLoop(id) || loop_->GetMergeBlock()->id() == id; | 
|---|
| 334 | }; | 
|---|
| 335 | } else { | 
|---|
| 336 | loop_->GetExitBlocks(&if_merging_blocks); | 
|---|
| 337 | is_from_original_loop = [this](uint32_t id) { | 
|---|
| 338 | return loop_->IsInsideLoop(id); | 
|---|
| 339 | }; | 
|---|
| 340 | } | 
|---|
| 341 |  | 
|---|
| 342 | for (auto& specialisation_pair : constant_branch) { | 
|---|
| 343 | Instruction* specialisation_value = specialisation_pair.first; | 
|---|
| 344 | ////////////////////////////////////////////////////////// | 
|---|
| 345 | // Step 3: Duplicate |loop_|. | 
|---|
| 346 | ////////////////////////////////////////////////////////// | 
|---|
| 347 | LoopUtils::LoopCloningResult clone_result; | 
|---|
| 348 |  | 
|---|
| 349 | Loop* cloned_loop = | 
|---|
| 350 | loop_utils.CloneLoop(&clone_result, ordered_loop_blocks_); | 
|---|
| 351 | specialisation_pair.second = cloned_loop->GetPreHeaderBlock(); | 
|---|
| 352 |  | 
|---|
| 353 | //////////////////////////////////// | 
|---|
| 354 | // Step 4: Specialize the loop.   // | 
|---|
| 355 | //////////////////////////////////// | 
|---|
| 356 |  | 
|---|
| 357 | { | 
|---|
| 358 | SpecializeLoop(cloned_loop, condition, specialisation_value); | 
|---|
| 359 |  | 
|---|
| 360 | /////////////////////////////////////////////////////////// | 
|---|
| 361 | // Step 5: Connect convergent edges to the landing pads. // | 
|---|
| 362 | /////////////////////////////////////////////////////////// | 
|---|
| 363 |  | 
|---|
| 364 | for (uint32_t merge_bb_id : if_merging_blocks) { | 
|---|
| 365 | BasicBlock* merge = context_->cfg()->block(merge_bb_id); | 
|---|
| 366 | // We are in LCSSA so we only care about phi instructions. | 
|---|
| 367 | merge->ForEachPhiInst( | 
|---|
| 368 | [is_from_original_loop, &clone_result](Instruction* phi) { | 
|---|
| 369 | uint32_t num_in_operands = phi->NumInOperands(); | 
|---|
| 370 | for (uint32_t i = 0; i < num_in_operands; i += 2) { | 
|---|
| 371 | uint32_t pred = phi->GetSingleWordInOperand(i + 1); | 
|---|
| 372 | if (is_from_original_loop(pred)) { | 
|---|
| 373 | pred = clone_result.value_map_.at(pred); | 
|---|
| 374 | uint32_t incoming_value_id = phi->GetSingleWordInOperand(i); | 
|---|
| 375 | // Not all the incoming values are coming from the loop. | 
|---|
| 376 | ValueMapTy::iterator new_value = | 
|---|
| 377 | clone_result.value_map_.find(incoming_value_id); | 
|---|
| 378 | if (new_value != clone_result.value_map_.end()) { | 
|---|
| 379 | incoming_value_id = new_value->second; | 
|---|
| 380 | } | 
|---|
| 381 | phi->AddOperand({SPV_OPERAND_TYPE_ID, {incoming_value_id}}); | 
|---|
| 382 | phi->AddOperand({SPV_OPERAND_TYPE_ID, {pred}}); | 
|---|
| 383 | } | 
|---|
| 384 | } | 
|---|
| 385 | }); | 
|---|
| 386 | } | 
|---|
| 387 | } | 
|---|
| 388 | function_->AddBasicBlocks(clone_result.cloned_bb_.begin(), | 
|---|
| 389 | clone_result.cloned_bb_.end(), | 
|---|
| 390 | ++FindBasicBlockPosition(if_block)); | 
|---|
| 391 | } | 
|---|
| 392 |  | 
|---|
| 393 | // Specialize the existing loop. | 
|---|
| 394 | SpecializeLoop(loop_, condition, original_loop_constant_value); | 
|---|
| 395 | BasicBlock* original_loop_target = loop_->GetPreHeaderBlock(); | 
|---|
| 396 |  | 
|---|
| 397 | ///////////////////////////////////// | 
|---|
| 398 | // Finally: connect the new loops. // | 
|---|
| 399 | ///////////////////////////////////// | 
|---|
| 400 |  | 
|---|
| 401 | // Delete the old jump | 
|---|
| 402 | context_->KillInst(&*if_block->tail()); | 
|---|
| 403 | InstructionBuilder builder(context_, if_block); | 
|---|
| 404 | if (iv_opcode == SpvOpBranchConditional) { | 
|---|
| 405 | assert(constant_branch.size() == 1); | 
|---|
| 406 | builder.AddConditionalBranch( | 
|---|
| 407 | condition->result_id(), original_loop_target->id(), | 
|---|
| 408 | constant_branch[0].second->id(), | 
|---|
| 409 | if_merge_block ? if_merge_block->id() : kInvalidId); | 
|---|
| 410 | } else { | 
|---|
| 411 | std::vector<std::pair<Operand::OperandData, uint32_t>> targets; | 
|---|
| 412 | for (auto& t : constant_branch) { | 
|---|
| 413 | targets.emplace_back(t.first->GetInOperand(0).words, t.second->id()); | 
|---|
| 414 | } | 
|---|
| 415 |  | 
|---|
| 416 | builder.AddSwitch(condition->result_id(), original_loop_target->id(), | 
|---|
| 417 | targets, | 
|---|
| 418 | if_merge_block ? if_merge_block->id() : kInvalidId); | 
|---|
| 419 | } | 
|---|
| 420 |  | 
|---|
| 421 | switch_block_ = nullptr; | 
|---|
| 422 | ordered_loop_blocks_.clear(); | 
|---|
| 423 |  | 
|---|
| 424 | context_->InvalidateAnalysesExceptFor( | 
|---|
| 425 | IRContext::Analysis::kAnalysisLoopAnalysis); | 
|---|
| 426 | } | 
|---|
| 427 |  | 
|---|
| 428 | private: | 
|---|
| 429 | using ValueMapTy = std::unordered_map<uint32_t, uint32_t>; | 
|---|
| 430 | using BlockMapTy = std::unordered_map<uint32_t, BasicBlock*>; | 
|---|
| 431 |  | 
|---|
| 432 | Function* function_; | 
|---|
| 433 | Loop* loop_; | 
|---|
| 434 | LoopDescriptor& loop_desc_; | 
|---|
| 435 | IRContext* context_; | 
|---|
| 436 |  | 
|---|
| 437 | BasicBlock* switch_block_; | 
|---|
| 438 | // Map between instructions and if they are dynamically uniform. | 
|---|
| 439 | std::unordered_map<uint32_t, bool> dynamically_uniform_; | 
|---|
| 440 | // The loop basic blocks in structured order. | 
|---|
| 441 | std::vector<BasicBlock*> ordered_loop_blocks_; | 
|---|
| 442 |  | 
|---|
| 443 | // Returns the next usable id for the context. | 
|---|
| 444 | uint32_t TakeNextId() { | 
|---|
| 445 | // TODO(1841): Handle id overflow. | 
|---|
| 446 | return context_->TakeNextId(); | 
|---|
| 447 | } | 
|---|
| 448 |  | 
|---|
| 449 | // Simplifies |loop| assuming the instruction |to_version_insn| takes the | 
|---|
| 450 | // value |cst_value|. |block_range| is an iterator range returning the loop | 
|---|
| 451 | // basic blocks in a structured order (dominator first). | 
|---|
| 452 | // The function will ignore basic blocks returned by |block_range| if they | 
|---|
| 453 | // does not belong to the loop. | 
|---|
| 454 | // The set |dead_blocks| will contain all the dead basic blocks. | 
|---|
| 455 | // | 
|---|
| 456 | // Requirements: | 
|---|
| 457 | //   - |loop| must be in the LCSSA form; | 
|---|
| 458 | //   - |cst_value| must be constant. | 
|---|
| 459 | void SpecializeLoop(Loop* loop, Instruction* to_version_insn, | 
|---|
| 460 | Instruction* cst_value) { | 
|---|
| 461 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|---|
| 462 |  | 
|---|
| 463 | std::function<bool(uint32_t)> ignore_node; | 
|---|
| 464 | ignore_node = [loop](uint32_t bb_id) { return !loop->IsInsideLoop(bb_id); }; | 
|---|
| 465 |  | 
|---|
| 466 | std::vector<std::pair<Instruction*, uint32_t>> use_list; | 
|---|
| 467 | def_use_mgr->ForEachUse(to_version_insn, | 
|---|
| 468 | [&use_list, &ignore_node, this]( | 
|---|
| 469 | Instruction* inst, uint32_t operand_index) { | 
|---|
| 470 | BasicBlock* bb = context_->get_instr_block(inst); | 
|---|
| 471 |  | 
|---|
| 472 | if (!bb || ignore_node(bb->id())) { | 
|---|
| 473 | // Out of the loop, the specialization does not | 
|---|
| 474 | // apply any more. | 
|---|
| 475 | return; | 
|---|
| 476 | } | 
|---|
| 477 | use_list.emplace_back(inst, operand_index); | 
|---|
| 478 | }); | 
|---|
| 479 |  | 
|---|
| 480 | // First pass: inject the specialized value into the loop (and only the | 
|---|
| 481 | // loop). | 
|---|
| 482 | for (auto use : use_list) { | 
|---|
| 483 | Instruction* inst = use.first; | 
|---|
| 484 | uint32_t operand_index = use.second; | 
|---|
| 485 |  | 
|---|
| 486 | // To also handle switch, cst_value can be nullptr: this case | 
|---|
| 487 | // means that we are looking to branch to the default target of | 
|---|
| 488 | // the switch. We don't actually know its value so we don't touch | 
|---|
| 489 | // it if it not a switch. | 
|---|
| 490 | assert(cst_value && "We do not have a value to use."); | 
|---|
| 491 | inst->SetOperand(operand_index, {cst_value->result_id()}); | 
|---|
| 492 | def_use_mgr->AnalyzeInstUse(inst); | 
|---|
| 493 | } | 
|---|
| 494 | } | 
|---|
| 495 |  | 
|---|
| 496 | // Returns true if |var| is dynamically uniform. | 
|---|
| 497 | // Note: this is currently approximated as uniform. | 
|---|
| 498 | bool IsDynamicallyUniform(Instruction* var, const BasicBlock* entry, | 
|---|
| 499 | const DominatorTree& post_dom_tree) { | 
|---|
| 500 | assert(post_dom_tree.IsPostDominator()); | 
|---|
| 501 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|---|
| 502 |  | 
|---|
| 503 | auto it = dynamically_uniform_.find(var->result_id()); | 
|---|
| 504 |  | 
|---|
| 505 | if (it != dynamically_uniform_.end()) return it->second; | 
|---|
| 506 |  | 
|---|
| 507 | analysis::DecorationManager* dec_mgr = context_->get_decoration_mgr(); | 
|---|
| 508 |  | 
|---|
| 509 | bool& is_uniform = dynamically_uniform_[var->result_id()]; | 
|---|
| 510 | is_uniform = false; | 
|---|
| 511 |  | 
|---|
| 512 | dec_mgr->WhileEachDecoration(var->result_id(), SpvDecorationUniform, | 
|---|
| 513 | [&is_uniform](const Instruction&) { | 
|---|
| 514 | is_uniform = true; | 
|---|
| 515 | return false; | 
|---|
| 516 | }); | 
|---|
| 517 | if (is_uniform) { | 
|---|
| 518 | return is_uniform; | 
|---|
| 519 | } | 
|---|
| 520 |  | 
|---|
| 521 | BasicBlock* parent = context_->get_instr_block(var); | 
|---|
| 522 | if (!parent) { | 
|---|
| 523 | return is_uniform = true; | 
|---|
| 524 | } | 
|---|
| 525 |  | 
|---|
| 526 | if (!post_dom_tree.Dominates(parent->id(), entry->id())) { | 
|---|
| 527 | return is_uniform = false; | 
|---|
| 528 | } | 
|---|
| 529 | if (var->opcode() == SpvOpLoad) { | 
|---|
| 530 | const uint32_t PtrTypeId = | 
|---|
| 531 | def_use_mgr->GetDef(var->GetSingleWordInOperand(0))->type_id(); | 
|---|
| 532 | const Instruction* PtrTypeInst = def_use_mgr->GetDef(PtrTypeId); | 
|---|
| 533 | uint32_t storage_class = | 
|---|
| 534 | PtrTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx); | 
|---|
| 535 | if (storage_class != SpvStorageClassUniform && | 
|---|
| 536 | storage_class != SpvStorageClassUniformConstant) { | 
|---|
| 537 | return is_uniform = false; | 
|---|
| 538 | } | 
|---|
| 539 | } else { | 
|---|
| 540 | if (!context_->IsCombinatorInstruction(var)) { | 
|---|
| 541 | return is_uniform = false; | 
|---|
| 542 | } | 
|---|
| 543 | } | 
|---|
| 544 |  | 
|---|
| 545 | return is_uniform = var->WhileEachInId([entry, &post_dom_tree, | 
|---|
| 546 | this](const uint32_t* id) { | 
|---|
| 547 | return IsDynamicallyUniform(context_->get_def_use_mgr()->GetDef(*id), | 
|---|
| 548 | entry, post_dom_tree); | 
|---|
| 549 | }); | 
|---|
| 550 | } | 
|---|
| 551 |  | 
|---|
| 552 | // Returns true if |insn| is not a constant, but is loop invariant and | 
|---|
| 553 | // dynamically uniform. | 
|---|
| 554 | bool IsConditionNonConstantLoopInvariant(Instruction* insn) { | 
|---|
| 555 | assert(insn->IsBranch()); | 
|---|
| 556 | assert(insn->opcode() != SpvOpBranch); | 
|---|
| 557 | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); | 
|---|
| 558 |  | 
|---|
| 559 | Instruction* condition = def_use_mgr->GetDef(insn->GetOperand(0).words[0]); | 
|---|
| 560 | if (condition->IsConstant()) { | 
|---|
| 561 | return false; | 
|---|
| 562 | } | 
|---|
| 563 |  | 
|---|
| 564 | if (loop_->IsInsideLoop(condition)) { | 
|---|
| 565 | return false; | 
|---|
| 566 | } | 
|---|
| 567 |  | 
|---|
| 568 | return IsDynamicallyUniform( | 
|---|
| 569 | condition, function_->entry().get(), | 
|---|
| 570 | context_->GetPostDominatorAnalysis(function_)->GetDomTree()); | 
|---|
| 571 | } | 
|---|
| 572 | }; | 
|---|
| 573 |  | 
|---|
| 574 | }  // namespace | 
|---|
| 575 |  | 
|---|
| 576 | Pass::Status LoopUnswitchPass::Process() { | 
|---|
| 577 | bool modified = false; | 
|---|
| 578 | Module* module = context()->module(); | 
|---|
| 579 |  | 
|---|
| 580 | // Process each function in the module | 
|---|
| 581 | for (Function& f : *module) { | 
|---|
| 582 | modified |= ProcessFunction(&f); | 
|---|
| 583 | } | 
|---|
| 584 |  | 
|---|
| 585 | return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; | 
|---|
| 586 | } | 
|---|
| 587 |  | 
|---|
| 588 | bool LoopUnswitchPass::ProcessFunction(Function* f) { | 
|---|
| 589 | bool modified = false; | 
|---|
| 590 | std::unordered_set<Loop*> processed_loop; | 
|---|
| 591 |  | 
|---|
| 592 | LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f); | 
|---|
| 593 |  | 
|---|
| 594 | bool loop_changed = true; | 
|---|
| 595 | while (loop_changed) { | 
|---|
| 596 | loop_changed = false; | 
|---|
| 597 | for (Loop& loop : | 
|---|
| 598 | make_range(++TreeDFIterator<Loop>(loop_descriptor.GetDummyRootLoop()), | 
|---|
| 599 | TreeDFIterator<Loop>())) { | 
|---|
| 600 | if (processed_loop.count(&loop)) continue; | 
|---|
| 601 | processed_loop.insert(&loop); | 
|---|
| 602 |  | 
|---|
| 603 | LoopUnswitch unswitcher(context(), f, &loop, &loop_descriptor); | 
|---|
| 604 | while (unswitcher.CanUnswitchLoop()) { | 
|---|
| 605 | if (!loop.IsLCSSA()) { | 
|---|
| 606 | LoopUtils(context(), &loop).MakeLoopClosedSSA(); | 
|---|
| 607 | } | 
|---|
| 608 | modified = true; | 
|---|
| 609 | loop_changed = true; | 
|---|
| 610 | unswitcher.PerformUnswitch(); | 
|---|
| 611 | } | 
|---|
| 612 | if (loop_changed) break; | 
|---|
| 613 | } | 
|---|
| 614 | } | 
|---|
| 615 |  | 
|---|
| 616 | return modified; | 
|---|
| 617 | } | 
|---|
| 618 |  | 
|---|
| 619 | }  // namespace opt | 
|---|
| 620 | }  // namespace spvtools | 
|---|
| 621 |  | 
|---|