loop_peeling.cpp source code [Skia/third_party/externals/spirv-tools/source/opt/loop_peeling.cpp]

1	// Copyright (c) 2018 Google LLC.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include <algorithm>
16	#include <functional>
17	#include <memory>
18	#include <unordered_map>
19	#include <unordered_set>
20	#include <vector>
21
22	#include "source/opt/ir_builder.h"
23	#include "source/opt/ir_context.h"
24	#include "source/opt/loop_descriptor.h"
25	#include "source/opt/loop_peeling.h"
26	#include "source/opt/loop_utils.h"
27	#include "source/opt/scalar_analysis.h"
28	#include "source/opt/scalar_analysis_nodes.h"
29
30	namespace spvtools {
31	namespace opt {
32	size_t LoopPeelingPass::code_grow_threshold_ = `1000`;
33
34	void LoopPeeling::DuplicateAndConnectLoop(
35	LoopUtils::LoopCloningResult* clone_results) {
36	CFG& cfg = *context_->cfg();
37	analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
38
39	assert(CanPeelLoop() && "Cannot peel loop!");
40
41	std::vector<BasicBlock*> ordered_loop_blocks;
42	// TODO(1841): Handle failure to create pre-header.
43	BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();
44
45	loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);
46
47	cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);
48
49	// Add the basic block to the function.
50	Function::iterator it =
51	loop_utils_.GetFunction()->FindBlock(pre_header->id());
52	assert(it != loop_utils_.GetFunction()->end() &&
53	"Pre-header not found in the function.");
54	loop_utils_.GetFunction()->AddBasicBlocks(
55	clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);
56
57	// Make the \|loop_\|'s preheader the \|cloned_loop_\| one.
58	BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
59	pre_header->ForEachSuccessorLabel(
60	[cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });
61
62	// Update cfg.
63	cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
64	cloned_loop_->SetPreHeaderBlock(pre_header);
65	loop_->SetPreHeaderBlock(nullptr);
66
67	// When cloning the loop, we didn't cloned the merge block, so currently
68	// \|cloned_loop_\| shares the same block as \|loop_\|.
69	// We mutate all branches from \|cloned_loop_\| block to \|loop_\|'s merge into a
70	// branch to \|loop_\|'s header (so header will also be the merge of
71	// \|cloned_loop_\|).
72	uint32_t cloned_loop_exit = `0`;
73	for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
74	if (loop_->IsInsideLoop(pred_id)) continue;
75	BasicBlock* bb = cfg.block(pred_id);
76	assert(cloned_loop_exit == `0` && "The loop has multiple exits.");
77	cloned_loop_exit = bb->id();
78	bb->ForEachSuccessorLabel([this](uint32_t* succ) {
79	if (*succ == loop_->GetMergeBlock()->id())
80	*succ = loop_->GetHeaderBlock()->id();
81	});
82	}
83
84	// Update cfg.
85	cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
86	cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());
87
88	// Patch the phi of the original loop header:
89	// - Set the loop entry branch to come from the cloned loop exit block;
90	// - Set the initial value of the phi using the corresponding cloned loop
91	// exit values.
92	//
93	// We patch the iterating value initializers of the original loop using the
94	// corresponding cloned loop exit values. Connects the cloned loop iterating
95	// values to the original loop. This make sure that the initial value of the
96	// second loop starts with the last value of the first loop.
97	//
98	// For example, loops like:
99	//
100	// int z = 0;
101	// for (int i = 0; i++ < M; i += cst1) {
102	// if (cond)
103	// z += cst2;
104	// }
105	//
106	// Will become:
107	//
108	// int z = 0;
109	// int i = 0;
110	// for (; i++ < M; i += cst1) {
111	// if (cond)
112	// z += cst2;
113	// }
114	// for (; i++ < M; i += cst1) {
115	// if (cond)
116	// z += cst2;
117	// }
118	loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
119	clone_results,
120	this](Instruction* phi) {
121	for (uint32_t i = `0`; i < phi->NumInOperands(); i += `2`) {
122	if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + `1`))) {
123	phi->SetInOperand(i,
124	{clone_results->value_map_.at(
125	exit_value_.at(phi->result_id())->result_id())});
126	phi->SetInOperand(i + `1`, {cloned_loop_exit});
127	def_use_mgr->AnalyzeInstUse(phi);
128	return;
129	}
130	}
131	});
132
133	// Force the creation of a new preheader for the original loop and set it as
134	// the merge block for the cloned loop.
135	// TODO(1841): Handle failure to create pre-header.
136	cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
137	}
138
139	void LoopPeeling::InsertCanonicalInductionVariable(
140	LoopUtils::LoopCloningResult* clone_results) {
141	if (original_loop_canonical_induction_variable_) {
142	canonical_induction_variable_ =
143	context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
144	original_loop_canonical_induction_variable_->result_id()));
145	return;
146	}
147
148	BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
149	if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
150	--insert_point;
151	}
152	InstructionBuilder builder(
153	context_, &*insert_point,
154	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
155	Instruction* uint_1_cst =
156	builder.GetIntConstant<uint32_t>(`1`, int_type_->IsSigned());
157	// Create the increment.
158	// Note that we do "1 + 1" here, one of the operand should the phi
159	// value but we don't have it yet. The operand will be set latter.
160	Instruction* iv_inc = builder.AddIAdd(
161	uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());
162
163	builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());
164
165	canonical_induction_variable_ = builder.AddPhi(
166	uint_1_cst->type_id(),
167	{builder.GetIntConstant<uint32_t>(`0`, int_type_->IsSigned())->result_id(),
168	GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
169	GetClonedLoop()->GetLatchBlock()->id()});
170	// Connect everything.
171	iv_inc->SetInOperand(`0`, {canonical_induction_variable_->result_id()});
172
173	// Update def/use manager.
174	context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);
175
176	// If do-while form, use the incremented value.
177	if (do_while_form_) {
178	canonical_induction_variable_ = iv_inc;
179	}
180	}
181
182	void LoopPeeling::GetIteratorUpdateOperations(
183	const Loop* loop, Instruction* iterator,
184	std::unordered_set<Instruction> operations) {
185	analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
186	operations->insert(iterator);
187	iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
188	Instruction* insn = def_use_mgr->GetDef(*id);
189	if (insn->opcode() == SpvOpLabel) {
190	return;
191	}
192	if (operations->count(insn)) {
193	return;
194	}
195	if (!loop->IsInsideLoop(insn)) {
196	return;
197	}
198	GetIteratorUpdateOperations(loop, insn, operations);
199	});
200	}
201
202	// Gather the set of blocks for all the path from \|entry\| to \|root\|.
203	static void GetBlocksInPath(uint32_t block, uint32_t entry,
204	std::unordered_set<uint32_t>* blocks_in_path,
205	const CFG& cfg) {
206	for (uint32_t pid : cfg.preds(block)) {
207	if (blocks_in_path->insert(pid).second) {
208	if (pid != entry) {
209	GetBlocksInPath(pid, entry, blocks_in_path, cfg);
210	}
211	}
212	}
213	}
214
215	bool LoopPeeling::IsConditionCheckSideEffectFree() const {
216	CFG& cfg = *context_->cfg();
217
218	// The "do-while" form does not cause issues, the algorithm takes into account
219	// the first iteration.
220	if (!do_while_form_) {
221	uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[`0`];
222
223	std::unordered_set<uint32_t> blocks_in_path;
224
225	blocks_in_path.insert(condition_block_id);
226	GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
227	&blocks_in_path, cfg);
228
229	for (uint32_t bb_id : blocks_in_path) {
230	BasicBlock* bb = cfg.block(bb_id);
231	if (!bb->WhileEachInst([this](Instruction* insn) {
232	if (insn->IsBranch()) return true;
233	switch (insn->opcode()) {
234	case SpvOpLabel:
235	case SpvOpSelectionMerge:
236	case SpvOpLoopMerge:
237	return true;
238	default:
239	break;
240	}
241	return context_->IsCombinatorInstruction(insn);
242	})) {
243	return false;
244	}
245	}
246	}
247
248	return true;
249	}
250
251	void LoopPeeling::GetIteratingExitValues() {
252	CFG& cfg = *context_->cfg();
253
254	loop_->GetHeaderBlock()->ForEachPhiInst(
255	[this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });
256
257	if (!loop_->GetMergeBlock()) {
258	return;
259	}
260	if (cfg.preds(loop_->GetMergeBlock()->id()).size() != `1`) {
261	return;
262	}
263	analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
264
265	uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[`0`];
266
267	auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
268	do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
269	condition_block_id) != header_pred.end();
270	if (do_while_form_) {
271	loop_->GetHeaderBlock()->ForEachPhiInst(
272	[condition_block_id, def_use_mgr, this](Instruction* phi) {
273	std::unordered_set<Instruction*> operations;
274
275	for (uint32_t i = `0`; i < phi->NumInOperands(); i += `2`) {
276	if (condition_block_id == phi->GetSingleWordInOperand(i + `1`)) {
277	exit_value_[phi->result_id()] =
278	def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
279	}
280	}
281	});
282	} else {
283	DominatorTree* dom_tree =
284	&context_->GetDominatorAnalysis(loop_utils_.GetFunction())
285	->GetDomTree();
286	BasicBlock* condition_block = cfg.block(condition_block_id);
287
288	loop_->GetHeaderBlock()->ForEachPhiInst(
289	[dom_tree, condition_block, this](Instruction* phi) {
290	std::unordered_set<Instruction*> operations;
291
292	// Not the back-edge value, check if the phi instruction is the only
293	// possible candidate.
294	GetIteratorUpdateOperations(loop_, phi, &operations);
295
296	for (Instruction* insn : operations) {
297	if (insn == phi) {
298	continue;
299	}
300	if (dom_tree->Dominates(context_->get_instr_block(insn),
301	condition_block)) {
302	return;
303	}
304	}
305	exit_value_[phi->result_id()] = phi;
306	});
307	}
308	}
309
310	void LoopPeeling::FixExitCondition(
311	const std::function<uint32_t(Instruction*)>& condition_builder) {
312	CFG& cfg = *context_->cfg();
313
314	uint32_t condition_block_id = `0`;
315	for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
316	if (GetClonedLoop()->IsInsideLoop(id)) {
317	condition_block_id = id;
318	break;
319	}
320	}
321	assert(condition_block_id != `0` && "2nd loop in improperly connected");
322
323	BasicBlock* condition_block = cfg.block(condition_block_id);
324	Instruction* exit_condition = condition_block->terminator();
325	assert(exit_condition->opcode() == SpvOpBranchConditional);
326	BasicBlock::iterator insert_point = condition_block->tail();
327	if (condition_block->GetMergeInst()) {
328	--insert_point;
329	}
330
331	exit_condition->SetInOperand(`0`, {condition_builder (&*insert_point)});
332
333	uint32_t to_continue_block_idx =
334	GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(`1`))
335	? `1`
336	: `2`;
337	exit_condition->SetInOperand(
338	`1`, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
339	exit_condition->SetInOperand(`2`, {GetClonedLoop()->GetMergeBlock()->id()});
340
341	// Update def/use manager.
342	context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
343	}
344
345	BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
346	analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
347	CFG& cfg = *context_->cfg();
348	assert(cfg.preds(bb->id()).size() == `1` && "More than one predecessor");
349
350	// TODO(1841): Handle id overflow.
351	std::unique_ptr<BasicBlock> new_bb =
352	MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction (
353	context_, SpvOpLabel, `0`, context_->TakeNextId(), {})));
354	new_bb ->SetParent(loop_utils_.GetFunction());
355	// Update the loop descriptor.
356	Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
357	if (in_loop) {
358	in_loop->AddBasicBlock(new_bb.get());
359	loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb ->id(), in_loop);
360	}
361
362	context_->set_instr_block(new_bb ->GetLabelInst(), new_bb.get());
363	def_use_mgr->AnalyzeInstDefUse(new_bb ->GetLabelInst());
364
365	BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[`0`]);
366	bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
367	if (*id == bb->id()) {
368	*id = new_bb ->id();
369	}
370	});
371	cfg.RemoveEdge(bb_pred->id(), bb->id());
372	cfg.AddEdge(bb_pred->id(), new_bb ->id());
373	def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());
374
375	// Update the incoming branch.
376	bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
377	phi->SetInOperand(`1`, {new_bb ->id()});
378	def_use_mgr->AnalyzeInstUse(phi);
379	});
380	InstructionBuilder (
381	context_, new_bb.get(),
382	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping)
383	.AddBranch(bb->id());
384	cfg.RegisterBlock(new_bb.get());
385
386	// Add the basic block to the function.
387	Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
388	assert(it != loop_utils_.GetFunction()->end() &&
389	"Basic block not found in the function.");
390	BasicBlock* ret = new_bb.get();
391	loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
392	return ret;
393	}
394
395	BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
396	BasicBlock* if_merge) {
397	// TODO(1841): Handle failure to create pre-header.
398	BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
399	// Will no longer be a pre-header because of the if.
400	loop->SetPreHeaderBlock(nullptr);
401	// Kill the branch to the header.
402	context_->KillInst(&*if_block->tail());
403
404	InstructionBuilder builder(
405	context_, if_block,
406	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
407	builder.AddConditionalBranch(condition->result_id(),
408	loop->GetHeaderBlock()->id(), if_merge->id(),
409	if_merge->id());
410
411	return if_block;
412	}
413
414	void LoopPeeling::PeelBefore(uint32_t peel_factor) {
415	assert(CanPeelLoop() && "Cannot peel loop");
416	LoopUtils::LoopCloningResult clone_results;
417
418	// Clone the loop and insert the cloned one before the loop.
419	DuplicateAndConnectLoop(&clone_results);
420
421	// Add a canonical induction variable "canonical_induction_variable_".
422	InsertCanonicalInductionVariable(&clone_results);
423
424	InstructionBuilder builder(
425	context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
426	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
427	Instruction* factor =
428	builder.GetIntConstant(peel_factor, int_type_->IsSigned());
429
430	Instruction* has_remaining_iteration = builder.AddLessThan(
431	factor->result_id(), loop_iteration_count_->result_id());
432	Instruction* max_iteration = builder.AddSelect(
433	factor->type_id(), has_remaining_iteration->result_id(),
434	factor->result_id(), loop_iteration_count_->result_id());
435
436	// Change the exit condition of the cloned loop to be (exit when become
437	// false):
438	// "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
439	FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
440	return InstructionBuilder (context_, insert_before_point,
441	IRContext::kAnalysisDefUse \|
442	IRContext::kAnalysisInstrToBlockMapping)
443	.AddLessThan(canonical_induction_variable_->result_id(),
444	max_iteration->result_id())
445	->result_id();
446	});
447
448	// "Protect" the second loop: the second loop can only be executed if
449	// \|has_remaining_iteration\| is true (i.e. factor < loop_iteration_count_).
450	BasicBlock* if_merge_block = loop_->GetMergeBlock();
451	loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
452	// Prevent the second loop from being executed if we already executed all the
453	// required iterations.
454	BasicBlock* if_block =
455	ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
456	// Patch the phi of the merge block.
457	if_merge_block->ForEachPhiInst(
458	[&clone_results, if_block, this](Instruction* phi) {
459	// if_merge_block had previously only 1 predecessor.
460	uint32_t incoming_value = phi->GetSingleWordInOperand(`0`);
461	auto def_in_loop = clone_results.value_map_.find(incoming_value);
462	if (def_in_loop != clone_results.value_map_.end())
463	incoming_value = def_in_loop ->second;
464	phi->AddOperand(
465	{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
466	phi->AddOperand(
467	{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
468	context_->get_def_use_mgr()->AnalyzeInstUse(phi);
469	});
470
471	context_->InvalidateAnalysesExceptFor(
472	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping \|
473	IRContext::kAnalysisLoopAnalysis \| IRContext::kAnalysisCFG);
474	}
475
476	void LoopPeeling::PeelAfter(uint32_t peel_factor) {
477	assert(CanPeelLoop() && "Cannot peel loop");
478	LoopUtils::LoopCloningResult clone_results;
479
480	// Clone the loop and insert the cloned one before the loop.
481	DuplicateAndConnectLoop(&clone_results);
482
483	// Add a canonical induction variable "canonical_induction_variable_".
484	InsertCanonicalInductionVariable(&clone_results);
485
486	InstructionBuilder builder(
487	context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
488	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
489	Instruction* factor =
490	builder.GetIntConstant(peel_factor, int_type_->IsSigned());
491
492	Instruction* has_remaining_iteration = builder.AddLessThan(
493	factor->result_id(), loop_iteration_count_->result_id());
494
495	// Change the exit condition of the cloned loop to be (exit when become
496	// false):
497	// "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
498	FixExitCondition([factor, this](Instruction* insert_before_point) {
499	InstructionBuilder cond_builder(
500	context_, insert_before_point,
501	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
502	// Build the following check: canonical_induction_variable_ + factor <
503	// iteration_count
504	return cond_builder
505	.AddLessThan(cond_builder
506	.AddIAdd(canonical_induction_variable_->type_id(),
507	canonical_induction_variable_->result_id(),
508	factor->result_id())
509	->result_id(),
510	loop_iteration_count_->result_id())
511	->result_id();
512	});
513
514	// "Protect" the first loop: the first loop can only be executed if
515	// factor < loop_iteration_count_.
516
517	// The original loop's pre-header was the cloned loop merge block.
518	GetClonedLoop()->SetMergeBlock(
519	CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
520	// Use the second loop preheader as if merge block.
521
522	// Prevent the first loop if only the peeled loop needs it.
523	BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
524	GetOriginalLoop()->GetPreHeaderBlock());
525
526	// Patch the phi of the header block.
527	// We added an if to enclose the first loop and because the phi node are
528	// connected to the exit value of the first loop, the definition no longer
529	// dominate the preheader.
530	// We had to the preheader (our if merge block) the required phi instruction
531	// and patch the header phi.
532	GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
533	[&clone_results, if_block, this](Instruction* phi) {
534	analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
535
536	auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
537	uint32_t preheader_value_idx =
538	!loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(`1`)) ? `0` : `2`;
539	return preheader_value_idx;
540	};
541
542	Instruction* cloned_phi =
543	def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
544	uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
545	find_value_idx (cloned_phi, GetClonedLoop()));
546
547	Instruction* new_phi =
548	InstructionBuilder (context_,
549	&*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
550	IRContext::kAnalysisDefUse \|
551	IRContext::kAnalysisInstrToBlockMapping)
552	.AddPhi(phi->type_id(),
553	{phi->GetSingleWordInOperand(
554	find_value_idx (phi, GetOriginalLoop())),
555	GetClonedLoop()->GetMergeBlock()->id(),
556	cloned_preheader_value, if_block->id()});
557
558	phi->SetInOperand(find_value_idx (phi, GetOriginalLoop()),
559	{new_phi->result_id()});
560	def_use_mgr->AnalyzeInstUse(phi);
561	});
562
563	context_->InvalidateAnalysesExceptFor(
564	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping \|
565	IRContext::kAnalysisLoopAnalysis \| IRContext::kAnalysisCFG);
566	}
567
568	Pass::Status LoopPeelingPass::Process() {
569	bool modified = false;
570	Module* module = context()->module();
571
572	// Process each function in the module
573	for (Function& f : *module) {
574	modified \|= ProcessFunction(&f);
575	}
576
577	return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
578	}
579
580	bool LoopPeelingPass::ProcessFunction(Function* f) {
581	bool modified = false;
582	LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);
583
584	std::vector<Loop*> to_process_loop;
585	to_process_loop.reserve(loop_descriptor.NumLoops());
586	for (Loop& l : loop_descriptor) {
587	to_process_loop.push_back(&l);
588	}
589
590	ScalarEvolutionAnalysis scev_analysis(context());
591
592	for (Loop* loop : to_process_loop) {
593	CodeMetrics loop_size;
594	loop_size.Analyze(*loop);
595
596	auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* {
597	if (!loop_to_peel->IsLCSSA()) {
598	LoopUtils (context(), loop_to_peel).MakeLoopClosedSSA();
599	}
600
601	bool peeled_loop;
602	Loop* still_peelable_loop;
603	std::tie(peeled_loop, still_peelable_loop) =
604	ProcessLoop(loop_to_peel, &loop_size);
605
606	if (peeled_loop) {
607	modified = true;
608	}
609
610	return still_peelable_loop;
611	};
612
613	Loop* still_peelable_loop = try_peel (loop);
614	// The pass is working out the maximum factor by which a loop can be peeled.
615	// If the loop can potentially be peeled again, then there is only one
616	// possible direction, so only one call is still needed.
617	if (still_peelable_loop) {
618	try_peel (loop);
619	}
620	}
621
622	return modified;
623	}
624
625	std::pair<bool, Loop> LoopPeelingPass::ProcessLoop(Loop loop,
626	CodeMetrics* loop_size) {
627	ScalarEvolutionAnalysis* scev_analysis =
628	context()->GetScalarEvolutionAnalysis();
629	// Default values for bailing out.
630	std::pair<bool, Loop> bail_out{false, nullptr*};
631
632	BasicBlock* exit_block = loop->FindConditionBlock();
633	if (!exit_block) {
634	return bail_out;
635	}
636
637	Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
638	if (!exiting_iv) {
639	return bail_out;
640	}
641	size_t iterations = `0`;
642	if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
643	&iterations)) {
644	return bail_out;
645	}
646	if (!iterations) {
647	return bail_out;
648	}
649
650	Instruction* canonical_induction_variable = nullptr;
651
652	loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
653	scev_analysis,
654	this](Instruction* insn) {
655	if (const SERecurrentNode* iv =
656	scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
657	const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
658	const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
659	if (offset && coeff && offset->FoldToSingleValue() == `0` &&
660	coeff->FoldToSingleValue() == `1`) {
661	if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
662	canonical_induction_variable = insn;
663	return false;
664	}
665	}
666	}
667	return true;
668	});
669
670	bool is_signed = canonical_induction_variable
671	? context()
672	->get_type_mgr()
673	->GetType(canonical_induction_variable->type_id())
674	->AsInteger()
675	->IsSigned()
676	: false;
677
678	LoopPeeling peeler(
679	loop,
680	InstructionBuilder (
681	context(), loop->GetHeaderBlock(),
682	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping)
683	.GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
684	is_signed),
685	canonical_induction_variable);
686
687	if (!peeler.CanPeelLoop()) {
688	return bail_out;
689	}
690
691	// For each basic block in the loop, check if it can be peeled. If it
692	// can, get the direction (before/after) and by which factor.
693	LoopPeelingInfo peel_info(loop, iterations, scev_analysis);
694
695	uint32_t peel_before_factor = `0`;
696	uint32_t peel_after_factor = `0`;
697
698	for (uint32_t block : loop->GetBlocks()) {
699	if (block == exit_block->id()) {
700	continue;
701	}
702	BasicBlock* bb = cfg()->block(block);
703	PeelDirection direction;
704	uint32_t factor;
705	std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);
706
707	if (direction == PeelDirection::kNone) {
708	continue;
709	}
710	if (direction == PeelDirection::kBefore) {
711	peel_before_factor = std::max(peel_before_factor, factor);
712	} else {
713	assert(direction == PeelDirection::kAfter);
714	peel_after_factor = std::max(peel_after_factor, factor);
715	}
716	}
717	PeelDirection direction = PeelDirection::kNone;
718	uint32_t factor = `0`;
719
720	// Find which direction we should peel.
721	if (peel_before_factor) {
722	factor = peel_before_factor;
723	direction = PeelDirection::kBefore;
724	}
725	if (peel_after_factor) {
726	if (peel_before_factor < peel_after_factor) {
727	// Favor a peel after here and give the peel before another shot later.
728	factor = peel_after_factor;
729	direction = PeelDirection::kAfter;
730	}
731	}
732
733	// Do the peel if we can.
734	if (direction == PeelDirection::kNone) return bail_out;
735
736	// This does not take into account branch elimination opportunities and
737	// the unrolling. It assumes the peeled loop will be unrolled as well.
738	if (factor * loop_size->roi_size_ > code_grow_threshold_) {
739	return bail_out;
740	}
741	loop_size->roi_size_ *= factor;
742
743	// Find if a loop should be peeled again.
744	Loop* extra_opportunity = nullptr;
745
746	if (direction == PeelDirection::kBefore) {
747	peeler.PeelBefore(factor);
748	if (stats_) {
749	stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
750	}
751	if (peel_after_factor) {
752	// We could have peeled after, give it another try.
753	extra_opportunity = peeler.GetOriginalLoop();
754	}
755	} else {
756	peeler.PeelAfter(factor);
757	if (stats_) {
758	stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
759	}
760	if (peel_before_factor) {
761	// We could have peeled before, give it another try.
762	extra_opportunity = peeler.GetClonedLoop();
763	}
764	}
765
766	return {true, extra_opportunity};
767	}
768
769	uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
770	Instruction* condition) const {
771	for (uint32_t i = `0`; i < condition->NumInOperands(); i++) {
772	BasicBlock* bb =
773	context_->get_instr_block(condition->GetSingleWordInOperand(i));
774	if (bb && loop_->IsInsideLoop(bb)) {
775	return condition->GetSingleWordInOperand(i);
776	}
777	}
778
779	return `0`;
780	}
781
782	uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
783	Instruction* condition) const {
784	for (uint32_t i = `0`; i < condition->NumInOperands(); i++) {
785	BasicBlock* bb =
786	context_->get_instr_block(condition->GetSingleWordInOperand(i));
787	if (!bb \|\| !loop_->IsInsideLoop(bb)) {
788	return condition->GetSingleWordInOperand(i);
789	}
790	}
791
792	return `0`;
793	}
794
795	static bool IsHandledCondition(SpvOp opcode) {
796	switch (opcode) {
797	case SpvOpIEqual:
798	case SpvOpINotEqual:
799	case SpvOpUGreaterThan:
800	case SpvOpSGreaterThan:
801	case SpvOpUGreaterThanEqual:
802	case SpvOpSGreaterThanEqual:
803	case SpvOpULessThan:
804	case SpvOpSLessThan:
805	case SpvOpULessThanEqual:
806	case SpvOpSLessThanEqual:
807	return true;
808	default:
809	return false;
810	}
811	}
812
813	LoopPeelingPass::LoopPeelingInfo::Direction
814	LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
815	if (bb->terminator()->opcode() != SpvOpBranchConditional) {
816	return GetNoneDirection();
817	}
818
819	analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
820
821	Instruction* condition =
822	def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(`0`));
823
824	if (!IsHandledCondition(condition->opcode())) {
825	return GetNoneDirection();
826	}
827
828	if (!GetFirstLoopInvariantOperand(condition)) {
829	// No loop invariant, it cannot be peeled by this pass.
830	return GetNoneDirection();
831	}
832	if (!GetFirstNonLoopInvariantOperand(condition)) {
833	// Seems to be a job for the unswitch pass.
834	return GetNoneDirection();
835	}
836
837	// Left hand-side.
838	SExpression lhs = scev_analysis_->AnalyzeInstruction(
839	def_use_mgr->GetDef(condition->GetSingleWordInOperand(`0`)));
840	if (lhs ->GetType() == SENode::CanNotCompute) {
841	// Can't make any conclusion.
842	return GetNoneDirection();
843	}
844
845	// Right hand-side.
846	SExpression rhs = scev_analysis_->AnalyzeInstruction(
847	def_use_mgr->GetDef(condition->GetSingleWordInOperand(`1`)));
848	if (rhs ->GetType() == SENode::CanNotCompute) {
849	// Can't make any conclusion.
850	return GetNoneDirection();
851	}
852
853	// Only take into account recurrent expression over the current loop.
854	bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
855	bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);
856
857	if ((is_lhs_rec && is_rhs_rec) \|\| (!is_lhs_rec && !is_rhs_rec)) {
858	return GetNoneDirection();
859	}
860
861	if (is_lhs_rec) {
862	if (!lhs ->AsSERecurrentNode() \|\|
863	lhs ->AsSERecurrentNode()->GetLoop() != loop_) {
864	return GetNoneDirection();
865	}
866	}
867	if (is_rhs_rec) {
868	if (!rhs ->AsSERecurrentNode() \|\|
869	rhs ->AsSERecurrentNode()->GetLoop() != loop_) {
870	return GetNoneDirection();
871	}
872	}
873
874	// If the op code is ==, then we try a peel before or after.
875	// If opcode is not <, >, <= or >=, we bail out.
876	//
877	// For the remaining cases, we canonicalize the expression so that the
878	// constant expression is on the left hand side and the recurring expression
879	// is on the right hand side. If we swap hand side, then < becomes >, <=
880	// becomes >= etc.
881	// If the opcode is <=, then we add 1 to the right hand side and do the peel
882	// check on <.
883	// If the opcode is >=, then we add 1 to the left hand side and do the peel
884	// check on >.
885
886	CmpOperator cmp_operator;
887	switch (condition->opcode()) {
888	default:
889	return GetNoneDirection();
890	case SpvOpIEqual:
891	case SpvOpINotEqual:
892	return HandleEquality(lhs, rhs);
893	case SpvOpUGreaterThan:
894	case SpvOpSGreaterThan: {
895	cmp_operator = CmpOperator::kGT;
896	break;
897	}
898	case SpvOpULessThan:
899	case SpvOpSLessThan: {
900	cmp_operator = CmpOperator::kLT;
901	break;
902	}
903	// We add one to transform >= into > and <= into <.
904	case SpvOpUGreaterThanEqual:
905	case SpvOpSGreaterThanEqual: {
906	cmp_operator = CmpOperator::kGE;
907	break;
908	}
909	case SpvOpULessThanEqual:
910	case SpvOpSLessThanEqual: {
911	cmp_operator = CmpOperator::kLE;
912	break;
913	}
914	}
915
916	// Force the left hand side to be the non recurring expression.
917	if (is_lhs_rec) {
918	std::swap(lhs, rhs);
919	switch (cmp_operator) {
920	case CmpOperator::kLT: {
921	cmp_operator = CmpOperator::kGT;
922	break;
923	}
924	case CmpOperator::kGT: {
925	cmp_operator = CmpOperator::kLT;
926	break;
927	}
928	case CmpOperator::kLE: {
929	cmp_operator = CmpOperator::kGE;
930	break;
931	}
932	case CmpOperator::kGE: {
933	cmp_operator = CmpOperator::kLE;
934	break;
935	}
936	}
937	}
938	return HandleInequality(cmp_operator, lhs, rhs ->AsSERecurrentNode());
939	}
940
941	SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
942	SERecurrentNode* rec) const {
943	return rec->GetOffset();
944	}
945
946	SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
947	SERecurrentNode* rec, int64_t iteration) const {
948	SExpression coeff = rec->GetCoefficient();
949	SExpression offset = rec->GetOffset();
950
951	return (coeff * iteration) + offset;
952	}
953
954	SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
955	SERecurrentNode* rec) const {
956	return GetValueAtIteration(rec, loop_max_iterations_ - `1`);
957	}
958
959	bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
960	SExpression lhs,
961	SExpression rhs,
962	bool* result) const {
963	assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
964	assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
965	// We perform the test: 0 cmp_op rhs - lhs
966	// What is left is then to determine the sign of the expression.
967	switch (cmp_op) {
968	case CmpOperator::kLT: {
969	return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
970	}
971	case CmpOperator::kGT: {
972	return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
973	}
974	case CmpOperator::kLE: {
975	return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
976	}
977	case CmpOperator::kGE: {
978	return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
979	}
980	}
981	return false;
982	}
983
984	LoopPeelingPass::LoopPeelingInfo::Direction
985	LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
986	SExpression rhs) const {
987	{
988	// Try peel before opportunity.
989	SExpression lhs_cst = lhs;
990	if (SERecurrentNode* rec_node = lhs ->AsSERecurrentNode()) {
991	lhs_cst = rec_node->GetOffset();
992	}
993	SExpression rhs_cst = rhs;
994	if (SERecurrentNode* rec_node = rhs ->AsSERecurrentNode()) {
995	rhs_cst = rec_node->GetOffset();
996	}
997
998	if (lhs_cst == rhs_cst) {
999	return Direction {LoopPeelingPass::PeelDirection::kBefore, `1`};
1000	}
1001	}
1002
1003	{
1004	// Try peel after opportunity.
1005	SExpression lhs_cst = lhs;
1006	if (SERecurrentNode* rec_node = lhs ->AsSERecurrentNode()) {
1007	// rec_node(x) = a x + b*
1008	// assign to lhs: a (loop_max_iterations_ - 1) + b*
1009	lhs_cst = GetValueAtLastIteration(rec_node);
1010	}
1011	SExpression rhs_cst = rhs;
1012	if (SERecurrentNode* rec_node = rhs ->AsSERecurrentNode()) {
1013	// rec_node(x) = a x + b*
1014	// assign to lhs: a (loop_max_iterations_ - 1) + b*
1015	rhs_cst = GetValueAtLastIteration(rec_node);
1016	}
1017
1018	if (lhs_cst == rhs_cst) {
1019	return Direction {LoopPeelingPass::PeelDirection::kAfter, `1`};
1020	}
1021	}
1022
1023	return GetNoneDirection();
1024	}
1025
1026	LoopPeelingPass::LoopPeelingInfo::Direction
1027	LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
1028	SExpression lhs,
1029	SERecurrentNode* rhs) const {
1030	SExpression offset = rhs->GetOffset();
1031	SExpression coefficient = rhs->GetCoefficient();
1032	// Compute (cst - B) / A.
1033	std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
1034	if (!flip_iteration.first ->AsSEConstantNode()) {
1035	return GetNoneDirection();
1036	}
1037	// note: !!flip_iteration.second normalize to 0/1 (via bool cast).
1038	int64_t iteration =
1039	flip_iteration.first ->AsSEConstantNode()->FoldToSingleValue() +
1040	!!flip_iteration.second;
1041	if (iteration <= `0` \|\|
1042	loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
1043	// Always true or false within the loop bounds.
1044	return GetNoneDirection();
1045	}
1046	// If this is a <= or >= operator and the iteration, make sure \|iteration\| is
1047	// the one flipping the condition.
1048	// If (cst - B) and A are not divisible, this equivalent to a < or > check, so
1049	// we skip this test.
1050	if (!flip_iteration.second &&
1051	(cmp_op == CmpOperator::kLE \|\| cmp_op == CmpOperator::kGE)) {
1052	bool first_iteration;
1053	bool current_iteration;
1054	if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) \|\|
1055	!EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
1056	&current_iteration)) {
1057	return GetNoneDirection();
1058	}
1059	// If the condition did not flip the next will.
1060	if (first_iteration == current_iteration) {
1061	iteration++;
1062	}
1063	}
1064
1065	uint32_t cast_iteration = `0`;
1066	// sanity check: can we fit \|iteration\| in a uint32_t ?
1067	if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
1068	cast_iteration = static_cast<uint32_t>(iteration);
1069	}
1070
1071	if (cast_iteration) {
1072	// Peel before if we are closer to the start, after if closer to the end.
1073	if (loop_max_iterations_ / `2` > cast_iteration) {
1074	return Direction {LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
1075	} else {
1076	return Direction {
1077	LoopPeelingPass::PeelDirection::kAfter,
1078	static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
1079	}
1080	}
1081
1082	return GetNoneDirection();
1083	}
1084
1085	} // namespace opt
1086	} // namespace spvtools
1087

Browse the source code of Skia/third_party/externals/spirv-tools/source/opt/loop_peeling.cpp