loop_fission.cpp source code [Skia/third_party/externals/spirv-tools/source/opt/loop_fission.cpp]

1	// Copyright (c) 2018 Google LLC.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include "source/opt/loop_fission.h"
16
17	#include <set>
18
19	#include "source/opt/register_pressure.h"
20
21	// Implement loop fission with an optional parameter to split only
22	// if the register pressure in a given loop meets a certain criteria. This is
23	// controlled via the constructors of LoopFissionPass.
24	//
25	// 1 - Build a list of loops to be split, these are top level loops (loops
26	// without child loops themselves) which meet the register pressure criteria, as
27	// determined by the ShouldSplitLoop method of LoopFissionPass.
28	//
29	// 2 - For each loop in the list, group each instruction into a set of related
30	// instructions by traversing each instruction's users and operands recursively.
31	// We stop if we encounter an instruction we have seen before or an instruction
32	// which we don't consider relevant (e.g. OpLoopMerge). We then group these
33	// groups into two different sets, one for the first loop and one for the
34	// second.
35	//
36	// 3 - We then run CanPerformSplit to check that it would be legal to split a
37	// loop using those two sets. We check that we haven't altered the relative
38	// order load/stores appear in the binary and that we aren't breaking any
39	// dependency between load/stores by splitting them into two loops. We also
40	// check that none of the OpBranch instructions are dependent on a load as we
41	// leave control flow structure intact and move only instructions in the body so
42	// we want to avoid any loads with side effects or aliasing.
43	//
44	// 4 - We then split the loop by calling SplitLoop. This function clones the
45	// loop and attaches it to the preheader and connects the new loops merge block
46	// to the current loop header block. We then use the two sets built in step 2 to
47	// remove instructions from each loop. If an instruction appears in the first
48	// set it is removed from the second loop and vice versa.
49	//
50	// 5 - If the multiple split passes flag is set we check if each of the loops
51	// still meet the register pressure criteria. If they do then we add them to the
52	// list of loops to be split (created in step one) to allow for loops to be
53	// split multiple times.
54	//
55
56	namespace spvtools {
57	namespace opt {
58
59	class LoopFissionImpl {
60	public:
61	LoopFissionImpl(IRContext* context, Loop* loop)
62	: context_(context), loop_(loop), load_used_in_condition_(false) {}
63
64	// Group each instruction in the loop into sets of instructions related by
65	// their usedef chains. An instruction which uses another will appear in the
66	// same set. Then merge those sets into just two sets. Returns false if there
67	// was one or less sets created.
68	bool GroupInstructionsByUseDef();
69
70	// Check if the sets built by GroupInstructionsByUseDef violate any data
71	// dependence rules.
72	bool CanPerformSplit();
73
74	// Split the loop and return a pointer to the new loop.
75	Loop* SplitLoop();
76
77	// Checks if \|inst\| is safe to move. We can only move instructions which don't
78	// have any side effects and OpLoads and OpStores.
79	bool MovableInstruction(const Instruction& inst) const;
80
81	private:
82	// Traverse the def use chain of \|inst\| and add the users and uses of \|inst\|
83	// which are in the same loop to the \|returned_set\|.
84	void TraverseUseDef(Instruction* inst, std::set<Instruction> returned_set,
85	bool ignore_phi_users = false, bool report_loads = false);
86
87	// We group the instructions in the block into two different groups, the
88	// instructions to be kept in the original loop and the ones to be cloned into
89	// the new loop. As the cloned loop is attached to the preheader it will be
90	// the first loop and the second loop will be the original.
91	std::set<Instruction*> cloned_loop_instructions_;
92	std::set<Instruction*> original_loop_instructions_;
93
94	// We need a set of all the instructions to be seen so we can break any
95	// recursion and also so we can ignore certain instructions by preemptively
96	// adding them to this set.
97	std::set<Instruction*> seen_instructions_;
98
99	// A map of instructions to their relative position in the function.
100	std::map<Instruction*, size_t> instruction_order_;
101
102	IRContext* context_;
103
104	Loop* loop_;
105
106	// This is set to true by TraverseUseDef when traversing the instructions
107	// related to the loop condition and any if conditions should any of those
108	// instructions be a load.
109	bool load_used_in_condition_;
110	};
111
112	bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const {
113	return inst.opcode() == SpvOp::SpvOpLoad \|\|
114	inst.opcode() == SpvOp::SpvOpStore \|\|
115	inst.opcode() == SpvOp::SpvOpSelectionMerge \|\|
116	inst.opcode() == SpvOp::SpvOpPhi \|\| inst.IsOpcodeCodeMotionSafe();
117	}
118
119	void LoopFissionImpl::TraverseUseDef(Instruction* inst,
120	std::set<Instruction> returned_set,
121	bool ignore_phi_users, bool report_loads) {
122	assert(returned_set && "Set to be returned cannot be null.");
123
124	analysis::DefUseManager* def_use = context_->get_def_use_mgr();
125	std::set<Instruction>& inst_set = returned_set;
126
127	// We create this functor to traverse the use def chain to build the
128	// grouping of related instructions. The lambda captures the std::function
129	// to allow it to recurse.
130	std::function<void(Instruction*)> traverser_functor;
131	traverser_functor = [this, def_use, &inst_set, &traverser_functor,
132	ignore_phi_users, report_loads](Instruction* user) {
133	// If we've seen the instruction before or it is not inside the loop end the
134	// traversal.
135	if (!user \|\| seen_instructions_.count(user) != `0` \|\|
136	!context_->get_instr_block(user) \|\|
137	!loop_->IsInsideLoop(context_->get_instr_block(user))) {
138	return;
139	}
140
141	// Don't include labels or loop merge instructions in the instruction sets.
142	// Including them would mean we group instructions related only by using the
143	// same labels (i.e phis). We already preempt the inclusion of
144	// OpSelectionMerge by adding related instructions to the seen_instructions_
145	// set.
146	if (user->opcode() == SpvOp::SpvOpLoopMerge \|\|
147	user->opcode() == SpvOp::SpvOpLabel)
148	return;
149
150	// If the \|report_loads\| flag is set, set the class field
151	// load_used_in_condition_ to false. This is used to check that none of the
152	// condition checks in the loop rely on loads.
153	if (user->opcode() == SpvOp::SpvOpLoad && report_loads) {
154	load_used_in_condition_ = true;
155	}
156
157	// Add the instruction to the set of instructions already seen, this breaks
158	// recursion and allows us to ignore certain instructions.
159	seen_instructions_.insert(user);
160
161	inst_set.insert(user);
162
163	// Wrapper functor to traverse the operands of each instruction.
164	auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) {
165	traverser_functor (def_use->GetDef(*id));
166	};
167	user->ForEachInOperand(traverse_operand);
168
169	// For the first traversal we want to ignore the users of the phi.
170	if (ignore_phi_users && user->opcode() == SpvOp::SpvOpPhi) return;
171
172	// Traverse each user with this lambda.
173	def_use->ForEachUser(user, traverser_functor);
174
175	// Wrapper functor for the use traversal.
176	auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) {
177	traverser_functor (use);
178	};
179	def_use->ForEachUse(user, traverse_use);
180
181	};
182
183	// We start the traversal of the use def graph by invoking the above
184	// lambda with the \|inst\| parameter.
185	traverser_functor (inst);
186	}
187
188	bool LoopFissionImpl::GroupInstructionsByUseDef() {
189	std::vector<std::set<Instruction*>> sets{};
190
191	// We want to ignore all the instructions stemming from the loop condition
192	// instruction.
193	BasicBlock* condition_block = loop_->FindConditionBlock();
194
195	if (!condition_block) return false;
196	Instruction* condition = &*condition_block->tail();
197
198	// We iterate over the blocks via iterating over all the blocks in the
199	// function, we do this so we are iterating in the same order which the blocks
200	// appear in the binary.
201	Function& function = *loop_->GetHeaderBlock()->GetParent();
202
203	// Create a temporary set to ignore certain groups of instructions within the
204	// loop. We don't want any instructions related to control flow to be removed
205	// from either loop only instructions within the control flow bodies.
206	std::set<Instruction*> instructions_to_ignore{};
207	TraverseUseDef(condition, &instructions_to_ignore, true, true);
208
209	// Traverse control flow instructions to ensure they are added to the
210	// seen_instructions_ set and will be ignored when it it called with actual
211	// sets.
212	for (BasicBlock& block : function) {
213	if (!loop_->IsInsideLoop(block.id())) continue;
214
215	for (Instruction& inst : block) {
216	// Ignore all instructions related to control flow.
217	if (inst.opcode() == SpvOp::SpvOpSelectionMerge \|\| inst.IsBranch()) {
218	TraverseUseDef(&inst, &instructions_to_ignore, true, true);
219	}
220	}
221	}
222
223	// Traverse the instructions and generate the sets, automatically ignoring any
224	// instructions in instructions_to_ignore.
225	for (BasicBlock& block : function) {
226	if (!loop_->IsInsideLoop(block.id()) \|\|
227	loop_->GetHeaderBlock()->id() == block.id())
228	continue;
229
230	for (Instruction& inst : block) {
231	// Record the order that each load/store is seen.
232	if (inst.opcode() == SpvOp::SpvOpLoad \|\|
233	inst.opcode() == SpvOp::SpvOpStore) {
234	instruction_order_[&inst] = instruction_order_.size();
235	}
236
237	// Ignore instructions already seen in a traversal.
238	if (seen_instructions_.count(&inst) != `0`) {
239	continue;
240	}
241
242	// Build the set.
243	std::set<Instruction*> inst_set{};
244	TraverseUseDef(&inst, &inst_set);
245	if (!inst_set.empty()) sets.push_back(std::move(inst_set));
246	}
247	}
248
249	// If we have one or zero sets return false to indicate that due to
250	// insufficient instructions we couldn't split the loop into two groups and
251	// thus the loop can't be split any further.
252	if (sets.size() < `2`) {
253	return false;
254	}
255
256	// Merge the loop sets into two different sets. In CanPerformSplit we will
257	// validate that we don't break the relative ordering of loads/stores by doing
258	// this.
259	for (size_t index = `0`; index < sets.size() / `2`; ++index) {
260	cloned_loop_instructions_.insert(sets [index].begin(), sets [index].end());
261	}
262	for (size_t index = sets.size() / `2`; index < sets.size(); ++index) {
263	original_loop_instructions_.insert(sets [index].begin(), sets [index].end());
264	}
265
266	return true;
267	}
268
269	bool LoopFissionImpl::CanPerformSplit() {
270	// Return false if any of the condition instructions in the loop depend on a
271	// load.
272	if (load_used_in_condition_) {
273	return false;
274	}
275
276	// Build a list of all parent loops of this loop. Loop dependence analysis
277	// needs this structure.
278	std::vector<const Loop*> loops;
279	Loop* parent_loop = loop_;
280	while (parent_loop) {
281	loops.push_back(parent_loop);
282	parent_loop = parent_loop->GetParent();
283	}
284
285	LoopDependenceAnalysis analysis{context_, loops};
286
287	// A list of all the stores in the cloned loop.
288	std::vector<Instruction*> set_one_stores{};
289
290	// A list of all the loads in the cloned loop.
291	std::vector<Instruction*> set_one_loads{};
292
293	// Populate the above lists.
294	for (Instruction* inst : cloned_loop_instructions_) {
295	if (inst->opcode() == SpvOp::SpvOpStore) {
296	set_one_stores.push_back(inst);
297	} else if (inst->opcode() == SpvOp::SpvOpLoad) {
298	set_one_loads.push_back(inst);
299	}
300
301	// If we find any instruction which we can't move (such as a barrier),
302	// return false.
303	if (!MovableInstruction(inst)) return* false;
304	}
305
306	// We need to calculate the depth of the loop to create the loop dependency
307	// distance vectors.
308	const size_t loop_depth = loop_->GetDepth();
309
310	// Check the dependencies between loads in the cloned loop and stores in the
311	// original and vice versa.
312	for (Instruction* inst : original_loop_instructions_) {
313	// If we find any instruction which we can't move (such as a barrier),
314	// return false.
315	if (!MovableInstruction(inst)) return* false;
316
317	// Look at the dependency between the loads in the original and stores in
318	// the cloned loops.
319	if (inst->opcode() == SpvOp::SpvOpLoad) {
320	for (Instruction* store : set_one_stores) {
321	DistanceVector vec{loop_depth};
322
323	// If the store actually should appear after the load, return false.
324	// This means the store has been placed in the wrong grouping.
325	if (instruction_order_[store] > instruction_order_[inst]) {
326	return false;
327	}
328	// If not independent check the distance vector.
329	if (!analysis.GetDependence(store, inst, &vec)) {
330	for (DistanceEntry& entry : vec.GetEntries()) {
331	// A distance greater than zero means that the store in the cloned
332	// loop has a dependency on the load in the original loop.
333	if (entry.distance > `0`) return false;
334	}
335	}
336	}
337	} else if (inst->opcode() == SpvOp::SpvOpStore) {
338	for (Instruction* load : set_one_loads) {
339	DistanceVector vec{loop_depth};
340
341	// If the load actually should appear after the store, return false.
342	if (instruction_order_[load] > instruction_order_[inst]) {
343	return false;
344	}
345
346	// If not independent check the distance vector.
347	if (!analysis.GetDependence(inst, load, &vec)) {
348	for (DistanceEntry& entry : vec.GetEntries()) {
349	// A distance less than zero means the load in the cloned loop is
350	// dependent on the store instruction in the original loop.
351	if (entry.distance < `0`) return false;
352	}
353	}
354	}
355	}
356	}
357	return true;
358	}
359
360	Loop* LoopFissionImpl::SplitLoop() {
361	// Clone the loop.
362	LoopUtils util{context_, loop_};
363	LoopUtils::LoopCloningResult clone_results;
364	Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results);
365
366	// Update the OpLoopMerge in the cloned loop.
367	cloned_loop->UpdateLoopMergeInst();
368
369	// Add the loop_ to the module.
370	// TODO(1841): Handle failure to create pre-header.
371	Function::iterator it =
372	util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id());
373	util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(),
374	clone_results.cloned_bb_.end(), ++it);
375	loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock());
376
377	std::vector<Instruction*> instructions_to_kill{};
378
379	// Kill all the instructions which should appear in the cloned loop but not in
380	// the original loop.
381	for (uint32_t id : loop_->GetBlocks()) {
382	BasicBlock* block = context_->cfg()->block(id);
383
384	for (Instruction& inst : *block) {
385	// If the instruction appears in the cloned loop instruction group, kill
386	// it.
387	if (cloned_loop_instructions_.count(&inst) == `1` &&
388	original_loop_instructions_.count(&inst) == `0`) {
389	instructions_to_kill.push_back(&inst);
390	if (inst.opcode() == SpvOp::SpvOpPhi) {
391	context_->ReplaceAllUsesWith(
392	inst.result_id(), clone_results.value_map_[inst.result_id()]);
393	}
394	}
395	}
396	}
397
398	// Kill all instructions which should appear in the original loop and not in
399	// the cloned loop.
400	for (uint32_t id : cloned_loop->GetBlocks()) {
401	BasicBlock* block = context_->cfg()->block(id);
402	for (Instruction& inst : *block) {
403	Instruction* old_inst = clone_results.ptr_map_[&inst];
404	// If the instruction belongs to the original loop instruction group, kill
405	// it.
406	if (cloned_loop_instructions_.count(old_inst) == `0` &&
407	original_loop_instructions_.count(old_inst) == `1`) {
408	instructions_to_kill.push_back(&inst);
409	}
410	}
411	}
412
413	for (Instruction* i : instructions_to_kill) {
414	context_->KillInst(i);
415	}
416
417	return cloned_loop;
418	}
419
420	LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split,
421	bool split_multiple_times)
422	: split_multiple_times_(split_multiple_times) {
423	// Split if the number of registers in the loop exceeds
424	// \|register_threshold_to_split\|.
425	split_criteria_ =
426	[register_threshold_to_split](
427	const RegisterLiveness::RegionRegisterLiveness& liveness) {
428	return liveness.used_registers_ > register_threshold_to_split;
429	};
430	}
431
432	LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) {
433	// Split by default.
434	split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) {
435	return true;
436	};
437	}
438
439	bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) {
440	LivenessAnalysis* analysis = c->GetLivenessAnalysis();
441
442	RegisterLiveness::RegionRegisterLiveness liveness{};
443
444	Function* function = loop.GetHeaderBlock()->GetParent();
445	analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness);
446
447	return split_criteria_(liveness);
448	}
449
450	Pass::Status LoopFissionPass::Process() {
451	bool changed = false;
452
453	for (Function& f : *context()->module()) {
454	// We collect all the inner most loops in the function and run the loop
455	// splitting util on each. The reason we do this is to allow us to iterate
456	// over each, as creating new loops will invalidate the the loop iterator.
457	std::vector<Loop*> inner_most_loops{};
458	LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f);
459	for (Loop& loop : loop_descriptor) {
460	if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) {
461	inner_most_loops.push_back(&loop);
462	}
463	}
464
465	// List of new loops which meet the criteria to be split again.
466	std::vector<Loop*> new_loops_to_split{};
467
468	while (!inner_most_loops.empty()) {
469	for (Loop* loop : inner_most_loops) {
470	LoopFissionImpl impl{context(), loop};
471
472	// Group the instructions in the loop into two different sets of related
473	// instructions. If we can't group the instructions into the two sets
474	// then we can't split the loop any further.
475	if (!impl.GroupInstructionsByUseDef()) {
476	continue;
477	}
478
479	if (impl.CanPerformSplit()) {
480	Loop* second_loop = impl.SplitLoop();
481	changed = true;
482	context()->InvalidateAnalysesExceptFor(
483	IRContext::kAnalysisLoopAnalysis);
484
485	// If the newly created loop meets the criteria to be split, split it
486	// again.
487	if (ShouldSplitLoop(*second_loop, context()))
488	new_loops_to_split.push_back(second_loop);
489
490	// If the original loop (now split) still meets the criteria to be
491	// split, split it again.
492	if (ShouldSplitLoop(*loop, context()))
493	new_loops_to_split.push_back(loop);
494	}
495	}
496
497	// If the split multiple times flag has been set add the new loops which
498	// meet the splitting criteria into the list of loops to be split on the
499	// next iteration.
500	if (split_multiple_times_) {
501	inner_most_loops = std::move(new_loops_to_split);
502	} else {
503	break;
504	}
505	}
506	}
507
508	return changed ? Pass::Status::SuccessWithChange
509	: Pass::Status::SuccessWithoutChange;
510	}
511
512	} // namespace opt
513	} // namespace spvtools
514

Browse the source code of Skia/third_party/externals/spirv-tools/source/opt/loop_fission.cpp