ComputeProgram.cpp source code [engine/third_party/swiftshader/src/Pipeline/ComputeProgram.cpp]

1	// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include "ComputeProgram.hpp"
16	#include "Constants.hpp"
17
18	#include "Vulkan/VkDebug.hpp"
19	#include "Vulkan/VkPipelineLayout.hpp"
20
21	#include "marl/defer.h"
22	#include "marl/trace.h"
23	#include "marl/waitgroup.h"
24
25	#include <queue>
26
namespace
{
// Axis selectors. They are used both as indices into the three-element
// workgroup / invocation arrays and as the lane number handed to Extract()
// and Insert() on the matching four-wide values.
enum
{
	X = 0,
	Y = 1,
	Z = 2
};
}  // anonymous namespace
31
32	namespace sw
33	{
34	ComputeProgram::ComputeProgram(SpirvShader const shader, vk::PipelineLayout const* pipelineLayout, const* vk::DescriptorSet::Bindings &descriptorSets)
35	: shader(shader),
36	pipelineLayout(pipelineLayout),
37	descriptorSets(descriptorSets)
38	{
39	}
40
41	ComputeProgram::~ComputeProgram()
42	{
43	}
44
45	void ComputeProgram::generate()
46	{
47	MARL_SCOPED_EVENT("ComputeProgram::generate");
48
49	SpirvRoutine routine(pipelineLayout);
50	shader->emitProlog(&routine);
51	emit(&routine);
52	shader->emitEpilog(&routine);
53	}
54
55	void ComputeProgram::setWorkgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[`3`])
56	{
57	routine->setInputBuiltin(shader, spv::BuiltInNumWorkgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
58	{
59	auto numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
60	for (uint32_t component = `0`; component < builtin.SizeInComponents; component++)
61	{
62	value [builtin.FirstComponent + component] =
63	As<SIMD::Float>(SIMD::Int (Extract(numWorkgroups, component)));
64	}
65	});
66
67	routine->setInputBuiltin(shader, spv::BuiltInWorkgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
68	{
69	for (uint32_t component = `0`; component < builtin.SizeInComponents; component++)
70	{
71	value [builtin.FirstComponent + component] =
72	As<SIMD::Float>(SIMD::Int (workgroupID[component]));
73	}
74	});
75
76	routine->setInputBuiltin(shader, spv::BuiltInWorkgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
77	{
78	auto workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
79	for (uint32_t component = `0`; component < builtin.SizeInComponents; component++)
80	{
81	value [builtin.FirstComponent + component] =
82	As<SIMD::Float>(SIMD::Int (Extract(workgroupSize, component)));
83	}
84	});
85
86	routine->setInputBuiltin(shader, spv::BuiltInNumSubgroups, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
87	{
88	ASSERT(builtin.SizeInComponents == `1`);
89	auto subgroupsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, subgroupsPerWorkgroup));
90	value [builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int (subgroupsPerWorkgroup));
91	});
92
93	routine->setInputBuiltin(shader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
94	{
95	ASSERT(builtin.SizeInComponents == `1`);
96	auto invocationsPerSubgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerSubgroup));
97	value [builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int (invocationsPerSubgroup));
98	});
99
100	routine->setImmutableInputBuiltins(shader);
101	}
102
103	void ComputeProgram::setSubgroupBuiltins(Pointer<Byte> data, SpirvRoutine* routine, Int workgroupID[`3`], SIMD::Int localInvocationIndex, Int subgroupIndex)
104	{
105	Int4 numWorkgroups = *Pointer<Int4>(data + OFFSET(Data, numWorkgroups));
106	Int4 workgroupSize = *Pointer<Int4>(data + OFFSET(Data, workgroupSize));
107
108	// TODO: Fix Int4 swizzles so we can just use workgroupSize.x, workgroupSize.y.
109	Int workgroupSizeX = Extract(workgroupSize, X);
110	Int workgroupSizeY = Extract(workgroupSize, Y);
111
112	SIMD::Int localInvocationID[`3`];
113	{
114	SIMD::Int idx = localInvocationIndex;
115	localInvocationID[Z] = idx / SIMD::Int (workgroupSizeX * workgroupSizeY);
116	idx -= localInvocationID[Z] * SIMD::Int (workgroupSizeX * workgroupSizeY); // modulo
117	localInvocationID[Y] = idx / SIMD::Int (workgroupSizeX);
118	idx -= localInvocationID[Y] * SIMD::Int (workgroupSizeX); // modulo
119	localInvocationID[X] = idx;
120	}
121
122	routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
123	{
124	ASSERT(builtin.SizeInComponents == `1`);
125	value [builtin.FirstComponent] = As<SIMD::Float>(localInvocationIndex);
126	});
127
128	routine->setInputBuiltin(shader, spv::BuiltInSubgroupId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
129	{
130	ASSERT(builtin.SizeInComponents == `1`);
131	value [builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int (subgroupIndex));
132	});
133
134	routine->setInputBuiltin(shader, spv::BuiltInLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
135	{
136	for (uint32_t component = `0`; component < builtin.SizeInComponents; component++)
137	{
138	value [builtin.FirstComponent + component] =
139	As<SIMD::Float>(localInvocationID[component]);
140	}
141	});
142
143	routine->setInputBuiltin(shader, spv::BuiltInGlobalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
144	{
145	SIMD::Int wgID = `0`;
146	wgID = Insert(wgID, workgroupID[X], X);
147	wgID = Insert(wgID, workgroupID[Y], Y);
148	wgID = Insert(wgID, workgroupID[Z], Z);
149	auto localBase = workgroupSize * wgID;
150	for (uint32_t component = `0`; component < builtin.SizeInComponents; component++)
151	{
152	auto globalInvocationID = SIMD::Int (Extract(localBase, component)) + localInvocationID[component];
153	value [builtin.FirstComponent + component] = As<SIMD::Float>(globalInvocationID);
154	}
155	});
156	}
157
158	void ComputeProgram::emit(SpirvRoutine* routine)
159	{
160	Pointer<Byte> data = Arg<`0`>();
161	Int workgroupX = Arg<`1`>();
162	Int workgroupY = Arg<`2`>();
163	Int workgroupZ = Arg<`3`>();
164	Pointer<Byte> workgroupMemory = Arg<`4`>();
165	Int firstSubgroup = Arg<`5`>();
166	Int subgroupCount = Arg<`6`>();
167
168	routine->descriptorSets = data + OFFSET(Data, descriptorSets);
169	routine->descriptorDynamicOffsets = data + OFFSET(Data, descriptorDynamicOffsets);
170	routine->pushConstants = data + OFFSET(Data, pushConstants);
171	routine->constants = *Pointer<Pointer<Byte>>(data + OFFSET(Data, constants));
172	routine->workgroupMemory = workgroupMemory;
173
174	Int invocationsPerWorkgroup = *Pointer<Int>(data + OFFSET(Data, invocationsPerWorkgroup));
175
176	Int workgroupID[`3`] = {workgroupX, workgroupY, workgroupZ};
177	setWorkgroupBuiltins(data, routine, workgroupID);
178
179	For(Int i = `0`, i < subgroupCount, i ++)
180	{
181	auto subgroupIndex = firstSubgroup + i;
182
183	// TODO: Replace SIMD::Int(0, 1, 2, 3) with SIMD-width equivalent
184	auto localInvocationIndex = SIMD::Int (subgroupIndex * SIMD::Width) + SIMD::Int (`0`, `1`, `2`, `3`);
185
186	// Disable lanes where (invocationIDs >= invocationsPerWorkgroup)
187	auto activeLaneMask = CmpLT(localInvocationIndex, SIMD::Int (invocationsPerWorkgroup));
188
189	setSubgroupBuiltins(data, routine, workgroupID, localInvocationIndex, subgroupIndex);
190
191	shader->emit(routine, activeLaneMask, activeLaneMask, descriptorSets);
192	}
193	}
194
195	void ComputeProgram::run(
196	vk::DescriptorSet::Bindings const &descriptorSets,
197	vk::DescriptorSet::DynamicOffsets const &descriptorDynamicOffsets,
198	PushConstantStorage const &pushConstants,
199	uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
200	uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
201	{
202	auto &modes = shader->getModes();
203
204	auto invocationsPerSubgroup = SIMD::Width;
205	auto invocationsPerWorkgroup = modes.WorkgroupSizeX * modes.WorkgroupSizeY * modes.WorkgroupSizeZ;
206	auto subgroupsPerWorkgroup = (invocationsPerWorkgroup + invocationsPerSubgroup - `1`) / invocationsPerSubgroup;
207
208	Data data;
209	data.descriptorSets = descriptorSets;
210	data.descriptorDynamicOffsets = descriptorDynamicOffsets;
211	data.numWorkgroups[X] = groupCountX;
212	data.numWorkgroups[Y] = groupCountY;
213	data.numWorkgroups[Z] = groupCountZ;
214	data.numWorkgroups[`3`] = `0`;
215	data.workgroupSize[X] = modes.WorkgroupSizeX;
216	data.workgroupSize[Y] = modes.WorkgroupSizeY;
217	data.workgroupSize[Z] = modes.WorkgroupSizeZ;
218	data.workgroupSize[`3`] = `0`;
219	data.invocationsPerSubgroup = invocationsPerSubgroup;
220	data.invocationsPerWorkgroup = invocationsPerWorkgroup;
221	data.subgroupsPerWorkgroup = subgroupsPerWorkgroup;
222	data.pushConstants = pushConstants;
223	data.constants = &sw::constants;
224
225	marl::WaitGroup wg;
226	const uint32_t batchCount = `16`;
227
228	auto groupCount = groupCountX * groupCountY * groupCountZ;
229
230	for (uint32_t batchID = `0`; batchID < batchCount && batchID < groupCount; batchID++)
231	{
232	wg.add(`1`);
233	marl::schedule([=, &data]
234	{
235	defer(wg.done());
236	std::vector<uint8_t> workgroupMemory(shader->workgroupMemory.size());
237
238	for (uint32_t groupIndex = batchID; groupIndex < groupCount; groupIndex += batchCount)
239	{
240	auto modulo = groupIndex;
241	auto groupOffsetZ = modulo / (groupCountX * groupCountY);
242	modulo -= groupOffsetZ * (groupCountX * groupCountY);
243	auto groupOffsetY = modulo / groupCountX;
244	modulo -= groupOffsetY * groupCountX;
245	auto groupOffsetX = modulo;
246
247	auto groupZ = baseGroupZ + groupOffsetZ;
248	auto groupY = baseGroupY + groupOffsetY;
249	auto groupX = baseGroupX + groupOffsetX;
250	MARL_SCOPED_EVENT("groupX: %d, groupY: %d, groupZ: %d", groupX, groupY, groupZ);
251
252	using Coroutine = std::unique_ptr<rr::Stream<SpirvShader::YieldResult>>;
253	std::queue<Coroutine> coroutines;
254
255	if (modes.ContainsControlBarriers)
256	{
257	// Make a function call per subgroup so each subgroup
258	// can yield, bringing all subgroups to the barrier
259	// together.
260	for(int subgroupIndex = `0`; subgroupIndex < subgroupsPerWorkgroup; subgroupIndex++)
261	{
262	auto coroutine = (*this)(&data, groupX, groupY, groupZ, workgroupMemory.data(), subgroupIndex, `1`);
263	coroutines.push(std::move(coroutine));
264	}
265	}
266	else
267	{
268	auto coroutine = (*this)(&data, groupX, groupY, groupZ, workgroupMemory.data(), `0`, subgroupsPerWorkgroup);
269	coroutines.push(std::move(coroutine));
270	}
271
272	while (coroutines.size() > `0`)
273	{
274	auto coroutine = std::move(coroutines.front());
275	coroutines.pop();
276
277	SpirvShader::YieldResult result;
278	if (coroutine ->await(result))
279	{
280	// TODO: Consider result (when the enum is more than 1 entry).
281	coroutines.push(std::move(coroutine));
282	}
283	}
284	}
285	});
286	}
287
288	wg.wait();
289	}
290
291	} // namespace sw
292

Browse the source code of engine/third_party/swiftshader/src/Pipeline/ComputeProgram.cpp