VertexRoutine.cpp source code [engine/third_party/swiftshader/src/Pipeline/VertexRoutine.cpp]

1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include "VertexRoutine.hpp"
16
17	#include "Constants.hpp"
18	#include "SpirvShader.hpp"
19	#include "Device/Vertex.hpp"
20	#include "Device/Renderer.hpp"
21	#include "Vulkan/VkDebug.hpp"
22	#include "System/Half.hpp"
23
24	namespace sw
25	{
26	VertexRoutine::VertexRoutine(
27	const VertexProcessor::State &state,
28	vk::PipelineLayout const *pipelineLayout,
29	SpirvShader const *spirvShader)
30	: routine (pipelineLayout),
31	state(state),
32	spirvShader(spirvShader)
33	{
34	spirvShader->emitProlog(&routine);
35	}
36
37	VertexRoutine::~VertexRoutine()
38	{
39	}
40
41	void VertexRoutine::generate()
42	{
43	Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
44	Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
45	Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag));
46
47	UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
48
49	constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
50
51	// Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer.
52	// On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache
53	// in reverse order to guarantee that the first one doesn't get evicted and can be written out.
54
55	Do
56	{
57	UInt index = *batch;
58	UInt cacheIndex = index & VertexCache::TAG_MASK;
59
60	If(tagCache[cacheIndex] != index)
61	{
62	readInput(batch);
63	program(batch, vertexCount);
64	computeClipFlags();
65
66	writeCache(vertexCache, tagCache, batch);
67	}
68
69	Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt ((int)sizeof(Vertex));
70
71	// For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive
72	for(int i = `0`; i < (state.isPoint ? `3` : `1`); i++)
73	{
74	writeVertex(vertex, cacheEntry);
75	vertex += sizeof(Vertex);
76	}
77
78	batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t));
79	vertexCount --;
80	}
81	Until(vertexCount == `0`)
82
83	Return();
84	}
85
86	void VertexRoutine::readInput(Pointer<UInt> &batch)
87	{
88	for(int i = `0`; i < MAX_INTERFACE_COMPONENTS; i += `4`)
89	{
90	if(spirvShader->inputs [i + `0`].Type != SpirvShader::ATTRIBTYPE_UNUSED \|\|
91	spirvShader->inputs [i + `1`].Type != SpirvShader::ATTRIBTYPE_UNUSED \|\|
92	spirvShader->inputs [i + `2`].Type != SpirvShader::ATTRIBTYPE_UNUSED \|\|
93	spirvShader->inputs [i + `3`].Type != SpirvShader::ATTRIBTYPE_UNUSED)
94	{
95	Pointer<Byte> input = Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) (i / `4`));
96	UInt stride = Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) (i / `4`));
97	Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
98	UInt robustnessSize(`0`);
99	if(state.robustBufferAccess)
100	{
101	robustnessSize = Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) (i / `4`));
102	}
103
104	auto value = readStream(input, stride, state.input[i / `4`], batch, state.robustBufferAccess, robustnessSize, baseVertex);
105	routine.inputs [i + `0`] = value.x;
106	routine.inputs [i + `1`] = value.y;
107	routine.inputs [i + `2`] = value.z;
108	routine.inputs [i + `3`] = value.w;
109	}
110	}
111	}
112
113	void VertexRoutine::computeClipFlags()
114	{
115	auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
116	assert(it != spirvShader->outputBuiltins.end());
117	assert(it ->second.SizeInComponents == `4`);
118	auto &pos = routine.getVariable(it ->second.Id);
119	auto posX = pos [it ->second.FirstComponent + `0`];
120	auto posY = pos [it ->second.FirstComponent + `1`];
121	auto posZ = pos [it ->second.FirstComponent + `2`];
122	auto posW = pos [it ->second.FirstComponent + `3`];
123
124	Int4 maxX = CmpLT(posW, posX);
125	Int4 maxY = CmpLT(posW, posY);
126	Int4 maxZ = CmpLT(posW, posZ);
127	Int4 minX = CmpNLE(-posW, posX);
128	Int4 minY = CmpNLE(-posW, posY);
129	Int4 minZ = CmpNLE(Float4 (`0.0f`), posZ);
130
131	clipFlags = Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)];
132	clipFlags \|= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)];
133	clipFlags \|= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)];
134	clipFlags \|= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)];
135	clipFlags \|= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)];
136	clipFlags \|= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)];
137
138	Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
139	Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
140	Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
141
142	Int4 finiteXYZ = finiteX & finiteY & finiteZ;
143	clipFlags \|= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
144	}
145
146	Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
147	bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
148	{
149	Vector4f v;
150	// Because of the following rule in the Vulkan spec, we do not care if a very large negative
151	// baseVertex would overflow all the way back into a valid region of the index buffer:
152	// "Out-of-bounds buffer loads will return any of the following values :
153	// - Values from anywhere within the memory range(s) bound to the buffer (possibly including
154	// bytes of memory past the end of the buffer, up to the end of the bound range)."
155	UInt4 offsets = (Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4 (baseVertex))) UInt4 (stride);
156
157	Pointer<Byte> source0 = buffer + offsets.x;
158	Pointer<Byte> source1 = buffer + offsets.y;
159	Pointer<Byte> source2 = buffer + offsets.z;
160	Pointer<Byte> source3 = buffer + offsets.w;
161
162	UInt4 zero(`0`);
163	if (robustBufferAccess)
164	{
165	// TODO(b/141124876): Optimize for wide-vector gather operations.
166	UInt4 limits = offsets + UInt4 (stream.bytesPerAttrib());
167	Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero);
168	source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource);
169	source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource);
170	source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource);
171	source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource);
172	}
173
174	bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) \|\| stream.normalized;
175
176	switch(stream.type)
177	{
178	case STREAMTYPE_FLOAT:
179	{
180	if(stream.count == `0`)
181	{
182	// Null stream, all default components
183	}
184	else
185	{
186	if(stream.count == `1`)
187	{
188	v.x.x = *Pointer<Float>(source0);
189	v.x.y = *Pointer<Float>(source1);
190	v.x.z = *Pointer<Float>(source2);
191	v.x.w = *Pointer<Float>(source3);
192	}
193	else
194	{
195	v.x = *Pointer<Float4>(source0);
196	v.y = *Pointer<Float4>(source1);
197	v.z = *Pointer<Float4>(source2);
198	v.w = *Pointer<Float4>(source3);
199
200	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
201	}
202
203	switch(stream.attribType)
204	{
205	case SpirvShader::ATTRIBTYPE_INT:
206	if(stream.count >= `1`) v.x = As<Float4>(Int4 (v.x));
207	if(stream.count >= `2`) v.x = As<Float4>(Int4 (v.y));
208	if(stream.count >= `3`) v.x = As<Float4>(Int4 (v.z));
209	if(stream.count >= `4`) v.x = As<Float4>(Int4 (v.w));
210	break;
211	case SpirvShader::ATTRIBTYPE_UINT:
212	if(stream.count >= `1`) v.x = As<Float4>(UInt4 (v.x));
213	if(stream.count >= `2`) v.x = As<Float4>(UInt4 (v.y));
214	if(stream.count >= `3`) v.x = As<Float4>(UInt4 (v.z));
215	if(stream.count >= `4`) v.x = As<Float4>(UInt4 (v.w));
216	break;
217	default:
218	break;
219	}
220	}
221	}
222	break;
223	case STREAMTYPE_BYTE:
224	if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
225	{
226	v.x = Float4 (*Pointer<Byte4>(source0));
227	v.y = Float4 (*Pointer<Byte4>(source1));
228	v.z = Float4 (*Pointer<Byte4>(source2));
229	v.w = Float4 (*Pointer<Byte4>(source3));
230
231	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
232
233	if(stream.normalized)
234	{
235	if(stream.count >= `1`) v.x = Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
236	if(stream.count >= `2`) v.y = Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
237	if(stream.count >= `3`) v.z = Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
238	if(stream.count >= `4`) v.w = Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
239	}
240	}
241	else // Stream: UByte, Shader attrib: Int / UInt
242	{
243	v.x = As<Float4>(Int4 (*Pointer<Byte4>(source0)));
244	v.y = As<Float4>(Int4 (*Pointer<Byte4>(source1)));
245	v.z = As<Float4>(Int4 (*Pointer<Byte4>(source2)));
246	v.w = As<Float4>(Int4 (*Pointer<Byte4>(source3)));
247
248	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
249	}
250	break;
251	case STREAMTYPE_SBYTE:
252	if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
253	{
254	v.x = Float4 (*Pointer<SByte4>(source0));
255	v.y = Float4 (*Pointer<SByte4>(source1));
256	v.z = Float4 (*Pointer<SByte4>(source2));
257	v.w = Float4 (*Pointer<SByte4>(source3));
258
259	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
260
261	if(stream.normalized)
262	{
263	if(stream.count >= `1`) v.x = Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
264	if(stream.count >= `2`) v.y = Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
265	if(stream.count >= `3`) v.z = Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
266	if(stream.count >= `4`) v.w = Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
267	}
268	}
269	else // Stream: SByte, Shader attrib: Int / UInt
270	{
271	v.x = As<Float4>(Int4 (*Pointer<SByte4>(source0)));
272	v.y = As<Float4>(Int4 (*Pointer<SByte4>(source1)));
273	v.z = As<Float4>(Int4 (*Pointer<SByte4>(source2)));
274	v.w = As<Float4>(Int4 (*Pointer<SByte4>(source3)));
275
276	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
277	}
278	break;
279	case STREAMTYPE_COLOR:
280	{
281	v.x = Float4 (Pointer<Byte4>(source0)) *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
282	v.y = Float4 (Pointer<Byte4>(source1)) *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
283	v.z = Float4 (Pointer<Byte4>(source2)) *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
284	v.w = Float4 (Pointer<Byte4>(source3)) *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
285
286	transpose4x4(v.x, v.y, v.z, v.w);
287
288	// Swap red and blue
289	Float4 t = v.x;
290	v.x = v.z;
291	v.z = t;
292	}
293	break;
294	case STREAMTYPE_SHORT:
295	if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
296	{
297	v.x = Float4 (*Pointer<Short4>(source0));
298	v.y = Float4 (*Pointer<Short4>(source1));
299	v.z = Float4 (*Pointer<Short4>(source2));
300	v.w = Float4 (*Pointer<Short4>(source3));
301
302	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
303
304	if(stream.normalized)
305	{
306	if(stream.count >= `1`) v.x = Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
307	if(stream.count >= `2`) v.y = Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
308	if(stream.count >= `3`) v.z = Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
309	if(stream.count >= `4`) v.w = Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
310	}
311	}
312	else // Stream: Short, Shader attrib: Int/UInt, no type conversion
313	{
314	v.x = As<Float4>(Int4 (*Pointer<Short4>(source0)));
315	v.y = As<Float4>(Int4 (*Pointer<Short4>(source1)));
316	v.z = As<Float4>(Int4 (*Pointer<Short4>(source2)));
317	v.w = As<Float4>(Int4 (*Pointer<Short4>(source3)));
318
319	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
320	}
321	break;
322	case STREAMTYPE_USHORT:
323	if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
324	{
325	v.x = Float4 (*Pointer<UShort4>(source0));
326	v.y = Float4 (*Pointer<UShort4>(source1));
327	v.z = Float4 (*Pointer<UShort4>(source2));
328	v.w = Float4 (*Pointer<UShort4>(source3));
329
330	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
331
332	if(stream.normalized)
333	{
334	if(stream.count >= `1`) v.x = Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
335	if(stream.count >= `2`) v.y = Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
336	if(stream.count >= `3`) v.z = Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
337	if(stream.count >= `4`) v.w = Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
338	}
339	}
340	else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
341	{
342	v.x = As<Float4>(Int4 (*Pointer<UShort4>(source0)));
343	v.y = As<Float4>(Int4 (*Pointer<UShort4>(source1)));
344	v.z = As<Float4>(Int4 (*Pointer<UShort4>(source2)));
345	v.w = As<Float4>(Int4 (*Pointer<UShort4>(source3)));
346
347	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
348	}
349	break;
350	case STREAMTYPE_INT:
351	if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
352	{
353	v.x = Float4 (*Pointer<Int4>(source0));
354	v.y = Float4 (*Pointer<Int4>(source1));
355	v.z = Float4 (*Pointer<Int4>(source2));
356	v.w = Float4 (*Pointer<Int4>(source3));
357
358	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
359
360	if(stream.normalized)
361	{
362	if(stream.count >= `1`) v.x = Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
363	if(stream.count >= `2`) v.y = Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
364	if(stream.count >= `3`) v.z = Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
365	if(stream.count >= `4`) v.w = Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
366	}
367	}
368	else // Stream: Int, Shader attrib: Int/UInt, no type conversion
369	{
370	v.x = *Pointer<Float4>(source0);
371	v.y = *Pointer<Float4>(source1);
372	v.z = *Pointer<Float4>(source2);
373	v.w = *Pointer<Float4>(source3);
374
375	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
376	}
377	break;
378	case STREAMTYPE_UINT:
379	if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
380	{
381	v.x = Float4 (*Pointer<UInt4>(source0));
382	v.y = Float4 (*Pointer<UInt4>(source1));
383	v.z = Float4 (*Pointer<UInt4>(source2));
384	v.w = Float4 (*Pointer<UInt4>(source3));
385
386	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
387
388	if(stream.normalized)
389	{
390	if(stream.count >= `1`) v.x = Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
391	if(stream.count >= `2`) v.y = Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
392	if(stream.count >= `3`) v.z = Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
393	if(stream.count >= `4`) v.w = Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
394	}
395	}
396	else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
397	{
398	v.x = *Pointer<Float4>(source0);
399	v.y = *Pointer<Float4>(source1);
400	v.z = *Pointer<Float4>(source2);
401	v.w = *Pointer<Float4>(source3);
402
403	transpose4xN(v.x, v.y, v.z, v.w, stream.count);
404	}
405	break;
406	case STREAMTYPE_HALF:
407	{
408	if(stream.count >= `1`)
409	{
410	UShort x0 = *Pointer<UShort>(source0 + `0`);
411	UShort x1 = *Pointer<UShort>(source1 + `0`);
412	UShort x2 = *Pointer<UShort>(source2 + `0`);
413	UShort x3 = *Pointer<UShort>(source3 + `0`);
414
415	v.x.x = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (x0) `4`);
416	v.x.y = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (x1) `4`);
417	v.x.z = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (x2) `4`);
418	v.x.w = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (x3) `4`);
419	}
420
421	if(stream.count >= `2`)
422	{
423	UShort y0 = *Pointer<UShort>(source0 + `2`);
424	UShort y1 = *Pointer<UShort>(source1 + `2`);
425	UShort y2 = *Pointer<UShort>(source2 + `2`);
426	UShort y3 = *Pointer<UShort>(source3 + `2`);
427
428	v.y.x = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (y0) `4`);
429	v.y.y = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (y1) `4`);
430	v.y.z = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (y2) `4`);
431	v.y.w = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (y3) `4`);
432	}
433
434	if(stream.count >= `3`)
435	{
436	UShort z0 = *Pointer<UShort>(source0 + `4`);
437	UShort z1 = *Pointer<UShort>(source1 + `4`);
438	UShort z2 = *Pointer<UShort>(source2 + `4`);
439	UShort z3 = *Pointer<UShort>(source3 + `4`);
440
441	v.z.x = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (z0) `4`);
442	v.z.y = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (z1) `4`);
443	v.z.z = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (z2) `4`);
444	v.z.w = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (z3) `4`);
445	}
446
447	if(stream.count >= `4`)
448	{
449	UShort w0 = *Pointer<UShort>(source0 + `6`);
450	UShort w1 = *Pointer<UShort>(source1 + `6`);
451	UShort w2 = *Pointer<UShort>(source2 + `6`);
452	UShort w3 = *Pointer<UShort>(source3 + `6`);
453
454	v.w.x = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (w0) `4`);
455	v.w.y = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (w1) `4`);
456	v.w.z = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (w2) `4`);
457	v.w.w = Pointer<Float>(constants + OFFSET(Constants,half2float) + Int (w3) `4`);
458	}
459	}
460	break;
461	case STREAMTYPE_2_10_10_10_INT:
462	{
463	Int4 src;
464	src = Insert(src, *Pointer<Int>(source0), `0`);
465	src = Insert(src, *Pointer<Int>(source1), `1`);
466	src = Insert(src, *Pointer<Int>(source2), `2`);
467	src = Insert(src, *Pointer<Int>(source3), `3`);
468
469	v.x = Float4 ((src << `22`) >> `22`);
470	v.y = Float4 ((src << `12`) >> `22`);
471	v.z = Float4 ((src << `02`) >> `22`);
472	v.w = Float4 (src >> `30`);
473
474	if(stream.normalized)
475	{
476	v.x = Max(v.x * Float4 (`1.0f` / `0x1FF`), Float4 (-`1.0f`));
477	v.y = Max(v.y * Float4 (`1.0f` / `0x1FF`), Float4 (-`1.0f`));
478	v.z = Max(v.z * Float4 (`1.0f` / `0x1FF`), Float4 (-`1.0f`));
479	v.w = Max(v.w, Float4 (-`1.0f`));
480	}
481	}
482	break;
483	case STREAMTYPE_2_10_10_10_UINT:
484	{
485	Int4 src;
486	src = Insert(src, *Pointer<Int>(source0), `0`);
487	src = Insert(src, *Pointer<Int>(source1), `1`);
488	src = Insert(src, *Pointer<Int>(source2), `2`);
489	src = Insert(src, *Pointer<Int>(source3), `3`);
490
491	v.x = Float4 (src & Int4 (`0x3FF`));
492	v.y = Float4 ((src >> `10`) & Int4 (`0x3FF`));
493	v.z = Float4 ((src >> `20`) & Int4 (`0x3FF`));
494	v.w = Float4 ((src >> `30`) & Int4 (`0x3`));
495
496	if(stream.normalized)
497	{
498	v.x *= Float4 (`1.0f` / `0x3FF`);
499	v.y *= Float4 (`1.0f` / `0x3FF`);
500	v.z *= Float4 (`1.0f` / `0x3FF`);
501	v.w *= Float4 (`1.0f` / `0x3`);
502	}
503	}
504	break;
505	default:
506	UNSUPPORTED("stream.type %d", int(stream.type));
507	}
508
509	if(stream.count < `1`) v.x = Float4 (`0.0f`);
510	if(stream.count < `2`) v.y = Float4 (`0.0f`);
511	if(stream.count < `3`) v.z = Float4 (`0.0f`);
512	if(stream.count < `4`) v.w = isNativeFloatAttrib ? As<Float4>(Float4 (`1.0f`)) : As<Float4>(Int4 (`1`));
513
514	return v;
515	}
516
517	void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch)
518	{
519	UInt index0 = batch [`0`];
520	UInt index1 = batch [`1`];
521	UInt index2 = batch [`2`];
522	UInt index3 = batch [`3`];
523
524	UInt cacheIndex0 = index0 & VertexCache::TAG_MASK;
525	UInt cacheIndex1 = index1 & VertexCache::TAG_MASK;
526	UInt cacheIndex2 = index2 & VertexCache::TAG_MASK;
527	UInt cacheIndex3 = index3 & VertexCache::TAG_MASK;
528
529	// We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check.
530	// Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache.
531	tagCache [cacheIndex3] = index3;
532	tagCache [cacheIndex2] = index2;
533	tagCache [cacheIndex1] = index1;
534	tagCache [cacheIndex0] = index0;
535
536	auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
537	assert(it != spirvShader->outputBuiltins.end());
538	assert(it ->second.SizeInComponents == `4`);
539	auto &position = routine.getVariable(it ->second.Id);
540
541	Vector4f pos;
542	pos.x = position [it ->second.FirstComponent + `0`];
543	pos.y = position [it ->second.FirstComponent + `1`];
544	pos.z = position [it ->second.FirstComponent + `2`];
545	pos.w = position [it ->second.FirstComponent + `3`];
546
547	// Projection and viewport transform.
548	Float4 w = As<Float4>(As<Int4>(pos.w) \| (As<Int4>(CmpEQ(pos.w, Float4 (`0.0f`))) & As<Int4>(Float4 (`1.0f`))));
549	Float4 rhw = Float4 (`1.0f`) / w;
550
551	Vector4f proj;
552	proj.x = As<Float4>(RoundInt(Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF))));
553	proj.y = As<Float4>(RoundInt(Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF))));
554	proj.z = pos.z * rhw;
555	proj.w = rhw;
556
557	transpose4x4(pos.x, pos.y, pos.z, pos.w);
558
559	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex3 + OFFSET(Vertex,position), `16`) = pos.w;
560	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex2 + OFFSET(Vertex,position), `16`) = pos.z;
561	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex1 + OFFSET(Vertex,position), `16`) = pos.y;
562	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex0 + OFFSET(Vertex,position), `16`) = pos.x;
563
564	it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
565	if(it != spirvShader->outputBuiltins.end())
566	{
567	assert(it ->second.SizeInComponents == `1`);
568	auto psize = routine.getVariable(it ->second.Id)[it ->second.FirstComponent];
569
570	Pointer<Float>(vertexCache + sizeof(Vertex) cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, `3`);
571	Pointer<Float>(vertexCache + sizeof(Vertex) cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, `2`);
572	Pointer<Float>(vertexCache + sizeof(Vertex) cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, `1`);
573	Pointer<Float>(vertexCache + sizeof(Vertex) cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, `0`);
574	}
575
576	Pointer<Int>(vertexCache + sizeof(Vertex) cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> `24`) & `0x0000000FF`;
577	Pointer<Int>(vertexCache + sizeof(Vertex) cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> `16`) & `0x0000000FF`;
578	Pointer<Int>(vertexCache + sizeof(Vertex) cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> `8`) & `0x0000000FF`;
579	Pointer<Int>(vertexCache + sizeof(Vertex) cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> `0`) & `0x0000000FF`;
580
581	transpose4x4(proj.x, proj.y, proj.z, proj.w);
582
583	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex3 + OFFSET(Vertex,projected), `16`) = proj.w;
584	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex2 + OFFSET(Vertex,projected), `16`) = proj.z;
585	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex1 + OFFSET(Vertex,projected), `16`) = proj.y;
586	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex0 + OFFSET(Vertex,projected), `16`) = proj.x;
587
588	for(int i = `0`; i < MAX_INTERFACE_COMPONENTS; i += `4`)
589	{
590	if(spirvShader->outputs [i + `0`].Type != SpirvShader::ATTRIBTYPE_UNUSED \|\|
591	spirvShader->outputs [i + `1`].Type != SpirvShader::ATTRIBTYPE_UNUSED \|\|
592	spirvShader->outputs [i + `2`].Type != SpirvShader::ATTRIBTYPE_UNUSED \|\|
593	spirvShader->outputs [i + `3`].Type != SpirvShader::ATTRIBTYPE_UNUSED)
594	{
595	Vector4f v;
596	v.x = routine.outputs [i + `0`];
597	v.y = routine.outputs [i + `1`];
598	v.z = routine.outputs [i + `2`];
599	v.w = routine.outputs [i + `3`];
600
601	transpose4x4(v.x, v.y, v.z, v.w);
602
603	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex3 + OFFSET(Vertex,v[i]), `16`) = v.w;
604	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex2 + OFFSET(Vertex,v[i]), `16`) = v.z;
605	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex1 + OFFSET(Vertex,v[i]), `16`) = v.y;
606	Pointer<Float4>(vertexCache + sizeof(Vertex) cacheIndex0 + OFFSET(Vertex,v[i]), `16`) = v.x;
607	}
608	}
609	}
610
611	void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
612	{
613	Pointer<Int4>(vertex + OFFSET(Vertex,position)) = Pointer<Int4>(cacheEntry + OFFSET(Vertex,position));
614	Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize));
615
616	Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags));
617	Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected));
618
619	for(int i = `0`; i < MAX_INTERFACE_COMPONENTS; i++)
620	{
621	if(spirvShader->outputs [i].Type != SpirvShader::ATTRIBTYPE_UNUSED)
622	{
623	Pointer<Int>(vertex + OFFSET(Vertex, v[i]), `4`) = Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), `4`);
624	}
625	}
626	}
627	}
628

Browse the source code of engine/third_party/swiftshader/src/Pipeline/VertexRoutine.cpp