1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "VertexRoutine.hpp"
16
17#include "Constants.hpp"
18#include "SpirvShader.hpp"
19#include "Device/Vertex.hpp"
20#include "Device/Renderer.hpp"
21#include "Vulkan/VkDebug.hpp"
22#include "System/Half.hpp"
23
24namespace sw
25{
26 VertexRoutine::VertexRoutine(
27 const VertexProcessor::State &state,
28 vk::PipelineLayout const *pipelineLayout,
29 SpirvShader const *spirvShader)
30 : routine(pipelineLayout),
31 state(state),
32 spirvShader(spirvShader)
33 {
34 spirvShader->emitProlog(&routine);
35 }
36
37 VertexRoutine::~VertexRoutine()
38 {
39 }
40
41 void VertexRoutine::generate()
42 {
43 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
44 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
45 Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag));
46
47 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
48
49 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
50
51 // Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer.
52 // On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache
53 // in reverse order to guarantee that the first one doesn't get evicted and can be written out.
54
55 Do
56 {
57 UInt index = *batch;
58 UInt cacheIndex = index & VertexCache::TAG_MASK;
59
60 If(tagCache[cacheIndex] != index)
61 {
62 readInput(batch);
63 program(batch, vertexCount);
64 computeClipFlags();
65
66 writeCache(vertexCache, tagCache, batch);
67 }
68
69 Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
70
71 // For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive
72 for(int i = 0; i < (state.isPoint ? 3 : 1); i++)
73 {
74 writeVertex(vertex, cacheEntry);
75 vertex += sizeof(Vertex);
76 }
77
78 batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t));
79 vertexCount--;
80 }
81 Until(vertexCount == 0)
82
83 Return();
84 }
85
86 void VertexRoutine::readInput(Pointer<UInt> &batch)
87 {
88 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
89 {
90 if(spirvShader->inputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
91 spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
92 spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
93 spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
94 {
95 Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4));
96 UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
97 Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
98 UInt robustnessSize(0);
99 if(state.robustBufferAccess)
100 {
101 robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4));
102 }
103
104 auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex);
105 routine.inputs[i + 0] = value.x;
106 routine.inputs[i + 1] = value.y;
107 routine.inputs[i + 2] = value.z;
108 routine.inputs[i + 3] = value.w;
109 }
110 }
111 }
112
113 void VertexRoutine::computeClipFlags()
114 {
115 auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
116 assert(it != spirvShader->outputBuiltins.end());
117 assert(it->second.SizeInComponents == 4);
118 auto &pos = routine.getVariable(it->second.Id);
119 auto posX = pos[it->second.FirstComponent + 0];
120 auto posY = pos[it->second.FirstComponent + 1];
121 auto posZ = pos[it->second.FirstComponent + 2];
122 auto posW = pos[it->second.FirstComponent + 3];
123
124 Int4 maxX = CmpLT(posW, posX);
125 Int4 maxY = CmpLT(posW, posY);
126 Int4 maxZ = CmpLT(posW, posZ);
127 Int4 minX = CmpNLE(-posW, posX);
128 Int4 minY = CmpNLE(-posW, posY);
129 Int4 minZ = CmpNLE(Float4(0.0f), posZ);
130
131 clipFlags = Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)];
132 clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)];
133 clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)];
134 clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)];
135 clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)];
136 clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)];
137
138 Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
139 Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
140 Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
141
142 Int4 finiteXYZ = finiteX & finiteY & finiteZ;
143 clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)];
144 }
145
146 Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
147 bool robustBufferAccess, UInt & robustnessSize, Int baseVertex)
148 {
149 Vector4f v;
150 // Because of the following rule in the Vulkan spec, we do not care if a very large negative
151 // baseVertex would overflow all the way back into a valid region of the index buffer:
152 // "Out-of-bounds buffer loads will return any of the following values :
153 // - Values from anywhere within the memory range(s) bound to the buffer (possibly including
154 // bytes of memory past the end of the buffer, up to the end of the bound range)."
155 UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride);
156
157 Pointer<Byte> source0 = buffer + offsets.x;
158 Pointer<Byte> source1 = buffer + offsets.y;
159 Pointer<Byte> source2 = buffer + offsets.z;
160 Pointer<Byte> source3 = buffer + offsets.w;
161
162 UInt4 zero(0);
163 if (robustBufferAccess)
164 {
165 // TODO(b/141124876): Optimize for wide-vector gather operations.
166 UInt4 limits = offsets + UInt4(stream.bytesPerAttrib());
167 Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero);
168 source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource);
169 source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource);
170 source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource);
171 source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource);
172 }
173
174 bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized;
175
176 switch(stream.type)
177 {
178 case STREAMTYPE_FLOAT:
179 {
180 if(stream.count == 0)
181 {
182 // Null stream, all default components
183 }
184 else
185 {
186 if(stream.count == 1)
187 {
188 v.x.x = *Pointer<Float>(source0);
189 v.x.y = *Pointer<Float>(source1);
190 v.x.z = *Pointer<Float>(source2);
191 v.x.w = *Pointer<Float>(source3);
192 }
193 else
194 {
195 v.x = *Pointer<Float4>(source0);
196 v.y = *Pointer<Float4>(source1);
197 v.z = *Pointer<Float4>(source2);
198 v.w = *Pointer<Float4>(source3);
199
200 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
201 }
202
203 switch(stream.attribType)
204 {
205 case SpirvShader::ATTRIBTYPE_INT:
206 if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
207 if(stream.count >= 2) v.x = As<Float4>(Int4(v.y));
208 if(stream.count >= 3) v.x = As<Float4>(Int4(v.z));
209 if(stream.count >= 4) v.x = As<Float4>(Int4(v.w));
210 break;
211 case SpirvShader::ATTRIBTYPE_UINT:
212 if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
213 if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y));
214 if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z));
215 if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w));
216 break;
217 default:
218 break;
219 }
220 }
221 }
222 break;
223 case STREAMTYPE_BYTE:
224 if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
225 {
226 v.x = Float4(*Pointer<Byte4>(source0));
227 v.y = Float4(*Pointer<Byte4>(source1));
228 v.z = Float4(*Pointer<Byte4>(source2));
229 v.w = Float4(*Pointer<Byte4>(source3));
230
231 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
232
233 if(stream.normalized)
234 {
235 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
236 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
237 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
238 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
239 }
240 }
241 else // Stream: UByte, Shader attrib: Int / UInt
242 {
243 v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
244 v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
245 v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
246 v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
247
248 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
249 }
250 break;
251 case STREAMTYPE_SBYTE:
252 if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
253 {
254 v.x = Float4(*Pointer<SByte4>(source0));
255 v.y = Float4(*Pointer<SByte4>(source1));
256 v.z = Float4(*Pointer<SByte4>(source2));
257 v.w = Float4(*Pointer<SByte4>(source3));
258
259 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
260
261 if(stream.normalized)
262 {
263 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
264 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
265 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
266 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
267 }
268 }
269 else // Stream: SByte, Shader attrib: Int / UInt
270 {
271 v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
272 v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
273 v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
274 v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
275
276 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
277 }
278 break;
279 case STREAMTYPE_COLOR:
280 {
281 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
282 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
283 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
284 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
285
286 transpose4x4(v.x, v.y, v.z, v.w);
287
288 // Swap red and blue
289 Float4 t = v.x;
290 v.x = v.z;
291 v.z = t;
292 }
293 break;
294 case STREAMTYPE_SHORT:
295 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
296 {
297 v.x = Float4(*Pointer<Short4>(source0));
298 v.y = Float4(*Pointer<Short4>(source1));
299 v.z = Float4(*Pointer<Short4>(source2));
300 v.w = Float4(*Pointer<Short4>(source3));
301
302 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
303
304 if(stream.normalized)
305 {
306 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
307 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
308 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
309 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
310 }
311 }
312 else // Stream: Short, Shader attrib: Int/UInt, no type conversion
313 {
314 v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
315 v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
316 v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
317 v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
318
319 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
320 }
321 break;
322 case STREAMTYPE_USHORT:
323 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
324 {
325 v.x = Float4(*Pointer<UShort4>(source0));
326 v.y = Float4(*Pointer<UShort4>(source1));
327 v.z = Float4(*Pointer<UShort4>(source2));
328 v.w = Float4(*Pointer<UShort4>(source3));
329
330 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
331
332 if(stream.normalized)
333 {
334 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
335 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
336 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
337 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
338 }
339 }
340 else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
341 {
342 v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
343 v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
344 v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
345 v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
346
347 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
348 }
349 break;
350 case STREAMTYPE_INT:
351 if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
352 {
353 v.x = Float4(*Pointer<Int4>(source0));
354 v.y = Float4(*Pointer<Int4>(source1));
355 v.z = Float4(*Pointer<Int4>(source2));
356 v.w = Float4(*Pointer<Int4>(source3));
357
358 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
359
360 if(stream.normalized)
361 {
362 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
363 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
364 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
365 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
366 }
367 }
368 else // Stream: Int, Shader attrib: Int/UInt, no type conversion
369 {
370 v.x = *Pointer<Float4>(source0);
371 v.y = *Pointer<Float4>(source1);
372 v.z = *Pointer<Float4>(source2);
373 v.w = *Pointer<Float4>(source3);
374
375 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
376 }
377 break;
378 case STREAMTYPE_UINT:
379 if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
380 {
381 v.x = Float4(*Pointer<UInt4>(source0));
382 v.y = Float4(*Pointer<UInt4>(source1));
383 v.z = Float4(*Pointer<UInt4>(source2));
384 v.w = Float4(*Pointer<UInt4>(source3));
385
386 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
387
388 if(stream.normalized)
389 {
390 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
391 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
392 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
393 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
394 }
395 }
396 else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
397 {
398 v.x = *Pointer<Float4>(source0);
399 v.y = *Pointer<Float4>(source1);
400 v.z = *Pointer<Float4>(source2);
401 v.w = *Pointer<Float4>(source3);
402
403 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
404 }
405 break;
406 case STREAMTYPE_HALF:
407 {
408 if(stream.count >= 1)
409 {
410 UShort x0 = *Pointer<UShort>(source0 + 0);
411 UShort x1 = *Pointer<UShort>(source1 + 0);
412 UShort x2 = *Pointer<UShort>(source2 + 0);
413 UShort x3 = *Pointer<UShort>(source3 + 0);
414
415 v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4);
416 v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4);
417 v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4);
418 v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4);
419 }
420
421 if(stream.count >= 2)
422 {
423 UShort y0 = *Pointer<UShort>(source0 + 2);
424 UShort y1 = *Pointer<UShort>(source1 + 2);
425 UShort y2 = *Pointer<UShort>(source2 + 2);
426 UShort y3 = *Pointer<UShort>(source3 + 2);
427
428 v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4);
429 v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4);
430 v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4);
431 v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4);
432 }
433
434 if(stream.count >= 3)
435 {
436 UShort z0 = *Pointer<UShort>(source0 + 4);
437 UShort z1 = *Pointer<UShort>(source1 + 4);
438 UShort z2 = *Pointer<UShort>(source2 + 4);
439 UShort z3 = *Pointer<UShort>(source3 + 4);
440
441 v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4);
442 v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4);
443 v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4);
444 v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4);
445 }
446
447 if(stream.count >= 4)
448 {
449 UShort w0 = *Pointer<UShort>(source0 + 6);
450 UShort w1 = *Pointer<UShort>(source1 + 6);
451 UShort w2 = *Pointer<UShort>(source2 + 6);
452 UShort w3 = *Pointer<UShort>(source3 + 6);
453
454 v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4);
455 v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4);
456 v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4);
457 v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4);
458 }
459 }
460 break;
461 case STREAMTYPE_2_10_10_10_INT:
462 {
463 Int4 src;
464 src = Insert(src, *Pointer<Int>(source0), 0);
465 src = Insert(src, *Pointer<Int>(source1), 1);
466 src = Insert(src, *Pointer<Int>(source2), 2);
467 src = Insert(src, *Pointer<Int>(source3), 3);
468
469 v.x = Float4((src << 22) >> 22);
470 v.y = Float4((src << 12) >> 22);
471 v.z = Float4((src << 02) >> 22);
472 v.w = Float4(src >> 30);
473
474 if(stream.normalized)
475 {
476 v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f));
477 v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f));
478 v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f));
479 v.w = Max(v.w, Float4(-1.0f));
480 }
481 }
482 break;
483 case STREAMTYPE_2_10_10_10_UINT:
484 {
485 Int4 src;
486 src = Insert(src, *Pointer<Int>(source0), 0);
487 src = Insert(src, *Pointer<Int>(source1), 1);
488 src = Insert(src, *Pointer<Int>(source2), 2);
489 src = Insert(src, *Pointer<Int>(source3), 3);
490
491 v.x = Float4(src & Int4(0x3FF));
492 v.y = Float4((src >> 10) & Int4(0x3FF));
493 v.z = Float4((src >> 20) & Int4(0x3FF));
494 v.w = Float4((src >> 30) & Int4(0x3));
495
496 if(stream.normalized)
497 {
498 v.x *= Float4(1.0f / 0x3FF);
499 v.y *= Float4(1.0f / 0x3FF);
500 v.z *= Float4(1.0f / 0x3FF);
501 v.w *= Float4(1.0f / 0x3);
502 }
503 }
504 break;
505 default:
506 UNSUPPORTED("stream.type %d", int(stream.type));
507 }
508
509 if(stream.count < 1) v.x = Float4(0.0f);
510 if(stream.count < 2) v.y = Float4(0.0f);
511 if(stream.count < 3) v.z = Float4(0.0f);
512 if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1));
513
514 return v;
515 }
516
517 void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch)
518 {
519 UInt index0 = batch[0];
520 UInt index1 = batch[1];
521 UInt index2 = batch[2];
522 UInt index3 = batch[3];
523
524 UInt cacheIndex0 = index0 & VertexCache::TAG_MASK;
525 UInt cacheIndex1 = index1 & VertexCache::TAG_MASK;
526 UInt cacheIndex2 = index2 & VertexCache::TAG_MASK;
527 UInt cacheIndex3 = index3 & VertexCache::TAG_MASK;
528
529 // We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check.
530 // Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache.
531 tagCache[cacheIndex3] = index3;
532 tagCache[cacheIndex2] = index2;
533 tagCache[cacheIndex1] = index1;
534 tagCache[cacheIndex0] = index0;
535
536 auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
537 assert(it != spirvShader->outputBuiltins.end());
538 assert(it->second.SizeInComponents == 4);
539 auto &position = routine.getVariable(it->second.Id);
540
541 Vector4f pos;
542 pos.x = position[it->second.FirstComponent + 0];
543 pos.y = position[it->second.FirstComponent + 1];
544 pos.z = position[it->second.FirstComponent + 2];
545 pos.w = position[it->second.FirstComponent + 3];
546
547 // Projection and viewport transform.
548 Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
549 Float4 rhw = Float4(1.0f) / w;
550
551 Vector4f proj;
552 proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF))));
553 proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF))));
554 proj.z = pos.z * rhw;
555 proj.w = rhw;
556
557 transpose4x4(pos.x, pos.y, pos.z, pos.w);
558
559 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,position), 16) = pos.w;
560 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,position), 16) = pos.z;
561 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,position), 16) = pos.y;
562 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,position), 16) = pos.x;
563
564 it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
565 if(it != spirvShader->outputBuiltins.end())
566 {
567 assert(it->second.SizeInComponents == 1);
568 auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
569
570 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3);
571 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, 2);
572 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, 1);
573 *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0);
574 }
575
576 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF;
577 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF;
578 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8) & 0x0000000FF;
579 *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0) & 0x0000000FF;
580
581 transpose4x4(proj.x, proj.y, proj.z, proj.w);
582
583 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w;
584 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,projected), 16) = proj.z;
585 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,projected), 16) = proj.y;
586 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,projected), 16) = proj.x;
587
588 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
589 {
590 if(spirvShader->outputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
591 spirvShader->outputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
592 spirvShader->outputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED ||
593 spirvShader->outputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED)
594 {
595 Vector4f v;
596 v.x = routine.outputs[i + 0];
597 v.y = routine.outputs[i + 1];
598 v.z = routine.outputs[i + 2];
599 v.w = routine.outputs[i + 3];
600
601 transpose4x4(v.x, v.y, v.z, v.w);
602
603 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,v[i]), 16) = v.w;
604 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,v[i]), 16) = v.z;
605 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,v[i]), 16) = v.y;
606 *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,v[i]), 16) = v.x;
607 }
608 }
609 }
610
611 void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
612 {
613 *Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position));
614 *Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize));
615
616 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags));
617 *Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected));
618
619 for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
620 {
621 if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED)
622 {
623 *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
624 }
625 }
626 }
627}
628