1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "VertexRoutine.hpp" |
16 | |
17 | #include "Constants.hpp" |
18 | #include "SpirvShader.hpp" |
19 | #include "Device/Vertex.hpp" |
20 | #include "Device/Renderer.hpp" |
21 | #include "Vulkan/VkDebug.hpp" |
22 | #include "System/Half.hpp" |
23 | |
24 | namespace sw |
25 | { |
26 | VertexRoutine::VertexRoutine( |
27 | const VertexProcessor::State &state, |
28 | vk::PipelineLayout const *pipelineLayout, |
29 | SpirvShader const *spirvShader) |
30 | : routine(pipelineLayout), |
31 | state(state), |
32 | spirvShader(spirvShader) |
33 | { |
34 | spirvShader->emitProlog(&routine); |
35 | } |
36 | |
37 | VertexRoutine::~VertexRoutine() |
38 | { |
39 | } |
40 | |
41 | void VertexRoutine::generate() |
42 | { |
43 | Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); |
44 | Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); |
45 | Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag)); |
46 | |
47 | UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); |
48 | |
49 | constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); |
50 | |
51 | // Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer. |
52 | // On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache |
53 | // in reverse order to guarantee that the first one doesn't get evicted and can be written out. |
54 | |
55 | Do |
56 | { |
57 | UInt index = *batch; |
58 | UInt cacheIndex = index & VertexCache::TAG_MASK; |
59 | |
60 | If(tagCache[cacheIndex] != index) |
61 | { |
62 | readInput(batch); |
63 | program(batch, vertexCount); |
64 | computeClipFlags(); |
65 | |
66 | writeCache(vertexCache, tagCache, batch); |
67 | } |
68 | |
69 | Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); |
70 | |
71 | // For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive |
72 | for(int i = 0; i < (state.isPoint ? 3 : 1); i++) |
73 | { |
74 | writeVertex(vertex, cacheEntry); |
75 | vertex += sizeof(Vertex); |
76 | } |
77 | |
78 | batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t)); |
79 | vertexCount--; |
80 | } |
81 | Until(vertexCount == 0) |
82 | |
83 | Return(); |
84 | } |
85 | |
86 | void VertexRoutine::readInput(Pointer<UInt> &batch) |
87 | { |
88 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) |
89 | { |
90 | if(spirvShader->inputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
91 | spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
92 | spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
93 | spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) |
94 | { |
95 | Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4)); |
96 | UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4)); |
97 | Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex)); |
98 | UInt robustnessSize(0); |
99 | if(state.robustBufferAccess) |
100 | { |
101 | robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4)); |
102 | } |
103 | |
104 | auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex); |
105 | routine.inputs[i + 0] = value.x; |
106 | routine.inputs[i + 1] = value.y; |
107 | routine.inputs[i + 2] = value.z; |
108 | routine.inputs[i + 3] = value.w; |
109 | } |
110 | } |
111 | } |
112 | |
113 | void VertexRoutine::computeClipFlags() |
114 | { |
115 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); |
116 | assert(it != spirvShader->outputBuiltins.end()); |
117 | assert(it->second.SizeInComponents == 4); |
118 | auto &pos = routine.getVariable(it->second.Id); |
119 | auto posX = pos[it->second.FirstComponent + 0]; |
120 | auto posY = pos[it->second.FirstComponent + 1]; |
121 | auto posZ = pos[it->second.FirstComponent + 2]; |
122 | auto posW = pos[it->second.FirstComponent + 3]; |
123 | |
124 | Int4 maxX = CmpLT(posW, posX); |
125 | Int4 maxY = CmpLT(posW, posY); |
126 | Int4 maxZ = CmpLT(posW, posZ); |
127 | Int4 minX = CmpNLE(-posW, posX); |
128 | Int4 minY = CmpNLE(-posW, posY); |
129 | Int4 minZ = CmpNLE(Float4(0.0f), posZ); |
130 | |
131 | clipFlags = Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)]; |
132 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)]; |
133 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)]; |
134 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)]; |
135 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)]; |
136 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)]; |
137 | |
138 | Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); |
139 | Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); |
140 | Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); |
141 | |
142 | Int4 finiteXYZ = finiteX & finiteY & finiteZ; |
143 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)]; |
144 | } |
145 | |
146 | Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch, |
147 | bool robustBufferAccess, UInt & robustnessSize, Int baseVertex) |
148 | { |
149 | Vector4f v; |
150 | // Because of the following rule in the Vulkan spec, we do not care if a very large negative |
151 | // baseVertex would overflow all the way back into a valid region of the index buffer: |
152 | // "Out-of-bounds buffer loads will return any of the following values :
|
153 | // - Values from anywhere within the memory range(s) bound to the buffer (possibly including |
154 | // bytes of memory past the end of the buffer, up to the end of the bound range)." |
155 | UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride); |
156 | |
157 | Pointer<Byte> source0 = buffer + offsets.x;
|
158 | Pointer<Byte> source1 = buffer + offsets.y;
|
159 | Pointer<Byte> source2 = buffer + offsets.z;
|
160 | Pointer<Byte> source3 = buffer + offsets.w; |
161 | |
162 | UInt4 zero(0); |
163 | if (robustBufferAccess) |
164 | { |
165 | // TODO(b/141124876): Optimize for wide-vector gather operations. |
166 | UInt4 limits = offsets + UInt4(stream.bytesPerAttrib()); |
167 | Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero); |
168 | source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource); |
169 | source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource); |
170 | source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource); |
171 | source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource); |
172 | } |
173 | |
174 | bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized; |
175 | |
176 | switch(stream.type) |
177 | { |
178 | case STREAMTYPE_FLOAT: |
179 | { |
180 | if(stream.count == 0) |
181 | { |
182 | // Null stream, all default components |
183 | } |
184 | else |
185 | { |
186 | if(stream.count == 1) |
187 | { |
188 | v.x.x = *Pointer<Float>(source0); |
189 | v.x.y = *Pointer<Float>(source1); |
190 | v.x.z = *Pointer<Float>(source2); |
191 | v.x.w = *Pointer<Float>(source3); |
192 | } |
193 | else |
194 | { |
195 | v.x = *Pointer<Float4>(source0); |
196 | v.y = *Pointer<Float4>(source1); |
197 | v.z = *Pointer<Float4>(source2); |
198 | v.w = *Pointer<Float4>(source3); |
199 | |
200 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
201 | } |
202 | |
203 | switch(stream.attribType) |
204 | { |
205 | case SpirvShader::ATTRIBTYPE_INT: |
206 | if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); |
207 | if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); |
208 | if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); |
209 | if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); |
210 | break; |
211 | case SpirvShader::ATTRIBTYPE_UINT: |
212 | if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); |
213 | if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); |
214 | if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z)); |
215 | if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); |
216 | break; |
217 | default: |
218 | break; |
219 | } |
220 | } |
221 | } |
222 | break; |
223 | case STREAMTYPE_BYTE: |
224 | if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float |
225 | { |
226 | v.x = Float4(*Pointer<Byte4>(source0)); |
227 | v.y = Float4(*Pointer<Byte4>(source1)); |
228 | v.z = Float4(*Pointer<Byte4>(source2)); |
229 | v.w = Float4(*Pointer<Byte4>(source3)); |
230 | |
231 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
232 | |
233 | if(stream.normalized) |
234 | { |
235 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
236 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
237 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
238 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
239 | } |
240 | } |
241 | else // Stream: UByte, Shader attrib: Int / UInt |
242 | { |
243 | v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); |
244 | v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); |
245 | v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); |
246 | v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); |
247 | |
248 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
249 | } |
250 | break; |
251 | case STREAMTYPE_SBYTE: |
252 | if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float |
253 | { |
254 | v.x = Float4(*Pointer<SByte4>(source0)); |
255 | v.y = Float4(*Pointer<SByte4>(source1)); |
256 | v.z = Float4(*Pointer<SByte4>(source2)); |
257 | v.w = Float4(*Pointer<SByte4>(source3)); |
258 | |
259 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
260 | |
261 | if(stream.normalized) |
262 | { |
263 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
264 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
265 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
266 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
267 | } |
268 | } |
269 | else // Stream: SByte, Shader attrib: Int / UInt |
270 | { |
271 | v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); |
272 | v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); |
273 | v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); |
274 | v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); |
275 | |
276 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
277 | } |
278 | break; |
279 | case STREAMTYPE_COLOR: |
280 | { |
281 | v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
282 | v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
283 | v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
284 | v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
285 | |
286 | transpose4x4(v.x, v.y, v.z, v.w); |
287 | |
288 | // Swap red and blue |
289 | Float4 t = v.x; |
290 | v.x = v.z; |
291 | v.z = t; |
292 | } |
293 | break; |
294 | case STREAMTYPE_SHORT: |
295 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float |
296 | { |
297 | v.x = Float4(*Pointer<Short4>(source0)); |
298 | v.y = Float4(*Pointer<Short4>(source1)); |
299 | v.z = Float4(*Pointer<Short4>(source2)); |
300 | v.w = Float4(*Pointer<Short4>(source3)); |
301 | |
302 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
303 | |
304 | if(stream.normalized) |
305 | { |
306 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
307 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
308 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
309 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
310 | } |
311 | } |
312 | else // Stream: Short, Shader attrib: Int/UInt, no type conversion |
313 | { |
314 | v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); |
315 | v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); |
316 | v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); |
317 | v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); |
318 | |
319 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
320 | } |
321 | break; |
322 | case STREAMTYPE_USHORT: |
323 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float |
324 | { |
325 | v.x = Float4(*Pointer<UShort4>(source0)); |
326 | v.y = Float4(*Pointer<UShort4>(source1)); |
327 | v.z = Float4(*Pointer<UShort4>(source2)); |
328 | v.w = Float4(*Pointer<UShort4>(source3)); |
329 | |
330 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
331 | |
332 | if(stream.normalized) |
333 | { |
334 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
335 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
336 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
337 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
338 | } |
339 | } |
340 | else // Stream: UShort, Shader attrib: Int/UInt, no type conversion |
341 | { |
342 | v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); |
343 | v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); |
344 | v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); |
345 | v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); |
346 | |
347 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
348 | } |
349 | break; |
350 | case STREAMTYPE_INT: |
351 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float |
352 | { |
353 | v.x = Float4(*Pointer<Int4>(source0)); |
354 | v.y = Float4(*Pointer<Int4>(source1)); |
355 | v.z = Float4(*Pointer<Int4>(source2)); |
356 | v.w = Float4(*Pointer<Int4>(source3)); |
357 | |
358 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
359 | |
360 | if(stream.normalized) |
361 | { |
362 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
363 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
364 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
365 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
366 | } |
367 | } |
368 | else // Stream: Int, Shader attrib: Int/UInt, no type conversion |
369 | { |
370 | v.x = *Pointer<Float4>(source0); |
371 | v.y = *Pointer<Float4>(source1); |
372 | v.z = *Pointer<Float4>(source2); |
373 | v.w = *Pointer<Float4>(source3); |
374 | |
375 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
376 | } |
377 | break; |
378 | case STREAMTYPE_UINT: |
379 | if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float |
380 | { |
381 | v.x = Float4(*Pointer<UInt4>(source0)); |
382 | v.y = Float4(*Pointer<UInt4>(source1)); |
383 | v.z = Float4(*Pointer<UInt4>(source2)); |
384 | v.w = Float4(*Pointer<UInt4>(source3)); |
385 | |
386 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
387 | |
388 | if(stream.normalized) |
389 | { |
390 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
391 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
392 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
393 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
394 | } |
395 | } |
396 | else // Stream: UInt, Shader attrib: Int/UInt, no type conversion |
397 | { |
398 | v.x = *Pointer<Float4>(source0); |
399 | v.y = *Pointer<Float4>(source1); |
400 | v.z = *Pointer<Float4>(source2); |
401 | v.w = *Pointer<Float4>(source3); |
402 | |
403 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
404 | } |
405 | break; |
406 | case STREAMTYPE_HALF: |
407 | { |
408 | if(stream.count >= 1) |
409 | { |
410 | UShort x0 = *Pointer<UShort>(source0 + 0); |
411 | UShort x1 = *Pointer<UShort>(source1 + 0); |
412 | UShort x2 = *Pointer<UShort>(source2 + 0); |
413 | UShort x3 = *Pointer<UShort>(source3 + 0); |
414 | |
415 | v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); |
416 | v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4); |
417 | v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); |
418 | v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); |
419 | } |
420 | |
421 | if(stream.count >= 2) |
422 | { |
423 | UShort y0 = *Pointer<UShort>(source0 + 2); |
424 | UShort y1 = *Pointer<UShort>(source1 + 2); |
425 | UShort y2 = *Pointer<UShort>(source2 + 2); |
426 | UShort y3 = *Pointer<UShort>(source3 + 2); |
427 | |
428 | v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); |
429 | v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); |
430 | v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); |
431 | v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); |
432 | } |
433 | |
434 | if(stream.count >= 3) |
435 | { |
436 | UShort z0 = *Pointer<UShort>(source0 + 4); |
437 | UShort z1 = *Pointer<UShort>(source1 + 4); |
438 | UShort z2 = *Pointer<UShort>(source2 + 4); |
439 | UShort z3 = *Pointer<UShort>(source3 + 4); |
440 | |
441 | v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); |
442 | v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); |
443 | v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); |
444 | v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); |
445 | } |
446 | |
447 | if(stream.count >= 4) |
448 | { |
449 | UShort w0 = *Pointer<UShort>(source0 + 6); |
450 | UShort w1 = *Pointer<UShort>(source1 + 6); |
451 | UShort w2 = *Pointer<UShort>(source2 + 6); |
452 | UShort w3 = *Pointer<UShort>(source3 + 6); |
453 | |
454 | v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); |
455 | v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); |
456 | v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); |
457 | v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); |
458 | } |
459 | } |
460 | break; |
461 | case STREAMTYPE_2_10_10_10_INT: |
462 | { |
463 | Int4 src; |
464 | src = Insert(src, *Pointer<Int>(source0), 0); |
465 | src = Insert(src, *Pointer<Int>(source1), 1); |
466 | src = Insert(src, *Pointer<Int>(source2), 2); |
467 | src = Insert(src, *Pointer<Int>(source3), 3); |
468 | |
469 | v.x = Float4((src << 22) >> 22); |
470 | v.y = Float4((src << 12) >> 22); |
471 | v.z = Float4((src << 02) >> 22); |
472 | v.w = Float4(src >> 30); |
473 | |
474 | if(stream.normalized) |
475 | { |
476 | v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
477 | v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
478 | v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
479 | v.w = Max(v.w, Float4(-1.0f)); |
480 | } |
481 | } |
482 | break; |
483 | case STREAMTYPE_2_10_10_10_UINT: |
484 | { |
485 | Int4 src; |
486 | src = Insert(src, *Pointer<Int>(source0), 0); |
487 | src = Insert(src, *Pointer<Int>(source1), 1); |
488 | src = Insert(src, *Pointer<Int>(source2), 2); |
489 | src = Insert(src, *Pointer<Int>(source3), 3); |
490 | |
491 | v.x = Float4(src & Int4(0x3FF)); |
492 | v.y = Float4((src >> 10) & Int4(0x3FF)); |
493 | v.z = Float4((src >> 20) & Int4(0x3FF)); |
494 | v.w = Float4((src >> 30) & Int4(0x3)); |
495 | |
496 | if(stream.normalized) |
497 | { |
498 | v.x *= Float4(1.0f / 0x3FF); |
499 | v.y *= Float4(1.0f / 0x3FF); |
500 | v.z *= Float4(1.0f / 0x3FF); |
501 | v.w *= Float4(1.0f / 0x3); |
502 | } |
503 | } |
504 | break; |
505 | default: |
506 | UNSUPPORTED("stream.type %d" , int(stream.type)); |
507 | } |
508 | |
509 | if(stream.count < 1) v.x = Float4(0.0f); |
510 | if(stream.count < 2) v.y = Float4(0.0f); |
511 | if(stream.count < 3) v.z = Float4(0.0f); |
512 | if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1)); |
513 | |
514 | return v; |
515 | } |
516 | |
517 | void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch) |
518 | { |
519 | UInt index0 = batch[0]; |
520 | UInt index1 = batch[1]; |
521 | UInt index2 = batch[2]; |
522 | UInt index3 = batch[3]; |
523 | |
524 | UInt cacheIndex0 = index0 & VertexCache::TAG_MASK; |
525 | UInt cacheIndex1 = index1 & VertexCache::TAG_MASK; |
526 | UInt cacheIndex2 = index2 & VertexCache::TAG_MASK; |
527 | UInt cacheIndex3 = index3 & VertexCache::TAG_MASK; |
528 | |
529 | // We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check. |
530 | // Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache. |
531 | tagCache[cacheIndex3] = index3; |
532 | tagCache[cacheIndex2] = index2; |
533 | tagCache[cacheIndex1] = index1; |
534 | tagCache[cacheIndex0] = index0; |
535 | |
536 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); |
537 | assert(it != spirvShader->outputBuiltins.end()); |
538 | assert(it->second.SizeInComponents == 4); |
539 | auto &position = routine.getVariable(it->second.Id); |
540 | |
541 | Vector4f pos; |
542 | pos.x = position[it->second.FirstComponent + 0]; |
543 | pos.y = position[it->second.FirstComponent + 1]; |
544 | pos.z = position[it->second.FirstComponent + 2]; |
545 | pos.w = position[it->second.FirstComponent + 3]; |
546 | |
547 | // Projection and viewport transform. |
548 | Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); |
549 | Float4 rhw = Float4(1.0f) / w; |
550 | |
551 | Vector4f proj; |
552 | proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF)))); |
553 | proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF)))); |
554 | proj.z = pos.z * rhw; |
555 | proj.w = rhw; |
556 | |
557 | transpose4x4(pos.x, pos.y, pos.z, pos.w); |
558 | |
559 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,position), 16) = pos.w; |
560 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,position), 16) = pos.z; |
561 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,position), 16) = pos.y; |
562 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,position), 16) = pos.x; |
563 | |
564 | it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize); |
565 | if(it != spirvShader->outputBuiltins.end()) |
566 | { |
567 | assert(it->second.SizeInComponents == 1); |
568 | auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent]; |
569 | |
570 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3); |
571 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, 2); |
572 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, 1); |
573 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0); |
574 | } |
575 | |
576 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF; |
577 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF; |
578 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8) & 0x0000000FF; |
579 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0) & 0x0000000FF; |
580 | |
581 | transpose4x4(proj.x, proj.y, proj.z, proj.w); |
582 | |
583 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w; |
584 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,projected), 16) = proj.z; |
585 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,projected), 16) = proj.y; |
586 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,projected), 16) = proj.x; |
587 | |
588 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) |
589 | { |
590 | if(spirvShader->outputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
591 | spirvShader->outputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
592 | spirvShader->outputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || |
593 | spirvShader->outputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) |
594 | { |
595 | Vector4f v; |
596 | v.x = routine.outputs[i + 0]; |
597 | v.y = routine.outputs[i + 1]; |
598 | v.z = routine.outputs[i + 2]; |
599 | v.w = routine.outputs[i + 3]; |
600 | |
601 | transpose4x4(v.x, v.y, v.z, v.w); |
602 | |
603 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,v[i]), 16) = v.w; |
604 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,v[i]), 16) = v.z; |
605 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,v[i]), 16) = v.y; |
606 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,v[i]), 16) = v.x; |
607 | } |
608 | } |
609 | } |
610 | |
611 | void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry) |
612 | { |
613 | *Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position)); |
614 | *Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize)); |
615 | |
616 | *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags)); |
617 | *Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected)); |
618 | |
619 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) |
620 | { |
621 | if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED) |
622 | { |
623 | *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4); |
624 | } |
625 | } |
626 | } |
627 | } |
628 | |