1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "VertexRoutine.hpp" |
16 | |
17 | #include "VertexShader.hpp" |
18 | #include "Constants.hpp" |
19 | #include "Renderer/Vertex.hpp" |
20 | #include "Renderer/Renderer.hpp" |
21 | #include "Common/Half.hpp" |
22 | #include "Common/Debug.hpp" |
23 | |
24 | namespace sw |
25 | { |
// Global rasterization-convention flags. They are only declared here (extern);
// their definitions live in another translation unit.
// postTransform() applies the half-pixel offset when halfIntegerCoordinates is false;
// computeClipFlags() and writeCache() select the depth range using symmetricNormalizedDepth.
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
28 | |
29 | VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) |
30 | : v(shader && shader->indirectAddressableInput), |
31 | o(shader && shader->indirectAddressableOutput), |
32 | state(state) |
33 | { |
34 | } |
35 | |
36 | VertexRoutine::~VertexRoutine() |
37 | { |
38 | } |
39 | |
40 | void VertexRoutine::generate() |
41 | { |
42 | const bool textureSampling = state.textureSampling; |
43 | |
44 | Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); |
45 | Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); |
46 | Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag); |
47 | |
48 | UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); |
49 | UInt primitiveNumber = *Pointer<UInt>(task + OFFSET(VertexTask, primitiveStart)); |
50 | UInt indexInPrimitive = 0; |
51 | |
52 | constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); |
53 | |
54 | Do |
55 | { |
56 | UInt index = *Pointer<UInt>(batch); |
57 | UInt tagIndex = index & 0x0000003C; |
58 | UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance. |
59 | |
60 | If(*Pointer<UInt>(tagCache + tagIndex) != indexQ) |
61 | { |
62 | *Pointer<UInt>(tagCache + tagIndex) = indexQ; |
63 | |
64 | readInput(indexQ); |
65 | pipeline(indexQ); |
66 | postTransform(); |
67 | computeClipFlags(); |
68 | |
69 | Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex)); |
70 | writeCache(cacheLine0); |
71 | } |
72 | |
73 | UInt cacheIndex = index & 0x0000003F; |
74 | Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); |
75 | writeVertex(vertex, cacheLine); |
76 | |
77 | if(state.transformFeedbackEnabled != 0) |
78 | { |
79 | transformFeedback(vertex, primitiveNumber, indexInPrimitive); |
80 | |
81 | indexInPrimitive++; |
82 | If(indexInPrimitive == 3) |
83 | { |
84 | primitiveNumber++; |
85 | indexInPrimitive = 0; |
86 | } |
87 | } |
88 | |
89 | vertex += sizeof(Vertex); |
90 | batch += sizeof(unsigned int); |
91 | vertexCount--; |
92 | } |
93 | Until(vertexCount == 0) |
94 | |
95 | Return(); |
96 | } |
97 | |
98 | void VertexRoutine::readInput(UInt &index) |
99 | { |
100 | for(int i = 0; i < MAX_VERTEX_INPUTS; i++) |
101 | { |
102 | Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i); |
103 | UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); |
104 | |
105 | v[i] = readStream(input, stride, state.input[i], index); |
106 | } |
107 | } |
108 | |
109 | void VertexRoutine::computeClipFlags() |
110 | { |
111 | int pos = state.positionRegister; |
112 | |
113 | Int4 maxX = CmpLT(o[pos].w, o[pos].x); |
114 | Int4 maxY = CmpLT(o[pos].w, o[pos].y); |
115 | Int4 maxZ = CmpLT(o[pos].w, o[pos].z); |
116 | Int4 minX = CmpNLE(-o[pos].w, o[pos].x); |
117 | Int4 minY = CmpNLE(-o[pos].w, o[pos].y); |
118 | Int4 minZ = symmetricNormalizedDepth ? CmpNLE(-o[pos].w, o[pos].z) : CmpNLE(Float4(0.0f), o[pos].z); |
119 | |
120 | clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing |
121 | clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4); |
122 | clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4); |
123 | clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4); |
124 | clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4); |
125 | clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4); |
126 | |
127 | Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); |
128 | Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); |
129 | Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); |
130 | |
131 | Int4 finiteXYZ = finiteX & finiteY & finiteZ; |
132 | clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4); |
133 | |
134 | if(state.preTransformed) |
135 | { |
136 | clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane |
137 | } |
138 | } |
139 | |
140 | Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index) |
141 | { |
142 | const bool textureSampling = state.textureSampling; |
143 | |
144 | Vector4f v; |
145 | |
146 | Pointer<Byte> source0 = buffer + index * stride; |
147 | Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0); |
148 | Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0); |
149 | Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0); |
150 | |
151 | bool isNativeFloatAttrib = (stream.attribType == VertexShader::ATTRIBTYPE_FLOAT) || stream.normalized; |
152 | |
153 | switch(stream.type) |
154 | { |
155 | case STREAMTYPE_FLOAT: |
156 | { |
157 | if(stream.count == 0) |
158 | { |
159 | // Null stream, all default components |
160 | } |
161 | else |
162 | { |
163 | if(stream.count == 1) |
164 | { |
165 | v.x.x = *Pointer<Float>(source0); |
166 | v.x.y = *Pointer<Float>(source1); |
167 | v.x.z = *Pointer<Float>(source2); |
168 | v.x.w = *Pointer<Float>(source3); |
169 | } |
170 | else |
171 | { |
172 | v.x = *Pointer<Float4>(source0); |
173 | v.y = *Pointer<Float4>(source1); |
174 | v.z = *Pointer<Float4>(source2); |
175 | v.w = *Pointer<Float4>(source3); |
176 | |
177 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
178 | } |
179 | |
180 | switch(stream.attribType) |
181 | { |
182 | case VertexShader::ATTRIBTYPE_INT: |
183 | if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); |
184 | if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); |
185 | if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); |
186 | if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); |
187 | break; |
188 | case VertexShader::ATTRIBTYPE_UINT: |
189 | if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); |
190 | if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); |
191 | if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z)); |
192 | if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); |
193 | break; |
194 | default: |
195 | break; |
196 | } |
197 | } |
198 | } |
199 | break; |
200 | case STREAMTYPE_BYTE: |
201 | if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float |
202 | { |
203 | v.x = Float4(*Pointer<Byte4>(source0)); |
204 | v.y = Float4(*Pointer<Byte4>(source1)); |
205 | v.z = Float4(*Pointer<Byte4>(source2)); |
206 | v.w = Float4(*Pointer<Byte4>(source3)); |
207 | |
208 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
209 | |
210 | if(stream.normalized) |
211 | { |
212 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
213 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
214 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
215 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
216 | } |
217 | } |
218 | else // Stream: UByte, Shader attrib: Int / UInt |
219 | { |
220 | v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); |
221 | v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); |
222 | v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); |
223 | v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); |
224 | |
225 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
226 | } |
227 | break; |
228 | case STREAMTYPE_SBYTE: |
229 | if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float |
230 | { |
231 | v.x = Float4(*Pointer<SByte4>(source0)); |
232 | v.y = Float4(*Pointer<SByte4>(source1)); |
233 | v.z = Float4(*Pointer<SByte4>(source2)); |
234 | v.w = Float4(*Pointer<SByte4>(source3)); |
235 | |
236 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
237 | |
238 | if(stream.normalized) |
239 | { |
240 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
241 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
242 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
243 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); |
244 | } |
245 | } |
246 | else // Stream: SByte, Shader attrib: Int / UInt |
247 | { |
248 | v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); |
249 | v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); |
250 | v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); |
251 | v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); |
252 | |
253 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
254 | } |
255 | break; |
256 | case STREAMTYPE_COLOR: |
257 | { |
258 | v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
259 | v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
260 | v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
261 | v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); |
262 | |
263 | transpose4x4(v.x, v.y, v.z, v.w); |
264 | |
265 | // Swap red and blue |
266 | Float4 t = v.x; |
267 | v.x = v.z; |
268 | v.z = t; |
269 | } |
270 | break; |
271 | case STREAMTYPE_SHORT: |
272 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float |
273 | { |
274 | v.x = Float4(*Pointer<Short4>(source0)); |
275 | v.y = Float4(*Pointer<Short4>(source1)); |
276 | v.z = Float4(*Pointer<Short4>(source2)); |
277 | v.w = Float4(*Pointer<Short4>(source3)); |
278 | |
279 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
280 | |
281 | if(stream.normalized) |
282 | { |
283 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
284 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
285 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
286 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); |
287 | } |
288 | } |
289 | else // Stream: Short, Shader attrib: Int/UInt, no type conversion |
290 | { |
291 | v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); |
292 | v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); |
293 | v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); |
294 | v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); |
295 | |
296 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
297 | } |
298 | break; |
299 | case STREAMTYPE_USHORT: |
300 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float |
301 | { |
302 | v.x = Float4(*Pointer<UShort4>(source0)); |
303 | v.y = Float4(*Pointer<UShort4>(source1)); |
304 | v.z = Float4(*Pointer<UShort4>(source2)); |
305 | v.w = Float4(*Pointer<UShort4>(source3)); |
306 | |
307 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
308 | |
309 | if(stream.normalized) |
310 | { |
311 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
312 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
313 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
314 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); |
315 | } |
316 | } |
317 | else // Stream: UShort, Shader attrib: Int/UInt, no type conversion |
318 | { |
319 | v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); |
320 | v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); |
321 | v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); |
322 | v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); |
323 | |
324 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
325 | } |
326 | break; |
327 | case STREAMTYPE_INT: |
328 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float |
329 | { |
330 | v.x = Float4(*Pointer<Int4>(source0)); |
331 | v.y = Float4(*Pointer<Int4>(source1)); |
332 | v.z = Float4(*Pointer<Int4>(source2)); |
333 | v.w = Float4(*Pointer<Int4>(source3)); |
334 | |
335 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
336 | |
337 | if(stream.normalized) |
338 | { |
339 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
340 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
341 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
342 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); |
343 | } |
344 | } |
345 | else // Stream: Int, Shader attrib: Int/UInt, no type conversion |
346 | { |
347 | v.x = *Pointer<Float4>(source0); |
348 | v.y = *Pointer<Float4>(source1); |
349 | v.z = *Pointer<Float4>(source2); |
350 | v.w = *Pointer<Float4>(source3); |
351 | |
352 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
353 | } |
354 | break; |
355 | case STREAMTYPE_UINT: |
356 | if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float |
357 | { |
358 | v.x = Float4(*Pointer<UInt4>(source0)); |
359 | v.y = Float4(*Pointer<UInt4>(source1)); |
360 | v.z = Float4(*Pointer<UInt4>(source2)); |
361 | v.w = Float4(*Pointer<UInt4>(source3)); |
362 | |
363 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
364 | |
365 | if(stream.normalized) |
366 | { |
367 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
368 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
369 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
370 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); |
371 | } |
372 | } |
373 | else // Stream: UInt, Shader attrib: Int/UInt, no type conversion |
374 | { |
375 | v.x = *Pointer<Float4>(source0); |
376 | v.y = *Pointer<Float4>(source1); |
377 | v.z = *Pointer<Float4>(source2); |
378 | v.w = *Pointer<Float4>(source3); |
379 | |
380 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
381 | } |
382 | break; |
383 | case STREAMTYPE_UDEC3: |
384 | { |
385 | // FIXME: Vectorize |
386 | { |
387 | Int x, y, z; |
388 | |
389 | x = y = z = *Pointer<Int>(source0); |
390 | |
391 | v.x.x = Float(x & 0x000003FF); |
392 | v.x.y = Float(y & 0x000FFC00); |
393 | v.x.z = Float(z & 0x3FF00000); |
394 | } |
395 | |
396 | { |
397 | Int x, y, z; |
398 | |
399 | x = y = z = *Pointer<Int>(source1); |
400 | |
401 | v.y.x = Float(x & 0x000003FF); |
402 | v.y.y = Float(y & 0x000FFC00); |
403 | v.y.z = Float(z & 0x3FF00000); |
404 | } |
405 | |
406 | { |
407 | Int x, y, z; |
408 | |
409 | x = y = z = *Pointer<Int>(source2); |
410 | |
411 | v.z.x = Float(x & 0x000003FF); |
412 | v.z.y = Float(y & 0x000FFC00); |
413 | v.z.z = Float(z & 0x3FF00000); |
414 | } |
415 | |
416 | { |
417 | Int x, y, z; |
418 | |
419 | x = y = z = *Pointer<Int>(source3); |
420 | |
421 | v.w.x = Float(x & 0x000003FF); |
422 | v.w.y = Float(y & 0x000FFC00); |
423 | v.w.z = Float(z & 0x3FF00000); |
424 | } |
425 | |
426 | transpose4x3(v.x, v.y, v.z, v.w); |
427 | |
428 | v.y *= Float4(1.0f / 0x00000400); |
429 | v.z *= Float4(1.0f / 0x00100000); |
430 | } |
431 | break; |
432 | case STREAMTYPE_DEC3N: |
433 | { |
434 | // FIXME: Vectorize |
435 | { |
436 | Int x, y, z; |
437 | |
438 | x = y = z = *Pointer<Int>(source0); |
439 | |
440 | v.x.x = Float((x << 22) & 0xFFC00000); |
441 | v.x.y = Float((y << 12) & 0xFFC00000); |
442 | v.x.z = Float((z << 2) & 0xFFC00000); |
443 | } |
444 | |
445 | { |
446 | Int x, y, z; |
447 | |
448 | x = y = z = *Pointer<Int>(source1); |
449 | |
450 | v.y.x = Float((x << 22) & 0xFFC00000); |
451 | v.y.y = Float((y << 12) & 0xFFC00000); |
452 | v.y.z = Float((z << 2) & 0xFFC00000); |
453 | } |
454 | |
455 | { |
456 | Int x, y, z; |
457 | |
458 | x = y = z = *Pointer<Int>(source2); |
459 | |
460 | v.z.x = Float((x << 22) & 0xFFC00000); |
461 | v.z.y = Float((y << 12) & 0xFFC00000); |
462 | v.z.z = Float((z << 2) & 0xFFC00000); |
463 | } |
464 | |
465 | { |
466 | Int x, y, z; |
467 | |
468 | x = y = z = *Pointer<Int>(source3); |
469 | |
470 | v.w.x = Float((x << 22) & 0xFFC00000); |
471 | v.w.y = Float((y << 12) & 0xFFC00000); |
472 | v.w.z = Float((z << 2) & 0xFFC00000); |
473 | } |
474 | |
475 | transpose4x3(v.x, v.y, v.z, v.w); |
476 | |
477 | v.x *= Float4(1.0f / 0x00400000 / 511.0f); |
478 | v.y *= Float4(1.0f / 0x00400000 / 511.0f); |
479 | v.z *= Float4(1.0f / 0x00400000 / 511.0f); |
480 | } |
481 | break; |
482 | case STREAMTYPE_FIXED: |
483 | { |
484 | v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); |
485 | v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); |
486 | v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); |
487 | v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed)); |
488 | |
489 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); |
490 | } |
491 | break; |
492 | case STREAMTYPE_HALF: |
493 | { |
494 | if(stream.count >= 1) |
495 | { |
496 | UShort x0 = *Pointer<UShort>(source0 + 0); |
497 | UShort x1 = *Pointer<UShort>(source1 + 0); |
498 | UShort x2 = *Pointer<UShort>(source2 + 0); |
499 | UShort x3 = *Pointer<UShort>(source3 + 0); |
500 | |
501 | v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); |
502 | v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4); |
503 | v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); |
504 | v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); |
505 | } |
506 | |
507 | if(stream.count >= 2) |
508 | { |
509 | UShort y0 = *Pointer<UShort>(source0 + 2); |
510 | UShort y1 = *Pointer<UShort>(source1 + 2); |
511 | UShort y2 = *Pointer<UShort>(source2 + 2); |
512 | UShort y3 = *Pointer<UShort>(source3 + 2); |
513 | |
514 | v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); |
515 | v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); |
516 | v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); |
517 | v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); |
518 | } |
519 | |
520 | if(stream.count >= 3) |
521 | { |
522 | UShort z0 = *Pointer<UShort>(source0 + 4); |
523 | UShort z1 = *Pointer<UShort>(source1 + 4); |
524 | UShort z2 = *Pointer<UShort>(source2 + 4); |
525 | UShort z3 = *Pointer<UShort>(source3 + 4); |
526 | |
527 | v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); |
528 | v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); |
529 | v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); |
530 | v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); |
531 | } |
532 | |
533 | if(stream.count >= 4) |
534 | { |
535 | UShort w0 = *Pointer<UShort>(source0 + 6); |
536 | UShort w1 = *Pointer<UShort>(source1 + 6); |
537 | UShort w2 = *Pointer<UShort>(source2 + 6); |
538 | UShort w3 = *Pointer<UShort>(source3 + 6); |
539 | |
540 | v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); |
541 | v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); |
542 | v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); |
543 | v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); |
544 | } |
545 | } |
546 | break; |
547 | case STREAMTYPE_INDICES: |
548 | { |
549 | v.x.x = *Pointer<Float>(source0); |
550 | v.x.y = *Pointer<Float>(source1); |
551 | v.x.z = *Pointer<Float>(source2); |
552 | v.x.w = *Pointer<Float>(source3); |
553 | } |
554 | break; |
555 | case STREAMTYPE_2_10_10_10_INT: |
556 | { |
557 | Int4 src; |
558 | src = Insert(src, *Pointer<Int>(source0), 0); |
559 | src = Insert(src, *Pointer<Int>(source1), 1); |
560 | src = Insert(src, *Pointer<Int>(source2), 2); |
561 | src = Insert(src, *Pointer<Int>(source3), 3); |
562 | |
563 | v.x = Float4((src << 22) >> 22); |
564 | v.y = Float4((src << 12) >> 22); |
565 | v.z = Float4((src << 02) >> 22); |
566 | v.w = Float4(src >> 30); |
567 | |
568 | if(stream.normalized) |
569 | { |
570 | v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
571 | v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
572 | v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); |
573 | v.w = Max(v.w, Float4(-1.0f)); |
574 | } |
575 | } |
576 | break; |
577 | case STREAMTYPE_2_10_10_10_UINT: |
578 | { |
579 | Int4 src; |
580 | src = Insert(src, *Pointer<Int>(source0), 0); |
581 | src = Insert(src, *Pointer<Int>(source1), 1); |
582 | src = Insert(src, *Pointer<Int>(source2), 2); |
583 | src = Insert(src, *Pointer<Int>(source3), 3); |
584 | |
585 | v.x = Float4(src & Int4(0x3FF)); |
586 | v.y = Float4((src >> 10) & Int4(0x3FF)); |
587 | v.z = Float4((src >> 20) & Int4(0x3FF)); |
588 | v.w = Float4((src >> 30) & Int4(0x3)); |
589 | |
590 | if(stream.normalized) |
591 | { |
592 | v.x *= Float4(1.0f / 0x3FF); |
593 | v.y *= Float4(1.0f / 0x3FF); |
594 | v.z *= Float4(1.0f / 0x3FF); |
595 | v.w *= Float4(1.0f / 0x3); |
596 | } |
597 | } |
598 | break; |
599 | default: |
600 | ASSERT(false); |
601 | } |
602 | |
603 | if(stream.count < 1) v.x = Float4(0.0f); |
604 | if(stream.count < 2) v.y = Float4(0.0f); |
605 | if(stream.count < 3) v.z = Float4(0.0f); |
606 | if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(0)); |
607 | |
608 | return v; |
609 | } |
610 | |
611 | void VertexRoutine::postTransform() |
612 | { |
613 | int pos = state.positionRegister; |
614 | |
615 | // Backtransform |
616 | if(state.preTransformed) |
617 | { |
618 | Float4 rhw = Float4(1.0f) / o[pos].w; |
619 | |
620 | Float4 W = *Pointer<Float4>(data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); |
621 | Float4 H = *Pointer<Float4>(data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f); |
622 | Float4 L = *Pointer<Float4>(data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); |
623 | Float4 T = *Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); |
624 | |
625 | o[pos].x = (o[pos].x - L) / W * rhw; |
626 | o[pos].y = (o[pos].y - T) / H * rhw; |
627 | o[pos].z = o[pos].z * rhw; |
628 | o[pos].w = rhw; |
629 | } |
630 | |
631 | if(!halfIntegerCoordinates && !state.preTransformed) |
632 | { |
633 | o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelX)) * o[pos].w; |
634 | o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelY)) * o[pos].w; |
635 | } |
636 | |
637 | if(state.superSampling) |
638 | { |
639 | o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w; |
640 | o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w; |
641 | } |
642 | } |
643 | |
644 | void VertexRoutine::writeCache(Pointer<Byte> &cacheLine) |
645 | { |
646 | Vector4f v; |
647 | |
648 | for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) |
649 | { |
650 | if(state.output[i].write) |
651 | { |
652 | v.x = o[i].x; |
653 | v.y = o[i].y; |
654 | v.z = o[i].z; |
655 | v.w = o[i].w; |
656 | |
657 | if(state.output[i].xClamp) |
658 | { |
659 | v.x = Max(v.x, Float4(0.0f)); |
660 | v.x = Min(v.x, Float4(1.0f)); |
661 | } |
662 | |
663 | if(state.output[i].yClamp) |
664 | { |
665 | v.y = Max(v.y, Float4(0.0f)); |
666 | v.y = Min(v.y, Float4(1.0f)); |
667 | } |
668 | |
669 | if(state.output[i].zClamp) |
670 | { |
671 | v.z = Max(v.z, Float4(0.0f)); |
672 | v.z = Min(v.z, Float4(1.0f)); |
673 | } |
674 | |
675 | if(state.output[i].wClamp) |
676 | { |
677 | v.w = Max(v.w, Float4(0.0f)); |
678 | v.w = Min(v.w, Float4(1.0f)); |
679 | } |
680 | |
681 | if(state.output[i].write == 0x01) |
682 | { |
683 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x; |
684 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y; |
685 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z; |
686 | *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w; |
687 | } |
688 | else |
689 | { |
690 | if(state.output[i].write == 0x03) |
691 | { |
692 | transpose2x4(v.x, v.y, v.z, v.w); |
693 | } |
694 | else |
695 | { |
696 | transpose4x4(v.x, v.y, v.z, v.w); |
697 | } |
698 | |
699 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x; |
700 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y; |
701 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z; |
702 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w; |
703 | } |
704 | } |
705 | } |
706 | |
707 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF; |
708 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF; |
709 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF; |
710 | *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF; |
711 | |
712 | // Viewport transform |
713 | int pos = state.positionRegister; |
714 | |
715 | v.x = o[pos].x; |
716 | v.y = o[pos].y; |
717 | v.z = o[pos].z; |
718 | v.w = o[pos].w; |
719 | |
720 | if(symmetricNormalizedDepth) |
721 | { |
722 | v.z = (v.z + v.w) * Float4(0.5f); // [-1, 1] -> [0, 1] |
723 | } |
724 | |
725 | Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); |
726 | Float4 rhw = Float4(1.0f) / w; |
727 | |
728 | v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16)))); |
729 | v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16)))); |
730 | v.z = v.z * rhw; |
731 | v.w = rhw; |
732 | |
733 | transpose4x4(v.x, v.y, v.z, v.w); |
734 | |
735 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x; |
736 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y; |
737 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z; |
738 | *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w; |
739 | } |
740 | |
741 | void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache) |
742 | { |
743 | for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) |
744 | { |
745 | if(state.output[i].write) |
746 | { |
747 | *Pointer<Int4>(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer<Int4>(cache + OFFSET(Vertex,v[i]), 16); |
748 | } |
749 | } |
750 | |
751 | *Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X)); |
752 | *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags)); |
753 | } |
754 | |
755 | void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive) |
756 | { |
757 | If(indexInPrimitive < state.verticesPerPrimitive) |
758 | { |
759 | UInt tOffset = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive; |
760 | |
761 | for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) |
762 | { |
763 | if(state.transformFeedbackEnabled & (1ULL << i)) |
764 | { |
765 | UInt reg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[i])); |
766 | UInt row = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[i])); |
767 | UInt col = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[i])); |
768 | UInt str = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[i])); |
769 | |
770 | Pointer<Byte> t = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[i])) + (tOffset * str * sizeof(float)); |
771 | Pointer<Byte> v = vertex + OFFSET(Vertex, v) + reg * sizeof(float); |
772 | |
773 | For(UInt r = 0, r < row, r++) |
774 | { |
775 | UInt rOffsetX = r * col * sizeof(float); |
776 | UInt rOffset4 = r * sizeof(float4); |
777 | |
778 | For(UInt c = 0, c < col, c++) |
779 | { |
780 | UInt cOffset = c * sizeof(float); |
781 | *Pointer<Float>(t + rOffsetX + cOffset) = *Pointer<Float>(v + rOffset4 + cOffset); |
782 | } |
783 | } |
784 | } |
785 | } |
786 | } |
787 | } |
788 | } |
789 | |