1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "VertexPipeline.hpp"
16
17#include "Renderer/Vertex.hpp"
18#include "Renderer/Renderer.hpp"
19#include "Common/Debug.hpp"
20
21#include <string.h>
22#include <stdlib.h>
23#include <stdio.h>
24
25#undef max
26#undef min
27
28namespace sw
29{
30 extern bool secondaryColor;
31
32 VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0)
33 {
34 }
35
36 VertexPipeline::~VertexPipeline()
37 {
38 }
39
40 Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
41 {
42 Vector4f dst;
43
44 if(state.vertexBlendMatrixCount == 0)
45 {
46 dst = transform(src, matrix, homogeneous);
47 }
48 else
49 {
50 UInt index0[4];
51 UInt index1[4];
52 UInt index2[4];
53 UInt index3[4];
54
55 if(state.indexedVertexBlendEnable)
56 {
57 for(int i = 0; i < 4; i++)
58 {
59 Float4 B = v[BlendIndices].x;
60 UInt indices;
61
62 switch(i)
63 {
64 case 0: indices = As<UInt>(Float(B.x)); break;
65 case 1: indices = As<UInt>(Float(B.y)); break;
66 case 2: indices = As<UInt>(Float(B.z)); break;
67 case 3: indices = As<UInt>(Float(B.w)); break;
68 }
69
70 index0[i] = (indices & 0x000000FF) << 6;
71 index1[i] = (indices & 0x0000FF00) >> 2;
72 index2[i] = (indices & 0x00FF0000) >> 10;
73 index3[i] = (indices & 0xFF000000) >> 18;
74 }
75 }
76 else
77 {
78 for(int i = 0; i < 4; i++)
79 {
80 index0[i] = 0 * 64;
81 index1[i] = 1 * 64;
82 index2[i] = 2 * 64;
83 index3[i] = 3 * 64;
84 }
85 }
86
87 Float4 weight0;
88 Float4 weight1;
89 Float4 weight2;
90 Float4 weight3;
91
92 switch(state.vertexBlendMatrixCount)
93 {
94 case 4: weight2 = v[BlendWeight].z;
95 case 3: weight1 = v[BlendWeight].y;
96 case 2: weight0 = v[BlendWeight].x;
97 case 1:
98 break;
99 }
100
101 if(state.vertexBlendMatrixCount == 1)
102 {
103 dst = transform(src, matrix, index0, homogeneous);
104 }
105 else if(state.vertexBlendMatrixCount == 2)
106 {
107 weight1 = Float4(1.0f) - weight0;
108
109 Vector4f pos0;
110 Vector4f pos1;
111
112 pos0 = transform(src, matrix, index0, homogeneous);
113 pos1 = transform(src, matrix, index1, homogeneous);
114
115 dst.x = pos0.x * weight0 + pos1.x * weight1; // FIXME: Vector4f operators
116 dst.y = pos0.y * weight0 + pos1.y * weight1;
117 dst.z = pos0.z * weight0 + pos1.z * weight1;
118 dst.w = pos0.w * weight0 + pos1.w * weight1;
119 }
120 else if(state.vertexBlendMatrixCount == 3)
121 {
122 weight2 = Float4(1.0f) - (weight0 + weight1);
123
124 Vector4f pos0;
125 Vector4f pos1;
126 Vector4f pos2;
127
128 pos0 = transform(src, matrix, index0, homogeneous);
129 pos1 = transform(src, matrix, index1, homogeneous);
130 pos2 = transform(src, matrix, index2, homogeneous);
131
132 dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2;
133 dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2;
134 dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2;
135 dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2;
136 }
137 else if(state.vertexBlendMatrixCount == 4)
138 {
139 weight3 = Float4(1.0f) - (weight0 + weight1 + weight2);
140
141 Vector4f pos0;
142 Vector4f pos1;
143 Vector4f pos2;
144 Vector4f pos3;
145
146 pos0 = transform(src, matrix, index0, homogeneous);
147 pos1 = transform(src, matrix, index1, homogeneous);
148 pos2 = transform(src, matrix, index2, homogeneous);
149 pos3 = transform(src, matrix, index3, homogeneous);
150
151 dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3;
152 dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3;
153 dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3;
154 dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3;
155 }
156 }
157
158 return dst;
159 }
160
161 void VertexPipeline::pipeline(UInt &index)
162 {
163 Vector4f position;
164 Vector4f normal;
165
166 if(!state.preTransformed)
167 {
168 position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true);
169 }
170 else
171 {
172 position = v[PositionT];
173 }
174
175 o[Pos].x = position.x;
176 o[Pos].y = position.y;
177 o[Pos].z = position.z;
178 o[Pos].w = position.w;
179
180 Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
181
182 if(state.vertexNormalActive)
183 {
184 normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false);
185
186 if(state.normalizeNormals)
187 {
188 normal = normalize(normal);
189 }
190 }
191
192 if(!state.vertexLightingActive)
193 {
194 // FIXME: Don't process if not used at all
195 if(state.diffuseActive && state.input[Color0])
196 {
197 Vector4f diffuse = v[Color0];
198
199 o[C0].x = diffuse.x;
200 o[C0].y = diffuse.y;
201 o[C0].z = diffuse.z;
202 o[C0].w = diffuse.w;
203 }
204 else
205 {
206 o[C0].x = Float4(1.0f);
207 o[C0].y = Float4(1.0f);
208 o[C0].z = Float4(1.0f);
209 o[C0].w = Float4(1.0f);
210 }
211
212 // FIXME: Don't process if not used at all
213 if(state.specularActive && state.input[Color1])
214 {
215 Vector4f specular = v[Color1];
216
217 o[C1].x = specular.x;
218 o[C1].y = specular.y;
219 o[C1].z = specular.z;
220 o[C1].w = specular.w;
221 }
222 else
223 {
224 o[C1].x = Float4(0.0f);
225 o[C1].y = Float4(0.0f);
226 o[C1].z = Float4(0.0f);
227 o[C1].w = Float4(1.0f);
228 }
229 }
230 else
231 {
232 o[C0].x = Float4(0.0f);
233 o[C0].y = Float4(0.0f);
234 o[C0].z = Float4(0.0f);
235 o[C0].w = Float4(0.0f);
236
237 o[C1].x = Float4(0.0f);
238 o[C1].y = Float4(0.0f);
239 o[C1].z = Float4(0.0f);
240 o[C1].w = Float4(0.0f);
241
242 Vector4f ambient;
243 Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack
244
245 ambient.x = globalAmbient.x;
246 ambient.y = globalAmbient.y;
247 ambient.z = globalAmbient.z;
248
249 for(int i = 0; i < 8; i++)
250 {
251 if(!(state.vertexLightActive & (1 << i)))
252 {
253 continue;
254 }
255
256 Vector4f L; // Light vector
257 Float4 att; // Attenuation
258
259 // Attenuation
260 {
261 Float4 d; // Distance
262
263 L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack
264 L.x = L.x.xxxx;
265 L.y = L.y.yyyy;
266 L.z = L.z.zzzz;
267
268 L.x -= vertexPosition.x;
269 L.y -= vertexPosition.y;
270 L.z -= vertexPosition.z;
271 d = dot3(L, L);
272 d = RcpSqrt_pp(d); // FIXME: Sufficient precision?
273 L.x *= d;
274 L.y *= d;
275 L.z *= d;
276 d = Rcp_pp(d); // FIXME: Sufficient precision?
277
278 Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i]));
279 Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i]));
280 Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i]));
281
282 att = Rcp_pp((q * d + l) * d + c);
283 }
284
285 // Ambient per light
286 {
287 Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack
288
289 ambient.x = ambient.x + lightAmbient.x * att;
290 ambient.y = ambient.y + lightAmbient.y * att;
291 ambient.z = ambient.z + lightAmbient.z * att;
292 }
293
294 // Diffuse
295 if(state.vertexNormalActive)
296 {
297 Float4 dot;
298
299 dot = dot3(L, normal);
300 dot = Max(dot, Float4(0.0f));
301 dot *= att;
302
303 Vector4f diff;
304
305 if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
306 {
307 diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack
308 diff.x = diff.x.xxxx;
309 diff.y = diff.y.yyyy;
310 diff.z = diff.z.zzzz;
311 }
312 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
313 {
314 diff = v[Color0];
315 }
316 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
317 {
318 diff = v[Color1];
319 }
320 else ASSERT(false);
321
322 Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i]));
323
324 o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first?
325 o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first?
326 o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first?
327 }
328
329 // Specular
330 if(state.vertexSpecularActive)
331 {
332 Vector4f S;
333 Vector4f C; // Camera vector
334 Float4 pow;
335
336 pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess));
337
338 S.x = Float4(0.0f) - vertexPosition.x;
339 S.y = Float4(0.0f) - vertexPosition.y;
340 S.z = Float4(0.0f) - vertexPosition.z;
341 C = normalize(S);
342
343 S.x = L.x + C.x;
344 S.y = L.y + C.y;
345 S.z = L.z + C.z;
346 C = normalize(S);
347
348 Float4 dot = Max(dot3(C, normal), Float4(0.0f)); // FIXME: max(dot3(C, normal), 0)
349
350 Float4 P = power(dot, pow);
351 P *= att;
352
353 Vector4f spec;
354
355 if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
356 {
357 Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack
358
359 spec.x = materialSpecular.x;
360 spec.y = materialSpecular.y;
361 spec.z = materialSpecular.z;
362 }
363 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
364 {
365 spec = v[Color0];
366 }
367 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
368 {
369 spec = v[Color1];
370 }
371 else ASSERT(false);
372
373 Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i]));
374
375 spec.x *= lightSpecular.x;
376 spec.y *= lightSpecular.y;
377 spec.z *= lightSpecular.z;
378
379 spec.x *= P;
380 spec.y *= P;
381 spec.z *= P;
382
383 spec.x = Max(spec.x, Float4(0.0f));
384 spec.y = Max(spec.y, Float4(0.0f));
385 spec.z = Max(spec.z, Float4(0.0f));
386
387 if(secondaryColor)
388 {
389 o[C1].x = o[C1].x + spec.x;
390 o[C1].y = o[C1].y + spec.y;
391 o[C1].z = o[C1].z + spec.z;
392 }
393 else
394 {
395 o[C0].x = o[C0].x + spec.x;
396 o[C0].y = o[C0].y + spec.y;
397 o[C0].z = o[C0].z + spec.z;
398 }
399 }
400 }
401
402 if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL)
403 {
404 Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack
405
406 ambient.x = ambient.x * materialAmbient.x;
407 ambient.y = ambient.y * materialAmbient.y;
408 ambient.z = ambient.z * materialAmbient.z;
409 }
410 else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1)
411 {
412 Vector4f materialDiffuse = v[Color0];
413
414 ambient.x = ambient.x * materialDiffuse.x;
415 ambient.y = ambient.y * materialDiffuse.y;
416 ambient.z = ambient.z * materialDiffuse.z;
417 }
418 else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2)
419 {
420 Vector4f materialSpecular = v[Color1];
421
422 ambient.x = ambient.x * materialSpecular.x;
423 ambient.y = ambient.y * materialSpecular.y;
424 ambient.z = ambient.z * materialSpecular.z;
425 }
426 else ASSERT(false);
427
428 o[C0].x = o[C0].x + ambient.x;
429 o[C0].y = o[C0].y + ambient.y;
430 o[C0].z = o[C0].z + ambient.z;
431
432 // Emissive
433 if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL)
434 {
435 Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack
436
437 o[C0].x = o[C0].x + materialEmission.x;
438 o[C0].y = o[C0].y + materialEmission.y;
439 o[C0].z = o[C0].z + materialEmission.z;
440 }
441 else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1)
442 {
443 Vector4f materialSpecular = v[Color0];
444
445 o[C0].x = o[C0].x + materialSpecular.x;
446 o[C0].y = o[C0].y + materialSpecular.y;
447 o[C0].z = o[C0].z + materialSpecular.z;
448 }
449 else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2)
450 {
451 Vector4f materialSpecular = v[Color1];
452
453 o[C0].x = o[C0].x + materialSpecular.x;
454 o[C0].y = o[C0].y + materialSpecular.y;
455 o[C0].z = o[C0].z + materialSpecular.z;
456 }
457 else ASSERT(false);
458
459 // Diffuse alpha component
460 if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
461 {
462 o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack
463 }
464 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
465 {
466 Vector4f alpha = v[Color0];
467 o[C0].w = alpha.w;
468 }
469 else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
470 {
471 Vector4f alpha = v[Color1];
472 o[C0].w = alpha.w;
473 }
474 else ASSERT(false);
475
476 if(state.vertexSpecularActive)
477 {
478 // Specular alpha component
479 if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
480 {
481 o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack
482 }
483 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
484 {
485 Vector4f alpha = v[Color0];
486 o[C1].w = alpha.w;
487 }
488 else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
489 {
490 Vector4f alpha = v[Color1];
491 o[C1].w = alpha.w;
492 }
493 else ASSERT(false);
494 }
495 }
496
497 if(state.fogActive)
498 {
499 Float4 f;
500
501 if(!state.rangeFogActive)
502 {
503 f = Abs(vertexPosition.z);
504 }
505 else
506 {
507 f = Sqrt(dot3(vertexPosition, vertexPosition)); // FIXME: f = length(vertexPosition);
508 }
509
510 switch(state.vertexFogMode)
511 {
512 case FOG_NONE:
513 if(state.specularActive)
514 {
515 o[Fog].x = o[C1].w;
516 }
517 else
518 {
519 o[Fog].x = Float4(0.0f);
520 }
521 break;
522 case FOG_LINEAR:
523 o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
524 break;
525 case FOG_EXP:
526 o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true);
527 break;
528 case FOG_EXP2:
529 o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true);
530 break;
531 default:
532 ASSERT(false);
533 }
534 }
535
536 for(int stage = 0; stage < 8; stage++)
537 {
538 processTextureCoordinate(stage, normal, position);
539 }
540
541 processPointSize();
542 }
543
544 void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position)
545 {
546 if(state.output[T0 + stage].write)
547 {
548 int i = state.textureState[stage].texCoordIndexActive;
549
550 switch(state.textureState[stage].texGenActive)
551 {
552 case TEXGEN_NONE:
553 {
554 Vector4f &&varying = v[TexCoord0 + i];
555
556 o[T0 + stage].x = varying.x;
557 o[T0 + stage].y = varying.y;
558 o[T0 + stage].z = varying.z;
559 o[T0 + stage].w = varying.w;
560 }
561 break;
562 case TEXGEN_PASSTHRU:
563 {
564 Vector4f &&varying = v[TexCoord0 + i];
565
566 o[T0 + stage].x = varying.x;
567 o[T0 + stage].y = varying.y;
568 o[T0 + stage].z = varying.z;
569 o[T0 + stage].w = varying.w;
570
571 if(state.input[TexCoord0 + i])
572 {
573 switch(state.input[TexCoord0 + i].count)
574 {
575 case 1:
576 o[T0 + stage].y = Float4(1.0f);
577 o[T0 + stage].z = Float4(0.0f);
578 o[T0 + stage].w = Float4(0.0f);
579 break;
580 case 2:
581 o[T0 + stage].z = Float4(1.0f);
582 o[T0 + stage].w = Float4(0.0f);
583 break;
584 case 3:
585 o[T0 + stage].w = Float4(1.0f);
586 break;
587 case 4:
588 break;
589 default:
590 ASSERT(false);
591 }
592 }
593 }
594 break;
595 case TEXGEN_NORMAL:
596 {
597 Vector4f Nc; // Normal vector in camera space
598
599 if(state.vertexNormalActive)
600 {
601 Nc = normal;
602 }
603 else
604 {
605 Nc.x = Float4(0.0f);
606 Nc.y = Float4(0.0f);
607 Nc.z = Float4(0.0f);
608 }
609
610 Nc.w = Float4(1.0f);
611
612 o[T0 + stage].x = Nc.x;
613 o[T0 + stage].y = Nc.y;
614 o[T0 + stage].z = Nc.z;
615 o[T0 + stage].w = Nc.w;
616 }
617 break;
618 case TEXGEN_POSITION:
619 {
620 Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space
621
622 Pn.w = Float4(1.0f);
623
624 o[T0 + stage].x = Pn.x;
625 o[T0 + stage].y = Pn.y;
626 o[T0 + stage].z = Pn.z;
627 o[T0 + stage].w = Pn.w;
628 }
629 break;
630 case TEXGEN_REFLECTION:
631 {
632 Vector4f R; // Reflection vector
633
634 if(state.vertexNormalActive)
635 {
636 Vector4f Nc; // Normal vector in camera space
637
638 Nc = normal;
639
640 if(state.localViewerActive)
641 {
642 Vector4f Ec; // Eye vector in camera space
643 Vector4f N2;
644
645 Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
646 Ec = normalize(Ec);
647
648 // R = E - 2 * N * (E . N)
649 Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
650
651 R.x = Ec.x - Nc.x * dot;
652 R.y = Ec.y - Nc.y * dot;
653 R.z = Ec.z - Nc.z * dot;
654 }
655 else
656 {
657 // u = -2 * Nz * Nx
658 // v = -2 * Nz * Ny
659 // w = 1 - 2 * Nz * Nz
660
661 R.x = -Float4(2.0f) * Nc.z * Nc.x;
662 R.y = -Float4(2.0f) * Nc.z * Nc.y;
663 R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
664 }
665 }
666 else
667 {
668 R.x = Float4(0.0f);
669 R.y = Float4(0.0f);
670 R.z = Float4(0.0f);
671 }
672
673 R.w = Float4(1.0f);
674
675 o[T0 + stage].x = R.x;
676 o[T0 + stage].y = R.y;
677 o[T0 + stage].z = R.z;
678 o[T0 + stage].w = R.w;
679 }
680 break;
681 case TEXGEN_SPHEREMAP:
682 {
683 Vector4f R; // Reflection vector
684
685 if(state.vertexNormalActive)
686 {
687 Vector4f Nc; // Normal vector in camera space
688
689 Nc = normal;
690
691 if(state.localViewerActive)
692 {
693 Vector4f Ec; // Eye vector in camera space
694 Vector4f N2;
695
696 Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
697 Ec = normalize(Ec);
698
699 // R = E - 2 * N * (E . N)
700 Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
701
702 R.x = Ec.x - Nc.x * dot;
703 R.y = Ec.y - Nc.y * dot;
704 R.z = Ec.z - Nc.z * dot;
705 }
706 else
707 {
708 // u = -2 * Nz * Nx
709 // v = -2 * Nz * Ny
710 // w = 1 - 2 * Nz * Nz
711
712 R.x = -Float4(2.0f) * Nc.z * Nc.x;
713 R.y = -Float4(2.0f) * Nc.z * Nc.y;
714 R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
715 }
716 }
717 else
718 {
719 R.x = Float4(0.0f);
720 R.y = Float4(0.0f);
721 R.z = Float4(0.0f);
722 }
723
724 R.z -= Float4(1.0f);
725 R = normalize(R);
726 R.x = Float4(0.5f) * R.x + Float4(0.5f);
727 R.y = Float4(0.5f) * R.y + Float4(0.5f);
728
729 R.z = Float4(1.0f);
730 R.w = Float4(0.0f);
731
732 o[T0 + stage].x = R.x;
733 o[T0 + stage].y = R.y;
734 o[T0 + stage].z = R.z;
735 o[T0 + stage].w = R.w;
736 }
737 break;
738 default:
739 ASSERT(false);
740 }
741
742 Vector4f texTrans0;
743 Vector4f texTrans1;
744 Vector4f texTrans2;
745 Vector4f texTrans3;
746
747 Vector4f T;
748 Vector4f t;
749
750 T.x = o[T0 + stage].x;
751 T.y = o[T0 + stage].y;
752 T.z = o[T0 + stage].z;
753 T.w = o[T0 + stage].w;
754
755 switch(state.textureState[stage].textureTransformCountActive)
756 {
757 case 4:
758 texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack
759 texTrans3.x = texTrans3.x.xxxx;
760 texTrans3.y = texTrans3.y.yyyy;
761 texTrans3.z = texTrans3.z.zzzz;
762 texTrans3.w = texTrans3.w.wwww;
763 t.w = dot4(T, texTrans3);
764 case 3:
765 texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack
766 texTrans2.x = texTrans2.x.xxxx;
767 texTrans2.y = texTrans2.y.yyyy;
768 texTrans2.z = texTrans2.z.zzzz;
769 texTrans2.w = texTrans2.w.wwww;
770 t.z = dot4(T, texTrans2);
771 case 2:
772 texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack
773 texTrans1.x = texTrans1.x.xxxx;
774 texTrans1.y = texTrans1.y.yyyy;
775 texTrans1.z = texTrans1.z.zzzz;
776 texTrans1.w = texTrans1.w.wwww;
777 t.y = dot4(T, texTrans1);
778 case 1:
779 texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack
780 texTrans0.x = texTrans0.x.xxxx;
781 texTrans0.y = texTrans0.y.yyyy;
782 texTrans0.z = texTrans0.z.zzzz;
783 texTrans0.w = texTrans0.w.wwww;
784 t.x = dot4(T, texTrans0);
785
786 o[T0 + stage].x = t.x;
787 o[T0 + stage].y = t.y;
788 o[T0 + stage].z = t.z;
789 o[T0 + stage].w = t.w;
790 case 0:
791 break;
792 default:
793 ASSERT(false);
794 }
795 }
796 }
797
798 void VertexPipeline::processPointSize()
799 {
800 if(!state.pointSizeActive)
801 {
802 return; // Use global pointsize
803 }
804
805 if(state.input[PointSize])
806 {
807 o[Pts].y = v[PointSize].x;
808 }
809 else
810 {
811 o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize));
812 }
813
814 if(state.pointScaleActive && !state.preTransformed)
815 {
816 Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true);
817
818 Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p);
819
820 Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack
821 Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack
822 Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack
823
824 A = RcpSqrt_pp(A + d * (B + d * C));
825
826 o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack
827 }
828 }
829
830 Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
831 {
832 Vector4f dst;
833
834 if(homogeneous)
835 {
836 Float4 m[4][4];
837
838 for(int j = 0; j < 4; j++)
839 {
840 for(int i = 0; i < 4; i++)
841 {
842 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
843 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
844 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
845 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
846 }
847 }
848
849 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3];
850 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3];
851 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3];
852 dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3];
853 }
854 else
855 {
856 Float4 m[3][3];
857
858 for(int j = 0; j < 3; j++)
859 {
860 for(int i = 0; i < 3; i++)
861 {
862 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
863 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
864 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
865 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
866 }
867 }
868
869 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
870 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
871 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
872 }
873
874 return dst;
875 }
876
877 Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous)
878 {
879 Vector4f dst;
880
881 if(homogeneous)
882 {
883 Float4 m[4][4];
884
885 for(int j = 0; j < 4; j++)
886 {
887 for(int i = 0; i < 4; i++)
888 {
889 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
890 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
891 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
892 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
893 }
894 }
895
896 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3];
897 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3];
898 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3];
899 dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3];
900 }
901 else
902 {
903 Float4 m[3][3];
904
905 for(int j = 0; j < 3; j++)
906 {
907 for(int i = 0; i < 3; i++)
908 {
909 m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
910 m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
911 m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
912 m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
913 }
914 }
915
916 dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
917 dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
918 dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
919 }
920
921 return dst;
922 }
923
924 Vector4f VertexPipeline::normalize(Vector4f &src)
925 {
926 Vector4f dst;
927
928 Float4 rcpLength = RcpSqrt_pp(dot3(src, src));
929
930 dst.x = src.x * rcpLength;
931 dst.y = src.y * rcpLength;
932 dst.z = src.z * rcpLength;
933
934 return dst;
935 }
936
937 Float4 VertexPipeline::power(Float4 &src0, Float4 &src1)
938 {
939 Float4 dst = src0;
940
941 dst = dst * dst;
942 dst = dst * dst;
943 dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f)));
944
945 dst *= src1;
946
947 dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f)));
948 dst = RcpSqrt_pp(dst);
949 dst = RcpSqrt_pp(dst);
950
951 return dst;
952 }
953}
954