1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "VertexPipeline.hpp" |
16 | |
17 | #include "Renderer/Vertex.hpp" |
18 | #include "Renderer/Renderer.hpp" |
19 | #include "Common/Debug.hpp" |
20 | |
21 | #include <string.h> |
22 | #include <stdlib.h> |
23 | #include <stdio.h> |
24 | |
25 | #undef max |
26 | #undef min |
27 | |
28 | namespace sw |
29 | { |
30 | extern bool secondaryColor; |
31 | |
32 | VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0) |
33 | { |
34 | } |
35 | |
36 | VertexPipeline::~VertexPipeline() |
37 | { |
38 | } |
39 | |
40 | Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) |
41 | { |
42 | Vector4f dst; |
43 | |
44 | if(state.vertexBlendMatrixCount == 0) |
45 | { |
46 | dst = transform(src, matrix, homogeneous); |
47 | } |
48 | else |
49 | { |
50 | UInt index0[4]; |
51 | UInt index1[4]; |
52 | UInt index2[4]; |
53 | UInt index3[4]; |
54 | |
55 | if(state.indexedVertexBlendEnable) |
56 | { |
57 | for(int i = 0; i < 4; i++) |
58 | { |
59 | Float4 B = v[BlendIndices].x; |
60 | UInt indices; |
61 | |
62 | switch(i) |
63 | { |
64 | case 0: indices = As<UInt>(Float(B.x)); break; |
65 | case 1: indices = As<UInt>(Float(B.y)); break; |
66 | case 2: indices = As<UInt>(Float(B.z)); break; |
67 | case 3: indices = As<UInt>(Float(B.w)); break; |
68 | } |
69 | |
70 | index0[i] = (indices & 0x000000FF) << 6; |
71 | index1[i] = (indices & 0x0000FF00) >> 2; |
72 | index2[i] = (indices & 0x00FF0000) >> 10; |
73 | index3[i] = (indices & 0xFF000000) >> 18; |
74 | } |
75 | } |
76 | else |
77 | { |
78 | for(int i = 0; i < 4; i++) |
79 | { |
80 | index0[i] = 0 * 64; |
81 | index1[i] = 1 * 64; |
82 | index2[i] = 2 * 64; |
83 | index3[i] = 3 * 64; |
84 | } |
85 | } |
86 | |
87 | Float4 weight0; |
88 | Float4 weight1; |
89 | Float4 weight2; |
90 | Float4 weight3; |
91 | |
92 | switch(state.vertexBlendMatrixCount) |
93 | { |
94 | case 4: weight2 = v[BlendWeight].z; |
95 | case 3: weight1 = v[BlendWeight].y; |
96 | case 2: weight0 = v[BlendWeight].x; |
97 | case 1: |
98 | break; |
99 | } |
100 | |
101 | if(state.vertexBlendMatrixCount == 1) |
102 | { |
103 | dst = transform(src, matrix, index0, homogeneous); |
104 | } |
105 | else if(state.vertexBlendMatrixCount == 2) |
106 | { |
107 | weight1 = Float4(1.0f) - weight0; |
108 | |
109 | Vector4f pos0; |
110 | Vector4f pos1; |
111 | |
112 | pos0 = transform(src, matrix, index0, homogeneous); |
113 | pos1 = transform(src, matrix, index1, homogeneous); |
114 | |
115 | dst.x = pos0.x * weight0 + pos1.x * weight1; // FIXME: Vector4f operators |
116 | dst.y = pos0.y * weight0 + pos1.y * weight1; |
117 | dst.z = pos0.z * weight0 + pos1.z * weight1; |
118 | dst.w = pos0.w * weight0 + pos1.w * weight1; |
119 | } |
120 | else if(state.vertexBlendMatrixCount == 3) |
121 | { |
122 | weight2 = Float4(1.0f) - (weight0 + weight1); |
123 | |
124 | Vector4f pos0; |
125 | Vector4f pos1; |
126 | Vector4f pos2; |
127 | |
128 | pos0 = transform(src, matrix, index0, homogeneous); |
129 | pos1 = transform(src, matrix, index1, homogeneous); |
130 | pos2 = transform(src, matrix, index2, homogeneous); |
131 | |
132 | dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2; |
133 | dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2; |
134 | dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2; |
135 | dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2; |
136 | } |
137 | else if(state.vertexBlendMatrixCount == 4) |
138 | { |
139 | weight3 = Float4(1.0f) - (weight0 + weight1 + weight2); |
140 | |
141 | Vector4f pos0; |
142 | Vector4f pos1; |
143 | Vector4f pos2; |
144 | Vector4f pos3; |
145 | |
146 | pos0 = transform(src, matrix, index0, homogeneous); |
147 | pos1 = transform(src, matrix, index1, homogeneous); |
148 | pos2 = transform(src, matrix, index2, homogeneous); |
149 | pos3 = transform(src, matrix, index3, homogeneous); |
150 | |
151 | dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3; |
152 | dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3; |
153 | dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3; |
154 | dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3; |
155 | } |
156 | } |
157 | |
158 | return dst; |
159 | } |
160 | |
161 | void VertexPipeline::pipeline(UInt &index) |
162 | { |
163 | Vector4f position; |
164 | Vector4f normal; |
165 | |
166 | if(!state.preTransformed) |
167 | { |
168 | position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true); |
169 | } |
170 | else |
171 | { |
172 | position = v[PositionT]; |
173 | } |
174 | |
175 | o[Pos].x = position.x; |
176 | o[Pos].y = position.y; |
177 | o[Pos].z = position.z; |
178 | o[Pos].w = position.w; |
179 | |
180 | Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); |
181 | |
182 | if(state.vertexNormalActive) |
183 | { |
184 | normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false); |
185 | |
186 | if(state.normalizeNormals) |
187 | { |
188 | normal = normalize(normal); |
189 | } |
190 | } |
191 | |
192 | if(!state.vertexLightingActive) |
193 | { |
194 | // FIXME: Don't process if not used at all |
195 | if(state.diffuseActive && state.input[Color0]) |
196 | { |
197 | Vector4f diffuse = v[Color0]; |
198 | |
199 | o[C0].x = diffuse.x; |
200 | o[C0].y = diffuse.y; |
201 | o[C0].z = diffuse.z; |
202 | o[C0].w = diffuse.w; |
203 | } |
204 | else |
205 | { |
206 | o[C0].x = Float4(1.0f); |
207 | o[C0].y = Float4(1.0f); |
208 | o[C0].z = Float4(1.0f); |
209 | o[C0].w = Float4(1.0f); |
210 | } |
211 | |
212 | // FIXME: Don't process if not used at all |
213 | if(state.specularActive && state.input[Color1]) |
214 | { |
215 | Vector4f specular = v[Color1]; |
216 | |
217 | o[C1].x = specular.x; |
218 | o[C1].y = specular.y; |
219 | o[C1].z = specular.z; |
220 | o[C1].w = specular.w; |
221 | } |
222 | else |
223 | { |
224 | o[C1].x = Float4(0.0f); |
225 | o[C1].y = Float4(0.0f); |
226 | o[C1].z = Float4(0.0f); |
227 | o[C1].w = Float4(1.0f); |
228 | } |
229 | } |
230 | else |
231 | { |
232 | o[C0].x = Float4(0.0f); |
233 | o[C0].y = Float4(0.0f); |
234 | o[C0].z = Float4(0.0f); |
235 | o[C0].w = Float4(0.0f); |
236 | |
237 | o[C1].x = Float4(0.0f); |
238 | o[C1].y = Float4(0.0f); |
239 | o[C1].z = Float4(0.0f); |
240 | o[C1].w = Float4(0.0f); |
241 | |
242 | Vector4f ambient; |
243 | Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack |
244 | |
245 | ambient.x = globalAmbient.x; |
246 | ambient.y = globalAmbient.y; |
247 | ambient.z = globalAmbient.z; |
248 | |
249 | for(int i = 0; i < 8; i++) |
250 | { |
251 | if(!(state.vertexLightActive & (1 << i))) |
252 | { |
253 | continue; |
254 | } |
255 | |
256 | Vector4f L; // Light vector |
257 | Float4 att; // Attenuation |
258 | |
259 | // Attenuation |
260 | { |
261 | Float4 d; // Distance |
262 | |
263 | L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack |
264 | L.x = L.x.xxxx; |
265 | L.y = L.y.yyyy; |
266 | L.z = L.z.zzzz; |
267 | |
268 | L.x -= vertexPosition.x; |
269 | L.y -= vertexPosition.y; |
270 | L.z -= vertexPosition.z; |
271 | d = dot3(L, L); |
272 | d = RcpSqrt_pp(d); // FIXME: Sufficient precision? |
273 | L.x *= d; |
274 | L.y *= d; |
275 | L.z *= d; |
276 | d = Rcp_pp(d); // FIXME: Sufficient precision? |
277 | |
278 | Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i])); |
279 | Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i])); |
280 | Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i])); |
281 | |
282 | att = Rcp_pp((q * d + l) * d + c); |
283 | } |
284 | |
285 | // Ambient per light |
286 | { |
287 | Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack |
288 | |
289 | ambient.x = ambient.x + lightAmbient.x * att; |
290 | ambient.y = ambient.y + lightAmbient.y * att; |
291 | ambient.z = ambient.z + lightAmbient.z * att; |
292 | } |
293 | |
294 | // Diffuse |
295 | if(state.vertexNormalActive) |
296 | { |
297 | Float4 dot; |
298 | |
299 | dot = dot3(L, normal); |
300 | dot = Max(dot, Float4(0.0f)); |
301 | dot *= att; |
302 | |
303 | Vector4f diff; |
304 | |
305 | if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) |
306 | { |
307 | diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack |
308 | diff.x = diff.x.xxxx; |
309 | diff.y = diff.y.yyyy; |
310 | diff.z = diff.z.zzzz; |
311 | } |
312 | else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) |
313 | { |
314 | diff = v[Color0]; |
315 | } |
316 | else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) |
317 | { |
318 | diff = v[Color1]; |
319 | } |
320 | else ASSERT(false); |
321 | |
322 | Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i])); |
323 | |
324 | o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first? |
325 | o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first? |
326 | o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first? |
327 | } |
328 | |
329 | // Specular |
330 | if(state.vertexSpecularActive) |
331 | { |
332 | Vector4f S; |
333 | Vector4f C; // Camera vector |
334 | Float4 pow; |
335 | |
336 | pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess)); |
337 | |
338 | S.x = Float4(0.0f) - vertexPosition.x; |
339 | S.y = Float4(0.0f) - vertexPosition.y; |
340 | S.z = Float4(0.0f) - vertexPosition.z; |
341 | C = normalize(S); |
342 | |
343 | S.x = L.x + C.x; |
344 | S.y = L.y + C.y; |
345 | S.z = L.z + C.z; |
346 | C = normalize(S); |
347 | |
348 | Float4 dot = Max(dot3(C, normal), Float4(0.0f)); // FIXME: max(dot3(C, normal), 0) |
349 | |
350 | Float4 P = power(dot, pow); |
351 | P *= att; |
352 | |
353 | Vector4f spec; |
354 | |
355 | if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) |
356 | { |
357 | Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack |
358 | |
359 | spec.x = materialSpecular.x; |
360 | spec.y = materialSpecular.y; |
361 | spec.z = materialSpecular.z; |
362 | } |
363 | else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) |
364 | { |
365 | spec = v[Color0]; |
366 | } |
367 | else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) |
368 | { |
369 | spec = v[Color1]; |
370 | } |
371 | else ASSERT(false); |
372 | |
373 | Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i])); |
374 | |
375 | spec.x *= lightSpecular.x; |
376 | spec.y *= lightSpecular.y; |
377 | spec.z *= lightSpecular.z; |
378 | |
379 | spec.x *= P; |
380 | spec.y *= P; |
381 | spec.z *= P; |
382 | |
383 | spec.x = Max(spec.x, Float4(0.0f)); |
384 | spec.y = Max(spec.y, Float4(0.0f)); |
385 | spec.z = Max(spec.z, Float4(0.0f)); |
386 | |
387 | if(secondaryColor) |
388 | { |
389 | o[C1].x = o[C1].x + spec.x; |
390 | o[C1].y = o[C1].y + spec.y; |
391 | o[C1].z = o[C1].z + spec.z; |
392 | } |
393 | else |
394 | { |
395 | o[C0].x = o[C0].x + spec.x; |
396 | o[C0].y = o[C0].y + spec.y; |
397 | o[C0].z = o[C0].z + spec.z; |
398 | } |
399 | } |
400 | } |
401 | |
402 | if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL) |
403 | { |
404 | Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack |
405 | |
406 | ambient.x = ambient.x * materialAmbient.x; |
407 | ambient.y = ambient.y * materialAmbient.y; |
408 | ambient.z = ambient.z * materialAmbient.z; |
409 | } |
410 | else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1) |
411 | { |
412 | Vector4f materialDiffuse = v[Color0]; |
413 | |
414 | ambient.x = ambient.x * materialDiffuse.x; |
415 | ambient.y = ambient.y * materialDiffuse.y; |
416 | ambient.z = ambient.z * materialDiffuse.z; |
417 | } |
418 | else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2) |
419 | { |
420 | Vector4f materialSpecular = v[Color1]; |
421 | |
422 | ambient.x = ambient.x * materialSpecular.x; |
423 | ambient.y = ambient.y * materialSpecular.y; |
424 | ambient.z = ambient.z * materialSpecular.z; |
425 | } |
426 | else ASSERT(false); |
427 | |
428 | o[C0].x = o[C0].x + ambient.x; |
429 | o[C0].y = o[C0].y + ambient.y; |
430 | o[C0].z = o[C0].z + ambient.z; |
431 | |
432 | // Emissive |
433 | if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL) |
434 | { |
435 | Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack |
436 | |
437 | o[C0].x = o[C0].x + materialEmission.x; |
438 | o[C0].y = o[C0].y + materialEmission.y; |
439 | o[C0].z = o[C0].z + materialEmission.z; |
440 | } |
441 | else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1) |
442 | { |
443 | Vector4f materialSpecular = v[Color0]; |
444 | |
445 | o[C0].x = o[C0].x + materialSpecular.x; |
446 | o[C0].y = o[C0].y + materialSpecular.y; |
447 | o[C0].z = o[C0].z + materialSpecular.z; |
448 | } |
449 | else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2) |
450 | { |
451 | Vector4f materialSpecular = v[Color1]; |
452 | |
453 | o[C0].x = o[C0].x + materialSpecular.x; |
454 | o[C0].y = o[C0].y + materialSpecular.y; |
455 | o[C0].z = o[C0].z + materialSpecular.z; |
456 | } |
457 | else ASSERT(false); |
458 | |
459 | // Diffuse alpha component |
460 | if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) |
461 | { |
462 | o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack |
463 | } |
464 | else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) |
465 | { |
466 | Vector4f alpha = v[Color0]; |
467 | o[C0].w = alpha.w; |
468 | } |
469 | else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) |
470 | { |
471 | Vector4f alpha = v[Color1]; |
472 | o[C0].w = alpha.w; |
473 | } |
474 | else ASSERT(false); |
475 | |
476 | if(state.vertexSpecularActive) |
477 | { |
478 | // Specular alpha component |
479 | if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) |
480 | { |
481 | o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack |
482 | } |
483 | else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) |
484 | { |
485 | Vector4f alpha = v[Color0]; |
486 | o[C1].w = alpha.w; |
487 | } |
488 | else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) |
489 | { |
490 | Vector4f alpha = v[Color1]; |
491 | o[C1].w = alpha.w; |
492 | } |
493 | else ASSERT(false); |
494 | } |
495 | } |
496 | |
497 | if(state.fogActive) |
498 | { |
499 | Float4 f; |
500 | |
501 | if(!state.rangeFogActive) |
502 | { |
503 | f = Abs(vertexPosition.z); |
504 | } |
505 | else |
506 | { |
507 | f = Sqrt(dot3(vertexPosition, vertexPosition)); // FIXME: f = length(vertexPosition); |
508 | } |
509 | |
510 | switch(state.vertexFogMode) |
511 | { |
512 | case FOG_NONE: |
513 | if(state.specularActive) |
514 | { |
515 | o[Fog].x = o[C1].w; |
516 | } |
517 | else |
518 | { |
519 | o[Fog].x = Float4(0.0f); |
520 | } |
521 | break; |
522 | case FOG_LINEAR: |
523 | o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); |
524 | break; |
525 | case FOG_EXP: |
526 | o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true); |
527 | break; |
528 | case FOG_EXP2: |
529 | o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true); |
530 | break; |
531 | default: |
532 | ASSERT(false); |
533 | } |
534 | } |
535 | |
536 | for(int stage = 0; stage < 8; stage++) |
537 | { |
538 | processTextureCoordinate(stage, normal, position); |
539 | } |
540 | |
541 | processPointSize(); |
542 | } |
543 | |
544 | void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position) |
545 | { |
546 | if(state.output[T0 + stage].write) |
547 | { |
548 | int i = state.textureState[stage].texCoordIndexActive; |
549 | |
550 | switch(state.textureState[stage].texGenActive) |
551 | { |
552 | case TEXGEN_NONE: |
553 | { |
554 | Vector4f &&varying = v[TexCoord0 + i]; |
555 | |
556 | o[T0 + stage].x = varying.x; |
557 | o[T0 + stage].y = varying.y; |
558 | o[T0 + stage].z = varying.z; |
559 | o[T0 + stage].w = varying.w; |
560 | } |
561 | break; |
562 | case TEXGEN_PASSTHRU: |
563 | { |
564 | Vector4f &&varying = v[TexCoord0 + i]; |
565 | |
566 | o[T0 + stage].x = varying.x; |
567 | o[T0 + stage].y = varying.y; |
568 | o[T0 + stage].z = varying.z; |
569 | o[T0 + stage].w = varying.w; |
570 | |
571 | if(state.input[TexCoord0 + i]) |
572 | { |
573 | switch(state.input[TexCoord0 + i].count) |
574 | { |
575 | case 1: |
576 | o[T0 + stage].y = Float4(1.0f); |
577 | o[T0 + stage].z = Float4(0.0f); |
578 | o[T0 + stage].w = Float4(0.0f); |
579 | break; |
580 | case 2: |
581 | o[T0 + stage].z = Float4(1.0f); |
582 | o[T0 + stage].w = Float4(0.0f); |
583 | break; |
584 | case 3: |
585 | o[T0 + stage].w = Float4(1.0f); |
586 | break; |
587 | case 4: |
588 | break; |
589 | default: |
590 | ASSERT(false); |
591 | } |
592 | } |
593 | } |
594 | break; |
595 | case TEXGEN_NORMAL: |
596 | { |
597 | Vector4f Nc; // Normal vector in camera space |
598 | |
599 | if(state.vertexNormalActive) |
600 | { |
601 | Nc = normal; |
602 | } |
603 | else |
604 | { |
605 | Nc.x = Float4(0.0f); |
606 | Nc.y = Float4(0.0f); |
607 | Nc.z = Float4(0.0f); |
608 | } |
609 | |
610 | Nc.w = Float4(1.0f); |
611 | |
612 | o[T0 + stage].x = Nc.x; |
613 | o[T0 + stage].y = Nc.y; |
614 | o[T0 + stage].z = Nc.z; |
615 | o[T0 + stage].w = Nc.w; |
616 | } |
617 | break; |
618 | case TEXGEN_POSITION: |
619 | { |
620 | Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space |
621 | |
622 | Pn.w = Float4(1.0f); |
623 | |
624 | o[T0 + stage].x = Pn.x; |
625 | o[T0 + stage].y = Pn.y; |
626 | o[T0 + stage].z = Pn.z; |
627 | o[T0 + stage].w = Pn.w; |
628 | } |
629 | break; |
630 | case TEXGEN_REFLECTION: |
631 | { |
632 | Vector4f R; // Reflection vector |
633 | |
634 | if(state.vertexNormalActive) |
635 | { |
636 | Vector4f Nc; // Normal vector in camera space |
637 | |
638 | Nc = normal; |
639 | |
640 | if(state.localViewerActive) |
641 | { |
642 | Vector4f Ec; // Eye vector in camera space |
643 | Vector4f N2; |
644 | |
645 | Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); |
646 | Ec = normalize(Ec); |
647 | |
648 | // R = E - 2 * N * (E . N) |
649 | Float4 dot = Float4(2.0f) * dot3(Ec, Nc); |
650 | |
651 | R.x = Ec.x - Nc.x * dot; |
652 | R.y = Ec.y - Nc.y * dot; |
653 | R.z = Ec.z - Nc.z * dot; |
654 | } |
655 | else |
656 | { |
657 | // u = -2 * Nz * Nx |
658 | // v = -2 * Nz * Ny |
659 | // w = 1 - 2 * Nz * Nz |
660 | |
661 | R.x = -Float4(2.0f) * Nc.z * Nc.x; |
662 | R.y = -Float4(2.0f) * Nc.z * Nc.y; |
663 | R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; |
664 | } |
665 | } |
666 | else |
667 | { |
668 | R.x = Float4(0.0f); |
669 | R.y = Float4(0.0f); |
670 | R.z = Float4(0.0f); |
671 | } |
672 | |
673 | R.w = Float4(1.0f); |
674 | |
675 | o[T0 + stage].x = R.x; |
676 | o[T0 + stage].y = R.y; |
677 | o[T0 + stage].z = R.z; |
678 | o[T0 + stage].w = R.w; |
679 | } |
680 | break; |
681 | case TEXGEN_SPHEREMAP: |
682 | { |
683 | Vector4f R; // Reflection vector |
684 | |
685 | if(state.vertexNormalActive) |
686 | { |
687 | Vector4f Nc; // Normal vector in camera space |
688 | |
689 | Nc = normal; |
690 | |
691 | if(state.localViewerActive) |
692 | { |
693 | Vector4f Ec; // Eye vector in camera space |
694 | Vector4f N2; |
695 | |
696 | Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); |
697 | Ec = normalize(Ec); |
698 | |
699 | // R = E - 2 * N * (E . N) |
700 | Float4 dot = Float4(2.0f) * dot3(Ec, Nc); |
701 | |
702 | R.x = Ec.x - Nc.x * dot; |
703 | R.y = Ec.y - Nc.y * dot; |
704 | R.z = Ec.z - Nc.z * dot; |
705 | } |
706 | else |
707 | { |
708 | // u = -2 * Nz * Nx |
709 | // v = -2 * Nz * Ny |
710 | // w = 1 - 2 * Nz * Nz |
711 | |
712 | R.x = -Float4(2.0f) * Nc.z * Nc.x; |
713 | R.y = -Float4(2.0f) * Nc.z * Nc.y; |
714 | R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; |
715 | } |
716 | } |
717 | else |
718 | { |
719 | R.x = Float4(0.0f); |
720 | R.y = Float4(0.0f); |
721 | R.z = Float4(0.0f); |
722 | } |
723 | |
724 | R.z -= Float4(1.0f); |
725 | R = normalize(R); |
726 | R.x = Float4(0.5f) * R.x + Float4(0.5f); |
727 | R.y = Float4(0.5f) * R.y + Float4(0.5f); |
728 | |
729 | R.z = Float4(1.0f); |
730 | R.w = Float4(0.0f); |
731 | |
732 | o[T0 + stage].x = R.x; |
733 | o[T0 + stage].y = R.y; |
734 | o[T0 + stage].z = R.z; |
735 | o[T0 + stage].w = R.w; |
736 | } |
737 | break; |
738 | default: |
739 | ASSERT(false); |
740 | } |
741 | |
742 | Vector4f ; |
743 | Vector4f ; |
744 | Vector4f ; |
745 | Vector4f ; |
746 | |
747 | Vector4f T; |
748 | Vector4f t; |
749 | |
750 | T.x = o[T0 + stage].x; |
751 | T.y = o[T0 + stage].y; |
752 | T.z = o[T0 + stage].z; |
753 | T.w = o[T0 + stage].w; |
754 | |
755 | switch(state.textureState[stage].textureTransformCountActive) |
756 | { |
757 | case 4: |
758 | texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack |
759 | texTrans3.x = texTrans3.x.xxxx; |
760 | texTrans3.y = texTrans3.y.yyyy; |
761 | texTrans3.z = texTrans3.z.zzzz; |
762 | texTrans3.w = texTrans3.w.wwww; |
763 | t.w = dot4(T, texTrans3); |
764 | case 3: |
765 | texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack |
766 | texTrans2.x = texTrans2.x.xxxx; |
767 | texTrans2.y = texTrans2.y.yyyy; |
768 | texTrans2.z = texTrans2.z.zzzz; |
769 | texTrans2.w = texTrans2.w.wwww; |
770 | t.z = dot4(T, texTrans2); |
771 | case 2: |
772 | texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack |
773 | texTrans1.x = texTrans1.x.xxxx; |
774 | texTrans1.y = texTrans1.y.yyyy; |
775 | texTrans1.z = texTrans1.z.zzzz; |
776 | texTrans1.w = texTrans1.w.wwww; |
777 | t.y = dot4(T, texTrans1); |
778 | case 1: |
779 | texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack |
780 | texTrans0.x = texTrans0.x.xxxx; |
781 | texTrans0.y = texTrans0.y.yyyy; |
782 | texTrans0.z = texTrans0.z.zzzz; |
783 | texTrans0.w = texTrans0.w.wwww; |
784 | t.x = dot4(T, texTrans0); |
785 | |
786 | o[T0 + stage].x = t.x; |
787 | o[T0 + stage].y = t.y; |
788 | o[T0 + stage].z = t.z; |
789 | o[T0 + stage].w = t.w; |
790 | case 0: |
791 | break; |
792 | default: |
793 | ASSERT(false); |
794 | } |
795 | } |
796 | } |
797 | |
798 | void VertexPipeline::processPointSize() |
799 | { |
800 | if(!state.pointSizeActive) |
801 | { |
802 | return; // Use global pointsize |
803 | } |
804 | |
805 | if(state.input[PointSize]) |
806 | { |
807 | o[Pts].y = v[PointSize].x; |
808 | } |
809 | else |
810 | { |
811 | o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize)); |
812 | } |
813 | |
814 | if(state.pointScaleActive && !state.preTransformed) |
815 | { |
816 | Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); |
817 | |
818 | Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p); |
819 | |
820 | Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack |
821 | Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack |
822 | Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack |
823 | |
824 | A = RcpSqrt_pp(A + d * (B + d * C)); |
825 | |
826 | o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack |
827 | } |
828 | } |
829 | |
830 | Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) |
831 | { |
832 | Vector4f dst; |
833 | |
834 | if(homogeneous) |
835 | { |
836 | Float4 m[4][4]; |
837 | |
838 | for(int j = 0; j < 4; j++) |
839 | { |
840 | for(int i = 0; i < 4; i++) |
841 | { |
842 | m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); |
843 | m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); |
844 | m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); |
845 | m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); |
846 | } |
847 | } |
848 | |
849 | dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3]; |
850 | dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3]; |
851 | dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3]; |
852 | dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3]; |
853 | } |
854 | else |
855 | { |
856 | Float4 m[3][3]; |
857 | |
858 | for(int j = 0; j < 3; j++) |
859 | { |
860 | for(int i = 0; i < 3; i++) |
861 | { |
862 | m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); |
863 | m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); |
864 | m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); |
865 | m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); |
866 | } |
867 | } |
868 | |
869 | dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; |
870 | dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; |
871 | dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; |
872 | } |
873 | |
874 | return dst; |
875 | } |
876 | |
877 | Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous) |
878 | { |
879 | Vector4f dst; |
880 | |
881 | if(homogeneous) |
882 | { |
883 | Float4 m[4][4]; |
884 | |
885 | for(int j = 0; j < 4; j++) |
886 | { |
887 | for(int i = 0; i < 4; i++) |
888 | { |
889 | m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); |
890 | m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); |
891 | m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); |
892 | m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); |
893 | } |
894 | } |
895 | |
896 | dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3]; |
897 | dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3]; |
898 | dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3]; |
899 | dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3]; |
900 | } |
901 | else |
902 | { |
903 | Float4 m[3][3]; |
904 | |
905 | for(int j = 0; j < 3; j++) |
906 | { |
907 | for(int i = 0; i < 3; i++) |
908 | { |
909 | m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); |
910 | m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); |
911 | m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); |
912 | m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); |
913 | } |
914 | } |
915 | |
916 | dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; |
917 | dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; |
918 | dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; |
919 | } |
920 | |
921 | return dst; |
922 | } |
923 | |
924 | Vector4f VertexPipeline::normalize(Vector4f &src) |
925 | { |
926 | Vector4f dst; |
927 | |
928 | Float4 rcpLength = RcpSqrt_pp(dot3(src, src)); |
929 | |
930 | dst.x = src.x * rcpLength; |
931 | dst.y = src.y * rcpLength; |
932 | dst.z = src.z * rcpLength; |
933 | |
934 | return dst; |
935 | } |
936 | |
937 | Float4 VertexPipeline::power(Float4 &src0, Float4 &src1) |
938 | { |
939 | Float4 dst = src0; |
940 | |
941 | dst = dst * dst; |
942 | dst = dst * dst; |
943 | dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f))); |
944 | |
945 | dst *= src1; |
946 | |
947 | dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f))); |
948 | dst = RcpSqrt_pp(dst); |
949 | dst = RcpSqrt_pp(dst); |
950 | |
951 | return dst; |
952 | } |
953 | } |
954 | |