1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "PixelPipeline.hpp"
16#include "SamplerCore.hpp"
17#include "Renderer/Renderer.hpp"
18
19namespace sw
20{
21 extern bool postBlendSRGB;
22
23 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
24 {
25 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000);
26 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000);
27 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000);
28 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000);
29
30 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000);
31 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000);
32 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000);
33 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000);
34 }
35
36 void PixelPipeline::fixedFunction()
37 {
38 current = diffuse;
39 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
40
41 for(int stage = 0; stage < 8; stage++)
42 {
43 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
44 {
45 break;
46 }
47
48 Vector4s texture;
49
50 if(state.textureStage[stage].usesTexture)
51 {
52 texture = sampleTexture(stage, stage);
53 }
54
55 blendTexture(temp, texture, stage);
56 }
57
58 specularPixel(current, specular);
59 }
60
61 void PixelPipeline::applyShader(Int cMask[4])
62 {
63 if(!shader)
64 {
65 fixedFunction();
66 return;
67 }
68
69 int pad = 0; // Count number of texm3x3pad instructions
70 Vector4s dPairing; // Destination for first pairing instruction
71
72 for(size_t i = 0; i < shader->getLength(); i++)
73 {
74 const Shader::Instruction *instruction = shader->getInstruction(i);
75 Shader::Opcode opcode = instruction->opcode;
76
77 // #ifndef NDEBUG // FIXME: Centralize debug output control
78 // shader->printInstruction(i, "debug.txt");
79 // #endif
80
81 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
82 {
83 continue;
84 }
85
86 const Dst &dst = instruction->dst;
87 const Src &src0 = instruction->src[0];
88 const Src &src1 = instruction->src[1];
89 const Src &src2 = instruction->src[2];
90
91 unsigned short shaderModel = shader->getShaderModel();
92 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair
93 bool coissue = instruction->coissue; // Second instruction of pair
94
95 Vector4s d;
96 Vector4s s0;
97 Vector4s s1;
98 Vector4s s2;
99
100 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
101 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
102 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
103
104 Float4 x = shaderModel < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x;
105 Float4 y = shaderModel < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y;
106 Float4 z = shaderModel < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z;
107 Float4 w = shaderModel < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w;
108
109 switch(opcode)
110 {
111 case Shader::OPCODE_PS_1_0: break;
112 case Shader::OPCODE_PS_1_1: break;
113 case Shader::OPCODE_PS_1_2: break;
114 case Shader::OPCODE_PS_1_3: break;
115 case Shader::OPCODE_PS_1_4: break;
116
117 case Shader::OPCODE_DEF: break;
118
119 case Shader::OPCODE_NOP: break;
120 case Shader::OPCODE_MOV: MOV(d, s0); break;
121 case Shader::OPCODE_ADD: ADD(d, s0, s1); break;
122 case Shader::OPCODE_SUB: SUB(d, s0, s1); break;
123 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
124 case Shader::OPCODE_MUL: MUL(d, s0, s1); break;
125 case Shader::OPCODE_DP3: DP3(d, s0, s1); break;
126 case Shader::OPCODE_DP4: DP4(d, s0, s1); break;
127 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
128 case Shader::OPCODE_TEXCOORD:
129 if(shaderModel < 0x0104)
130 {
131 TEXCOORD(d, x, y, z, dst.index);
132 }
133 else
134 {
135 if((src0.swizzle & 0x30) == 0x20) // .xyz
136 {
137 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
138 }
139 else // .xwy
140 {
141 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
142 }
143 }
144 break;
145 case Shader::OPCODE_TEXKILL:
146 if(shaderModel < 0x0104)
147 {
148 TEXKILL(cMask, x, y, z);
149 }
150 else if(shaderModel == 0x0104)
151 {
152 if(dst.type == Shader::PARAMETER_TEXTURE)
153 {
154 TEXKILL(cMask, x, y, z);
155 }
156 else
157 {
158 TEXKILL(cMask, rs[dst.index]);
159 }
160 }
161 else ASSERT(false);
162 break;
163 case Shader::OPCODE_TEX:
164 if(shaderModel < 0x0104)
165 {
166 TEX(d, x, y, z, dst.index, false);
167 }
168 else if(shaderModel == 0x0104)
169 {
170 if(src0.type == Shader::PARAMETER_TEXTURE)
171 {
172 if((src0.swizzle & 0x30) == 0x20) // .xyz
173 {
174 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
175 }
176 else // .xyw
177 {
178 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
179 }
180 }
181 else
182 {
183 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
184 }
185 }
186 else ASSERT(false);
187 break;
188 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break;
189 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break;
190 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break;
191 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break;
192 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break;
193 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
194 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break;
195 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
196 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break;
197 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break;
198 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break;
199 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break;
200 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break;
201 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
202 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break;
203 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
204 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break;
205 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break;
206 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break;
207 case Shader::OPCODE_PHASE: break;
208 case Shader::OPCODE_END: break;
209 default:
210 ASSERT(false);
211 }
212
213 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
214 {
215 if(dst.shift > 0)
216 {
217 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
218 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
219 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
220 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
221 }
222 else if(dst.shift < 0)
223 {
224 if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
225 if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
226 if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
227 if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
228 }
229
230 if(dst.saturate)
231 {
232 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000)); }
233 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000)); }
234 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000)); }
235 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000)); }
236 }
237
238 if(pairing)
239 {
240 if(dst.mask & 0x1) dPairing.x = d.x;
241 if(dst.mask & 0x2) dPairing.y = d.y;
242 if(dst.mask & 0x4) dPairing.z = d.z;
243 if(dst.mask & 0x8) dPairing.w = d.w;
244 }
245
246 if(coissue)
247 {
248 const Dst &dst = shader->getInstruction(i - 1)->dst;
249
250 writeDestination(dPairing, dst);
251 }
252
253 if(!pairing)
254 {
255 writeDestination(d, dst);
256 }
257 }
258 }
259
260 current.x = Min(current.x, Short4(0x0FFF)); current.x = Max(current.x, Short4(0x0000));
261 current.y = Min(current.y, Short4(0x0FFF)); current.y = Max(current.y, Short4(0x0000));
262 current.z = Min(current.z, Short4(0x0FFF)); current.z = Max(current.z, Short4(0x0000));
263 current.w = Min(current.w, Short4(0x0FFF)); current.w = Max(current.w, Short4(0x0000));
264 }
265
266 Bool PixelPipeline::alphaTest(Int cMask[4])
267 {
268 if(!state.alphaTestActive())
269 {
270 return true;
271 }
272
273 Int aMask;
274
275 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
276 {
277 PixelRoutine::alphaTest(aMask, current.w);
278
279 for(unsigned int q = 0; q < state.multiSample; q++)
280 {
281 cMask[q] &= aMask;
282 }
283 }
284 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
285 {
286 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
287
288 alphaToCoverage(cMask, alpha);
289 }
290 else ASSERT(false);
291
292 Int pass = cMask[0];
293
294 for(unsigned int q = 1; q < state.multiSample; q++)
295 {
296 pass = pass | cMask[q];
297 }
298
299 return pass != 0x0;
300 }
301
302 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
303 {
304 if(!state.colorWriteActive(0))
305 {
306 return;
307 }
308
309 Vector4f oC;
310
311 switch(state.targetFormat[0])
312 {
313 case FORMAT_R5G6B5:
314 case FORMAT_X8R8G8B8:
315 case FORMAT_X8B8G8R8:
316 case FORMAT_A8R8G8B8:
317 case FORMAT_A8B8G8R8:
318 case FORMAT_A8:
319 case FORMAT_G16R16:
320 case FORMAT_A16B16G16R16:
321 if(!postBlendSRGB && state.writeSRGB)
322 {
323 linearToSRGB12_16(current);
324 }
325 else
326 {
327 current.x <<= 4;
328 current.y <<= 4;
329 current.z <<= 4;
330 current.w <<= 4;
331 }
332
333 if(state.targetFormat[0] == FORMAT_R5G6B5)
334 {
335 current.x &= Short4(0xF800u);
336 current.y &= Short4(0xFC00u);
337 current.z &= Short4(0xF800u);
338 }
339
340 fogBlend(current, fog);
341
342 for(unsigned int q = 0; q < state.multiSample; q++)
343 {
344 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
345 Vector4s color = current;
346
347 if(state.multiSampleMask & (1 << q))
348 {
349 alphaBlend(0, buffer, color, x);
350 logicOperation(0, buffer, color, x);
351 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
352 }
353 }
354 break;
355 case FORMAT_R32F:
356 case FORMAT_G32R32F:
357 case FORMAT_X32B32G32R32F:
358 case FORMAT_A32B32G32R32F:
359 // case FORMAT_X32B32G32R32F_UNSIGNED: // Not renderable in any fixed-function API.
360 convertSigned12(oC, current);
361 PixelRoutine::fogBlend(oC, fog);
362
363 for(unsigned int q = 0; q < state.multiSample; q++)
364 {
365 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
366 Vector4f color = oC;
367
368 if(state.multiSampleMask & (1 << q))
369 {
370 alphaBlend(0, buffer, color, x);
371 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
372 }
373 }
374 break;
375 default:
376 ASSERT(false);
377 }
378 }
379
380 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage)
381 {
382 Vector4s *arg1 = nullptr;
383 Vector4s *arg2 = nullptr;
384 Vector4s *arg3 = nullptr;
385 Vector4s res;
386
387 Vector4s constant;
388 Vector4s tfactor;
389
390 const TextureStage::State &textureStage = state.textureStage[stage];
391
392 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
393 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
394 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
395 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
396 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
397 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
398 {
399 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
400 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
401 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
402 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
403 }
404
405 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
406 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
407 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
408 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
409 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
410 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
411 {
412 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0]));
413 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1]));
414 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2]));
415 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]));
416 }
417
418 // Premodulate
419 if(stage > 0 && textureStage.usesTexture)
420 {
421 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
422 {
423 current.x = MulHigh(current.x, texture.x) << 4;
424 current.y = MulHigh(current.y, texture.y) << 4;
425 current.z = MulHigh(current.z, texture.z) << 4;
426 }
427
428 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
429 {
430 current.w = MulHigh(current.w, texture.w) << 4;
431 }
432 }
433
434 if(luminance)
435 {
436 texture.x = MulHigh(texture.x, L) << 4;
437 texture.y = MulHigh(texture.y, L) << 4;
438 texture.z = MulHigh(texture.z, L) << 4;
439
440 luminance = false;
441 }
442
443 switch(textureStage.firstArgument)
444 {
445 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
446 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
447 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
448 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
449 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
450 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
451 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
452 default:
453 ASSERT(false);
454 }
455
456 switch(textureStage.secondArgument)
457 {
458 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
459 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
460 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
461 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
462 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
463 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
464 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
465 default:
466 ASSERT(false);
467 }
468
469 switch(textureStage.thirdArgument)
470 {
471 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
472 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
473 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
474 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
475 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
476 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
477 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
478 default:
479 ASSERT(false);
480 }
481
482 Vector4s mod1;
483 Vector4s mod2;
484 Vector4s mod3;
485
486 switch(textureStage.firstModifier)
487 {
488 case TextureStage::MODIFIER_COLOR:
489 break;
490 case TextureStage::MODIFIER_INVCOLOR:
491 mod1.x = SubSat(Short4(0x1000), arg1->x);
492 mod1.y = SubSat(Short4(0x1000), arg1->y);
493 mod1.z = SubSat(Short4(0x1000), arg1->z);
494 mod1.w = SubSat(Short4(0x1000), arg1->w);
495
496 arg1 = &mod1;
497 break;
498 case TextureStage::MODIFIER_ALPHA:
499 mod1.x = arg1->w;
500 mod1.y = arg1->w;
501 mod1.z = arg1->w;
502 mod1.w = arg1->w;
503
504 arg1 = &mod1;
505 break;
506 case TextureStage::MODIFIER_INVALPHA:
507 mod1.x = SubSat(Short4(0x1000), arg1->w);
508 mod1.y = SubSat(Short4(0x1000), arg1->w);
509 mod1.z = SubSat(Short4(0x1000), arg1->w);
510 mod1.w = SubSat(Short4(0x1000), arg1->w);
511
512 arg1 = &mod1;
513 break;
514 default:
515 ASSERT(false);
516 }
517
518 switch(textureStage.secondModifier)
519 {
520 case TextureStage::MODIFIER_COLOR:
521 break;
522 case TextureStage::MODIFIER_INVCOLOR:
523 mod2.x = SubSat(Short4(0x1000), arg2->x);
524 mod2.y = SubSat(Short4(0x1000), arg2->y);
525 mod2.z = SubSat(Short4(0x1000), arg2->z);
526 mod2.w = SubSat(Short4(0x1000), arg2->w);
527
528 arg2 = &mod2;
529 break;
530 case TextureStage::MODIFIER_ALPHA:
531 mod2.x = arg2->w;
532 mod2.y = arg2->w;
533 mod2.z = arg2->w;
534 mod2.w = arg2->w;
535
536 arg2 = &mod2;
537 break;
538 case TextureStage::MODIFIER_INVALPHA:
539 mod2.x = SubSat(Short4(0x1000), arg2->w);
540 mod2.y = SubSat(Short4(0x1000), arg2->w);
541 mod2.z = SubSat(Short4(0x1000), arg2->w);
542 mod2.w = SubSat(Short4(0x1000), arg2->w);
543
544 arg2 = &mod2;
545 break;
546 default:
547 ASSERT(false);
548 }
549
550 switch(textureStage.thirdModifier)
551 {
552 case TextureStage::MODIFIER_COLOR:
553 break;
554 case TextureStage::MODIFIER_INVCOLOR:
555 mod3.x = SubSat(Short4(0x1000), arg3->x);
556 mod3.y = SubSat(Short4(0x1000), arg3->y);
557 mod3.z = SubSat(Short4(0x1000), arg3->z);
558 mod3.w = SubSat(Short4(0x1000), arg3->w);
559
560 arg3 = &mod3;
561 break;
562 case TextureStage::MODIFIER_ALPHA:
563 mod3.x = arg3->w;
564 mod3.y = arg3->w;
565 mod3.z = arg3->w;
566 mod3.w = arg3->w;
567
568 arg3 = &mod3;
569 break;
570 case TextureStage::MODIFIER_INVALPHA:
571 mod3.x = SubSat(Short4(0x1000), arg3->w);
572 mod3.y = SubSat(Short4(0x1000), arg3->w);
573 mod3.z = SubSat(Short4(0x1000), arg3->w);
574 mod3.w = SubSat(Short4(0x1000), arg3->w);
575
576 arg3 = &mod3;
577 break;
578 default:
579 ASSERT(false);
580 }
581
582 switch(textureStage.stageOperation)
583 {
584 case TextureStage::STAGE_DISABLE:
585 break;
586 case TextureStage::STAGE_SELECTARG1: // Arg1
587 res.x = arg1->x;
588 res.y = arg1->y;
589 res.z = arg1->z;
590 break;
591 case TextureStage::STAGE_SELECTARG2: // Arg2
592 res.x = arg2->x;
593 res.y = arg2->y;
594 res.z = arg2->z;
595 break;
596 case TextureStage::STAGE_SELECTARG3: // Arg3
597 res.x = arg3->x;
598 res.y = arg3->y;
599 res.z = arg3->z;
600 break;
601 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
602 res.x = MulHigh(arg1->x, arg2->x) << 4;
603 res.y = MulHigh(arg1->y, arg2->y) << 4;
604 res.z = MulHigh(arg1->z, arg2->z) << 4;
605 break;
606 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
607 res.x = MulHigh(arg1->x, arg2->x) << 5;
608 res.y = MulHigh(arg1->y, arg2->y) << 5;
609 res.z = MulHigh(arg1->z, arg2->z) << 5;
610 break;
611 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
612 res.x = MulHigh(arg1->x, arg2->x) << 6;
613 res.y = MulHigh(arg1->y, arg2->y) << 6;
614 res.z = MulHigh(arg1->z, arg2->z) << 6;
615 break;
616 case TextureStage::STAGE_ADD: // Arg1 + Arg2
617 res.x = AddSat(arg1->x, arg2->x);
618 res.y = AddSat(arg1->y, arg2->y);
619 res.z = AddSat(arg1->z, arg2->z);
620 break;
621 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
622 res.x = AddSat(arg1->x, arg2->x);
623 res.y = AddSat(arg1->y, arg2->y);
624 res.z = AddSat(arg1->z, arg2->z);
625
626 res.x = SubSat(res.x, Short4(0x0800));
627 res.y = SubSat(res.y, Short4(0x0800));
628 res.z = SubSat(res.z, Short4(0x0800));
629 break;
630 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
631 res.x = AddSat(arg1->x, arg2->x);
632 res.y = AddSat(arg1->y, arg2->y);
633 res.z = AddSat(arg1->z, arg2->z);
634
635 res.x = SubSat(res.x, Short4(0x0800));
636 res.y = SubSat(res.y, Short4(0x0800));
637 res.z = SubSat(res.z, Short4(0x0800));
638
639 res.x = AddSat(res.x, res.x);
640 res.y = AddSat(res.y, res.y);
641 res.z = AddSat(res.z, res.z);
642 break;
643 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
644 res.x = SubSat(arg1->x, arg2->x);
645 res.y = SubSat(arg1->y, arg2->y);
646 res.z = SubSat(arg1->z, arg2->z);
647 break;
648 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
649 {
650 Short4 tmp;
651
652 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
653 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
654 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
655 }
656 break;
657 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
658 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
659 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
660 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
661 break;
662 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
663 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
664 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
665 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
666 break;
667 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
668 {
669 Short4 tmp;
670
671 res.x = SubSat(arg1->x, Short4(0x0800)); tmp = SubSat(arg2->x, Short4(0x0800)); res.x = MulHigh(res.x, tmp);
672 res.y = SubSat(arg1->y, Short4(0x0800)); tmp = SubSat(arg2->y, Short4(0x0800)); res.y = MulHigh(res.y, tmp);
673 res.z = SubSat(arg1->z, Short4(0x0800)); tmp = SubSat(arg2->z, Short4(0x0800)); res.z = MulHigh(res.z, tmp);
674
675 res.x = res.x << 6;
676 res.y = res.y << 6;
677 res.z = res.z << 6;
678
679 res.x = AddSat(res.x, res.y);
680 res.x = AddSat(res.x, res.z);
681
682 // Clamp to [0, 1]
683 res.x = Max(res.x, Short4(0x0000));
684 res.x = Min(res.x, Short4(0x1000));
685
686 res.y = res.x;
687 res.z = res.x;
688 res.w = res.x;
689 }
690 break;
691 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
692 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x);
693 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y);
694 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z);
695 break;
696 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
697 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
698 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
699 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
700 break;
701 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
702 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
703 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
704 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
705 break;
706 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
707 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
708 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
709 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
710 break;
711 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
712 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
713 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
714 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
715 break;
716 case TextureStage::STAGE_PREMODULATE:
717 res.x = arg1->x;
718 res.y = arg1->y;
719 res.z = arg1->z;
720 break;
721 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
722 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
723 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
724 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
725 break;
726 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
727 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
728 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
729 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
730 break;
731 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
732 {
733 Short4 tmp;
734
735 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
736 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
737 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
738 }
739 break;
740 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
741 {
742 Short4 tmp;
743
744 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
745 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
746 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
747 }
748 break;
749 case TextureStage::STAGE_BUMPENVMAP:
750 {
751 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
752 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
753
754 Float4 du2;
755 Float4 dv2;
756
757 du2 = du;
758 dv2 = dv;
759 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
760 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
761 du += dv2;
762 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
763 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
764 dv += du2;
765
766 perturbate = true;
767
768 res.x = current.x;
769 res.y = current.y;
770 res.z = current.z;
771 res.w = current.w;
772 }
773 break;
774 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
775 {
776 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
777 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
778
779 Float4 du2;
780 Float4 dv2;
781
782 du2 = du;
783 dv2 = dv;
784
785 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
786 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
787 du += dv2;
788 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
789 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
790 dv += du2;
791
792 perturbate = true;
793
794 L = texture.z;
795 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
796 L = L << 4;
797 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
798 L = Max(L, Short4(0x0000));
799 L = Min(L, Short4(0x1000));
800
801 luminance = true;
802
803 res.x = current.x;
804 res.y = current.y;
805 res.z = current.z;
806 res.w = current.w;
807 }
808 break;
809 default:
810 ASSERT(false);
811 }
812
813 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
814 {
815 switch(textureStage.firstArgumentAlpha)
816 {
817 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
818 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
819 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
820 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
821 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
822 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
823 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
824 default:
825 ASSERT(false);
826 }
827
828 switch(textureStage.secondArgumentAlpha)
829 {
830 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
831 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
832 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
833 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
834 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
835 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
836 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
837 default:
838 ASSERT(false);
839 }
840
841 switch(textureStage.thirdArgumentAlpha)
842 {
843 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
844 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
845 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
846 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
847 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
848 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
849 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
850 default:
851 ASSERT(false);
852 }
853
854 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
855 {
856 case TextureStage::MODIFIER_COLOR:
857 break;
858 case TextureStage::MODIFIER_INVCOLOR:
859 mod1.w = SubSat(Short4(0x1000), arg1->w);
860
861 arg1 = &mod1;
862 break;
863 case TextureStage::MODIFIER_ALPHA:
				// Redundant
865 break;
866 case TextureStage::MODIFIER_INVALPHA:
867 mod1.w = SubSat(Short4(0x1000), arg1->w);
868
869 arg1 = &mod1;
870 break;
871 default:
872 ASSERT(false);
873 }
874
875 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
876 {
877 case TextureStage::MODIFIER_COLOR:
878 break;
879 case TextureStage::MODIFIER_INVCOLOR:
880 mod2.w = SubSat(Short4(0x1000), arg2->w);
881
882 arg2 = &mod2;
883 break;
884 case TextureStage::MODIFIER_ALPHA:
				// Redundant
886 break;
887 case TextureStage::MODIFIER_INVALPHA:
888 mod2.w = SubSat(Short4(0x1000), arg2->w);
889
890 arg2 = &mod2;
891 break;
892 default:
893 ASSERT(false);
894 }
895
896 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
897 {
898 case TextureStage::MODIFIER_COLOR:
899 break;
900 case TextureStage::MODIFIER_INVCOLOR:
901 mod3.w = SubSat(Short4(0x1000), arg3->w);
902
903 arg3 = &mod3;
904 break;
905 case TextureStage::MODIFIER_ALPHA:
906 // Redudant
907 break;
908 case TextureStage::MODIFIER_INVALPHA:
909 mod3.w = SubSat(Short4(0x1000), arg3->w);
910
911 arg3 = &mod3;
912 break;
913 default:
914 ASSERT(false);
915 }
916
917 switch(textureStage.stageOperationAlpha)
918 {
919 case TextureStage::STAGE_DISABLE:
920 break;
921 case TextureStage::STAGE_SELECTARG1: // Arg1
922 res.w = arg1->w;
923 break;
924 case TextureStage::STAGE_SELECTARG2: // Arg2
925 res.w = arg2->w;
926 break;
927 case TextureStage::STAGE_SELECTARG3: // Arg3
928 res.w = arg3->w;
929 break;
930 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
931 res.w = MulHigh(arg1->w, arg2->w) << 4;
932 break;
933 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
934 res.w = MulHigh(arg1->w, arg2->w) << 5;
935 break;
936 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
937 res.w = MulHigh(arg1->w, arg2->w) << 6;
938 break;
939 case TextureStage::STAGE_ADD: // Arg1 + Arg2
940 res.w = AddSat(arg1->w, arg2->w);
941 break;
942 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
943 res.w = AddSat(arg1->w, arg2->w);
944 res.w = SubSat(res.w, Short4(0x0800));
945 break;
946 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
947 res.w = AddSat(arg1->w, arg2->w);
948 res.w = SubSat(res.w, Short4(0x0800));
949 res.w = AddSat(res.w, res.w);
950 break;
951 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
952 res.w = SubSat(arg1->w, arg2->w);
953 break;
954 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
955 {
956 Short4 tmp;
957
958 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
959 }
960 break;
961 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
962 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
963 break;
964 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
965 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
966 break;
967 case TextureStage::STAGE_DOT3:
968 break; // Already computed in color channel
969 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
970 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w);
971 break;
972 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
973 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
974 break;
975 case TextureStage::STAGE_BLENDFACTORALPHA:
976 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
977 break;
978 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
979 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
980 break;
981 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
982 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
983 break;
984 case TextureStage::STAGE_PREMODULATE:
985 res.w = arg1->w;
986 break;
987 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
988 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
989 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
990 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
991 case TextureStage::STAGE_BUMPENVMAP:
992 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
993 break; // Invalid alpha operations
994 default:
995 ASSERT(false);
996 }
997 }
998
999 // Clamp result to [0, 1]
1000
1001 switch(textureStage.stageOperation)
1002 {
1003 case TextureStage::STAGE_DISABLE:
1004 case TextureStage::STAGE_SELECTARG1:
1005 case TextureStage::STAGE_SELECTARG2:
1006 case TextureStage::STAGE_SELECTARG3:
1007 case TextureStage::STAGE_MODULATE:
1008 case TextureStage::STAGE_MODULATE2X:
1009 case TextureStage::STAGE_MODULATE4X:
1010 case TextureStage::STAGE_ADD:
1011 case TextureStage::STAGE_MULTIPLYADD:
1012 case TextureStage::STAGE_LERP:
1013 case TextureStage::STAGE_BLENDCURRENTALPHA:
1014 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1015 case TextureStage::STAGE_BLENDFACTORALPHA:
1016 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1017 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1018 case TextureStage::STAGE_DOT3: // Already clamped
1019 case TextureStage::STAGE_PREMODULATE:
1020 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1021 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1022 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1023 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1024 case TextureStage::STAGE_BUMPENVMAP:
1025 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1026 if(state.textureStage[stage].cantUnderflow)
1027 {
1028 break; // Can't go below zero
1029 }
1030 case TextureStage::STAGE_ADDSIGNED:
1031 case TextureStage::STAGE_ADDSIGNED2X:
1032 case TextureStage::STAGE_SUBTRACT:
1033 case TextureStage::STAGE_ADDSMOOTH:
1034 res.x = Max(res.x, Short4(0x0000));
1035 res.y = Max(res.y, Short4(0x0000));
1036 res.z = Max(res.z, Short4(0x0000));
1037 break;
1038 default:
1039 ASSERT(false);
1040 }
1041
1042 switch(textureStage.stageOperationAlpha)
1043 {
1044 case TextureStage::STAGE_DISABLE:
1045 case TextureStage::STAGE_SELECTARG1:
1046 case TextureStage::STAGE_SELECTARG2:
1047 case TextureStage::STAGE_SELECTARG3:
1048 case TextureStage::STAGE_MODULATE:
1049 case TextureStage::STAGE_MODULATE2X:
1050 case TextureStage::STAGE_MODULATE4X:
1051 case TextureStage::STAGE_ADD:
1052 case TextureStage::STAGE_MULTIPLYADD:
1053 case TextureStage::STAGE_LERP:
1054 case TextureStage::STAGE_BLENDCURRENTALPHA:
1055 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1056 case TextureStage::STAGE_BLENDFACTORALPHA:
1057 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1058 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1059 case TextureStage::STAGE_DOT3: // Already clamped
1060 case TextureStage::STAGE_PREMODULATE:
1061 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1062 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1063 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1064 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1065 case TextureStage::STAGE_BUMPENVMAP:
1066 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1067 if(state.textureStage[stage].cantUnderflow)
1068 {
1069 break; // Can't go below zero
1070 }
1071 case TextureStage::STAGE_ADDSIGNED:
1072 case TextureStage::STAGE_ADDSIGNED2X:
1073 case TextureStage::STAGE_SUBTRACT:
1074 case TextureStage::STAGE_ADDSMOOTH:
1075 res.w = Max(res.w, Short4(0x0000));
1076 break;
1077 default:
1078 ASSERT(false);
1079 }
1080
1081 switch(textureStage.stageOperation)
1082 {
1083 case TextureStage::STAGE_DISABLE:
1084 case TextureStage::STAGE_SELECTARG1:
1085 case TextureStage::STAGE_SELECTARG2:
1086 case TextureStage::STAGE_SELECTARG3:
1087 case TextureStage::STAGE_MODULATE:
1088 case TextureStage::STAGE_SUBTRACT:
1089 case TextureStage::STAGE_ADDSMOOTH:
1090 case TextureStage::STAGE_LERP:
1091 case TextureStage::STAGE_BLENDCURRENTALPHA:
1092 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1093 case TextureStage::STAGE_BLENDFACTORALPHA:
1094 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1095 case TextureStage::STAGE_DOT3: // Already clamped
1096 case TextureStage::STAGE_PREMODULATE:
1097 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1098 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1099 case TextureStage::STAGE_BUMPENVMAP:
1100 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1101 break; // Can't go above one
1102 case TextureStage::STAGE_MODULATE2X:
1103 case TextureStage::STAGE_MODULATE4X:
1104 case TextureStage::STAGE_ADD:
1105 case TextureStage::STAGE_ADDSIGNED:
1106 case TextureStage::STAGE_ADDSIGNED2X:
1107 case TextureStage::STAGE_MULTIPLYADD:
1108 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1109 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1110 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1111 res.x = Min(res.x, Short4(0x1000));
1112 res.y = Min(res.y, Short4(0x1000));
1113 res.z = Min(res.z, Short4(0x1000));
1114 break;
1115 default:
1116 ASSERT(false);
1117 }
1118
1119 switch(textureStage.stageOperationAlpha)
1120 {
1121 case TextureStage::STAGE_DISABLE:
1122 case TextureStage::STAGE_SELECTARG1:
1123 case TextureStage::STAGE_SELECTARG2:
1124 case TextureStage::STAGE_SELECTARG3:
1125 case TextureStage::STAGE_MODULATE:
1126 case TextureStage::STAGE_SUBTRACT:
1127 case TextureStage::STAGE_ADDSMOOTH:
1128 case TextureStage::STAGE_LERP:
1129 case TextureStage::STAGE_BLENDCURRENTALPHA:
1130 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1131 case TextureStage::STAGE_BLENDFACTORALPHA:
1132 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1133 case TextureStage::STAGE_DOT3: // Already clamped
1134 case TextureStage::STAGE_PREMODULATE:
1135 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1136 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1137 case TextureStage::STAGE_BUMPENVMAP:
1138 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1139 break; // Can't go above one
1140 case TextureStage::STAGE_MODULATE2X:
1141 case TextureStage::STAGE_MODULATE4X:
1142 case TextureStage::STAGE_ADD:
1143 case TextureStage::STAGE_ADDSIGNED:
1144 case TextureStage::STAGE_ADDSIGNED2X:
1145 case TextureStage::STAGE_MULTIPLYADD:
1146 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1147 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1148 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1149 res.w = Min(res.w, Short4(0x1000));
1150 break;
1151 default:
1152 ASSERT(false);
1153 }
1154
1155 switch(textureStage.destinationArgument)
1156 {
1157 case TextureStage::DESTINATION_CURRENT:
1158 current.x = res.x;
1159 current.y = res.y;
1160 current.z = res.z;
1161 current.w = res.w;
1162 break;
1163 case TextureStage::DESTINATION_TEMP:
1164 temp.x = res.x;
1165 temp.y = res.y;
1166 temp.z = res.z;
1167 temp.w = res.w;
1168 break;
1169 default:
1170 ASSERT(false);
1171 }
1172 }
1173
1174 void PixelPipeline::fogBlend(Vector4s &current, Float4 &f)
1175 {
1176 if(!state.fogActive)
1177 {
1178 return;
1179 }
1180
1181 if(state.pixelFogMode != FOG_NONE)
1182 {
1183 pixelFog(f);
1184 }
1185
1186 UShort4 fog = convertFixed16(f, true);
1187
1188 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1189 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1190 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
1191
1192 UShort4 invFog = UShort4(0xFFFFu) - fog;
1193
1194 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0]))));
1195 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1]))));
1196 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2]))));
1197 }
1198
1199 void PixelPipeline::specularPixel(Vector4s &current, Vector4s &specular)
1200 {
1201 if(!state.specularAdd)
1202 {
1203 return;
1204 }
1205
1206 current.x = AddSat(current.x, specular.x);
1207 current.y = AddSat(current.y, specular.y);
1208 current.z = AddSat(current.z, specular.z);
1209 }
1210
1211 Vector4s PixelPipeline::sampleTexture(int coordinates, int stage, bool project)
1212 {
1213 Float4 x = v[2 + coordinates].x;
1214 Float4 y = v[2 + coordinates].y;
1215 Float4 z = v[2 + coordinates].z;
1216 Float4 w = v[2 + coordinates].w;
1217
1218 if(perturbate)
1219 {
1220 x += du;
1221 y += dv;
1222
1223 perturbate = false;
1224 }
1225
1226 return sampleTexture(stage, x, y, z, w, project);
1227 }
1228
1229 Vector4s PixelPipeline::sampleTexture(int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
1230 {
1231 Vector4s c;
1232
1233 #if PERF_PROFILE
1234 Long texTime = Ticks();
1235 #endif
1236
1237 Vector4f dsx;
1238 Vector4f dsy;
1239
1240 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
1241
1242 if(!project)
1243 {
1244 c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, q, dsx, dsy);
1245 }
1246 else
1247 {
1248 Float4 rq = reciprocal(q);
1249
1250 Float4 u_q = u * rq;
1251 Float4 v_q = v * rq;
1252 Float4 w_q = w * rq;
1253
1254 c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, q, dsx, dsy);
1255 }
1256
1257 #if PERF_PROFILE
1258 cycles[PERF_TEX] += Ticks() - texTime;
1259 #endif
1260
1261 return c;
1262 }
1263
1264 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
1265 {
1266 return RoundShort4(cf * Float4(0x1000));
1267 }
1268
1269 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
1270 {
1271 cs.x = convertFixed12(cf.x);
1272 cs.y = convertFixed12(cf.y);
1273 cs.z = convertFixed12(cf.z);
1274 cs.w = convertFixed12(cf.w);
1275 }
1276
1277 Float4 PixelPipeline::convertSigned12(Short4 &cs)
1278 {
1279 return Float4(cs) * Float4(1.0f / 0x0FFE);
1280 }
1281
1282 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
1283 {
1284 cf.x = convertSigned12(cs.x);
1285 cf.y = convertSigned12(cs.y);
1286 cf.z = convertSigned12(cs.z);
1287 cf.w = convertSigned12(cs.w);
1288 }
1289
1290 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst)
1291 {
1292 switch(dst.type)
1293 {
1294 case Shader::PARAMETER_TEMP:
1295 if(dst.mask & 0x1) rs[dst.index].x = d.x;
1296 if(dst.mask & 0x2) rs[dst.index].y = d.y;
1297 if(dst.mask & 0x4) rs[dst.index].z = d.z;
1298 if(dst.mask & 0x8) rs[dst.index].w = d.w;
1299 break;
1300 case Shader::PARAMETER_INPUT:
1301 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1302 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1303 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1304 if(dst.mask & 0x8) vs[dst.index].w = d.w;
1305 break;
1306 case Shader::PARAMETER_CONST: ASSERT(false); break;
1307 case Shader::PARAMETER_TEXTURE:
1308 if(dst.mask & 0x1) ts[dst.index].x = d.x;
1309 if(dst.mask & 0x2) ts[dst.index].y = d.y;
1310 if(dst.mask & 0x4) ts[dst.index].z = d.z;
1311 if(dst.mask & 0x8) ts[dst.index].w = d.w;
1312 break;
1313 case Shader::PARAMETER_COLOROUT:
1314 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1315 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1316 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1317 if(dst.mask & 0x8) vs[dst.index].w = d.w;
1318 break;
1319 default:
1320 ASSERT(false);
1321 }
1322 }
1323
1324 Vector4s PixelPipeline::fetchRegister(const Src &src)
1325 {
1326 Vector4s *reg;
1327 int i = src.index;
1328
1329 Vector4s c;
1330
1331 if(src.type == Shader::PARAMETER_CONST)
1332 {
1333 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0]));
1334 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1]));
1335 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2]));
1336 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3]));
1337 }
1338
1339 switch(src.type)
1340 {
1341 case Shader::PARAMETER_TEMP: reg = &rs[i]; break;
1342 case Shader::PARAMETER_INPUT: reg = &vs[i]; break;
1343 case Shader::PARAMETER_CONST: reg = &c; break;
1344 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break;
1345 case Shader::PARAMETER_VOID: return rs[0]; // Dummy
1346 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy
1347 default: ASSERT(false); return rs[0];
1348 }
1349
1350 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
1351 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
1352 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
1353 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
1354
1355 Vector4s mod;
1356
1357 switch(src.modifier)
1358 {
1359 case Shader::MODIFIER_NONE:
1360 mod.x = x;
1361 mod.y = y;
1362 mod.z = z;
1363 mod.w = w;
1364 break;
1365 case Shader::MODIFIER_BIAS:
1366 mod.x = SubSat(x, Short4(0x0800));
1367 mod.y = SubSat(y, Short4(0x0800));
1368 mod.z = SubSat(z, Short4(0x0800));
1369 mod.w = SubSat(w, Short4(0x0800));
1370 break;
1371 case Shader::MODIFIER_BIAS_NEGATE:
1372 mod.x = SubSat(Short4(0x0800), x);
1373 mod.y = SubSat(Short4(0x0800), y);
1374 mod.z = SubSat(Short4(0x0800), z);
1375 mod.w = SubSat(Short4(0x0800), w);
1376 break;
1377 case Shader::MODIFIER_COMPLEMENT:
1378 mod.x = SubSat(Short4(0x1000), x);
1379 mod.y = SubSat(Short4(0x1000), y);
1380 mod.z = SubSat(Short4(0x1000), z);
1381 mod.w = SubSat(Short4(0x1000), w);
1382 break;
1383 case Shader::MODIFIER_NEGATE:
1384 mod.x = -x;
1385 mod.y = -y;
1386 mod.z = -z;
1387 mod.w = -w;
1388 break;
1389 case Shader::MODIFIER_X2:
1390 mod.x = AddSat(x, x);
1391 mod.y = AddSat(y, y);
1392 mod.z = AddSat(z, z);
1393 mod.w = AddSat(w, w);
1394 break;
1395 case Shader::MODIFIER_X2_NEGATE:
1396 mod.x = -AddSat(x, x);
1397 mod.y = -AddSat(y, y);
1398 mod.z = -AddSat(z, z);
1399 mod.w = -AddSat(w, w);
1400 break;
1401 case Shader::MODIFIER_SIGN:
1402 mod.x = SubSat(x, Short4(0x0800));
1403 mod.y = SubSat(y, Short4(0x0800));
1404 mod.z = SubSat(z, Short4(0x0800));
1405 mod.w = SubSat(w, Short4(0x0800));
1406 mod.x = AddSat(mod.x, mod.x);
1407 mod.y = AddSat(mod.y, mod.y);
1408 mod.z = AddSat(mod.z, mod.z);
1409 mod.w = AddSat(mod.w, mod.w);
1410 break;
1411 case Shader::MODIFIER_SIGN_NEGATE:
1412 mod.x = SubSat(Short4(0x0800), x);
1413 mod.y = SubSat(Short4(0x0800), y);
1414 mod.z = SubSat(Short4(0x0800), z);
1415 mod.w = SubSat(Short4(0x0800), w);
1416 mod.x = AddSat(mod.x, mod.x);
1417 mod.y = AddSat(mod.y, mod.y);
1418 mod.z = AddSat(mod.z, mod.z);
1419 mod.w = AddSat(mod.w, mod.w);
1420 break;
1421 case Shader::MODIFIER_DZ:
1422 mod.x = x;
1423 mod.y = y;
1424 mod.z = z;
1425 mod.w = w;
1426 // Projection performed by texture sampler
1427 break;
1428 case Shader::MODIFIER_DW:
1429 mod.x = x;
1430 mod.y = y;
1431 mod.z = z;
1432 mod.w = w;
1433 // Projection performed by texture sampler
1434 break;
1435 default:
1436 ASSERT(false);
1437 }
1438
1439 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
1440 {
1441 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000));
1442 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000));
1443 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000));
1444 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000));
1445 }
1446
1447 return mod;
1448 }
1449
1450 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
1451 {
1452 dst.x = src0.x;
1453 dst.y = src0.y;
1454 dst.z = src0.z;
1455 dst.w = src0.w;
1456 }
1457
1458 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1459 {
1460 dst.x = AddSat(src0.x, src1.x);
1461 dst.y = AddSat(src0.y, src1.y);
1462 dst.z = AddSat(src0.z, src1.z);
1463 dst.w = AddSat(src0.w, src1.w);
1464 }
1465
1466 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1467 {
1468 dst.x = SubSat(src0.x, src1.x);
1469 dst.y = SubSat(src0.y, src1.y);
1470 dst.z = SubSat(src0.z, src1.z);
1471 dst.w = SubSat(src0.w, src1.w);
1472 }
1473
1474 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1475 {
1476 // FIXME: Long fixed-point multiply fixup
1477 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1478 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); }
1479 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1480 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1481 }
1482
1483 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1484 {
1485 // FIXME: Long fixed-point multiply fixup
1486 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
1487 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); }
1488 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
1489 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
1490 }
1491
1492 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1493 {
1494 Short4 t0;
1495 Short4 t1;
1496
1497 // FIXME: Long fixed-point multiply fixup
1498 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1499 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1500 t0 = AddSat(t0, t1);
1501 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1502 t0 = AddSat(t0, t1);
1503
1504 dst.x = t0;
1505 dst.y = t0;
1506 dst.z = t0;
1507 dst.w = t0;
1508 }
1509
1510 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1511 {
1512 Short4 t0;
1513 Short4 t1;
1514
1515 // FIXME: Long fixed-point multiply fixup
1516 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1517 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1518 t0 = AddSat(t0, t1);
1519 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1520 t0 = AddSat(t0, t1);
1521 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1522 t0 = AddSat(t0, t1);
1523
1524 dst.x = t0;
1525 dst.y = t0;
1526 dst.z = t0;
1527 dst.w = t0;
1528 }
1529
1530 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1531 {
1532 // FIXME: Long fixed-point multiply fixup
1533 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1534 {
1535 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1536 }
1537 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1538 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1539 }
1540
1541 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
1542 {
1543 Float4 uw;
1544 Float4 vw;
1545 Float4 sw;
1546
1547 if(state.interpolant[2 + coordinate].component & 0x01)
1548 {
1549 uw = Max(u, Float4(0.0f));
1550 uw = Min(uw, Float4(1.0f));
1551 dst.x = convertFixed12(uw);
1552 }
1553 else
1554 {
1555 dst.x = Short4(0x0000);
1556 }
1557
1558 if(state.interpolant[2 + coordinate].component & 0x02)
1559 {
1560 vw = Max(v, Float4(0.0f));
1561 vw = Min(vw, Float4(1.0f));
1562 dst.y = convertFixed12(vw);
1563 }
1564 else
1565 {
1566 dst.y = Short4(0x0000);
1567 }
1568
1569 if(state.interpolant[2 + coordinate].component & 0x04)
1570 {
1571 sw = Max(s, Float4(0.0f));
1572 sw = Min(sw, Float4(1.0f));
1573 dst.z = convertFixed12(sw);
1574 }
1575 else
1576 {
1577 dst.z = Short4(0x0000);
1578 }
1579
1580 dst.w = Short4(0x1000);
1581 }
1582
1583 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
1584 {
1585 Float4 uw = u;
1586 Float4 vw = v;
1587 Float4 sw = s;
1588
1589 if(project)
1590 {
1591 uw *= Rcp_pp(s);
1592 vw *= Rcp_pp(s);
1593 }
1594
1595 if(state.interpolant[2 + coordinate].component & 0x01)
1596 {
1597 uw *= Float4(0x1000);
1598 uw = Max(uw, Float4(-0x8000));
1599 uw = Min(uw, Float4(0x7FFF));
1600 dst.x = RoundShort4(uw);
1601 }
1602 else
1603 {
1604 dst.x = Short4(0x0000);
1605 }
1606
1607 if(state.interpolant[2 + coordinate].component & 0x02)
1608 {
1609 vw *= Float4(0x1000);
1610 vw = Max(vw, Float4(-0x8000));
1611 vw = Min(vw, Float4(0x7FFF));
1612 dst.y = RoundShort4(vw);
1613 }
1614 else
1615 {
1616 dst.y = Short4(0x0000);
1617 }
1618
1619 if(state.interpolant[2 + coordinate].component & 0x04)
1620 {
1621 sw *= Float4(0x1000);
1622 sw = Max(sw, Float4(-0x8000));
1623 sw = Min(sw, Float4(0x7FFF));
1624 dst.z = RoundShort4(sw);
1625 }
1626 else
1627 {
1628 dst.z = Short4(0x0000);
1629 }
1630 }
1631
1632 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
1633 {
1634 TEXM3X3PAD(u, v, s, src, 0, false);
1635
1636 Short4 t0 = RoundShort4(u_ * Float4(0x1000));
1637
1638 dst.x = t0;
1639 dst.y = t0;
1640 dst.z = t0;
1641 dst.w = t0;
1642 }
1643
1644 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
1645 {
1646 TEXM3X3PAD(u, v, s, src0, 0, false);
1647
1648 v_ = Float4(0.0f);
1649 w_ = Float4(0.0f);
1650
1651 dst = sampleTexture(stage, u_, v_, w_, w_);
1652 }
1653
1654 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
1655 {
1656 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
1657 SignMask(CmpNLT(v, Float4(0.0f))) &
1658 SignMask(CmpNLT(s, Float4(0.0f)));
1659
1660 for(unsigned int q = 0; q < state.multiSample; q++)
1661 {
1662 cMask[q] &= kill;
1663 }
1664 }
1665
1666 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
1667 {
1668 Short4 test = src.x | src.y | src.z;
1669 Int kill = SignMask(PackSigned(test, test)) ^ 0x0000000F;
1670
1671 for(unsigned int q = 0; q < state.multiSample; q++)
1672 {
1673 cMask[q] &= kill;
1674 }
1675 }
1676
1677 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
1678 {
1679 dst = sampleTexture(sampler, u, v, s, s, project);
1680 }
1681
1682 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
1683 {
1684 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
1685 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
1686 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
1687
1688 dst = sampleTexture(sampler, u, v, s, s, project);
1689 }
1690
1691 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
1692 {
1693 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1694 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1695
1696 Float4 du2 = du;
1697 Float4 dv2 = dv;
1698
1699 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1700 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
1701 du += dv2;
1702 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1703 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1704 dv += du2;
1705
1706 Float4 u_ = u + du;
1707 Float4 v_ = v + dv;
1708
1709 dst = sampleTexture(stage, u_, v_, s, s);
1710 }
1711
1712 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
1713 {
1714 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1715 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1716
1717 Float4 du2 = du;
1718 Float4 dv2 = dv;
1719
1720 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1721 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
1722 du += dv2;
1723 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1724 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1725 dv += du2;
1726
1727 Float4 u_ = u + du;
1728 Float4 v_ = v + dv;
1729
1730 dst = sampleTexture(stage, u_, v_, s, s);
1731
1732 Short4 L;
1733
1734 L = src.z;
1735 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
1736 L = L << 4;
1737 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
1738 L = Max(L, Short4(0x0000));
1739 L = Min(L, Short4(0x1000));
1740
1741 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
1742 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
1743 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
1744 }
1745
1746 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage)
1747 {
1748 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
1749 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1750 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1751
1752 dst = sampleTexture(stage, u, v, s, s);
1753 }
1754
1755 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
1756 {
1757 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1758 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1759 Float4 s = v;
1760
1761 dst = sampleTexture(stage, u, v, s, s);
1762 }
1763
1764 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
1765 {
1766 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1767 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1768 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1769
1770 dst = sampleTexture(stage, u, v, s, s);
1771 }
1772
1773 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
1774 {
1775 TEXM3X2PAD(u, v, s, src, 1, signedScaling);
1776
1777 // z / w
1778 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
1779
1780 oDepth = u_;
1781 }
1782
1783 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1784 {
1785 TEXM3X3PAD(u, v, s, src0, component, signedScaling);
1786 }
1787
1788 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1789 {
1790 TEXM3X2PAD(u, v, s, src0, 1, signedScaling);
1791
1792 w_ = Float4(0.0f);
1793
1794 dst = sampleTexture(stage, u_, v_, w_, w_);
1795 }
1796
1797 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
1798 {
1799 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1800
1801 dst.x = RoundShort4(u_ * Float4(0x1000));
1802 dst.y = RoundShort4(v_ * Float4(0x1000));
1803 dst.z = RoundShort4(w_ * Float4(0x1000));
1804 dst.w = Short4(0x1000);
1805 }
1806
1807 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1808 {
1809 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
1810 {
1811 U = Float4(src0.x);
1812 V = Float4(src0.y);
1813 W = Float4(src0.z);
1814
1815 previousScaling = signedScaling;
1816 }
1817
1818 Float4 x = U * u + V * v + W * s;
1819
1820 x *= Float4(1.0f / 0x1000);
1821
1822 switch(component)
1823 {
1824 case 0: u_ = x; break;
1825 case 1: v_ = x; break;
1826 case 2: w_ = x; break;
1827 default: ASSERT(false);
1828 }
1829 }
1830
1831 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
1832 {
1833 TEXM3X3PAD(u, v, s, src0, 2, false);
1834
1835 Float4 E[3]; // Eye vector
1836
1837 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
1838 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
1839 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
1840
1841 // Reflection
1842 Float4 u__;
1843 Float4 v__;
1844 Float4 w__;
1845
1846 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1847 u__ = u_ * E[0];
1848 v__ = v_ * E[1];
1849 w__ = w_ * E[2];
1850 u__ += v__ + w__;
1851 u__ += u__;
1852 v__ = u__;
1853 w__ = u__;
1854 u__ *= u_;
1855 v__ *= v_;
1856 w__ *= w_;
1857 u_ *= u_;
1858 v_ *= v_;
1859 w_ *= w_;
1860 u_ += v_ + w_;
1861 u__ -= E[0] * u_;
1862 v__ -= E[1] * u_;
1863 w__ -= E[2] * u_;
1864
1865 dst = sampleTexture(stage, u__, v__, w__, w__);
1866 }
1867
1868 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1869 {
1870 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1871
1872 dst = sampleTexture(stage, u_, v_, w_, w_);
1873 }
1874
1875 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
1876 {
1877 TEXM3X3PAD(x, y, z, src0, 2, false);
1878
1879 Float4 E[3]; // Eye vector
1880
1881 E[0] = v[2 + stage - 2].w;
1882 E[1] = v[2 + stage - 1].w;
1883 E[2] = v[2 + stage - 0].w;
1884
1885 // Reflection
1886 Float4 u__;
1887 Float4 v__;
1888 Float4 w__;
1889
1890 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1891 u__ = u_ * E[0];
1892 v__ = v_ * E[1];
1893 w__ = w_ * E[2];
1894 u__ += v__ + w__;
1895 u__ += u__;
1896 v__ = u__;
1897 w__ = u__;
1898 u__ *= u_;
1899 v__ *= v_;
1900 w__ *= w_;
1901 u_ *= u_;
1902 v_ *= v_;
1903 w_ *= w_;
1904 u_ += v_ + w_;
1905 u__ -= E[0] * u_;
1906 v__ -= E[1] * u_;
1907 w__ -= E[2] * u_;
1908
1909 dst = sampleTexture(stage, u__, v__, w__, w__);
1910 }
1911
1912 void PixelPipeline::TEXDEPTH()
1913 {
1914 u_ = Float4(rs[5].x);
1915 v_ = Float4(rs[5].y);
1916
1917 // z / w
1918 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
1919
1920 oDepth = u_;
1921 }
1922
1923 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1924 {
1925 {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };
1926 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };
1927 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };
1928 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };
1929 }
1930
1931 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1932 {
1933 {Short4 t0 = CmpGT(Short4(0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };
1934 {Short4 t0 = CmpGT(Short4(0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };
1935 {Short4 t0 = CmpGT(Short4(0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };
1936 {Short4 t0 = CmpGT(Short4(0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };
1937 }
1938
1939 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
1940 {
1941 Short4 t0;
1942 Short4 t1;
1943
1944 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
1945 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1946 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
1947 t0 = AddSat(t0, t1);
1948 t0 = AddSat(t0, src0.x);
1949 dst.x = t0;
1950
1951 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
1952 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1953 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
1954 t0 = AddSat(t0, t1);
1955 t0 = AddSat(t0, src0.y);
1956 dst.y = t0;
1957 }
1958}
1959
1960