1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "PixelProgram.hpp" |
16 | |
17 | #include "SamplerCore.hpp" |
18 | #include "Renderer/Primitive.hpp" |
19 | #include "Renderer/Renderer.hpp" |
20 | |
21 | namespace sw |
22 | { |
// Configuration flags that are defined in another translation unit.
extern bool booleanFaceRegister;         // vFace receives a comparison mask of the primitive area against zero instead of the area itself
extern bool fullPixelPositionRegister;   // vPos.z and vPos.w are filled in too, not just vPos.x and vPos.y
extern bool halfIntegerCoordinates;      // Pixel centers are not at integer coordinates
extern bool postBlendSRGB;               // When false, rasterOperation() converts sRGB writes itself, ahead of fog and blending
27 | |
28 | PixelProgram::PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) : |
29 | PixelRoutine(state, shader), |
30 | r(shader->indirectAddressableTemporaries), |
31 | aL(shader->getLimits().loops), |
32 | increment(shader->getLimits().loops), |
33 | iteration(shader->getLimits().loops), |
34 | callStack(shader->getLimits().stack) |
35 | { |
36 | auto limits = shader->getLimits(); |
37 | ifFalseBlock.resize(limits.ifs); |
38 | loopRepTestBlock.resize(limits.loops); |
39 | loopRepEndBlock.resize(limits.loops); |
40 | labelBlock.resize(limits.maxLabel + 1); |
41 | isConditionalIf.resize(limits.ifs); |
42 | |
43 | loopDepth = -1; |
44 | enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
45 | |
46 | if(shader->containsBreakInstruction()) |
47 | { |
48 | enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
49 | } |
50 | |
51 | if(shader->containsContinueInstruction()) |
52 | { |
53 | enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
54 | } |
55 | } |
56 | |
57 | void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) |
58 | { |
59 | if(shader->getShaderModel() >= 0x0300) |
60 | { |
61 | if(shader->isVPosDeclared()) |
62 | { |
63 | if(!halfIntegerCoordinates) |
64 | { |
65 | vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); |
66 | vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); |
67 | } |
68 | else |
69 | { |
70 | vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); |
71 | vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); |
72 | } |
73 | |
74 | if(fullPixelPositionRegister) |
75 | { |
76 | vPos.z = z[0]; // FIXME: Centroid? |
77 | vPos.w = w; // FIXME: Centroid? |
78 | } |
79 | } |
80 | |
81 | if(shader->isVFaceDeclared()) |
82 | { |
83 | Float4 face = *Pointer<Float>(primitive + OFFSET(Primitive, area)); |
84 | |
85 | if(booleanFaceRegister) |
86 | { |
87 | face = As<Float4>(state.frontFaceCCW ? CmpNLT(face, Float4(0.0f)) : CmpLT(face, Float4(0.0f))); |
88 | } |
89 | |
90 | vFace.x = face; |
91 | vFace.y = face; |
92 | vFace.z = face; |
93 | vFace.w = face; |
94 | } |
95 | } |
96 | } |
97 | |
98 | void PixelProgram::applyShader(Int cMask[4]) |
99 | { |
100 | enableIndex = 0; |
101 | stackIndex = 0; |
102 | |
103 | if(shader->containsLeaveInstruction()) |
104 | { |
105 | enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
106 | } |
107 | |
108 | for(int i = 0; i < RENDERTARGETS; i++) |
109 | { |
110 | if(state.targetFormat[i] != FORMAT_NULL) |
111 | { |
112 | oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f); |
113 | } |
114 | } |
115 | |
116 | // Create all call site return blocks up front |
117 | for(size_t i = 0; i < shader->getLength(); i++) |
118 | { |
119 | const Shader::Instruction *instruction = shader->getInstruction(i); |
120 | Shader::Opcode opcode = instruction->opcode; |
121 | |
122 | if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) |
123 | { |
124 | const Dst &dst = instruction->dst; |
125 | |
126 | ASSERT(callRetBlock[dst.label].size() == dst.callSite); |
127 | callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); |
128 | } |
129 | } |
130 | |
131 | bool broadcastColor0 = true; |
132 | |
133 | for(size_t i = 0; i < shader->getLength(); i++) |
134 | { |
135 | const Shader::Instruction *instruction = shader->getInstruction(i); |
136 | Shader::Opcode opcode = instruction->opcode; |
137 | |
138 | if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) |
139 | { |
140 | continue; |
141 | } |
142 | |
143 | const Dst &dst = instruction->dst; |
144 | const Src &src0 = instruction->src[0]; |
145 | const Src &src1 = instruction->src[1]; |
146 | const Src &src2 = instruction->src[2]; |
147 | const Src &src3 = instruction->src[3]; |
148 | const Src &src4 = instruction->src[4]; |
149 | |
150 | bool predicate = instruction->predicate; |
151 | Control control = instruction->control; |
152 | bool pp = dst.partialPrecision; |
153 | bool project = instruction->project; |
154 | bool bias = instruction->bias; |
155 | |
156 | Vector4f d; |
157 | Vector4f s0; |
158 | Vector4f s1; |
159 | Vector4f s2; |
160 | Vector4f s3; |
161 | Vector4f s4; |
162 | |
163 | if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input |
164 | { |
165 | if(dst.type == Shader::PARAMETER_TEXTURE) |
166 | { |
167 | d.x = v[2 + dst.index].x; |
168 | d.y = v[2 + dst.index].y; |
169 | d.z = v[2 + dst.index].z; |
170 | d.w = v[2 + dst.index].w; |
171 | } |
172 | else |
173 | { |
174 | d = r[dst.index]; |
175 | } |
176 | } |
177 | |
178 | if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); |
179 | if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); |
180 | if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); |
181 | if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); |
182 | if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); |
183 | |
184 | switch(opcode) |
185 | { |
186 | case Shader::OPCODE_PS_2_0: break; |
187 | case Shader::OPCODE_PS_2_x: break; |
188 | case Shader::OPCODE_PS_3_0: break; |
189 | case Shader::OPCODE_DEF: break; |
190 | case Shader::OPCODE_DCL: break; |
191 | case Shader::OPCODE_NOP: break; |
192 | case Shader::OPCODE_MOV: mov(d, s0); break; |
193 | case Shader::OPCODE_NEG: neg(d, s0); break; |
194 | case Shader::OPCODE_INEG: ineg(d, s0); break; |
195 | case Shader::OPCODE_F2B: f2b(d, s0); break; |
196 | case Shader::OPCODE_B2F: b2f(d, s0); break; |
197 | case Shader::OPCODE_F2I: f2i(d, s0); break; |
198 | case Shader::OPCODE_I2F: i2f(d, s0); break; |
199 | case Shader::OPCODE_F2U: f2u(d, s0); break; |
200 | case Shader::OPCODE_U2F: u2f(d, s0); break; |
201 | case Shader::OPCODE_I2B: i2b(d, s0); break; |
202 | case Shader::OPCODE_B2I: b2i(d, s0); break; |
203 | case Shader::OPCODE_ADD: add(d, s0, s1); break; |
204 | case Shader::OPCODE_IADD: iadd(d, s0, s1); break; |
205 | case Shader::OPCODE_SUB: sub(d, s0, s1); break; |
206 | case Shader::OPCODE_ISUB: isub(d, s0, s1); break; |
207 | case Shader::OPCODE_MUL: mul(d, s0, s1); break; |
208 | case Shader::OPCODE_IMUL: imul(d, s0, s1); break; |
209 | case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; |
210 | case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; |
211 | case Shader::OPCODE_DP1: dp1(d, s0, s1); break; |
212 | case Shader::OPCODE_DP2: dp2(d, s0, s1); break; |
213 | case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; |
214 | case Shader::OPCODE_DP3: dp3(d, s0, s1); break; |
215 | case Shader::OPCODE_DP4: dp4(d, s0, s1); break; |
216 | case Shader::OPCODE_DET2: det2(d, s0, s1); break; |
217 | case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; |
218 | case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; |
219 | case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; |
220 | case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; |
221 | case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; |
222 | case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; |
223 | case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; |
224 | case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; |
225 | case Shader::OPCODE_FRC: frc(d, s0); break; |
226 | case Shader::OPCODE_TRUNC: trunc(d, s0); break; |
227 | case Shader::OPCODE_FLOOR: floor(d, s0); break; |
228 | case Shader::OPCODE_ROUND: round(d, s0); break; |
229 | case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; |
230 | case Shader::OPCODE_CEIL: ceil(d, s0); break; |
231 | case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; |
232 | case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; |
233 | case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; |
234 | case Shader::OPCODE_LOG2: log2(d, s0, pp); break; |
235 | case Shader::OPCODE_EXP: exp(d, s0, pp); break; |
236 | case Shader::OPCODE_LOG: log(d, s0, pp); break; |
237 | case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; |
238 | case Shader::OPCODE_DIV: div(d, s0, s1); break; |
239 | case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; |
240 | case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; |
241 | case Shader::OPCODE_MOD: mod(d, s0, s1); break; |
242 | case Shader::OPCODE_IMOD: imod(d, s0, s1); break; |
243 | case Shader::OPCODE_UMOD: umod(d, s0, s1); break; |
244 | case Shader::OPCODE_SHL: shl(d, s0, s1); break; |
245 | case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; |
246 | case Shader::OPCODE_USHR: ushr(d, s0, s1); break; |
247 | case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; |
248 | case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; |
249 | case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; |
250 | case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; |
251 | case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; |
252 | case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; |
253 | case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; |
254 | case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; |
255 | case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; |
256 | case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; |
257 | case Shader::OPCODE_MIN: min(d, s0, s1); break; |
258 | case Shader::OPCODE_IMIN: imin(d, s0, s1); break; |
259 | case Shader::OPCODE_UMIN: umin(d, s0, s1); break; |
260 | case Shader::OPCODE_MAX: max(d, s0, s1); break; |
261 | case Shader::OPCODE_IMAX: imax(d, s0, s1); break; |
262 | case Shader::OPCODE_UMAX: umax(d, s0, s1); break; |
263 | case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; |
264 | case Shader::OPCODE_STEP: step(d, s0, s1); break; |
265 | case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; |
266 | case Shader::OPCODE_ISINF: isinf(d, s0); break; |
267 | case Shader::OPCODE_ISNAN: isnan(d, s0); break; |
268 | case Shader::OPCODE_FLOATBITSTOINT: |
269 | case Shader::OPCODE_FLOATBITSTOUINT: |
270 | case Shader::OPCODE_INTBITSTOFLOAT: |
271 | case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; |
272 | case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; |
273 | case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; |
274 | case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; |
275 | case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; |
276 | case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; |
277 | case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; |
278 | case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; |
279 | case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; |
280 | case Shader::OPCODE_SGN: sgn(d, s0); break; |
281 | case Shader::OPCODE_ISGN: isgn(d, s0); break; |
282 | case Shader::OPCODE_CRS: crs(d, s0, s1); break; |
283 | case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; |
284 | case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; |
285 | case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; |
286 | case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; |
287 | case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; |
288 | case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; |
289 | case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; |
290 | case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; |
291 | case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; |
292 | case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; |
293 | case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; |
294 | case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; |
295 | case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; |
296 | case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; |
297 | case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; |
298 | case Shader::OPCODE_ABS: abs(d, s0); break; |
299 | case Shader::OPCODE_IABS: iabs(d, s0); break; |
300 | case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; |
301 | case Shader::OPCODE_COS: cos(d, s0, pp); break; |
302 | case Shader::OPCODE_SIN: sin(d, s0, pp); break; |
303 | case Shader::OPCODE_TAN: tan(d, s0, pp); break; |
304 | case Shader::OPCODE_ACOS: acos(d, s0, pp); break; |
305 | case Shader::OPCODE_ASIN: asin(d, s0, pp); break; |
306 | case Shader::OPCODE_ATAN: atan(d, s0, pp); break; |
307 | case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; |
308 | case Shader::OPCODE_COSH: cosh(d, s0, pp); break; |
309 | case Shader::OPCODE_SINH: sinh(d, s0, pp); break; |
310 | case Shader::OPCODE_TANH: tanh(d, s0, pp); break; |
311 | case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; |
312 | case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; |
313 | case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; |
314 | case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; |
315 | case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; |
316 | case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; |
317 | case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; |
318 | case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; |
319 | case Shader::OPCODE_TEX: TEX(d, s0, src1, project, bias); break; |
320 | case Shader::OPCODE_TEXLDD: TEXGRAD(d, s0, src1, s2, s3); break; |
321 | case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; |
322 | case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; |
323 | case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; |
324 | case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; |
325 | case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; |
326 | case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; |
327 | case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; |
328 | case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; |
329 | case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; |
330 | case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; |
331 | case Shader::OPCODE_TEXBIAS: TEXBIAS(d, s0, src1, s2.x); break; |
332 | case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x); break; |
333 | case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; |
334 | case Shader::OPCODE_DFDX: DFDX(d, s0); break; |
335 | case Shader::OPCODE_DFDY: DFDY(d, s0); break; |
336 | case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; |
337 | case Shader::OPCODE_BREAK: BREAK(); break; |
338 | case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; |
339 | case Shader::OPCODE_BREAKP: BREAKP(src0); break; |
340 | case Shader::OPCODE_CONTINUE: CONTINUE(); break; |
341 | case Shader::OPCODE_TEST: TEST(); break; |
342 | case Shader::OPCODE_SCALAR: SCALAR(); break; |
343 | case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; |
344 | case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; |
345 | case Shader::OPCODE_ELSE: ELSE(); break; |
346 | case Shader::OPCODE_ENDIF: ENDIF(); break; |
347 | case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; |
348 | case Shader::OPCODE_ENDREP: ENDREP(); break; |
349 | case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; |
350 | case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; |
351 | case Shader::OPCODE_IF: IF(src0); break; |
352 | case Shader::OPCODE_IFC: IFC(s0, s1, control); break; |
353 | case Shader::OPCODE_LABEL: LABEL(dst.index); break; |
354 | case Shader::OPCODE_LOOP: LOOP(src1); break; |
355 | case Shader::OPCODE_REP: REP(src0); break; |
356 | case Shader::OPCODE_WHILE: WHILE(src0); break; |
357 | case Shader::OPCODE_SWITCH: SWITCH(); break; |
358 | case Shader::OPCODE_RET: RET(); break; |
359 | case Shader::OPCODE_LEAVE: LEAVE(); break; |
360 | case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; |
361 | case Shader::OPCODE_ALL: all(d.x, s0); break; |
362 | case Shader::OPCODE_ANY: any(d.x, s0); break; |
363 | case Shader::OPCODE_NOT: bitwise_not(d, s0); break; |
364 | case Shader::OPCODE_OR: bitwise_or(d, s0, s1); break; |
365 | case Shader::OPCODE_XOR: bitwise_xor(d, s0, s1); break; |
366 | case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; |
367 | case Shader::OPCODE_EQ: equal(d, s0, s1); break; |
368 | case Shader::OPCODE_NE: notEqual(d, s0, s1); break; |
369 | case Shader::OPCODE_END: break; |
370 | default: |
371 | ASSERT(false); |
372 | } |
373 | |
374 | if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) |
375 | { |
376 | if(dst.saturate) |
377 | { |
378 | if(dst.x) d.x = Max(d.x, Float4(0.0f)); |
379 | if(dst.y) d.y = Max(d.y, Float4(0.0f)); |
380 | if(dst.z) d.z = Max(d.z, Float4(0.0f)); |
381 | if(dst.w) d.w = Max(d.w, Float4(0.0f)); |
382 | |
383 | if(dst.x) d.x = Min(d.x, Float4(1.0f)); |
384 | if(dst.y) d.y = Min(d.y, Float4(1.0f)); |
385 | if(dst.z) d.z = Min(d.z, Float4(1.0f)); |
386 | if(dst.w) d.w = Min(d.w, Float4(1.0f)); |
387 | } |
388 | |
389 | if(instruction->isPredicated()) |
390 | { |
391 | Vector4f pDst; // FIXME: Rename |
392 | |
393 | switch(dst.type) |
394 | { |
395 | case Shader::PARAMETER_TEMP: |
396 | if(dst.rel.type == Shader::PARAMETER_VOID) |
397 | { |
398 | if(dst.x) pDst.x = r[dst.index].x; |
399 | if(dst.y) pDst.y = r[dst.index].y; |
400 | if(dst.z) pDst.z = r[dst.index].z; |
401 | if(dst.w) pDst.w = r[dst.index].w; |
402 | } |
403 | else if(!dst.rel.dynamic) |
404 | { |
405 | Int a = dst.index + relativeAddress(dst.rel); |
406 | |
407 | if(dst.x) pDst.x = r[a].x; |
408 | if(dst.y) pDst.y = r[a].y; |
409 | if(dst.z) pDst.z = r[a].z; |
410 | if(dst.w) pDst.w = r[a].w; |
411 | } |
412 | else |
413 | { |
414 | Int4 a = dst.index + dynamicAddress(dst.rel); |
415 | |
416 | if(dst.x) pDst.x = r[a].x; |
417 | if(dst.y) pDst.y = r[a].y; |
418 | if(dst.z) pDst.z = r[a].z; |
419 | if(dst.w) pDst.w = r[a].w; |
420 | } |
421 | break; |
422 | case Shader::PARAMETER_COLOROUT: |
423 | if(dst.rel.type == Shader::PARAMETER_VOID) |
424 | { |
425 | if(dst.x) pDst.x = oC[dst.index].x; |
426 | if(dst.y) pDst.y = oC[dst.index].y; |
427 | if(dst.z) pDst.z = oC[dst.index].z; |
428 | if(dst.w) pDst.w = oC[dst.index].w; |
429 | } |
430 | else if(!dst.rel.dynamic) |
431 | { |
432 | Int a = dst.index + relativeAddress(dst.rel); |
433 | |
434 | if(dst.x) pDst.x = oC[a].x; |
435 | if(dst.y) pDst.y = oC[a].y; |
436 | if(dst.z) pDst.z = oC[a].z; |
437 | if(dst.w) pDst.w = oC[a].w; |
438 | } |
439 | else |
440 | { |
441 | Int4 a = dst.index + dynamicAddress(dst.rel); |
442 | |
443 | if(dst.x) pDst.x = oC[a].x; |
444 | if(dst.y) pDst.y = oC[a].y; |
445 | if(dst.z) pDst.z = oC[a].z; |
446 | if(dst.w) pDst.w = oC[a].w; |
447 | } |
448 | break; |
449 | case Shader::PARAMETER_PREDICATE: |
450 | if(dst.x) pDst.x = p0.x; |
451 | if(dst.y) pDst.y = p0.y; |
452 | if(dst.z) pDst.z = p0.z; |
453 | if(dst.w) pDst.w = p0.w; |
454 | break; |
455 | case Shader::PARAMETER_DEPTHOUT: |
456 | pDst.x = oDepth; |
457 | break; |
458 | default: |
459 | ASSERT(false); |
460 | } |
461 | |
462 | Int4 enable = enableMask(instruction); |
463 | |
464 | Int4 xEnable = enable; |
465 | Int4 yEnable = enable; |
466 | Int4 zEnable = enable; |
467 | Int4 wEnable = enable; |
468 | |
469 | if(predicate) |
470 | { |
471 | unsigned char pSwizzle = instruction->predicateSwizzle; |
472 | |
473 | Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; |
474 | Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; |
475 | Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; |
476 | Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; |
477 | |
478 | if(!instruction->predicateNot) |
479 | { |
480 | if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); |
481 | if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); |
482 | if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); |
483 | if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); |
484 | } |
485 | else |
486 | { |
487 | if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); |
488 | if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); |
489 | if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); |
490 | if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); |
491 | } |
492 | } |
493 | |
494 | if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); |
495 | if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); |
496 | if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); |
497 | if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); |
498 | |
499 | if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); |
500 | if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); |
501 | if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); |
502 | if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); |
503 | } |
504 | |
505 | switch(dst.type) |
506 | { |
507 | case Shader::PARAMETER_TEMP: |
508 | if(dst.rel.type == Shader::PARAMETER_VOID) |
509 | { |
510 | if(dst.x) r[dst.index].x = d.x; |
511 | if(dst.y) r[dst.index].y = d.y; |
512 | if(dst.z) r[dst.index].z = d.z; |
513 | if(dst.w) r[dst.index].w = d.w; |
514 | } |
515 | else if(!dst.rel.dynamic) |
516 | { |
517 | Int a = dst.index + relativeAddress(dst.rel); |
518 | |
519 | if(dst.x) r[a].x = d.x; |
520 | if(dst.y) r[a].y = d.y; |
521 | if(dst.z) r[a].z = d.z; |
522 | if(dst.w) r[a].w = d.w; |
523 | } |
524 | else |
525 | { |
526 | Int4 a = dst.index + dynamicAddress(dst.rel); |
527 | |
528 | if(dst.x) r.scatter_x(a, d.x); |
529 | if(dst.y) r.scatter_y(a, d.y); |
530 | if(dst.z) r.scatter_z(a, d.z); |
531 | if(dst.w) r.scatter_w(a, d.w); |
532 | } |
533 | break; |
534 | case Shader::PARAMETER_COLOROUT: |
535 | if(dst.rel.type == Shader::PARAMETER_VOID) |
536 | { |
537 | broadcastColor0 = (dst.index == 0) && broadcastColor0; |
538 | |
539 | if(dst.x) oC[dst.index].x = d.x; |
540 | if(dst.y) oC[dst.index].y = d.y; |
541 | if(dst.z) oC[dst.index].z = d.z; |
542 | if(dst.w) oC[dst.index].w = d.w; |
543 | } |
544 | else if(!dst.rel.dynamic) |
545 | { |
546 | broadcastColor0 = false; |
547 | Int a = dst.index + relativeAddress(dst.rel); |
548 | |
549 | if(dst.x) oC[a].x = d.x; |
550 | if(dst.y) oC[a].y = d.y; |
551 | if(dst.z) oC[a].z = d.z; |
552 | if(dst.w) oC[a].w = d.w; |
553 | } |
554 | else |
555 | { |
556 | broadcastColor0 = false; |
557 | Int4 a = dst.index + dynamicAddress(dst.rel); |
558 | |
559 | if(dst.x) oC.scatter_x(a, d.x); |
560 | if(dst.y) oC.scatter_y(a, d.y); |
561 | if(dst.z) oC.scatter_z(a, d.z); |
562 | if(dst.w) oC.scatter_w(a, d.w); |
563 | } |
564 | break; |
565 | case Shader::PARAMETER_PREDICATE: |
566 | if(dst.x) p0.x = d.x; |
567 | if(dst.y) p0.y = d.y; |
568 | if(dst.z) p0.z = d.z; |
569 | if(dst.w) p0.w = d.w; |
570 | break; |
571 | case Shader::PARAMETER_DEPTHOUT: |
572 | oDepth = d.x; |
573 | break; |
574 | default: |
575 | ASSERT(false); |
576 | } |
577 | } |
578 | } |
579 | |
580 | if(currentLabel != -1) |
581 | { |
582 | Nucleus::setInsertBlock(returnBlock); |
583 | } |
584 | |
585 | if(broadcastColor0) |
586 | { |
587 | for(int i = 0; i < RENDERTARGETS; i++) |
588 | { |
589 | c[i] = oC[0]; |
590 | } |
591 | } |
592 | else |
593 | { |
594 | for(int i = 0; i < RENDERTARGETS; i++) |
595 | { |
596 | c[i] = oC[i]; |
597 | } |
598 | } |
599 | |
600 | clampColor(c); |
601 | |
602 | if(state.depthOverride) |
603 | { |
604 | oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f)); |
605 | } |
606 | } |
607 | |
608 | Bool PixelProgram::alphaTest(Int cMask[4]) |
609 | { |
610 | if(!state.alphaTestActive()) |
611 | { |
612 | return true; |
613 | } |
614 | |
615 | Int aMask; |
616 | |
617 | if(state.transparencyAntialiasing == TRANSPARENCY_NONE) |
618 | { |
619 | Short4 alpha = RoundShort4(c[0].w * Float4(0x1000)); |
620 | |
621 | PixelRoutine::alphaTest(aMask, alpha); |
622 | |
623 | for(unsigned int q = 0; q < state.multiSample; q++) |
624 | { |
625 | cMask[q] &= aMask; |
626 | } |
627 | } |
628 | else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) |
629 | { |
630 | alphaToCoverage(cMask, c[0].w); |
631 | } |
632 | else ASSERT(false); |
633 | |
634 | Int pass = cMask[0]; |
635 | |
636 | for(unsigned int q = 1; q < state.multiSample; q++) |
637 | { |
638 | pass = pass | cMask[q]; |
639 | } |
640 | |
641 | return pass != 0x0; |
642 | } |
643 | |
644 | void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) |
645 | { |
646 | for(int index = 0; index < RENDERTARGETS; index++) |
647 | { |
648 | if(!state.colorWriteActive(index)) |
649 | { |
650 | continue; |
651 | } |
652 | |
653 | if(!postBlendSRGB && state.writeSRGB && !isSRGB(index)) |
654 | { |
655 | c[index].x = linearToSRGB(c[index].x); |
656 | c[index].y = linearToSRGB(c[index].y); |
657 | c[index].z = linearToSRGB(c[index].z); |
658 | } |
659 | |
660 | if(index == 0) |
661 | { |
662 | fogBlend(c[index], fog); |
663 | } |
664 | |
665 | switch(state.targetFormat[index]) |
666 | { |
667 | case FORMAT_R5G6B5: |
668 | case FORMAT_X8R8G8B8: |
669 | case FORMAT_X8B8G8R8: |
670 | case FORMAT_A8R8G8B8: |
671 | case FORMAT_A8B8G8R8: |
672 | case FORMAT_SRGB8_X8: |
673 | case FORMAT_SRGB8_A8: |
674 | case FORMAT_G8R8: |
675 | case FORMAT_R8: |
676 | case FORMAT_A8: |
677 | case FORMAT_G16R16: |
678 | case FORMAT_A16B16G16R16: |
679 | for(unsigned int q = 0; q < state.multiSample; q++) |
680 | { |
681 | Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); |
682 | Vector4s color; |
683 | |
684 | if(state.targetFormat[index] == FORMAT_R5G6B5) |
685 | { |
686 | color.x = UShort4(c[index].x * Float4(0xFBFF), false); |
687 | color.y = UShort4(c[index].y * Float4(0xFDFF), false); |
688 | color.z = UShort4(c[index].z * Float4(0xFBFF), false); |
689 | color.w = UShort4(c[index].w * Float4(0xFFFF), false); |
690 | } |
691 | else |
692 | { |
693 | color.x = convertFixed16(c[index].x, false); |
694 | color.y = convertFixed16(c[index].y, false); |
695 | color.z = convertFixed16(c[index].z, false); |
696 | color.w = convertFixed16(c[index].w, false); |
697 | } |
698 | |
699 | if(state.multiSampleMask & (1 << q)) |
700 | { |
701 | alphaBlend(index, buffer, color, x); |
702 | logicOperation(index, buffer, color, x); |
703 | writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); |
704 | } |
705 | } |
706 | break; |
707 | case FORMAT_R32F: |
708 | case FORMAT_G32R32F: |
709 | case FORMAT_X32B32G32R32F: |
710 | case FORMAT_A32B32G32R32F: |
711 | case FORMAT_X32B32G32R32F_UNSIGNED: |
712 | case FORMAT_R32I: |
713 | case FORMAT_G32R32I: |
714 | case FORMAT_A32B32G32R32I: |
715 | case FORMAT_R32UI: |
716 | case FORMAT_G32R32UI: |
717 | case FORMAT_A32B32G32R32UI: |
718 | case FORMAT_R16I: |
719 | case FORMAT_G16R16I: |
720 | case FORMAT_A16B16G16R16I: |
721 | case FORMAT_R16UI: |
722 | case FORMAT_G16R16UI: |
723 | case FORMAT_A16B16G16R16UI: |
724 | case FORMAT_R8I: |
725 | case FORMAT_G8R8I: |
726 | case FORMAT_A8B8G8R8I: |
727 | case FORMAT_R8UI: |
728 | case FORMAT_G8R8UI: |
729 | case FORMAT_A8B8G8R8UI: |
730 | for(unsigned int q = 0; q < state.multiSample; q++) |
731 | { |
732 | Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); |
733 | Vector4f color = c[index]; |
734 | |
735 | if(state.multiSampleMask & (1 << q)) |
736 | { |
737 | alphaBlend(index, buffer, color, x); |
738 | writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); |
739 | } |
740 | } |
741 | break; |
742 | default: |
743 | ASSERT(false); |
744 | } |
745 | } |
746 | } |
747 | |
748 | Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) |
749 | { |
750 | Vector4f tmp; |
751 | |
752 | if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) |
753 | { |
754 | tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function); |
755 | } |
756 | else |
757 | { |
758 | Int index = As<Int>(Float(fetchRegister(sampler).x.x)); |
759 | |
760 | for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) |
761 | { |
762 | if(shader->usesSampler(i)) |
763 | { |
764 | If(index == i) |
765 | { |
766 | tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function); |
767 | // FIXME: When the sampler states are the same, we could use one sampler and just index the texture |
768 | } |
769 | } |
770 | } |
771 | } |
772 | |
773 | Vector4f c; |
774 | c.x = tmp[(sampler.swizzle >> 0) & 0x3]; |
775 | c.y = tmp[(sampler.swizzle >> 2) & 0x3]; |
776 | c.z = tmp[(sampler.swizzle >> 4) & 0x3]; |
777 | c.w = tmp[(sampler.swizzle >> 6) & 0x3]; |
778 | |
779 | return c; |
780 | } |
781 | |
782 | Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) |
783 | { |
784 | #if PERF_PROFILE |
785 | Long texTime = Ticks(); |
786 | #endif |
787 | |
788 | Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture); |
789 | Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function); |
790 | |
791 | #if PERF_PROFILE |
792 | cycles[PERF_TEX] += Ticks() - texTime; |
793 | #endif |
794 | |
795 | return c; |
796 | } |
797 | |
798 | void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) |
799 | { |
800 | for(int index = 0; index < RENDERTARGETS; index++) |
801 | { |
802 | if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive())) |
803 | { |
804 | continue; |
805 | } |
806 | |
807 | switch(state.targetFormat[index]) |
808 | { |
809 | case FORMAT_NULL: |
810 | break; |
811 | case FORMAT_R5G6B5: |
812 | case FORMAT_A8R8G8B8: |
813 | case FORMAT_A8B8G8R8: |
814 | case FORMAT_X8R8G8B8: |
815 | case FORMAT_X8B8G8R8: |
816 | case FORMAT_SRGB8_X8: |
817 | case FORMAT_SRGB8_A8: |
818 | case FORMAT_G8R8: |
819 | case FORMAT_R8: |
820 | case FORMAT_A8: |
821 | case FORMAT_G16R16: |
822 | case FORMAT_A16B16G16R16: |
823 | oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); |
824 | oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); |
825 | oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); |
826 | oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); |
827 | break; |
828 | case FORMAT_R32F: |
829 | case FORMAT_G32R32F: |
830 | case FORMAT_X32B32G32R32F: |
831 | case FORMAT_A32B32G32R32F: |
832 | case FORMAT_R32I: |
833 | case FORMAT_G32R32I: |
834 | case FORMAT_A32B32G32R32I: |
835 | case FORMAT_R32UI: |
836 | case FORMAT_G32R32UI: |
837 | case FORMAT_A32B32G32R32UI: |
838 | case FORMAT_R16I: |
839 | case FORMAT_G16R16I: |
840 | case FORMAT_A16B16G16R16I: |
841 | case FORMAT_R16UI: |
842 | case FORMAT_G16R16UI: |
843 | case FORMAT_A16B16G16R16UI: |
844 | case FORMAT_R8I: |
845 | case FORMAT_G8R8I: |
846 | case FORMAT_A8B8G8R8I: |
847 | case FORMAT_R8UI: |
848 | case FORMAT_G8R8UI: |
849 | case FORMAT_A8B8G8R8UI: |
850 | break; |
851 | case FORMAT_X32B32G32R32F_UNSIGNED: |
852 | oC[index].x = Max(oC[index].x, Float4(0.0f)); |
853 | oC[index].y = Max(oC[index].y, Float4(0.0f)); |
854 | oC[index].z = Max(oC[index].z, Float4(0.0f)); |
855 | oC[index].w = Max(oC[index].w, Float4(0.0f)); |
856 | break; |
857 | default: |
858 | ASSERT(false); |
859 | } |
860 | } |
861 | } |
862 | |
863 | Int4 PixelProgram::enableMask(const Shader::Instruction *instruction) |
864 | { |
865 | if(scalar) |
866 | { |
867 | return Int4(0xFFFFFFFF); |
868 | } |
869 | |
870 | Int4 enable = instruction->analysisBranch ? Int4(enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]) : Int4(0xFFFFFFFF); |
871 | |
872 | if(shader->containsBreakInstruction() && instruction->analysisBreak) |
873 | { |
874 | enable &= enableBreak; |
875 | } |
876 | |
877 | if(shader->containsContinueInstruction() && instruction->analysisContinue) |
878 | { |
879 | enable &= enableContinue; |
880 | } |
881 | |
882 | if(shader->containsLeaveInstruction() && instruction->analysisLeave) |
883 | { |
884 | enable &= enableLeave; |
885 | } |
886 | |
887 | return enable; |
888 | } |
889 | |
// Reads the source operand described by 'src' and returns it after applying
// the operand's swizzle and modifier. 'offset' is added to src.index to select
// the register (the matrix instructions use it to fetch consecutive rows).
// Operand types handled with an early 'return reg' (sampler, predicate, void,
// integer/boolean constants, loop) bypass the swizzle and modifier stages.
Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
{
	Vector4f reg;
	unsigned int i = src.index + offset;

	switch(src.type)
	{
	case Shader::PARAMETER_TEMP:
		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
		{
			reg = r[i];
		}
		else if(!src.rel.dynamic)
		{
			reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
		}
		else
		{
			reg = r[i + dynamicAddress(src.rel)];
		}
		break;
	case Shader::PARAMETER_INPUT:
		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
		{
			reg = v[i];
		}
		else if(!src.rel.dynamic)
		{
			reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
		}
		else
		{
			reg = v[i + dynamicAddress(src.rel)];
		}
		break;
	case Shader::PARAMETER_CONST:
		reg = readConstant(src, offset);
		break;
	case Shader::PARAMETER_TEXTURE:
		// Texture coordinate registers are read from v[] starting at index 2.
		reg = v[2 + i];
		break;
	case Shader::PARAMETER_MISCTYPE:
		// Only the position and face registers are recognized; any other
		// index leaves 'reg' default-constructed.
		if(src.index == Shader::VPosIndex) reg = vPos;
		if(src.index == Shader::VFaceIndex) reg = vFace;
		break;
	case Shader::PARAMETER_SAMPLER:
		// The sampler index is returned as raw integer bits in reg.x. Only a
		// temporary-register relative offset is supported here.
		if(src.rel.type == Shader::PARAMETER_VOID)
		{
			reg.x = As<Float4>(Int4(i));
		}
		else if(src.rel.type == Shader::PARAMETER_TEMP)
		{
			reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
		}
		return reg;
	case Shader::PARAMETER_PREDICATE:   return reg;   // Dummy
	case Shader::PARAMETER_VOID:        return reg;   // Dummy
	case Shader::PARAMETER_FLOAT4LITERAL:
		// This is used for all literal types, and since Reactor doesn't guarantee
		// preserving the bit pattern of float constants, we must construct them
		// as integer constants and bitcast.
		reg.x = As<Float4>(Int4(src.integer[0]));
		reg.y = As<Float4>(Int4(src.integer[1]));
		reg.z = As<Float4>(Int4(src.integer[2]));
		reg.w = As<Float4>(Int4(src.integer[3]));
		break;
	case Shader::PARAMETER_CONSTINT:    return reg;   // Dummy
	case Shader::PARAMETER_CONSTBOOL:   return reg;   // Dummy
	case Shader::PARAMETER_LOOP:        return reg;   // Dummy
	case Shader::PARAMETER_COLOROUT:
		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
		{
			reg = oC[i];
		}
		else if(!src.rel.dynamic)
		{
			reg = oC[i + relativeAddress(src.rel, src.bufferIndex)];
		}
		else
		{
			reg = oC[i + dynamicAddress(src.rel)];
		}
		break;
	case Shader::PARAMETER_DEPTHOUT:
		reg.x = oDepth;
		break;
	default:
		ASSERT(false);
	}

	// Swizzle: each destination component selects a source component using
	// two bits of src.swizzle (bits 0-1 for x, 2-3 for y, 4-5 for z, 6-7 for w).
	const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
	const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
	const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
	const Float4 &w = reg[(src.swizzle >> 6) & 0x3];

	Vector4f mod;

	// Apply the source modifier to the swizzled components.
	switch(src.modifier)
	{
	case Shader::MODIFIER_NONE:
		mod.x = x;
		mod.y = y;
		mod.z = z;
		mod.w = w;
		break;
	case Shader::MODIFIER_NEGATE:
		mod.x = -x;
		mod.y = -y;
		mod.z = -z;
		mod.w = -w;
		break;
	case Shader::MODIFIER_ABS:
		mod.x = Abs(x);
		mod.y = Abs(y);
		mod.z = Abs(z);
		mod.w = Abs(w);
		break;
	case Shader::MODIFIER_ABS_NEGATE:
		mod.x = -Abs(x);
		mod.y = -Abs(y);
		mod.z = -Abs(z);
		mod.w = -Abs(w);
		break;
	case Shader::MODIFIER_NOT:
		// Bitwise complement of the raw register bits.
		mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
		mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
		mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
		mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
		break;
	default:
		ASSERT(false);
	}

	return mod;
}
1025 | |
1026 | RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index) |
1027 | { |
1028 | if(bufferIndex == -1) |
1029 | { |
1030 | return data + OFFSET(DrawData, ps.c[index]); |
1031 | } |
1032 | else |
1033 | { |
1034 | return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index; |
1035 | } |
1036 | } |
1037 | |
1038 | RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset) |
1039 | { |
1040 | return uniformAddress(bufferIndex, index) + offset * sizeof(float4); |
1041 | } |
1042 | |
// Reads constant register src.index + offset and returns it with each
// component replicated across a Float4. Three addressing modes are handled:
// not relative, relative with a single address for all four elements, and
// dynamic with a separate address per element.
Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
{
	Vector4f c;
	unsigned int i = src.index + offset;

	if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
	{
		// Load the float4 once, then broadcast one component into each of x/y/z/w.
		c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));

		c.x = c.x.xxxx;
		c.y = c.y.yyyy;
		c.z = c.z.zzzz;
		c.w = c.w.wwww;

		if(shader->containsDefineInstruction())   // Constant may be known at compile time
		{
			// Use the literal values of the first DEF instruction that targets
			// register i, overriding the values loaded above.
			for(size_t j = 0; j < shader->getLength(); j++)
			{
				const Shader::Instruction &instruction = *shader->getInstruction(j);

				if(instruction.opcode == Shader::OPCODE_DEF)
				{
					if(instruction.dst.index == i)
					{
						c.x = Float4(instruction.src[0].value[0]);
						c.y = Float4(instruction.src[0].value[1]);
						c.z = Float4(instruction.src[0].value[2]);
						c.w = Float4(instruction.src[0].value[3]);

						break;
					}
				}
			}
		}
	}
	else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
	{
		// One relative address shared by all four elements.
		Int a = relativeAddress(src.rel, src.bufferIndex);

		c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));

		c.x = c.x.xxxx;
		c.y = c.y.yyyy;
		c.z = c.z.zzzz;
		c.w = c.w.wwww;
	}
	else
	{
		// Dynamic addressing: the selected component of the relative register
		// supplies a separate index for each of the four elements.
		int component = src.rel.swizzle & 0x03;
		Float4 a;

		switch(src.rel.type)
		{
		case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
		case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
		case Shader::PARAMETER_OUTPUT:   a = oC[src.rel.index][component]; break;
		case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
		case Shader::PARAMETER_MISCTYPE:
			switch(src.rel.index)
			{
			case Shader::VPosIndex:  a = vPos.x;  break;
			case Shader::VFaceIndex: a = vFace.x; break;
			default: ASSERT(false);
			}
			break;
		default: ASSERT(false);
		}

		// The register bits are reinterpreted as integers, not converted.
		Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);

		// NOTE(review): the clamp uses VERTEX_UNIFORM_VECTORS although this is the
		// pixel program and the non-buffer path reads from ps.c - confirm that the
		// limit and the extra zero register match the pixel shader constant array.
		index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}

		Int index0 = Extract(index, 0);
		Int index1 = Extract(index, 1);
		Int index2 = Extract(index, 2);
		Int index3 = Extract(index, 3);

		// Load one full constant per element; c.x..c.w temporarily hold rows.
		c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
		c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
		c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
		c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);

		// Transpose so that c.x holds the x components of all four constants, etc.
		transpose4x4(c.x, c.y, c.z, c.w);
	}

	return c;
}
1130 | |
1131 | Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex) |
1132 | { |
1133 | ASSERT(!rel.dynamic); |
1134 | |
1135 | if(rel.type == Shader::PARAMETER_TEMP) |
1136 | { |
1137 | return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale; |
1138 | } |
1139 | else if(rel.type == Shader::PARAMETER_INPUT) |
1140 | { |
1141 | return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale; |
1142 | } |
1143 | else if(rel.type == Shader::PARAMETER_OUTPUT) |
1144 | { |
1145 | return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale; |
1146 | } |
1147 | else if(rel.type == Shader::PARAMETER_CONST) |
1148 | { |
1149 | return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale; |
1150 | } |
1151 | else if(rel.type == Shader::PARAMETER_LOOP) |
1152 | { |
1153 | return aL[loopDepth]; |
1154 | } |
1155 | else ASSERT(false); |
1156 | |
1157 | return 0; |
1158 | } |
1159 | |
1160 | Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel) |
1161 | { |
1162 | int component = rel.swizzle & 0x03; |
1163 | Float4 a; |
1164 | |
1165 | switch(rel.type) |
1166 | { |
1167 | case Shader::PARAMETER_TEMP: a = r[rel.index][component]; break; |
1168 | case Shader::PARAMETER_INPUT: a = v[rel.index][component]; break; |
1169 | case Shader::PARAMETER_OUTPUT: a = oC[rel.index][component]; break; |
1170 | case Shader::PARAMETER_MISCTYPE: |
1171 | switch(rel.index) |
1172 | { |
1173 | case Shader::VPosIndex: a = vPos.x; break; |
1174 | case Shader::VFaceIndex: a = vFace.x; break; |
1175 | default: ASSERT(false); |
1176 | } |
1177 | break; |
1178 | default: ASSERT(false); |
1179 | } |
1180 | |
1181 | return As<Int4>(a) * Int4(rel.scale); |
1182 | } |
1183 | |
1184 | Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) |
1185 | { |
1186 | Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); |
1187 | Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); |
1188 | |
1189 | return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); |
1190 | } |
1191 | |
1192 | void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1) |
1193 | { |
1194 | Vector4f row0 = fetchRegister(src1, 0); |
1195 | Vector4f row1 = fetchRegister(src1, 1); |
1196 | |
1197 | dst.x = dot3(src0, row0); |
1198 | dst.y = dot3(src0, row1); |
1199 | } |
1200 | |
1201 | void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1) |
1202 | { |
1203 | Vector4f row0 = fetchRegister(src1, 0); |
1204 | Vector4f row1 = fetchRegister(src1, 1); |
1205 | Vector4f row2 = fetchRegister(src1, 2); |
1206 | |
1207 | dst.x = dot3(src0, row0); |
1208 | dst.y = dot3(src0, row1); |
1209 | dst.z = dot3(src0, row2); |
1210 | } |
1211 | |
1212 | void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1) |
1213 | { |
1214 | Vector4f row0 = fetchRegister(src1, 0); |
1215 | Vector4f row1 = fetchRegister(src1, 1); |
1216 | Vector4f row2 = fetchRegister(src1, 2); |
1217 | Vector4f row3 = fetchRegister(src1, 3); |
1218 | |
1219 | dst.x = dot3(src0, row0); |
1220 | dst.y = dot3(src0, row1); |
1221 | dst.z = dot3(src0, row2); |
1222 | dst.w = dot3(src0, row3); |
1223 | } |
1224 | |
1225 | void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1) |
1226 | { |
1227 | Vector4f row0 = fetchRegister(src1, 0); |
1228 | Vector4f row1 = fetchRegister(src1, 1); |
1229 | Vector4f row2 = fetchRegister(src1, 2); |
1230 | |
1231 | dst.x = dot4(src0, row0); |
1232 | dst.y = dot4(src0, row1); |
1233 | dst.z = dot4(src0, row2); |
1234 | } |
1235 | |
1236 | void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1) |
1237 | { |
1238 | Vector4f row0 = fetchRegister(src1, 0); |
1239 | Vector4f row1 = fetchRegister(src1, 1); |
1240 | Vector4f row2 = fetchRegister(src1, 2); |
1241 | Vector4f row3 = fetchRegister(src1, 3); |
1242 | |
1243 | dst.x = dot4(src0, row0); |
1244 | dst.y = dot4(src0, row1); |
1245 | dst.z = dot4(src0, row2); |
1246 | dst.w = dot4(src0, row3); |
1247 | } |
1248 | |
1249 | void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) |
1250 | { |
1251 | if(project) |
1252 | { |
1253 | Vector4f proj; |
1254 | Float4 rw = reciprocal(src0.w); |
1255 | proj.x = src0.x * rw; |
1256 | proj.y = src0.y * rw; |
1257 | proj.z = src0.z * rw; |
1258 | |
1259 | dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit); |
1260 | } |
1261 | else |
1262 | { |
1263 | dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit); |
1264 | } |
1265 | } |
1266 | |
1267 | void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset) |
1268 | { |
1269 | dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset}); |
1270 | } |
1271 | |
1272 | void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod) |
1273 | { |
1274 | dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset}); |
1275 | } |
1276 | |
1277 | void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias) |
1278 | { |
1279 | dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias); |
1280 | } |
1281 | |
1282 | void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias) |
1283 | { |
1284 | dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset}); |
1285 | } |
1286 | |
1287 | void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) |
1288 | { |
1289 | dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch); |
1290 | } |
1291 | |
1292 | void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) |
1293 | { |
1294 | dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset}); |
1295 | } |
1296 | |
1297 | void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy) |
1298 | { |
1299 | dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad); |
1300 | } |
1301 | |
1302 | void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset) |
1303 | { |
1304 | dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset}); |
1305 | } |
1306 | |
1307 | void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod) |
1308 | { |
1309 | dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod); |
1310 | } |
1311 | |
1312 | void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) |
1313 | { |
1314 | bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID); |
1315 | Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture); |
1316 | Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + offset; |
1317 | |
1318 | dst = SamplerCore::textureSize(texture, lod); |
1319 | } |
1320 | |
1321 | void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask) |
1322 | { |
1323 | Int kill = -1; |
1324 | |
1325 | if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f))); |
1326 | if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f))); |
1327 | if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f))); |
1328 | if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f))); |
1329 | |
1330 | // FIXME: Dynamic branching affects TEXKILL? |
1331 | // if(shader->containsDynamicBranching()) |
1332 | // { |
1333 | // kill = ~SignMask(enableMask()); |
1334 | // } |
1335 | |
1336 | for(unsigned int q = 0; q < state.multiSample; q++) |
1337 | { |
1338 | cMask[q] &= kill; |
1339 | } |
1340 | |
1341 | // FIXME: Branch to end of shader if all killed? |
1342 | } |
1343 | |
1344 | void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction) |
1345 | { |
1346 | Int kill = 0; |
1347 | |
1348 | if(shader->containsDynamicBranching()) |
1349 | { |
1350 | kill = ~SignMask(enableMask(instruction)); |
1351 | } |
1352 | |
1353 | for(unsigned int q = 0; q < state.multiSample; q++) |
1354 | { |
1355 | cMask[q] &= kill; |
1356 | } |
1357 | |
1358 | // FIXME: Branch to end of shader if all killed? |
1359 | } |
1360 | |
1361 | void PixelProgram::DFDX(Vector4f &dst, Vector4f &src) |
1362 | { |
1363 | dst.x = src.x.yyww - src.x.xxzz; |
1364 | dst.y = src.y.yyww - src.y.xxzz; |
1365 | dst.z = src.z.yyww - src.z.xxzz; |
1366 | dst.w = src.w.yyww - src.w.xxzz; |
1367 | } |
1368 | |
1369 | void PixelProgram::DFDY(Vector4f &dst, Vector4f &src) |
1370 | { |
1371 | dst.x = src.x.zwzw - src.x.xyxy; |
1372 | dst.y = src.y.zwzw - src.y.xyxy; |
1373 | dst.z = src.z.zwzw - src.z.xyxy; |
1374 | dst.w = src.w.zwzw - src.w.xyxy; |
1375 | } |
1376 | |
1377 | void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src) |
1378 | { |
1379 | // abs(dFdx(src)) + abs(dFdy(src)); |
1380 | dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy); |
1381 | dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy); |
1382 | dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy); |
1383 | dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy); |
1384 | } |
1385 | |
1386 | void PixelProgram::BREAK() |
1387 | { |
1388 | enableBreak = enableBreak & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1389 | } |
1390 | |
1391 | void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) |
1392 | { |
1393 | Int4 condition; |
1394 | |
1395 | switch(control) |
1396 | { |
1397 | case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; |
1398 | case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; |
1399 | case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; |
1400 | case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; |
1401 | case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; |
1402 | case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; |
1403 | default: |
1404 | ASSERT(false); |
1405 | } |
1406 | |
1407 | BREAK(condition); |
1408 | } |
1409 | |
1410 | void PixelProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC |
1411 | { |
1412 | Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); |
1413 | |
1414 | if(predicateRegister.modifier == Shader::MODIFIER_NOT) |
1415 | { |
1416 | condition = ~condition; |
1417 | } |
1418 | |
1419 | BREAK(condition); |
1420 | } |
1421 | |
1422 | void PixelProgram::BREAK(Int4 &condition) |
1423 | { |
1424 | condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1425 | |
1426 | enableBreak = enableBreak & ~condition; |
1427 | } |
1428 | |
1429 | void PixelProgram::CONTINUE() |
1430 | { |
1431 | enableContinue = enableContinue & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1432 | } |
1433 | |
1434 | void PixelProgram::TEST() |
1435 | { |
1436 | enableContinue = restoreContinue.back(); |
1437 | restoreContinue.pop_back(); |
1438 | } |
1439 | |
// Switches code generation to scalar mode. While 'scalar' is set, enableMask()
// returns an all-ones mask; WHILE() and ENDWHILE() clear the flag again.
void PixelProgram::SCALAR()
{
	scalar = true;
}
1444 | |
1445 | void PixelProgram::CALL(int labelIndex, int callSiteIndex) |
1446 | { |
1447 | if(!labelBlock[labelIndex]) |
1448 | { |
1449 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1450 | } |
1451 | |
1452 | if(callRetBlock[labelIndex].size() > 1) |
1453 | { |
1454 | callStack[stackIndex++] = UInt(callSiteIndex); |
1455 | } |
1456 | |
1457 | Int4 restoreLeave = enableLeave; |
1458 | |
1459 | Nucleus::createBr(labelBlock[labelIndex]); |
1460 | Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); |
1461 | |
1462 | enableLeave = restoreLeave; |
1463 | } |
1464 | |
1465 | void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) |
1466 | { |
1467 | if(src.type == Shader::PARAMETER_CONSTBOOL) |
1468 | { |
1469 | CALLNZb(labelIndex, callSiteIndex, src); |
1470 | } |
1471 | else if(src.type == Shader::PARAMETER_PREDICATE) |
1472 | { |
1473 | CALLNZp(labelIndex, callSiteIndex, src); |
1474 | } |
1475 | else ASSERT(false); |
1476 | } |
1477 | |
1478 | void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) |
1479 | { |
1480 | Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME |
1481 | |
1482 | if(boolRegister.modifier == Shader::MODIFIER_NOT) |
1483 | { |
1484 | condition = !condition; |
1485 | } |
1486 | |
1487 | if(!labelBlock[labelIndex]) |
1488 | { |
1489 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1490 | } |
1491 | |
1492 | if(callRetBlock[labelIndex].size() > 1) |
1493 | { |
1494 | callStack[stackIndex++] = UInt(callSiteIndex); |
1495 | } |
1496 | |
1497 | Int4 restoreLeave = enableLeave; |
1498 | |
1499 | branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); |
1500 | Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); |
1501 | |
1502 | enableLeave = restoreLeave; |
1503 | } |
1504 | |
// Conditional call on a predicate: the selected component of p0 (complemented
// for the NOT modifier) is restricted to the current enable mask and pushed
// as a new enable stack entry for the duration of the call. The label's block
// is entered only if the sign mask of the condition is non-zero.
void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
{
	Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);

	if(predicateRegister.modifier == Shader::MODIFIER_NOT)
	{
		condition = ~condition;
	}

	condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))];

	if(!labelBlock[labelIndex])
	{
		labelBlock[labelIndex] = Nucleus::createBasicBlock();
	}

	// The call site only needs to be recorded when there are several to choose from.
	if(callRetBlock[labelIndex].size() > 1)
	{
		callStack[stackIndex++] = UInt(callSiteIndex);
	}

	// Push the condition as the enable mask used while inside the callee.
	enableIndex++;
	enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;
	Int4 restoreLeave = enableLeave;

	Bool notAllFalse = SignMask(condition) != 0;
	branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
	Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

	// Emitted in the return block: pop the enable stack entry and restore enableLeave.
	enableIndex--;
	enableLeave = restoreLeave;
}
1537 | |
// Starts the 'else' part of the innermost open if. ifDepth is decremented on
// entry and incremented again on exit so that ifDepth indexes the slot of the
// if statement being continued.
void PixelProgram::ELSE()
{
	ifDepth--;

	BasicBlock *falseBlock = ifFalseBlock[ifDepth];
	BasicBlock *endBlock = Nucleus::createBasicBlock();

	if(isConditionalIf[ifDepth])
	{
		// Mask for the else part: enabled in the enclosing scope but not in the 'if' part.
		Int4 condition = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
		Bool notAllFalse = SignMask(condition) != 0;

		// Skip the else part entirely when the sign mask of the condition is zero.
		branch(notAllFalse, falseBlock, endBlock);

		// NOTE(review): presumably emitted into falseBlock, assuming branch() leaves
		// the insert point in its first target block - confirm against Reactor.
		enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
	}
	else
	{
		// Static if: the 'if' part jumps over the else part, which is generated in falseBlock.
		Nucleus::createBr(endBlock);
		Nucleus::setInsertBlock(falseBlock);
	}

	// ENDIF() will branch to and continue in endBlock.
	ifFalseBlock[ifDepth] = endBlock;

	ifDepth++;
}
1564 | |
1565 | void PixelProgram::ENDIF() |
1566 | { |
1567 | ifDepth--; |
1568 | |
1569 | BasicBlock *endBlock = ifFalseBlock[ifDepth]; |
1570 | |
1571 | Nucleus::createBr(endBlock); |
1572 | Nucleus::setInsertBlock(endBlock); |
1573 | |
1574 | if(isConditionalIf[ifDepth]) |
1575 | { |
1576 | enableIndex--; |
1577 | } |
1578 | } |
1579 | |
1580 | void PixelProgram::ENDLOOP() |
1581 | { |
1582 | loopRepDepth--; |
1583 | |
1584 | aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += |
1585 | |
1586 | BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; |
1587 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1588 | |
1589 | Nucleus::createBr(testBlock); |
1590 | Nucleus::setInsertBlock(endBlock); |
1591 | |
1592 | loopDepth--; |
1593 | enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
1594 | } |
1595 | |
1596 | void PixelProgram::ENDREP() |
1597 | { |
1598 | loopRepDepth--; |
1599 | |
1600 | BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; |
1601 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1602 | |
1603 | Nucleus::createBr(testBlock); |
1604 | Nucleus::setInsertBlock(endBlock); |
1605 | |
1606 | loopDepth--; |
1607 | enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
1608 | } |
1609 | |
1610 | void PixelProgram::ENDWHILE() |
1611 | { |
1612 | loopRepDepth--; |
1613 | |
1614 | BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; |
1615 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1616 | |
1617 | Nucleus::createBr(testBlock); |
1618 | Nucleus::setInsertBlock(endBlock); |
1619 | |
1620 | enableIndex--; |
1621 | scalar = false; |
1622 | } |
1623 | |
1624 | void PixelProgram::ENDSWITCH() |
1625 | { |
1626 | loopRepDepth--; |
1627 | |
1628 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1629 | |
1630 | Nucleus::createBr(endBlock); |
1631 | Nucleus::setInsertBlock(endBlock); |
1632 | } |
1633 | |
1634 | void PixelProgram::IF(const Src &src) |
1635 | { |
1636 | if(src.type == Shader::PARAMETER_CONSTBOOL) |
1637 | { |
1638 | IFb(src); |
1639 | } |
1640 | else if(src.type == Shader::PARAMETER_PREDICATE) |
1641 | { |
1642 | IFp(src); |
1643 | } |
1644 | else |
1645 | { |
1646 | Int4 condition = As<Int4>(fetchRegister(src).x); |
1647 | IF(condition); |
1648 | } |
1649 | } |
1650 | |
1651 | void PixelProgram::IFb(const Src &boolRegister) |
1652 | { |
1653 | ASSERT(ifDepth < 24 + 4); |
1654 | |
1655 | Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME |
1656 | |
1657 | if(boolRegister.modifier == Shader::MODIFIER_NOT) |
1658 | { |
1659 | condition = !condition; |
1660 | } |
1661 | |
1662 | BasicBlock *trueBlock = Nucleus::createBasicBlock(); |
1663 | BasicBlock *falseBlock = Nucleus::createBasicBlock(); |
1664 | |
1665 | branch(condition, trueBlock, falseBlock); |
1666 | |
1667 | isConditionalIf[ifDepth] = false; |
1668 | ifFalseBlock[ifDepth] = falseBlock; |
1669 | |
1670 | ifDepth++; |
1671 | } |
1672 | |
1673 | void PixelProgram::IFp(const Src &predicateRegister) |
1674 | { |
1675 | Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); |
1676 | |
1677 | if(predicateRegister.modifier == Shader::MODIFIER_NOT) |
1678 | { |
1679 | condition = ~condition; |
1680 | } |
1681 | |
1682 | IF(condition); |
1683 | } |
1684 | |
1685 | void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control) |
1686 | { |
1687 | Int4 condition; |
1688 | |
1689 | switch(control) |
1690 | { |
1691 | case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; |
1692 | case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; |
1693 | case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; |
1694 | case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; |
1695 | case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; |
1696 | case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; |
1697 | default: |
1698 | ASSERT(false); |
1699 | } |
1700 | |
1701 | IF(condition); |
1702 | } |
1703 | |
1704 | void PixelProgram::IF(Int4 &condition) |
1705 | { |
1706 | condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1707 | |
1708 | enableIndex++; |
1709 | enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; |
1710 | |
1711 | BasicBlock *trueBlock = Nucleus::createBasicBlock(); |
1712 | BasicBlock *falseBlock = Nucleus::createBasicBlock(); |
1713 | |
1714 | Bool notAllFalse = SignMask(condition) != 0; |
1715 | |
1716 | branch(notAllFalse, trueBlock, falseBlock); |
1717 | |
1718 | isConditionalIf[ifDepth] = true; |
1719 | ifFalseBlock[ifDepth] = falseBlock; |
1720 | |
1721 | ifDepth++; |
1722 | } |
1723 | |
1724 | void PixelProgram::LABEL(int labelIndex) |
1725 | { |
1726 | if(!labelBlock[labelIndex]) |
1727 | { |
1728 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1729 | } |
1730 | |
1731 | Nucleus::setInsertBlock(labelBlock[labelIndex]); |
1732 | currentLabel = labelIndex; |
1733 | } |
1734 | |
1735 | void PixelProgram::LOOP(const Src &integerRegister) |
1736 | { |
1737 | loopDepth++; |
1738 | |
1739 | iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); |
1740 | aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1])); |
1741 | increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2])); |
1742 | |
1743 | // If(increment[loopDepth] == 0) |
1744 | // { |
1745 | // increment[loopDepth] = 1; |
1746 | // } |
1747 | |
1748 | BasicBlock *loopBlock = Nucleus::createBasicBlock(); |
1749 | BasicBlock *testBlock = Nucleus::createBasicBlock(); |
1750 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1751 | |
1752 | loopRepTestBlock[loopRepDepth] = testBlock; |
1753 | loopRepEndBlock[loopRepDepth] = endBlock; |
1754 | |
1755 | // FIXME: jump(testBlock) |
1756 | Nucleus::createBr(testBlock); |
1757 | Nucleus::setInsertBlock(testBlock); |
1758 | |
1759 | branch(iteration[loopDepth] > 0, loopBlock, endBlock); |
1760 | Nucleus::setInsertBlock(loopBlock); |
1761 | |
1762 | iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- |
1763 | |
1764 | loopRepDepth++; |
1765 | } |
1766 | |
1767 | void PixelProgram::REP(const Src &integerRegister) |
1768 | { |
1769 | loopDepth++; |
1770 | |
1771 | iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); |
1772 | aL[loopDepth] = aL[loopDepth - 1]; |
1773 | |
1774 | BasicBlock *loopBlock = Nucleus::createBasicBlock(); |
1775 | BasicBlock *testBlock = Nucleus::createBasicBlock(); |
1776 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1777 | |
1778 | loopRepTestBlock[loopRepDepth] = testBlock; |
1779 | loopRepEndBlock[loopRepDepth] = endBlock; |
1780 | |
1781 | // FIXME: jump(testBlock) |
1782 | Nucleus::createBr(testBlock); |
1783 | Nucleus::setInsertBlock(testBlock); |
1784 | |
1785 | branch(iteration[loopDepth] > 0, loopBlock, endBlock); |
1786 | Nucleus::setInsertBlock(loopBlock); |
1787 | |
1788 | iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- |
1789 | |
1790 | loopRepDepth++; |
1791 | } |
1792 | |
// Opens a WHILE loop whose condition is the x component of
// 'temporaryRegister', re-evaluated in the test block on every iteration.
// A new enable stack entry holds the mask of elements still running the loop;
// the body is entered while the sign mask of that entry is non-zero.
void PixelProgram::WHILE(const Src &temporaryRegister)
{
	enableIndex++;

	BasicBlock *loopBlock = Nucleus::createBasicBlock();
	BasicBlock *testBlock = Nucleus::createBasicBlock();
	BasicBlock *endBlock = Nucleus::createBasicBlock();

	loopRepTestBlock[loopRepDepth] = testBlock;
	loopRepEndBlock[loopRepDepth] = endBlock;

	// Saved before entering the loop: enableBreak is restored in the end block
	// below, enableContinue is restored by TEST().
	Int4 restoreBreak = enableBreak;
	restoreContinue.push_back(enableContinue);

	// TODO: jump(testBlock)
	Nucleus::createBr(testBlock);
	Nucleus::setInsertBlock(testBlock);

	// Loop condition, restricted to the enclosing enable mask and to the
	// elements that have not executed a leave or break.
	const Vector4f &src = fetchRegister(temporaryRegister);
	Int4 condition = As<Int4>(src.x);
	condition &= enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))];
	if(shader->containsLeaveInstruction()) condition &= enableLeave;
	if(shader->containsBreakInstruction()) condition &= enableBreak;
	enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition;

	Bool notAllFalse = SignMask(condition) != 0;
	branch(notAllFalse, loopBlock, endBlock);

	// Emit the restore of enableBreak into the end block, then switch to the
	// loop block to generate the body.
	Nucleus::setInsertBlock(endBlock);
	enableBreak = restoreBreak;

	Nucleus::setInsertBlock(loopBlock);

	loopRepDepth++;
	scalar = false;
}
1829 | |
1830 | void PixelProgram::SWITCH() |
1831 | { |
1832 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1833 | |
1834 | loopRepTestBlock[loopRepDepth] = nullptr; |
1835 | loopRepEndBlock[loopRepDepth] = endBlock; |
1836 | |
1837 | Int4 restoreBreak = enableBreak; |
1838 | |
1839 | BasicBlock *currentBlock = Nucleus::getInsertBlock(); |
1840 | |
1841 | Nucleus::setInsertBlock(endBlock); |
1842 | enableBreak = restoreBreak; |
1843 | |
1844 | Nucleus::setInsertBlock(currentBlock); |
1845 | |
1846 | loopRepDepth++; |
1847 | } |
1848 | |
1849 | void PixelProgram::RET() |
1850 | { |
1851 | if(currentLabel == -1) |
1852 | { |
1853 | returnBlock = Nucleus::createBasicBlock(); |
1854 | Nucleus::createBr(returnBlock); |
1855 | } |
1856 | else |
1857 | { |
1858 | BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); |
1859 | |
1860 | if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack |
1861 | { |
1862 | // FIXME: Encapsulate |
1863 | UInt index = callStack[--stackIndex]; |
1864 | |
1865 | Value *value = index.loadValue(); |
1866 | SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); |
1867 | |
1868 | for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) |
1869 | { |
1870 | Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]); |
1871 | } |
1872 | } |
1873 | else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination |
1874 | { |
1875 | Nucleus::createBr(callRetBlock[currentLabel][0]); |
1876 | } |
1877 | else // Function isn't called |
1878 | { |
1879 | Nucleus::createBr(unreachableBlock); |
1880 | } |
1881 | |
1882 | Nucleus::setInsertBlock(unreachableBlock); |
1883 | Nucleus::createUnreachable(); |
1884 | } |
1885 | } |
1886 | |
1887 | void PixelProgram::LEAVE() |
1888 | { |
1889 | enableLeave = enableLeave & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1890 | |
1891 | // FIXME: Return from function if all instances left |
1892 | // FIXME: Use enableLeave in other control-flow constructs |
1893 | } |
1894 | } |
1895 | |