1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "VertexProgram.hpp" |
16 | |
17 | #include "VertexShader.hpp" |
18 | #include "SamplerCore.hpp" |
19 | #include "Renderer/Renderer.hpp" |
20 | #include "Renderer/Vertex.hpp" |
21 | #include "Common/Half.hpp" |
22 | #include "Common/Debug.hpp" |
23 | |
24 | namespace sw |
25 | { |
26 | VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) |
27 | : VertexRoutine(state, shader), |
28 | shader(shader), |
29 | r(shader->indirectAddressableTemporaries), |
30 | aL(shader->getLimits().loops), |
31 | increment(shader->getLimits().loops), |
32 | iteration(shader->getLimits().loops), |
33 | callStack(shader->getLimits().stack) |
34 | { |
35 | auto limits = shader->getLimits(); |
36 | ifFalseBlock.resize(limits.ifs); |
37 | loopRepTestBlock.resize(limits.loops); |
38 | loopRepEndBlock.resize(limits.loops); |
39 | labelBlock.resize(limits.maxLabel + 1); |
40 | isConditionalIf.resize(limits.ifs); |
41 | |
42 | loopDepth = -1; |
43 | enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
44 | |
45 | if(shader->containsBreakInstruction()) |
46 | { |
47 | enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
48 | } |
49 | |
50 | if(shader->containsContinueInstruction()) |
51 | { |
52 | enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
53 | } |
54 | |
55 | if(shader->isInstanceIdDeclared()) |
56 | { |
57 | instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID)); |
58 | } |
59 | } |
60 | |
61 | VertexProgram::~VertexProgram() |
62 | { |
63 | } |
64 | |
65 | void VertexProgram::pipeline(UInt &index) |
66 | { |
67 | if(!state.preTransformed) |
68 | { |
69 | program(index); |
70 | } |
71 | else |
72 | { |
73 | passThrough(); |
74 | } |
75 | } |
76 | |
77 | void VertexProgram::program(UInt &index) |
78 | { |
79 | // shader->print("VertexShader-%0.8X.txt", state.shaderID); |
80 | |
81 | unsigned short shaderModel = shader->getShaderModel(); |
82 | |
83 | enableIndex = 0; |
84 | stackIndex = 0; |
85 | |
86 | if(shader->containsLeaveInstruction()) |
87 | { |
88 | enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
89 | } |
90 | |
91 | if(shader->isVertexIdDeclared()) |
92 | { |
93 | if(state.textureSampling) |
94 | { |
95 | vertexID = Int4(Int(index)); |
96 | } |
97 | else |
98 | { |
99 | vertexID = Insert(vertexID, As<Int>(index), 0); |
100 | vertexID = Insert(vertexID, As<Int>(index + 1), 1); |
101 | vertexID = Insert(vertexID, As<Int>(index + 2), 2); |
102 | vertexID = Insert(vertexID, As<Int>(index + 3), 3); |
103 | } |
104 | } |
105 | |
106 | // Create all call site return blocks up front |
107 | for(size_t i = 0; i < shader->getLength(); i++) |
108 | { |
109 | const Shader::Instruction *instruction = shader->getInstruction(i); |
110 | Shader::Opcode opcode = instruction->opcode; |
111 | |
112 | if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) |
113 | { |
114 | const Dst &dst = instruction->dst; |
115 | |
116 | ASSERT(callRetBlock[dst.label].size() == dst.callSite); |
117 | callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); |
118 | } |
119 | } |
120 | |
121 | for(size_t i = 0; i < shader->getLength(); i++) |
122 | { |
123 | const Shader::Instruction *instruction = shader->getInstruction(i); |
124 | Shader::Opcode opcode = instruction->opcode; |
125 | |
126 | if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) |
127 | { |
128 | continue; |
129 | } |
130 | |
131 | Dst dst = instruction->dst; |
132 | Src src0 = instruction->src[0]; |
133 | Src src1 = instruction->src[1]; |
134 | Src src2 = instruction->src[2]; |
135 | Src src3 = instruction->src[3]; |
136 | Src src4 = instruction->src[4]; |
137 | |
138 | bool predicate = instruction->predicate; |
139 | Control control = instruction->control; |
140 | bool integer = dst.type == Shader::PARAMETER_ADDR; |
141 | bool pp = dst.partialPrecision; |
142 | |
143 | Vector4f d; |
144 | Vector4f s0; |
145 | Vector4f s1; |
146 | Vector4f s2; |
147 | Vector4f s3; |
148 | Vector4f s4; |
149 | |
150 | if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); |
151 | if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); |
152 | if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); |
153 | if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); |
154 | if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); |
155 | |
156 | switch(opcode) |
157 | { |
158 | case Shader::OPCODE_VS_1_0: break; |
159 | case Shader::OPCODE_VS_1_1: break; |
160 | case Shader::OPCODE_VS_2_0: break; |
161 | case Shader::OPCODE_VS_2_x: break; |
162 | case Shader::OPCODE_VS_2_sw: break; |
163 | case Shader::OPCODE_VS_3_0: break; |
164 | case Shader::OPCODE_VS_3_sw: break; |
165 | case Shader::OPCODE_DCL: break; |
166 | case Shader::OPCODE_DEF: break; |
167 | case Shader::OPCODE_DEFI: break; |
168 | case Shader::OPCODE_DEFB: break; |
169 | case Shader::OPCODE_NOP: break; |
170 | case Shader::OPCODE_ABS: abs(d, s0); break; |
171 | case Shader::OPCODE_IABS: iabs(d, s0); break; |
172 | case Shader::OPCODE_ADD: add(d, s0, s1); break; |
173 | case Shader::OPCODE_IADD: iadd(d, s0, s1); break; |
174 | case Shader::OPCODE_CRS: crs(d, s0, s1); break; |
175 | case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; |
176 | case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; |
177 | case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; |
178 | case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; |
179 | case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; |
180 | case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; |
181 | case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; |
182 | case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; |
183 | case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; |
184 | case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; |
185 | case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; |
186 | case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; |
187 | case Shader::OPCODE_DP1: dp1(d, s0, s1); break; |
188 | case Shader::OPCODE_DP2: dp2(d, s0, s1); break; |
189 | case Shader::OPCODE_DP3: dp3(d, s0, s1); break; |
190 | case Shader::OPCODE_DP4: dp4(d, s0, s1); break; |
191 | case Shader::OPCODE_DET2: det2(d, s0, s1); break; |
192 | case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; |
193 | case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; |
194 | case Shader::OPCODE_ATT: att(d, s0, s1); break; |
195 | case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; |
196 | case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; |
197 | case Shader::OPCODE_EXPP: expp(d, s0, shaderModel); break; |
198 | case Shader::OPCODE_EXP: exp(d, s0, pp); break; |
199 | case Shader::OPCODE_FRC: frc(d, s0); break; |
200 | case Shader::OPCODE_TRUNC: trunc(d, s0); break; |
201 | case Shader::OPCODE_FLOOR: floor(d, s0); break; |
202 | case Shader::OPCODE_ROUND: round(d, s0); break; |
203 | case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; |
204 | case Shader::OPCODE_CEIL: ceil(d, s0); break; |
205 | case Shader::OPCODE_LIT: lit(d, s0); break; |
206 | case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; |
207 | case Shader::OPCODE_LOG2: log2(d, s0, pp); break; |
208 | case Shader::OPCODE_LOGP: logp(d, s0, shaderModel); break; |
209 | case Shader::OPCODE_LOG: log(d, s0, pp); break; |
210 | case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; |
211 | case Shader::OPCODE_STEP: step(d, s0, s1); break; |
212 | case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; |
213 | case Shader::OPCODE_ISINF: isinf(d, s0); break; |
214 | case Shader::OPCODE_ISNAN: isnan(d, s0); break; |
215 | case Shader::OPCODE_FLOATBITSTOINT: |
216 | case Shader::OPCODE_FLOATBITSTOUINT: |
217 | case Shader::OPCODE_INTBITSTOFLOAT: |
218 | case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; |
219 | case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; |
220 | case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; |
221 | case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; |
222 | case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; |
223 | case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; |
224 | case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; |
225 | case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; |
226 | case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; |
227 | case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; |
228 | case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; |
229 | case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; |
230 | case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; |
231 | case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; |
232 | case Shader::OPCODE_MAX: max(d, s0, s1); break; |
233 | case Shader::OPCODE_IMAX: imax(d, s0, s1); break; |
234 | case Shader::OPCODE_UMAX: umax(d, s0, s1); break; |
235 | case Shader::OPCODE_MIN: min(d, s0, s1); break; |
236 | case Shader::OPCODE_IMIN: imin(d, s0, s1); break; |
237 | case Shader::OPCODE_UMIN: umin(d, s0, s1); break; |
238 | case Shader::OPCODE_MOV: mov(d, s0, integer); break; |
239 | case Shader::OPCODE_MOVA: mov(d, s0, true); break; |
240 | case Shader::OPCODE_NEG: neg(d, s0); break; |
241 | case Shader::OPCODE_INEG: ineg(d, s0); break; |
242 | case Shader::OPCODE_F2B: f2b(d, s0); break; |
243 | case Shader::OPCODE_B2F: b2f(d, s0); break; |
244 | case Shader::OPCODE_F2I: f2i(d, s0); break; |
245 | case Shader::OPCODE_I2F: i2f(d, s0); break; |
246 | case Shader::OPCODE_F2U: f2u(d, s0); break; |
247 | case Shader::OPCODE_U2F: u2f(d, s0); break; |
248 | case Shader::OPCODE_I2B: i2b(d, s0); break; |
249 | case Shader::OPCODE_B2I: b2i(d, s0); break; |
250 | case Shader::OPCODE_MUL: mul(d, s0, s1); break; |
251 | case Shader::OPCODE_IMUL: imul(d, s0, s1); break; |
252 | case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; |
253 | case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; |
254 | case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; |
255 | case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; |
256 | case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; |
257 | case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; |
258 | case Shader::OPCODE_DIV: div(d, s0, s1); break; |
259 | case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; |
260 | case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; |
261 | case Shader::OPCODE_MOD: mod(d, s0, s1); break; |
262 | case Shader::OPCODE_IMOD: imod(d, s0, s1); break; |
263 | case Shader::OPCODE_UMOD: umod(d, s0, s1); break; |
264 | case Shader::OPCODE_SHL: shl(d, s0, s1); break; |
265 | case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; |
266 | case Shader::OPCODE_USHR: ushr(d, s0, s1); break; |
267 | case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; |
268 | case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; |
269 | case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; |
270 | case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; |
271 | case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; |
272 | case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; |
273 | case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; |
274 | case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; |
275 | case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; |
276 | case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; |
277 | case Shader::OPCODE_SGE: step(d, s1, s0); break; |
278 | case Shader::OPCODE_SGN: sgn(d, s0); break; |
279 | case Shader::OPCODE_ISGN: isgn(d, s0); break; |
280 | case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; |
281 | case Shader::OPCODE_COS: cos(d, s0, pp); break; |
282 | case Shader::OPCODE_SIN: sin(d, s0, pp); break; |
283 | case Shader::OPCODE_TAN: tan(d, s0); break; |
284 | case Shader::OPCODE_ACOS: acos(d, s0); break; |
285 | case Shader::OPCODE_ASIN: asin(d, s0); break; |
286 | case Shader::OPCODE_ATAN: atan(d, s0); break; |
287 | case Shader::OPCODE_ATAN2: atan2(d, s0, s1); break; |
288 | case Shader::OPCODE_COSH: cosh(d, s0, pp); break; |
289 | case Shader::OPCODE_SINH: sinh(d, s0, pp); break; |
290 | case Shader::OPCODE_TANH: tanh(d, s0, pp); break; |
291 | case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; |
292 | case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; |
293 | case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; |
294 | case Shader::OPCODE_SLT: slt(d, s0, s1); break; |
295 | case Shader::OPCODE_SUB: sub(d, s0, s1); break; |
296 | case Shader::OPCODE_ISUB: isub(d, s0, s1); break; |
297 | case Shader::OPCODE_BREAK: BREAK(); break; |
298 | case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; |
299 | case Shader::OPCODE_BREAKP: BREAKP(src0); break; |
300 | case Shader::OPCODE_CONTINUE: CONTINUE(); break; |
301 | case Shader::OPCODE_TEST: TEST(); break; |
302 | case Shader::OPCODE_SCALAR: SCALAR(); break; |
303 | case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; |
304 | case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; |
305 | case Shader::OPCODE_ELSE: ELSE(); break; |
306 | case Shader::OPCODE_ENDIF: ENDIF(); break; |
307 | case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; |
308 | case Shader::OPCODE_ENDREP: ENDREP(); break; |
309 | case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; |
310 | case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; |
311 | case Shader::OPCODE_IF: IF(src0); break; |
312 | case Shader::OPCODE_IFC: IFC(s0, s1, control); break; |
313 | case Shader::OPCODE_LABEL: LABEL(dst.index); break; |
314 | case Shader::OPCODE_LOOP: LOOP(src1); break; |
315 | case Shader::OPCODE_REP: REP(src0); break; |
316 | case Shader::OPCODE_WHILE: WHILE(src0); break; |
317 | case Shader::OPCODE_SWITCH: SWITCH(); break; |
318 | case Shader::OPCODE_RET: RET(); break; |
319 | case Shader::OPCODE_LEAVE: LEAVE(); break; |
320 | case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; |
321 | case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; |
322 | case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; |
323 | case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; |
324 | case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; |
325 | case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; |
326 | case Shader::OPCODE_ALL: all(d.x, s0); break; |
327 | case Shader::OPCODE_ANY: any(d.x, s0); break; |
328 | case Shader::OPCODE_NOT: bitwise_not(d, s0); break; |
329 | case Shader::OPCODE_OR: bitwise_or(d, s0, s1); break; |
330 | case Shader::OPCODE_XOR: bitwise_xor(d, s0, s1); break; |
331 | case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; |
332 | case Shader::OPCODE_EQ: equal(d, s0, s1); break; |
333 | case Shader::OPCODE_NE: notEqual(d, s0, s1); break; |
334 | case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; |
335 | case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; |
336 | case Shader::OPCODE_TEX: TEX(d, s0, src1); break; |
337 | case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; |
338 | case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; |
339 | case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; |
340 | case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; |
341 | case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; |
342 | case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; |
343 | case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; |
344 | case Shader::OPCODE_END: break; |
345 | default: |
346 | ASSERT(false); |
347 | } |
348 | |
349 | if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP) |
350 | { |
351 | if(dst.saturate) |
352 | { |
353 | if(dst.x) d.x = Max(d.x, Float4(0.0f)); |
354 | if(dst.y) d.y = Max(d.y, Float4(0.0f)); |
355 | if(dst.z) d.z = Max(d.z, Float4(0.0f)); |
356 | if(dst.w) d.w = Max(d.w, Float4(0.0f)); |
357 | |
358 | if(dst.x) d.x = Min(d.x, Float4(1.0f)); |
359 | if(dst.y) d.y = Min(d.y, Float4(1.0f)); |
360 | if(dst.z) d.z = Min(d.z, Float4(1.0f)); |
361 | if(dst.w) d.w = Min(d.w, Float4(1.0f)); |
362 | } |
363 | |
364 | if(instruction->isPredicated()) |
365 | { |
366 | Vector4f pDst; // FIXME: Rename |
367 | |
368 | switch(dst.type) |
369 | { |
370 | case Shader::PARAMETER_VOID: break; |
371 | case Shader::PARAMETER_TEMP: |
372 | if(dst.rel.type == Shader::PARAMETER_VOID) |
373 | { |
374 | if(dst.x) pDst.x = r[dst.index].x; |
375 | if(dst.y) pDst.y = r[dst.index].y; |
376 | if(dst.z) pDst.z = r[dst.index].z; |
377 | if(dst.w) pDst.w = r[dst.index].w; |
378 | } |
379 | else if(!dst.rel.dynamic) |
380 | { |
381 | Int a = dst.index + relativeAddress(dst.rel); |
382 | |
383 | if(dst.x) pDst.x = r[a].x; |
384 | if(dst.y) pDst.y = r[a].y; |
385 | if(dst.z) pDst.z = r[a].z; |
386 | if(dst.w) pDst.w = r[a].w; |
387 | } |
388 | else |
389 | { |
390 | Int4 a = dst.index + dynamicAddress(dst.rel); |
391 | |
392 | if(dst.x) pDst.x = r[a].x; |
393 | if(dst.y) pDst.y = r[a].y; |
394 | if(dst.z) pDst.z = r[a].z; |
395 | if(dst.w) pDst.w = r[a].w; |
396 | } |
397 | break; |
398 | case Shader::PARAMETER_ADDR: pDst = a0; break; |
399 | case Shader::PARAMETER_RASTOUT: |
400 | switch(dst.index) |
401 | { |
402 | case 0: |
403 | if(dst.x) pDst.x = o[Pos].x; |
404 | if(dst.y) pDst.y = o[Pos].y; |
405 | if(dst.z) pDst.z = o[Pos].z; |
406 | if(dst.w) pDst.w = o[Pos].w; |
407 | break; |
408 | case 1: |
409 | pDst.x = o[Fog].x; |
410 | break; |
411 | case 2: |
412 | pDst.x = o[Pts].y; |
413 | break; |
414 | default: |
415 | ASSERT(false); |
416 | } |
417 | break; |
418 | case Shader::PARAMETER_ATTROUT: |
419 | if(dst.x) pDst.x = o[C0 + dst.index].x; |
420 | if(dst.y) pDst.y = o[C0 + dst.index].y; |
421 | if(dst.z) pDst.z = o[C0 + dst.index].z; |
422 | if(dst.w) pDst.w = o[C0 + dst.index].w; |
423 | break; |
424 | case Shader::PARAMETER_TEXCRDOUT: |
425 | // case Shader::PARAMETER_OUTPUT: |
426 | if(shaderModel < 0x0300) |
427 | { |
428 | if(dst.x) pDst.x = o[T0 + dst.index].x; |
429 | if(dst.y) pDst.y = o[T0 + dst.index].y; |
430 | if(dst.z) pDst.z = o[T0 + dst.index].z; |
431 | if(dst.w) pDst.w = o[T0 + dst.index].w; |
432 | } |
433 | else if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative |
434 | { |
435 | if(dst.x) pDst.x = o[dst.index].x; |
436 | if(dst.y) pDst.y = o[dst.index].y; |
437 | if(dst.z) pDst.z = o[dst.index].z; |
438 | if(dst.w) pDst.w = o[dst.index].w; |
439 | } |
440 | else if(!dst.rel.dynamic) |
441 | { |
442 | Int a = dst.index + relativeAddress(dst.rel); |
443 | |
444 | if(dst.x) pDst.x = o[a].x; |
445 | if(dst.y) pDst.y = o[a].y; |
446 | if(dst.z) pDst.z = o[a].z; |
447 | if(dst.w) pDst.w = o[a].w; |
448 | } |
449 | else |
450 | { |
451 | Int4 a = dst.index + dynamicAddress(dst.rel); |
452 | |
453 | if(dst.x) pDst.x = o[a].x; |
454 | if(dst.y) pDst.y = o[a].y; |
455 | if(dst.z) pDst.z = o[a].z; |
456 | if(dst.w) pDst.w = o[a].w; |
457 | } |
458 | break; |
459 | case Shader::PARAMETER_LABEL: break; |
460 | case Shader::PARAMETER_PREDICATE: pDst = p0; break; |
461 | case Shader::PARAMETER_INPUT: break; |
462 | default: |
463 | ASSERT(false); |
464 | } |
465 | |
466 | Int4 enable = enableMask(instruction); |
467 | |
468 | Int4 xEnable = enable; |
469 | Int4 yEnable = enable; |
470 | Int4 zEnable = enable; |
471 | Int4 wEnable = enable; |
472 | |
473 | if(predicate) |
474 | { |
475 | unsigned char pSwizzle = instruction->predicateSwizzle; |
476 | |
477 | Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; |
478 | Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; |
479 | Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; |
480 | Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; |
481 | |
482 | if(!instruction->predicateNot) |
483 | { |
484 | if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); |
485 | if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); |
486 | if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); |
487 | if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); |
488 | } |
489 | else |
490 | { |
491 | if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); |
492 | if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); |
493 | if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); |
494 | if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); |
495 | } |
496 | } |
497 | |
498 | if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); |
499 | if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); |
500 | if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); |
501 | if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); |
502 | |
503 | if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); |
504 | if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); |
505 | if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); |
506 | if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); |
507 | } |
508 | |
509 | switch(dst.type) |
510 | { |
511 | case Shader::PARAMETER_VOID: |
512 | break; |
513 | case Shader::PARAMETER_TEMP: |
514 | if(dst.rel.type == Shader::PARAMETER_VOID) |
515 | { |
516 | if(dst.x) r[dst.index].x = d.x; |
517 | if(dst.y) r[dst.index].y = d.y; |
518 | if(dst.z) r[dst.index].z = d.z; |
519 | if(dst.w) r[dst.index].w = d.w; |
520 | } |
521 | else if(!dst.rel.dynamic) |
522 | { |
523 | Int a = dst.index + relativeAddress(dst.rel); |
524 | |
525 | if(dst.x) r[a].x = d.x; |
526 | if(dst.y) r[a].y = d.y; |
527 | if(dst.z) r[a].z = d.z; |
528 | if(dst.w) r[a].w = d.w; |
529 | } |
530 | else |
531 | { |
532 | Int4 a = dst.index + dynamicAddress(dst.rel); |
533 | |
534 | if(dst.x) r.scatter_x(a, d.x); |
535 | if(dst.y) r.scatter_y(a, d.y); |
536 | if(dst.z) r.scatter_z(a, d.z); |
537 | if(dst.w) r.scatter_w(a, d.w); |
538 | } |
539 | break; |
540 | case Shader::PARAMETER_ADDR: |
541 | if(dst.x) a0.x = d.x; |
542 | if(dst.y) a0.y = d.y; |
543 | if(dst.z) a0.z = d.z; |
544 | if(dst.w) a0.w = d.w; |
545 | break; |
546 | case Shader::PARAMETER_RASTOUT: |
547 | switch(dst.index) |
548 | { |
549 | case 0: |
550 | if(dst.x) o[Pos].x = d.x; |
551 | if(dst.y) o[Pos].y = d.y; |
552 | if(dst.z) o[Pos].z = d.z; |
553 | if(dst.w) o[Pos].w = d.w; |
554 | break; |
555 | case 1: |
556 | o[Fog].x = d.x; |
557 | break; |
558 | case 2: |
559 | o[Pts].y = d.x; |
560 | break; |
561 | default: ASSERT(false); |
562 | } |
563 | break; |
564 | case Shader::PARAMETER_ATTROUT: |
565 | if(dst.x) o[C0 + dst.index].x = d.x; |
566 | if(dst.y) o[C0 + dst.index].y = d.y; |
567 | if(dst.z) o[C0 + dst.index].z = d.z; |
568 | if(dst.w) o[C0 + dst.index].w = d.w; |
569 | break; |
570 | case Shader::PARAMETER_TEXCRDOUT: |
571 | // case Shader::PARAMETER_OUTPUT: |
572 | if(shaderModel < 0x0300) |
573 | { |
574 | if(dst.x) o[T0 + dst.index].x = d.x; |
575 | if(dst.y) o[T0 + dst.index].y = d.y; |
576 | if(dst.z) o[T0 + dst.index].z = d.z; |
577 | if(dst.w) o[T0 + dst.index].w = d.w; |
578 | } |
579 | else if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative |
580 | { |
581 | if(dst.x) o[dst.index].x = d.x; |
582 | if(dst.y) o[dst.index].y = d.y; |
583 | if(dst.z) o[dst.index].z = d.z; |
584 | if(dst.w) o[dst.index].w = d.w; |
585 | } |
586 | else if(!dst.rel.dynamic) |
587 | { |
588 | Int a = dst.index + relativeAddress(dst.rel); |
589 | |
590 | if(dst.x) o[a].x = d.x; |
591 | if(dst.y) o[a].y = d.y; |
592 | if(dst.z) o[a].z = d.z; |
593 | if(dst.w) o[a].w = d.w; |
594 | } |
595 | else |
596 | { |
597 | Int4 a = dst.index + dynamicAddress(dst.rel); |
598 | |
599 | if(dst.x) o.scatter_x(a, d.x); |
600 | if(dst.y) o.scatter_y(a, d.y); |
601 | if(dst.z) o.scatter_z(a, d.z); |
602 | if(dst.w) o.scatter_w(a, d.w); |
603 | } |
604 | break; |
605 | case Shader::PARAMETER_LABEL: break; |
606 | case Shader::PARAMETER_PREDICATE: p0 = d; break; |
607 | case Shader::PARAMETER_INPUT: break; |
608 | default: |
609 | ASSERT(false); |
610 | } |
611 | } |
612 | } |
613 | |
614 | if(currentLabel != -1) |
615 | { |
616 | Nucleus::setInsertBlock(returnBlock); |
617 | } |
618 | } |
619 | |
620 | void VertexProgram::passThrough() |
621 | { |
622 | if(shader) |
623 | { |
624 | for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) |
625 | { |
626 | unsigned char usage = shader->getOutput(i, 0).usage; |
627 | |
628 | switch(usage) |
629 | { |
630 | case 0xFF: |
631 | continue; |
632 | case Shader::USAGE_PSIZE: |
633 | o[i].y = v[i].x; |
634 | break; |
635 | case Shader::USAGE_TEXCOORD: |
636 | o[i].x = v[i].x; |
637 | o[i].y = v[i].y; |
638 | o[i].z = v[i].z; |
639 | o[i].w = v[i].w; |
640 | break; |
641 | case Shader::USAGE_POSITION: |
642 | o[i].x = v[i].x; |
643 | o[i].y = v[i].y; |
644 | o[i].z = v[i].z; |
645 | o[i].w = v[i].w; |
646 | break; |
647 | case Shader::USAGE_COLOR: |
648 | o[i].x = v[i].x; |
649 | o[i].y = v[i].y; |
650 | o[i].z = v[i].z; |
651 | o[i].w = v[i].w; |
652 | break; |
653 | case Shader::USAGE_FOG: |
654 | o[i].x = v[i].x; |
655 | break; |
656 | default: |
657 | ASSERT(false); |
658 | } |
659 | } |
660 | } |
661 | else |
662 | { |
663 | o[Pos].x = v[PositionT].x; |
664 | o[Pos].y = v[PositionT].y; |
665 | o[Pos].z = v[PositionT].z; |
666 | o[Pos].w = v[PositionT].w; |
667 | |
668 | for(int i = 0; i < 2; i++) |
669 | { |
670 | o[C0 + i].x = v[Color0 + i].x; |
671 | o[C0 + i].y = v[Color0 + i].y; |
672 | o[C0 + i].z = v[Color0 + i].z; |
673 | o[C0 + i].w = v[Color0 + i].w; |
674 | } |
675 | |
676 | for(int i = 0; i < 8; i++) |
677 | { |
678 | o[T0 + i].x = v[TexCoord0 + i].x; |
679 | o[T0 + i].y = v[TexCoord0 + i].y; |
680 | o[T0 + i].z = v[TexCoord0 + i].z; |
681 | o[T0 + i].w = v[TexCoord0 + i].w; |
682 | } |
683 | |
684 | o[Pts].y = v[PointSize].x; |
685 | } |
686 | } |
687 | |
688 | Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset) |
689 | { |
690 | Vector4f reg; |
691 | unsigned int i = src.index + offset; |
692 | |
693 | switch(src.type) |
694 | { |
695 | case Shader::PARAMETER_TEMP: |
696 | if(src.rel.type == Shader::PARAMETER_VOID) |
697 | { |
698 | reg = r[i]; |
699 | } |
700 | else if(!src.rel.dynamic) |
701 | { |
702 | reg = r[i + relativeAddress(src.rel, src.bufferIndex)]; |
703 | } |
704 | else |
705 | { |
706 | reg = r[i + dynamicAddress(src.rel)]; |
707 | } |
708 | break; |
709 | case Shader::PARAMETER_CONST: |
710 | reg = readConstant(src, offset); |
711 | break; |
712 | case Shader::PARAMETER_INPUT: |
713 | if(src.rel.type == Shader::PARAMETER_VOID) |
714 | { |
715 | reg = v[i]; |
716 | } |
717 | else if(!src.rel.dynamic) |
718 | { |
719 | reg = v[i + relativeAddress(src.rel, src.bufferIndex)]; |
720 | } |
721 | else |
722 | { |
723 | reg = v[i + dynamicAddress(src.rel)]; |
724 | } |
725 | break; |
726 | case Shader::PARAMETER_VOID: return r[0]; // Dummy |
727 | case Shader::PARAMETER_FLOAT4LITERAL: |
728 | // This is used for all literal types, and since Reactor doesn't guarantee |
729 | // preserving the bit pattern of float constants, we must construct them |
730 | // as integer constants and bitcast. |
731 | reg.x = As<Float4>(Int4(src.integer[0])); |
732 | reg.y = As<Float4>(Int4(src.integer[1])); |
733 | reg.z = As<Float4>(Int4(src.integer[2])); |
734 | reg.w = As<Float4>(Int4(src.integer[3])); |
735 | break; |
736 | case Shader::PARAMETER_ADDR: reg = a0; break; |
737 | case Shader::PARAMETER_CONSTBOOL: return r[0]; // Dummy |
738 | case Shader::PARAMETER_CONSTINT: return r[0]; // Dummy |
739 | case Shader::PARAMETER_LOOP: return r[0]; // Dummy |
740 | case Shader::PARAMETER_PREDICATE: return r[0]; // Dummy |
741 | case Shader::PARAMETER_SAMPLER: |
742 | if(src.rel.type == Shader::PARAMETER_VOID) |
743 | { |
744 | reg.x = As<Float4>(Int4(i)); |
745 | } |
746 | else if(src.rel.type == Shader::PARAMETER_TEMP) |
747 | { |
748 | reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x)); |
749 | } |
750 | return reg; |
751 | case Shader::PARAMETER_OUTPUT: |
752 | if(src.rel.type == Shader::PARAMETER_VOID) |
753 | { |
754 | reg = o[i]; |
755 | } |
756 | else if(!src.rel.dynamic) |
757 | { |
758 | reg = o[i + relativeAddress(src.rel, src.bufferIndex)]; |
759 | } |
760 | else |
761 | { |
762 | reg = o[i + dynamicAddress(src.rel)]; |
763 | } |
764 | break; |
765 | case Shader::PARAMETER_MISCTYPE: |
766 | if(src.index == Shader::InstanceIDIndex) |
767 | { |
768 | reg.x = As<Float>(instanceID); |
769 | } |
770 | else if(src.index == Shader::VertexIDIndex) |
771 | { |
772 | reg.x = As<Float4>(vertexID); |
773 | } |
774 | else ASSERT(false); |
775 | return reg; |
776 | default: |
777 | ASSERT(false); |
778 | } |
779 | |
780 | const Float4 &x = reg[(src.swizzle >> 0) & 0x3]; |
781 | const Float4 &y = reg[(src.swizzle >> 2) & 0x3]; |
782 | const Float4 &z = reg[(src.swizzle >> 4) & 0x3]; |
783 | const Float4 &w = reg[(src.swizzle >> 6) & 0x3]; |
784 | |
785 | Vector4f mod; |
786 | |
787 | switch(src.modifier) |
788 | { |
789 | case Shader::MODIFIER_NONE: |
790 | mod.x = x; |
791 | mod.y = y; |
792 | mod.z = z; |
793 | mod.w = w; |
794 | break; |
795 | case Shader::MODIFIER_NEGATE: |
796 | mod.x = -x; |
797 | mod.y = -y; |
798 | mod.z = -z; |
799 | mod.w = -w; |
800 | break; |
801 | case Shader::MODIFIER_ABS: |
802 | mod.x = Abs(x); |
803 | mod.y = Abs(y); |
804 | mod.z = Abs(z); |
805 | mod.w = Abs(w); |
806 | break; |
807 | case Shader::MODIFIER_ABS_NEGATE: |
808 | mod.x = -Abs(x); |
809 | mod.y = -Abs(y); |
810 | mod.z = -Abs(z); |
811 | mod.w = -Abs(w); |
812 | break; |
813 | case Shader::MODIFIER_NOT: |
814 | mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF)); |
815 | mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF)); |
816 | mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF)); |
817 | mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF)); |
818 | break; |
819 | default: |
820 | ASSERT(false); |
821 | } |
822 | |
823 | return mod; |
824 | } |
825 | |
826 | RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index) |
827 | { |
828 | if(bufferIndex == -1) |
829 | { |
830 | return data + OFFSET(DrawData, vs.c[index]); |
831 | } |
832 | else |
833 | { |
834 | return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index; |
835 | } |
836 | } |
837 | |
838 | RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int &offset) |
839 | { |
840 | return uniformAddress(bufferIndex, index) + offset * sizeof(float4); |
841 | } |
842 | |
843 | Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset) |
844 | { |
845 | Vector4f c; |
846 | unsigned int i = src.index + offset; |
847 | |
848 | if(src.rel.type == Shader::PARAMETER_VOID) // Not relative |
849 | { |
850 | c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i)); |
851 | |
852 | c.x = c.x.xxxx; |
853 | c.y = c.y.yyyy; |
854 | c.z = c.z.zzzz; |
855 | c.w = c.w.wwww; |
856 | |
857 | if(shader->containsDefineInstruction()) // Constant may be known at compile time |
858 | { |
859 | for(size_t j = 0; j < shader->getLength(); j++) |
860 | { |
861 | const Shader::Instruction &instruction = *shader->getInstruction(j); |
862 | |
863 | if(instruction.opcode == Shader::OPCODE_DEF) |
864 | { |
865 | if(instruction.dst.index == i) |
866 | { |
867 | c.x = Float4(instruction.src[0].value[0]); |
868 | c.y = Float4(instruction.src[0].value[1]); |
869 | c.z = Float4(instruction.src[0].value[2]); |
870 | c.w = Float4(instruction.src[0].value[3]); |
871 | |
872 | break; |
873 | } |
874 | } |
875 | } |
876 | } |
877 | } |
878 | else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP) |
879 | { |
880 | Int a = relativeAddress(src.rel, src.bufferIndex); |
881 | |
882 | c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a)); |
883 | |
884 | c.x = c.x.xxxx; |
885 | c.y = c.y.yyyy; |
886 | c.z = c.z.zzzz; |
887 | c.w = c.w.wwww; |
888 | } |
889 | else |
890 | { |
891 | int component = src.rel.swizzle & 0x03; |
892 | Float4 a; |
893 | |
894 | switch(src.rel.type) |
895 | { |
896 | case Shader::PARAMETER_ADDR: a = a0[component]; break; |
897 | case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break; |
898 | case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break; |
899 | case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break; |
900 | case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break; |
901 | case Shader::PARAMETER_MISCTYPE: |
902 | switch(src.rel.index) |
903 | { |
904 | case Shader::InstanceIDIndex: a = As<Float4>(Int4(instanceID)); break; |
905 | case Shader::VertexIDIndex: a = As<Float4>(vertexID); break; |
906 | default: ASSERT(false); |
907 | } |
908 | break; |
909 | default: ASSERT(false); |
910 | } |
911 | |
912 | Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale); |
913 | |
914 | index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0} |
915 | |
916 | Int index0 = Extract(index, 0); |
917 | Int index1 = Extract(index, 1); |
918 | Int index2 = Extract(index, 2); |
919 | Int index3 = Extract(index, 3); |
920 | |
921 | c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16); |
922 | c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16); |
923 | c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16); |
924 | c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16); |
925 | |
926 | transpose4x4(c.x, c.y, c.z, c.w); |
927 | } |
928 | |
929 | return c; |
930 | } |
931 | |
932 | Int VertexProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex) |
933 | { |
934 | ASSERT(!rel.dynamic); |
935 | |
936 | if(rel.type == Shader::PARAMETER_TEMP) |
937 | { |
938 | return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale; |
939 | } |
940 | else if(rel.type == Shader::PARAMETER_INPUT) |
941 | { |
942 | return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale; |
943 | } |
944 | else if(rel.type == Shader::PARAMETER_OUTPUT) |
945 | { |
946 | return As<Int>(Extract(o[rel.index].x, 0)) * rel.scale; |
947 | } |
948 | else if(rel.type == Shader::PARAMETER_CONST) |
949 | { |
950 | return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale; |
951 | } |
952 | else if(rel.type == Shader::PARAMETER_LOOP) |
953 | { |
954 | return aL[loopDepth]; |
955 | } |
956 | else ASSERT(false); |
957 | |
958 | return 0; |
959 | } |
960 | |
961 | Int4 VertexProgram::dynamicAddress(const Shader::Relative &rel) |
962 | { |
963 | int component = rel.swizzle & 0x03; |
964 | Float4 a; |
965 | |
966 | switch(rel.type) |
967 | { |
968 | case Shader::PARAMETER_ADDR: a = a0[component]; break; |
969 | case Shader::PARAMETER_TEMP: a = r[rel.index][component]; break; |
970 | case Shader::PARAMETER_INPUT: a = v[rel.index][component]; break; |
971 | case Shader::PARAMETER_OUTPUT: a = o[rel.index][component]; break; |
972 | case Shader::PARAMETER_MISCTYPE: |
973 | switch(rel.index) |
974 | { |
975 | case Shader::InstanceIDIndex: a = As<Float>(instanceID); break; |
976 | case Shader::VertexIDIndex: a = As<Float4>(vertexID); break; |
977 | default: ASSERT(false); |
978 | } |
979 | break; |
980 | default: ASSERT(false); |
981 | } |
982 | |
983 | return As<Int4>(a) * Int4(rel.scale); |
984 | } |
985 | |
986 | Int4 VertexProgram::enableMask(const Shader::Instruction *instruction) |
987 | { |
988 | if(scalar) |
989 | { |
990 | return Int4(0xFFFFFFFF); |
991 | } |
992 | |
993 | Int4 enable = instruction->analysisBranch ? Int4(enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]) : Int4(0xFFFFFFFF); |
994 | |
995 | if(shader->containsBreakInstruction() && instruction->analysisBreak) |
996 | { |
997 | enable &= enableBreak; |
998 | } |
999 | |
1000 | if(shader->containsContinueInstruction() && instruction->analysisContinue) |
1001 | { |
1002 | enable &= enableContinue; |
1003 | } |
1004 | |
1005 | if(shader->containsLeaveInstruction() && instruction->analysisLeave) |
1006 | { |
1007 | enable &= enableLeave; |
1008 | } |
1009 | |
1010 | return enable; |
1011 | } |
1012 | |
1013 | void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1) |
1014 | { |
1015 | Vector4f row0 = fetchRegister(src1, 0); |
1016 | Vector4f row1 = fetchRegister(src1, 1); |
1017 | |
1018 | dst.x = dot3(src0, row0); |
1019 | dst.y = dot3(src0, row1); |
1020 | } |
1021 | |
1022 | void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1) |
1023 | { |
1024 | Vector4f row0 = fetchRegister(src1, 0); |
1025 | Vector4f row1 = fetchRegister(src1, 1); |
1026 | Vector4f row2 = fetchRegister(src1, 2); |
1027 | |
1028 | dst.x = dot3(src0, row0); |
1029 | dst.y = dot3(src0, row1); |
1030 | dst.z = dot3(src0, row2); |
1031 | } |
1032 | |
1033 | void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1) |
1034 | { |
1035 | Vector4f row0 = fetchRegister(src1, 0); |
1036 | Vector4f row1 = fetchRegister(src1, 1); |
1037 | Vector4f row2 = fetchRegister(src1, 2); |
1038 | Vector4f row3 = fetchRegister(src1, 3); |
1039 | |
1040 | dst.x = dot3(src0, row0); |
1041 | dst.y = dot3(src0, row1); |
1042 | dst.z = dot3(src0, row2); |
1043 | dst.w = dot3(src0, row3); |
1044 | } |
1045 | |
1046 | void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1) |
1047 | { |
1048 | Vector4f row0 = fetchRegister(src1, 0); |
1049 | Vector4f row1 = fetchRegister(src1, 1); |
1050 | Vector4f row2 = fetchRegister(src1, 2); |
1051 | |
1052 | dst.x = dot4(src0, row0); |
1053 | dst.y = dot4(src0, row1); |
1054 | dst.z = dot4(src0, row2); |
1055 | } |
1056 | |
1057 | void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1) |
1058 | { |
1059 | Vector4f row0 = fetchRegister(src1, 0); |
1060 | Vector4f row1 = fetchRegister(src1, 1); |
1061 | Vector4f row2 = fetchRegister(src1, 2); |
1062 | Vector4f row3 = fetchRegister(src1, 3); |
1063 | |
1064 | dst.x = dot4(src0, row0); |
1065 | dst.y = dot4(src0, row1); |
1066 | dst.z = dot4(src0, row2); |
1067 | dst.w = dot4(src0, row3); |
1068 | } |
1069 | |
1070 | void VertexProgram::BREAK() |
1071 | { |
1072 | enableBreak = enableBreak & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1073 | } |
1074 | |
1075 | void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) |
1076 | { |
1077 | Int4 condition; |
1078 | |
1079 | switch(control) |
1080 | { |
1081 | case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; |
1082 | case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; |
1083 | case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; |
1084 | case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; |
1085 | case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; |
1086 | case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; |
1087 | default: |
1088 | ASSERT(false); |
1089 | } |
1090 | |
1091 | BREAK(condition); |
1092 | } |
1093 | |
1094 | void VertexProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC |
1095 | { |
1096 | Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); |
1097 | |
1098 | if(predicateRegister.modifier == Shader::MODIFIER_NOT) |
1099 | { |
1100 | condition = ~condition; |
1101 | } |
1102 | |
1103 | BREAK(condition); |
1104 | } |
1105 | |
1106 | void VertexProgram::BREAK(Int4 &condition) |
1107 | { |
1108 | condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1109 | |
1110 | enableBreak = enableBreak & ~condition; |
1111 | } |
1112 | |
1113 | void VertexProgram::CONTINUE() |
1114 | { |
1115 | enableContinue = enableContinue & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1116 | } |
1117 | |
1118 | void VertexProgram::TEST() |
1119 | { |
1120 | enableContinue = restoreContinue.back(); |
1121 | restoreContinue.pop_back(); |
1122 | } |
1123 | |
1124 | void VertexProgram::SCALAR() |
1125 | { |
1126 | scalar = true; |
1127 | } |
1128 | |
1129 | void VertexProgram::CALL(int labelIndex, int callSiteIndex) |
1130 | { |
1131 | if(!labelBlock[labelIndex]) |
1132 | { |
1133 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1134 | } |
1135 | |
1136 | if(callRetBlock[labelIndex].size() > 1) |
1137 | { |
1138 | callStack[stackIndex++] = UInt(callSiteIndex); |
1139 | } |
1140 | |
1141 | Int4 restoreLeave = enableLeave; |
1142 | |
1143 | Nucleus::createBr(labelBlock[labelIndex]); |
1144 | Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); |
1145 | |
1146 | enableLeave = restoreLeave; |
1147 | } |
1148 | |
1149 | void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) |
1150 | { |
1151 | if(src.type == Shader::PARAMETER_CONSTBOOL) |
1152 | { |
1153 | CALLNZb(labelIndex, callSiteIndex, src); |
1154 | } |
1155 | else if(src.type == Shader::PARAMETER_PREDICATE) |
1156 | { |
1157 | CALLNZp(labelIndex, callSiteIndex, src); |
1158 | } |
1159 | else ASSERT(false); |
1160 | } |
1161 | |
1162 | void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) |
1163 | { |
1164 | Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME |
1165 | |
1166 | if(boolRegister.modifier == Shader::MODIFIER_NOT) |
1167 | { |
1168 | condition = !condition; |
1169 | } |
1170 | |
1171 | if(!labelBlock[labelIndex]) |
1172 | { |
1173 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1174 | } |
1175 | |
1176 | if(callRetBlock[labelIndex].size() > 1) |
1177 | { |
1178 | callStack[stackIndex++] = UInt(callSiteIndex); |
1179 | } |
1180 | |
1181 | Int4 restoreLeave = enableLeave; |
1182 | |
1183 | branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); |
1184 | Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); |
1185 | |
1186 | enableLeave = restoreLeave; |
1187 | } |
1188 | |
1189 | void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister) |
1190 | { |
1191 | Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); |
1192 | |
1193 | if(predicateRegister.modifier == Shader::MODIFIER_NOT) |
1194 | { |
1195 | condition = ~condition; |
1196 | } |
1197 | |
1198 | condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1199 | |
1200 | if(!labelBlock[labelIndex]) |
1201 | { |
1202 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1203 | } |
1204 | |
1205 | if(callRetBlock[labelIndex].size() > 1) |
1206 | { |
1207 | callStack[stackIndex++] = UInt(callSiteIndex); |
1208 | } |
1209 | |
1210 | enableIndex++; |
1211 | enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; |
1212 | Int4 restoreLeave = enableLeave; |
1213 | |
1214 | Bool notAllFalse = SignMask(condition) != 0; |
1215 | branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); |
1216 | Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); |
1217 | |
1218 | enableIndex--; |
1219 | enableLeave = restoreLeave; |
1220 | } |
1221 | |
1222 | void VertexProgram::ELSE() |
1223 | { |
1224 | ifDepth--; |
1225 | |
1226 | BasicBlock *falseBlock = ifFalseBlock[ifDepth]; |
1227 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1228 | |
1229 | if(isConditionalIf[ifDepth]) |
1230 | { |
1231 | Int4 condition = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1232 | Bool notAllFalse = SignMask(condition) != 0; |
1233 | |
1234 | branch(notAllFalse, falseBlock, endBlock); |
1235 | |
1236 | enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1237 | } |
1238 | else |
1239 | { |
1240 | Nucleus::createBr(endBlock); |
1241 | Nucleus::setInsertBlock(falseBlock); |
1242 | } |
1243 | |
1244 | ifFalseBlock[ifDepth] = endBlock; |
1245 | |
1246 | ifDepth++; |
1247 | } |
1248 | |
1249 | void VertexProgram::ENDIF() |
1250 | { |
1251 | ifDepth--; |
1252 | |
1253 | BasicBlock *endBlock = ifFalseBlock[ifDepth]; |
1254 | |
1255 | Nucleus::createBr(endBlock); |
1256 | Nucleus::setInsertBlock(endBlock); |
1257 | |
1258 | if(isConditionalIf[ifDepth]) |
1259 | { |
1260 | enableIndex--; |
1261 | } |
1262 | } |
1263 | |
1264 | void VertexProgram::ENDLOOP() |
1265 | { |
1266 | loopRepDepth--; |
1267 | |
1268 | aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += |
1269 | |
1270 | BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; |
1271 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1272 | |
1273 | Nucleus::createBr(testBlock); |
1274 | Nucleus::setInsertBlock(endBlock); |
1275 | |
1276 | loopDepth--; |
1277 | enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
1278 | } |
1279 | |
1280 | void VertexProgram::ENDREP() |
1281 | { |
1282 | loopRepDepth--; |
1283 | |
1284 | BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; |
1285 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1286 | |
1287 | Nucleus::createBr(testBlock); |
1288 | Nucleus::setInsertBlock(endBlock); |
1289 | |
1290 | loopDepth--; |
1291 | enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
1292 | } |
1293 | |
1294 | void VertexProgram::ENDWHILE() |
1295 | { |
1296 | loopRepDepth--; |
1297 | |
1298 | BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; |
1299 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1300 | |
1301 | Nucleus::createBr(testBlock); |
1302 | Nucleus::setInsertBlock(endBlock); |
1303 | |
1304 | enableIndex--; |
1305 | scalar = false; |
1306 | } |
1307 | |
1308 | void VertexProgram::ENDSWITCH() |
1309 | { |
1310 | loopRepDepth--; |
1311 | |
1312 | BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; |
1313 | |
1314 | Nucleus::createBr(endBlock); |
1315 | Nucleus::setInsertBlock(endBlock); |
1316 | } |
1317 | |
1318 | void VertexProgram::IF(const Src &src) |
1319 | { |
1320 | if(src.type == Shader::PARAMETER_CONSTBOOL) |
1321 | { |
1322 | IFb(src); |
1323 | } |
1324 | else if(src.type == Shader::PARAMETER_PREDICATE) |
1325 | { |
1326 | IFp(src); |
1327 | } |
1328 | else |
1329 | { |
1330 | Int4 condition = As<Int4>(fetchRegister(src).x); |
1331 | IF(condition); |
1332 | } |
1333 | } |
1334 | |
1335 | void VertexProgram::IFb(const Src &boolRegister) |
1336 | { |
1337 | ASSERT(ifDepth < 24 + 4); |
1338 | |
1339 | Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME |
1340 | |
1341 | if(boolRegister.modifier == Shader::MODIFIER_NOT) |
1342 | { |
1343 | condition = !condition; |
1344 | } |
1345 | |
1346 | BasicBlock *trueBlock = Nucleus::createBasicBlock(); |
1347 | BasicBlock *falseBlock = Nucleus::createBasicBlock(); |
1348 | |
1349 | branch(condition, trueBlock, falseBlock); |
1350 | |
1351 | isConditionalIf[ifDepth] = false; |
1352 | ifFalseBlock[ifDepth] = falseBlock; |
1353 | |
1354 | ifDepth++; |
1355 | } |
1356 | |
1357 | void VertexProgram::IFp(const Src &predicateRegister) |
1358 | { |
1359 | Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); |
1360 | |
1361 | if(predicateRegister.modifier == Shader::MODIFIER_NOT) |
1362 | { |
1363 | condition = ~condition; |
1364 | } |
1365 | |
1366 | IF(condition); |
1367 | } |
1368 | |
1369 | void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control) |
1370 | { |
1371 | Int4 condition; |
1372 | |
1373 | switch(control) |
1374 | { |
1375 | case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; |
1376 | case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; |
1377 | case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; |
1378 | case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; |
1379 | case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; |
1380 | case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; |
1381 | default: |
1382 | ASSERT(false); |
1383 | } |
1384 | |
1385 | IF(condition); |
1386 | } |
1387 | |
1388 | void VertexProgram::IF(Int4 &condition) |
1389 | { |
1390 | condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1391 | |
1392 | enableIndex++; |
1393 | enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; |
1394 | |
1395 | BasicBlock *trueBlock = Nucleus::createBasicBlock(); |
1396 | BasicBlock *falseBlock = Nucleus::createBasicBlock(); |
1397 | |
1398 | Bool notAllFalse = SignMask(condition) != 0; |
1399 | |
1400 | branch(notAllFalse, trueBlock, falseBlock); |
1401 | |
1402 | isConditionalIf[ifDepth] = true; |
1403 | ifFalseBlock[ifDepth] = falseBlock; |
1404 | |
1405 | ifDepth++; |
1406 | } |
1407 | |
1408 | void VertexProgram::LABEL(int labelIndex) |
1409 | { |
1410 | if(!labelBlock[labelIndex]) |
1411 | { |
1412 | labelBlock[labelIndex] = Nucleus::createBasicBlock(); |
1413 | } |
1414 | |
1415 | Nucleus::setInsertBlock(labelBlock[labelIndex]); |
1416 | currentLabel = labelIndex; |
1417 | } |
1418 | |
1419 | void VertexProgram::LOOP(const Src &integerRegister) |
1420 | { |
1421 | loopDepth++; |
1422 | |
1423 | iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0])); |
1424 | aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1])); |
1425 | increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2])); |
1426 | |
1427 | // FIXME: Compiles to two instructions? |
1428 | If(increment[loopDepth] == 0) |
1429 | { |
1430 | increment[loopDepth] = 1; |
1431 | } |
1432 | |
1433 | BasicBlock *loopBlock = Nucleus::createBasicBlock(); |
1434 | BasicBlock *testBlock = Nucleus::createBasicBlock(); |
1435 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1436 | |
1437 | loopRepTestBlock[loopRepDepth] = testBlock; |
1438 | loopRepEndBlock[loopRepDepth] = endBlock; |
1439 | |
1440 | // FIXME: jump(testBlock) |
1441 | Nucleus::createBr(testBlock); |
1442 | Nucleus::setInsertBlock(testBlock); |
1443 | |
1444 | branch(iteration[loopDepth] > 0, loopBlock, endBlock); |
1445 | Nucleus::setInsertBlock(loopBlock); |
1446 | |
1447 | iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- |
1448 | |
1449 | loopRepDepth++; |
1450 | } |
1451 | |
1452 | void VertexProgram::REP(const Src &integerRegister) |
1453 | { |
1454 | loopDepth++; |
1455 | |
1456 | iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0])); |
1457 | aL[loopDepth] = aL[loopDepth - 1]; |
1458 | |
1459 | BasicBlock *loopBlock = Nucleus::createBasicBlock(); |
1460 | BasicBlock *testBlock = Nucleus::createBasicBlock(); |
1461 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1462 | |
1463 | loopRepTestBlock[loopRepDepth] = testBlock; |
1464 | loopRepEndBlock[loopRepDepth] = endBlock; |
1465 | |
1466 | // FIXME: jump(testBlock) |
1467 | Nucleus::createBr(testBlock); |
1468 | Nucleus::setInsertBlock(testBlock); |
1469 | |
1470 | branch(iteration[loopDepth] > 0, loopBlock, endBlock); |
1471 | Nucleus::setInsertBlock(loopBlock); |
1472 | |
1473 | iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- |
1474 | |
1475 | loopRepDepth++; |
1476 | } |
1477 | |
1478 | void VertexProgram::WHILE(const Src &temporaryRegister) |
1479 | { |
1480 | enableIndex++; |
1481 | |
1482 | BasicBlock *loopBlock = Nucleus::createBasicBlock(); |
1483 | BasicBlock *testBlock = Nucleus::createBasicBlock(); |
1484 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1485 | |
1486 | loopRepTestBlock[loopRepDepth] = testBlock; |
1487 | loopRepEndBlock[loopRepDepth] = endBlock; |
1488 | |
1489 | Int4 restoreBreak = enableBreak; |
1490 | restoreContinue.push_back(enableContinue); |
1491 | |
1492 | // TODO: jump(testBlock) |
1493 | Nucleus::createBr(testBlock); |
1494 | Nucleus::setInsertBlock(testBlock); |
1495 | |
1496 | const Vector4f &src = fetchRegister(temporaryRegister); |
1497 | Int4 condition = As<Int4>(src.x); |
1498 | condition &= enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1499 | if(shader->containsLeaveInstruction()) condition &= enableLeave; |
1500 | if(shader->containsBreakInstruction()) condition &= enableBreak; |
1501 | enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; |
1502 | |
1503 | Bool notAllFalse = SignMask(condition) != 0; |
1504 | branch(notAllFalse, loopBlock, endBlock); |
1505 | |
1506 | Nucleus::setInsertBlock(endBlock); |
1507 | enableBreak = restoreBreak; |
1508 | |
1509 | Nucleus::setInsertBlock(loopBlock); |
1510 | |
1511 | loopRepDepth++; |
1512 | scalar = false; |
1513 | } |
1514 | |
1515 | void VertexProgram::SWITCH() |
1516 | { |
1517 | BasicBlock *endBlock = Nucleus::createBasicBlock(); |
1518 | |
1519 | loopRepTestBlock[loopRepDepth] = nullptr; |
1520 | loopRepEndBlock[loopRepDepth] = endBlock; |
1521 | |
1522 | Int4 restoreBreak = enableBreak; |
1523 | |
1524 | BasicBlock *currentBlock = Nucleus::getInsertBlock(); |
1525 | |
1526 | Nucleus::setInsertBlock(endBlock); |
1527 | enableBreak = restoreBreak; |
1528 | |
1529 | Nucleus::setInsertBlock(currentBlock); |
1530 | |
1531 | loopRepDepth++; |
1532 | } |
1533 | |
1534 | void VertexProgram::RET() |
1535 | { |
1536 | if(currentLabel == -1) |
1537 | { |
1538 | returnBlock = Nucleus::createBasicBlock(); |
1539 | Nucleus::createBr(returnBlock); |
1540 | } |
1541 | else |
1542 | { |
1543 | BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); |
1544 | |
1545 | if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack |
1546 | { |
1547 | // FIXME: Encapsulate |
1548 | UInt index = callStack[--stackIndex]; |
1549 | |
1550 | Value *value = index.loadValue(); |
1551 | SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); |
1552 | |
1553 | for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) |
1554 | { |
1555 | Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]); |
1556 | } |
1557 | } |
1558 | else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination |
1559 | { |
1560 | Nucleus::createBr(callRetBlock[currentLabel][0]); |
1561 | } |
1562 | else // Function isn't called |
1563 | { |
1564 | Nucleus::createBr(unreachableBlock); |
1565 | } |
1566 | |
1567 | Nucleus::setInsertBlock(unreachableBlock); |
1568 | Nucleus::createUnreachable(); |
1569 | } |
1570 | } |
1571 | |
1572 | void VertexProgram::LEAVE() |
1573 | { |
1574 | enableLeave = enableLeave & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; |
1575 | |
1576 | // FIXME: Return from function if all instances left |
1577 | // FIXME: Use enableLeave in other control-flow constructs |
1578 | } |
1579 | |
1580 | void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1) |
1581 | { |
1582 | dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), (src0), Base); |
1583 | } |
1584 | |
1585 | void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset) |
1586 | { |
1587 | dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Base, Offset}); |
1588 | } |
1589 | |
1590 | void VertexProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) |
1591 | { |
1592 | dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod); |
1593 | } |
1594 | |
1595 | void VertexProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) |
1596 | { |
1597 | dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset}); |
1598 | } |
1599 | |
1600 | void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) |
1601 | { |
1602 | dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch); |
1603 | } |
1604 | |
1605 | void VertexProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) |
1606 | { |
1607 | dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset}); |
1608 | } |
1609 | |
1610 | void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy) |
1611 | { |
1612 | dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, src0, Grad); |
1613 | } |
1614 | |
1615 | void VertexProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset) |
1616 | { |
1617 | dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset}); |
1618 | } |
1619 | |
1620 | void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) |
1621 | { |
1622 | bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID); |
1623 | Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture); |
1624 | Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + offset; |
1625 | |
1626 | dst = SamplerCore::textureSize(texture, lod); |
1627 | } |
1628 | |
1629 | Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) |
1630 | { |
1631 | Vector4f tmp; |
1632 | |
1633 | if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID) |
1634 | { |
1635 | tmp = sampleTexture(s.index, uvwq, lod, dsx, dsy, offset, function); |
1636 | } |
1637 | else |
1638 | { |
1639 | Int index = As<Int>(Float(fetchRegister(s).x.x)); |
1640 | |
1641 | for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++) |
1642 | { |
1643 | if(shader->usesSampler(i)) |
1644 | { |
1645 | If(index == i) |
1646 | { |
1647 | tmp = sampleTexture(i, uvwq, lod, dsx, dsy, offset, function); |
1648 | // FIXME: When the sampler states are the same, we could use one sampler and just index the texture |
1649 | } |
1650 | } |
1651 | } |
1652 | } |
1653 | |
1654 | Vector4f c; |
1655 | c.x = tmp[(s.swizzle >> 0) & 0x3]; |
1656 | c.y = tmp[(s.swizzle >> 2) & 0x3]; |
1657 | c.z = tmp[(s.swizzle >> 4) & 0x3]; |
1658 | c.w = tmp[(s.swizzle >> 6) & 0x3]; |
1659 | |
1660 | return c; |
1661 | } |
1662 | |
1663 | Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) |
1664 | { |
1665 | Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + sampler * sizeof(Texture); |
1666 | return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, lod, dsx, dsy, offset, function); |
1667 | } |
1668 | } |
1669 | |