1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "Shader.hpp" |
16 | |
17 | #include "VertexShader.hpp" |
18 | #include "PixelShader.hpp" |
19 | #include "Common/Math.hpp" |
20 | #include "Common/Debug.hpp" |
21 | |
#include <algorithm>
#include <cstring>
#include <fstream>
#include <functional>
#include <set>
#include <sstream>
#include <stdarg.h>
#include <unordered_map>
#include <unordered_set>
30 | |
31 | namespace sw |
32 | { |
// Definition of the class-wide serial counter; its first value is 1.
// NOTE(review): presumably used to hand out unique shader serial numbers - confirm at the use sites.
volatile int Shader::serialCounter = 1;
34 | |
35 | Shader::Opcode Shader::OPCODE_DP(int i) |
36 | { |
37 | switch(i) |
38 | { |
39 | default: ASSERT(false); |
40 | case 1: return OPCODE_DP1; |
41 | case 2: return OPCODE_DP2; |
42 | case 3: return OPCODE_DP3; |
43 | case 4: return OPCODE_DP4; |
44 | } |
45 | } |
46 | |
47 | Shader::Opcode Shader::OPCODE_LEN(int i) |
48 | { |
49 | switch(i) |
50 | { |
51 | default: ASSERT(false); |
52 | case 1: return OPCODE_ABS; |
53 | case 2: return OPCODE_LEN2; |
54 | case 3: return OPCODE_LEN3; |
55 | case 4: return OPCODE_LEN4; |
56 | } |
57 | } |
58 | |
59 | Shader::Opcode Shader::OPCODE_DIST(int i) |
60 | { |
61 | switch(i) |
62 | { |
63 | default: ASSERT(false); |
64 | case 1: return OPCODE_DIST1; |
65 | case 2: return OPCODE_DIST2; |
66 | case 3: return OPCODE_DIST3; |
67 | case 4: return OPCODE_DIST4; |
68 | } |
69 | } |
70 | |
71 | Shader::Opcode Shader::OPCODE_NRM(int i) |
72 | { |
73 | switch(i) |
74 | { |
75 | default: ASSERT(false); |
76 | case 1: return OPCODE_SGN; |
77 | case 2: return OPCODE_NRM2; |
78 | case 3: return OPCODE_NRM3; |
79 | case 4: return OPCODE_NRM4; |
80 | } |
81 | } |
82 | |
83 | Shader::Opcode Shader::OPCODE_FORWARD(int i) |
84 | { |
85 | switch(i) |
86 | { |
87 | default: ASSERT(false); |
88 | case 1: return OPCODE_FORWARD1; |
89 | case 2: return OPCODE_FORWARD2; |
90 | case 3: return OPCODE_FORWARD3; |
91 | case 4: return OPCODE_FORWARD4; |
92 | } |
93 | } |
94 | |
95 | Shader::Opcode Shader::OPCODE_REFLECT(int i) |
96 | { |
97 | switch(i) |
98 | { |
99 | default: ASSERT(false); |
100 | case 1: return OPCODE_REFLECT1; |
101 | case 2: return OPCODE_REFLECT2; |
102 | case 3: return OPCODE_REFLECT3; |
103 | case 4: return OPCODE_REFLECT4; |
104 | } |
105 | } |
106 | |
107 | Shader::Opcode Shader::OPCODE_REFRACT(int i) |
108 | { |
109 | switch(i) |
110 | { |
111 | default: ASSERT(false); |
112 | case 1: return OPCODE_REFRACT1; |
113 | case 2: return OPCODE_REFRACT2; |
114 | case 3: return OPCODE_REFRACT3; |
115 | case 4: return OPCODE_REFRACT4; |
116 | } |
117 | } |
118 | |
119 | Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0) |
120 | { |
121 | control = CONTROL_RESERVED0; |
122 | |
123 | predicate = false; |
124 | predicateNot = false; |
125 | predicateSwizzle = 0xE4; |
126 | |
127 | coissue = false; |
128 | samplerType = SAMPLER_UNKNOWN; |
129 | usage = USAGE_POSITION; |
130 | usageIndex = 0; |
131 | } |
132 | |
133 | Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0) |
134 | { |
135 | parseOperationToken(*token++, majorVersion); |
136 | |
137 | samplerType = SAMPLER_UNKNOWN; |
138 | usage = USAGE_POSITION; |
139 | usageIndex = 0; |
140 | |
141 | if(opcode == OPCODE_IF || |
142 | opcode == OPCODE_IFC || |
143 | opcode == OPCODE_LOOP || |
144 | opcode == OPCODE_REP || |
145 | opcode == OPCODE_BREAKC || |
146 | opcode == OPCODE_BREAKP) // No destination operand |
147 | { |
148 | if(size > 0) parseSourceToken(0, token++, majorVersion); |
149 | if(size > 1) parseSourceToken(1, token++, majorVersion); |
150 | if(size > 2) parseSourceToken(2, token++, majorVersion); |
151 | if(size > 3) ASSERT(false); |
152 | } |
153 | else if(opcode == OPCODE_DCL) |
154 | { |
155 | parseDeclarationToken(*token++); |
156 | parseDestinationToken(token++, majorVersion); |
157 | } |
158 | else |
159 | { |
160 | if(size > 0) |
161 | { |
162 | parseDestinationToken(token, majorVersion); |
163 | |
164 | if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3) |
165 | { |
166 | token++; |
167 | size--; |
168 | } |
169 | |
170 | token++; |
171 | size--; |
172 | } |
173 | |
174 | if(predicate) |
175 | { |
176 | ASSERT(size != 0); |
177 | |
178 | predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT; |
179 | predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16); |
180 | |
181 | token++; |
182 | size--; |
183 | } |
184 | |
185 | for(int i = 0; size > 0; i++) |
186 | { |
187 | parseSourceToken(i, token, majorVersion); |
188 | |
189 | token++; |
190 | size--; |
191 | |
192 | if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2) |
193 | { |
194 | token++; |
195 | size--; |
196 | } |
197 | } |
198 | } |
199 | } |
200 | |
201 | Shader::Instruction::~Instruction() |
202 | { |
203 | } |
204 | |
205 | std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const |
206 | { |
207 | std::string instructionString; |
208 | |
209 | if(opcode != OPCODE_DCL) |
210 | { |
211 | instructionString += coissue ? "+ " : "" ; |
212 | |
213 | if(predicate) |
214 | { |
215 | instructionString += predicateNot ? "(!p0" : "(p0" ; |
216 | instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle); |
217 | instructionString += ") " ; |
218 | } |
219 | |
220 | instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString(); |
221 | |
222 | if(dst.type != PARAMETER_VOID) |
223 | { |
224 | instructionString += " " + dst.string(shaderType, version) + |
225 | dst.relativeString() + |
226 | dst.maskString(); |
227 | } |
228 | |
229 | for(int i = 0; i < 4; i++) |
230 | { |
231 | if(src[i].type != PARAMETER_VOID) |
232 | { |
233 | instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " " ; |
234 | instructionString += src[i].preModifierString() + |
235 | src[i].string(shaderType, version) + |
236 | src[i].relativeString() + |
237 | src[i].postModifierString() + |
238 | src[i].swizzleString(); |
239 | } |
240 | } |
241 | } |
242 | else // DCL |
243 | { |
244 | instructionString += "dcl" ; |
245 | |
246 | if(dst.type == PARAMETER_SAMPLER) |
247 | { |
248 | switch(samplerType) |
249 | { |
250 | case SAMPLER_UNKNOWN: instructionString += " " ; break; |
251 | case SAMPLER_1D: instructionString += "_1d " ; break; |
252 | case SAMPLER_2D: instructionString += "_2d " ; break; |
253 | case SAMPLER_CUBE: instructionString += "_cube " ; break; |
254 | case SAMPLER_VOLUME: instructionString += "_volume " ; break; |
255 | default: |
256 | ASSERT(false); |
257 | } |
258 | |
259 | instructionString += dst.string(shaderType, version); |
260 | } |
261 | else if(dst.type == PARAMETER_INPUT || |
262 | dst.type == PARAMETER_OUTPUT || |
263 | dst.type == PARAMETER_TEXTURE) |
264 | { |
265 | if(version >= 0x0300) |
266 | { |
267 | switch(usage) |
268 | { |
269 | case USAGE_POSITION: instructionString += "_position" ; break; |
270 | case USAGE_BLENDWEIGHT: instructionString += "_blendweight" ; break; |
271 | case USAGE_BLENDINDICES: instructionString += "_blendindices" ; break; |
272 | case USAGE_NORMAL: instructionString += "_normal" ; break; |
273 | case USAGE_PSIZE: instructionString += "_psize" ; break; |
274 | case USAGE_TEXCOORD: instructionString += "_texcoord" ; break; |
275 | case USAGE_TANGENT: instructionString += "_tangent" ; break; |
276 | case USAGE_BINORMAL: instructionString += "_binormal" ; break; |
277 | case USAGE_TESSFACTOR: instructionString += "_tessfactor" ; break; |
278 | case USAGE_POSITIONT: instructionString += "_positiont" ; break; |
279 | case USAGE_COLOR: instructionString += "_color" ; break; |
280 | case USAGE_FOG: instructionString += "_fog" ; break; |
281 | case USAGE_DEPTH: instructionString += "_depth" ; break; |
282 | case USAGE_SAMPLE: instructionString += "_sample" ; break; |
283 | default: |
284 | ASSERT(false); |
285 | } |
286 | |
287 | if(usageIndex > 0) |
288 | { |
289 | std::ostringstream buffer; |
290 | |
291 | buffer << (int)usageIndex; |
292 | |
293 | instructionString += buffer.str(); |
294 | } |
295 | } |
296 | else ASSERT(dst.type != PARAMETER_OUTPUT); |
297 | |
298 | instructionString += " " ; |
299 | |
300 | instructionString += dst.string(shaderType, version); |
301 | instructionString += dst.maskString(); |
302 | } |
303 | else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace |
304 | { |
305 | instructionString += " " ; |
306 | |
307 | instructionString += dst.string(shaderType, version); |
308 | } |
309 | else ASSERT(false); |
310 | } |
311 | |
312 | return instructionString; |
313 | } |
314 | |
315 | std::string Shader::DestinationParameter::modifierString() const |
316 | { |
317 | if(type == PARAMETER_VOID || type == PARAMETER_LABEL) |
318 | { |
319 | return "" ; |
320 | } |
321 | |
322 | std::string modifierString; |
323 | |
324 | if(saturate) |
325 | { |
326 | modifierString += "_sat" ; |
327 | } |
328 | |
329 | if(partialPrecision) |
330 | { |
331 | modifierString += "_pp" ; |
332 | } |
333 | |
334 | if(centroid) |
335 | { |
336 | modifierString += "_centroid" ; |
337 | } |
338 | |
339 | return modifierString; |
340 | } |
341 | |
342 | std::string Shader::DestinationParameter::shiftString() const |
343 | { |
344 | if(type == PARAMETER_VOID || type == PARAMETER_LABEL) |
345 | { |
346 | return "" ; |
347 | } |
348 | |
349 | switch(shift) |
350 | { |
351 | case 0: return "" ; |
352 | case 1: return "_x2" ; |
353 | case 2: return "_x4" ; |
354 | case 3: return "_x8" ; |
355 | case -1: return "_d2" ; |
356 | case -2: return "_d4" ; |
357 | case -3: return "_d8" ; |
358 | default: |
359 | return "" ; |
360 | // ASSERT(false); // FIXME |
361 | } |
362 | } |
363 | |
364 | std::string Shader::DestinationParameter::maskString() const |
365 | { |
366 | if(type == PARAMETER_VOID || type == PARAMETER_LABEL) |
367 | { |
368 | return "" ; |
369 | } |
370 | |
371 | switch(mask) |
372 | { |
373 | case 0x0: return "" ; |
374 | case 0x1: return ".x" ; |
375 | case 0x2: return ".y" ; |
376 | case 0x3: return ".xy" ; |
377 | case 0x4: return ".z" ; |
378 | case 0x5: return ".xz" ; |
379 | case 0x6: return ".yz" ; |
380 | case 0x7: return ".xyz" ; |
381 | case 0x8: return ".w" ; |
382 | case 0x9: return ".xw" ; |
383 | case 0xA: return ".yw" ; |
384 | case 0xB: return ".xyw" ; |
385 | case 0xC: return ".zw" ; |
386 | case 0xD: return ".xzw" ; |
387 | case 0xE: return ".yzw" ; |
388 | case 0xF: return "" ; |
389 | default: |
390 | ASSERT(false); |
391 | } |
392 | |
393 | return "" ; |
394 | } |
395 | |
396 | std::string Shader::SourceParameter::preModifierString() const |
397 | { |
398 | if(type == PARAMETER_VOID) |
399 | { |
400 | return "" ; |
401 | } |
402 | |
403 | switch(modifier) |
404 | { |
405 | case MODIFIER_NONE: return "" ; |
406 | case MODIFIER_NEGATE: return "-" ; |
407 | case MODIFIER_BIAS: return "" ; |
408 | case MODIFIER_BIAS_NEGATE: return "-" ; |
409 | case MODIFIER_SIGN: return "" ; |
410 | case MODIFIER_SIGN_NEGATE: return "-" ; |
411 | case MODIFIER_COMPLEMENT: return "1-" ; |
412 | case MODIFIER_X2: return "" ; |
413 | case MODIFIER_X2_NEGATE: return "-" ; |
414 | case MODIFIER_DZ: return "" ; |
415 | case MODIFIER_DW: return "" ; |
416 | case MODIFIER_ABS: return "" ; |
417 | case MODIFIER_ABS_NEGATE: return "-" ; |
418 | case MODIFIER_NOT: return "!" ; |
419 | default: |
420 | ASSERT(false); |
421 | } |
422 | |
423 | return "" ; |
424 | } |
425 | |
426 | std::string Shader::Parameter::relativeString() const |
427 | { |
428 | if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP) |
429 | { |
430 | if(rel.type == PARAMETER_VOID) |
431 | { |
432 | return "" ; |
433 | } |
434 | else if(rel.type == PARAMETER_ADDR) |
435 | { |
436 | switch(rel.swizzle & 0x03) |
437 | { |
438 | case 0: return "[a0.x]" ; |
439 | case 1: return "[a0.y]" ; |
440 | case 2: return "[a0.z]" ; |
441 | case 3: return "[a0.w]" ; |
442 | } |
443 | } |
444 | else if(rel.type == PARAMETER_TEMP) |
445 | { |
446 | std::ostringstream buffer; |
447 | buffer << rel.index; |
448 | |
449 | switch(rel.swizzle & 0x03) |
450 | { |
451 | case 0: return "[r" + buffer.str() + ".x]" ; |
452 | case 1: return "[r" + buffer.str() + ".y]" ; |
453 | case 2: return "[r" + buffer.str() + ".z]" ; |
454 | case 3: return "[r" + buffer.str() + ".w]" ; |
455 | } |
456 | } |
457 | else if(rel.type == PARAMETER_LOOP) |
458 | { |
459 | return "[aL]" ; |
460 | } |
461 | else if(rel.type == PARAMETER_CONST) |
462 | { |
463 | std::ostringstream buffer; |
464 | buffer << rel.index; |
465 | |
466 | switch(rel.swizzle & 0x03) |
467 | { |
468 | case 0: return "[c" + buffer.str() + ".x]" ; |
469 | case 1: return "[c" + buffer.str() + ".y]" ; |
470 | case 2: return "[c" + buffer.str() + ".z]" ; |
471 | case 3: return "[c" + buffer.str() + ".w]" ; |
472 | } |
473 | } |
474 | else ASSERT(false); |
475 | } |
476 | |
477 | return "" ; |
478 | } |
479 | |
480 | std::string Shader::SourceParameter::postModifierString() const |
481 | { |
482 | if(type == PARAMETER_VOID) |
483 | { |
484 | return "" ; |
485 | } |
486 | |
487 | switch(modifier) |
488 | { |
489 | case MODIFIER_NONE: return "" ; |
490 | case MODIFIER_NEGATE: return "" ; |
491 | case MODIFIER_BIAS: return "_bias" ; |
492 | case MODIFIER_BIAS_NEGATE: return "_bias" ; |
493 | case MODIFIER_SIGN: return "_bx2" ; |
494 | case MODIFIER_SIGN_NEGATE: return "_bx2" ; |
495 | case MODIFIER_COMPLEMENT: return "" ; |
496 | case MODIFIER_X2: return "_x2" ; |
497 | case MODIFIER_X2_NEGATE: return "_x2" ; |
498 | case MODIFIER_DZ: return "_dz" ; |
499 | case MODIFIER_DW: return "_dw" ; |
500 | case MODIFIER_ABS: return "_abs" ; |
501 | case MODIFIER_ABS_NEGATE: return "_abs" ; |
502 | case MODIFIER_NOT: return "" ; |
503 | default: |
504 | ASSERT(false); |
505 | } |
506 | |
507 | return "" ; |
508 | } |
509 | |
510 | std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const |
511 | { |
512 | if(type == PARAMETER_CONST && bufferIndex >= 0) |
513 | { |
514 | std::ostringstream buffer; |
515 | buffer << bufferIndex; |
516 | |
517 | std::ostringstream offset; |
518 | offset << index; |
519 | |
520 | return "cb" + buffer.str() + "[" + offset.str() + "]" ; |
521 | } |
522 | else |
523 | { |
524 | return Parameter::string(shaderType, version); |
525 | } |
526 | } |
527 | |
// Returns the swizzle suffix of this source operand by forwarding its type
// and swizzle byte to Instruction::swizzleString().
std::string Shader::SourceParameter::swizzleString() const
{
	return Instruction::swizzleString(type, swizzle);
}
532 | |
533 | void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion) |
534 | { |
535 | if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token |
536 | { |
537 | opcode = (Opcode)token; |
538 | |
539 | control = CONTROL_RESERVED0; |
540 | predicate = false; |
541 | coissue = false; |
542 | } |
543 | else |
544 | { |
545 | opcode = (Opcode)(token & 0x0000FFFF); |
546 | control = (Control)((token & 0x00FF0000) >> 16); |
547 | |
548 | int size = (token & 0x0F000000) >> 24; |
549 | |
550 | predicate = (token & 0x10000000) != 0x00000000; |
551 | coissue = (token & 0x40000000) != 0x00000000; |
552 | |
553 | if(majorVersion < 2) |
554 | { |
555 | if(size != 0) |
556 | { |
557 | ASSERT(false); // Reserved |
558 | } |
559 | } |
560 | |
561 | if(majorVersion < 2) |
562 | { |
563 | if(predicate) |
564 | { |
565 | ASSERT(false); |
566 | } |
567 | } |
568 | |
569 | if((token & 0x20000000) != 0x00000000) |
570 | { |
571 | ASSERT(false); // Reserved |
572 | } |
573 | |
574 | if(majorVersion >= 2) |
575 | { |
576 | if(coissue) |
577 | { |
578 | ASSERT(false); // Reserved |
579 | } |
580 | } |
581 | |
582 | if((token & 0x80000000) != 0x00000000) |
583 | { |
584 | ASSERT(false); |
585 | } |
586 | } |
587 | } |
588 | |
589 | void Shader::Instruction::parseDeclarationToken(unsigned long token) |
590 | { |
591 | samplerType = (SamplerType)((token & 0x78000000) >> 27); |
592 | usage = (Usage)(token & 0x0000001F); |
593 | usageIndex = (unsigned char)((token & 0x000F0000) >> 16); |
594 | } |
595 | |
596 | void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion) |
597 | { |
598 | dst.index = (unsigned short)(token[0] & 0x000007FF); |
599 | dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); |
600 | |
601 | // TODO: Check type and index range |
602 | |
603 | bool relative = (token[0] & 0x00002000) != 0x00000000; |
604 | dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; |
605 | dst.rel.swizzle = 0x00; |
606 | dst.rel.scale = 1; |
607 | |
608 | if(relative && majorVersion >= 3) |
609 | { |
610 | dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); |
611 | dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); |
612 | } |
613 | else if(relative) ASSERT(false); // Reserved |
614 | |
615 | if((token[0] & 0x0000C000) != 0x00000000) |
616 | { |
617 | ASSERT(false); // Reserved |
618 | } |
619 | |
620 | dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16); |
621 | dst.saturate = (token[0] & 0x00100000) != 0; |
622 | dst.partialPrecision = (token[0] & 0x00200000) != 0; |
623 | dst.centroid = (token[0] & 0x00400000) != 0; |
624 | dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4; |
625 | |
626 | if(majorVersion >= 2) |
627 | { |
628 | if(dst.shift) |
629 | { |
630 | ASSERT(false); // Reserved |
631 | } |
632 | } |
633 | |
634 | if((token[0] & 0x80000000) != 0x80000000) |
635 | { |
636 | ASSERT(false); |
637 | } |
638 | } |
639 | |
640 | void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion) |
641 | { |
642 | // Defaults |
643 | src[i].index = 0; |
644 | src[i].type = PARAMETER_VOID; |
645 | src[i].modifier = MODIFIER_NONE; |
646 | src[i].swizzle = 0xE4; |
647 | src[i].rel.type = PARAMETER_VOID; |
648 | src[i].rel.swizzle = 0x00; |
649 | src[i].rel.scale = 1; |
650 | |
651 | switch(opcode) |
652 | { |
653 | case OPCODE_DEF: |
654 | src[0].type = PARAMETER_FLOAT4LITERAL; |
655 | src[0].value[i] = *(float*)token; |
656 | break; |
657 | case OPCODE_DEFB: |
658 | src[0].type = PARAMETER_BOOL1LITERAL; |
659 | src[0].boolean[0] = *(int*)token; |
660 | break; |
661 | case OPCODE_DEFI: |
662 | src[0].type = PARAMETER_INT4LITERAL; |
663 | src[0].integer[i] = *(int*)token; |
664 | break; |
665 | default: |
666 | src[i].index = (unsigned short)(token[0] & 0x000007FF); |
667 | src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28)); |
668 | |
669 | // FIXME: Check type and index range |
670 | |
671 | bool relative = (token[0] & 0x00002000) != 0x00000000; |
672 | src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID; |
673 | |
674 | if((token[0] & 0x0000C000) != 0x00000000) |
675 | { |
676 | if(opcode != OPCODE_DEF && |
677 | opcode != OPCODE_DEFI && |
678 | opcode != OPCODE_DEFB) |
679 | { |
680 | ASSERT(false); |
681 | } |
682 | } |
683 | |
684 | src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16); |
685 | src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24); |
686 | |
687 | if((token[0] & 0x80000000) != 0x80000000) |
688 | { |
689 | if(opcode != OPCODE_DEF && |
690 | opcode != OPCODE_DEFI && |
691 | opcode != OPCODE_DEFB) |
692 | { |
693 | ASSERT(false); |
694 | } |
695 | } |
696 | |
697 | if(relative && majorVersion >= 2) |
698 | { |
699 | src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28)); |
700 | src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16); |
701 | } |
702 | } |
703 | } |
704 | |
705 | std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle) |
706 | { |
707 | if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4) |
708 | { |
709 | return "" ; |
710 | } |
711 | |
712 | int x = (swizzle & 0x03) >> 0; |
713 | int y = (swizzle & 0x0C) >> 2; |
714 | int z = (swizzle & 0x30) >> 4; |
715 | int w = (swizzle & 0xC0) >> 6; |
716 | |
717 | std::string swizzleString = "." ; |
718 | |
719 | switch(x) |
720 | { |
721 | case 0: swizzleString += "x" ; break; |
722 | case 1: swizzleString += "y" ; break; |
723 | case 2: swizzleString += "z" ; break; |
724 | case 3: swizzleString += "w" ; break; |
725 | } |
726 | |
727 | if(!(x == y && y == z && z == w)) |
728 | { |
729 | switch(y) |
730 | { |
731 | case 0: swizzleString += "x" ; break; |
732 | case 1: swizzleString += "y" ; break; |
733 | case 2: swizzleString += "z" ; break; |
734 | case 3: swizzleString += "w" ; break; |
735 | } |
736 | |
737 | if(!(y == z && z == w)) |
738 | { |
739 | switch(z) |
740 | { |
741 | case 0: swizzleString += "x" ; break; |
742 | case 1: swizzleString += "y" ; break; |
743 | case 2: swizzleString += "z" ; break; |
744 | case 3: swizzleString += "w" ; break; |
745 | } |
746 | |
747 | if(!(z == w)) |
748 | { |
749 | switch(w) |
750 | { |
751 | case 0: swizzleString += "x" ; break; |
752 | case 1: swizzleString += "y" ; break; |
753 | case 2: swizzleString += "z" ; break; |
754 | case 3: swizzleString += "w" ; break; |
755 | } |
756 | } |
757 | } |
758 | } |
759 | |
760 | return swizzleString; |
761 | } |
762 | |
763 | std::string Shader::Instruction::operationString(unsigned short version) const |
764 | { |
765 | switch(opcode) |
766 | { |
767 | case OPCODE_NULL: return "null" ; |
768 | case OPCODE_NOP: return "nop" ; |
769 | case OPCODE_MOV: return "mov" ; |
770 | case OPCODE_ADD: return "add" ; |
771 | case OPCODE_IADD: return "iadd" ; |
772 | case OPCODE_SUB: return "sub" ; |
773 | case OPCODE_ISUB: return "isub" ; |
774 | case OPCODE_MAD: return "mad" ; |
775 | case OPCODE_IMAD: return "imad" ; |
776 | case OPCODE_MUL: return "mul" ; |
777 | case OPCODE_IMUL: return "imul" ; |
778 | case OPCODE_RCPX: return "rcpx" ; |
779 | case OPCODE_DIV: return "div" ; |
780 | case OPCODE_IDIV: return "idiv" ; |
781 | case OPCODE_UDIV: return "udiv" ; |
782 | case OPCODE_MOD: return "mod" ; |
783 | case OPCODE_IMOD: return "imod" ; |
784 | case OPCODE_UMOD: return "umod" ; |
785 | case OPCODE_SHL: return "shl" ; |
786 | case OPCODE_ISHR: return "ishr" ; |
787 | case OPCODE_USHR: return "ushr" ; |
788 | case OPCODE_RSQX: return "rsqx" ; |
789 | case OPCODE_SQRT: return "sqrt" ; |
790 | case OPCODE_RSQ: return "rsq" ; |
791 | case OPCODE_LEN2: return "len2" ; |
792 | case OPCODE_LEN3: return "len3" ; |
793 | case OPCODE_LEN4: return "len4" ; |
794 | case OPCODE_DIST1: return "dist1" ; |
795 | case OPCODE_DIST2: return "dist2" ; |
796 | case OPCODE_DIST3: return "dist3" ; |
797 | case OPCODE_DIST4: return "dist4" ; |
798 | case OPCODE_DP3: return "dp3" ; |
799 | case OPCODE_DP4: return "dp4" ; |
800 | case OPCODE_DET2: return "det2" ; |
801 | case OPCODE_DET3: return "det3" ; |
802 | case OPCODE_DET4: return "det4" ; |
803 | case OPCODE_MIN: return "min" ; |
804 | case OPCODE_IMIN: return "imin" ; |
805 | case OPCODE_UMIN: return "umin" ; |
806 | case OPCODE_MAX: return "max" ; |
807 | case OPCODE_IMAX: return "imax" ; |
808 | case OPCODE_UMAX: return "umax" ; |
809 | case OPCODE_SLT: return "slt" ; |
810 | case OPCODE_SGE: return "sge" ; |
811 | case OPCODE_EXP2X: return "exp2x" ; |
812 | case OPCODE_LOG2X: return "log2x" ; |
813 | case OPCODE_LIT: return "lit" ; |
814 | case OPCODE_ATT: return "att" ; |
815 | case OPCODE_LRP: return "lrp" ; |
816 | case OPCODE_STEP: return "step" ; |
817 | case OPCODE_SMOOTH: return "smooth" ; |
818 | case OPCODE_FLOATBITSTOINT: return "floatBitsToInt" ; |
819 | case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt" ; |
820 | case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat" ; |
821 | case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat" ; |
822 | case OPCODE_PACKSNORM2x16: return "packSnorm2x16" ; |
823 | case OPCODE_PACKUNORM2x16: return "packUnorm2x16" ; |
824 | case OPCODE_PACKHALF2x16: return "packHalf2x16" ; |
825 | case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16" ; |
826 | case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16" ; |
827 | case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16" ; |
828 | case OPCODE_FRC: return "frc" ; |
829 | case OPCODE_M4X4: return "m4x4" ; |
830 | case OPCODE_M4X3: return "m4x3" ; |
831 | case OPCODE_M3X4: return "m3x4" ; |
832 | case OPCODE_M3X3: return "m3x3" ; |
833 | case OPCODE_M3X2: return "m3x2" ; |
834 | case OPCODE_CALL: return "call" ; |
835 | case OPCODE_CALLNZ: return "callnz" ; |
836 | case OPCODE_LOOP: return "loop" ; |
837 | case OPCODE_RET: return "ret" ; |
838 | case OPCODE_ENDLOOP: return "endloop" ; |
839 | case OPCODE_LABEL: return "label" ; |
840 | case OPCODE_DCL: return "dcl" ; |
841 | case OPCODE_POWX: return "powx" ; |
842 | case OPCODE_CRS: return "crs" ; |
843 | case OPCODE_SGN: return "sgn" ; |
844 | case OPCODE_ISGN: return "isgn" ; |
845 | case OPCODE_ABS: return "abs" ; |
846 | case OPCODE_IABS: return "iabs" ; |
847 | case OPCODE_NRM2: return "nrm2" ; |
848 | case OPCODE_NRM3: return "nrm3" ; |
849 | case OPCODE_NRM4: return "nrm4" ; |
850 | case OPCODE_SINCOS: return "sincos" ; |
851 | case OPCODE_REP: return "rep" ; |
852 | case OPCODE_ENDREP: return "endrep" ; |
853 | case OPCODE_IF: return "if" ; |
854 | case OPCODE_IFC: return "ifc" ; |
855 | case OPCODE_ELSE: return "else" ; |
856 | case OPCODE_ENDIF: return "endif" ; |
857 | case OPCODE_BREAK: return "break" ; |
858 | case OPCODE_BREAKC: return "breakc" ; |
859 | case OPCODE_MOVA: return "mova" ; |
860 | case OPCODE_DEFB: return "defb" ; |
861 | case OPCODE_DEFI: return "defi" ; |
862 | case OPCODE_TEXCOORD: return "texcoord" ; |
863 | case OPCODE_TEXKILL: return "texkill" ; |
864 | case OPCODE_DISCARD: return "discard" ; |
865 | case OPCODE_TEX: |
866 | if(version < 0x0104) return "tex" ; |
867 | else return "texld" ; |
868 | case OPCODE_TEXBEM: return "texbem" ; |
869 | case OPCODE_TEXBEML: return "texbeml" ; |
870 | case OPCODE_TEXREG2AR: return "texreg2ar" ; |
871 | case OPCODE_TEXREG2GB: return "texreg2gb" ; |
872 | case OPCODE_TEXM3X2PAD: return "texm3x2pad" ; |
873 | case OPCODE_TEXM3X2TEX: return "texm3x2tex" ; |
874 | case OPCODE_TEXM3X3PAD: return "texm3x3pad" ; |
875 | case OPCODE_TEXM3X3TEX: return "texm3x3tex" ; |
876 | case OPCODE_RESERVED0: return "reserved0" ; |
877 | case OPCODE_TEXM3X3SPEC: return "texm3x3spec" ; |
878 | case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec" ; |
879 | case OPCODE_EXPP: return "expp" ; |
880 | case OPCODE_LOGP: return "logp" ; |
881 | case OPCODE_CND: return "cnd" ; |
882 | case OPCODE_DEF: return "def" ; |
883 | case OPCODE_TEXREG2RGB: return "texreg2rgb" ; |
884 | case OPCODE_TEXDP3TEX: return "texdp3tex" ; |
885 | case OPCODE_TEXM3X2DEPTH: return "texm3x2depth" ; |
886 | case OPCODE_TEXDP3: return "texdp3" ; |
887 | case OPCODE_TEXM3X3: return "texm3x3" ; |
888 | case OPCODE_TEXDEPTH: return "texdepth" ; |
889 | case OPCODE_CMP0: return "cmp0" ; |
890 | case OPCODE_ICMP: return "icmp" ; |
891 | case OPCODE_UCMP: return "ucmp" ; |
892 | case OPCODE_SELECT: return "select" ; |
893 | case OPCODE_EXTRACT: return "extract" ; |
894 | case OPCODE_INSERT: return "insert" ; |
895 | case OPCODE_BEM: return "bem" ; |
896 | case OPCODE_DP2ADD: return "dp2add" ; |
897 | case OPCODE_DFDX: return "dFdx" ; |
898 | case OPCODE_DFDY: return "dFdy" ; |
899 | case OPCODE_FWIDTH: return "fwidth" ; |
900 | case OPCODE_TEXLDD: return "texldd" ; |
901 | case OPCODE_CMP: return "cmp" ; |
902 | case OPCODE_TEXLDL: return "texldl" ; |
903 | case OPCODE_TEXBIAS: return "texbias" ; |
904 | case OPCODE_TEXOFFSET: return "texoffset" ; |
905 | case OPCODE_TEXOFFSETBIAS: return "texoffsetbias" ; |
906 | case OPCODE_TEXLODOFFSET: return "texlodoffset" ; |
907 | case OPCODE_TEXELFETCH: return "texelfetch" ; |
908 | case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset" ; |
909 | case OPCODE_TEXGRAD: return "texgrad" ; |
910 | case OPCODE_TEXGRADOFFSET: return "texgradoffset" ; |
911 | case OPCODE_BREAKP: return "breakp" ; |
912 | case OPCODE_TEXSIZE: return "texsize" ; |
913 | case OPCODE_PHASE: return "phase" ; |
914 | case OPCODE_COMMENT: return "comment" ; |
915 | case OPCODE_END: return "end" ; |
916 | case OPCODE_PS_1_0: return "ps_1_0" ; |
917 | case OPCODE_PS_1_1: return "ps_1_1" ; |
918 | case OPCODE_PS_1_2: return "ps_1_2" ; |
919 | case OPCODE_PS_1_3: return "ps_1_3" ; |
920 | case OPCODE_PS_1_4: return "ps_1_4" ; |
921 | case OPCODE_PS_2_0: return "ps_2_0" ; |
922 | case OPCODE_PS_2_x: return "ps_2_x" ; |
923 | case OPCODE_PS_3_0: return "ps_3_0" ; |
924 | case OPCODE_VS_1_0: return "vs_1_0" ; |
925 | case OPCODE_VS_1_1: return "vs_1_1" ; |
926 | case OPCODE_VS_2_0: return "vs_2_0" ; |
927 | case OPCODE_VS_2_x: return "vs_2_x" ; |
928 | case OPCODE_VS_2_sw: return "vs_2_sw" ; |
929 | case OPCODE_VS_3_0: return "vs_3_0" ; |
930 | case OPCODE_VS_3_sw: return "vs_3_sw" ; |
931 | case OPCODE_WHILE: return "while" ; |
932 | case OPCODE_ENDWHILE: return "endwhile" ; |
933 | case OPCODE_COS: return "cos" ; |
934 | case OPCODE_SIN: return "sin" ; |
935 | case OPCODE_TAN: return "tan" ; |
936 | case OPCODE_ACOS: return "acos" ; |
937 | case OPCODE_ASIN: return "asin" ; |
938 | case OPCODE_ATAN: return "atan" ; |
939 | case OPCODE_ATAN2: return "atan2" ; |
940 | case OPCODE_COSH: return "cosh" ; |
941 | case OPCODE_SINH: return "sinh" ; |
942 | case OPCODE_TANH: return "tanh" ; |
943 | case OPCODE_ACOSH: return "acosh" ; |
944 | case OPCODE_ASINH: return "asinh" ; |
945 | case OPCODE_ATANH: return "atanh" ; |
946 | case OPCODE_DP1: return "dp1" ; |
947 | case OPCODE_DP2: return "dp2" ; |
948 | case OPCODE_TRUNC: return "trunc" ; |
949 | case OPCODE_FLOOR: return "floor" ; |
950 | case OPCODE_ROUND: return "round" ; |
951 | case OPCODE_ROUNDEVEN: return "roundEven" ; |
952 | case OPCODE_CEIL: return "ceil" ; |
953 | case OPCODE_EXP2: return "exp2" ; |
954 | case OPCODE_LOG2: return "log2" ; |
955 | case OPCODE_EXP: return "exp" ; |
956 | case OPCODE_LOG: return "log" ; |
957 | case OPCODE_POW: return "pow" ; |
958 | case OPCODE_F2B: return "f2b" ; |
959 | case OPCODE_B2F: return "b2f" ; |
960 | case OPCODE_F2I: return "f2i" ; |
961 | case OPCODE_I2F: return "i2f" ; |
962 | case OPCODE_F2U: return "f2u" ; |
963 | case OPCODE_U2F: return "u2f" ; |
964 | case OPCODE_B2I: return "b2i" ; |
965 | case OPCODE_I2B: return "i2b" ; |
966 | case OPCODE_ALL: return "all" ; |
967 | case OPCODE_ANY: return "any" ; |
968 | case OPCODE_NEG: return "neg" ; |
969 | case OPCODE_INEG: return "ineg" ; |
970 | case OPCODE_ISNAN: return "isnan" ; |
971 | case OPCODE_ISINF: return "isinf" ; |
972 | case OPCODE_NOT: return "not" ; |
973 | case OPCODE_OR: return "or" ; |
974 | case OPCODE_XOR: return "xor" ; |
975 | case OPCODE_AND: return "and" ; |
976 | case OPCODE_EQ: return "eq" ; |
977 | case OPCODE_NE: return "neq" ; |
978 | case OPCODE_FORWARD1: return "forward1" ; |
979 | case OPCODE_FORWARD2: return "forward2" ; |
980 | case OPCODE_FORWARD3: return "forward3" ; |
981 | case OPCODE_FORWARD4: return "forward4" ; |
982 | case OPCODE_REFLECT1: return "reflect1" ; |
983 | case OPCODE_REFLECT2: return "reflect2" ; |
984 | case OPCODE_REFLECT3: return "reflect3" ; |
985 | case OPCODE_REFLECT4: return "reflect4" ; |
986 | case OPCODE_REFRACT1: return "refract1" ; |
987 | case OPCODE_REFRACT2: return "refract2" ; |
988 | case OPCODE_REFRACT3: return "refract3" ; |
989 | case OPCODE_REFRACT4: return "refract4" ; |
990 | case OPCODE_LEAVE: return "leave" ; |
991 | case OPCODE_CONTINUE: return "continue" ; |
992 | case OPCODE_TEST: return "test" ; |
993 | case OPCODE_SWITCH: return "switch" ; |
994 | case OPCODE_ENDSWITCH: return "endswitch" ; |
995 | default: |
996 | ASSERT(false); |
997 | } |
998 | |
999 | return "<unknown>" ; |
1000 | } |
1001 | |
1002 | std::string Shader::Instruction::controlString() const |
1003 | { |
1004 | if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP) |
1005 | { |
1006 | if(project) return "p" ; |
1007 | |
1008 | if(bias) return "b" ; |
1009 | |
1010 | // FIXME: LOD |
1011 | } |
1012 | |
1013 | switch(control) |
1014 | { |
1015 | case 1: return "_gt" ; |
1016 | case 2: return "_eq" ; |
1017 | case 3: return "_ge" ; |
1018 | case 4: return "_lt" ; |
1019 | case 5: return "_ne" ; |
1020 | case 6: return "_le" ; |
1021 | default: |
1022 | return "" ; |
1023 | // ASSERT(false); // FIXME |
1024 | } |
1025 | } |
1026 | |
1027 | std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const |
1028 | { |
1029 | std::ostringstream buffer; |
1030 | |
1031 | if(type == PARAMETER_FLOAT4LITERAL) |
1032 | { |
1033 | buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}'; |
1034 | |
1035 | return buffer.str(); |
1036 | } |
1037 | else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE) |
1038 | { |
1039 | buffer << index; |
1040 | |
1041 | return typeString(shaderType, version) + buffer.str(); |
1042 | } |
1043 | else |
1044 | { |
1045 | return typeString(shaderType, version); |
1046 | } |
1047 | } |
1048 | |
1049 | std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const |
1050 | { |
1051 | switch(type) |
1052 | { |
1053 | case PARAMETER_TEMP: return "r" ; |
1054 | case PARAMETER_INPUT: return "v" ; |
1055 | case PARAMETER_CONST: return "c" ; |
1056 | case PARAMETER_TEXTURE: |
1057 | // case PARAMETER_ADDR: |
1058 | if(shaderType == SHADER_PIXEL) return "t" ; |
1059 | else return "a0" ; |
1060 | case PARAMETER_RASTOUT: |
1061 | if(index == 0) return "oPos" ; |
1062 | else if(index == 1) return "oFog" ; |
1063 | else if(index == 2) return "oPts" ; |
1064 | else ASSERT(false); |
1065 | case PARAMETER_ATTROUT: return "oD" ; |
1066 | case PARAMETER_TEXCRDOUT: |
1067 | // case PARAMETER_OUTPUT: return ""; |
1068 | if(version < 0x0300) return "oT" ; |
1069 | else return "o" ; |
1070 | case PARAMETER_CONSTINT: return "i" ; |
1071 | case PARAMETER_COLOROUT: return "oC" ; |
1072 | case PARAMETER_DEPTHOUT: return "oDepth" ; |
1073 | case PARAMETER_SAMPLER: return "s" ; |
1074 | // case PARAMETER_CONST2: return ""; |
1075 | // case PARAMETER_CONST3: return ""; |
1076 | // case PARAMETER_CONST4: return ""; |
1077 | case PARAMETER_CONSTBOOL: return "b" ; |
1078 | case PARAMETER_LOOP: return "aL" ; |
1079 | // case PARAMETER_TEMPFLOAT16: return ""; |
1080 | case PARAMETER_MISCTYPE: |
1081 | switch(index) |
1082 | { |
1083 | case VPosIndex: return "vPos" ; |
1084 | case VFaceIndex: return "vFace" ; |
1085 | case InstanceIDIndex: return "iID" ; |
1086 | case VertexIDIndex: return "vID" ; |
1087 | default: ASSERT(false); |
1088 | } |
1089 | case PARAMETER_LABEL: return "l" ; |
1090 | case PARAMETER_PREDICATE: return "p0" ; |
1091 | case PARAMETER_FLOAT4LITERAL: return "" ; |
1092 | case PARAMETER_BOOL1LITERAL: return "" ; |
1093 | case PARAMETER_INT4LITERAL: return "" ; |
1094 | // case PARAMETER_VOID: return ""; |
1095 | default: |
1096 | ASSERT(false); |
1097 | } |
1098 | |
1099 | return "" ; |
1100 | } |
1101 | |
1102 | bool Shader::Instruction::isBranch() const |
1103 | { |
1104 | return opcode == OPCODE_IF || opcode == OPCODE_IFC; |
1105 | } |
1106 | |
1107 | bool Shader::Instruction::isCall() const |
1108 | { |
1109 | return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ; |
1110 | } |
1111 | |
1112 | bool Shader::Instruction::isBreak() const |
1113 | { |
1114 | return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP; |
1115 | } |
1116 | |
1117 | bool Shader::Instruction::isLoop() const |
1118 | { |
1119 | return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE; |
1120 | } |
1121 | |
1122 | bool Shader::Instruction::isEndLoop() const |
1123 | { |
1124 | return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE; |
1125 | } |
1126 | |
1127 | bool Shader::Instruction::isPredicated() const |
1128 | { |
1129 | return predicate || |
1130 | analysisBranch || |
1131 | analysisBreak || |
1132 | analysisContinue || |
1133 | analysisLeave; |
1134 | } |
1135 | |
1136 | Shader::Shader() : serialID(serialCounter++) |
1137 | { |
1138 | usedSamplers = 0; |
1139 | } |
1140 | |
1141 | Shader::~Shader() |
1142 | { |
1143 | for(auto &inst : instruction) |
1144 | { |
1145 | delete inst; |
1146 | inst = 0; |
1147 | } |
1148 | } |
1149 | |
1150 | void Shader::parse(const unsigned long *token) |
1151 | { |
1152 | minorVersion = (unsigned char)(token[0] & 0x000000FF); |
1153 | majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8); |
1154 | shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16); |
1155 | |
1156 | int length = 0; |
1157 | |
1158 | if(shaderType == SHADER_VERTEX) |
1159 | { |
1160 | length = VertexShader::validate(token); |
1161 | } |
1162 | else if(shaderType == SHADER_PIXEL) |
1163 | { |
1164 | length = PixelShader::validate(token); |
1165 | } |
1166 | else ASSERT(false); |
1167 | |
1168 | ASSERT(length != 0); |
1169 | instruction.resize(length); |
1170 | |
1171 | for(int i = 0; i < length; i++) |
1172 | { |
1173 | while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token |
1174 | { |
1175 | int length = (*token & 0x7FFF0000) >> 16; |
1176 | |
1177 | token += length + 1; |
1178 | } |
1179 | |
1180 | int tokenCount = size(*token); |
1181 | |
1182 | instruction[i] = new Instruction(token, tokenCount, majorVersion); |
1183 | |
1184 | token += 1 + tokenCount; |
1185 | } |
1186 | } |
1187 | |
1188 | int Shader::size(unsigned long opcode) const |
1189 | { |
1190 | return size(opcode, shaderModel); |
1191 | } |
1192 | |
1193 | int Shader::size(unsigned long opcode, unsigned short shaderModel) |
1194 | { |
1195 | if(shaderModel > 0x0300) |
1196 | { |
1197 | ASSERT(false); |
1198 | } |
1199 | |
1200 | static const signed char size[] = |
1201 | { |
1202 | 0, // NOP = 0 |
1203 | 2, // MOV |
1204 | 3, // ADD |
1205 | 3, // SUB |
1206 | 4, // MAD |
1207 | 3, // MUL |
1208 | 2, // RCP |
1209 | 2, // RSQ |
1210 | 3, // DP3 |
1211 | 3, // DP4 |
1212 | 3, // MIN |
1213 | 3, // MAX |
1214 | 3, // SLT |
1215 | 3, // SGE |
1216 | 2, // EXP |
1217 | 2, // LOG |
1218 | 2, // LIT |
1219 | 3, // DST |
1220 | 4, // LRP |
1221 | 2, // FRC |
1222 | 3, // M4x4 |
1223 | 3, // M4x3 |
1224 | 3, // M3x4 |
1225 | 3, // M3x3 |
1226 | 3, // M3x2 |
1227 | 1, // CALL |
1228 | 2, // CALLNZ |
1229 | 2, // LOOP |
1230 | 0, // RET |
1231 | 0, // ENDLOOP |
1232 | 1, // LABEL |
1233 | 2, // DCL |
1234 | 3, // POW |
1235 | 3, // CRS |
1236 | 4, // SGN |
1237 | 2, // ABS |
1238 | 2, // NRM |
1239 | 4, // SINCOS |
1240 | 1, // REP |
1241 | 0, // ENDREP |
1242 | 1, // IF |
1243 | 2, // IFC |
1244 | 0, // ELSE |
1245 | 0, // ENDIF |
1246 | 0, // BREAK |
1247 | 2, // BREAKC |
1248 | 2, // MOVA |
1249 | 2, // DEFB |
1250 | 5, // DEFI |
1251 | -1, // 49 |
1252 | -1, // 50 |
1253 | -1, // 51 |
1254 | -1, // 52 |
1255 | -1, // 53 |
1256 | -1, // 54 |
1257 | -1, // 55 |
1258 | -1, // 56 |
1259 | -1, // 57 |
1260 | -1, // 58 |
1261 | -1, // 59 |
1262 | -1, // 60 |
1263 | -1, // 61 |
1264 | -1, // 62 |
1265 | -1, // 63 |
1266 | 1, // TEXCOORD = 64 |
1267 | 1, // TEXKILL |
1268 | 1, // TEX |
1269 | 2, // TEXBEM |
1270 | 2, // TEXBEML |
1271 | 2, // TEXREG2AR |
1272 | 2, // TEXREG2GB |
1273 | 2, // TEXM3x2PAD |
1274 | 2, // TEXM3x2TEX |
1275 | 2, // TEXM3x3PAD |
1276 | 2, // TEXM3x3TEX |
1277 | -1, // RESERVED0 |
1278 | 3, // TEXM3x3SPEC |
1279 | 2, // TEXM3x3VSPEC |
1280 | 2, // EXPP |
1281 | 2, // LOGP |
1282 | 4, // CND |
1283 | 5, // DEF |
1284 | 2, // TEXREG2RGB |
1285 | 2, // TEXDP3TEX |
1286 | 2, // TEXM3x2DEPTH |
1287 | 2, // TEXDP3 |
1288 | 2, // TEXM3x3 |
1289 | 1, // TEXDEPTH |
1290 | 4, // CMP |
1291 | 3, // BEM |
1292 | 4, // DP2ADD |
1293 | 2, // DSX |
1294 | 2, // DSY |
1295 | 5, // TEXLDD |
1296 | 3, // SETP |
1297 | 3, // TEXLDL |
1298 | 2, // BREAKP |
1299 | -1, // 97 |
1300 | -1, // 98 |
1301 | -1, // 99 |
1302 | -1, // 100 |
1303 | -1, // 101 |
1304 | -1, // 102 |
1305 | -1, // 103 |
1306 | -1, // 104 |
1307 | -1, // 105 |
1308 | -1, // 106 |
1309 | -1, // 107 |
1310 | -1, // 108 |
1311 | -1, // 109 |
1312 | -1, // 110 |
1313 | -1, // 111 |
1314 | -1, // 112 |
1315 | }; |
1316 | |
1317 | int length = 0; |
1318 | |
1319 | if((opcode & 0x0000FFFF) == OPCODE_COMMENT) |
1320 | { |
1321 | return (opcode & 0x7FFF0000) >> 16; |
1322 | } |
1323 | |
1324 | if(opcode != OPCODE_PS_1_0 && |
1325 | opcode != OPCODE_PS_1_1 && |
1326 | opcode != OPCODE_PS_1_2 && |
1327 | opcode != OPCODE_PS_1_3 && |
1328 | opcode != OPCODE_PS_1_4 && |
1329 | opcode != OPCODE_PS_2_0 && |
1330 | opcode != OPCODE_PS_2_x && |
1331 | opcode != OPCODE_PS_3_0 && |
1332 | opcode != OPCODE_VS_1_0 && |
1333 | opcode != OPCODE_VS_1_1 && |
1334 | opcode != OPCODE_VS_2_0 && |
1335 | opcode != OPCODE_VS_2_x && |
1336 | opcode != OPCODE_VS_2_sw && |
1337 | opcode != OPCODE_VS_3_0 && |
1338 | opcode != OPCODE_VS_3_sw && |
1339 | opcode != OPCODE_PHASE && |
1340 | opcode != OPCODE_END) |
1341 | { |
1342 | if(shaderModel >= 0x0200) |
1343 | { |
1344 | length = (opcode & 0x0F000000) >> 24; |
1345 | } |
1346 | else |
1347 | { |
1348 | length = size[opcode & 0x0000FFFF]; |
1349 | } |
1350 | } |
1351 | |
1352 | if(length < 0) |
1353 | { |
1354 | ASSERT(false); |
1355 | } |
1356 | |
1357 | if(shaderModel == 0x0104) |
1358 | { |
1359 | switch(opcode & 0x0000FFFF) |
1360 | { |
1361 | case OPCODE_TEX: |
1362 | length += 1; |
1363 | break; |
1364 | case OPCODE_TEXCOORD: |
1365 | length += 1; |
1366 | break; |
1367 | default: |
1368 | break; |
1369 | } |
1370 | } |
1371 | |
1372 | return length; |
1373 | } |
1374 | |
1375 | bool Shader::maskContainsComponent(int mask, int component) |
1376 | { |
1377 | return (mask & (1 << component)) != 0; |
1378 | } |
1379 | |
1380 | bool Shader::swizzleContainsComponent(int swizzle, int component) |
1381 | { |
1382 | if((swizzle & 0x03) >> 0 == component) return true; |
1383 | if((swizzle & 0x0C) >> 2 == component) return true; |
1384 | if((swizzle & 0x30) >> 4 == component) return true; |
1385 | if((swizzle & 0xC0) >> 6 == component) return true; |
1386 | |
1387 | return false; |
1388 | } |
1389 | |
1390 | bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask) |
1391 | { |
1392 | if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true; |
1393 | if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true; |
1394 | if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true; |
1395 | if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true; |
1396 | |
1397 | return false; |
1398 | } |
1399 | |
1400 | bool Shader::containsDynamicBranching() const |
1401 | { |
1402 | return dynamicBranching; |
1403 | } |
1404 | |
1405 | bool Shader::containsBreakInstruction() const |
1406 | { |
1407 | return containsBreak; |
1408 | } |
1409 | |
1410 | bool Shader::containsContinueInstruction() const |
1411 | { |
1412 | return containsContinue; |
1413 | } |
1414 | |
1415 | bool Shader::containsLeaveInstruction() const |
1416 | { |
1417 | return containsLeave; |
1418 | } |
1419 | |
1420 | bool Shader::containsDefineInstruction() const |
1421 | { |
1422 | return containsDefine; |
1423 | } |
1424 | |
1425 | bool Shader::usesSampler(int index) const |
1426 | { |
1427 | return (usedSamplers & (1 << index)) != 0; |
1428 | } |
1429 | |
1430 | int Shader::getSerialID() const |
1431 | { |
1432 | return serialID; |
1433 | } |
1434 | |
1435 | size_t Shader::getLength() const |
1436 | { |
1437 | return instruction.size(); |
1438 | } |
1439 | |
1440 | Shader::ShaderType Shader::getShaderType() const |
1441 | { |
1442 | return shaderType; |
1443 | } |
1444 | |
1445 | unsigned short Shader::getShaderModel() const |
1446 | { |
1447 | return shaderModel; |
1448 | } |
1449 | |
1450 | void Shader::print(const char *fileName, ...) const |
1451 | { |
1452 | char fullName[1024 + 1]; |
1453 | |
1454 | va_list vararg; |
1455 | va_start(vararg, fileName); |
1456 | vsnprintf(fullName, 1024, fileName, vararg); |
1457 | va_end(vararg); |
1458 | |
1459 | std::ofstream file(fullName, std::ofstream::out); |
1460 | |
1461 | for(const auto &inst : instruction) |
1462 | { |
1463 | file << inst->string(shaderType, shaderModel) << std::endl; |
1464 | } |
1465 | } |
1466 | |
1467 | void Shader::printInstruction(int index, const char *fileName) const |
1468 | { |
1469 | std::ofstream file(fileName, std::ofstream::out | std::ofstream::app); |
1470 | |
1471 | file << instruction[index]->string(shaderType, shaderModel) << std::endl; |
1472 | } |
1473 | |
1474 | void Shader::append(Instruction *instruction) |
1475 | { |
1476 | this->instruction.push_back(instruction); |
1477 | } |
1478 | |
1479 | void Shader::declareSampler(int i) |
1480 | { |
1481 | if(i >= 0 && i < 16) |
1482 | { |
1483 | usedSamplers |= 1 << i; |
1484 | } |
1485 | } |
1486 | |
1487 | const Shader::Instruction *Shader::getInstruction(size_t i) const |
1488 | { |
1489 | ASSERT(i < instruction.size()); |
1490 | |
1491 | return instruction[i]; |
1492 | } |
1493 | |
1494 | void Shader::optimize() |
1495 | { |
1496 | optimizeLeave(); |
1497 | optimizeCall(); |
1498 | removeNull(); |
1499 | } |
1500 | |
1501 | void Shader::optimizeLeave() |
1502 | { |
1503 | // A return (leave) right before the end of a function or the shader can be removed |
1504 | for(unsigned int i = 0; i < instruction.size(); i++) |
1505 | { |
1506 | if(instruction[i]->opcode == OPCODE_LEAVE) |
1507 | { |
1508 | if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET) |
1509 | { |
1510 | instruction[i]->opcode = OPCODE_NULL; |
1511 | } |
1512 | } |
1513 | } |
1514 | } |
1515 | |
1516 | void Shader::optimizeCall() |
1517 | { |
1518 | // Eliminate uncalled functions |
1519 | std::set<int> calledFunctions; |
1520 | bool rescan = true; |
1521 | |
1522 | while(rescan) |
1523 | { |
1524 | calledFunctions.clear(); |
1525 | rescan = false; |
1526 | |
1527 | for(const auto &inst : instruction) |
1528 | { |
1529 | if(inst->isCall()) |
1530 | { |
1531 | calledFunctions.insert(inst->dst.label); |
1532 | } |
1533 | } |
1534 | |
1535 | if(!calledFunctions.empty()) |
1536 | { |
1537 | for(unsigned int i = 0; i < instruction.size(); i++) |
1538 | { |
1539 | if(instruction[i]->opcode == OPCODE_LABEL) |
1540 | { |
1541 | if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end()) |
1542 | { |
1543 | for( ; i < instruction.size(); i++) |
1544 | { |
1545 | Opcode oldOpcode = instruction[i]->opcode; |
1546 | instruction[i]->opcode = OPCODE_NULL; |
1547 | |
1548 | if(oldOpcode == OPCODE_RET) |
1549 | { |
1550 | rescan = true; |
1551 | break; |
1552 | } |
1553 | } |
1554 | } |
1555 | } |
1556 | } |
1557 | } |
1558 | } |
1559 | |
1560 | // Optimize the entry call |
1561 | if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET) |
1562 | { |
1563 | if(calledFunctions.size() == 1) |
1564 | { |
1565 | instruction[0]->opcode = OPCODE_NULL; |
1566 | instruction[1]->opcode = OPCODE_NULL; |
1567 | |
1568 | for(size_t i = 2; i < instruction.size(); i++) |
1569 | { |
1570 | if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET) |
1571 | { |
1572 | instruction[i]->opcode = OPCODE_NULL; |
1573 | } |
1574 | } |
1575 | } |
1576 | } |
1577 | } |
1578 | |
1579 | void Shader::removeNull() |
1580 | { |
1581 | size_t size = 0; |
1582 | for(size_t i = 0; i < instruction.size(); i++) |
1583 | { |
1584 | if(instruction[i]->opcode != OPCODE_NULL) |
1585 | { |
1586 | instruction[size] = instruction[i]; |
1587 | size++; |
1588 | } |
1589 | else |
1590 | { |
1591 | delete instruction[i]; |
1592 | } |
1593 | } |
1594 | |
1595 | instruction.resize(size); |
1596 | } |
1597 | |
1598 | void Shader::analyzeDirtyConstants() |
1599 | { |
1600 | dirtyConstantsF = 0; |
1601 | dirtyConstantsI = 0; |
1602 | dirtyConstantsB = 0; |
1603 | |
1604 | for(const auto &inst : instruction) |
1605 | { |
1606 | switch(inst->opcode) |
1607 | { |
1608 | case OPCODE_DEF: |
1609 | if(inst->dst.index + 1 > dirtyConstantsF) |
1610 | { |
1611 | dirtyConstantsF = inst->dst.index + 1; |
1612 | } |
1613 | break; |
1614 | case OPCODE_DEFI: |
1615 | if(inst->dst.index + 1 > dirtyConstantsI) |
1616 | { |
1617 | dirtyConstantsI = inst->dst.index + 1; |
1618 | } |
1619 | break; |
1620 | case OPCODE_DEFB: |
1621 | if(inst->dst.index + 1 > dirtyConstantsB) |
1622 | { |
1623 | dirtyConstantsB = inst->dst.index + 1; |
1624 | } |
1625 | break; |
1626 | default: |
1627 | break; |
1628 | } |
1629 | } |
1630 | } |
1631 | |
1632 | void Shader::analyzeDynamicBranching() |
1633 | { |
1634 | dynamicBranching = false; |
1635 | containsLeave = false; |
1636 | containsBreak = false; |
1637 | containsContinue = false; |
1638 | containsDefine = false; |
1639 | |
1640 | // Determine global presence of branching instructions |
1641 | for(const auto &inst : instruction) |
1642 | { |
1643 | switch(inst->opcode) |
1644 | { |
1645 | case OPCODE_CALLNZ: |
1646 | case OPCODE_IF: |
1647 | case OPCODE_IFC: |
1648 | case OPCODE_BREAK: |
1649 | case OPCODE_BREAKC: |
1650 | case OPCODE_CMP: |
1651 | case OPCODE_BREAKP: |
1652 | case OPCODE_LEAVE: |
1653 | case OPCODE_CONTINUE: |
1654 | if(inst->src[0].type != PARAMETER_CONSTBOOL) |
1655 | { |
1656 | dynamicBranching = true; |
1657 | } |
1658 | |
1659 | if(inst->opcode == OPCODE_LEAVE) |
1660 | { |
1661 | containsLeave = true; |
1662 | } |
1663 | |
1664 | if(inst->isBreak()) |
1665 | { |
1666 | containsBreak = true; |
1667 | } |
1668 | |
1669 | if(inst->opcode == OPCODE_CONTINUE) |
1670 | { |
1671 | containsContinue = true; |
1672 | } |
1673 | case OPCODE_DEF: |
1674 | case OPCODE_DEFB: |
1675 | case OPCODE_DEFI: |
1676 | containsDefine = true; |
1677 | default: |
1678 | break; |
1679 | } |
1680 | } |
1681 | |
1682 | // Conservatively determine which instructions are affected by dynamic branching |
1683 | int branchDepth = 0; |
1684 | int breakDepth = 0; |
1685 | int continueDepth = 0; |
1686 | bool leaveReturn = false; |
1687 | unsigned int functionBegin = 0; |
1688 | |
1689 | for(unsigned int i = 0; i < instruction.size(); i++) |
1690 | { |
1691 | // If statements and loops |
1692 | if(instruction[i]->isBranch() || instruction[i]->isLoop()) |
1693 | { |
1694 | branchDepth++; |
1695 | } |
1696 | else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop()) |
1697 | { |
1698 | branchDepth--; |
1699 | } |
1700 | |
1701 | if(branchDepth > 0) |
1702 | { |
1703 | instruction[i]->analysisBranch = true; |
1704 | |
1705 | if(instruction[i]->isCall()) |
1706 | { |
1707 | markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); |
1708 | } |
1709 | } |
1710 | |
1711 | // Break statemement |
1712 | if(instruction[i]->isBreak()) |
1713 | { |
1714 | breakDepth++; |
1715 | } |
1716 | |
1717 | if(breakDepth > 0) |
1718 | { |
1719 | if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask |
1720 | { |
1721 | breakDepth++; |
1722 | } |
1723 | else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) |
1724 | { |
1725 | breakDepth--; |
1726 | } |
1727 | |
1728 | instruction[i]->analysisBreak = true; |
1729 | |
1730 | if(instruction[i]->isCall()) |
1731 | { |
1732 | markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH); |
1733 | } |
1734 | } |
1735 | |
1736 | // Continue statement |
1737 | if(instruction[i]->opcode == OPCODE_CONTINUE) |
1738 | { |
1739 | continueDepth++; |
1740 | } |
1741 | |
1742 | if(continueDepth > 0) |
1743 | { |
1744 | if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask |
1745 | { |
1746 | continueDepth++; |
1747 | } |
1748 | else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH) |
1749 | { |
1750 | continueDepth--; |
1751 | } |
1752 | |
1753 | instruction[i]->analysisContinue = true; |
1754 | |
1755 | if(instruction[i]->isCall()) |
1756 | { |
1757 | markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE); |
1758 | } |
1759 | } |
1760 | |
1761 | // Return (leave) statement |
1762 | if(instruction[i]->opcode == OPCODE_LEAVE) |
1763 | { |
1764 | leaveReturn = true; |
1765 | |
1766 | // Mark loop body instructions prior to the return statement |
1767 | for(unsigned int l = functionBegin; l < i; l++) |
1768 | { |
1769 | if(instruction[l]->isLoop()) |
1770 | { |
1771 | for(unsigned int r = l + 1; r < i; r++) |
1772 | { |
1773 | instruction[r]->analysisLeave = true; |
1774 | } |
1775 | |
1776 | break; |
1777 | } |
1778 | } |
1779 | } |
1780 | else if(instruction[i]->opcode == OPCODE_RET) // End of the function |
1781 | { |
1782 | leaveReturn = false; |
1783 | } |
1784 | else if(instruction[i]->opcode == OPCODE_LABEL) |
1785 | { |
1786 | functionBegin = i; |
1787 | } |
1788 | |
1789 | if(leaveReturn) |
1790 | { |
1791 | instruction[i]->analysisLeave = true; |
1792 | |
1793 | if(instruction[i]->isCall()) |
1794 | { |
1795 | markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE); |
1796 | } |
1797 | } |
1798 | } |
1799 | } |
1800 | |
1801 | void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag) |
1802 | { |
1803 | bool marker = false; |
1804 | for(auto &inst : instruction) |
1805 | { |
1806 | if(!marker) |
1807 | { |
1808 | if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel) |
1809 | { |
1810 | marker = true; |
1811 | } |
1812 | } |
1813 | else |
1814 | { |
1815 | if(inst->opcode == OPCODE_RET) |
1816 | { |
1817 | break; |
1818 | } |
1819 | else if(inst->isCall()) |
1820 | { |
1821 | markFunctionAnalysis(inst->dst.label, flag); |
1822 | } |
1823 | |
1824 | inst->analysis |= flag; |
1825 | } |
1826 | } |
1827 | } |
1828 | |
1829 | void Shader::analyzeSamplers() |
1830 | { |
1831 | for(const auto &inst : instruction) |
1832 | { |
1833 | switch(inst->opcode) |
1834 | { |
1835 | case OPCODE_TEX: |
1836 | case OPCODE_TEXBEM: |
1837 | case OPCODE_TEXBEML: |
1838 | case OPCODE_TEXREG2AR: |
1839 | case OPCODE_TEXREG2GB: |
1840 | case OPCODE_TEXM3X2TEX: |
1841 | case OPCODE_TEXM3X3TEX: |
1842 | case OPCODE_TEXM3X3SPEC: |
1843 | case OPCODE_TEXM3X3VSPEC: |
1844 | case OPCODE_TEXREG2RGB: |
1845 | case OPCODE_TEXDP3TEX: |
1846 | case OPCODE_TEXM3X2DEPTH: |
1847 | case OPCODE_TEXLDD: |
1848 | case OPCODE_TEXLDL: |
1849 | case OPCODE_TEXLOD: |
1850 | case OPCODE_TEXOFFSET: |
1851 | case OPCODE_TEXOFFSETBIAS: |
1852 | case OPCODE_TEXLODOFFSET: |
1853 | case OPCODE_TEXELFETCH: |
1854 | case OPCODE_TEXELFETCHOFFSET: |
1855 | case OPCODE_TEXGRAD: |
1856 | case OPCODE_TEXGRADOFFSET: |
1857 | { |
1858 | Parameter &dst = inst->dst; |
1859 | Parameter &src1 = inst->src[1]; |
1860 | |
1861 | if(majorVersion >= 2) |
1862 | { |
1863 | if(src1.type == PARAMETER_SAMPLER) |
1864 | { |
1865 | usedSamplers |= 1 << src1.index; |
1866 | } |
1867 | } |
1868 | else |
1869 | { |
1870 | usedSamplers |= 1 << dst.index; |
1871 | } |
1872 | } |
1873 | break; |
1874 | default: |
1875 | break; |
1876 | } |
1877 | } |
1878 | } |
1879 | |
1880 | // Assigns a unique index to each call instruction, on a per label basis. |
1881 | // This is used to know what basic block to return to. |
1882 | void Shader::analyzeCallSites() |
1883 | { |
1884 | std::unordered_map<int, int> callSiteIndices; |
1885 | |
1886 | for(auto &inst : instruction) |
1887 | { |
1888 | if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ) |
1889 | { |
1890 | inst->dst.callSite = callSiteIndices[inst->dst.label]++; |
1891 | } |
1892 | } |
1893 | } |
1894 | |
1895 | void Shader::analyzeIndirectAddressing() |
1896 | { |
1897 | indirectAddressableTemporaries = false; |
1898 | indirectAddressableInput = false; |
1899 | indirectAddressableOutput = false; |
1900 | |
1901 | for(const auto &inst : instruction) |
1902 | { |
1903 | if(inst->dst.rel.type != PARAMETER_VOID) |
1904 | { |
1905 | switch(inst->dst.type) |
1906 | { |
1907 | case PARAMETER_TEMP: indirectAddressableTemporaries = true; break; |
1908 | case PARAMETER_INPUT: indirectAddressableInput = true; break; |
1909 | case PARAMETER_OUTPUT: indirectAddressableOutput = true; break; |
1910 | default: break; |
1911 | } |
1912 | } |
1913 | |
1914 | for(int j = 0; j < 3; j++) |
1915 | { |
1916 | if(inst->src[j].rel.type != PARAMETER_VOID) |
1917 | { |
1918 | switch(inst->src[j].type) |
1919 | { |
1920 | case PARAMETER_TEMP: indirectAddressableTemporaries = true; break; |
1921 | case PARAMETER_INPUT: indirectAddressableInput = true; break; |
1922 | case PARAMETER_OUTPUT: indirectAddressableOutput = true; break; |
1923 | default: break; |
1924 | } |
1925 | } |
1926 | } |
1927 | } |
1928 | } |
1929 | |
1930 | // analyzeLimits analyzes the whole shader program to determine the deepest |
1931 | // nesting of control flow blocks and function calls. These calculations |
1932 | // are stored into the limits member, and is used by the programs to |
1933 | // allocate stack storage variables. |
1934 | void Shader::analyzeLimits() |
1935 | { |
1936 | typedef unsigned int FunctionID; |
1937 | |
1938 | // Identifier of the function with the main entry point. |
1939 | constexpr FunctionID MAIN_ID = 0xF0000000; |
1940 | |
1941 | // Invalid function identifier. |
1942 | constexpr FunctionID INVALID_ID = ~0U; |
1943 | |
1944 | // Limits on a single function. |
1945 | struct FunctionLimits |
1946 | { |
1947 | uint32_t loops = 0; // maximum nested loop and reps. |
1948 | uint32_t ifs = 0; // maximum nested if statements. |
1949 | uint32_t stack = 0; // maximum call depth. |
1950 | }; |
1951 | |
1952 | // Information about a single function in the shader. |
1953 | struct FunctionInfo |
1954 | { |
1955 | FunctionLimits limits; |
1956 | std::unordered_set<FunctionID> calls; // What this function calls. |
1957 | bool reachable; // Is this function reachable? |
1958 | }; |
1959 | |
1960 | std::unordered_map<FunctionID, FunctionInfo> functions; |
1961 | |
1962 | uint32_t maxLabel = 0; // Highest label found |
1963 | |
1964 | // Add a definition for the main entry point. |
1965 | // This starts at the beginning of the instructions and does not have |
1966 | // its own label. |
1967 | functions[MAIN_ID] = FunctionInfo(); |
1968 | functions[MAIN_ID].reachable = true; |
1969 | |
1970 | // Begin by doing a pass over the instructions to identify all the |
1971 | // functions. These start with a label and end with a ret. Note that |
1972 | // functions can have labels within them. |
1973 | FunctionID currentFunc = MAIN_ID; |
1974 | for(auto &inst : instruction) |
1975 | { |
1976 | switch (inst->opcode) |
1977 | { |
1978 | case OPCODE_LABEL: |
1979 | if (currentFunc == INVALID_ID) |
1980 | { |
1981 | // Start of a function. |
1982 | FunctionID id = inst->dst.label; |
1983 | ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else. |
1984 | functions[id] = FunctionInfo(); |
1985 | } |
1986 | break; |
1987 | case OPCODE_RET: |
1988 | currentFunc = INVALID_ID; |
1989 | break; |
1990 | default: |
1991 | break; |
1992 | } |
1993 | } |
1994 | |
1995 | // Limits for the currently analyzed function. |
1996 | FunctionLimits currentLimits; |
1997 | |
1998 | // Now loop over the instructions gathering the limits of each of the |
1999 | // functions. |
2000 | currentFunc = MAIN_ID; |
2001 | for(size_t i = 0; i < instruction.size(); i++) |
2002 | { |
2003 | const auto& inst = instruction[i]; |
2004 | switch (inst->opcode) |
2005 | { |
2006 | case OPCODE_LABEL: |
2007 | { |
2008 | maxLabel = std::max(maxLabel, inst->dst.label); |
2009 | if (currentFunc == INVALID_ID) |
2010 | { |
2011 | // Start of a function. |
2012 | FunctionID id = inst->dst.label; |
2013 | ASSERT(functions.find(id) != functions.end()); // Sanity check |
2014 | currentFunc = id; |
2015 | } |
2016 | break; |
2017 | } |
2018 | case OPCODE_CALL: |
2019 | case OPCODE_CALLNZ: |
2020 | { |
2021 | ASSERT(currentFunc != INVALID_ID); |
2022 | FunctionID id = inst->dst.label; |
2023 | ASSERT(functions.find(id) != functions.end()); |
2024 | functions[currentFunc].calls.emplace(id); |
2025 | functions[id].reachable = true; |
2026 | break; |
2027 | } |
2028 | case OPCODE_LOOP: |
2029 | case OPCODE_REP: |
2030 | case OPCODE_WHILE: |
2031 | case OPCODE_SWITCH: // Not a mistake - switches share loopReps. |
2032 | { |
2033 | ASSERT(currentFunc != INVALID_ID); |
2034 | ++currentLimits.loops; |
2035 | auto& func = functions[currentFunc]; |
2036 | func.limits.loops = std::max(func.limits.loops, currentLimits.loops); |
2037 | break; |
2038 | } |
2039 | case OPCODE_ENDLOOP: |
2040 | case OPCODE_ENDREP: |
2041 | case OPCODE_ENDWHILE: |
2042 | case OPCODE_ENDSWITCH: |
2043 | { |
2044 | ASSERT(currentLimits.loops > 0); |
2045 | --currentLimits.loops; |
2046 | break; |
2047 | } |
2048 | case OPCODE_IF: |
2049 | case OPCODE_IFC: |
2050 | { |
2051 | ASSERT(currentFunc != INVALID_ID); |
2052 | ++currentLimits.ifs; |
2053 | auto& func = functions[currentFunc]; |
2054 | func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs); |
2055 | break; |
2056 | } |
2057 | case OPCODE_ENDIF: |
2058 | { |
2059 | ASSERT(currentLimits.ifs > 0); |
2060 | currentLimits.ifs--; |
2061 | break; |
2062 | } |
2063 | case OPCODE_RET: |
2064 | { |
2065 | // Must be in a function to return. |
2066 | ASSERT(currentFunc != INVALID_ID); |
2067 | |
2068 | // All stacks should be popped before returning. |
2069 | ASSERT(currentLimits.ifs == 0); |
2070 | ASSERT(currentLimits.loops == 0); |
2071 | |
2072 | currentFunc = INVALID_ID; |
2073 | currentLimits = FunctionLimits(); |
2074 | break; |
2075 | } |
2076 | default: |
2077 | break; |
2078 | } |
2079 | } |
2080 | |
2081 | // Assert that every function is reachable (these should have been |
2082 | // stripped in earlier stages). Unreachable functions may be code |
2083 | // generated, but their own limits are not considered below, potentially |
2084 | // causing OOB indexing in later stages. |
2085 | // If we ever find cases where there are unreachable functions, we can |
2086 | // replace this assert with NO-OPing or stripping out the dead |
2087 | // functions. |
2088 | for (auto it : functions) { ASSERT(it.second.reachable); } |
2089 | |
2090 | // We have now gathered all the information about each of the functions |
2091 | // in the shader. Traverse these functions starting from the main |
2092 | // function to calculate the maximum limits across the entire shader. |
2093 | |
2094 | std::unordered_set<FunctionID> visited; |
2095 | std::function<Limits(FunctionID)> traverse; |
2096 | traverse = [&](FunctionID id) -> Limits |
2097 | { |
2098 | const auto& func = functions[id]; |
2099 | ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed. |
2100 | visited.insert(id); |
2101 | Limits limits; |
2102 | limits.stack = 1; |
2103 | for (auto callee : func.calls) |
2104 | { |
2105 | auto calleeLimits = traverse(callee); |
2106 | limits.loops = std::max(limits.loops, calleeLimits.loops); |
2107 | limits.ifs = std::max(limits.ifs, calleeLimits.ifs); |
2108 | limits.stack = std::max(limits.stack, calleeLimits.stack + 1); |
2109 | } |
2110 | visited.erase(id); |
2111 | |
2112 | limits.loops += func.limits.loops; |
2113 | limits.ifs += func.limits.ifs; |
2114 | return limits; |
2115 | }; |
2116 | |
2117 | limits = traverse(MAIN_ID); |
2118 | limits.maxLabel = maxLabel; |
2119 | } |
2120 | } |
2121 | |