1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Shader.hpp"
16
17#include "VertexShader.hpp"
18#include "PixelShader.hpp"
19#include "Common/Math.hpp"
20#include "Common/Debug.hpp"
21
22#include <algorithm>
23#include <set>
24#include <fstream>
25#include <functional>
26#include <sstream>
27#include <stdarg.h>
28#include <unordered_map>
29#include <unordered_set>
30
31namespace sw
32{
	// Source of per-shader serial numbers: Shader::Shader() initializes serialID
	// from this value and post-increments it. The first value handed out is 1.
	volatile int Shader::serialCounter = 1;
34
35 Shader::Opcode Shader::OPCODE_DP(int i)
36 {
37 switch(i)
38 {
39 default: ASSERT(false);
40 case 1: return OPCODE_DP1;
41 case 2: return OPCODE_DP2;
42 case 3: return OPCODE_DP3;
43 case 4: return OPCODE_DP4;
44 }
45 }
46
47 Shader::Opcode Shader::OPCODE_LEN(int i)
48 {
49 switch(i)
50 {
51 default: ASSERT(false);
52 case 1: return OPCODE_ABS;
53 case 2: return OPCODE_LEN2;
54 case 3: return OPCODE_LEN3;
55 case 4: return OPCODE_LEN4;
56 }
57 }
58
59 Shader::Opcode Shader::OPCODE_DIST(int i)
60 {
61 switch(i)
62 {
63 default: ASSERT(false);
64 case 1: return OPCODE_DIST1;
65 case 2: return OPCODE_DIST2;
66 case 3: return OPCODE_DIST3;
67 case 4: return OPCODE_DIST4;
68 }
69 }
70
71 Shader::Opcode Shader::OPCODE_NRM(int i)
72 {
73 switch(i)
74 {
75 default: ASSERT(false);
76 case 1: return OPCODE_SGN;
77 case 2: return OPCODE_NRM2;
78 case 3: return OPCODE_NRM3;
79 case 4: return OPCODE_NRM4;
80 }
81 }
82
83 Shader::Opcode Shader::OPCODE_FORWARD(int i)
84 {
85 switch(i)
86 {
87 default: ASSERT(false);
88 case 1: return OPCODE_FORWARD1;
89 case 2: return OPCODE_FORWARD2;
90 case 3: return OPCODE_FORWARD3;
91 case 4: return OPCODE_FORWARD4;
92 }
93 }
94
95 Shader::Opcode Shader::OPCODE_REFLECT(int i)
96 {
97 switch(i)
98 {
99 default: ASSERT(false);
100 case 1: return OPCODE_REFLECT1;
101 case 2: return OPCODE_REFLECT2;
102 case 3: return OPCODE_REFLECT3;
103 case 4: return OPCODE_REFLECT4;
104 }
105 }
106
107 Shader::Opcode Shader::OPCODE_REFRACT(int i)
108 {
109 switch(i)
110 {
111 default: ASSERT(false);
112 case 1: return OPCODE_REFRACT1;
113 case 2: return OPCODE_REFRACT2;
114 case 3: return OPCODE_REFRACT3;
115 case 4: return OPCODE_REFRACT4;
116 }
117 }
118
119 Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
120 {
121 control = CONTROL_RESERVED0;
122
123 predicate = false;
124 predicateNot = false;
125 predicateSwizzle = 0xE4;
126
127 coissue = false;
128 samplerType = SAMPLER_UNKNOWN;
129 usage = USAGE_POSITION;
130 usageIndex = 0;
131 }
132
133 Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
134 {
135 parseOperationToken(*token++, majorVersion);
136
137 samplerType = SAMPLER_UNKNOWN;
138 usage = USAGE_POSITION;
139 usageIndex = 0;
140
141 if(opcode == OPCODE_IF ||
142 opcode == OPCODE_IFC ||
143 opcode == OPCODE_LOOP ||
144 opcode == OPCODE_REP ||
145 opcode == OPCODE_BREAKC ||
146 opcode == OPCODE_BREAKP) // No destination operand
147 {
148 if(size > 0) parseSourceToken(0, token++, majorVersion);
149 if(size > 1) parseSourceToken(1, token++, majorVersion);
150 if(size > 2) parseSourceToken(2, token++, majorVersion);
151 if(size > 3) ASSERT(false);
152 }
153 else if(opcode == OPCODE_DCL)
154 {
155 parseDeclarationToken(*token++);
156 parseDestinationToken(token++, majorVersion);
157 }
158 else
159 {
160 if(size > 0)
161 {
162 parseDestinationToken(token, majorVersion);
163
164 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
165 {
166 token++;
167 size--;
168 }
169
170 token++;
171 size--;
172 }
173
174 if(predicate)
175 {
176 ASSERT(size != 0);
177
178 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
179 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
180
181 token++;
182 size--;
183 }
184
185 for(int i = 0; size > 0; i++)
186 {
187 parseSourceToken(i, token, majorVersion);
188
189 token++;
190 size--;
191
192 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
193 {
194 token++;
195 size--;
196 }
197 }
198 }
199 }
200
201 Shader::Instruction::~Instruction()
202 {
203 }
204
205 std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
206 {
207 std::string instructionString;
208
209 if(opcode != OPCODE_DCL)
210 {
211 instructionString += coissue ? "+ " : "";
212
213 if(predicate)
214 {
215 instructionString += predicateNot ? "(!p0" : "(p0";
216 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
217 instructionString += ") ";
218 }
219
220 instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
221
222 if(dst.type != PARAMETER_VOID)
223 {
224 instructionString += " " + dst.string(shaderType, version) +
225 dst.relativeString() +
226 dst.maskString();
227 }
228
229 for(int i = 0; i < 4; i++)
230 {
231 if(src[i].type != PARAMETER_VOID)
232 {
233 instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
234 instructionString += src[i].preModifierString() +
235 src[i].string(shaderType, version) +
236 src[i].relativeString() +
237 src[i].postModifierString() +
238 src[i].swizzleString();
239 }
240 }
241 }
242 else // DCL
243 {
244 instructionString += "dcl";
245
246 if(dst.type == PARAMETER_SAMPLER)
247 {
248 switch(samplerType)
249 {
250 case SAMPLER_UNKNOWN: instructionString += " "; break;
251 case SAMPLER_1D: instructionString += "_1d "; break;
252 case SAMPLER_2D: instructionString += "_2d "; break;
253 case SAMPLER_CUBE: instructionString += "_cube "; break;
254 case SAMPLER_VOLUME: instructionString += "_volume "; break;
255 default:
256 ASSERT(false);
257 }
258
259 instructionString += dst.string(shaderType, version);
260 }
261 else if(dst.type == PARAMETER_INPUT ||
262 dst.type == PARAMETER_OUTPUT ||
263 dst.type == PARAMETER_TEXTURE)
264 {
265 if(version >= 0x0300)
266 {
267 switch(usage)
268 {
269 case USAGE_POSITION: instructionString += "_position"; break;
270 case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break;
271 case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
272 case USAGE_NORMAL: instructionString += "_normal"; break;
273 case USAGE_PSIZE: instructionString += "_psize"; break;
274 case USAGE_TEXCOORD: instructionString += "_texcoord"; break;
275 case USAGE_TANGENT: instructionString += "_tangent"; break;
276 case USAGE_BINORMAL: instructionString += "_binormal"; break;
277 case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break;
278 case USAGE_POSITIONT: instructionString += "_positiont"; break;
279 case USAGE_COLOR: instructionString += "_color"; break;
280 case USAGE_FOG: instructionString += "_fog"; break;
281 case USAGE_DEPTH: instructionString += "_depth"; break;
282 case USAGE_SAMPLE: instructionString += "_sample"; break;
283 default:
284 ASSERT(false);
285 }
286
287 if(usageIndex > 0)
288 {
289 std::ostringstream buffer;
290
291 buffer << (int)usageIndex;
292
293 instructionString += buffer.str();
294 }
295 }
296 else ASSERT(dst.type != PARAMETER_OUTPUT);
297
298 instructionString += " ";
299
300 instructionString += dst.string(shaderType, version);
301 instructionString += dst.maskString();
302 }
303 else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace
304 {
305 instructionString += " ";
306
307 instructionString += dst.string(shaderType, version);
308 }
309 else ASSERT(false);
310 }
311
312 return instructionString;
313 }
314
315 std::string Shader::DestinationParameter::modifierString() const
316 {
317 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
318 {
319 return "";
320 }
321
322 std::string modifierString;
323
324 if(saturate)
325 {
326 modifierString += "_sat";
327 }
328
329 if(partialPrecision)
330 {
331 modifierString += "_pp";
332 }
333
334 if(centroid)
335 {
336 modifierString += "_centroid";
337 }
338
339 return modifierString;
340 }
341
342 std::string Shader::DestinationParameter::shiftString() const
343 {
344 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
345 {
346 return "";
347 }
348
349 switch(shift)
350 {
351 case 0: return "";
352 case 1: return "_x2";
353 case 2: return "_x4";
354 case 3: return "_x8";
355 case -1: return "_d2";
356 case -2: return "_d4";
357 case -3: return "_d8";
358 default:
359 return "";
360 // ASSERT(false); // FIXME
361 }
362 }
363
364 std::string Shader::DestinationParameter::maskString() const
365 {
366 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
367 {
368 return "";
369 }
370
371 switch(mask)
372 {
373 case 0x0: return "";
374 case 0x1: return ".x";
375 case 0x2: return ".y";
376 case 0x3: return ".xy";
377 case 0x4: return ".z";
378 case 0x5: return ".xz";
379 case 0x6: return ".yz";
380 case 0x7: return ".xyz";
381 case 0x8: return ".w";
382 case 0x9: return ".xw";
383 case 0xA: return ".yw";
384 case 0xB: return ".xyw";
385 case 0xC: return ".zw";
386 case 0xD: return ".xzw";
387 case 0xE: return ".yzw";
388 case 0xF: return "";
389 default:
390 ASSERT(false);
391 }
392
393 return "";
394 }
395
396 std::string Shader::SourceParameter::preModifierString() const
397 {
398 if(type == PARAMETER_VOID)
399 {
400 return "";
401 }
402
403 switch(modifier)
404 {
405 case MODIFIER_NONE: return "";
406 case MODIFIER_NEGATE: return "-";
407 case MODIFIER_BIAS: return "";
408 case MODIFIER_BIAS_NEGATE: return "-";
409 case MODIFIER_SIGN: return "";
410 case MODIFIER_SIGN_NEGATE: return "-";
411 case MODIFIER_COMPLEMENT: return "1-";
412 case MODIFIER_X2: return "";
413 case MODIFIER_X2_NEGATE: return "-";
414 case MODIFIER_DZ: return "";
415 case MODIFIER_DW: return "";
416 case MODIFIER_ABS: return "";
417 case MODIFIER_ABS_NEGATE: return "-";
418 case MODIFIER_NOT: return "!";
419 default:
420 ASSERT(false);
421 }
422
423 return "";
424 }
425
426 std::string Shader::Parameter::relativeString() const
427 {
428 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
429 {
430 if(rel.type == PARAMETER_VOID)
431 {
432 return "";
433 }
434 else if(rel.type == PARAMETER_ADDR)
435 {
436 switch(rel.swizzle & 0x03)
437 {
438 case 0: return "[a0.x]";
439 case 1: return "[a0.y]";
440 case 2: return "[a0.z]";
441 case 3: return "[a0.w]";
442 }
443 }
444 else if(rel.type == PARAMETER_TEMP)
445 {
446 std::ostringstream buffer;
447 buffer << rel.index;
448
449 switch(rel.swizzle & 0x03)
450 {
451 case 0: return "[r" + buffer.str() + ".x]";
452 case 1: return "[r" + buffer.str() + ".y]";
453 case 2: return "[r" + buffer.str() + ".z]";
454 case 3: return "[r" + buffer.str() + ".w]";
455 }
456 }
457 else if(rel.type == PARAMETER_LOOP)
458 {
459 return "[aL]";
460 }
461 else if(rel.type == PARAMETER_CONST)
462 {
463 std::ostringstream buffer;
464 buffer << rel.index;
465
466 switch(rel.swizzle & 0x03)
467 {
468 case 0: return "[c" + buffer.str() + ".x]";
469 case 1: return "[c" + buffer.str() + ".y]";
470 case 2: return "[c" + buffer.str() + ".z]";
471 case 3: return "[c" + buffer.str() + ".w]";
472 }
473 }
474 else ASSERT(false);
475 }
476
477 return "";
478 }
479
480 std::string Shader::SourceParameter::postModifierString() const
481 {
482 if(type == PARAMETER_VOID)
483 {
484 return "";
485 }
486
487 switch(modifier)
488 {
489 case MODIFIER_NONE: return "";
490 case MODIFIER_NEGATE: return "";
491 case MODIFIER_BIAS: return "_bias";
492 case MODIFIER_BIAS_NEGATE: return "_bias";
493 case MODIFIER_SIGN: return "_bx2";
494 case MODIFIER_SIGN_NEGATE: return "_bx2";
495 case MODIFIER_COMPLEMENT: return "";
496 case MODIFIER_X2: return "_x2";
497 case MODIFIER_X2_NEGATE: return "_x2";
498 case MODIFIER_DZ: return "_dz";
499 case MODIFIER_DW: return "_dw";
500 case MODIFIER_ABS: return "_abs";
501 case MODIFIER_ABS_NEGATE: return "_abs";
502 case MODIFIER_NOT: return "";
503 default:
504 ASSERT(false);
505 }
506
507 return "";
508 }
509
510 std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const
511 {
512 if(type == PARAMETER_CONST && bufferIndex >= 0)
513 {
514 std::ostringstream buffer;
515 buffer << bufferIndex;
516
517 std::ostringstream offset;
518 offset << index;
519
520 return "cb" + buffer.str() + "[" + offset.str() + "]";
521 }
522 else
523 {
524 return Parameter::string(shaderType, version);
525 }
526 }
527
	// Returns the swizzle suffix of this source by forwarding its type and swizzle
	// to Instruction::swizzleString().
	std::string Shader::SourceParameter::swizzleString() const
	{
		return Instruction::swizzleString(type, swizzle);
	}
532
533 void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
534 {
535 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token
536 {
537 opcode = (Opcode)token;
538
539 control = CONTROL_RESERVED0;
540 predicate = false;
541 coissue = false;
542 }
543 else
544 {
545 opcode = (Opcode)(token & 0x0000FFFF);
546 control = (Control)((token & 0x00FF0000) >> 16);
547
548 int size = (token & 0x0F000000) >> 24;
549
550 predicate = (token & 0x10000000) != 0x00000000;
551 coissue = (token & 0x40000000) != 0x00000000;
552
553 if(majorVersion < 2)
554 {
555 if(size != 0)
556 {
557 ASSERT(false); // Reserved
558 }
559 }
560
561 if(majorVersion < 2)
562 {
563 if(predicate)
564 {
565 ASSERT(false);
566 }
567 }
568
569 if((token & 0x20000000) != 0x00000000)
570 {
571 ASSERT(false); // Reserved
572 }
573
574 if(majorVersion >= 2)
575 {
576 if(coissue)
577 {
578 ASSERT(false); // Reserved
579 }
580 }
581
582 if((token & 0x80000000) != 0x00000000)
583 {
584 ASSERT(false);
585 }
586 }
587 }
588
589 void Shader::Instruction::parseDeclarationToken(unsigned long token)
590 {
591 samplerType = (SamplerType)((token & 0x78000000) >> 27);
592 usage = (Usage)(token & 0x0000001F);
593 usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
594 }
595
596 void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
597 {
598 dst.index = (unsigned short)(token[0] & 0x000007FF);
599 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
600
601 // TODO: Check type and index range
602
603 bool relative = (token[0] & 0x00002000) != 0x00000000;
604 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
605 dst.rel.swizzle = 0x00;
606 dst.rel.scale = 1;
607
608 if(relative && majorVersion >= 3)
609 {
610 dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
611 dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
612 }
613 else if(relative) ASSERT(false); // Reserved
614
615 if((token[0] & 0x0000C000) != 0x00000000)
616 {
617 ASSERT(false); // Reserved
618 }
619
620 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
621 dst.saturate = (token[0] & 0x00100000) != 0;
622 dst.partialPrecision = (token[0] & 0x00200000) != 0;
623 dst.centroid = (token[0] & 0x00400000) != 0;
624 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
625
626 if(majorVersion >= 2)
627 {
628 if(dst.shift)
629 {
630 ASSERT(false); // Reserved
631 }
632 }
633
634 if((token[0] & 0x80000000) != 0x80000000)
635 {
636 ASSERT(false);
637 }
638 }
639
640 void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
641 {
642 // Defaults
643 src[i].index = 0;
644 src[i].type = PARAMETER_VOID;
645 src[i].modifier = MODIFIER_NONE;
646 src[i].swizzle = 0xE4;
647 src[i].rel.type = PARAMETER_VOID;
648 src[i].rel.swizzle = 0x00;
649 src[i].rel.scale = 1;
650
651 switch(opcode)
652 {
653 case OPCODE_DEF:
654 src[0].type = PARAMETER_FLOAT4LITERAL;
655 src[0].value[i] = *(float*)token;
656 break;
657 case OPCODE_DEFB:
658 src[0].type = PARAMETER_BOOL1LITERAL;
659 src[0].boolean[0] = *(int*)token;
660 break;
661 case OPCODE_DEFI:
662 src[0].type = PARAMETER_INT4LITERAL;
663 src[0].integer[i] = *(int*)token;
664 break;
665 default:
666 src[i].index = (unsigned short)(token[0] & 0x000007FF);
667 src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
668
669 // FIXME: Check type and index range
670
671 bool relative = (token[0] & 0x00002000) != 0x00000000;
672 src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
673
674 if((token[0] & 0x0000C000) != 0x00000000)
675 {
676 if(opcode != OPCODE_DEF &&
677 opcode != OPCODE_DEFI &&
678 opcode != OPCODE_DEFB)
679 {
680 ASSERT(false);
681 }
682 }
683
684 src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
685 src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
686
687 if((token[0] & 0x80000000) != 0x80000000)
688 {
689 if(opcode != OPCODE_DEF &&
690 opcode != OPCODE_DEFI &&
691 opcode != OPCODE_DEFB)
692 {
693 ASSERT(false);
694 }
695 }
696
697 if(relative && majorVersion >= 2)
698 {
699 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
700 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
701 }
702 }
703 }
704
705 std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
706 {
707 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
708 {
709 return "";
710 }
711
712 int x = (swizzle & 0x03) >> 0;
713 int y = (swizzle & 0x0C) >> 2;
714 int z = (swizzle & 0x30) >> 4;
715 int w = (swizzle & 0xC0) >> 6;
716
717 std::string swizzleString = ".";
718
719 switch(x)
720 {
721 case 0: swizzleString += "x"; break;
722 case 1: swizzleString += "y"; break;
723 case 2: swizzleString += "z"; break;
724 case 3: swizzleString += "w"; break;
725 }
726
727 if(!(x == y && y == z && z == w))
728 {
729 switch(y)
730 {
731 case 0: swizzleString += "x"; break;
732 case 1: swizzleString += "y"; break;
733 case 2: swizzleString += "z"; break;
734 case 3: swizzleString += "w"; break;
735 }
736
737 if(!(y == z && z == w))
738 {
739 switch(z)
740 {
741 case 0: swizzleString += "x"; break;
742 case 1: swizzleString += "y"; break;
743 case 2: swizzleString += "z"; break;
744 case 3: swizzleString += "w"; break;
745 }
746
747 if(!(z == w))
748 {
749 switch(w)
750 {
751 case 0: swizzleString += "x"; break;
752 case 1: swizzleString += "y"; break;
753 case 2: swizzleString += "z"; break;
754 case 3: swizzleString += "w"; break;
755 }
756 }
757 }
758 }
759
760 return swizzleString;
761 }
762
	// Returns the mnemonic used when printing this instruction's opcode.
	// version only affects OPCODE_TEX. Opcodes without an entry trip ASSERT and
	// yield "<unknown>".
	std::string Shader::Instruction::operationString(unsigned short version) const
	{
		switch(opcode)
		{
		case OPCODE_NULL: return "null";
		case OPCODE_NOP: return "nop";
		case OPCODE_MOV: return "mov";
		case OPCODE_ADD: return "add";
		case OPCODE_IADD: return "iadd";
		case OPCODE_SUB: return "sub";
		case OPCODE_ISUB: return "isub";
		case OPCODE_MAD: return "mad";
		case OPCODE_IMAD: return "imad";
		case OPCODE_MUL: return "mul";
		case OPCODE_IMUL: return "imul";
		case OPCODE_RCPX: return "rcpx";
		case OPCODE_DIV: return "div";
		case OPCODE_IDIV: return "idiv";
		case OPCODE_UDIV: return "udiv";
		case OPCODE_MOD: return "mod";
		case OPCODE_IMOD: return "imod";
		case OPCODE_UMOD: return "umod";
		case OPCODE_SHL: return "shl";
		case OPCODE_ISHR: return "ishr";
		case OPCODE_USHR: return "ushr";
		case OPCODE_RSQX: return "rsqx";
		case OPCODE_SQRT: return "sqrt";
		case OPCODE_RSQ: return "rsq";
		case OPCODE_LEN2: return "len2";
		case OPCODE_LEN3: return "len3";
		case OPCODE_LEN4: return "len4";
		case OPCODE_DIST1: return "dist1";
		case OPCODE_DIST2: return "dist2";
		case OPCODE_DIST3: return "dist3";
		case OPCODE_DIST4: return "dist4";
		case OPCODE_DP3: return "dp3";
		case OPCODE_DP4: return "dp4";
		case OPCODE_DET2: return "det2";
		case OPCODE_DET3: return "det3";
		case OPCODE_DET4: return "det4";
		case OPCODE_MIN: return "min";
		case OPCODE_IMIN: return "imin";
		case OPCODE_UMIN: return "umin";
		case OPCODE_MAX: return "max";
		case OPCODE_IMAX: return "imax";
		case OPCODE_UMAX: return "umax";
		case OPCODE_SLT: return "slt";
		case OPCODE_SGE: return "sge";
		case OPCODE_EXP2X: return "exp2x";
		case OPCODE_LOG2X: return "log2x";
		case OPCODE_LIT: return "lit";
		case OPCODE_ATT: return "att";
		case OPCODE_LRP: return "lrp";
		case OPCODE_STEP: return "step";
		case OPCODE_SMOOTH: return "smooth";
		case OPCODE_FLOATBITSTOINT: return "floatBitsToInt";
		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
		case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat";
		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
		case OPCODE_PACKSNORM2x16: return "packSnorm2x16";
		case OPCODE_PACKUNORM2x16: return "packUnorm2x16";
		case OPCODE_PACKHALF2x16: return "packHalf2x16";
		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
		case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16";
		case OPCODE_FRC: return "frc";
		case OPCODE_M4X4: return "m4x4";
		case OPCODE_M4X3: return "m4x3";
		case OPCODE_M3X4: return "m3x4";
		case OPCODE_M3X3: return "m3x3";
		case OPCODE_M3X2: return "m3x2";
		case OPCODE_CALL: return "call";
		case OPCODE_CALLNZ: return "callnz";
		case OPCODE_LOOP: return "loop";
		case OPCODE_RET: return "ret";
		case OPCODE_ENDLOOP: return "endloop";
		case OPCODE_LABEL: return "label";
		case OPCODE_DCL: return "dcl";
		case OPCODE_POWX: return "powx";
		case OPCODE_CRS: return "crs";
		case OPCODE_SGN: return "sgn";
		case OPCODE_ISGN: return "isgn";
		case OPCODE_ABS: return "abs";
		case OPCODE_IABS: return "iabs";
		case OPCODE_NRM2: return "nrm2";
		case OPCODE_NRM3: return "nrm3";
		case OPCODE_NRM4: return "nrm4";
		case OPCODE_SINCOS: return "sincos";
		case OPCODE_REP: return "rep";
		case OPCODE_ENDREP: return "endrep";
		case OPCODE_IF: return "if";
		case OPCODE_IFC: return "ifc";
		case OPCODE_ELSE: return "else";
		case OPCODE_ENDIF: return "endif";
		case OPCODE_BREAK: return "break";
		case OPCODE_BREAKC: return "breakc";
		case OPCODE_MOVA: return "mova";
		case OPCODE_DEFB: return "defb";
		case OPCODE_DEFI: return "defi";
		case OPCODE_TEXCOORD: return "texcoord";
		case OPCODE_TEXKILL: return "texkill";
		case OPCODE_DISCARD: return "discard";
		case OPCODE_TEX:
			// The mnemonic changes from "tex" to "texld" at version 0x0104
			if(version < 0x0104) return "tex";
			else return "texld";
		case OPCODE_TEXBEM: return "texbem";
		case OPCODE_TEXBEML: return "texbeml";
		case OPCODE_TEXREG2AR: return "texreg2ar";
		case OPCODE_TEXREG2GB: return "texreg2gb";
		case OPCODE_TEXM3X2PAD: return "texm3x2pad";
		case OPCODE_TEXM3X2TEX: return "texm3x2tex";
		case OPCODE_TEXM3X3PAD: return "texm3x3pad";
		case OPCODE_TEXM3X3TEX: return "texm3x3tex";
		case OPCODE_RESERVED0: return "reserved0";
		case OPCODE_TEXM3X3SPEC: return "texm3x3spec";
		case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec";
		case OPCODE_EXPP: return "expp";
		case OPCODE_LOGP: return "logp";
		case OPCODE_CND: return "cnd";
		case OPCODE_DEF: return "def";
		case OPCODE_TEXREG2RGB: return "texreg2rgb";
		case OPCODE_TEXDP3TEX: return "texdp3tex";
		case OPCODE_TEXM3X2DEPTH: return "texm3x2depth";
		case OPCODE_TEXDP3: return "texdp3";
		case OPCODE_TEXM3X3: return "texm3x3";
		case OPCODE_TEXDEPTH: return "texdepth";
		case OPCODE_CMP0: return "cmp0";
		case OPCODE_ICMP: return "icmp";
		case OPCODE_UCMP: return "ucmp";
		case OPCODE_SELECT: return "select";
		case OPCODE_EXTRACT: return "extract";
		case OPCODE_INSERT: return "insert";
		case OPCODE_BEM: return "bem";
		case OPCODE_DP2ADD: return "dp2add";
		case OPCODE_DFDX: return "dFdx";
		case OPCODE_DFDY: return "dFdy";
		case OPCODE_FWIDTH: return "fwidth";
		case OPCODE_TEXLDD: return "texldd";
		case OPCODE_CMP: return "cmp";
		case OPCODE_TEXLDL: return "texldl";
		case OPCODE_TEXBIAS: return "texbias";
		case OPCODE_TEXOFFSET: return "texoffset";
		case OPCODE_TEXOFFSETBIAS: return "texoffsetbias";
		case OPCODE_TEXLODOFFSET: return "texlodoffset";
		case OPCODE_TEXELFETCH: return "texelfetch";
		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
		case OPCODE_TEXGRAD: return "texgrad";
		case OPCODE_TEXGRADOFFSET: return "texgradoffset";
		case OPCODE_BREAKP: return "breakp";
		case OPCODE_TEXSIZE: return "texsize";
		case OPCODE_PHASE: return "phase";
		case OPCODE_COMMENT: return "comment";
		case OPCODE_END: return "end";
		case OPCODE_PS_1_0: return "ps_1_0";
		case OPCODE_PS_1_1: return "ps_1_1";
		case OPCODE_PS_1_2: return "ps_1_2";
		case OPCODE_PS_1_3: return "ps_1_3";
		case OPCODE_PS_1_4: return "ps_1_4";
		case OPCODE_PS_2_0: return "ps_2_0";
		case OPCODE_PS_2_x: return "ps_2_x";
		case OPCODE_PS_3_0: return "ps_3_0";
		case OPCODE_VS_1_0: return "vs_1_0";
		case OPCODE_VS_1_1: return "vs_1_1";
		case OPCODE_VS_2_0: return "vs_2_0";
		case OPCODE_VS_2_x: return "vs_2_x";
		case OPCODE_VS_2_sw: return "vs_2_sw";
		case OPCODE_VS_3_0: return "vs_3_0";
		case OPCODE_VS_3_sw: return "vs_3_sw";
		case OPCODE_WHILE: return "while";
		case OPCODE_ENDWHILE: return "endwhile";
		case OPCODE_COS: return "cos";
		case OPCODE_SIN: return "sin";
		case OPCODE_TAN: return "tan";
		case OPCODE_ACOS: return "acos";
		case OPCODE_ASIN: return "asin";
		case OPCODE_ATAN: return "atan";
		case OPCODE_ATAN2: return "atan2";
		case OPCODE_COSH: return "cosh";
		case OPCODE_SINH: return "sinh";
		case OPCODE_TANH: return "tanh";
		case OPCODE_ACOSH: return "acosh";
		case OPCODE_ASINH: return "asinh";
		case OPCODE_ATANH: return "atanh";
		case OPCODE_DP1: return "dp1";
		case OPCODE_DP2: return "dp2";
		case OPCODE_TRUNC: return "trunc";
		case OPCODE_FLOOR: return "floor";
		case OPCODE_ROUND: return "round";
		case OPCODE_ROUNDEVEN: return "roundEven";
		case OPCODE_CEIL: return "ceil";
		case OPCODE_EXP2: return "exp2";
		case OPCODE_LOG2: return "log2";
		case OPCODE_EXP: return "exp";
		case OPCODE_LOG: return "log";
		case OPCODE_POW: return "pow";
		case OPCODE_F2B: return "f2b";
		case OPCODE_B2F: return "b2f";
		case OPCODE_F2I: return "f2i";
		case OPCODE_I2F: return "i2f";
		case OPCODE_F2U: return "f2u";
		case OPCODE_U2F: return "u2f";
		case OPCODE_B2I: return "b2i";
		case OPCODE_I2B: return "i2b";
		case OPCODE_ALL: return "all";
		case OPCODE_ANY: return "any";
		case OPCODE_NEG: return "neg";
		case OPCODE_INEG: return "ineg";
		case OPCODE_ISNAN: return "isnan";
		case OPCODE_ISINF: return "isinf";
		case OPCODE_NOT: return "not";
		case OPCODE_OR: return "or";
		case OPCODE_XOR: return "xor";
		case OPCODE_AND: return "and";
		case OPCODE_EQ: return "eq";
		case OPCODE_NE: return "neq";
		case OPCODE_FORWARD1: return "forward1";
		case OPCODE_FORWARD2: return "forward2";
		case OPCODE_FORWARD3: return "forward3";
		case OPCODE_FORWARD4: return "forward4";
		case OPCODE_REFLECT1: return "reflect1";
		case OPCODE_REFLECT2: return "reflect2";
		case OPCODE_REFLECT3: return "reflect3";
		case OPCODE_REFLECT4: return "reflect4";
		case OPCODE_REFRACT1: return "refract1";
		case OPCODE_REFRACT2: return "refract2";
		case OPCODE_REFRACT3: return "refract3";
		case OPCODE_REFRACT4: return "refract4";
		case OPCODE_LEAVE: return "leave";
		case OPCODE_CONTINUE: return "continue";
		case OPCODE_TEST: return "test";
		case OPCODE_SWITCH: return "switch";
		case OPCODE_ENDSWITCH: return "endswitch";
		default:
			ASSERT(false);
		}

		// Reached only for an opcode that has no entry above
		return "<unknown>";
	}
1001
1002 std::string Shader::Instruction::controlString() const
1003 {
1004 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
1005 {
1006 if(project) return "p";
1007
1008 if(bias) return "b";
1009
1010 // FIXME: LOD
1011 }
1012
1013 switch(control)
1014 {
1015 case 1: return "_gt";
1016 case 2: return "_eq";
1017 case 3: return "_ge";
1018 case 4: return "_lt";
1019 case 5: return "_ne";
1020 case 6: return "_le";
1021 default:
1022 return "";
1023 // ASSERT(false); // FIXME
1024 }
1025 }
1026
1027 std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1028 {
1029 std::ostringstream buffer;
1030
1031 if(type == PARAMETER_FLOAT4LITERAL)
1032 {
1033 buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1034
1035 return buffer.str();
1036 }
1037 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1038 {
1039 buffer << index;
1040
1041 return typeString(shaderType, version) + buffer.str();
1042 }
1043 else
1044 {
1045 return typeString(shaderType, version);
1046 }
1047 }
1048
1049 std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1050 {
1051 switch(type)
1052 {
1053 case PARAMETER_TEMP: return "r";
1054 case PARAMETER_INPUT: return "v";
1055 case PARAMETER_CONST: return "c";
1056 case PARAMETER_TEXTURE:
1057 // case PARAMETER_ADDR:
1058 if(shaderType == SHADER_PIXEL) return "t";
1059 else return "a0";
1060 case PARAMETER_RASTOUT:
1061 if(index == 0) return "oPos";
1062 else if(index == 1) return "oFog";
1063 else if(index == 2) return "oPts";
1064 else ASSERT(false);
1065 case PARAMETER_ATTROUT: return "oD";
1066 case PARAMETER_TEXCRDOUT:
1067 // case PARAMETER_OUTPUT: return "";
1068 if(version < 0x0300) return "oT";
1069 else return "o";
1070 case PARAMETER_CONSTINT: return "i";
1071 case PARAMETER_COLOROUT: return "oC";
1072 case PARAMETER_DEPTHOUT: return "oDepth";
1073 case PARAMETER_SAMPLER: return "s";
1074 // case PARAMETER_CONST2: return "";
1075 // case PARAMETER_CONST3: return "";
1076 // case PARAMETER_CONST4: return "";
1077 case PARAMETER_CONSTBOOL: return "b";
1078 case PARAMETER_LOOP: return "aL";
1079 // case PARAMETER_TEMPFLOAT16: return "";
1080 case PARAMETER_MISCTYPE:
1081 switch(index)
1082 {
1083 case VPosIndex: return "vPos";
1084 case VFaceIndex: return "vFace";
1085 case InstanceIDIndex: return "iID";
1086 case VertexIDIndex: return "vID";
1087 default: ASSERT(false);
1088 }
1089 case PARAMETER_LABEL: return "l";
1090 case PARAMETER_PREDICATE: return "p0";
1091 case PARAMETER_FLOAT4LITERAL: return "";
1092 case PARAMETER_BOOL1LITERAL: return "";
1093 case PARAMETER_INT4LITERAL: return "";
1094 // case PARAMETER_VOID: return "";
1095 default:
1096 ASSERT(false);
1097 }
1098
1099 return "";
1100 }
1101
1102 bool Shader::Instruction::isBranch() const
1103 {
1104 return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1105 }
1106
1107 bool Shader::Instruction::isCall() const
1108 {
1109 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1110 }
1111
1112 bool Shader::Instruction::isBreak() const
1113 {
1114 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1115 }
1116
1117 bool Shader::Instruction::isLoop() const
1118 {
1119 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1120 }
1121
1122 bool Shader::Instruction::isEndLoop() const
1123 {
1124 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1125 }
1126
1127 bool Shader::Instruction::isPredicated() const
1128 {
1129 return predicate ||
1130 analysisBranch ||
1131 analysisBreak ||
1132 analysisContinue ||
1133 analysisLeave;
1134 }
1135
	// Creates an empty shader. serialID takes the current value of serialCounter,
	// which is then incremented; no samplers are marked as used.
	Shader::Shader() : serialID(serialCounter++)
	{
		usedSamplers = 0;
	}
1140
1141 Shader::~Shader()
1142 {
1143 for(auto &inst : instruction)
1144 {
1145 delete inst;
1146 inst = 0;
1147 }
1148 }
1149
1150 void Shader::parse(const unsigned long *token)
1151 {
1152 minorVersion = (unsigned char)(token[0] & 0x000000FF);
1153 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1154 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1155
1156 int length = 0;
1157
1158 if(shaderType == SHADER_VERTEX)
1159 {
1160 length = VertexShader::validate(token);
1161 }
1162 else if(shaderType == SHADER_PIXEL)
1163 {
1164 length = PixelShader::validate(token);
1165 }
1166 else ASSERT(false);
1167
1168 ASSERT(length != 0);
1169 instruction.resize(length);
1170
1171 for(int i = 0; i < length; i++)
1172 {
1173 while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token
1174 {
1175 int length = (*token & 0x7FFF0000) >> 16;
1176
1177 token += length + 1;
1178 }
1179
1180 int tokenCount = size(*token);
1181
1182 instruction[i] = new Instruction(token, tokenCount, majorVersion);
1183
1184 token += 1 + tokenCount;
1185 }
1186 }
1187
1188 int Shader::size(unsigned long opcode) const
1189 {
1190 return size(opcode, shaderModel);
1191 }
1192
1193 int Shader::size(unsigned long opcode, unsigned short shaderModel)
1194 {
1195 if(shaderModel > 0x0300)
1196 {
1197 ASSERT(false);
1198 }
1199
1200 static const signed char size[] =
1201 {
1202 0, // NOP = 0
1203 2, // MOV
1204 3, // ADD
1205 3, // SUB
1206 4, // MAD
1207 3, // MUL
1208 2, // RCP
1209 2, // RSQ
1210 3, // DP3
1211 3, // DP4
1212 3, // MIN
1213 3, // MAX
1214 3, // SLT
1215 3, // SGE
1216 2, // EXP
1217 2, // LOG
1218 2, // LIT
1219 3, // DST
1220 4, // LRP
1221 2, // FRC
1222 3, // M4x4
1223 3, // M4x3
1224 3, // M3x4
1225 3, // M3x3
1226 3, // M3x2
1227 1, // CALL
1228 2, // CALLNZ
1229 2, // LOOP
1230 0, // RET
1231 0, // ENDLOOP
1232 1, // LABEL
1233 2, // DCL
1234 3, // POW
1235 3, // CRS
1236 4, // SGN
1237 2, // ABS
1238 2, // NRM
1239 4, // SINCOS
1240 1, // REP
1241 0, // ENDREP
1242 1, // IF
1243 2, // IFC
1244 0, // ELSE
1245 0, // ENDIF
1246 0, // BREAK
1247 2, // BREAKC
1248 2, // MOVA
1249 2, // DEFB
1250 5, // DEFI
1251 -1, // 49
1252 -1, // 50
1253 -1, // 51
1254 -1, // 52
1255 -1, // 53
1256 -1, // 54
1257 -1, // 55
1258 -1, // 56
1259 -1, // 57
1260 -1, // 58
1261 -1, // 59
1262 -1, // 60
1263 -1, // 61
1264 -1, // 62
1265 -1, // 63
1266 1, // TEXCOORD = 64
1267 1, // TEXKILL
1268 1, // TEX
1269 2, // TEXBEM
1270 2, // TEXBEML
1271 2, // TEXREG2AR
1272 2, // TEXREG2GB
1273 2, // TEXM3x2PAD
1274 2, // TEXM3x2TEX
1275 2, // TEXM3x3PAD
1276 2, // TEXM3x3TEX
1277 -1, // RESERVED0
1278 3, // TEXM3x3SPEC
1279 2, // TEXM3x3VSPEC
1280 2, // EXPP
1281 2, // LOGP
1282 4, // CND
1283 5, // DEF
1284 2, // TEXREG2RGB
1285 2, // TEXDP3TEX
1286 2, // TEXM3x2DEPTH
1287 2, // TEXDP3
1288 2, // TEXM3x3
1289 1, // TEXDEPTH
1290 4, // CMP
1291 3, // BEM
1292 4, // DP2ADD
1293 2, // DSX
1294 2, // DSY
1295 5, // TEXLDD
1296 3, // SETP
1297 3, // TEXLDL
1298 2, // BREAKP
1299 -1, // 97
1300 -1, // 98
1301 -1, // 99
1302 -1, // 100
1303 -1, // 101
1304 -1, // 102
1305 -1, // 103
1306 -1, // 104
1307 -1, // 105
1308 -1, // 106
1309 -1, // 107
1310 -1, // 108
1311 -1, // 109
1312 -1, // 110
1313 -1, // 111
1314 -1, // 112
1315 };
1316
1317 int length = 0;
1318
1319 if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1320 {
1321 return (opcode & 0x7FFF0000) >> 16;
1322 }
1323
1324 if(opcode != OPCODE_PS_1_0 &&
1325 opcode != OPCODE_PS_1_1 &&
1326 opcode != OPCODE_PS_1_2 &&
1327 opcode != OPCODE_PS_1_3 &&
1328 opcode != OPCODE_PS_1_4 &&
1329 opcode != OPCODE_PS_2_0 &&
1330 opcode != OPCODE_PS_2_x &&
1331 opcode != OPCODE_PS_3_0 &&
1332 opcode != OPCODE_VS_1_0 &&
1333 opcode != OPCODE_VS_1_1 &&
1334 opcode != OPCODE_VS_2_0 &&
1335 opcode != OPCODE_VS_2_x &&
1336 opcode != OPCODE_VS_2_sw &&
1337 opcode != OPCODE_VS_3_0 &&
1338 opcode != OPCODE_VS_3_sw &&
1339 opcode != OPCODE_PHASE &&
1340 opcode != OPCODE_END)
1341 {
1342 if(shaderModel >= 0x0200)
1343 {
1344 length = (opcode & 0x0F000000) >> 24;
1345 }
1346 else
1347 {
1348 length = size[opcode & 0x0000FFFF];
1349 }
1350 }
1351
1352 if(length < 0)
1353 {
1354 ASSERT(false);
1355 }
1356
1357 if(shaderModel == 0x0104)
1358 {
1359 switch(opcode & 0x0000FFFF)
1360 {
1361 case OPCODE_TEX:
1362 length += 1;
1363 break;
1364 case OPCODE_TEXCOORD:
1365 length += 1;
1366 break;
1367 default:
1368 break;
1369 }
1370 }
1371
1372 return length;
1373 }
1374
1375 bool Shader::maskContainsComponent(int mask, int component)
1376 {
1377 return (mask & (1 << component)) != 0;
1378 }
1379
1380 bool Shader::swizzleContainsComponent(int swizzle, int component)
1381 {
1382 if((swizzle & 0x03) >> 0 == component) return true;
1383 if((swizzle & 0x0C) >> 2 == component) return true;
1384 if((swizzle & 0x30) >> 4 == component) return true;
1385 if((swizzle & 0xC0) >> 6 == component) return true;
1386
1387 return false;
1388 }
1389
1390 bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1391 {
1392 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1393 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1394 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1395 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1396
1397 return false;
1398 }
1399
1400 bool Shader::containsDynamicBranching() const
1401 {
1402 return dynamicBranching;
1403 }
1404
1405 bool Shader::containsBreakInstruction() const
1406 {
1407 return containsBreak;
1408 }
1409
1410 bool Shader::containsContinueInstruction() const
1411 {
1412 return containsContinue;
1413 }
1414
1415 bool Shader::containsLeaveInstruction() const
1416 {
1417 return containsLeave;
1418 }
1419
1420 bool Shader::containsDefineInstruction() const
1421 {
1422 return containsDefine;
1423 }
1424
1425 bool Shader::usesSampler(int index) const
1426 {
1427 return (usedSamplers & (1 << index)) != 0;
1428 }
1429
1430 int Shader::getSerialID() const
1431 {
1432 return serialID;
1433 }
1434
1435 size_t Shader::getLength() const
1436 {
1437 return instruction.size();
1438 }
1439
1440 Shader::ShaderType Shader::getShaderType() const
1441 {
1442 return shaderType;
1443 }
1444
1445 unsigned short Shader::getShaderModel() const
1446 {
1447 return shaderModel;
1448 }
1449
1450 void Shader::print(const char *fileName, ...) const
1451 {
1452 char fullName[1024 + 1];
1453
1454 va_list vararg;
1455 va_start(vararg, fileName);
1456 vsnprintf(fullName, 1024, fileName, vararg);
1457 va_end(vararg);
1458
1459 std::ofstream file(fullName, std::ofstream::out);
1460
1461 for(const auto &inst : instruction)
1462 {
1463 file << inst->string(shaderType, shaderModel) << std::endl;
1464 }
1465 }
1466
1467 void Shader::printInstruction(int index, const char *fileName) const
1468 {
1469 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1470
1471 file << instruction[index]->string(shaderType, shaderModel) << std::endl;
1472 }
1473
1474 void Shader::append(Instruction *instruction)
1475 {
1476 this->instruction.push_back(instruction);
1477 }
1478
1479 void Shader::declareSampler(int i)
1480 {
1481 if(i >= 0 && i < 16)
1482 {
1483 usedSamplers |= 1 << i;
1484 }
1485 }
1486
1487 const Shader::Instruction *Shader::getInstruction(size_t i) const
1488 {
1489 ASSERT(i < instruction.size());
1490
1491 return instruction[i];
1492 }
1493
1494 void Shader::optimize()
1495 {
1496 optimizeLeave();
1497 optimizeCall();
1498 removeNull();
1499 }
1500
1501 void Shader::optimizeLeave()
1502 {
1503 // A return (leave) right before the end of a function or the shader can be removed
1504 for(unsigned int i = 0; i < instruction.size(); i++)
1505 {
1506 if(instruction[i]->opcode == OPCODE_LEAVE)
1507 {
1508 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1509 {
1510 instruction[i]->opcode = OPCODE_NULL;
1511 }
1512 }
1513 }
1514 }
1515
1516 void Shader::optimizeCall()
1517 {
1518 // Eliminate uncalled functions
1519 std::set<int> calledFunctions;
1520 bool rescan = true;
1521
1522 while(rescan)
1523 {
1524 calledFunctions.clear();
1525 rescan = false;
1526
1527 for(const auto &inst : instruction)
1528 {
1529 if(inst->isCall())
1530 {
1531 calledFunctions.insert(inst->dst.label);
1532 }
1533 }
1534
1535 if(!calledFunctions.empty())
1536 {
1537 for(unsigned int i = 0; i < instruction.size(); i++)
1538 {
1539 if(instruction[i]->opcode == OPCODE_LABEL)
1540 {
1541 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1542 {
1543 for( ; i < instruction.size(); i++)
1544 {
1545 Opcode oldOpcode = instruction[i]->opcode;
1546 instruction[i]->opcode = OPCODE_NULL;
1547
1548 if(oldOpcode == OPCODE_RET)
1549 {
1550 rescan = true;
1551 break;
1552 }
1553 }
1554 }
1555 }
1556 }
1557 }
1558 }
1559
1560 // Optimize the entry call
1561 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1562 {
1563 if(calledFunctions.size() == 1)
1564 {
1565 instruction[0]->opcode = OPCODE_NULL;
1566 instruction[1]->opcode = OPCODE_NULL;
1567
1568 for(size_t i = 2; i < instruction.size(); i++)
1569 {
1570 if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1571 {
1572 instruction[i]->opcode = OPCODE_NULL;
1573 }
1574 }
1575 }
1576 }
1577 }
1578
1579 void Shader::removeNull()
1580 {
1581 size_t size = 0;
1582 for(size_t i = 0; i < instruction.size(); i++)
1583 {
1584 if(instruction[i]->opcode != OPCODE_NULL)
1585 {
1586 instruction[size] = instruction[i];
1587 size++;
1588 }
1589 else
1590 {
1591 delete instruction[i];
1592 }
1593 }
1594
1595 instruction.resize(size);
1596 }
1597
1598 void Shader::analyzeDirtyConstants()
1599 {
1600 dirtyConstantsF = 0;
1601 dirtyConstantsI = 0;
1602 dirtyConstantsB = 0;
1603
1604 for(const auto &inst : instruction)
1605 {
1606 switch(inst->opcode)
1607 {
1608 case OPCODE_DEF:
1609 if(inst->dst.index + 1 > dirtyConstantsF)
1610 {
1611 dirtyConstantsF = inst->dst.index + 1;
1612 }
1613 break;
1614 case OPCODE_DEFI:
1615 if(inst->dst.index + 1 > dirtyConstantsI)
1616 {
1617 dirtyConstantsI = inst->dst.index + 1;
1618 }
1619 break;
1620 case OPCODE_DEFB:
1621 if(inst->dst.index + 1 > dirtyConstantsB)
1622 {
1623 dirtyConstantsB = inst->dst.index + 1;
1624 }
1625 break;
1626 default:
1627 break;
1628 }
1629 }
1630 }
1631
1632 void Shader::analyzeDynamicBranching()
1633 {
1634 dynamicBranching = false;
1635 containsLeave = false;
1636 containsBreak = false;
1637 containsContinue = false;
1638 containsDefine = false;
1639
1640 // Determine global presence of branching instructions
1641 for(const auto &inst : instruction)
1642 {
1643 switch(inst->opcode)
1644 {
1645 case OPCODE_CALLNZ:
1646 case OPCODE_IF:
1647 case OPCODE_IFC:
1648 case OPCODE_BREAK:
1649 case OPCODE_BREAKC:
1650 case OPCODE_CMP:
1651 case OPCODE_BREAKP:
1652 case OPCODE_LEAVE:
1653 case OPCODE_CONTINUE:
1654 if(inst->src[0].type != PARAMETER_CONSTBOOL)
1655 {
1656 dynamicBranching = true;
1657 }
1658
1659 if(inst->opcode == OPCODE_LEAVE)
1660 {
1661 containsLeave = true;
1662 }
1663
1664 if(inst->isBreak())
1665 {
1666 containsBreak = true;
1667 }
1668
1669 if(inst->opcode == OPCODE_CONTINUE)
1670 {
1671 containsContinue = true;
1672 }
1673 case OPCODE_DEF:
1674 case OPCODE_DEFB:
1675 case OPCODE_DEFI:
1676 containsDefine = true;
1677 default:
1678 break;
1679 }
1680 }
1681
1682 // Conservatively determine which instructions are affected by dynamic branching
1683 int branchDepth = 0;
1684 int breakDepth = 0;
1685 int continueDepth = 0;
1686 bool leaveReturn = false;
1687 unsigned int functionBegin = 0;
1688
1689 for(unsigned int i = 0; i < instruction.size(); i++)
1690 {
1691 // If statements and loops
1692 if(instruction[i]->isBranch() || instruction[i]->isLoop())
1693 {
1694 branchDepth++;
1695 }
1696 else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop())
1697 {
1698 branchDepth--;
1699 }
1700
1701 if(branchDepth > 0)
1702 {
1703 instruction[i]->analysisBranch = true;
1704
1705 if(instruction[i]->isCall())
1706 {
1707 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1708 }
1709 }
1710
1711 // Break statemement
1712 if(instruction[i]->isBreak())
1713 {
1714 breakDepth++;
1715 }
1716
1717 if(breakDepth > 0)
1718 {
1719 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask
1720 {
1721 breakDepth++;
1722 }
1723 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1724 {
1725 breakDepth--;
1726 }
1727
1728 instruction[i]->analysisBreak = true;
1729
1730 if(instruction[i]->isCall())
1731 {
1732 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1733 }
1734 }
1735
1736 // Continue statement
1737 if(instruction[i]->opcode == OPCODE_CONTINUE)
1738 {
1739 continueDepth++;
1740 }
1741
1742 if(continueDepth > 0)
1743 {
1744 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask
1745 {
1746 continueDepth++;
1747 }
1748 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1749 {
1750 continueDepth--;
1751 }
1752
1753 instruction[i]->analysisContinue = true;
1754
1755 if(instruction[i]->isCall())
1756 {
1757 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1758 }
1759 }
1760
1761 // Return (leave) statement
1762 if(instruction[i]->opcode == OPCODE_LEAVE)
1763 {
1764 leaveReturn = true;
1765
1766 // Mark loop body instructions prior to the return statement
1767 for(unsigned int l = functionBegin; l < i; l++)
1768 {
1769 if(instruction[l]->isLoop())
1770 {
1771 for(unsigned int r = l + 1; r < i; r++)
1772 {
1773 instruction[r]->analysisLeave = true;
1774 }
1775
1776 break;
1777 }
1778 }
1779 }
1780 else if(instruction[i]->opcode == OPCODE_RET) // End of the function
1781 {
1782 leaveReturn = false;
1783 }
1784 else if(instruction[i]->opcode == OPCODE_LABEL)
1785 {
1786 functionBegin = i;
1787 }
1788
1789 if(leaveReturn)
1790 {
1791 instruction[i]->analysisLeave = true;
1792
1793 if(instruction[i]->isCall())
1794 {
1795 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1796 }
1797 }
1798 }
1799 }
1800
1801 void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1802 {
1803 bool marker = false;
1804 for(auto &inst : instruction)
1805 {
1806 if(!marker)
1807 {
1808 if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel)
1809 {
1810 marker = true;
1811 }
1812 }
1813 else
1814 {
1815 if(inst->opcode == OPCODE_RET)
1816 {
1817 break;
1818 }
1819 else if(inst->isCall())
1820 {
1821 markFunctionAnalysis(inst->dst.label, flag);
1822 }
1823
1824 inst->analysis |= flag;
1825 }
1826 }
1827 }
1828
1829 void Shader::analyzeSamplers()
1830 {
1831 for(const auto &inst : instruction)
1832 {
1833 switch(inst->opcode)
1834 {
1835 case OPCODE_TEX:
1836 case OPCODE_TEXBEM:
1837 case OPCODE_TEXBEML:
1838 case OPCODE_TEXREG2AR:
1839 case OPCODE_TEXREG2GB:
1840 case OPCODE_TEXM3X2TEX:
1841 case OPCODE_TEXM3X3TEX:
1842 case OPCODE_TEXM3X3SPEC:
1843 case OPCODE_TEXM3X3VSPEC:
1844 case OPCODE_TEXREG2RGB:
1845 case OPCODE_TEXDP3TEX:
1846 case OPCODE_TEXM3X2DEPTH:
1847 case OPCODE_TEXLDD:
1848 case OPCODE_TEXLDL:
1849 case OPCODE_TEXLOD:
1850 case OPCODE_TEXOFFSET:
1851 case OPCODE_TEXOFFSETBIAS:
1852 case OPCODE_TEXLODOFFSET:
1853 case OPCODE_TEXELFETCH:
1854 case OPCODE_TEXELFETCHOFFSET:
1855 case OPCODE_TEXGRAD:
1856 case OPCODE_TEXGRADOFFSET:
1857 {
1858 Parameter &dst = inst->dst;
1859 Parameter &src1 = inst->src[1];
1860
1861 if(majorVersion >= 2)
1862 {
1863 if(src1.type == PARAMETER_SAMPLER)
1864 {
1865 usedSamplers |= 1 << src1.index;
1866 }
1867 }
1868 else
1869 {
1870 usedSamplers |= 1 << dst.index;
1871 }
1872 }
1873 break;
1874 default:
1875 break;
1876 }
1877 }
1878 }
1879
1880 // Assigns a unique index to each call instruction, on a per label basis.
1881 // This is used to know what basic block to return to.
1882 void Shader::analyzeCallSites()
1883 {
1884 std::unordered_map<int, int> callSiteIndices;
1885
1886 for(auto &inst : instruction)
1887 {
1888 if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
1889 {
1890 inst->dst.callSite = callSiteIndices[inst->dst.label]++;
1891 }
1892 }
1893 }
1894
1895 void Shader::analyzeIndirectAddressing()
1896 {
1897 indirectAddressableTemporaries = false;
1898 indirectAddressableInput = false;
1899 indirectAddressableOutput = false;
1900
1901 for(const auto &inst : instruction)
1902 {
1903 if(inst->dst.rel.type != PARAMETER_VOID)
1904 {
1905 switch(inst->dst.type)
1906 {
1907 case PARAMETER_TEMP: indirectAddressableTemporaries = true; break;
1908 case PARAMETER_INPUT: indirectAddressableInput = true; break;
1909 case PARAMETER_OUTPUT: indirectAddressableOutput = true; break;
1910 default: break;
1911 }
1912 }
1913
1914 for(int j = 0; j < 3; j++)
1915 {
1916 if(inst->src[j].rel.type != PARAMETER_VOID)
1917 {
1918 switch(inst->src[j].type)
1919 {
1920 case PARAMETER_TEMP: indirectAddressableTemporaries = true; break;
1921 case PARAMETER_INPUT: indirectAddressableInput = true; break;
1922 case PARAMETER_OUTPUT: indirectAddressableOutput = true; break;
1923 default: break;
1924 }
1925 }
1926 }
1927 }
1928 }
1929
1930 // analyzeLimits analyzes the whole shader program to determine the deepest
1931 // nesting of control flow blocks and function calls. These calculations
1932 // are stored into the limits member, and is used by the programs to
1933 // allocate stack storage variables.
1934 void Shader::analyzeLimits()
1935 {
1936 typedef unsigned int FunctionID;
1937
1938 // Identifier of the function with the main entry point.
1939 constexpr FunctionID MAIN_ID = 0xF0000000;
1940
1941 // Invalid function identifier.
1942 constexpr FunctionID INVALID_ID = ~0U;
1943
1944 // Limits on a single function.
1945 struct FunctionLimits
1946 {
1947 uint32_t loops = 0; // maximum nested loop and reps.
1948 uint32_t ifs = 0; // maximum nested if statements.
1949 uint32_t stack = 0; // maximum call depth.
1950 };
1951
1952 // Information about a single function in the shader.
1953 struct FunctionInfo
1954 {
1955 FunctionLimits limits;
1956 std::unordered_set<FunctionID> calls; // What this function calls.
1957 bool reachable; // Is this function reachable?
1958 };
1959
1960 std::unordered_map<FunctionID, FunctionInfo> functions;
1961
1962 uint32_t maxLabel = 0; // Highest label found
1963
1964 // Add a definition for the main entry point.
1965 // This starts at the beginning of the instructions and does not have
1966 // its own label.
1967 functions[MAIN_ID] = FunctionInfo();
1968 functions[MAIN_ID].reachable = true;
1969
1970 // Begin by doing a pass over the instructions to identify all the
1971 // functions. These start with a label and end with a ret. Note that
1972 // functions can have labels within them.
1973 FunctionID currentFunc = MAIN_ID;
1974 for(auto &inst : instruction)
1975 {
1976 switch (inst->opcode)
1977 {
1978 case OPCODE_LABEL:
1979 if (currentFunc == INVALID_ID)
1980 {
1981 // Start of a function.
1982 FunctionID id = inst->dst.label;
1983 ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else.
1984 functions[id] = FunctionInfo();
1985 }
1986 break;
1987 case OPCODE_RET:
1988 currentFunc = INVALID_ID;
1989 break;
1990 default:
1991 break;
1992 }
1993 }
1994
1995 // Limits for the currently analyzed function.
1996 FunctionLimits currentLimits;
1997
1998 // Now loop over the instructions gathering the limits of each of the
1999 // functions.
2000 currentFunc = MAIN_ID;
2001 for(size_t i = 0; i < instruction.size(); i++)
2002 {
2003 const auto& inst = instruction[i];
2004 switch (inst->opcode)
2005 {
2006 case OPCODE_LABEL:
2007 {
2008 maxLabel = std::max(maxLabel, inst->dst.label);
2009 if (currentFunc == INVALID_ID)
2010 {
2011 // Start of a function.
2012 FunctionID id = inst->dst.label;
2013 ASSERT(functions.find(id) != functions.end()); // Sanity check
2014 currentFunc = id;
2015 }
2016 break;
2017 }
2018 case OPCODE_CALL:
2019 case OPCODE_CALLNZ:
2020 {
2021 ASSERT(currentFunc != INVALID_ID);
2022 FunctionID id = inst->dst.label;
2023 ASSERT(functions.find(id) != functions.end());
2024 functions[currentFunc].calls.emplace(id);
2025 functions[id].reachable = true;
2026 break;
2027 }
2028 case OPCODE_LOOP:
2029 case OPCODE_REP:
2030 case OPCODE_WHILE:
2031 case OPCODE_SWITCH: // Not a mistake - switches share loopReps.
2032 {
2033 ASSERT(currentFunc != INVALID_ID);
2034 ++currentLimits.loops;
2035 auto& func = functions[currentFunc];
2036 func.limits.loops = std::max(func.limits.loops, currentLimits.loops);
2037 break;
2038 }
2039 case OPCODE_ENDLOOP:
2040 case OPCODE_ENDREP:
2041 case OPCODE_ENDWHILE:
2042 case OPCODE_ENDSWITCH:
2043 {
2044 ASSERT(currentLimits.loops > 0);
2045 --currentLimits.loops;
2046 break;
2047 }
2048 case OPCODE_IF:
2049 case OPCODE_IFC:
2050 {
2051 ASSERT(currentFunc != INVALID_ID);
2052 ++currentLimits.ifs;
2053 auto& func = functions[currentFunc];
2054 func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs);
2055 break;
2056 }
2057 case OPCODE_ENDIF:
2058 {
2059 ASSERT(currentLimits.ifs > 0);
2060 currentLimits.ifs--;
2061 break;
2062 }
2063 case OPCODE_RET:
2064 {
2065 // Must be in a function to return.
2066 ASSERT(currentFunc != INVALID_ID);
2067
2068 // All stacks should be popped before returning.
2069 ASSERT(currentLimits.ifs == 0);
2070 ASSERT(currentLimits.loops == 0);
2071
2072 currentFunc = INVALID_ID;
2073 currentLimits = FunctionLimits();
2074 break;
2075 }
2076 default:
2077 break;
2078 }
2079 }
2080
2081 // Assert that every function is reachable (these should have been
2082 // stripped in earlier stages). Unreachable functions may be code
2083 // generated, but their own limits are not considered below, potentially
2084 // causing OOB indexing in later stages.
2085 // If we ever find cases where there are unreachable functions, we can
2086 // replace this assert with NO-OPing or stripping out the dead
2087 // functions.
2088 for (auto it : functions) { ASSERT(it.second.reachable); }
2089
2090 // We have now gathered all the information about each of the functions
2091 // in the shader. Traverse these functions starting from the main
2092 // function to calculate the maximum limits across the entire shader.
2093
2094 std::unordered_set<FunctionID> visited;
2095 std::function<Limits(FunctionID)> traverse;
2096 traverse = [&](FunctionID id) -> Limits
2097 {
2098 const auto& func = functions[id];
2099 ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed.
2100 visited.insert(id);
2101 Limits limits;
2102 limits.stack = 1;
2103 for (auto callee : func.calls)
2104 {
2105 auto calleeLimits = traverse(callee);
2106 limits.loops = std::max(limits.loops, calleeLimits.loops);
2107 limits.ifs = std::max(limits.ifs, calleeLimits.ifs);
2108 limits.stack = std::max(limits.stack, calleeLimits.stack + 1);
2109 }
2110 visited.erase(id);
2111
2112 limits.loops += func.limits.loops;
2113 limits.ifs += func.limits.ifs;
2114 return limits;
2115 };
2116
2117 limits = traverse(MAIN_ID);
2118 limits.maxLabel = maxLabel;
2119 }
2120}
2121