1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SKSL_STANDALONE
9
10#include "include/core/SkPoint3.h"
11#include "include/private/SkVx.h"
12#include "src/core/SkUtils.h" // sk_unaligned_load
13#include "src/sksl/SkSLByteCode.h"
14#include "src/sksl/SkSLByteCodeGenerator.h"
15#include "src/sksl/SkSLExternalValue.h"
16
17#include <functional>
18#include <vector>
19
20namespace SkSL {
21
22#if defined(SK_ENABLE_SKSL_INTERPRETER)
23
24constexpr int VecWidth = ByteCode::kVecWidth;
25
26struct Interpreter {
27
28using F32 = skvx::Vec<VecWidth, float>;
29using I32 = skvx::Vec<VecWidth, int32_t>;
30using U32 = skvx::Vec<VecWidth, uint32_t>;
31
// Bytecode operand decoders. Each one reads from, and advances, a byte pointer named `ip`
// that must be in scope at the point of use:
//   READ8()     - the next byte
//   READ16()    - the next two bytes, loaded (unaligned) as a uint16_t
//   READ32()    - the next four bytes, loaded (unaligned) as a uint32_t
//   READ_INST() - the next sizeof(ByteCodeInstruction) bytes, loaded as a ByteCodeInstruction
#define READ8()  (*ip++)
#define READ16() (ip += sizeof(uint16_t), sk_unaligned_load<uint16_t>(ip - sizeof(uint16_t)))
#define READ32() (ip += sizeof(uint32_t), sk_unaligned_load<uint32_t>(ip - sizeof(uint32_t)))
#define READ_INST()                          \
    (ip += sizeof(ByteCodeInstruction),      \
     sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
37
// Disassembler case for an instruction with a single one-byte operand: prints `text N`.
#define DISASSEMBLE_COUNT(op, text)          \
    case ByteCodeInstruction::op: {          \
        printf(text " %d", READ8());         \
        break;                               \
    }

// Disassembler case for an instruction with two one-byte operands (a count, then a slot):
// prints `text N [slot]`.
#define DISASSEMBLE_COUNT_SLOT(op, text)             \
    case ByteCodeInstruction::op: {                  \
        const int operandCount = READ8();            \
        const int operandSlot  = READ8();            \
        printf(text " %d [%d]", operandCount, operandSlot); \
        break;                                       \
    }
47
48static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
49 auto inst = READ_INST();
50 printf("%02x ", (int)inst);
51 switch (inst) {
52 DISASSEMBLE_COUNT(kAddF, "addf")
53 DISASSEMBLE_COUNT(kAddI, "addi")
54 DISASSEMBLE_COUNT(kAndB, "andb")
55 DISASSEMBLE_COUNT(kATan, "atan")
56 case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
57 case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
58 case ByteCodeInstruction::kCallExternal: {
59 int argumentCount = READ8();
60 int returnCount = READ8();
61 int externalValue = READ8();
62 printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
63 break;
64 }
65 DISASSEMBLE_COUNT(kCeil, "ceil")
66 case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
67 DISASSEMBLE_COUNT(kCompareIEQ, "compareieq")
68 DISASSEMBLE_COUNT(kCompareINEQ, "compareineq")
69 DISASSEMBLE_COUNT(kCompareFEQ, "comparefeq")
70 DISASSEMBLE_COUNT(kCompareFNEQ, "comparefneq")
71 DISASSEMBLE_COUNT(kCompareFGT, "comparefgt")
72 DISASSEMBLE_COUNT(kCompareFGTEQ, "comparefgteq")
73 DISASSEMBLE_COUNT(kCompareFLT, "compareflt")
74 DISASSEMBLE_COUNT(kCompareFLTEQ, "compareflteq")
75 DISASSEMBLE_COUNT(kCompareSGT, "comparesgt")
76 DISASSEMBLE_COUNT(kCompareSGTEQ, "comparesgteq")
77 DISASSEMBLE_COUNT(kCompareSLT, "compareslt")
78 DISASSEMBLE_COUNT(kCompareSLTEQ, "compareslteq")
79 DISASSEMBLE_COUNT(kCompareUGT, "compareugt")
80 DISASSEMBLE_COUNT(kCompareUGTEQ, "compareugteq")
81 DISASSEMBLE_COUNT(kCompareULT, "compareult")
82 DISASSEMBLE_COUNT(kCompareULTEQ, "compareulteq")
83 DISASSEMBLE_COUNT(kConvertFtoI, "convertftoi")
84 DISASSEMBLE_COUNT(kConvertStoF, "convertstof")
85 DISASSEMBLE_COUNT(kConvertUtoF, "convertutof")
86 DISASSEMBLE_COUNT(kCos, "cos")
87 DISASSEMBLE_COUNT(kDivideF, "dividef")
88 DISASSEMBLE_COUNT(kDivideS, "divideS")
89 DISASSEMBLE_COUNT(kDivideU, "divideu")
90 DISASSEMBLE_COUNT(kDup, "dup")
91 DISASSEMBLE_COUNT(kFloor, "floor")
92 DISASSEMBLE_COUNT(kFract, "fract")
93 case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
94 case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
95 case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
96 DISASSEMBLE_COUNT(kLerp, "lerp")
97 DISASSEMBLE_COUNT_SLOT(kLoad, "load")
98 DISASSEMBLE_COUNT_SLOT(kLoadGlobal, "loadglobal")
99 DISASSEMBLE_COUNT_SLOT(kLoadUniform, "loaduniform")
100 DISASSEMBLE_COUNT(kLoadExtended, "loadextended")
101 DISASSEMBLE_COUNT(kLoadExtendedGlobal, "loadextendedglobal")
102 DISASSEMBLE_COUNT(kLoadExtendedUniform, "loadextendeduniform")
103 case ByteCodeInstruction::kLoadFragCoord: printf("loadfragcoord"); break;
104 case ByteCodeInstruction::kMatrixToMatrix: {
105 int srcCols = READ8();
106 int srcRows = READ8();
107 int dstCols = READ8();
108 int dstRows = READ8();
109 printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
110 break;
111 }
112 case ByteCodeInstruction::kMatrixMultiply: {
113 int lCols = READ8();
114 int lRows = READ8();
115 int rCols = READ8();
116 printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
117 break;
118 }
119 DISASSEMBLE_COUNT(kMaxF, "maxf")
120 DISASSEMBLE_COUNT(kMaxS, "maxs")
121 DISASSEMBLE_COUNT(kMinF, "minf")
122 DISASSEMBLE_COUNT(kMinS, "mins")
123 DISASSEMBLE_COUNT(kMix, "mix")
124 DISASSEMBLE_COUNT(kMultiplyF, "multiplyf")
125 DISASSEMBLE_COUNT(kMultiplyI, "multiplyi")
126 DISASSEMBLE_COUNT(kNegateF, "negatef")
127 DISASSEMBLE_COUNT(kNegateI, "negatei")
128 DISASSEMBLE_COUNT(kNotB, "notb")
129 DISASSEMBLE_COUNT(kOrB, "orb")
130 DISASSEMBLE_COUNT(kPop, "pop")
131 DISASSEMBLE_COUNT(kPow, "pow")
132 case ByteCodeInstruction::kPushImmediate: {
133 uint32_t v = READ32();
134 union { uint32_t u; float f; } pun = { v };
135 printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
136 break;
137 }
138 DISASSEMBLE_COUNT_SLOT(kReadExternal, "readexternal")
139 DISASSEMBLE_COUNT(kRemainderF, "remainderf")
140 DISASSEMBLE_COUNT(kRemainderS, "remainders")
141 DISASSEMBLE_COUNT(kRemainderU, "remainderu")
142 DISASSEMBLE_COUNT(kReserve, "reserve")
143 DISASSEMBLE_COUNT(kReturn, "return")
144 case ByteCodeInstruction::kSample: printf("sample %d", READ8()); break;
145 case ByteCodeInstruction::kSampleExplicit: printf("sampleExplicit %d", READ8()); break;
146 case ByteCodeInstruction::kSampleMatrix: printf("sampleMatrix %d", READ8()); break;
147 case ByteCodeInstruction::kScalarToMatrix: {
148 int cols = READ8();
149 int rows = READ8();
150 printf("scalartomatrix %dx%d", cols, rows);
151 break;
152 }
153 case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
154 case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
155 case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
156 DISASSEMBLE_COUNT(kSin, "sin")
157 DISASSEMBLE_COUNT(kSqrt, "sqrt")
158 DISASSEMBLE_COUNT_SLOT(kStore, "store")
159 DISASSEMBLE_COUNT_SLOT(kStoreGlobal, "storeglobal")
160 DISASSEMBLE_COUNT(kStoreExtended, "storeextended")
161 DISASSEMBLE_COUNT(kStoreExtendedGlobal, "storeextendedglobal")
162 DISASSEMBLE_COUNT(kSubtractF, "subtractf")
163 DISASSEMBLE_COUNT(kSubtractI, "subtracti")
164 case ByteCodeInstruction::kSwizzle: {
165 printf("swizzle %d, ", READ8());
166 int count = READ8();
167 printf("%d", count);
168 for (int i = 0; i < count; ++i) {
169 printf(", %d", READ8());
170 }
171 break;
172 }
173 DISASSEMBLE_COUNT(kTan, "tan")
174 DISASSEMBLE_COUNT_SLOT(kWriteExternal, "writeexternal")
175 DISASSEMBLE_COUNT(kXorB, "xorb")
176 case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
177 case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
178 case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
179 case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
180 case ByteCodeInstruction::kBranchIfAllFalse:
181 printf("branchifallfalse %d", READ16());
182 break;
183 case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
184 case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
185 case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
186 case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
187 case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
188 case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
189 default:
190 ip -= sizeof(ByteCodeInstruction);
191 printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
192 SkASSERT(false);
193 }
194 return ip;
195}
196
// Opcode-case generators for the interpreter's main switch. Each expands to a complete `case`
// that reads a one-byte slot count and ends with `continue`, so it must be used inside the
// dispatch loop where `ip`, `sp`, POP() and (for the masked form) mask() are available.

// Lane-by-lane `dst op= src`, skipping lanes that are masked off. Used for / and %, where a
// naive whole-vector skvx operation would likely crash with a divide by zero in inactive lanes.
// TODO: Would it be better to do this with a select of (lane, 1) based on mask?
#define VECTOR_BINARY_MASKED_OP(inst, field, op)                            \
    case ByteCodeInstruction::inst: {                                       \
        const int count = READ8();                                          \
        for (int remaining = count; remaining > 0; --remaining) {           \
            for (int lane = 0; lane < VecWidth; ++lane) {                   \
                if (mask()[lane]) {                                         \
                    sp[-count].field[lane] op ## = sp[0].field[lane];       \
                }                                                           \
            }                                                               \
            POP();                                                          \
        }                                                                   \
    } continue;

// Whole-vector `dst = dst op src` over `count` slot pairs; the right-hand operands are popped.
#define VECTOR_BINARY_OP(inst, field, op)                                   \
    case ByteCodeInstruction::inst: {                                       \
        const int count = READ8();                                          \
        for (int remaining = count; remaining > 0; --remaining) {           \
            sp[-count] = sp[-count].field op sp[0].field;                   \
            POP();                                                          \
        }                                                                   \
    } continue;

// Whole-vector `dst = fn(dst, src)` over `count` slot pairs; the right-hand operands are popped.
#define VECTOR_BINARY_FN(inst, field, fn)                                   \
    case ByteCodeInstruction::inst: {                                       \
        const int count = READ8();                                          \
        for (int remaining = count; remaining > 0; --remaining) {           \
            sp[-count] = fn(sp[-count].field, sp[0].field);                 \
            POP();                                                          \
        }                                                                   \
    } continue;

// Applies `fn` in place to each of the top `count` slots. Note the argument order: fn, then field.
#define VECTOR_UNARY_FN(inst, fn, field)                                    \
    case ByteCodeInstruction::inst: {                                       \
        const int count = READ8();                                          \
        for (int depth = 0; depth < count; ++depth) {                       \
            sp[-depth] = fn(sp[-depth].field);                              \
        }                                                                   \
    } continue;
238
239union VValue {
240 VValue() {}
241 VValue(F32 f) : fFloat(f) {}
242 VValue(I32 s) : fSigned(s) {}
243 VValue(U32 u) : fUnsigned(u) {}
244
245 F32 fFloat;
246 I32 fSigned;
247 U32 fUnsigned;
248};
249
250struct StackFrame {
251 const uint8_t* fCode;
252 const uint8_t* fIP;
253 VValue* fStack;
254 int fParameterCount;
255};
256
257static F32 VecMod(F32 a, F32 b) {
258 return a - skvx::trunc(a / b) * b;
259}
260
// Shorthand for the float view of the stack slot at offset `index` from a local `sp`.
#define spf(index) (sp[(index)].fFloat)
262
263static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
264 int baseIndex, I32 mask) {
265 int argumentCount = READ8();
266 int returnCount = READ8();
267 int target = READ8();
268 ExternalValue* v = byteCode->fExternalValues[target];
269 sp -= argumentCount - 1;
270
271 float tmpArgs[4];
272 float tmpReturn[4];
273 SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
274 SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
275
276 for (int i = 0; i < VecWidth; ++i) {
277 if (mask[i]) {
278 for (int j = 0; j < argumentCount; ++j) {
279 tmpArgs[j] = sp[j].fFloat[i];
280 }
281 v->call(baseIndex + i, tmpArgs, tmpReturn);
282 for (int j = 0; j < returnCount; ++j) {
283 sp[j].fFloat[i] = tmpReturn[j];
284 }
285 }
286 }
287 sp += returnCount - 1;
288}
289
290static void Inverse2x2(VValue* sp) {
291 F32 a = sp[-3].fFloat,
292 b = sp[-2].fFloat,
293 c = sp[-1].fFloat,
294 d = sp[ 0].fFloat;
295 F32 idet = F32(1) / (a*d - b*c);
296 sp[-3].fFloat = d * idet;
297 sp[-2].fFloat = -b * idet;
298 sp[-1].fFloat = -c * idet;
299 sp[ 0].fFloat = a * idet;
300}
301
302static void Inverse3x3(VValue* sp) {
303 F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
304 a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
305 a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
306 F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
307 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
308 sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
309 sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
310 sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
311 sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
312 sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
313 sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
314 sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
315 sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
316 sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
317}
318
319static void Inverse4x4(VValue* sp) {
320 F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
321 a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
322 a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
323 a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0);
324
325 F32 b00 = a00 * a11 - a01 * a10,
326 b01 = a00 * a12 - a02 * a10,
327 b02 = a00 * a13 - a03 * a10,
328 b03 = a01 * a12 - a02 * a11,
329 b04 = a01 * a13 - a03 * a11,
330 b05 = a02 * a13 - a03 * a12,
331 b06 = a20 * a31 - a21 * a30,
332 b07 = a20 * a32 - a22 * a30,
333 b08 = a20 * a33 - a23 * a30,
334 b09 = a21 * a32 - a22 * a31,
335 b10 = a21 * a33 - a23 * a31,
336 b11 = a22 * a33 - a23 * a32;
337
338 F32 idet = F32(1) /
339 (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
340
341 b00 *= idet;
342 b01 *= idet;
343 b02 *= idet;
344 b03 *= idet;
345 b04 *= idet;
346 b05 *= idet;
347 b06 *= idet;
348 b07 *= idet;
349 b08 *= idet;
350 b09 *= idet;
351 b10 *= idet;
352 b11 *= idet;
353
354 spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
355 spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
356 spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
357 spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
358 spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
359 spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
360 spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
361 spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
362 spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
363 spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
364 spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
365 spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
366 spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
367 spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
368 spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
369 spf( 0) = a20 * b03 - a21 * b01 + a22 * b00;
370}
371
372static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
373 float* outReturn[], VValue globals[], const float uniforms[],
374 bool stripedOutput, int N, int baseIndex) {
375 // Needs to be the first N non-negative integers, at least as large as VecWidth
376 static const Interpreter::I32 gLanes = {
377 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
378 };
379
380 VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1;
381
382 #define POP() (*(sp--))
383 #define PUSH(v) (sp[1] = v, ++sp)
384
385 const uint8_t* code = f->fCode.data();
386 const uint8_t* ip = code;
387 std::vector<StackFrame> frames;
388
389 I32 condStack[16]; // Independent condition masks
390 I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...)
391 I32 contStack[16]; // Continue flags for loops
392 I32 loopStack[16]; // Loop execution masks
393 condStack[0] = maskStack[0] = (gLanes < N);
394 contStack[0] = I32( 0);
395 loopStack[0] = I32(~0);
396 I32* condPtr = condStack;
397 I32* maskPtr = maskStack;
398 I32* contPtr = contStack;
399 I32* loopPtr = loopStack;
400
401 if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) ||
402 f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) {
403 return false;
404 }
405
406 auto mask = [&]() { return *maskPtr & *loopPtr; };
407
408 for (;;) {
409#ifdef TRACE
410 printf("at %3d ", (int) (ip - code));
411 disassemble_instruction(ip);
412 printf(" (stack: %d)\n", (int) (sp - stack) + 1);
413#endif
414 ByteCodeInstruction inst = READ_INST();
415 switch (inst) {
416
417 VECTOR_BINARY_OP(kAddF, fFloat, +)
418 VECTOR_BINARY_OP(kAddI, fSigned, +)
419
420 // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
421 VECTOR_BINARY_OP(kAndB, fSigned, &)
422 VECTOR_BINARY_OP(kOrB, fSigned, |)
423 VECTOR_BINARY_OP(kXorB, fSigned, ^)
424 VECTOR_UNARY_FN(kNotB, std::bit_not<>{}, fSigned)
425
426 case ByteCodeInstruction::kBranch:
427 ip = code + READ16();
428 continue;
429
430 case ByteCodeInstruction::kCall: {
431 // Precursor code reserved space for the return value, and pushed all parameters to
432 // the stack. Update our bottom of stack to point at the first parameter, and our
433 // sp to point past those parameters (plus space for locals).
434 int target = READ8();
435 const ByteCodeFunction* f = byteCode->fFunctions[target].get();
436 if (skvx::any(mask())) {
437 frames.push_back({ code, ip, stack, f->fParameterCount });
438 ip = code = f->fCode.data();
439 stack = sp - f->fParameterCount + 1;
440 sp = stack + f->fParameterCount + f->fLocalCount - 1;
441 // As we did in runStriped(), zero locals so they're safe to mask-store into.
442 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
443 stack[i].fFloat = 0.0f;
444 }
445 }
446 } continue;
447
448 case ByteCodeInstruction::kCallExternal:
449 CallExternal(byteCode, ip, sp, baseIndex, mask());
450 continue;
451
452 VECTOR_UNARY_FN(kCeil, skvx::ceil, fFloat)
453
454 case ByteCodeInstruction::kClampIndex: {
455 int length = READ8();
456 if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
457 return false;
458 }
459 } continue;
460
461 VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
462 VECTOR_BINARY_OP(kCompareFEQ, fFloat, ==)
463 VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
464 VECTOR_BINARY_OP(kCompareFNEQ, fFloat, !=)
465 VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
466 VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
467 VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
468 VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
469 VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
470 VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
471 VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
472 VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
473 VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
474 VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
475 VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
476 VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
477
478 VECTOR_UNARY_FN(kConvertFtoI, skvx::cast<int>, fFloat)
479 VECTOR_UNARY_FN(kConvertStoF, skvx::cast<float>, fSigned)
480 VECTOR_UNARY_FN(kConvertUtoF, skvx::cast<float>, fUnsigned)
481
482 VECTOR_UNARY_FN(kCos, skvx::cos, fFloat)
483
484 VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
485 VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
486 VECTOR_BINARY_OP(kDivideF, fFloat, /)
487
488 case ByteCodeInstruction::kDup: {
489 int count = READ8();
490 memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
491 sp += count;
492 } continue;
493
494 VECTOR_UNARY_FN(kFloor, skvx::floor, fFloat)
495 VECTOR_UNARY_FN(kFract, skvx::fract, fFloat)
496
497 case ByteCodeInstruction::kInverse2x2:
498 Inverse2x2(sp);
499 continue;
500 case ByteCodeInstruction::kInverse3x3:
501 Inverse3x3(sp);
502 continue;
503 case ByteCodeInstruction::kInverse4x4:
504 Inverse4x4(sp);
505 continue;
506
507 case ByteCodeInstruction::kLerp: {
508 int count = READ8();
509 VValue* T = sp - count + 1,
510 * B = T - count,
511 * A = B - count;
512 for (int i = count; i --> 0; ) {
513 A[i].fFloat += (B[i].fFloat - A[i].fFloat) * T[i].fFloat;
514 }
515 sp -= 2 * count;
516 } continue;
517
518 case ByteCodeInstruction::kLoad: {
519 int count = READ8(),
520 slot = READ8();
521 memcpy(sp + 1, stack + slot, count * sizeof(VValue));
522 sp += count;
523 } continue;
524
525 case ByteCodeInstruction::kLoadGlobal: {
526 int count = READ8(),
527 slot = READ8();
528 memcpy(sp + 1, globals + slot, count * sizeof(VValue));
529 sp += count;
530 } continue;
531
532 case ByteCodeInstruction::kLoadUniform: {
533 int count = READ8(),
534 slot = READ8();
535 for (int i = 0; i < count; ++i) {
536 sp[i + 1].fFloat = uniforms[slot + i];
537 }
538 sp += count;
539 } continue;
540
541 case ByteCodeInstruction::kLoadExtended: {
542 int count = READ8();
543 I32 src = POP().fSigned;
544 I32 m = mask();
545 for (int i = 0; i < count; ++i) {
546 for (int j = 0; j < VecWidth; ++j) {
547 if (m[j]) {
548 sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
549 }
550 }
551 }
552 sp += count;
553 } continue;
554
555 case ByteCodeInstruction::kLoadExtendedGlobal: {
556 int count = READ8();
557 I32 src = POP().fSigned;
558 I32 m = mask();
559 for (int i = 0; i < count; ++i) {
560 for (int j = 0; j < VecWidth; ++j) {
561 if (m[j]) {
562 sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
563 }
564 }
565 }
566 sp += count;
567 } continue;
568
569 case ByteCodeInstruction::kLoadExtendedUniform: {
570 int count = READ8();
571 I32 src = POP().fSigned;
572 I32 m = mask();
573 for (int i = 0; i < count; ++i) {
574 for (int j = 0; j < VecWidth; ++j) {
575 if (m[j]) {
576 sp[i + 1].fFloat[j] = uniforms[src[j] + i];
577 }
578 }
579 }
580 sp += count;
581 } continue;
582
583 case ByteCodeInstruction::kMatrixToMatrix: {
584 int srcCols = READ8();
585 int srcRows = READ8();
586 int dstCols = READ8();
587 int dstRows = READ8();
588 SkASSERT(srcCols >= 2 && srcCols <= 4);
589 SkASSERT(srcRows >= 2 && srcRows <= 4);
590 SkASSERT(dstCols >= 2 && dstCols <= 4);
591 SkASSERT(dstRows >= 2 && dstRows <= 4);
592 F32 tmp[16];
593 memset(tmp, 0, sizeof(tmp));
594 tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
595 for (int c = srcCols - 1; c >= 0; --c) {
596 for (int r = srcRows - 1; r >= 0; --r) {
597 tmp[c*4 + r] = POP().fFloat;
598 }
599 }
600 for (int c = 0; c < dstCols; ++c) {
601 for (int r = 0; r < dstRows; ++r) {
602 PUSH(tmp[c*4 + r]);
603 }
604 }
605 } continue;
606
607 case ByteCodeInstruction::kMatrixMultiply: {
608 int lCols = READ8();
609 int lRows = READ8();
610 int rCols = READ8();
611 int rRows = lCols;
612 F32 tmp[16] = { 0.0f };
613 F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
614 F32* A = B - (lCols * lRows);
615 for (int c = 0; c < rCols; ++c) {
616 for (int r = 0; r < lRows; ++r) {
617 for (int j = 0; j < lCols; ++j) {
618 tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
619 }
620 }
621 }
622 sp -= (lCols * lRows) + (rCols * rRows);
623 memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
624 sp += (rCols * lRows);
625 } continue;
626
627 VECTOR_BINARY_FN(kMaxF, fFloat, skvx::max)
628 VECTOR_BINARY_FN(kMaxS, fSigned, skvx::max)
629 VECTOR_BINARY_FN(kMinF, fFloat, skvx::min)
630 VECTOR_BINARY_FN(kMinS, fSigned, skvx::min)
631
632 case ByteCodeInstruction::kMix: {
633 int count = READ8();
634 for (int i = count; i --> 0; ) {
635 // GLSL's arguments are mix(else, true, cond)
636 sp[-(2*count + i)] = skvx::if_then_else(sp[-( i)].fSigned,
637 sp[-( count + i)].fFloat,
638 sp[-(2*count + i)].fFloat);
639 }
640 sp -= 2 * count;
641 } continue;
642
643 VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
644 VECTOR_BINARY_OP(kMultiplyF, fFloat, *)
645
646 VECTOR_UNARY_FN(kNegateF, std::negate<>{}, fFloat)
647 VECTOR_UNARY_FN(kNegateI, std::negate<>{}, fSigned)
648
649 case ByteCodeInstruction::kPop:
650 sp -= READ8();
651 continue;
652
653 VECTOR_BINARY_FN(kPow, fFloat, skvx::pow)
654
655 case ByteCodeInstruction::kPushImmediate:
656 PUSH(U32(READ32()));
657 continue;
658
659 case ByteCodeInstruction::kReadExternal: {
660 int count = READ8(),
661 slot = READ8();
662 SkASSERT(count <= 4);
663 float tmp[4];
664 I32 m = mask();
665 for (int i = 0; i < VecWidth; ++i) {
666 if (m[i]) {
667 byteCode->fExternalValues[slot]->read(baseIndex + i, tmp);
668 for (int j = 0; j < count; ++j) {
669 sp[j + 1].fFloat[i] = tmp[j];
670 }
671 }
672 }
673 sp += count;
674 } continue;
675
676 VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
677 VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
678 VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
679
680 case ByteCodeInstruction::kReserve:
681 sp += READ8();
682 continue;
683
684 case ByteCodeInstruction::kReturn: {
685 int count = READ8();
686 if (frames.empty()) {
687 if (outReturn) {
688 VValue* src = sp - count + 1;
689 if (stripedOutput) {
690 for (int i = 0; i < count; ++i) {
691 memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
692 ++src;
693 }
694 } else {
695 float* outPtr = outReturn[0];
696 for (int i = 0; i < count; ++i) {
697 for (int j = 0; j < N; ++j) {
698 outPtr[count * j] = src->fFloat[j];
699 }
700 ++outPtr;
701 ++src;
702 }
703 }
704 }
705 return true;
706 } else {
707 // When we were called, the caller reserved stack space for their copy of our
708 // return value, then 'stack' was positioned after that, where our parameters
709 // were placed. Copy our return values to their reserved area.
710 memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
711
712 // Now move the stack pointer to the end of the passed-in parameters. This odd
713 // calling convention requires the caller to pop the arguments after calling,
714 // but allows them to store any out-parameters back during that unwinding.
715 // After that sequence finishes, the return value will be the top of the stack.
716 const StackFrame& frame(frames.back());
717 sp = stack + frame.fParameterCount - 1;
718 stack = frame.fStack;
719 code = frame.fCode;
720 ip = frame.fIP;
721 frames.pop_back();
722 }
723 } continue;
724
725 case ByteCodeInstruction::kScalarToMatrix: {
726 int cols = READ8();
727 int rows = READ8();
728 VValue v = POP();
729 for (int c = 0; c < cols; ++c) {
730 for (int r = 0; r < rows; ++r) {
731 PUSH(c == r ? v : F32(0.0f));
732 }
733 }
734 } continue;
735
736 case ByteCodeInstruction::kShiftLeft:
737 sp[0] = sp[0].fSigned << READ8();
738 continue;
739 case ByteCodeInstruction::kShiftRightS:
740 sp[0] = sp[0].fSigned >> READ8();
741 continue;
742 case ByteCodeInstruction::kShiftRightU:
743 sp[0] = sp[0].fUnsigned >> READ8();
744 continue;
745
746 VECTOR_UNARY_FN(kSin, skvx::sin, fFloat)
747 VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
748
749 case ByteCodeInstruction::kStore: {
750 int count = READ8(),
751 slot = READ8();
752 auto m = mask();
753 for (int i = count; i --> 0; ) {
754 stack[slot+i] = skvx::if_then_else(m, POP().fFloat, stack[slot+i].fFloat);
755 }
756 } continue;
757
758 case ByteCodeInstruction::kStoreGlobal: {
759 int count = READ8(),
760 slot = READ8();
761 auto m = mask();
762 for (int i = count; i --> 0; ) {
763 globals[slot+i] = skvx::if_then_else(m, POP().fFloat, globals[slot+i].fFloat);
764 }
765 } continue;
766
767 case ByteCodeInstruction::kStoreExtended: {
768 int count = READ8();
769 I32 target = POP().fSigned;
770 VValue* src = sp - count + 1;
771 I32 m = mask();
772 for (int i = 0; i < count; ++i) {
773 for (int j = 0; j < VecWidth; ++j) {
774 if (m[j]) {
775 stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
776 }
777 }
778 }
779 sp -= count;
780 } continue;
781
782 case ByteCodeInstruction::kStoreExtendedGlobal: {
783 int count = READ8();
784 I32 target = POP().fSigned;
785 VValue* src = sp - count + 1;
786 I32 m = mask();
787 for (int i = 0; i < count; ++i) {
788 for (int j = 0; j < VecWidth; ++j) {
789 if (m[j]) {
790 globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
791 }
792 }
793 }
794 sp -= count;
795 } continue;
796
797 VECTOR_BINARY_OP(kSubtractI, fSigned, -)
798 VECTOR_BINARY_OP(kSubtractF, fFloat, -)
799
800 case ByteCodeInstruction::kSwizzle: {
801 VValue tmp[4];
802 for (int i = READ8() - 1; i >= 0; --i) {
803 tmp[i] = POP();
804 }
805 for (int i = READ8() - 1; i >= 0; --i) {
806 PUSH(tmp[READ8()]);
807 }
808 } continue;
809
810 VECTOR_UNARY_FN(kATan, skvx::atan, fFloat)
811 VECTOR_UNARY_FN(kTan, skvx::tan, fFloat)
812
813 case ByteCodeInstruction::kWriteExternal: {
814 int count = READ8(),
815 slot = READ8();
816 SkASSERT(count <= 4);
817 float tmp[4];
818 I32 m = mask();
819 sp -= count;
820 for (int i = 0; i < VecWidth; ++i) {
821 if (m[i]) {
822 for (int j = 0; j < count; ++j) {
823 tmp[j] = sp[j + 1].fFloat[i];
824 }
825 byteCode->fExternalValues[slot]->write(baseIndex + i, tmp);
826 }
827 }
828 } continue;
829
830 case ByteCodeInstruction::kMaskPush:
831 condPtr[1] = POP().fSigned;
832 maskPtr[1] = maskPtr[0] & condPtr[1];
833 ++condPtr; ++maskPtr;
834 continue;
835 case ByteCodeInstruction::kMaskPop:
836 --condPtr; --maskPtr;
837 continue;
838 case ByteCodeInstruction::kMaskNegate:
839 maskPtr[0] = maskPtr[-1] & ~condPtr[0];
840 continue;
841 case ByteCodeInstruction::kMaskBlend: {
842 int count = READ8();
843 I32 m = condPtr[0];
844 --condPtr; --maskPtr;
845 for (int i = 0; i < count; ++i) {
846 sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
847 --sp;
848 }
849 } continue;
850 case ByteCodeInstruction::kBranchIfAllFalse: {
851 int target = READ16();
852 if (!skvx::any(mask())) {
853 ip = code + target;
854 }
855 } continue;
856
857 case ByteCodeInstruction::kLoopBegin:
858 contPtr[1] = 0;
859 loopPtr[1] = loopPtr[0];
860 ++contPtr; ++loopPtr;
861 continue;
862 case ByteCodeInstruction::kLoopNext:
863 *loopPtr |= *contPtr;
864 *contPtr = 0;
865 continue;
866 case ByteCodeInstruction::kLoopMask:
867 *loopPtr &= POP().fSigned;
868 continue;
869 case ByteCodeInstruction::kLoopEnd:
870 --contPtr; --loopPtr;
871 continue;
872 case ByteCodeInstruction::kLoopBreak:
873 *loopPtr &= ~mask();
874 continue;
875 case ByteCodeInstruction::kLoopContinue: {
876 I32 m = mask();
877 *contPtr |= m;
878 *loopPtr &= ~m;
879 } continue;
880
881 case ByteCodeInstruction::kLoadFragCoord:
882 case ByteCodeInstruction::kSample:
883 case ByteCodeInstruction::kSampleExplicit:
884 case ByteCodeInstruction::kSampleMatrix:
885 default:
886 // TODO: Support these?
887 SkASSERT(false);
888 return false;
889 }
890 }
891}
892
893}; // class Interpreter
894
895#endif // SK_ENABLE_SKSL_INTERPRETER
896
897#undef spf
898
899void ByteCodeFunction::disassemble() const {
900#if defined(SK_ENABLE_SKSL_INTERPRETER)
901 const uint8_t* ip = fCode.data();
902 while (ip < fCode.data() + fCode.size()) {
903 printf("%d: ", (int)(ip - fCode.data()));
904 ip = Interpreter::DisassembleInstruction(ip);
905 printf("\n");
906 }
907#endif
908}
909
910bool ByteCode::run(const ByteCodeFunction* f,
911 float* args, int argCount,
912 float* outReturn, int returnCount,
913 const float* uniforms, int uniformCount) const {
914#if defined(SK_ENABLE_SKSL_INTERPRETER)
915 Interpreter::VValue stack[128];
916 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
917 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
918 return false;
919 }
920
921 if (argCount != f->fParameterCount ||
922 returnCount != f->fReturnCount ||
923 uniformCount != fUniformSlotCount) {
924 return false;
925 }
926
927 Interpreter::VValue globals[32];
928 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
929 return false;
930 }
931
932 // Transpose args into stack
933 {
934 float* src = args;
935 float* dst = (float*)stack;
936 for (int i = 0; i < argCount; ++i) {
937 *dst = *src++;
938 dst += VecWidth;
939 }
940 }
941
942 bool stripedOutput = false;
943 float** outArray = outReturn ? &outReturn : nullptr;
944 if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) {
945 return false;
946 }
947
948 // Transpose out parameters back
949 {
950 float* dst = args;
951 float* src = (float*)stack;
952 for (const auto& p : f->fParameters) {
953 if (p.fIsOutParameter) {
954 for (int i = p.fSlotCount; i > 0; --i) {
955 *dst++ = *src;
956 src += VecWidth;
957 }
958 } else {
959 dst += p.fSlotCount;
960 src += p.fSlotCount * VecWidth;
961 }
962 }
963 }
964
965 return true;
966#else
967 SkDEBUGFAIL("ByteCode interpreter not enabled");
968 return false;
969#endif
970}
971
972bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
973 float* args[], int argCount,
974 float* outReturn[], int returnCount,
975 const float* uniforms, int uniformCount) const {
976#if defined(SK_ENABLE_SKSL_INTERPRETER)
977 Interpreter::VValue stack[192];
978 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
979 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
980 return false;
981 }
982
983 if (argCount != f->fParameterCount ||
984 returnCount != f->fReturnCount ||
985 uniformCount != fUniformSlotCount) {
986 return false;
987 }
988
989 Interpreter::VValue globals[32];
990 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
991 return false;
992 }
993
994 // innerRun just takes outArgs, so clear it if the count is zero
995 if (returnCount == 0) {
996 outReturn = nullptr;
997 }
998
999 // The instructions to store to locals and globals mask in the original value,
1000 // so they technically need to be initialized (to any value).
1001 for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
1002 stack[i].fFloat = 0.0f;
1003 }
1004 for (int i = 0; i < fGlobalSlotCount; i++) {
1005 globals[i].fFloat = 0.0f;
1006 }
1007
1008 int baseIndex = 0;
1009
1010 while (N) {
1011 int w = std::min(N, VecWidth);
1012
1013 // Copy args into stack
1014 for (int i = 0; i < argCount; ++i) {
1015 memcpy((void*)(stack + i), args[i], w * sizeof(float));
1016 }
1017
1018 bool stripedOutput = true;
1019 if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1020 baseIndex)) {
1021 return false;
1022 }
1023
1024 // Copy out parameters back
1025 int slot = 0;
1026 for (const auto& p : f->fParameters) {
1027 if (p.fIsOutParameter) {
1028 for (int i = slot; i < slot + p.fSlotCount; ++i) {
1029 memcpy(args[i], stack + i, w * sizeof(float));
1030 }
1031 }
1032 slot += p.fSlotCount;
1033 }
1034
1035 // Step each argument pointer ahead
1036 for (int i = 0; i < argCount; ++i) {
1037 args[i] += w;
1038 }
1039 N -= w;
1040 baseIndex += w;
1041 }
1042
1043 return true;
1044#else
1045 SkDEBUGFAIL("ByteCode interpreter not enabled");
1046 return false;
1047#endif
1048}
1049
1050} // namespace SkSL
1051
1052#endif
1053