1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SKSL_STANDALONE
9
10#include "include/core/SkPoint3.h"
11#include "include/private/SkVx.h"
12#include "src/core/SkUtils.h" // sk_unaligned_load
13#include "src/sksl/SkSLByteCode.h"
14#include "src/sksl/SkSLByteCodeGenerator.h"
15#include "src/sksl/SkSLExternalValue.h"
16
17#include <vector>
18
19namespace SkSL {
20
21#if defined(SK_ENABLE_SKSL_INTERPRETER)
22
23constexpr int VecWidth = ByteCode::kVecWidth;
24
25struct Interpreter {
26
27using F32 = skvx::Vec<VecWidth, float>;
28using I32 = skvx::Vec<VecWidth, int32_t>;
29using U32 = skvx::Vec<VecWidth, uint32_t>;
30
31#define READ8() (*(ip++))
32#define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
33#define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
34#define READ_INST() (ip += sizeof(instruction), \
35 sk_unaligned_load<instruction>(ip - sizeof(instruction)))
36
37#define VECTOR_DISASSEMBLE(op, text) \
38 case ByteCodeInstruction::op: printf(text); ++ip; break; \
39 case ByteCodeInstruction::op##2: printf(text "2"); ++ip; break; \
40 case ByteCodeInstruction::op##3: printf(text "3"); ++ip; break; \
41 case ByteCodeInstruction::op##4: printf(text "4"); ++ip; break;
42
43#define VECTOR_DISASSEMBLE_NO_COUNT(op, text) \
44 case ByteCodeInstruction::op: printf(text); break; \
45 case ByteCodeInstruction::op##2: printf(text "2"); break; \
46 case ByteCodeInstruction::op##3: printf(text "3"); break; \
47 case ByteCodeInstruction::op##4: printf(text "4"); break;
48
49#define VECTOR_MATRIX_DISASSEMBLE(op, text) \
50 VECTOR_DISASSEMBLE(op, text) \
51 case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
52
53#define VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(op, text) \
54 VECTOR_DISASSEMBLE_NO_COUNT(op, text) \
55 case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
56
57static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
58 auto inst = (ByteCodeInstruction) (intptr_t) READ_INST();
59 printf("%04x ", (int)inst);
60 switch (inst) {
61 VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
62 VECTOR_DISASSEMBLE(kAddI, "addi")
63 case ByteCodeInstruction::kAndB: printf("andb"); break;
64 case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
65 case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
66 case ByteCodeInstruction::kCallExternal: {
67 int argumentCount = READ8();
68 int returnCount = READ8();
69 int externalValue = READ8();
70 printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
71 break;
72 }
73 case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
74 VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq")
75 VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq")
76 VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq")
77 VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq")
78 VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt")
79 VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq")
80 VECTOR_DISASSEMBLE(kCompareFLT, "compareflt")
81 VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq")
82 VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt")
83 VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq")
84 VECTOR_DISASSEMBLE(kCompareSLT, "compareslt")
85 VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq")
86 VECTOR_DISASSEMBLE(kCompareUGT, "compareugt")
87 VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq")
88 VECTOR_DISASSEMBLE(kCompareULT, "compareult")
89 VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq")
90 VECTOR_DISASSEMBLE_NO_COUNT(kConvertFtoI, "convertftoi")
91 VECTOR_DISASSEMBLE_NO_COUNT(kConvertStoF, "convertstof")
92 VECTOR_DISASSEMBLE_NO_COUNT(kConvertUtoF, "convertutof")
93 VECTOR_DISASSEMBLE(kCos, "cos")
94 VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef")
        VECTOR_DISASSEMBLE(kDivideS, "divides")
96 VECTOR_DISASSEMBLE(kDivideU, "divideu")
97 VECTOR_MATRIX_DISASSEMBLE(kDup, "dup")
98 case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
99 case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
100 case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
101 case ByteCodeInstruction::kLoad: printf("load %d", READ16() >> 8); break;
102 case ByteCodeInstruction::kLoad2: printf("load2 %d", READ16() >> 8); break;
103 case ByteCodeInstruction::kLoad3: printf("load3 %d", READ16() >> 8); break;
104 case ByteCodeInstruction::kLoad4: printf("load4 %d", READ16() >> 8); break;
105 case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ16() >> 8); break;
106 case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ16() >> 8); break;
107 case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ16() >> 8); break;
108 case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ16() >> 8); break;
109 case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ16() >> 8); break;
110 case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ16() >> 8); break;
111 case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ16() >> 8); break;
112 case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ16() >> 8); break;
113 case ByteCodeInstruction::kLoadSwizzle: {
114 int target = READ8();
115 int count = READ8();
116 printf("loadswizzle %d %d", target, count);
117 for (int i = 0; i < count; ++i) {
118 printf(", %d", READ8());
119 }
120 break;
121 }
122 case ByteCodeInstruction::kLoadSwizzleGlobal: {
123 int target = READ8();
124 int count = READ8();
125 printf("loadswizzleglobal %d %d", target, count);
126 for (int i = 0; i < count; ++i) {
127 printf(", %d", READ8());
128 }
129 break;
130 }
131 case ByteCodeInstruction::kLoadSwizzleUniform: {
132 int target = READ8();
133 int count = READ8();
134 printf("loadswizzleuniform %d %d", target, count);
135 for (int i = 0; i < count; ++i) {
136 printf(", %d", READ8());
137 }
138 break;
139 }
140 case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break;
141 case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8());
142 break;
143 case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8());
144 break;
145 case ByteCodeInstruction::kMatrixToMatrix: {
146 int srcCols = READ8();
147 int srcRows = READ8();
148 int dstCols = READ8();
149 int dstRows = READ8();
150 printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
151 break;
152 }
153 case ByteCodeInstruction::kMatrixMultiply: {
154 int lCols = READ8();
155 int lRows = READ8();
156 int rCols = READ8();
157 printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
158 break;
159 }
160 VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf")
161 VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
162 VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kNegateF, "negatef")
163 VECTOR_DISASSEMBLE_NO_COUNT(kNegateI, "negatei")
164 case ByteCodeInstruction::kNotB: printf("notb"); break;
165 case ByteCodeInstruction::kOrB: printf("orb"); break;
166 VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kPop, "pop")
167 case ByteCodeInstruction::kPushImmediate: {
168 uint32_t v = READ32();
169 union { uint32_t u; float f; } pun = { v };
170 printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
171 break;
172 }
173 case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ16() >> 8); break;
174 case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ16() >> 8); break;
175 case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ16() >> 8); break;
176 case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ16() >> 8); break;
177 VECTOR_DISASSEMBLE(kRemainderF, "remainderf")
178 VECTOR_DISASSEMBLE(kRemainderS, "remainders")
179 VECTOR_DISASSEMBLE(kRemainderU, "remainderu")
180 case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break;
181 case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break;
182 case ByteCodeInstruction::kScalarToMatrix: {
183 int cols = READ8();
184 int rows = READ8();
185 printf("scalartomatrix %dx%d", cols, rows);
186 break;
187 }
188 case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
189 case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
190 case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
191 VECTOR_DISASSEMBLE(kSin, "sin")
192 VECTOR_DISASSEMBLE_NO_COUNT(kSqrt, "sqrt")
193 case ByteCodeInstruction::kStore: printf("store %d", READ8()); break;
194 case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break;
195 case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break;
196 case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break;
197 case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break;
198 case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break;
199 case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break;
200 case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break;
201 case ByteCodeInstruction::kStoreSwizzle: {
202 int target = READ8();
203 int count = READ8();
204 printf("storeswizzle %d %d", target, count);
205 for (int i = 0; i < count; ++i) {
206 printf(", %d", READ8());
207 }
208 break;
209 }
210 case ByteCodeInstruction::kStoreSwizzleGlobal: {
211 int target = READ8();
212 int count = READ8();
213 printf("storeswizzleglobal %d %d", target, count);
214 for (int i = 0; i < count; ++i) {
215 printf(", %d", READ8());
216 }
217 break;
218 }
219 case ByteCodeInstruction::kStoreSwizzleIndirect: {
220 int count = READ8();
221 printf("storeswizzleindirect %d", count);
222 for (int i = 0; i < count; ++i) {
223 printf(", %d", READ8());
224 }
225 break;
226 }
227 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
228 int count = READ8();
229 printf("storeswizzleindirectglobal %d", count);
230 for (int i = 0; i < count; ++i) {
231 printf(", %d", READ8());
232 }
233 break;
234 }
235 case ByteCodeInstruction::kStoreExtended: printf("storeextended %d", READ8()); break;
236 case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8());
237 break;
238 VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf")
239 VECTOR_DISASSEMBLE(kSubtractI, "subtracti")
240 case ByteCodeInstruction::kSwizzle: {
241 printf("swizzle %d, ", READ8());
242 int count = READ8();
243 printf("%d", count);
244 for (int i = 0; i < count; ++i) {
245 printf(", %d", READ8());
246 }
247 break;
248 }
249 VECTOR_DISASSEMBLE(kTan, "tan")
250 case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ16() >> 8); break;
251 case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ16() >> 8); break;
252 case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ16() >> 8); break;
253 case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ16() >> 8); break;
254 case ByteCodeInstruction::kXorB: printf("xorb"); break;
255 case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
256 case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
257 case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
258 case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
259 case ByteCodeInstruction::kBranchIfAllFalse:
260 printf("branchifallfalse %d", READ16());
261 break;
262 case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
263 case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
264 case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
265 case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
266 case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
267 case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
268 default:
269 ip -= sizeof(instruction);
270 printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
271 SkASSERT(false);
272 }
273 return ip;
274}
275
276#ifdef SKSLC_THREADED_CODE
277 #define LABEL(name) name:
278 #ifdef TRACE
279 #define NEXT() goto next
280 #else
281 #define NEXT() goto *READ_INST()
282 #endif
283#else
284 #define LABEL(name) case ByteCodeInstruction::name:
285 #define NEXT() continue
286#endif
287
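// Each vector binary op is followed by a one-byte component count (ip[0]). The wide cases fall
// through: every step combines one component pair and pops the right-hand value, so the matching
// left-hand component stays exactly ip[0] slots below the top of the stack. For example, kAddF2
// with components a0 a1 b0 b1 on the stack does a1 += b1, pop, then (falling into the kAddF case)
// a0 += b0, pop, and finally ++ip skips the count byte.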
288#define VECTOR_BINARY_OP(base, field, op) \
289 LABEL(base ## 4) \
290 sp[-4] = sp[-4].field op sp[0].field; \
291 POP(); \
292 /* fall through */ \
293 LABEL(base ## 3) { \
294 sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \
295 POP(); \
296 } /* fall through */ \
297 LABEL(base ## 2) { \
298 sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \
299 POP(); \
300 } /* fall through */ \
301 LABEL(base) { \
302 sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \
303 POP(); \
304 ++ip; \
305 NEXT(); \
306 }
307
// A naive implementation of / or % using skvx operations will likely crash with a divide by zero
// in inactive vector lanes, so we need to be sure to skip the masked-off lanes.
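// (For example, if only lane 0 is active, the inactive lanes of sp[0].fSigned may hold zero or
// stale garbage, so a full-width sp[-1].fSigned / sp[0].fSigned could fault on them. The per-lane
// loops below only touch lanes where mask() is set.)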
310#define VECTOR_BINARY_MASKED_OP(base, field, op) \
311 LABEL(base ## 4) \
312 for (int i = 0; i < VecWidth; ++i) { \
313 if (mask()[i]) { \
314 sp[-4].field[i] op ## = sp[0].field[i]; \
315 } \
316 } \
317 POP(); \
318 /* fall through */ \
319 LABEL(base ## 3) { \
320 for (int i = 0; i < VecWidth; ++i) { \
321 if (mask()[i]) { \
322 sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
323 } \
324 } \
325 POP(); \
326 } /* fall through */ \
327 LABEL(base ## 2) { \
328 for (int i = 0; i < VecWidth; ++i) { \
329 if (mask()[i]) { \
330 sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
331 } \
332 } \
333 POP(); \
334 } /* fall through */ \
335 LABEL(base) { \
336 for (int i = 0; i < VecWidth; ++i) { \
337 if (mask()[i]) { \
338 sp[-ip[0]].field[i] op ## = sp[0].field[i]; \
339 } \
340 } \
341 POP(); \
342 ++ip; \
343 NEXT(); \
344 }
345
346
347#define VECTOR_MATRIX_BINARY_OP(base, field, op) \
348 VECTOR_BINARY_OP(base, field, op) \
349 LABEL(base ## N) { \
350 int count = READ8(); \
351 for (int i = count; i > 0; --i) { \
352 sp[-count] = sp[-count].field op sp[0].field; \
353 POP(); \
354 } \
355 NEXT(); \
356 }
357
358#define VECTOR_BINARY_FN(base, field, fn) \
359 LABEL(base ## 4) \
360 sp[-4] = fn(sp[-4].field, sp[0].field); \
361 POP(); \
362 /* fall through */ \
363 LABEL(base ## 3) { \
364 sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \
365 POP(); \
366 } /* fall through */ \
367 LABEL(base ## 2) { \
368 sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \
369 POP(); \
370 } /* fall through */ \
371 LABEL(base) { \
372 sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \
373 POP(); \
374 ++ip; \
375 NEXT(); \
376 }
377
378#define VECTOR_UNARY_FN(base, fn, field) \
379 LABEL(base ## 4) sp[-3] = fn(sp[-3].field); \
380 LABEL(base ## 3) sp[-2] = fn(sp[-2].field); \
381 LABEL(base ## 2) sp[-1] = fn(sp[-1].field); \
382 LABEL(base) sp[ 0] = fn(sp[ 0].field); \
383 NEXT();
384
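// VECTOR_UNARY_FN_VEC reads a component count byte and then applies fn to every float in the
// affected slots, including masked-off lanes; that is harmless for cosf/sinf/tanf, which never
// fault on don't-care inputs.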
385#define VECTOR_UNARY_FN_VEC(base, fn) \
386 LABEL(base ## 4) \
387 LABEL(base ## 3) \
388 LABEL(base ## 2) \
389 LABEL(base) { \
390 int count = READ8(); \
391 float* v = (float*)sp - count + 1; \
392 for (int i = VecWidth * count; i > 0; --i, ++v) { \
393 *v = fn(*v); \
394 } \
395 NEXT(); \
396 }
397
398#define VECTOR_LABELS(base) \
399 &&base ## 4, \
400 &&base ## 3, \
401 &&base ## 2, \
402 &&base
403
404#define VECTOR_MATRIX_LABELS(base) \
405 VECTOR_LABELS(base), \
406 &&base ## N
407
408// If you trip this assert, it means that the order of the opcodes listed in ByteCodeInstruction
// does not match the order of the opcodes listed in the 'labels' array in InnerRun().
410#define CHECK_LABEL(name) \
411 SkASSERT(labels[(int) ByteCodeInstruction::name] == &&name)
412
413#define CHECK_VECTOR_LABELS(name) \
414 CHECK_LABEL(name ## 4); \
415 CHECK_LABEL(name ## 3); \
416 CHECK_LABEL(name ## 2); \
417 CHECK_LABEL(name)
418
419#define CHECK_VECTOR_MATRIX_LABELS(name) \
420 CHECK_VECTOR_LABELS(name); \
421 CHECK_LABEL(name ## N)
422
423union VValue {
424 VValue() {}
425 VValue(F32 f) : fFloat(f) {}
426 VValue(I32 s) : fSigned(s) {}
427 VValue(U32 u) : fUnsigned(u) {}
428
429 F32 fFloat;
430 I32 fSigned;
431 U32 fUnsigned;
432};
433
434struct StackFrame {
435 const uint8_t* fCode;
436 const uint8_t* fIP;
437 VValue* fStack;
438 int fParameterCount;
439};
440
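// Truncated-division remainder (the result keeps the sign of the dividend), roughly matching
// fmodf: e.g. VecMod(7.5, 2) == 1.5 and VecMod(-7.5, 2) == -1.5.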
441static F32 VecMod(F32 a, F32 b) {
442 return a - skvx::trunc(a / b) * b;
443}
444
445#define spf(index) sp[index].fFloat
446
447static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
448 int baseIndex, I32 mask) {
449 int argumentCount = READ8();
450 int returnCount = READ8();
451 int target = READ8();
452 ExternalValue* v = byteCode->fExternalValues[target];
453 sp -= argumentCount - 1;
454
455 float tmpArgs[4];
456 float tmpReturn[4];
457 SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
458 SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
459
460 for (int i = 0; i < VecWidth; ++i) {
461 if (mask[i]) {
462 for (int j = 0; j < argumentCount; ++j) {
463 tmpArgs[j] = sp[j].fFloat[i];
464 }
465 v->call(baseIndex + i, tmpArgs, tmpReturn);
466 for (int j = 0; j < returnCount; ++j) {
467 sp[j].fFloat[i] = tmpReturn[j];
468 }
469 }
470 }
471 sp += returnCount - 1;
472}
473
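// The 2x2 matrix occupies the top four stack slots in column-major order: sp[-3..0] hold
// a b c d for the matrix [a c; b d], so the inverse is (1 / (ad - bc)) * [d -c; -b a],
// written back in the same column-major layout.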
474static void Inverse2x2(VValue* sp) {
475 F32 a = sp[-3].fFloat,
476 b = sp[-2].fFloat,
477 c = sp[-1].fFloat,
478 d = sp[ 0].fFloat;
479 F32 idet = F32(1) / (a*d - b*c);
480 sp[-3].fFloat = d * idet;
481 sp[-2].fFloat = -b * idet;
482 sp[-1].fFloat = -c * idet;
483 sp[ 0].fFloat = a * idet;
484}
485
486static void Inverse3x3(VValue* sp) {
487 F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
488 a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
489 a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
490 F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
491 a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
492 sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
493 sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
494 sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
495 sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
496 sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
497 sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
498 sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
499 sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
500 sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
501}
502
503static void Inverse4x4(VValue* sp) {
504 F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
505 a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
506 a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
507 a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0);
508
509 F32 b00 = a00 * a11 - a01 * a10,
510 b01 = a00 * a12 - a02 * a10,
511 b02 = a00 * a13 - a03 * a10,
512 b03 = a01 * a12 - a02 * a11,
513 b04 = a01 * a13 - a03 * a11,
514 b05 = a02 * a13 - a03 * a12,
515 b06 = a20 * a31 - a21 * a30,
516 b07 = a20 * a32 - a22 * a30,
517 b08 = a20 * a33 - a23 * a30,
518 b09 = a21 * a32 - a22 * a31,
519 b10 = a21 * a33 - a23 * a31,
520 b11 = a22 * a33 - a23 * a32;
521
522 F32 idet = F32(1) /
523 (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
524
525 b00 *= idet;
526 b01 *= idet;
527 b02 *= idet;
528 b03 *= idet;
529 b04 *= idet;
530 b05 *= idet;
531 b06 *= idet;
532 b07 *= idet;
533 b08 *= idet;
534 b09 *= idet;
535 b10 *= idet;
536 b11 *= idet;
537
538 spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
539 spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
540 spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
541 spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
542 spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
543 spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
544 spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
545 spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
546 spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
547 spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
548 spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
549 spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
550 spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
551 spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
552 spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
553 spf( 0) = a20 * b03 - a21 * b01 + a22 * b00;
554}
555
556static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
557 float* outReturn[], VValue globals[], const float uniforms[],
558 bool stripedOutput, int N, int baseIndex) {
559#ifdef SKSLC_THREADED_CODE
560 static const void* labels[] = {
561 // If you aren't familiar with it, the &&label syntax is the GCC / Clang "labels as values"
562 // extension. If you add anything to this array, be sure to add the corresponding
563 // CHECK_LABEL() or CHECK_*_LABELS() assert below.
564 VECTOR_MATRIX_LABELS(kAddF),
565 VECTOR_LABELS(kAddI),
566 &&kAndB,
567 &&kBranch,
568 &&kCall,
569 &&kCallExternal,
570 &&kClampIndex,
571 VECTOR_LABELS(kCompareIEQ),
572 VECTOR_LABELS(kCompareINEQ),
573 VECTOR_MATRIX_LABELS(kCompareFEQ),
574 VECTOR_MATRIX_LABELS(kCompareFNEQ),
575 VECTOR_LABELS(kCompareFGT),
576 VECTOR_LABELS(kCompareFGTEQ),
577 VECTOR_LABELS(kCompareFLT),
578 VECTOR_LABELS(kCompareFLTEQ),
579 VECTOR_LABELS(kCompareSGT),
580 VECTOR_LABELS(kCompareSGTEQ),
581 VECTOR_LABELS(kCompareSLT),
582 VECTOR_LABELS(kCompareSLTEQ),
583 VECTOR_LABELS(kCompareUGT),
584 VECTOR_LABELS(kCompareUGTEQ),
585 VECTOR_LABELS(kCompareULT),
586 VECTOR_LABELS(kCompareULTEQ),
587 VECTOR_LABELS(kConvertFtoI),
588 VECTOR_LABELS(kConvertStoF),
589 VECTOR_LABELS(kConvertUtoF),
590 VECTOR_LABELS(kCos),
591 VECTOR_MATRIX_LABELS(kDivideF),
592 VECTOR_LABELS(kDivideS),
593 VECTOR_LABELS(kDivideU),
594 VECTOR_MATRIX_LABELS(kDup),
595 &&kInverse2x2,
596 &&kInverse3x3,
597 &&kInverse4x4,
598 VECTOR_LABELS(kLoad),
599 VECTOR_LABELS(kLoadGlobal),
600 VECTOR_LABELS(kLoadUniform),
601 &&kLoadSwizzle,
602 &&kLoadSwizzleGlobal,
603 &&kLoadSwizzleUniform,
604 &&kLoadExtended,
605 &&kLoadExtendedGlobal,
606 &&kLoadExtendedUniform,
607 &&kMatrixToMatrix,
608 &&kMatrixMultiply,
609 VECTOR_MATRIX_LABELS(kNegateF),
610 VECTOR_LABELS(kNegateI),
611 VECTOR_MATRIX_LABELS(kMultiplyF),
612 VECTOR_LABELS(kMultiplyI),
613 &&kNotB,
614 &&kOrB,
615 VECTOR_MATRIX_LABELS(kPop),
616 &&kPushImmediate,
617 VECTOR_LABELS(kReadExternal),
618 VECTOR_LABELS(kRemainderF),
619 VECTOR_LABELS(kRemainderS),
620 VECTOR_LABELS(kRemainderU),
621 &&kReserve,
622 &&kReturn,
623 &&kScalarToMatrix,
624 &&kShiftLeft,
625 &&kShiftRightS,
626 &&kShiftRightU,
627 VECTOR_LABELS(kSin),
628 VECTOR_LABELS(kSqrt),
629 VECTOR_LABELS(kStore),
630 VECTOR_LABELS(kStoreGlobal),
631 &&kStoreExtended,
632 &&kStoreExtendedGlobal,
633 &&kStoreSwizzle,
634 &&kStoreSwizzleGlobal,
635 &&kStoreSwizzleIndirect,
636 &&kStoreSwizzleIndirectGlobal,
637 &&kSwizzle,
638 VECTOR_MATRIX_LABELS(kSubtractF),
639 VECTOR_LABELS(kSubtractI),
640 VECTOR_LABELS(kTan),
641 VECTOR_LABELS(kWriteExternal),
642 &&kXorB,
643
644 &&kMaskPush,
645 &&kMaskPop,
646 &&kMaskNegate,
647 &&kMaskBlend,
648 &&kBranchIfAllFalse,
649
650 &&kLoopBegin,
651 &&kLoopNext,
652 &&kLoopMask,
653 &&kLoopEnd,
654 &&kLoopBreak,
655 &&kLoopContinue,
656 };
657 // Verify that the order of the labels array matches the order of the ByteCodeInstruction enum.
658 CHECK_VECTOR_MATRIX_LABELS(kAddF);
659 CHECK_VECTOR_LABELS(kAddI);
660 CHECK_LABEL(kAndB);
661 CHECK_LABEL(kBranch);
662 CHECK_LABEL(kCall);
663 CHECK_LABEL(kCallExternal);
664 CHECK_LABEL(kClampIndex);
665 CHECK_VECTOR_LABELS(kCompareIEQ);
666 CHECK_VECTOR_LABELS(kCompareINEQ);
667 CHECK_VECTOR_MATRIX_LABELS(kCompareFEQ);
668 CHECK_VECTOR_MATRIX_LABELS(kCompareFNEQ);
669 CHECK_VECTOR_LABELS(kCompareFGT);
670 CHECK_VECTOR_LABELS(kCompareFGTEQ);
671 CHECK_VECTOR_LABELS(kCompareFLT);
672 CHECK_VECTOR_LABELS(kCompareFLTEQ);
673 CHECK_VECTOR_LABELS(kCompareSGT);
674 CHECK_VECTOR_LABELS(kCompareSGTEQ);
675 CHECK_VECTOR_LABELS(kCompareSLT);
676 CHECK_VECTOR_LABELS(kCompareSLTEQ);
677 CHECK_VECTOR_LABELS(kCompareUGT);
678 CHECK_VECTOR_LABELS(kCompareUGTEQ);
679 CHECK_VECTOR_LABELS(kCompareULT);
680 CHECK_VECTOR_LABELS(kCompareULTEQ);
681 CHECK_VECTOR_LABELS(kConvertFtoI);
682 CHECK_VECTOR_LABELS(kConvertStoF);
683 CHECK_VECTOR_LABELS(kConvertUtoF);
684 CHECK_VECTOR_LABELS(kCos);
685 CHECK_VECTOR_MATRIX_LABELS(kDivideF);
686 CHECK_VECTOR_LABELS(kDivideS);
687 CHECK_VECTOR_LABELS(kDivideU);
688 CHECK_VECTOR_MATRIX_LABELS(kDup);
689 CHECK_LABEL(kInverse2x2);
690 CHECK_LABEL(kInverse3x3);
691 CHECK_LABEL(kInverse4x4);
692 CHECK_VECTOR_LABELS(kLoad);
693 CHECK_VECTOR_LABELS(kLoadGlobal);
694 CHECK_VECTOR_LABELS(kLoadUniform);
695 CHECK_LABEL(kLoadSwizzle);
696 CHECK_LABEL(kLoadSwizzleGlobal);
697 CHECK_LABEL(kLoadSwizzleUniform);
698 CHECK_LABEL(kLoadExtended);
699 CHECK_LABEL(kLoadExtendedGlobal);
700 CHECK_LABEL(kLoadExtendedUniform);
701 CHECK_LABEL(kMatrixToMatrix);
702 CHECK_LABEL(kMatrixMultiply);
703 CHECK_VECTOR_MATRIX_LABELS(kNegateF);
704 CHECK_VECTOR_LABELS(kNegateI);
705 CHECK_VECTOR_MATRIX_LABELS(kMultiplyF);
706 CHECK_VECTOR_LABELS(kMultiplyI);
707 CHECK_LABEL(kNotB);
708 CHECK_LABEL(kOrB);
709 CHECK_VECTOR_MATRIX_LABELS(kPop);
710 CHECK_LABEL(kPushImmediate);
711 CHECK_VECTOR_LABELS(kReadExternal);
712 CHECK_VECTOR_LABELS(kRemainderF);
713 CHECK_VECTOR_LABELS(kRemainderS);
714 CHECK_VECTOR_LABELS(kRemainderU);
715 CHECK_LABEL(kReserve);
716 CHECK_LABEL(kReturn);
717 CHECK_LABEL(kScalarToMatrix);
718 CHECK_LABEL(kShiftLeft);
719 CHECK_LABEL(kShiftRightS);
720 CHECK_LABEL(kShiftRightU);
721 CHECK_VECTOR_LABELS(kSin);
722 CHECK_VECTOR_LABELS(kSqrt);
723 CHECK_VECTOR_LABELS(kStore);
724 CHECK_VECTOR_LABELS(kStoreGlobal);
725 CHECK_LABEL(kStoreExtended);
726 CHECK_LABEL(kStoreExtendedGlobal);
727 CHECK_LABEL(kStoreSwizzle);
728 CHECK_LABEL(kStoreSwizzleGlobal);
729 CHECK_LABEL(kStoreSwizzleIndirect);
730 CHECK_LABEL(kStoreSwizzleIndirectGlobal);
731 CHECK_LABEL(kSwizzle);
732 CHECK_VECTOR_MATRIX_LABELS(kSubtractF);
733 CHECK_VECTOR_LABELS(kSubtractI);
734 CHECK_VECTOR_LABELS(kTan);
735 CHECK_VECTOR_LABELS(kWriteExternal);
736 CHECK_LABEL(kXorB);
737 CHECK_LABEL(kMaskPush);
738 CHECK_LABEL(kMaskPop);
739 CHECK_LABEL(kMaskNegate);
740 CHECK_LABEL(kMaskBlend);
741 CHECK_LABEL(kBranchIfAllFalse);
742 CHECK_LABEL(kLoopBegin);
743 CHECK_LABEL(kLoopNext);
744 CHECK_LABEL(kLoopMask);
745 CHECK_LABEL(kLoopEnd);
746 CHECK_LABEL(kLoopBreak);
747 CHECK_LABEL(kLoopContinue);
748 f->fPreprocessOnce([f] { ((ByteCodeFunction*)f)->preprocess(labels); });
749#endif
750
    // Needs to hold the first N non-negative integers (0, 1, 2, ...), with N at least as large
    // as VecWidth
752 static const Interpreter::I32 gLanes = {
753 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
754 };
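    // gLanes is compared against N below ((gLanes < N)) to build the initial execution mask:
    // all-ones in the first N lanes, zero everywhere else.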
755
756 VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1;
757
758 #define POP() (*(sp--))
759 #define PUSH(v) (sp[1] = v, ++sp)
760
761 const uint8_t* code = f->fCode.data();
762 const uint8_t* ip = code;
763 std::vector<StackFrame> frames;
764
765 I32 condStack[16]; // Independent condition masks
    I32 maskStack[16]; // Combined masks (e.g. condStack[0] & condStack[1] & ...)
767 I32 contStack[16]; // Continue flags for loops
768 I32 loopStack[16]; // Loop execution masks
769 condStack[0] = maskStack[0] = (gLanes < N);
770 contStack[0] = I32( 0);
771 loopStack[0] = I32(~0);
772 I32* condPtr = condStack;
773 I32* maskPtr = maskStack;
774 I32* contPtr = contStack;
775 I32* loopPtr = loopStack;
776
777 if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) ||
778 f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) {
779 return false;
780 }
781
782 auto mask = [&]() { return *maskPtr & *loopPtr; };
783
784#ifdef SKSLC_THREADED_CODE
785 // If the "labels as values" extension is available, we implement this using threaded code.
786 // Instead of opcodes, the code directly contains the addresses of the labels to jump to. Then
787 // the code for each opcode simply grabs the address of the next opcode and uses a goto to jump
788 // there.
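    // preprocess() (further down in this file) has already overwritten each opcode in fCode with
    // the address of its label, so READ_INST() yields a jump target directly.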
789 NEXT();
790#else
791 // Otherwise, we have to use a switch statement and a loop to execute the right label.
792 for (;;) {
793 #ifdef TRACE
794 printf("at %3d ", (int) (ip - code));
            DisassembleInstruction(ip);
796 printf(" (stack: %d)\n", (int) (sp - stack) + 1);
797 #endif
798 switch ((ByteCodeInstruction) READ16()) {
799#endif
800
801 VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +)
802 VECTOR_BINARY_OP(kAddI, fSigned, +)
803
804 // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
805 LABEL(kAndB)
806 sp[-1] = sp[-1].fSigned & sp[0].fSigned;
807 POP();
808 NEXT();
809 LABEL(kNotB)
810 sp[0] = ~sp[0].fSigned;
811 NEXT();
812 LABEL(kOrB)
813 sp[-1] = sp[-1].fSigned | sp[0].fSigned;
814 POP();
815 NEXT();
816 LABEL(kXorB)
817 sp[-1] = sp[-1].fSigned ^ sp[0].fSigned;
818 POP();
819 NEXT();
820
821 LABEL(kBranch)
822 ip = code + READ16();
823 NEXT();
824
825 LABEL(kCall) {
826 // Precursor code reserved space for the return value, and pushed all parameters to
827 // the stack. Update our bottom of stack to point at the first parameter, and our
828 // sp to point past those parameters (plus space for locals).
829 int target = READ8();
830 const ByteCodeFunction* fun = byteCode->fFunctions[target].get();
831#ifdef SKSLC_THREADED_CODE
832 fun->fPreprocessOnce([fun] { ((ByteCodeFunction*)fun)->preprocess(labels); });
833#endif
834 if (skvx::any(mask())) {
835 frames.push_back({ code, ip, stack, fun->fParameterCount });
836 ip = code = fun->fCode.data();
837 stack = sp - fun->fParameterCount + 1;
838 sp = stack + fun->fParameterCount + fun->fLocalCount - 1;
839 }
840 NEXT();
841 }
842
843 LABEL(kCallExternal) {
844 CallExternal(byteCode, ip, sp, baseIndex, mask());
845 NEXT();
846 }
847
848 LABEL(kClampIndex) {
849 int length = READ8();
850 if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) {
851 return false;
852 }
853 NEXT();
854 }
855
856 VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
857 VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==)
858 VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
859 VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=)
860 VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
861 VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
862 VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
863 VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
864 VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
865 VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
866 VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
867 VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
868 VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
869 VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
870 VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
871 VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
872
873 LABEL(kConvertFtoI4) sp[-3] = skvx::cast<int>(sp[-3].fFloat);
874 LABEL(kConvertFtoI3) sp[-2] = skvx::cast<int>(sp[-2].fFloat);
875 LABEL(kConvertFtoI2) sp[-1] = skvx::cast<int>(sp[-1].fFloat);
876 LABEL(kConvertFtoI) sp[ 0] = skvx::cast<int>(sp[ 0].fFloat);
877 NEXT();
878
879 LABEL(kConvertStoF4) sp[-3] = skvx::cast<float>(sp[-3].fSigned);
880 LABEL(kConvertStoF3) sp[-2] = skvx::cast<float>(sp[-2].fSigned);
881 LABEL(kConvertStoF2) sp[-1] = skvx::cast<float>(sp[-1].fSigned);
882 LABEL(kConvertStoF) sp[ 0] = skvx::cast<float>(sp[ 0].fSigned);
883 NEXT();
884
885 LABEL(kConvertUtoF4) sp[-3] = skvx::cast<float>(sp[-3].fUnsigned);
886 LABEL(kConvertUtoF3) sp[-2] = skvx::cast<float>(sp[-2].fUnsigned);
887 LABEL(kConvertUtoF2) sp[-1] = skvx::cast<float>(sp[-1].fUnsigned);
888 LABEL(kConvertUtoF) sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned);
889 NEXT();
890
891 VECTOR_UNARY_FN_VEC(kCos, cosf)
892
893 VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
894 VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
895 VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /)
896
897 LABEL(kDup4) PUSH(sp[1 - ip[0]]);
898 LABEL(kDup3) PUSH(sp[1 - ip[0]]);
899 LABEL(kDup2) PUSH(sp[1 - ip[0]]);
900 LABEL(kDup) PUSH(sp[1 - ip[0]]);
901 ++ip;
902 NEXT();
903
904 LABEL(kDupN) {
905 int count = READ8();
906 memcpy(sp + 1, sp - count + 1, count * sizeof(VValue));
907 sp += count;
908 NEXT();
909 }
910
911 LABEL(kInverse2x2) {
912 Inverse2x2(sp);
913 NEXT();
914 }
915 LABEL(kInverse3x3) {
916 Inverse3x3(sp);
917 NEXT();
918 }
919 LABEL(kInverse4x4) {
920 Inverse4x4(sp);
921 NEXT();
922 }
923
924 LABEL(kLoad4) sp[4] = stack[ip[1] + 3];
925 LABEL(kLoad3) sp[3] = stack[ip[1] + 2];
926 LABEL(kLoad2) sp[2] = stack[ip[1] + 1];
927 LABEL(kLoad) sp[1] = stack[ip[1] + 0];
928 sp += ip[0];
929 ip += 2;
930 NEXT();
931
932 LABEL(kLoadGlobal4) sp[4] = globals[ip[1] + 3];
933 LABEL(kLoadGlobal3) sp[3] = globals[ip[1] + 2];
934 LABEL(kLoadGlobal2) sp[2] = globals[ip[1] + 1];
935 LABEL(kLoadGlobal) sp[1] = globals[ip[1] + 0];
936 sp += ip[0];
937 ip += 2;
938 NEXT();
939
940 LABEL(kLoadUniform4) sp[4].fFloat = uniforms[ip[1] + 3];
941 LABEL(kLoadUniform3) sp[3].fFloat = uniforms[ip[1] + 2];
942 LABEL(kLoadUniform2) sp[2].fFloat = uniforms[ip[1] + 1];
943 LABEL(kLoadUniform) sp[1].fFloat = uniforms[ip[1] + 0];
944 sp += ip[0];
945 ip += 2;
946 NEXT();
947
948 LABEL(kLoadExtended) {
949 int count = READ8();
950 I32 src = POP().fSigned;
951 I32 m = mask();
952 for (int i = 0; i < count; ++i) {
953 for (int j = 0; j < VecWidth; ++j) {
954 if (m[j]) {
955 sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j];
956 }
957 }
958 }
959 sp += count;
960 NEXT();
961 }
962
963 LABEL(kLoadExtendedGlobal) {
964 int count = READ8();
965 I32 src = POP().fSigned;
966 I32 m = mask();
967 for (int i = 0; i < count; ++i) {
968 for (int j = 0; j < VecWidth; ++j) {
969 if (m[j]) {
970 sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j];
971 }
972 }
973 }
974 sp += count;
975 NEXT();
976 }
977
978 LABEL(kLoadExtendedUniform) {
979 int count = READ8();
980 I32 src = POP().fSigned;
981 I32 m = mask();
982 for (int i = 0; i < count; ++i) {
983 for (int j = 0; j < VecWidth; ++j) {
984 if (m[j]) {
985 sp[i + 1].fFloat[j] = uniforms[src[j] + i];
986 }
987 }
988 }
989 sp += count;
990 NEXT();
991 }
992
993 LABEL(kLoadSwizzle) {
994 int src = READ8();
995 int count = READ8();
996 for (int i = 0; i < count; ++i) {
997 PUSH(stack[src + *(ip + i)]);
998 }
999 ip += count;
1000 NEXT();
1001 }
1002
1003 LABEL(kLoadSwizzleGlobal) {
1004 int src = READ8();
1005 int count = READ8();
1006 for (int i = 0; i < count; ++i) {
1007 PUSH(globals[src + *(ip + i)]);
1008 }
1009 ip += count;
1010 NEXT();
1011 }
1012
1013 LABEL(kLoadSwizzleUniform) {
1014 int src = READ8();
1015 int count = READ8();
1016 for (int i = 0; i < count; ++i) {
1017 PUSH(F32(uniforms[src + *(ip + i)]));
1018 }
1019 ip += count;
1020 NEXT();
1021 }
1022
1023 LABEL(kMatrixToMatrix) {
1024 int srcCols = READ8();
1025 int srcRows = READ8();
1026 int dstCols = READ8();
1027 int dstRows = READ8();
1028 SkASSERT(srcCols >= 2 && srcCols <= 4);
1029 SkASSERT(srcRows >= 2 && srcRows <= 4);
1030 SkASSERT(dstCols >= 2 && dstCols <= 4);
1031 SkASSERT(dstRows >= 2 && dstRows <= 4);
1032 F32 tmp[16];
1033 memset(tmp, 0, sizeof(tmp));
1034 tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f);
1035 for (int c = srcCols - 1; c >= 0; --c) {
1036 for (int r = srcRows - 1; r >= 0; --r) {
1037 tmp[c*4 + r] = POP().fFloat;
1038 }
1039 }
1040 for (int c = 0; c < dstCols; ++c) {
1041 for (int r = 0; r < dstRows; ++r) {
1042 PUSH(tmp[c*4 + r]);
1043 }
1044 }
1045 NEXT();
1046 }
1047
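        // Stack layout: A (lCols x lRows, column-major) sits just below B (rCols x rRows, where
        // rRows == lCols). The product is lRows x rCols, also column-major, and replaces both
        // operands on the stack.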
1048 LABEL(kMatrixMultiply) {
1049 int lCols = READ8();
1050 int lRows = READ8();
1051 int rCols = READ8();
1052 int rRows = lCols;
1053 F32 tmp[16] = { 0.0f };
1054 F32* B = &(sp - (rCols * rRows) + 1)->fFloat;
1055 F32* A = B - (lCols * lRows);
1056 for (int c = 0; c < rCols; ++c) {
1057 for (int r = 0; r < lRows; ++r) {
1058 for (int j = 0; j < lCols; ++j) {
1059 tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j];
1060 }
1061 }
1062 }
1063 sp -= (lCols * lRows) + (rCols * rRows);
1064 memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue));
1065 sp += (rCols * lRows);
1066 NEXT();
1067 }
1068
1069 VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
1070 VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *)
1071
1072 LABEL(kNegateF4) sp[-3] = -sp[-3].fFloat;
1073 LABEL(kNegateF3) sp[-2] = -sp[-2].fFloat;
1074 LABEL(kNegateF2) sp[-1] = -sp[-1].fFloat;
1075 LABEL(kNegateF) sp[ 0] = -sp[ 0].fFloat;
1076 NEXT();
1077
1078 LABEL(kNegateFN) {
1079 int count = READ8();
1080 for (int i = count - 1; i >= 0; --i) {
1081 sp[-i] = -sp[-i].fFloat;
1082 }
1083 NEXT();
1084 }
1085
1086 LABEL(kNegateI4) sp[-3] = -sp[-3].fSigned;
1087 LABEL(kNegateI3) sp[-2] = -sp[-2].fSigned;
1088 LABEL(kNegateI2) sp[-1] = -sp[-1].fSigned;
1089 LABEL(kNegateI) sp[ 0] = -sp[ 0].fSigned;
1090 NEXT();
1091
1092 LABEL(kPop4) POP();
1093 LABEL(kPop3) POP();
1094 LABEL(kPop2) POP();
1095 LABEL(kPop) POP();
1096 NEXT();
1097
1098 LABEL(kPopN)
1099 sp -= READ8();
1100 NEXT();
1101
1102 LABEL(kPushImmediate)
1103 PUSH(U32(READ32()));
1104 NEXT();
1105
1106 LABEL(kReadExternal)
1107 LABEL(kReadExternal2)
1108 LABEL(kReadExternal3)
1109 LABEL(kReadExternal4) {
1110 int count = READ8();
1111 int src = READ8();
1112 float tmp[4];
1113 I32 m = mask();
1114 for (int i = 0; i < VecWidth; ++i) {
1115 if (m[i]) {
1116 byteCode->fExternalValues[src]->read(baseIndex + i, tmp);
1117 for (int j = 0; j < count; ++j) {
1118 sp[j + 1].fFloat[i] = tmp[j];
1119 }
1120 }
1121 }
1122 sp += count;
1123 NEXT();
1124 }
1125
1126 VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
1127 VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
1128 VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
1129
1130 LABEL(kReserve)
1131 sp += READ8();
1132 NEXT();
1133
1134 LABEL(kReturn) {
1135 int count = READ8();
1136 if (frames.empty()) {
1137 if (outReturn) {
1138 VValue* src = sp - count + 1;
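                    // stripedOutput means outReturn[] holds one pointer per return slot
                    // (structure-of-arrays); otherwise outReturn[0] is a single interleaved
                    // buffer, so the values are transposed out one invocation at a time.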
1139 if (stripedOutput) {
1140 for (int i = 0; i < count; ++i) {
1141 memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
1142 ++src;
1143 }
1144 } else {
1145 float* outPtr = outReturn[0];
1146 for (int i = 0; i < count; ++i) {
1147 for (int j = 0; j < N; ++j) {
1148 outPtr[count * j] = src->fFloat[j];
1149 }
1150 ++outPtr;
1151 ++src;
1152 }
1153 }
1154 }
1155 return true;
1156 } else {
1157 // When we were called, the caller reserved stack space for their copy of our
1158 // return value, then 'stack' was positioned after that, where our parameters
1159 // were placed. Copy our return values to their reserved area.
1160 memcpy(stack - count, sp - count + 1, count * sizeof(VValue));
1161
1162 // Now move the stack pointer to the end of the passed-in parameters. This odd
1163 // calling convention requires the caller to pop the arguments after calling,
1164 // but allows them to store any out-parameters back during that unwinding.
1165 // After that sequence finishes, the return value will be the top of the stack.
1166 const StackFrame& frame(frames.back());
1167 sp = stack + frame.fParameterCount - 1;
1168 stack = frame.fStack;
1169 code = frame.fCode;
1170 ip = frame.fIP;
1171 frames.pop_back();
1172 NEXT();
1173 }
1174 }
1175
1176 LABEL(kScalarToMatrix) {
1177 int cols = READ8();
1178 int rows = READ8();
1179 VValue v = POP();
1180 for (int c = 0; c < cols; ++c) {
1181 for (int r = 0; r < rows; ++r) {
1182 PUSH(c == r ? v : F32(0.0f));
1183 }
1184 }
1185 NEXT();
1186 }
1187
1188 LABEL(kShiftLeft)
1189 sp[0] = sp[0].fSigned << READ8();
1190 NEXT();
1191 LABEL(kShiftRightS)
1192 sp[0] = sp[0].fSigned >> READ8();
1193 NEXT();
1194 LABEL(kShiftRightU)
1195 sp[0] = sp[0].fUnsigned >> READ8();
1196 NEXT();
1197
1198 VECTOR_UNARY_FN_VEC(kSin, sinf)
1199 VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
1200
1201 LABEL(kStore4)
1202 stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat);
1203 LABEL(kStore3)
1204 stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat);
1205 LABEL(kStore2)
1206 stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat);
1207 LABEL(kStore)
1208 stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat);
1209 ++ip;
1210 NEXT();
1211
1212 LABEL(kStoreGlobal4)
1213 globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat);
1214 LABEL(kStoreGlobal3)
1215 globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat);
1216 LABEL(kStoreGlobal2)
1217 globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat);
1218 LABEL(kStoreGlobal)
1219 globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat);
1220 ++ip;
1221 NEXT();
1222
1223 LABEL(kStoreExtended) {
1224 int count = READ8();
1225 I32 target = POP().fSigned;
1226 VValue* src = sp - count + 1;
1227 I32 m = mask();
1228 for (int i = 0; i < count; ++i) {
1229 for (int j = 0; j < VecWidth; ++j) {
1230 if (m[j]) {
1231 stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
1232 }
1233 }
1234 }
1235 sp -= count;
1236 NEXT();
1237 }
1238 LABEL(kStoreExtendedGlobal) {
1239 int count = READ8();
1240 I32 target = POP().fSigned;
1241 VValue* src = sp - count + 1;
1242 I32 m = mask();
1243 for (int i = 0; i < count; ++i) {
1244 for (int j = 0; j < VecWidth; ++j) {
1245 if (m[j]) {
1246 globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
1247 }
1248 }
1249 }
1250 sp -= count;
1251 NEXT();
1252 }
1253
1254 LABEL(kStoreSwizzle) {
1255 int target = READ8();
1256 int count = READ8();
1257 for (int i = count - 1; i >= 0; --i) {
1258 stack[target + *(ip + i)] = skvx::if_then_else(
1259 mask(), POP().fFloat, stack[target + *(ip + i)].fFloat);
1260 }
1261 ip += count;
1262 NEXT();
1263 }
1264
1265 LABEL(kStoreSwizzleGlobal) {
1266 int target = READ8();
1267 int count = READ8();
1268 for (int i = count - 1; i >= 0; --i) {
1269 globals[target + *(ip + i)] = skvx::if_then_else(
1270 mask(), POP().fFloat, globals[target + *(ip + i)].fFloat);
1271 }
1272 ip += count;
1273 NEXT();
1274 }
1275
1276 LABEL(kStoreSwizzleIndirect) {
1277 int count = READ8();
1278 I32 target = POP().fSigned;
1279 I32 m = mask();
1280 for (int i = count - 1; i >= 0; --i) {
1281 I32 v = POP().fSigned;
1282 for (int j = 0; j < VecWidth; ++j) {
1283 if (m[j]) {
1284 stack[target[j] + *(ip + i)].fSigned[j] = v[j];
1285 }
1286 }
1287 }
1288 ip += count;
1289 NEXT();
1290 }
1291
1292 LABEL(kStoreSwizzleIndirectGlobal) {
1293 int count = READ8();
1294 I32 target = POP().fSigned;
1295 I32 m = mask();
1296 for (int i = count - 1; i >= 0; --i) {
1297 I32 v = POP().fSigned;
1298 for (int j = 0; j < VecWidth; ++j) {
1299 if (m[j]) {
1300 globals[target[j] + *(ip + i)].fSigned[j] = v[j];
1301 }
1302 }
1303 }
1304 ip += count;
1305 NEXT();
1306 }
1307
1308 VECTOR_BINARY_OP(kSubtractI, fSigned, -)
1309 VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -)
1310
1311 LABEL(kSwizzle) {
1312 VValue tmp[4];
1313 for (int i = READ8() - 1; i >= 0; --i) {
1314 tmp[i] = POP();
1315 }
1316 for (int i = READ8() - 1; i >= 0; --i) {
1317 PUSH(tmp[READ8()]);
1318 }
1319 NEXT();
1320 }
1321
1322 VECTOR_UNARY_FN_VEC(kTan, tanf)
1323
1324 LABEL(kWriteExternal4)
1325 LABEL(kWriteExternal3)
1326 LABEL(kWriteExternal2)
1327 LABEL(kWriteExternal) {
1328 int count = READ8();
1329 int target = READ8();
1330 float tmp[4];
1331 I32 m = mask();
1332 sp -= count;
1333 for (int i = 0; i < VecWidth; ++i) {
1334 if (m[i]) {
1335 for (int j = 0; j < count; ++j) {
1336 tmp[j] = sp[j + 1].fFloat[i];
1337 }
1338 byteCode->fExternalValues[target]->write(baseIndex + i, tmp);
1339 }
1340 }
1341 NEXT();
1342 }
1343
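        // condPtr tracks the raw result of each condition; maskPtr tracks those results ANDed
        // together with all enclosing conditions. mask() additionally ANDs in *loopPtr, so a lane
        // executes only while every enclosing if and loop still has it enabled.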
1344 LABEL(kMaskPush)
1345 condPtr[1] = POP().fSigned;
1346 maskPtr[1] = maskPtr[0] & condPtr[1];
1347 ++condPtr; ++maskPtr;
1348 NEXT();
1349 LABEL(kMaskPop)
1350 --condPtr; --maskPtr;
1351 NEXT();
1352 LABEL(kMaskNegate)
1353 maskPtr[0] = maskPtr[-1] & ~condPtr[0];
1354 NEXT();
1355 LABEL(kMaskBlend) {
1356 int count = READ8();
1357 I32 m = condPtr[0];
1358 --condPtr; --maskPtr;
1359 for (int i = 0; i < count; ++i) {
1360 sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat);
1361 --sp;
1362 }
1363 NEXT();
1364 }
1365 LABEL(kBranchIfAllFalse) {
1366 int target = READ16();
1367 if (!skvx::any(mask())) {
1368 ip = code + target;
1369 }
1370 NEXT();
1371 }
1372
1373 LABEL(kLoopBegin)
1374 contPtr[1] = 0;
1375 loopPtr[1] = loopPtr[0];
1376 ++contPtr; ++loopPtr;
1377 NEXT();
1378 LABEL(kLoopNext)
1379 *loopPtr |= *contPtr;
1380 *contPtr = 0;
1381 NEXT();
1382 LABEL(kLoopMask)
1383 *loopPtr &= POP().fSigned;
1384 NEXT();
1385 LABEL(kLoopEnd)
1386 --contPtr; --loopPtr;
1387 NEXT();
1388 LABEL(kLoopBreak)
1389 *loopPtr &= ~mask();
1390 NEXT();
1391 LABEL(kLoopContinue) {
1392 I32 m = mask();
1393 *contPtr |= m;
1394 *loopPtr &= ~m;
1395 NEXT();
1396 }
1397#ifdef SKSLC_THREADED_CODE
1398 #ifdef TRACE
1399 next:
1400 printf("at %3d (stack: %d) (disable threaded code for disassembly)\n",
1401 (int) (ip - code), (int) (sp - stack) + 1);
1402 goto *READ_INST();
1403 #endif
1404#else
1405 }
1406 }
1407#endif
1408}
1409
}; // struct Interpreter
1411
1412#endif // SK_ENABLE_SKSL_INTERPRETER
1413
1414#undef spf
1415
1416void ByteCodeFunction::disassemble() const {
1417#if defined(SK_ENABLE_SKSL_INTERPRETER)
1418 const uint8_t* ip = fCode.data();
1419 while (ip < fCode.data() + fCode.size()) {
1420 printf("%d: ", (int)(ip - fCode.data()));
1421 ip = Interpreter::DisassembleInstruction(ip);
1422 printf("\n");
1423 }
1424#endif
1425}
1426
1427#define VECTOR_PREPROCESS(base) \
1428 case ByteCodeInstruction::base ## 4: \
1429 case ByteCodeInstruction::base ## 3: \
1430 case ByteCodeInstruction::base ## 2: \
1431 case ByteCodeInstruction::base: READ8(); break;
1432
1433#define VECTOR_PREPROCESS_NO_COUNT(base) \
1434 case ByteCodeInstruction::base ## 4: \
1435 case ByteCodeInstruction::base ## 3: \
1436 case ByteCodeInstruction::base ## 2: \
1437 case ByteCodeInstruction::base: break;
1438
1439#define VECTOR_MATRIX_PREPROCESS(base) \
1440 VECTOR_PREPROCESS(base) \
1441 case ByteCodeInstruction::base ## N: READ8(); break;
1442
1443#define VECTOR_MATRIX_PREPROCESS_NO_COUNT(base) \
1444 VECTOR_PREPROCESS_NO_COUNT(base) \
1445 case ByteCodeInstruction::base ## N: READ8(); break;
1446
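// preprocess() rewrites a function's bytecode for the threaded-code dispatcher: each opcode is
// replaced in place by the address of its interpreter label, and the switch below exists only to
// step over each instruction's operands so the next opcode can be located.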
1447void ByteCodeFunction::preprocess(const void* labels[]) {
1448#if defined(SK_ENABLE_SKSL_INTERPRETER)
1449#ifdef TRACE
1450 this->disassemble();
1451#endif
1452 uint8_t* ip = fCode.data();
1453 while (ip < fCode.data() + fCode.size()) {
1454 ByteCodeInstruction inst = (ByteCodeInstruction) (intptr_t) READ_INST();
1455 const void* label = labels[(int) inst];
1456 memcpy(ip - sizeof(instruction), &label, sizeof(label));
1457 switch (inst) {
1458 VECTOR_MATRIX_PREPROCESS(kAddF)
1459 VECTOR_PREPROCESS(kAddI)
1460 case ByteCodeInstruction::kAndB: break;
1461 case ByteCodeInstruction::kBranch: READ16(); break;
1462 case ByteCodeInstruction::kCall: READ8(); break;
1463 case ByteCodeInstruction::kCallExternal: {
1464 READ8();
1465 READ8();
1466 READ8();
1467 break;
1468 }
1469 case ByteCodeInstruction::kClampIndex: READ8(); break;
1470 VECTOR_PREPROCESS(kCompareIEQ)
1471 VECTOR_PREPROCESS(kCompareINEQ)
1472 VECTOR_MATRIX_PREPROCESS(kCompareFEQ)
1473 VECTOR_MATRIX_PREPROCESS(kCompareFNEQ)
1474 VECTOR_PREPROCESS(kCompareFGT)
1475 VECTOR_PREPROCESS(kCompareFGTEQ)
1476 VECTOR_PREPROCESS(kCompareFLT)
1477 VECTOR_PREPROCESS(kCompareFLTEQ)
1478 VECTOR_PREPROCESS(kCompareSGT)
1479 VECTOR_PREPROCESS(kCompareSGTEQ)
1480 VECTOR_PREPROCESS(kCompareSLT)
1481 VECTOR_PREPROCESS(kCompareSLTEQ)
1482 VECTOR_PREPROCESS(kCompareUGT)
1483 VECTOR_PREPROCESS(kCompareUGTEQ)
1484 VECTOR_PREPROCESS(kCompareULT)
1485 VECTOR_PREPROCESS(kCompareULTEQ)
1486 VECTOR_PREPROCESS_NO_COUNT(kConvertFtoI)
1487 VECTOR_PREPROCESS_NO_COUNT(kConvertStoF)
1488 VECTOR_PREPROCESS_NO_COUNT(kConvertUtoF)
1489 VECTOR_PREPROCESS(kCos)
1490 VECTOR_MATRIX_PREPROCESS(kDivideF)
1491 VECTOR_PREPROCESS(kDivideS)
1492 VECTOR_PREPROCESS(kDivideU)
1493 VECTOR_MATRIX_PREPROCESS(kDup)
1494
1495 case ByteCodeInstruction::kInverse2x2:
1496 case ByteCodeInstruction::kInverse3x3:
1497 case ByteCodeInstruction::kInverse4x4: break;
1498
1499 case ByteCodeInstruction::kLoad:
1500 case ByteCodeInstruction::kLoad2:
1501 case ByteCodeInstruction::kLoad3:
1502 case ByteCodeInstruction::kLoad4:
1503 case ByteCodeInstruction::kLoadGlobal:
1504 case ByteCodeInstruction::kLoadGlobal2:
1505 case ByteCodeInstruction::kLoadGlobal3:
1506 case ByteCodeInstruction::kLoadGlobal4:
1507 case ByteCodeInstruction::kLoadUniform:
1508 case ByteCodeInstruction::kLoadUniform2:
1509 case ByteCodeInstruction::kLoadUniform3:
1510 case ByteCodeInstruction::kLoadUniform4: READ16(); break;
1511
1512 case ByteCodeInstruction::kLoadSwizzle:
1513 case ByteCodeInstruction::kLoadSwizzleGlobal:
1514 case ByteCodeInstruction::kLoadSwizzleUniform: {
1515 READ8();
1516 int count = READ8();
1517 ip += count;
1518 break;
1519 }
1520
1521 case ByteCodeInstruction::kLoadExtended:
1522 case ByteCodeInstruction::kLoadExtendedGlobal:
1523 case ByteCodeInstruction::kLoadExtendedUniform:
1524 READ8();
1525 break;
1526
1527 case ByteCodeInstruction::kMatrixToMatrix: {
1528 READ8();
1529 READ8();
1530 READ8();
1531 READ8();
1532 break;
1533 }
1534 case ByteCodeInstruction::kMatrixMultiply: {
1535 READ8();
1536 READ8();
1537 READ8();
1538 break;
1539 }
1540 VECTOR_MATRIX_PREPROCESS(kMultiplyF)
1541 VECTOR_PREPROCESS(kMultiplyI)
1542 VECTOR_MATRIX_PREPROCESS_NO_COUNT(kNegateF)
1543 VECTOR_PREPROCESS_NO_COUNT(kNegateI)
1544 case ByteCodeInstruction::kNotB: break;
1545 case ByteCodeInstruction::kOrB: break;
1546 VECTOR_MATRIX_PREPROCESS_NO_COUNT(kPop)
1547 case ByteCodeInstruction::kPushImmediate: READ32(); break;
1548
1549 case ByteCodeInstruction::kReadExternal:
1550 case ByteCodeInstruction::kReadExternal2:
1551 case ByteCodeInstruction::kReadExternal3:
1552 case ByteCodeInstruction::kReadExternal4: READ16(); break;
1553
1554 VECTOR_PREPROCESS(kRemainderF)
1555 VECTOR_PREPROCESS(kRemainderS)
1556 VECTOR_PREPROCESS(kRemainderU)
1557 case ByteCodeInstruction::kReserve: READ8(); break;
1558 case ByteCodeInstruction::kReturn: READ8(); break;
1559 case ByteCodeInstruction::kScalarToMatrix: READ8(); READ8(); break;
1560 case ByteCodeInstruction::kShiftLeft: READ8(); break;
1561 case ByteCodeInstruction::kShiftRightS: READ8(); break;
1562 case ByteCodeInstruction::kShiftRightU: READ8(); break;
1563 VECTOR_PREPROCESS(kSin)
1564 VECTOR_PREPROCESS_NO_COUNT(kSqrt)
1565
1566 case ByteCodeInstruction::kStore:
1567 case ByteCodeInstruction::kStore2:
1568 case ByteCodeInstruction::kStore3:
1569 case ByteCodeInstruction::kStore4:
1570 case ByteCodeInstruction::kStoreGlobal:
1571 case ByteCodeInstruction::kStoreGlobal2:
1572 case ByteCodeInstruction::kStoreGlobal3:
1573 case ByteCodeInstruction::kStoreGlobal4: READ8(); break;
1574
1575 case ByteCodeInstruction::kStoreSwizzle:
1576 case ByteCodeInstruction::kStoreSwizzleGlobal: {
1577 READ8();
1578 int count = READ8();
1579 ip += count;
1580 break;
1581 }
1582
1583 case ByteCodeInstruction::kStoreSwizzleIndirect:
1584 case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: {
1585 int count = READ8();
1586 ip += count;
1587 break;
1588 }
1589
1590 case ByteCodeInstruction::kStoreExtended: READ8(); break;
1591 case ByteCodeInstruction::kStoreExtendedGlobal: READ8(); break;
1592
1593 VECTOR_MATRIX_PREPROCESS(kSubtractF)
1594 VECTOR_PREPROCESS(kSubtractI)
1595
1596 case ByteCodeInstruction::kSwizzle: {
1597 READ8();
1598 int count = READ8();
1599 ip += count;
1600 break;
1601 }
1602 VECTOR_PREPROCESS(kTan)
1603 case ByteCodeInstruction::kWriteExternal:
1604 case ByteCodeInstruction::kWriteExternal2:
1605 case ByteCodeInstruction::kWriteExternal3:
1606 case ByteCodeInstruction::kWriteExternal4: READ16(); break;
1607
1608 case ByteCodeInstruction::kXorB: break;
1609 case ByteCodeInstruction::kMaskPush: break;
1610 case ByteCodeInstruction::kMaskPop: break;
1611 case ByteCodeInstruction::kMaskNegate: break;
1612 case ByteCodeInstruction::kMaskBlend: READ8(); break;
1613 case ByteCodeInstruction::kBranchIfAllFalse: READ16(); break;
1614 case ByteCodeInstruction::kLoopBegin: break;
1615 case ByteCodeInstruction::kLoopNext: break;
1616 case ByteCodeInstruction::kLoopMask: break;
1617 case ByteCodeInstruction::kLoopEnd: break;
1618 case ByteCodeInstruction::kLoopContinue: break;
1619 case ByteCodeInstruction::kLoopBreak: break;
1620 default:
1621 ip -= 2;
1622 printf("unknown(%d)\n", READ16());
1623 SkASSERT(false);
1624 }
1625 }
1626#endif
1627}
1628
1629bool ByteCode::run(const ByteCodeFunction* f,
1630 float* args, int argCount,
1631 float* outReturn, int returnCount,
1632 const float* uniforms, int uniformCount) const {
1633#if defined(SK_ENABLE_SKSL_INTERPRETER)
1634 Interpreter::VValue stack[128];
1635 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1636 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1637 return false;
1638 }
1639
1640 if (argCount != f->fParameterCount ||
1641 returnCount != f->fReturnCount ||
1642 uniformCount != fUniformSlotCount) {
1643 return false;
1644 }
1645
1646 Interpreter::VValue globals[32];
1647 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1648 return false;
1649 }
1650
1651 // Transpose args into stack
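    // run() evaluates a single invocation, so only lane 0 of each VValue is used; each scalar
    // argument therefore lands VecWidth floats apart in the stack.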
1652 {
1653 float* src = args;
1654 float* dst = (float*)stack;
1655 for (int i = 0; i < argCount; ++i) {
1656 *dst = *src++;
1657 dst += VecWidth;
1658 }
1659 }
1660
1661 bool stripedOutput = false;
1662 float** outArray = outReturn ? &outReturn : nullptr;
1663 if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) {
1664 return false;
1665 }
1666
1667 // Transpose out parameters back
1668 {
1669 float* dst = args;
1670 float* src = (float*)stack;
1671 for (const auto& p : f->fParameters) {
1672 if (p.fIsOutParameter) {
1673 for (int i = p.fSlotCount; i > 0; --i) {
1674 *dst++ = *src;
1675 src += VecWidth;
1676 }
1677 } else {
1678 dst += p.fSlotCount;
1679 src += p.fSlotCount * VecWidth;
1680 }
1681 }
1682 }
1683
1684 return true;
1685#else
1686 SkDEBUGFAIL("ByteCode interpreter not enabled");
1687 return false;
1688#endif
1689}
1690
1691bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
1692 float* args[], int argCount,
1693 float* outReturn[], int returnCount,
1694 const float* uniforms, int uniformCount) const {
1695#if defined(SK_ENABLE_SKSL_INTERPRETER)
1696 Interpreter::VValue stack[128];
1697 int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
1698 if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
1699 return false;
1700 }
1701
1702 if (argCount != f->fParameterCount ||
1703 returnCount != f->fReturnCount ||
1704 uniformCount != fUniformSlotCount) {
1705 return false;
1706 }
1707
1708 Interpreter::VValue globals[32];
1709 if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
1710 return false;
1711 }
1712
    // InnerRun just takes an outReturn array, so clear it if the return count is zero
1714 if (returnCount == 0) {
1715 outReturn = nullptr;
1716 }
1717
1718 int baseIndex = 0;
1719
1720 while (N) {
1721 int w = std::min(N, VecWidth);
1722
1723 // Copy args into stack
1724 for (int i = 0; i < argCount; ++i) {
1725 memcpy((void*)(stack + i), args[i], w * sizeof(float));
1726 }
1727
1728 bool stripedOutput = true;
1729 if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1730 baseIndex)) {
1731 return false;
1732 }
1733
1734 // Copy out parameters back
1735 int slot = 0;
1736 for (const auto& p : f->fParameters) {
1737 if (p.fIsOutParameter) {
1738 for (int i = slot; i < slot + p.fSlotCount; ++i) {
1739 memcpy(args[i], stack + i, w * sizeof(float));
1740 }
1741 }
1742 slot += p.fSlotCount;
1743 }
1744
1745 // Step each argument pointer ahead
1746 for (int i = 0; i < argCount; ++i) {
1747 args[i] += w;
1748 }
1749 N -= w;
1750 baseIndex += w;
1751 }
1752
1753 return true;
1754#else
1755 SkDEBUGFAIL("ByteCode interpreter not enabled");
1756 return false;
1757#endif
1758}
1759
1760} // namespace SkSL
1761
1762#endif
1763