1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #ifndef SKSL_STANDALONE |
9 | |
10 | #include "include/core/SkPoint3.h" |
11 | #include "include/private/SkVx.h" |
12 | #include "src/core/SkUtils.h" // sk_unaligned_load |
13 | #include "src/sksl/SkSLByteCode.h" |
14 | #include "src/sksl/SkSLByteCodeGenerator.h" |
15 | #include "src/sksl/SkSLExternalValue.h" |
16 | |
17 | #include <functional> |
18 | #include <vector> |
19 | |
20 | namespace SkSL { |
21 | |
22 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
23 | |
// Number of SIMD lanes processed per interpreter step. Every stack slot holds
// one value per lane.
constexpr int VecWidth = ByteCode::kVecWidth;

struct Interpreter {

// Lane-wide vector views of a stack slot's bits.
using F32 = skvx::Vec<VecWidth, float>;
using I32 = skvx::Vec<VecWidth, int32_t>;
using U32 = skvx::Vec<VecWidth, uint32_t>;

// Bytecode operand readers: each decodes at 'ip' and advances 'ip' past what
// it read. Multi-byte reads go through sk_unaligned_load because the bytecode
// stream has no alignment guarantees.
#define READ8() (*(ip++))
#define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
#define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
                     sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))

// Disassembler helper: print an opcode that carries a single count operand.
#define DISASSEMBLE_COUNT(op, text) \
    case ByteCodeInstruction::op: printf(text " %d", READ8()); break;

// Disassembler helper: print an opcode that carries a count and a slot index.
#define DISASSEMBLE_COUNT_SLOT(op, text) \
    case ByteCodeInstruction::op: {      \
        int N = READ8(),                 \
            slot = READ8();              \
        printf(text " %d [%d]", N, slot); \
    } break;
47 | |
// Prints a human-readable form of the single instruction at 'ip' (opcode byte
// plus its decoded operands) to stdout, and returns a pointer just past the
// instruction, i.e. the address of the next instruction.
static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
    auto inst = READ_INST();
    printf("%02x " , (int)inst);
    switch (inst) {
        DISASSEMBLE_COUNT(kAddF, "addf" )
        DISASSEMBLE_COUNT(kAddI, "addi" )
        DISASSEMBLE_COUNT(kAndB, "andb" )
        DISASSEMBLE_COUNT(kATan, "atan" )
        case ByteCodeInstruction::kBranch: printf("branch %d" , READ16()); break;
        case ByteCodeInstruction::kCall: printf("call %d" , READ8()); break;
        case ByteCodeInstruction::kCallExternal: {
            int argumentCount = READ8();
            int returnCount = READ8();
            int externalValue = READ8();
            printf("callexternal %d, %d, %d" , argumentCount, returnCount, externalValue);
            break;
        }
        DISASSEMBLE_COUNT(kCeil, "ceil" )
        case ByteCodeInstruction::kClampIndex: printf("clampindex %d" , READ8()); break;
        DISASSEMBLE_COUNT(kCompareIEQ, "compareieq" )
        DISASSEMBLE_COUNT(kCompareINEQ, "compareineq" )
        DISASSEMBLE_COUNT(kCompareFEQ, "comparefeq" )
        DISASSEMBLE_COUNT(kCompareFNEQ, "comparefneq" )
        DISASSEMBLE_COUNT(kCompareFGT, "comparefgt" )
        DISASSEMBLE_COUNT(kCompareFGTEQ, "comparefgteq" )
        DISASSEMBLE_COUNT(kCompareFLT, "compareflt" )
        DISASSEMBLE_COUNT(kCompareFLTEQ, "compareflteq" )
        DISASSEMBLE_COUNT(kCompareSGT, "comparesgt" )
        DISASSEMBLE_COUNT(kCompareSGTEQ, "comparesgteq" )
        DISASSEMBLE_COUNT(kCompareSLT, "compareslt" )
        DISASSEMBLE_COUNT(kCompareSLTEQ, "compareslteq" )
        DISASSEMBLE_COUNT(kCompareUGT, "compareugt" )
        DISASSEMBLE_COUNT(kCompareUGTEQ, "compareugteq" )
        DISASSEMBLE_COUNT(kCompareULT, "compareult" )
        DISASSEMBLE_COUNT(kCompareULTEQ, "compareulteq" )
        DISASSEMBLE_COUNT(kConvertFtoI, "convertftoi" )
        DISASSEMBLE_COUNT(kConvertStoF, "convertstof" )
        DISASSEMBLE_COUNT(kConvertUtoF, "convertutof" )
        DISASSEMBLE_COUNT(kCos, "cos" )
        DISASSEMBLE_COUNT(kDivideF, "dividef" )
        DISASSEMBLE_COUNT(kDivideS, "divideS" )
        DISASSEMBLE_COUNT(kDivideU, "divideu" )
        DISASSEMBLE_COUNT(kDup, "dup" )
        DISASSEMBLE_COUNT(kFloor, "floor" )
        DISASSEMBLE_COUNT(kFract, "fract" )
        case ByteCodeInstruction::kInverse2x2: printf("inverse2x2" ); break;
        case ByteCodeInstruction::kInverse3x3: printf("inverse3x3" ); break;
        case ByteCodeInstruction::kInverse4x4: printf("inverse4x4" ); break;
        DISASSEMBLE_COUNT(kLerp, "lerp" )
        DISASSEMBLE_COUNT_SLOT(kLoad, "load" )
        DISASSEMBLE_COUNT_SLOT(kLoadGlobal, "loadglobal" )
        DISASSEMBLE_COUNT_SLOT(kLoadUniform, "loaduniform" )
        DISASSEMBLE_COUNT(kLoadExtended, "loadextended" )
        DISASSEMBLE_COUNT(kLoadExtendedGlobal, "loadextendedglobal" )
        DISASSEMBLE_COUNT(kLoadExtendedUniform, "loadextendeduniform" )
        case ByteCodeInstruction::kLoadFragCoord: printf("loadfragcoord" ); break;
        case ByteCodeInstruction::kMatrixToMatrix: {
            int srcCols = READ8();
            int srcRows = READ8();
            int dstCols = READ8();
            int dstRows = READ8();
            printf("matrixtomatrix %dx%d %dx%d" , srcCols, srcRows, dstCols, dstRows);
            break;
        }
        case ByteCodeInstruction::kMatrixMultiply: {
            int lCols = READ8();
            int lRows = READ8();
            int rCols = READ8();
            // rRows == lCols by definition of matrix multiplication, so the
            // right matrix prints as rCols x lCols.
            printf("matrixmultiply %dx%d %dx%d" , lCols, lRows, rCols, lCols);
            break;
        }
        DISASSEMBLE_COUNT(kMaxF, "maxf" )
        DISASSEMBLE_COUNT(kMaxS, "maxs" )
        DISASSEMBLE_COUNT(kMinF, "minf" )
        DISASSEMBLE_COUNT(kMinS, "mins" )
        DISASSEMBLE_COUNT(kMix, "mix" )
        DISASSEMBLE_COUNT(kMultiplyF, "multiplyf" )
        DISASSEMBLE_COUNT(kMultiplyI, "multiplyi" )
        DISASSEMBLE_COUNT(kNegateF, "negatef" )
        DISASSEMBLE_COUNT(kNegateI, "negatei" )
        DISASSEMBLE_COUNT(kNotB, "notb" )
        DISASSEMBLE_COUNT(kOrB, "orb" )
        DISASSEMBLE_COUNT(kPop, "pop" )
        DISASSEMBLE_COUNT(kPow, "pow" )
        case ByteCodeInstruction::kPushImmediate: {
            // Immediates are raw bits; print both integer and float readings.
            uint32_t v = READ32();
            union { uint32_t u; float f; } pun = { v };
            printf("pushimmediate %s" , (to_string(v) + "(" + to_string(pun.f) + ")" ).c_str());
            break;
        }
        DISASSEMBLE_COUNT_SLOT(kReadExternal, "readexternal" )
        DISASSEMBLE_COUNT(kRemainderF, "remainderf" )
        DISASSEMBLE_COUNT(kRemainderS, "remainders" )
        DISASSEMBLE_COUNT(kRemainderU, "remainderu" )
        DISASSEMBLE_COUNT(kReserve, "reserve" )
        DISASSEMBLE_COUNT(kReturn, "return" )
        case ByteCodeInstruction::kSample: printf("sample %d" , READ8()); break;
        case ByteCodeInstruction::kSampleExplicit: printf("sampleExplicit %d" , READ8()); break;
        case ByteCodeInstruction::kSampleMatrix: printf("sampleMatrix %d" , READ8()); break;
        case ByteCodeInstruction::kScalarToMatrix: {
            int cols = READ8();
            int rows = READ8();
            printf("scalartomatrix %dx%d" , cols, rows);
            break;
        }
        case ByteCodeInstruction::kShiftLeft: printf("shl %d" , READ8()); break;
        case ByteCodeInstruction::kShiftRightS: printf("shrs %d" , READ8()); break;
        case ByteCodeInstruction::kShiftRightU: printf("shru %d" , READ8()); break;
        DISASSEMBLE_COUNT(kSin, "sin" )
        DISASSEMBLE_COUNT(kSqrt, "sqrt" )
        DISASSEMBLE_COUNT_SLOT(kStore, "store" )
        DISASSEMBLE_COUNT_SLOT(kStoreGlobal, "storeglobal" )
        DISASSEMBLE_COUNT(kStoreExtended, "storeextended" )
        DISASSEMBLE_COUNT(kStoreExtendedGlobal, "storeextendedglobal" )
        DISASSEMBLE_COUNT(kSubtractF, "subtractf" )
        DISASSEMBLE_COUNT(kSubtractI, "subtracti" )
        case ByteCodeInstruction::kSwizzle: {
            // Operands: source slot count, destination component count, then
            // one source-index byte per destination component.
            printf("swizzle %d, " , READ8());
            int count = READ8();
            printf("%d" , count);
            for (int i = 0; i < count; ++i) {
                printf(", %d" , READ8());
            }
            break;
        }
        DISASSEMBLE_COUNT(kTan, "tan" )
        DISASSEMBLE_COUNT_SLOT(kWriteExternal, "writeexternal" )
        DISASSEMBLE_COUNT(kXorB, "xorb" )
        case ByteCodeInstruction::kMaskPush: printf("maskpush" ); break;
        case ByteCodeInstruction::kMaskPop: printf("maskpop" ); break;
        case ByteCodeInstruction::kMaskNegate: printf("masknegate" ); break;
        case ByteCodeInstruction::kMaskBlend: printf("maskblend %d" , READ8()); break;
        case ByteCodeInstruction::kBranchIfAllFalse:
            printf("branchifallfalse %d" , READ16());
            break;
        case ByteCodeInstruction::kLoopBegin: printf("loopbegin" ); break;
        case ByteCodeInstruction::kLoopNext: printf("loopnext" ); break;
        case ByteCodeInstruction::kLoopMask: printf("loopmask" ); break;
        case ByteCodeInstruction::kLoopEnd: printf("loopend" ); break;
        case ByteCodeInstruction::kLoopContinue: printf("loopcontinue" ); break;
        case ByteCodeInstruction::kLoopBreak: printf("loopbreak" ); break;
        default:
            // Back up so we can re-read (and report) the unrecognized opcode.
            ip -= sizeof(ByteCodeInstruction);
            printf("unknown(%d)\n" , (int) (intptr_t) READ_INST());
            SkASSERT(false);
    }
    return ip;
}
196 | |
// A naive implementation of / or % using skvx operations will likely crash with a divide by zero
// in inactive vector lanes, so we need to be sure to avoid masked-off lanes.
// TODO: Would it be better to do this with a select of (lane, 1) based on mask?
#define VECTOR_BINARY_MASKED_OP(inst, field, op)           \
    case ByteCodeInstruction::inst: {                      \
        int count = READ8();                               \
        for (int i = count; i > 0; --i) {                  \
            for (int j = 0; j < VecWidth; ++j) {           \
                if (mask()[j]) {                           \
                    sp[-count].field[j] op ## = sp[0].field[j]; \
                }                                          \
            }                                              \
            POP();                                         \
        }                                                  \
    } continue;

// Pops 'count' slots and applies 'op' component-wise against the 'count'
// slots below them, leaving the result where the left operand was.
#define VECTOR_BINARY_OP(inst, field, op)                  \
    case ByteCodeInstruction::inst: {                      \
        int count = READ8();                               \
        for (int i = count; i > 0; --i) {                  \
            sp[-count] = sp[-count].field op sp[0].field;  \
            POP();                                         \
        }                                                  \
    } continue;

// Same stack discipline as VECTOR_BINARY_OP, but combines operands with a
// two-argument function instead of an infix operator.
#define VECTOR_BINARY_FN(inst, field, fn)                  \
    case ByteCodeInstruction::inst: {                      \
        int count = READ8();                               \
        for (int i = count; i > 0; --i) {                  \
            sp[-count] = fn(sp[-count].field, sp[0].field); \
            POP();                                         \
        }                                                  \
    } continue;

// Applies a one-argument function in place to the top 'count' stack slots.
#define VECTOR_UNARY_FN(inst, fn, field)                   \
    case ByteCodeInstruction::inst: {                      \
        int count = READ8();                               \
        for (int i = count; i --> 0; ) {                   \
            sp[-i] = fn(sp[-i].field);                     \
        }                                                  \
    } continue;
238 | |
239 | union VValue { |
240 | VValue() {} |
241 | VValue(F32 f) : fFloat(f) {} |
242 | VValue(I32 s) : fSigned(s) {} |
243 | VValue(U32 u) : fUnsigned(u) {} |
244 | |
245 | F32 fFloat; |
246 | I32 fSigned; |
247 | U32 fUnsigned; |
248 | }; |
249 | |
// Caller state saved by kCall and restored by kReturn. NOTE: member order is
// relied on by aggregate initialization at the kCall site.
struct StackFrame {
    const uint8_t* fCode;   // caller's bytecode
    const uint8_t* fIP;     // caller's resume address
    VValue* fStack;         // caller's stack base
    int fParameterCount;    // callee's parameter count (caller pops args after return)
};
256 | |
257 | static F32 VecMod(F32 a, F32 b) { |
258 | return a - skvx::trunc(a / b) * b; |
259 | } |
260 | |
261 | #define spf(index) sp[index].fFloat |
262 | |
// Invokes an ExternalValue's call() once per active lane. Arguments are taken
// from the stack (scalarized per lane through a staging buffer), and return
// values are written back over them. Advances 'ip' past the operands and
// adjusts 'sp' in place; 'baseIndex' converts a lane number into the absolute
// element index passed to the external value.
static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp,
                         int baseIndex, I32 mask) {
    int argumentCount = READ8();
    int returnCount = READ8();
    int target = READ8();
    ExternalValue* v = byteCode->fExternalValues[target];
    // Point sp at the first argument slot; arguments occupy sp[0..argc-1].
    sp -= argumentCount - 1;

    // External calls are scalar, so gather/scatter each lane separately.
    float tmpArgs[4];
    float tmpReturn[4];
    SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
    SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));

    for (int i = 0; i < VecWidth; ++i) {
        if (mask[i]) {
            for (int j = 0; j < argumentCount; ++j) {
                tmpArgs[j] = sp[j].fFloat[i];
            }
            v->call(baseIndex + i, tmpArgs, tmpReturn);
            for (int j = 0; j < returnCount; ++j) {
                sp[j].fFloat[i] = tmpReturn[j];
            }
        }
    }
    // Leave only the return values on top of the stack.
    sp += returnCount - 1;
}
289 | |
290 | static void Inverse2x2(VValue* sp) { |
291 | F32 a = sp[-3].fFloat, |
292 | b = sp[-2].fFloat, |
293 | c = sp[-1].fFloat, |
294 | d = sp[ 0].fFloat; |
295 | F32 idet = F32(1) / (a*d - b*c); |
296 | sp[-3].fFloat = d * idet; |
297 | sp[-2].fFloat = -b * idet; |
298 | sp[-1].fFloat = -c * idet; |
299 | sp[ 0].fFloat = a * idet; |
300 | } |
301 | |
// Inverts, in place, the 3x3 matrix stored in the top nine stack slots.
// Slots are column-major: sp[-8..-6] is column 0, sp[-5..-3] column 1,
// sp[-2..0] column 2; a<r><c> below names row r, column c.
static void Inverse3x3(VValue* sp) {
    F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat,
        a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat,
        a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat;
    // Reciprocal of the determinant (cofactor expansion).
    F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
                         a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
    // Adjugate entries scaled by 1/det, written back column-major.
    sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet;
    sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet;
    sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet;
    sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet;
    sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet;
    sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet;
    sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet;
    sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet;
    sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet;
}
318 | |
// Inverts, in place, the 4x4 matrix stored in the top sixteen stack slots
// (column-major, sp[-15] first). Uses the standard 2x2-subdeterminant
// (b00..b11) formulation to share work across cofactors.
static void Inverse4x4(VValue* sp) {
    F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3),
        a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2),
        a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1),
        a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf(  0);

    // Pairwise 2x2 determinants of the top and bottom halves of the matrix.
    F32 b00 = a00 * a11 - a01 * a10,
        b01 = a00 * a12 - a02 * a10,
        b02 = a00 * a13 - a03 * a10,
        b03 = a01 * a12 - a02 * a11,
        b04 = a01 * a13 - a03 * a11,
        b05 = a02 * a13 - a03 * a12,
        b06 = a20 * a31 - a21 * a30,
        b07 = a20 * a32 - a22 * a30,
        b08 = a20 * a33 - a23 * a30,
        b09 = a21 * a32 - a22 * a31,
        b10 = a21 * a33 - a23 * a31,
        b11 = a22 * a33 - a23 * a32;

    F32 idet = F32(1) /
               (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);

    // Fold 1/det into the subdeterminants so each output is a short sum.
    b00 *= idet;
    b01 *= idet;
    b02 *= idet;
    b03 *= idet;
    b04 *= idet;
    b05 *= idet;
    b06 *= idet;
    b07 *= idet;
    b08 *= idet;
    b09 *= idet;
    b10 *= idet;
    b11 *= idet;

    spf(-15) = a11 * b11 - a12 * b10 + a13 * b09;
    spf(-14) = a02 * b10 - a01 * b11 - a03 * b09;
    spf(-13) = a31 * b05 - a32 * b04 + a33 * b03;
    spf(-12) = a22 * b04 - a21 * b05 - a23 * b03;
    spf(-11) = a12 * b08 - a10 * b11 - a13 * b07;
    spf(-10) = a00 * b11 - a02 * b08 + a03 * b07;
    spf( -9) = a32 * b02 - a30 * b05 - a33 * b01;
    spf( -8) = a20 * b05 - a22 * b02 + a23 * b01;
    spf( -7) = a10 * b10 - a11 * b08 + a13 * b06;
    spf( -6) = a01 * b08 - a00 * b10 - a03 * b06;
    spf( -5) = a30 * b04 - a31 * b02 + a33 * b00;
    spf( -4) = a21 * b02 - a20 * b04 - a23 * b00;
    spf( -3) = a11 * b07 - a10 * b09 - a12 * b06;
    spf( -2) = a00 * b09 - a01 * b07 + a02 * b06;
    spf( -1) = a31 * b01 - a30 * b03 - a32 * b00;
    spf(  0) = a20 * b03 - a21 * b01 + a22 * b00;
}
371 | |
372 | static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack, |
373 | float* outReturn[], VValue globals[], const float uniforms[], |
374 | bool stripedOutput, int N, int baseIndex) { |
375 | // Needs to be the first N non-negative integers, at least as large as VecWidth |
376 | static const Interpreter::I32 gLanes = { |
377 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
378 | }; |
379 | |
380 | VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1; |
381 | |
382 | #define POP() (*(sp--)) |
383 | #define PUSH(v) (sp[1] = v, ++sp) |
384 | |
385 | const uint8_t* code = f->fCode.data(); |
386 | const uint8_t* ip = code; |
387 | std::vector<StackFrame> frames; |
388 | |
389 | I32 condStack[16]; // Independent condition masks |
390 | I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...) |
391 | I32 contStack[16]; // Continue flags for loops |
392 | I32 loopStack[16]; // Loop execution masks |
393 | condStack[0] = maskStack[0] = (gLanes < N); |
394 | contStack[0] = I32( 0); |
395 | loopStack[0] = I32(~0); |
396 | I32* condPtr = condStack; |
397 | I32* maskPtr = maskStack; |
398 | I32* contPtr = contStack; |
399 | I32* loopPtr = loopStack; |
400 | |
401 | if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) || |
402 | f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) { |
403 | return false; |
404 | } |
405 | |
406 | auto mask = [&]() { return *maskPtr & *loopPtr; }; |
407 | |
408 | for (;;) { |
409 | #ifdef TRACE |
410 | printf("at %3d " , (int) (ip - code)); |
411 | disassemble_instruction(ip); |
412 | printf(" (stack: %d)\n" , (int) (sp - stack) + 1); |
413 | #endif |
414 | ByteCodeInstruction inst = READ_INST(); |
415 | switch (inst) { |
416 | |
417 | VECTOR_BINARY_OP(kAddF, fFloat, +) |
418 | VECTOR_BINARY_OP(kAddI, fSigned, +) |
419 | |
420 | // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want: |
421 | VECTOR_BINARY_OP(kAndB, fSigned, &) |
422 | VECTOR_BINARY_OP(kOrB, fSigned, |) |
423 | VECTOR_BINARY_OP(kXorB, fSigned, ^) |
424 | VECTOR_UNARY_FN(kNotB, std::bit_not<>{}, fSigned) |
425 | |
426 | case ByteCodeInstruction::kBranch: |
427 | ip = code + READ16(); |
428 | continue; |
429 | |
430 | case ByteCodeInstruction::kCall: { |
431 | // Precursor code reserved space for the return value, and pushed all parameters to |
432 | // the stack. Update our bottom of stack to point at the first parameter, and our |
433 | // sp to point past those parameters (plus space for locals). |
434 | int target = READ8(); |
435 | const ByteCodeFunction* f = byteCode->fFunctions[target].get(); |
436 | if (skvx::any(mask())) { |
437 | frames.push_back({ code, ip, stack, f->fParameterCount }); |
438 | ip = code = f->fCode.data(); |
439 | stack = sp - f->fParameterCount + 1; |
440 | sp = stack + f->fParameterCount + f->fLocalCount - 1; |
441 | // As we did in runStriped(), zero locals so they're safe to mask-store into. |
442 | for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) { |
443 | stack[i].fFloat = 0.0f; |
444 | } |
445 | } |
446 | } continue; |
447 | |
448 | case ByteCodeInstruction::kCallExternal: |
449 | CallExternal(byteCode, ip, sp, baseIndex, mask()); |
450 | continue; |
451 | |
452 | VECTOR_UNARY_FN(kCeil, skvx::ceil, fFloat) |
453 | |
454 | case ByteCodeInstruction::kClampIndex: { |
455 | int length = READ8(); |
456 | if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) { |
457 | return false; |
458 | } |
459 | } continue; |
460 | |
461 | VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==) |
462 | VECTOR_BINARY_OP(kCompareFEQ, fFloat, ==) |
463 | VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=) |
464 | VECTOR_BINARY_OP(kCompareFNEQ, fFloat, !=) |
465 | VECTOR_BINARY_OP(kCompareSGT, fSigned, >) |
466 | VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >) |
467 | VECTOR_BINARY_OP(kCompareFGT, fFloat, >) |
468 | VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=) |
469 | VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=) |
470 | VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=) |
471 | VECTOR_BINARY_OP(kCompareSLT, fSigned, <) |
472 | VECTOR_BINARY_OP(kCompareULT, fUnsigned, <) |
473 | VECTOR_BINARY_OP(kCompareFLT, fFloat, <) |
474 | VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=) |
475 | VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=) |
476 | VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=) |
477 | |
478 | VECTOR_UNARY_FN(kConvertFtoI, skvx::cast<int>, fFloat) |
479 | VECTOR_UNARY_FN(kConvertStoF, skvx::cast<float>, fSigned) |
480 | VECTOR_UNARY_FN(kConvertUtoF, skvx::cast<float>, fUnsigned) |
481 | |
482 | VECTOR_UNARY_FN(kCos, skvx::cos, fFloat) |
483 | |
484 | VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /) |
485 | VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /) |
486 | VECTOR_BINARY_OP(kDivideF, fFloat, /) |
487 | |
488 | case ByteCodeInstruction::kDup: { |
489 | int count = READ8(); |
490 | memcpy(sp + 1, sp - count + 1, count * sizeof(VValue)); |
491 | sp += count; |
492 | } continue; |
493 | |
494 | VECTOR_UNARY_FN(kFloor, skvx::floor, fFloat) |
495 | VECTOR_UNARY_FN(kFract, skvx::fract, fFloat) |
496 | |
497 | case ByteCodeInstruction::kInverse2x2: |
498 | Inverse2x2(sp); |
499 | continue; |
500 | case ByteCodeInstruction::kInverse3x3: |
501 | Inverse3x3(sp); |
502 | continue; |
503 | case ByteCodeInstruction::kInverse4x4: |
504 | Inverse4x4(sp); |
505 | continue; |
506 | |
507 | case ByteCodeInstruction::kLerp: { |
508 | int count = READ8(); |
509 | VValue* T = sp - count + 1, |
510 | * B = T - count, |
511 | * A = B - count; |
512 | for (int i = count; i --> 0; ) { |
513 | A[i].fFloat += (B[i].fFloat - A[i].fFloat) * T[i].fFloat; |
514 | } |
515 | sp -= 2 * count; |
516 | } continue; |
517 | |
518 | case ByteCodeInstruction::kLoad: { |
519 | int count = READ8(), |
520 | slot = READ8(); |
521 | memcpy(sp + 1, stack + slot, count * sizeof(VValue)); |
522 | sp += count; |
523 | } continue; |
524 | |
525 | case ByteCodeInstruction::kLoadGlobal: { |
526 | int count = READ8(), |
527 | slot = READ8(); |
528 | memcpy(sp + 1, globals + slot, count * sizeof(VValue)); |
529 | sp += count; |
530 | } continue; |
531 | |
532 | case ByteCodeInstruction::kLoadUniform: { |
533 | int count = READ8(), |
534 | slot = READ8(); |
535 | for (int i = 0; i < count; ++i) { |
536 | sp[i + 1].fFloat = uniforms[slot + i]; |
537 | } |
538 | sp += count; |
539 | } continue; |
540 | |
541 | case ByteCodeInstruction::kLoadExtended: { |
542 | int count = READ8(); |
543 | I32 src = POP().fSigned; |
544 | I32 m = mask(); |
545 | for (int i = 0; i < count; ++i) { |
546 | for (int j = 0; j < VecWidth; ++j) { |
547 | if (m[j]) { |
548 | sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j]; |
549 | } |
550 | } |
551 | } |
552 | sp += count; |
553 | } continue; |
554 | |
555 | case ByteCodeInstruction::kLoadExtendedGlobal: { |
556 | int count = READ8(); |
557 | I32 src = POP().fSigned; |
558 | I32 m = mask(); |
559 | for (int i = 0; i < count; ++i) { |
560 | for (int j = 0; j < VecWidth; ++j) { |
561 | if (m[j]) { |
562 | sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j]; |
563 | } |
564 | } |
565 | } |
566 | sp += count; |
567 | } continue; |
568 | |
569 | case ByteCodeInstruction::kLoadExtendedUniform: { |
570 | int count = READ8(); |
571 | I32 src = POP().fSigned; |
572 | I32 m = mask(); |
573 | for (int i = 0; i < count; ++i) { |
574 | for (int j = 0; j < VecWidth; ++j) { |
575 | if (m[j]) { |
576 | sp[i + 1].fFloat[j] = uniforms[src[j] + i]; |
577 | } |
578 | } |
579 | } |
580 | sp += count; |
581 | } continue; |
582 | |
583 | case ByteCodeInstruction::kMatrixToMatrix: { |
584 | int srcCols = READ8(); |
585 | int srcRows = READ8(); |
586 | int dstCols = READ8(); |
587 | int dstRows = READ8(); |
588 | SkASSERT(srcCols >= 2 && srcCols <= 4); |
589 | SkASSERT(srcRows >= 2 && srcRows <= 4); |
590 | SkASSERT(dstCols >= 2 && dstCols <= 4); |
591 | SkASSERT(dstRows >= 2 && dstRows <= 4); |
592 | F32 tmp[16]; |
593 | memset(tmp, 0, sizeof(tmp)); |
594 | tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f); |
595 | for (int c = srcCols - 1; c >= 0; --c) { |
596 | for (int r = srcRows - 1; r >= 0; --r) { |
597 | tmp[c*4 + r] = POP().fFloat; |
598 | } |
599 | } |
600 | for (int c = 0; c < dstCols; ++c) { |
601 | for (int r = 0; r < dstRows; ++r) { |
602 | PUSH(tmp[c*4 + r]); |
603 | } |
604 | } |
605 | } continue; |
606 | |
607 | case ByteCodeInstruction::kMatrixMultiply: { |
608 | int lCols = READ8(); |
609 | int lRows = READ8(); |
610 | int rCols = READ8(); |
611 | int rRows = lCols; |
612 | F32 tmp[16] = { 0.0f }; |
613 | F32* B = &(sp - (rCols * rRows) + 1)->fFloat; |
614 | F32* A = B - (lCols * lRows); |
615 | for (int c = 0; c < rCols; ++c) { |
616 | for (int r = 0; r < lRows; ++r) { |
617 | for (int j = 0; j < lCols; ++j) { |
618 | tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j]; |
619 | } |
620 | } |
621 | } |
622 | sp -= (lCols * lRows) + (rCols * rRows); |
623 | memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue)); |
624 | sp += (rCols * lRows); |
625 | } continue; |
626 | |
627 | VECTOR_BINARY_FN(kMaxF, fFloat, skvx::max) |
628 | VECTOR_BINARY_FN(kMaxS, fSigned, skvx::max) |
629 | VECTOR_BINARY_FN(kMinF, fFloat, skvx::min) |
630 | VECTOR_BINARY_FN(kMinS, fSigned, skvx::min) |
631 | |
632 | case ByteCodeInstruction::kMix: { |
633 | int count = READ8(); |
634 | for (int i = count; i --> 0; ) { |
635 | // GLSL's arguments are mix(else, true, cond) |
636 | sp[-(2*count + i)] = skvx::if_then_else(sp[-( i)].fSigned, |
637 | sp[-( count + i)].fFloat, |
638 | sp[-(2*count + i)].fFloat); |
639 | } |
640 | sp -= 2 * count; |
641 | } continue; |
642 | |
643 | VECTOR_BINARY_OP(kMultiplyI, fSigned, *) |
644 | VECTOR_BINARY_OP(kMultiplyF, fFloat, *) |
645 | |
646 | VECTOR_UNARY_FN(kNegateF, std::negate<>{}, fFloat) |
647 | VECTOR_UNARY_FN(kNegateI, std::negate<>{}, fSigned) |
648 | |
649 | case ByteCodeInstruction::kPop: |
650 | sp -= READ8(); |
651 | continue; |
652 | |
653 | VECTOR_BINARY_FN(kPow, fFloat, skvx::pow) |
654 | |
655 | case ByteCodeInstruction::kPushImmediate: |
656 | PUSH(U32(READ32())); |
657 | continue; |
658 | |
659 | case ByteCodeInstruction::kReadExternal: { |
660 | int count = READ8(), |
661 | slot = READ8(); |
662 | SkASSERT(count <= 4); |
663 | float tmp[4]; |
664 | I32 m = mask(); |
665 | for (int i = 0; i < VecWidth; ++i) { |
666 | if (m[i]) { |
667 | byteCode->fExternalValues[slot]->read(baseIndex + i, tmp); |
668 | for (int j = 0; j < count; ++j) { |
669 | sp[j + 1].fFloat[i] = tmp[j]; |
670 | } |
671 | } |
672 | } |
673 | sp += count; |
674 | } continue; |
675 | |
676 | VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod) |
677 | VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %) |
678 | VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %) |
679 | |
680 | case ByteCodeInstruction::kReserve: |
681 | sp += READ8(); |
682 | continue; |
683 | |
684 | case ByteCodeInstruction::kReturn: { |
685 | int count = READ8(); |
686 | if (frames.empty()) { |
687 | if (outReturn) { |
688 | VValue* src = sp - count + 1; |
689 | if (stripedOutput) { |
690 | for (int i = 0; i < count; ++i) { |
691 | memcpy(outReturn[i], &src->fFloat, N * sizeof(float)); |
692 | ++src; |
693 | } |
694 | } else { |
695 | float* outPtr = outReturn[0]; |
696 | for (int i = 0; i < count; ++i) { |
697 | for (int j = 0; j < N; ++j) { |
698 | outPtr[count * j] = src->fFloat[j]; |
699 | } |
700 | ++outPtr; |
701 | ++src; |
702 | } |
703 | } |
704 | } |
705 | return true; |
706 | } else { |
707 | // When we were called, the caller reserved stack space for their copy of our |
708 | // return value, then 'stack' was positioned after that, where our parameters |
709 | // were placed. Copy our return values to their reserved area. |
710 | memcpy(stack - count, sp - count + 1, count * sizeof(VValue)); |
711 | |
712 | // Now move the stack pointer to the end of the passed-in parameters. This odd |
713 | // calling convention requires the caller to pop the arguments after calling, |
714 | // but allows them to store any out-parameters back during that unwinding. |
715 | // After that sequence finishes, the return value will be the top of the stack. |
716 | const StackFrame& frame(frames.back()); |
717 | sp = stack + frame.fParameterCount - 1; |
718 | stack = frame.fStack; |
719 | code = frame.fCode; |
720 | ip = frame.fIP; |
721 | frames.pop_back(); |
722 | } |
723 | } continue; |
724 | |
725 | case ByteCodeInstruction::kScalarToMatrix: { |
726 | int cols = READ8(); |
727 | int rows = READ8(); |
728 | VValue v = POP(); |
729 | for (int c = 0; c < cols; ++c) { |
730 | for (int r = 0; r < rows; ++r) { |
731 | PUSH(c == r ? v : F32(0.0f)); |
732 | } |
733 | } |
734 | } continue; |
735 | |
736 | case ByteCodeInstruction::kShiftLeft: |
737 | sp[0] = sp[0].fSigned << READ8(); |
738 | continue; |
739 | case ByteCodeInstruction::kShiftRightS: |
740 | sp[0] = sp[0].fSigned >> READ8(); |
741 | continue; |
742 | case ByteCodeInstruction::kShiftRightU: |
743 | sp[0] = sp[0].fUnsigned >> READ8(); |
744 | continue; |
745 | |
746 | VECTOR_UNARY_FN(kSin, skvx::sin, fFloat) |
747 | VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat) |
748 | |
749 | case ByteCodeInstruction::kStore: { |
750 | int count = READ8(), |
751 | slot = READ8(); |
752 | auto m = mask(); |
753 | for (int i = count; i --> 0; ) { |
754 | stack[slot+i] = skvx::if_then_else(m, POP().fFloat, stack[slot+i].fFloat); |
755 | } |
756 | } continue; |
757 | |
758 | case ByteCodeInstruction::kStoreGlobal: { |
759 | int count = READ8(), |
760 | slot = READ8(); |
761 | auto m = mask(); |
762 | for (int i = count; i --> 0; ) { |
763 | globals[slot+i] = skvx::if_then_else(m, POP().fFloat, globals[slot+i].fFloat); |
764 | } |
765 | } continue; |
766 | |
767 | case ByteCodeInstruction::kStoreExtended: { |
768 | int count = READ8(); |
769 | I32 target = POP().fSigned; |
770 | VValue* src = sp - count + 1; |
771 | I32 m = mask(); |
772 | for (int i = 0; i < count; ++i) { |
773 | for (int j = 0; j < VecWidth; ++j) { |
774 | if (m[j]) { |
775 | stack[target[j] + i].fSigned[j] = src[i].fSigned[j]; |
776 | } |
777 | } |
778 | } |
779 | sp -= count; |
780 | } continue; |
781 | |
782 | case ByteCodeInstruction::kStoreExtendedGlobal: { |
783 | int count = READ8(); |
784 | I32 target = POP().fSigned; |
785 | VValue* src = sp - count + 1; |
786 | I32 m = mask(); |
787 | for (int i = 0; i < count; ++i) { |
788 | for (int j = 0; j < VecWidth; ++j) { |
789 | if (m[j]) { |
790 | globals[target[j] + i].fSigned[j] = src[i].fSigned[j]; |
791 | } |
792 | } |
793 | } |
794 | sp -= count; |
795 | } continue; |
796 | |
797 | VECTOR_BINARY_OP(kSubtractI, fSigned, -) |
798 | VECTOR_BINARY_OP(kSubtractF, fFloat, -) |
799 | |
800 | case ByteCodeInstruction::kSwizzle: { |
801 | VValue tmp[4]; |
802 | for (int i = READ8() - 1; i >= 0; --i) { |
803 | tmp[i] = POP(); |
804 | } |
805 | for (int i = READ8() - 1; i >= 0; --i) { |
806 | PUSH(tmp[READ8()]); |
807 | } |
808 | } continue; |
809 | |
810 | VECTOR_UNARY_FN(kATan, skvx::atan, fFloat) |
811 | VECTOR_UNARY_FN(kTan, skvx::tan, fFloat) |
812 | |
813 | case ByteCodeInstruction::kWriteExternal: { |
814 | int count = READ8(), |
815 | slot = READ8(); |
816 | SkASSERT(count <= 4); |
817 | float tmp[4]; |
818 | I32 m = mask(); |
819 | sp -= count; |
820 | for (int i = 0; i < VecWidth; ++i) { |
821 | if (m[i]) { |
822 | for (int j = 0; j < count; ++j) { |
823 | tmp[j] = sp[j + 1].fFloat[i]; |
824 | } |
825 | byteCode->fExternalValues[slot]->write(baseIndex + i, tmp); |
826 | } |
827 | } |
828 | } continue; |
829 | |
830 | case ByteCodeInstruction::kMaskPush: |
831 | condPtr[1] = POP().fSigned; |
832 | maskPtr[1] = maskPtr[0] & condPtr[1]; |
833 | ++condPtr; ++maskPtr; |
834 | continue; |
835 | case ByteCodeInstruction::kMaskPop: |
836 | --condPtr; --maskPtr; |
837 | continue; |
838 | case ByteCodeInstruction::kMaskNegate: |
839 | maskPtr[0] = maskPtr[-1] & ~condPtr[0]; |
840 | continue; |
841 | case ByteCodeInstruction::kMaskBlend: { |
842 | int count = READ8(); |
843 | I32 m = condPtr[0]; |
844 | --condPtr; --maskPtr; |
845 | for (int i = 0; i < count; ++i) { |
846 | sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat); |
847 | --sp; |
848 | } |
849 | } continue; |
850 | case ByteCodeInstruction::kBranchIfAllFalse: { |
851 | int target = READ16(); |
852 | if (!skvx::any(mask())) { |
853 | ip = code + target; |
854 | } |
855 | } continue; |
856 | |
857 | case ByteCodeInstruction::kLoopBegin: |
858 | contPtr[1] = 0; |
859 | loopPtr[1] = loopPtr[0]; |
860 | ++contPtr; ++loopPtr; |
861 | continue; |
862 | case ByteCodeInstruction::kLoopNext: |
863 | *loopPtr |= *contPtr; |
864 | *contPtr = 0; |
865 | continue; |
866 | case ByteCodeInstruction::kLoopMask: |
867 | *loopPtr &= POP().fSigned; |
868 | continue; |
869 | case ByteCodeInstruction::kLoopEnd: |
870 | --contPtr; --loopPtr; |
871 | continue; |
872 | case ByteCodeInstruction::kLoopBreak: |
873 | *loopPtr &= ~mask(); |
874 | continue; |
875 | case ByteCodeInstruction::kLoopContinue: { |
876 | I32 m = mask(); |
877 | *contPtr |= m; |
878 | *loopPtr &= ~m; |
879 | } continue; |
880 | |
881 | case ByteCodeInstruction::kLoadFragCoord: |
882 | case ByteCodeInstruction::kSample: |
883 | case ByteCodeInstruction::kSampleExplicit: |
884 | case ByteCodeInstruction::kSampleMatrix: |
885 | default: |
886 | // TODO: Support these? |
887 | SkASSERT(false); |
888 | return false; |
889 | } |
890 | } |
891 | } |
892 | |
893 | }; // class Interpreter |
894 | |
895 | #endif // SK_ENABLE_SKSL_INTERPRETER |
896 | |
897 | #undef spf |
898 | |
899 | void ByteCodeFunction::disassemble() const { |
900 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
901 | const uint8_t* ip = fCode.data(); |
902 | while (ip < fCode.data() + fCode.size()) { |
903 | printf("%d: " , (int)(ip - fCode.data())); |
904 | ip = Interpreter::DisassembleInstruction(ip); |
905 | printf("\n" ); |
906 | } |
907 | #endif |
908 | } |
909 | |
910 | bool ByteCode::run(const ByteCodeFunction* f, |
911 | float* args, int argCount, |
912 | float* outReturn, int returnCount, |
913 | const float* uniforms, int uniformCount) const { |
914 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
915 | Interpreter::VValue stack[128]; |
916 | int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount; |
917 | if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) { |
918 | return false; |
919 | } |
920 | |
921 | if (argCount != f->fParameterCount || |
922 | returnCount != f->fReturnCount || |
923 | uniformCount != fUniformSlotCount) { |
924 | return false; |
925 | } |
926 | |
927 | Interpreter::VValue globals[32]; |
928 | if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) { |
929 | return false; |
930 | } |
931 | |
932 | // Transpose args into stack |
933 | { |
934 | float* src = args; |
935 | float* dst = (float*)stack; |
936 | for (int i = 0; i < argCount; ++i) { |
937 | *dst = *src++; |
938 | dst += VecWidth; |
939 | } |
940 | } |
941 | |
942 | bool stripedOutput = false; |
943 | float** outArray = outReturn ? &outReturn : nullptr; |
944 | if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) { |
945 | return false; |
946 | } |
947 | |
948 | // Transpose out parameters back |
949 | { |
950 | float* dst = args; |
951 | float* src = (float*)stack; |
952 | for (const auto& p : f->fParameters) { |
953 | if (p.fIsOutParameter) { |
954 | for (int i = p.fSlotCount; i > 0; --i) { |
955 | *dst++ = *src; |
956 | src += VecWidth; |
957 | } |
958 | } else { |
959 | dst += p.fSlotCount; |
960 | src += p.fSlotCount * VecWidth; |
961 | } |
962 | } |
963 | } |
964 | |
965 | return true; |
966 | #else |
967 | SkDEBUGFAIL("ByteCode interpreter not enabled" ); |
968 | return false; |
969 | #endif |
970 | } |
971 | |
// Evaluates 'f' over N lanes of striped (structure-of-arrays) data: args[i]
// and outReturn[i] each point at N contiguous floats for slot i. Work is
// done in chunks of at most VecWidth lanes per InnerRun call.
// Returns false (without running) on any size/count mismatch.
bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
                          float* args[], int argCount,
                          float* outReturn[], int returnCount,
                          const float* uniforms, int uniformCount) const {
#if defined(SK_ENABLE_SKSL_INTERPRETER)
    Interpreter::VValue stack[192];
    int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
    if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
        return false;  // function needs more stack slots than we allocate here
    }

    // The caller-supplied counts must agree with the compiled function.
    if (argCount != f->fParameterCount ||
        returnCount != f->fReturnCount ||
        uniformCount != fUniformSlotCount) {
        return false;
    }

    Interpreter::VValue globals[32];
    if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
        return false;
    }

    // innerRun just takes outArgs, so clear it if the count is zero
    if (returnCount == 0) {
        outReturn = nullptr;
    }

    // The instructions to store to locals and globals mask in the original value,
    // so they technically need to be initialized (to any value).
    for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
        stack[i].fFloat = 0.0f;
    }
    for (int i = 0; i < fGlobalSlotCount; i++) {
        globals[i].fFloat = 0.0f;
    }

    // baseIndex tracks how many lanes earlier chunks consumed; InnerRun passes
    // it on to external-value reads/writes so they see absolute lane indices.
    int baseIndex = 0;

    while (N) {
        // Process up to one full vector's worth of lanes this iteration.
        int w = std::min(N, VecWidth);

        // Copy args into stack (each slot holds w striped floats)
        for (int i = 0; i < argCount; ++i) {
            memcpy((void*)(stack + i), args[i], w * sizeof(float));
        }

        bool stripedOutput = true;
        if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
                                   baseIndex)) {
            return false;
        }

        // Copy out parameters back (slot walks all parameter slots; only
        // out-parameter slots are written back)
        int slot = 0;
        for (const auto& p : f->fParameters) {
            if (p.fIsOutParameter) {
                for (int i = slot; i < slot + p.fSlotCount; ++i) {
                    memcpy(args[i], stack + i, w * sizeof(float));
                }
            }
            slot += p.fSlotCount;
        }

        // Step each argument pointer ahead
        // NOTE(review): only args[] pointers advance between chunks; the
        // outReturn[] pointers do not. That is only correct if InnerRun offsets
        // its return writes (e.g. by baseIndex) — confirm against InnerRun's
        // kReturn handling for the N > VecWidth case.
        for (int i = 0; i < argCount; ++i) {
            args[i] += w;
        }
        N -= w;
        baseIndex += w;
    }

    return true;
#else
    SkDEBUGFAIL("ByteCode interpreter not enabled" );
    return false;
#endif
}
1049 | |
1050 | } // namespace SkSL |
1051 | |
1052 | #endif |
1053 | |