1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #ifndef SKSL_STANDALONE |
9 | |
10 | #include "include/core/SkPoint3.h" |
11 | #include "include/private/SkVx.h" |
12 | #include "src/core/SkUtils.h" // sk_unaligned_load |
13 | #include "src/sksl/SkSLByteCode.h" |
14 | #include "src/sksl/SkSLByteCodeGenerator.h" |
15 | #include "src/sksl/SkSLExternalValue.h" |
16 | |
17 | #include <vector> |
18 | |
19 | namespace SkSL { |
20 | |
21 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
22 | |
23 | constexpr int VecWidth = ByteCode::kVecWidth; |
24 | |
25 | struct Interpreter { |
26 | |
27 | using F32 = skvx::Vec<VecWidth, float>; |
28 | using I32 = skvx::Vec<VecWidth, int32_t>; |
29 | using U32 = skvx::Vec<VecWidth, uint32_t>; |
30 | |
31 | #define READ8() (*(ip++)) |
32 | #define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2)) |
33 | #define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4)) |
34 | #define READ_INST() (ip += sizeof(instruction), \ |
35 | sk_unaligned_load<instruction>(ip - sizeof(instruction))) |
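
// The bytecode stream is a sequence of opcodes, each followed by its operands: READ8/16/32 pull
// fixed-size operands, and READ_INST pulls the next opcode itself. When SKSLC_THREADED_CODE is
// enabled, preprocess() has already overwritten each opcode with the address of its handler
// label, so READ_INST yields a jump target rather than a raw ByteCodeInstruction.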
36 | |
37 | #define VECTOR_DISASSEMBLE(op, text) \ |
38 | case ByteCodeInstruction::op: printf(text); ++ip; break; \ |
39 | case ByteCodeInstruction::op##2: printf(text "2"); ++ip; break; \ |
40 | case ByteCodeInstruction::op##3: printf(text "3"); ++ip; break; \ |
41 | case ByteCodeInstruction::op##4: printf(text "4"); ++ip; break; |
42 | |
43 | #define VECTOR_DISASSEMBLE_NO_COUNT(op, text) \ |
44 | case ByteCodeInstruction::op: printf(text); break; \ |
45 | case ByteCodeInstruction::op##2: printf(text "2"); break; \ |
46 | case ByteCodeInstruction::op##3: printf(text "3"); break; \ |
47 | case ByteCodeInstruction::op##4: printf(text "4"); break; |
48 | |
49 | #define VECTOR_MATRIX_DISASSEMBLE(op, text) \ |
50 | VECTOR_DISASSEMBLE(op, text) \ |
51 | case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break; |
52 | |
53 | #define VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(op, text) \ |
54 | VECTOR_DISASSEMBLE_NO_COUNT(op, text) \ |
55 | case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break; |
56 | |
57 | static const uint8_t* DisassembleInstruction(const uint8_t* ip) { |
58 | auto inst = (ByteCodeInstruction) (intptr_t) READ_INST(); |
59 | printf("%04x " , (int)inst); |
60 | switch (inst) { |
61 | VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf" ) |
62 | VECTOR_DISASSEMBLE(kAddI, "addi" ) |
63 | case ByteCodeInstruction::kAndB: printf("andb" ); break; |
64 | case ByteCodeInstruction::kBranch: printf("branch %d" , READ16()); break; |
65 | case ByteCodeInstruction::kCall: printf("call %d" , READ8()); break; |
66 | case ByteCodeInstruction::kCallExternal: { |
67 | int argumentCount = READ8(); |
68 | int returnCount = READ8(); |
69 | int externalValue = READ8(); |
70 | printf("callexternal %d, %d, %d" , argumentCount, returnCount, externalValue); |
71 | break; |
72 | } |
73 | case ByteCodeInstruction::kClampIndex: printf("clampindex %d" , READ8()); break; |
74 | VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq" ) |
75 | VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq" ) |
76 | VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq" ) |
77 | VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq" ) |
78 | VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt" ) |
79 | VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq" ) |
80 | VECTOR_DISASSEMBLE(kCompareFLT, "compareflt" ) |
81 | VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq" ) |
82 | VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt" ) |
83 | VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq" ) |
84 | VECTOR_DISASSEMBLE(kCompareSLT, "compareslt" ) |
85 | VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq" ) |
86 | VECTOR_DISASSEMBLE(kCompareUGT, "compareugt" ) |
87 | VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq" ) |
88 | VECTOR_DISASSEMBLE(kCompareULT, "compareult" ) |
89 | VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq" ) |
90 | VECTOR_DISASSEMBLE_NO_COUNT(kConvertFtoI, "convertftoi" ) |
91 | VECTOR_DISASSEMBLE_NO_COUNT(kConvertStoF, "convertstof" ) |
92 | VECTOR_DISASSEMBLE_NO_COUNT(kConvertUtoF, "convertutof" ) |
93 | VECTOR_DISASSEMBLE(kCos, "cos" ) |
94 | VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef" ) |
        VECTOR_DISASSEMBLE(kDivideS, "divides")
96 | VECTOR_DISASSEMBLE(kDivideU, "divideu" ) |
97 | VECTOR_MATRIX_DISASSEMBLE(kDup, "dup" ) |
98 | case ByteCodeInstruction::kInverse2x2: printf("inverse2x2" ); break; |
99 | case ByteCodeInstruction::kInverse3x3: printf("inverse3x3" ); break; |
100 | case ByteCodeInstruction::kInverse4x4: printf("inverse4x4" ); break; |
101 | case ByteCodeInstruction::kLoad: printf("load %d" , READ16() >> 8); break; |
102 | case ByteCodeInstruction::kLoad2: printf("load2 %d" , READ16() >> 8); break; |
103 | case ByteCodeInstruction::kLoad3: printf("load3 %d" , READ16() >> 8); break; |
104 | case ByteCodeInstruction::kLoad4: printf("load4 %d" , READ16() >> 8); break; |
105 | case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d" , READ16() >> 8); break; |
106 | case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d" , READ16() >> 8); break; |
107 | case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d" , READ16() >> 8); break; |
108 | case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d" , READ16() >> 8); break; |
109 | case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d" , READ16() >> 8); break; |
110 | case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d" , READ16() >> 8); break; |
111 | case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d" , READ16() >> 8); break; |
112 | case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d" , READ16() >> 8); break; |
113 | case ByteCodeInstruction::kLoadSwizzle: { |
114 | int target = READ8(); |
115 | int count = READ8(); |
116 | printf("loadswizzle %d %d" , target, count); |
117 | for (int i = 0; i < count; ++i) { |
118 | printf(", %d" , READ8()); |
119 | } |
120 | break; |
121 | } |
122 | case ByteCodeInstruction::kLoadSwizzleGlobal: { |
123 | int target = READ8(); |
124 | int count = READ8(); |
125 | printf("loadswizzleglobal %d %d" , target, count); |
126 | for (int i = 0; i < count; ++i) { |
127 | printf(", %d" , READ8()); |
128 | } |
129 | break; |
130 | } |
131 | case ByteCodeInstruction::kLoadSwizzleUniform: { |
132 | int target = READ8(); |
133 | int count = READ8(); |
134 | printf("loadswizzleuniform %d %d" , target, count); |
135 | for (int i = 0; i < count; ++i) { |
136 | printf(", %d" , READ8()); |
137 | } |
138 | break; |
139 | } |
140 | case ByteCodeInstruction::kLoadExtended: printf("loadextended %d" , READ8()); break; |
141 | case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d" , READ8()); |
142 | break; |
143 | case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d" , READ8()); |
144 | break; |
145 | case ByteCodeInstruction::kMatrixToMatrix: { |
146 | int srcCols = READ8(); |
147 | int srcRows = READ8(); |
148 | int dstCols = READ8(); |
149 | int dstRows = READ8(); |
150 | printf("matrixtomatrix %dx%d %dx%d" , srcCols, srcRows, dstCols, dstRows); |
151 | break; |
152 | } |
        case ByteCodeInstruction::kMatrixMultiply: {
            int lCols = READ8();
            int lRows = READ8();
            int rCols = READ8();
            int rRows = lCols;  // the inner dimensions must match
            printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, rRows);
            break;
        }
160 | VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf" ) |
161 | VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi" ) |
162 | VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kNegateF, "negatef" ) |
163 | VECTOR_DISASSEMBLE_NO_COUNT(kNegateI, "negatei" ) |
164 | case ByteCodeInstruction::kNotB: printf("notb" ); break; |
165 | case ByteCodeInstruction::kOrB: printf("orb" ); break; |
166 | VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kPop, "pop" ) |
167 | case ByteCodeInstruction::kPushImmediate: { |
168 | uint32_t v = READ32(); |
169 | union { uint32_t u; float f; } pun = { v }; |
170 | printf("pushimmediate %s" , (to_string(v) + "(" + to_string(pun.f) + ")" ).c_str()); |
171 | break; |
172 | } |
173 | case ByteCodeInstruction::kReadExternal: printf("readexternal %d" , READ16() >> 8); break; |
174 | case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d" , READ16() >> 8); break; |
175 | case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d" , READ16() >> 8); break; |
176 | case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d" , READ16() >> 8); break; |
177 | VECTOR_DISASSEMBLE(kRemainderF, "remainderf" ) |
178 | VECTOR_DISASSEMBLE(kRemainderS, "remainders" ) |
179 | VECTOR_DISASSEMBLE(kRemainderU, "remainderu" ) |
180 | case ByteCodeInstruction::kReserve: printf("reserve %d" , READ8()); break; |
181 | case ByteCodeInstruction::kReturn: printf("return %d" , READ8()); break; |
182 | case ByteCodeInstruction::kScalarToMatrix: { |
183 | int cols = READ8(); |
184 | int rows = READ8(); |
185 | printf("scalartomatrix %dx%d" , cols, rows); |
186 | break; |
187 | } |
188 | case ByteCodeInstruction::kShiftLeft: printf("shl %d" , READ8()); break; |
189 | case ByteCodeInstruction::kShiftRightS: printf("shrs %d" , READ8()); break; |
190 | case ByteCodeInstruction::kShiftRightU: printf("shru %d" , READ8()); break; |
191 | VECTOR_DISASSEMBLE(kSin, "sin" ) |
192 | VECTOR_DISASSEMBLE_NO_COUNT(kSqrt, "sqrt" ) |
193 | case ByteCodeInstruction::kStore: printf("store %d" , READ8()); break; |
194 | case ByteCodeInstruction::kStore2: printf("store2 %d" , READ8()); break; |
195 | case ByteCodeInstruction::kStore3: printf("store3 %d" , READ8()); break; |
196 | case ByteCodeInstruction::kStore4: printf("store4 %d" , READ8()); break; |
197 | case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d" , READ8()); break; |
198 | case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d" , READ8()); break; |
199 | case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d" , READ8()); break; |
200 | case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d" , READ8()); break; |
201 | case ByteCodeInstruction::kStoreSwizzle: { |
202 | int target = READ8(); |
203 | int count = READ8(); |
204 | printf("storeswizzle %d %d" , target, count); |
205 | for (int i = 0; i < count; ++i) { |
206 | printf(", %d" , READ8()); |
207 | } |
208 | break; |
209 | } |
210 | case ByteCodeInstruction::kStoreSwizzleGlobal: { |
211 | int target = READ8(); |
212 | int count = READ8(); |
213 | printf("storeswizzleglobal %d %d" , target, count); |
214 | for (int i = 0; i < count; ++i) { |
215 | printf(", %d" , READ8()); |
216 | } |
217 | break; |
218 | } |
219 | case ByteCodeInstruction::kStoreSwizzleIndirect: { |
220 | int count = READ8(); |
221 | printf("storeswizzleindirect %d" , count); |
222 | for (int i = 0; i < count; ++i) { |
223 | printf(", %d" , READ8()); |
224 | } |
225 | break; |
226 | } |
227 | case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: { |
228 | int count = READ8(); |
229 | printf("storeswizzleindirectglobal %d" , count); |
230 | for (int i = 0; i < count; ++i) { |
231 | printf(", %d" , READ8()); |
232 | } |
233 | break; |
234 | } |
235 | case ByteCodeInstruction::kStoreExtended: printf("storeextended %d" , READ8()); break; |
236 | case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d" , READ8()); |
237 | break; |
238 | VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf" ) |
239 | VECTOR_DISASSEMBLE(kSubtractI, "subtracti" ) |
240 | case ByteCodeInstruction::kSwizzle: { |
241 | printf("swizzle %d, " , READ8()); |
242 | int count = READ8(); |
243 | printf("%d" , count); |
244 | for (int i = 0; i < count; ++i) { |
245 | printf(", %d" , READ8()); |
246 | } |
247 | break; |
248 | } |
249 | VECTOR_DISASSEMBLE(kTan, "tan" ) |
250 | case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d" , READ16() >> 8); break; |
251 | case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d" , READ16() >> 8); break; |
252 | case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d" , READ16() >> 8); break; |
253 | case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d" , READ16() >> 8); break; |
254 | case ByteCodeInstruction::kXorB: printf("xorb" ); break; |
255 | case ByteCodeInstruction::kMaskPush: printf("maskpush" ); break; |
256 | case ByteCodeInstruction::kMaskPop: printf("maskpop" ); break; |
257 | case ByteCodeInstruction::kMaskNegate: printf("masknegate" ); break; |
258 | case ByteCodeInstruction::kMaskBlend: printf("maskblend %d" , READ8()); break; |
259 | case ByteCodeInstruction::kBranchIfAllFalse: |
260 | printf("branchifallfalse %d" , READ16()); |
261 | break; |
262 | case ByteCodeInstruction::kLoopBegin: printf("loopbegin" ); break; |
263 | case ByteCodeInstruction::kLoopNext: printf("loopnext" ); break; |
264 | case ByteCodeInstruction::kLoopMask: printf("loopmask" ); break; |
265 | case ByteCodeInstruction::kLoopEnd: printf("loopend" ); break; |
266 | case ByteCodeInstruction::kLoopContinue: printf("loopcontinue" ); break; |
267 | case ByteCodeInstruction::kLoopBreak: printf("loopbreak" ); break; |
268 | default: |
269 | ip -= sizeof(instruction); |
270 | printf("unknown(%d)\n" , (int) (intptr_t) READ_INST()); |
271 | SkASSERT(false); |
272 | } |
273 | return ip; |
274 | } |
275 | |
276 | #ifdef SKSLC_THREADED_CODE |
277 | #define LABEL(name) name: |
278 | #ifdef TRACE |
279 | #define NEXT() goto next |
280 | #else |
281 | #define NEXT() goto *READ_INST() |
282 | #endif |
283 | #else |
284 | #define LABEL(name) case ByteCodeInstruction::name: |
285 | #define NEXT() continue |
286 | #endif |
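
// With threaded code, each handler finishes by fetching the next instruction's label address and
// jumping straight to it; otherwise the handlers are switch cases driven by the dispatch loop in
// InnerRun(). The vector ops below come in widths 1-4 (suffixes 2/3/4), with the width stored in
// a trailing operand byte: the wider labels fall through to the narrower ones, and sp[-ip[0]]
// reaches back across the already-popped components to the matching component of the left
// operand, so a single tail serves every width.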
287 | |
288 | #define VECTOR_BINARY_OP(base, field, op) \ |
289 | LABEL(base ## 4) \ |
290 | sp[-4] = sp[-4].field op sp[0].field; \ |
291 | POP(); \ |
292 | /* fall through */ \ |
293 | LABEL(base ## 3) { \ |
294 | sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \ |
295 | POP(); \ |
296 | } /* fall through */ \ |
297 | LABEL(base ## 2) { \ |
298 | sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \ |
299 | POP(); \ |
300 | } /* fall through */ \ |
301 | LABEL(base) { \ |
302 | sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \ |
303 | POP(); \ |
304 | ++ip; \ |
305 | NEXT(); \ |
306 | } |
307 | |
308 | // A naive implementation of / or % using skvx operations will likely crash with a divide by zero |
// in inactive vector lanes, so we need to be sure to skip masked-off lanes.
310 | #define VECTOR_BINARY_MASKED_OP(base, field, op) \ |
311 | LABEL(base ## 4) \ |
312 | for (int i = 0; i < VecWidth; ++i) { \ |
313 | if (mask()[i]) { \ |
314 | sp[-4].field[i] op ## = sp[0].field[i]; \ |
315 | } \ |
316 | } \ |
317 | POP(); \ |
318 | /* fall through */ \ |
319 | LABEL(base ## 3) { \ |
320 | for (int i = 0; i < VecWidth; ++i) { \ |
321 | if (mask()[i]) { \ |
322 | sp[-ip[0]].field[i] op ## = sp[0].field[i]; \ |
323 | } \ |
324 | } \ |
325 | POP(); \ |
326 | } /* fall through */ \ |
327 | LABEL(base ## 2) { \ |
328 | for (int i = 0; i < VecWidth; ++i) { \ |
329 | if (mask()[i]) { \ |
330 | sp[-ip[0]].field[i] op ## = sp[0].field[i]; \ |
331 | } \ |
332 | } \ |
333 | POP(); \ |
334 | } /* fall through */ \ |
335 | LABEL(base) { \ |
336 | for (int i = 0; i < VecWidth; ++i) { \ |
337 | if (mask()[i]) { \ |
338 | sp[-ip[0]].field[i] op ## = sp[0].field[i]; \ |
339 | } \ |
340 | } \ |
341 | POP(); \ |
342 | ++ip; \ |
343 | NEXT(); \ |
344 | } |
345 | |
346 | |
347 | #define VECTOR_MATRIX_BINARY_OP(base, field, op) \ |
348 | VECTOR_BINARY_OP(base, field, op) \ |
349 | LABEL(base ## N) { \ |
350 | int count = READ8(); \ |
351 | for (int i = count; i > 0; --i) { \ |
352 | sp[-count] = sp[-count].field op sp[0].field; \ |
353 | POP(); \ |
354 | } \ |
355 | NEXT(); \ |
356 | } |
357 | |
358 | #define VECTOR_BINARY_FN(base, field, fn) \ |
359 | LABEL(base ## 4) \ |
360 | sp[-4] = fn(sp[-4].field, sp[0].field); \ |
361 | POP(); \ |
362 | /* fall through */ \ |
363 | LABEL(base ## 3) { \ |
364 | sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \ |
365 | POP(); \ |
366 | } /* fall through */ \ |
367 | LABEL(base ## 2) { \ |
368 | sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \ |
369 | POP(); \ |
370 | } /* fall through */ \ |
371 | LABEL(base) { \ |
372 | sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \ |
373 | POP(); \ |
374 | ++ip; \ |
375 | NEXT(); \ |
376 | } |
377 | |
378 | #define VECTOR_UNARY_FN(base, fn, field) \ |
379 | LABEL(base ## 4) sp[-3] = fn(sp[-3].field); \ |
380 | LABEL(base ## 3) sp[-2] = fn(sp[-2].field); \ |
381 | LABEL(base ## 2) sp[-1] = fn(sp[-1].field); \ |
382 | LABEL(base) sp[ 0] = fn(sp[ 0].field); \ |
383 | NEXT(); |
384 | |
385 | #define VECTOR_UNARY_FN_VEC(base, fn) \ |
386 | LABEL(base ## 4) \ |
387 | LABEL(base ## 3) \ |
388 | LABEL(base ## 2) \ |
389 | LABEL(base) { \ |
390 | int count = READ8(); \ |
391 | float* v = (float*)sp - count + 1; \ |
392 | for (int i = VecWidth * count; i > 0; --i, ++v) { \ |
393 | *v = fn(*v); \ |
394 | } \ |
395 | NEXT(); \ |
396 | } |
397 | |
398 | #define VECTOR_LABELS(base) \ |
399 | &&base ## 4, \ |
400 | &&base ## 3, \ |
401 | &&base ## 2, \ |
402 | &&base |
403 | |
404 | #define VECTOR_MATRIX_LABELS(base) \ |
405 | VECTOR_LABELS(base), \ |
406 | &&base ## N |
407 | |
408 | // If you trip this assert, it means that the order of the opcodes listed in ByteCodeInstruction |
// does not match the order of the opcodes listed in the 'labels' array in InnerRun().
410 | #define CHECK_LABEL(name) \ |
411 | SkASSERT(labels[(int) ByteCodeInstruction::name] == &&name) |
412 | |
413 | #define CHECK_VECTOR_LABELS(name) \ |
414 | CHECK_LABEL(name ## 4); \ |
415 | CHECK_LABEL(name ## 3); \ |
416 | CHECK_LABEL(name ## 2); \ |
417 | CHECK_LABEL(name) |
418 | |
419 | #define CHECK_VECTOR_MATRIX_LABELS(name) \ |
420 | CHECK_VECTOR_LABELS(name); \ |
421 | CHECK_LABEL(name ## N) |
422 | |
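// One virtual stack slot: a VecWidth-wide SIMD value, viewable as float, signed, or unsigned
// depending on the instruction operating on it.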
423 | union VValue { |
424 | VValue() {} |
425 | VValue(F32 f) : fFloat(f) {} |
426 | VValue(I32 s) : fSigned(s) {} |
427 | VValue(U32 u) : fUnsigned(u) {} |
428 | |
429 | F32 fFloat; |
430 | I32 fSigned; |
431 | U32 fUnsigned; |
432 | }; |
433 | |
434 | struct StackFrame { |
435 | const uint8_t* fCode; |
436 | const uint8_t* fIP; |
437 | VValue* fStack; |
438 | int fParameterCount; |
439 | }; |
440 | |
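// Floating-point remainder with C fmod semantics (truncated division): a - trunc(a / b) * b,
// computed across all lanes at once.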
441 | static F32 VecMod(F32 a, F32 b) { |
442 | return a - skvx::trunc(a / b) * b; |
443 | } |
444 | |
445 | #define spf(index) sp[index].fFloat |
446 | |
447 | static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp, |
448 | int baseIndex, I32 mask) { |
449 | int argumentCount = READ8(); |
450 | int returnCount = READ8(); |
451 | int target = READ8(); |
452 | ExternalValue* v = byteCode->fExternalValues[target]; |
453 | sp -= argumentCount - 1; |
454 | |
455 | float tmpArgs[4]; |
456 | float tmpReturn[4]; |
457 | SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs)); |
458 | SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn)); |
459 | |
460 | for (int i = 0; i < VecWidth; ++i) { |
461 | if (mask[i]) { |
462 | for (int j = 0; j < argumentCount; ++j) { |
463 | tmpArgs[j] = sp[j].fFloat[i]; |
464 | } |
465 | v->call(baseIndex + i, tmpArgs, tmpReturn); |
466 | for (int j = 0; j < returnCount; ++j) { |
467 | sp[j].fFloat[i] = tmpReturn[j]; |
468 | } |
469 | } |
470 | } |
471 | sp += returnCount - 1; |
472 | } |
473 | |
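// The matrix inverses below use the classic adjugate-over-determinant construction, operating on
// the column-major matrix elements in place on the stack (sp[0] is the most recently pushed).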
474 | static void Inverse2x2(VValue* sp) { |
475 | F32 a = sp[-3].fFloat, |
476 | b = sp[-2].fFloat, |
477 | c = sp[-1].fFloat, |
478 | d = sp[ 0].fFloat; |
479 | F32 idet = F32(1) / (a*d - b*c); |
480 | sp[-3].fFloat = d * idet; |
481 | sp[-2].fFloat = -b * idet; |
482 | sp[-1].fFloat = -c * idet; |
483 | sp[ 0].fFloat = a * idet; |
484 | } |
485 | |
486 | static void Inverse3x3(VValue* sp) { |
487 | F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat, |
488 | a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat, |
489 | a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 0].fFloat; |
490 | F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - |
491 | a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31); |
492 | sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet; |
493 | sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet; |
494 | sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet; |
495 | sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet; |
496 | sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet; |
497 | sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet; |
498 | sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet; |
499 | sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet; |
500 | sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet; |
501 | } |
502 | |
503 | static void Inverse4x4(VValue* sp) { |
504 | F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3), |
505 | a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2), |
506 | a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1), |
507 | a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0); |
508 | |
509 | F32 b00 = a00 * a11 - a01 * a10, |
510 | b01 = a00 * a12 - a02 * a10, |
511 | b02 = a00 * a13 - a03 * a10, |
512 | b03 = a01 * a12 - a02 * a11, |
513 | b04 = a01 * a13 - a03 * a11, |
514 | b05 = a02 * a13 - a03 * a12, |
515 | b06 = a20 * a31 - a21 * a30, |
516 | b07 = a20 * a32 - a22 * a30, |
517 | b08 = a20 * a33 - a23 * a30, |
518 | b09 = a21 * a32 - a22 * a31, |
519 | b10 = a21 * a33 - a23 * a31, |
520 | b11 = a22 * a33 - a23 * a32; |
521 | |
522 | F32 idet = F32(1) / |
523 | (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); |
524 | |
525 | b00 *= idet; |
526 | b01 *= idet; |
527 | b02 *= idet; |
528 | b03 *= idet; |
529 | b04 *= idet; |
530 | b05 *= idet; |
531 | b06 *= idet; |
532 | b07 *= idet; |
533 | b08 *= idet; |
534 | b09 *= idet; |
535 | b10 *= idet; |
536 | b11 *= idet; |
537 | |
538 | spf(-15) = a11 * b11 - a12 * b10 + a13 * b09; |
539 | spf(-14) = a02 * b10 - a01 * b11 - a03 * b09; |
540 | spf(-13) = a31 * b05 - a32 * b04 + a33 * b03; |
541 | spf(-12) = a22 * b04 - a21 * b05 - a23 * b03; |
542 | spf(-11) = a12 * b08 - a10 * b11 - a13 * b07; |
543 | spf(-10) = a00 * b11 - a02 * b08 + a03 * b07; |
544 | spf( -9) = a32 * b02 - a30 * b05 - a33 * b01; |
545 | spf( -8) = a20 * b05 - a22 * b02 + a23 * b01; |
546 | spf( -7) = a10 * b10 - a11 * b08 + a13 * b06; |
547 | spf( -6) = a01 * b08 - a00 * b10 - a03 * b06; |
548 | spf( -5) = a30 * b04 - a31 * b02 + a33 * b00; |
549 | spf( -4) = a21 * b02 - a20 * b04 - a23 * b00; |
550 | spf( -3) = a11 * b07 - a10 * b09 - a12 * b06; |
551 | spf( -2) = a00 * b09 - a01 * b07 + a02 * b06; |
552 | spf( -1) = a31 * b01 - a30 * b03 - a32 * b00; |
553 | spf( 0) = a20 * b03 - a21 * b01 + a22 * b00; |
554 | } |
555 | |
556 | static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack, |
557 | float* outReturn[], VValue globals[], const float uniforms[], |
558 | bool stripedOutput, int N, int baseIndex) { |
559 | #ifdef SKSLC_THREADED_CODE |
560 | static const void* labels[] = { |
561 | // If you aren't familiar with it, the &&label syntax is the GCC / Clang "labels as values" |
562 | // extension. If you add anything to this array, be sure to add the corresponding |
563 | // CHECK_LABEL() or CHECK_*_LABELS() assert below. |
564 | VECTOR_MATRIX_LABELS(kAddF), |
565 | VECTOR_LABELS(kAddI), |
566 | &&kAndB, |
567 | &&kBranch, |
568 | &&kCall, |
569 | &&kCallExternal, |
570 | &&kClampIndex, |
571 | VECTOR_LABELS(kCompareIEQ), |
572 | VECTOR_LABELS(kCompareINEQ), |
573 | VECTOR_MATRIX_LABELS(kCompareFEQ), |
574 | VECTOR_MATRIX_LABELS(kCompareFNEQ), |
575 | VECTOR_LABELS(kCompareFGT), |
576 | VECTOR_LABELS(kCompareFGTEQ), |
577 | VECTOR_LABELS(kCompareFLT), |
578 | VECTOR_LABELS(kCompareFLTEQ), |
579 | VECTOR_LABELS(kCompareSGT), |
580 | VECTOR_LABELS(kCompareSGTEQ), |
581 | VECTOR_LABELS(kCompareSLT), |
582 | VECTOR_LABELS(kCompareSLTEQ), |
583 | VECTOR_LABELS(kCompareUGT), |
584 | VECTOR_LABELS(kCompareUGTEQ), |
585 | VECTOR_LABELS(kCompareULT), |
586 | VECTOR_LABELS(kCompareULTEQ), |
587 | VECTOR_LABELS(kConvertFtoI), |
588 | VECTOR_LABELS(kConvertStoF), |
589 | VECTOR_LABELS(kConvertUtoF), |
590 | VECTOR_LABELS(kCos), |
591 | VECTOR_MATRIX_LABELS(kDivideF), |
592 | VECTOR_LABELS(kDivideS), |
593 | VECTOR_LABELS(kDivideU), |
594 | VECTOR_MATRIX_LABELS(kDup), |
595 | &&kInverse2x2, |
596 | &&kInverse3x3, |
597 | &&kInverse4x4, |
598 | VECTOR_LABELS(kLoad), |
599 | VECTOR_LABELS(kLoadGlobal), |
600 | VECTOR_LABELS(kLoadUniform), |
601 | &&kLoadSwizzle, |
602 | &&kLoadSwizzleGlobal, |
603 | &&kLoadSwizzleUniform, |
604 | &&kLoadExtended, |
605 | &&kLoadExtendedGlobal, |
606 | &&kLoadExtendedUniform, |
607 | &&kMatrixToMatrix, |
608 | &&kMatrixMultiply, |
609 | VECTOR_MATRIX_LABELS(kNegateF), |
610 | VECTOR_LABELS(kNegateI), |
611 | VECTOR_MATRIX_LABELS(kMultiplyF), |
612 | VECTOR_LABELS(kMultiplyI), |
613 | &&kNotB, |
614 | &&kOrB, |
615 | VECTOR_MATRIX_LABELS(kPop), |
616 | &&kPushImmediate, |
617 | VECTOR_LABELS(kReadExternal), |
618 | VECTOR_LABELS(kRemainderF), |
619 | VECTOR_LABELS(kRemainderS), |
620 | VECTOR_LABELS(kRemainderU), |
621 | &&kReserve, |
622 | &&kReturn, |
623 | &&kScalarToMatrix, |
624 | &&kShiftLeft, |
625 | &&kShiftRightS, |
626 | &&kShiftRightU, |
627 | VECTOR_LABELS(kSin), |
628 | VECTOR_LABELS(kSqrt), |
629 | VECTOR_LABELS(kStore), |
630 | VECTOR_LABELS(kStoreGlobal), |
631 | &&kStoreExtended, |
632 | &&kStoreExtendedGlobal, |
633 | &&kStoreSwizzle, |
634 | &&kStoreSwizzleGlobal, |
635 | &&kStoreSwizzleIndirect, |
636 | &&kStoreSwizzleIndirectGlobal, |
637 | &&kSwizzle, |
638 | VECTOR_MATRIX_LABELS(kSubtractF), |
639 | VECTOR_LABELS(kSubtractI), |
640 | VECTOR_LABELS(kTan), |
641 | VECTOR_LABELS(kWriteExternal), |
642 | &&kXorB, |
643 | |
644 | &&kMaskPush, |
645 | &&kMaskPop, |
646 | &&kMaskNegate, |
647 | &&kMaskBlend, |
648 | &&kBranchIfAllFalse, |
649 | |
650 | &&kLoopBegin, |
651 | &&kLoopNext, |
652 | &&kLoopMask, |
653 | &&kLoopEnd, |
654 | &&kLoopBreak, |
655 | &&kLoopContinue, |
656 | }; |
657 | // Verify that the order of the labels array matches the order of the ByteCodeInstruction enum. |
658 | CHECK_VECTOR_MATRIX_LABELS(kAddF); |
659 | CHECK_VECTOR_LABELS(kAddI); |
660 | CHECK_LABEL(kAndB); |
661 | CHECK_LABEL(kBranch); |
662 | CHECK_LABEL(kCall); |
663 | CHECK_LABEL(kCallExternal); |
664 | CHECK_LABEL(kClampIndex); |
665 | CHECK_VECTOR_LABELS(kCompareIEQ); |
666 | CHECK_VECTOR_LABELS(kCompareINEQ); |
667 | CHECK_VECTOR_MATRIX_LABELS(kCompareFEQ); |
668 | CHECK_VECTOR_MATRIX_LABELS(kCompareFNEQ); |
669 | CHECK_VECTOR_LABELS(kCompareFGT); |
670 | CHECK_VECTOR_LABELS(kCompareFGTEQ); |
671 | CHECK_VECTOR_LABELS(kCompareFLT); |
672 | CHECK_VECTOR_LABELS(kCompareFLTEQ); |
673 | CHECK_VECTOR_LABELS(kCompareSGT); |
674 | CHECK_VECTOR_LABELS(kCompareSGTEQ); |
675 | CHECK_VECTOR_LABELS(kCompareSLT); |
676 | CHECK_VECTOR_LABELS(kCompareSLTEQ); |
677 | CHECK_VECTOR_LABELS(kCompareUGT); |
678 | CHECK_VECTOR_LABELS(kCompareUGTEQ); |
679 | CHECK_VECTOR_LABELS(kCompareULT); |
680 | CHECK_VECTOR_LABELS(kCompareULTEQ); |
681 | CHECK_VECTOR_LABELS(kConvertFtoI); |
682 | CHECK_VECTOR_LABELS(kConvertStoF); |
683 | CHECK_VECTOR_LABELS(kConvertUtoF); |
684 | CHECK_VECTOR_LABELS(kCos); |
685 | CHECK_VECTOR_MATRIX_LABELS(kDivideF); |
686 | CHECK_VECTOR_LABELS(kDivideS); |
687 | CHECK_VECTOR_LABELS(kDivideU); |
688 | CHECK_VECTOR_MATRIX_LABELS(kDup); |
689 | CHECK_LABEL(kInverse2x2); |
690 | CHECK_LABEL(kInverse3x3); |
691 | CHECK_LABEL(kInverse4x4); |
692 | CHECK_VECTOR_LABELS(kLoad); |
693 | CHECK_VECTOR_LABELS(kLoadGlobal); |
694 | CHECK_VECTOR_LABELS(kLoadUniform); |
695 | CHECK_LABEL(kLoadSwizzle); |
696 | CHECK_LABEL(kLoadSwizzleGlobal); |
697 | CHECK_LABEL(kLoadSwizzleUniform); |
698 | CHECK_LABEL(kLoadExtended); |
699 | CHECK_LABEL(kLoadExtendedGlobal); |
700 | CHECK_LABEL(kLoadExtendedUniform); |
701 | CHECK_LABEL(kMatrixToMatrix); |
702 | CHECK_LABEL(kMatrixMultiply); |
703 | CHECK_VECTOR_MATRIX_LABELS(kNegateF); |
704 | CHECK_VECTOR_LABELS(kNegateI); |
705 | CHECK_VECTOR_MATRIX_LABELS(kMultiplyF); |
706 | CHECK_VECTOR_LABELS(kMultiplyI); |
707 | CHECK_LABEL(kNotB); |
708 | CHECK_LABEL(kOrB); |
709 | CHECK_VECTOR_MATRIX_LABELS(kPop); |
710 | CHECK_LABEL(kPushImmediate); |
711 | CHECK_VECTOR_LABELS(kReadExternal); |
712 | CHECK_VECTOR_LABELS(kRemainderF); |
713 | CHECK_VECTOR_LABELS(kRemainderS); |
714 | CHECK_VECTOR_LABELS(kRemainderU); |
715 | CHECK_LABEL(kReserve); |
716 | CHECK_LABEL(kReturn); |
717 | CHECK_LABEL(kScalarToMatrix); |
718 | CHECK_LABEL(kShiftLeft); |
719 | CHECK_LABEL(kShiftRightS); |
720 | CHECK_LABEL(kShiftRightU); |
721 | CHECK_VECTOR_LABELS(kSin); |
722 | CHECK_VECTOR_LABELS(kSqrt); |
723 | CHECK_VECTOR_LABELS(kStore); |
724 | CHECK_VECTOR_LABELS(kStoreGlobal); |
725 | CHECK_LABEL(kStoreExtended); |
726 | CHECK_LABEL(kStoreExtendedGlobal); |
727 | CHECK_LABEL(kStoreSwizzle); |
728 | CHECK_LABEL(kStoreSwizzleGlobal); |
729 | CHECK_LABEL(kStoreSwizzleIndirect); |
730 | CHECK_LABEL(kStoreSwizzleIndirectGlobal); |
731 | CHECK_LABEL(kSwizzle); |
732 | CHECK_VECTOR_MATRIX_LABELS(kSubtractF); |
733 | CHECK_VECTOR_LABELS(kSubtractI); |
734 | CHECK_VECTOR_LABELS(kTan); |
735 | CHECK_VECTOR_LABELS(kWriteExternal); |
736 | CHECK_LABEL(kXorB); |
737 | CHECK_LABEL(kMaskPush); |
738 | CHECK_LABEL(kMaskPop); |
739 | CHECK_LABEL(kMaskNegate); |
740 | CHECK_LABEL(kMaskBlend); |
741 | CHECK_LABEL(kBranchIfAllFalse); |
742 | CHECK_LABEL(kLoopBegin); |
743 | CHECK_LABEL(kLoopNext); |
744 | CHECK_LABEL(kLoopMask); |
745 | CHECK_LABEL(kLoopEnd); |
746 | CHECK_LABEL(kLoopBreak); |
747 | CHECK_LABEL(kLoopContinue); |
748 | f->fPreprocessOnce([f] { ((ByteCodeFunction*)f)->preprocess(labels); }); |
749 | #endif |
750 | |
    // This must contain the first N non-negative integers, where N is at least VecWidth
752 | static const Interpreter::I32 gLanes = { |
753 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
754 | }; |
755 | |
756 | VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1; |
757 | |
758 | #define POP() (*(sp--)) |
759 | #define PUSH(v) (sp[1] = v, ++sp) |
760 | |
761 | const uint8_t* code = f->fCode.data(); |
762 | const uint8_t* ip = code; |
763 | std::vector<StackFrame> frames; |
764 | |
765 | I32 condStack[16]; // Independent condition masks |
766 | I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...) |
767 | I32 contStack[16]; // Continue flags for loops |
768 | I32 loopStack[16]; // Loop execution masks |
769 | condStack[0] = maskStack[0] = (gLanes < N); |
770 | contStack[0] = I32( 0); |
771 | loopStack[0] = I32(~0); |
772 | I32* condPtr = condStack; |
773 | I32* maskPtr = maskStack; |
774 | I32* contPtr = contStack; |
775 | I32* loopPtr = loopStack; |
776 | |
777 | if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) || |
778 | f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) { |
779 | return false; |
780 | } |
781 | |
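    // The net execution mask combines the innermost condition mask with the innermost loop mask;
    // an instruction may only have visible effects in lanes where both are set.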
782 | auto mask = [&]() { return *maskPtr & *loopPtr; }; |
783 | |
784 | #ifdef SKSLC_THREADED_CODE |
785 | // If the "labels as values" extension is available, we implement this using threaded code. |
786 | // Instead of opcodes, the code directly contains the addresses of the labels to jump to. Then |
787 | // the code for each opcode simply grabs the address of the next opcode and uses a goto to jump |
788 | // there. |
789 | NEXT(); |
790 | #else |
791 | // Otherwise, we have to use a switch statement and a loop to execute the right label. |
792 | for (;;) { |
793 | #ifdef TRACE |
794 | printf("at %3d " , (int) (ip - code)); |
        DisassembleInstruction(ip);
796 | printf(" (stack: %d)\n" , (int) (sp - stack) + 1); |
797 | #endif |
798 | switch ((ByteCodeInstruction) READ16()) { |
799 | #endif |
800 | |
801 | VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +) |
802 | VECTOR_BINARY_OP(kAddI, fSigned, +) |
803 | |
804 | // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want: |
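        // For example, with true stored as ~0: (~0 & ~0) == ~0 (still true), (~0 ^ ~0) == 0.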
805 | LABEL(kAndB) |
806 | sp[-1] = sp[-1].fSigned & sp[0].fSigned; |
807 | POP(); |
808 | NEXT(); |
809 | LABEL(kNotB) |
810 | sp[0] = ~sp[0].fSigned; |
811 | NEXT(); |
812 | LABEL(kOrB) |
813 | sp[-1] = sp[-1].fSigned | sp[0].fSigned; |
814 | POP(); |
815 | NEXT(); |
816 | LABEL(kXorB) |
817 | sp[-1] = sp[-1].fSigned ^ sp[0].fSigned; |
818 | POP(); |
819 | NEXT(); |
820 | |
821 | LABEL(kBranch) |
822 | ip = code + READ16(); |
823 | NEXT(); |
824 | |
825 | LABEL(kCall) { |
826 | // Precursor code reserved space for the return value, and pushed all parameters to |
827 | // the stack. Update our bottom of stack to point at the first parameter, and our |
828 | // sp to point past those parameters (plus space for locals). |
829 | int target = READ8(); |
830 | const ByteCodeFunction* fun = byteCode->fFunctions[target].get(); |
831 | #ifdef SKSLC_THREADED_CODE |
832 | fun->fPreprocessOnce([fun] { ((ByteCodeFunction*)fun)->preprocess(labels); }); |
833 | #endif |
834 | if (skvx::any(mask())) { |
835 | frames.push_back({ code, ip, stack, fun->fParameterCount }); |
836 | ip = code = fun->fCode.data(); |
837 | stack = sp - fun->fParameterCount + 1; |
838 | sp = stack + fun->fParameterCount + fun->fLocalCount - 1; |
839 | } |
840 | NEXT(); |
841 | } |
842 | |
843 | LABEL(kCallExternal) { |
844 | CallExternal(byteCode, ip, sp, baseIndex, mask()); |
845 | NEXT(); |
846 | } |
847 | |
848 | LABEL(kClampIndex) { |
849 | int length = READ8(); |
850 | if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) { |
851 | return false; |
852 | } |
853 | NEXT(); |
854 | } |
855 | |
856 | VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==) |
857 | VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==) |
858 | VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=) |
859 | VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=) |
860 | VECTOR_BINARY_OP(kCompareSGT, fSigned, >) |
861 | VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >) |
862 | VECTOR_BINARY_OP(kCompareFGT, fFloat, >) |
863 | VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=) |
864 | VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=) |
865 | VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=) |
866 | VECTOR_BINARY_OP(kCompareSLT, fSigned, <) |
867 | VECTOR_BINARY_OP(kCompareULT, fUnsigned, <) |
868 | VECTOR_BINARY_OP(kCompareFLT, fFloat, <) |
869 | VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=) |
870 | VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=) |
871 | VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=) |
872 | |
873 | LABEL(kConvertFtoI4) sp[-3] = skvx::cast<int>(sp[-3].fFloat); |
874 | LABEL(kConvertFtoI3) sp[-2] = skvx::cast<int>(sp[-2].fFloat); |
875 | LABEL(kConvertFtoI2) sp[-1] = skvx::cast<int>(sp[-1].fFloat); |
876 | LABEL(kConvertFtoI) sp[ 0] = skvx::cast<int>(sp[ 0].fFloat); |
877 | NEXT(); |
878 | |
879 | LABEL(kConvertStoF4) sp[-3] = skvx::cast<float>(sp[-3].fSigned); |
880 | LABEL(kConvertStoF3) sp[-2] = skvx::cast<float>(sp[-2].fSigned); |
881 | LABEL(kConvertStoF2) sp[-1] = skvx::cast<float>(sp[-1].fSigned); |
882 | LABEL(kConvertStoF) sp[ 0] = skvx::cast<float>(sp[ 0].fSigned); |
883 | NEXT(); |
884 | |
885 | LABEL(kConvertUtoF4) sp[-3] = skvx::cast<float>(sp[-3].fUnsigned); |
886 | LABEL(kConvertUtoF3) sp[-2] = skvx::cast<float>(sp[-2].fUnsigned); |
887 | LABEL(kConvertUtoF2) sp[-1] = skvx::cast<float>(sp[-1].fUnsigned); |
888 | LABEL(kConvertUtoF) sp[ 0] = skvx::cast<float>(sp[ 0].fUnsigned); |
889 | NEXT(); |
890 | |
891 | VECTOR_UNARY_FN_VEC(kCos, cosf) |
892 | |
893 | VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /) |
894 | VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /) |
895 | VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /) |
896 | |
897 | LABEL(kDup4) PUSH(sp[1 - ip[0]]); |
898 | LABEL(kDup3) PUSH(sp[1 - ip[0]]); |
899 | LABEL(kDup2) PUSH(sp[1 - ip[0]]); |
900 | LABEL(kDup) PUSH(sp[1 - ip[0]]); |
901 | ++ip; |
902 | NEXT(); |
903 | |
904 | LABEL(kDupN) { |
905 | int count = READ8(); |
906 | memcpy(sp + 1, sp - count + 1, count * sizeof(VValue)); |
907 | sp += count; |
908 | NEXT(); |
909 | } |
910 | |
911 | LABEL(kInverse2x2) { |
912 | Inverse2x2(sp); |
913 | NEXT(); |
914 | } |
915 | LABEL(kInverse3x3) { |
916 | Inverse3x3(sp); |
917 | NEXT(); |
918 | } |
919 | LABEL(kInverse4x4) { |
920 | Inverse4x4(sp); |
921 | NEXT(); |
922 | } |
923 | |
924 | LABEL(kLoad4) sp[4] = stack[ip[1] + 3]; |
925 | LABEL(kLoad3) sp[3] = stack[ip[1] + 2]; |
926 | LABEL(kLoad2) sp[2] = stack[ip[1] + 1]; |
927 | LABEL(kLoad) sp[1] = stack[ip[1] + 0]; |
928 | sp += ip[0]; |
929 | ip += 2; |
930 | NEXT(); |
931 | |
932 | LABEL(kLoadGlobal4) sp[4] = globals[ip[1] + 3]; |
933 | LABEL(kLoadGlobal3) sp[3] = globals[ip[1] + 2]; |
934 | LABEL(kLoadGlobal2) sp[2] = globals[ip[1] + 1]; |
935 | LABEL(kLoadGlobal) sp[1] = globals[ip[1] + 0]; |
936 | sp += ip[0]; |
937 | ip += 2; |
938 | NEXT(); |
939 | |
940 | LABEL(kLoadUniform4) sp[4].fFloat = uniforms[ip[1] + 3]; |
941 | LABEL(kLoadUniform3) sp[3].fFloat = uniforms[ip[1] + 2]; |
942 | LABEL(kLoadUniform2) sp[2].fFloat = uniforms[ip[1] + 1]; |
943 | LABEL(kLoadUniform) sp[1].fFloat = uniforms[ip[1] + 0]; |
944 | sp += ip[0]; |
945 | ip += 2; |
946 | NEXT(); |
947 | |
948 | LABEL(kLoadExtended) { |
949 | int count = READ8(); |
950 | I32 src = POP().fSigned; |
951 | I32 m = mask(); |
952 | for (int i = 0; i < count; ++i) { |
953 | for (int j = 0; j < VecWidth; ++j) { |
954 | if (m[j]) { |
955 | sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j]; |
956 | } |
957 | } |
958 | } |
959 | sp += count; |
960 | NEXT(); |
961 | } |
962 | |
963 | LABEL(kLoadExtendedGlobal) { |
964 | int count = READ8(); |
965 | I32 src = POP().fSigned; |
966 | I32 m = mask(); |
967 | for (int i = 0; i < count; ++i) { |
968 | for (int j = 0; j < VecWidth; ++j) { |
969 | if (m[j]) { |
970 | sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j]; |
971 | } |
972 | } |
973 | } |
974 | sp += count; |
975 | NEXT(); |
976 | } |
977 | |
978 | LABEL(kLoadExtendedUniform) { |
979 | int count = READ8(); |
980 | I32 src = POP().fSigned; |
981 | I32 m = mask(); |
982 | for (int i = 0; i < count; ++i) { |
983 | for (int j = 0; j < VecWidth; ++j) { |
984 | if (m[j]) { |
985 | sp[i + 1].fFloat[j] = uniforms[src[j] + i]; |
986 | } |
987 | } |
988 | } |
989 | sp += count; |
990 | NEXT(); |
991 | } |
992 | |
993 | LABEL(kLoadSwizzle) { |
994 | int src = READ8(); |
995 | int count = READ8(); |
996 | for (int i = 0; i < count; ++i) { |
997 | PUSH(stack[src + *(ip + i)]); |
998 | } |
999 | ip += count; |
1000 | NEXT(); |
1001 | } |
1002 | |
1003 | LABEL(kLoadSwizzleGlobal) { |
1004 | int src = READ8(); |
1005 | int count = READ8(); |
1006 | for (int i = 0; i < count; ++i) { |
1007 | PUSH(globals[src + *(ip + i)]); |
1008 | } |
1009 | ip += count; |
1010 | NEXT(); |
1011 | } |
1012 | |
1013 | LABEL(kLoadSwizzleUniform) { |
1014 | int src = READ8(); |
1015 | int count = READ8(); |
1016 | for (int i = 0; i < count; ++i) { |
1017 | PUSH(F32(uniforms[src + *(ip + i)])); |
1018 | } |
1019 | ip += count; |
1020 | NEXT(); |
1021 | } |
1022 | |
1023 | LABEL(kMatrixToMatrix) { |
1024 | int srcCols = READ8(); |
1025 | int srcRows = READ8(); |
1026 | int dstCols = READ8(); |
1027 | int dstRows = READ8(); |
1028 | SkASSERT(srcCols >= 2 && srcCols <= 4); |
1029 | SkASSERT(srcRows >= 2 && srcRows <= 4); |
1030 | SkASSERT(dstCols >= 2 && dstCols <= 4); |
1031 | SkASSERT(dstRows >= 2 && dstRows <= 4); |
1032 | F32 tmp[16]; |
1033 | memset(tmp, 0, sizeof(tmp)); |
1034 | tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f); |
1035 | for (int c = srcCols - 1; c >= 0; --c) { |
1036 | for (int r = srcRows - 1; r >= 0; --r) { |
1037 | tmp[c*4 + r] = POP().fFloat; |
1038 | } |
1039 | } |
1040 | for (int c = 0; c < dstCols; ++c) { |
1041 | for (int r = 0; r < dstRows; ++r) { |
1042 | PUSH(tmp[c*4 + r]); |
1043 | } |
1044 | } |
1045 | NEXT(); |
1046 | } |
1047 | |
1048 | LABEL(kMatrixMultiply) { |
1049 | int lCols = READ8(); |
1050 | int lRows = READ8(); |
1051 | int rCols = READ8(); |
1052 | int rRows = lCols; |
1053 | F32 tmp[16] = { 0.0f }; |
1054 | F32* B = &(sp - (rCols * rRows) + 1)->fFloat; |
1055 | F32* A = B - (lCols * lRows); |
1056 | for (int c = 0; c < rCols; ++c) { |
1057 | for (int r = 0; r < lRows; ++r) { |
1058 | for (int j = 0; j < lCols; ++j) { |
1059 | tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j]; |
1060 | } |
1061 | } |
1062 | } |
1063 | sp -= (lCols * lRows) + (rCols * rRows); |
1064 | memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue)); |
1065 | sp += (rCols * lRows); |
1066 | NEXT(); |
1067 | } |
1068 | |
1069 | VECTOR_BINARY_OP(kMultiplyI, fSigned, *) |
1070 | VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *) |
1071 | |
1072 | LABEL(kNegateF4) sp[-3] = -sp[-3].fFloat; |
1073 | LABEL(kNegateF3) sp[-2] = -sp[-2].fFloat; |
1074 | LABEL(kNegateF2) sp[-1] = -sp[-1].fFloat; |
1075 | LABEL(kNegateF) sp[ 0] = -sp[ 0].fFloat; |
1076 | NEXT(); |
1077 | |
1078 | LABEL(kNegateFN) { |
1079 | int count = READ8(); |
1080 | for (int i = count - 1; i >= 0; --i) { |
1081 | sp[-i] = -sp[-i].fFloat; |
1082 | } |
1083 | NEXT(); |
1084 | } |
1085 | |
1086 | LABEL(kNegateI4) sp[-3] = -sp[-3].fSigned; |
1087 | LABEL(kNegateI3) sp[-2] = -sp[-2].fSigned; |
1088 | LABEL(kNegateI2) sp[-1] = -sp[-1].fSigned; |
1089 | LABEL(kNegateI) sp[ 0] = -sp[ 0].fSigned; |
1090 | NEXT(); |
1091 | |
1092 | LABEL(kPop4) POP(); |
1093 | LABEL(kPop3) POP(); |
1094 | LABEL(kPop2) POP(); |
1095 | LABEL(kPop) POP(); |
1096 | NEXT(); |
1097 | |
1098 | LABEL(kPopN) |
1099 | sp -= READ8(); |
1100 | NEXT(); |
1101 | |
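        // Immediates are raw 32-bit patterns, pushed as unsigned and reinterpreted by whichever
        // typed instruction consumes them (hence the int/float pun in the disassembly).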
1102 | LABEL(kPushImmediate) |
1103 | PUSH(U32(READ32())); |
1104 | NEXT(); |
1105 | |
1106 | LABEL(kReadExternal) |
1107 | LABEL(kReadExternal2) |
1108 | LABEL(kReadExternal3) |
1109 | LABEL(kReadExternal4) { |
1110 | int count = READ8(); |
1111 | int src = READ8(); |
1112 | float tmp[4]; |
1113 | I32 m = mask(); |
1114 | for (int i = 0; i < VecWidth; ++i) { |
1115 | if (m[i]) { |
1116 | byteCode->fExternalValues[src]->read(baseIndex + i, tmp); |
1117 | for (int j = 0; j < count; ++j) { |
1118 | sp[j + 1].fFloat[i] = tmp[j]; |
1119 | } |
1120 | } |
1121 | } |
1122 | sp += count; |
1123 | NEXT(); |
1124 | } |
1125 | |
1126 | VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod) |
1127 | VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %) |
1128 | VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %) |
1129 | |
1130 | LABEL(kReserve) |
1131 | sp += READ8(); |
1132 | NEXT(); |
1133 | |
1134 | LABEL(kReturn) { |
1135 | int count = READ8(); |
1136 | if (frames.empty()) { |
1137 | if (outReturn) { |
1138 | VValue* src = sp - count + 1; |
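                        // Striped output gives each return slot its own contiguous array;
                        // otherwise the slots are interleaved per-invocation into outReturn[0].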
1139 | if (stripedOutput) { |
1140 | for (int i = 0; i < count; ++i) { |
1141 | memcpy(outReturn[i], &src->fFloat, N * sizeof(float)); |
1142 | ++src; |
1143 | } |
1144 | } else { |
1145 | float* outPtr = outReturn[0]; |
1146 | for (int i = 0; i < count; ++i) { |
1147 | for (int j = 0; j < N; ++j) { |
1148 | outPtr[count * j] = src->fFloat[j]; |
1149 | } |
1150 | ++outPtr; |
1151 | ++src; |
1152 | } |
1153 | } |
1154 | } |
1155 | return true; |
1156 | } else { |
1157 | // When we were called, the caller reserved stack space for their copy of our |
1158 | // return value, then 'stack' was positioned after that, where our parameters |
1159 | // were placed. Copy our return values to their reserved area. |
1160 | memcpy(stack - count, sp - count + 1, count * sizeof(VValue)); |
1161 | |
1162 | // Now move the stack pointer to the end of the passed-in parameters. This odd |
1163 | // calling convention requires the caller to pop the arguments after calling, |
1164 | // but allows them to store any out-parameters back during that unwinding. |
1165 | // After that sequence finishes, the return value will be the top of the stack. |
1166 | const StackFrame& frame(frames.back()); |
1167 | sp = stack + frame.fParameterCount - 1; |
1168 | stack = frame.fStack; |
1169 | code = frame.fCode; |
1170 | ip = frame.fIP; |
1171 | frames.pop_back(); |
1172 | NEXT(); |
1173 | } |
1174 | } |
1175 | |
1176 | LABEL(kScalarToMatrix) { |
1177 | int cols = READ8(); |
1178 | int rows = READ8(); |
1179 | VValue v = POP(); |
1180 | for (int c = 0; c < cols; ++c) { |
1181 | for (int r = 0; r < rows; ++r) { |
1182 | PUSH(c == r ? v : F32(0.0f)); |
1183 | } |
1184 | } |
1185 | NEXT(); |
1186 | } |
1187 | |
1188 | LABEL(kShiftLeft) |
1189 | sp[0] = sp[0].fSigned << READ8(); |
1190 | NEXT(); |
1191 | LABEL(kShiftRightS) |
1192 | sp[0] = sp[0].fSigned >> READ8(); |
1193 | NEXT(); |
1194 | LABEL(kShiftRightU) |
1195 | sp[0] = sp[0].fUnsigned >> READ8(); |
1196 | NEXT(); |
1197 | |
1198 | VECTOR_UNARY_FN_VEC(kSin, sinf) |
1199 | VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat) |
1200 | |
1201 | LABEL(kStore4) |
1202 | stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat); |
1203 | LABEL(kStore3) |
1204 | stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat); |
1205 | LABEL(kStore2) |
1206 | stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat); |
1207 | LABEL(kStore) |
1208 | stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat); |
1209 | ++ip; |
1210 | NEXT(); |
1211 | |
1212 | LABEL(kStoreGlobal4) |
1213 | globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat); |
1214 | LABEL(kStoreGlobal3) |
1215 | globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat); |
1216 | LABEL(kStoreGlobal2) |
1217 | globals[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat); |
1218 | LABEL(kStoreGlobal) |
1219 | globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat); |
1220 | ++ip; |
1221 | NEXT(); |
1222 | |
1223 | LABEL(kStoreExtended) { |
1224 | int count = READ8(); |
1225 | I32 target = POP().fSigned; |
1226 | VValue* src = sp - count + 1; |
1227 | I32 m = mask(); |
1228 | for (int i = 0; i < count; ++i) { |
1229 | for (int j = 0; j < VecWidth; ++j) { |
1230 | if (m[j]) { |
1231 | stack[target[j] + i].fSigned[j] = src[i].fSigned[j]; |
1232 | } |
1233 | } |
1234 | } |
1235 | sp -= count; |
1236 | NEXT(); |
1237 | } |
1238 | LABEL(kStoreExtendedGlobal) { |
1239 | int count = READ8(); |
1240 | I32 target = POP().fSigned; |
1241 | VValue* src = sp - count + 1; |
1242 | I32 m = mask(); |
1243 | for (int i = 0; i < count; ++i) { |
1244 | for (int j = 0; j < VecWidth; ++j) { |
1245 | if (m[j]) { |
1246 | globals[target[j] + i].fSigned[j] = src[i].fSigned[j]; |
1247 | } |
1248 | } |
1249 | } |
1250 | sp -= count; |
1251 | NEXT(); |
1252 | } |
1253 | |
1254 | LABEL(kStoreSwizzle) { |
1255 | int target = READ8(); |
1256 | int count = READ8(); |
1257 | for (int i = count - 1; i >= 0; --i) { |
1258 | stack[target + *(ip + i)] = skvx::if_then_else( |
1259 | mask(), POP().fFloat, stack[target + *(ip + i)].fFloat); |
1260 | } |
1261 | ip += count; |
1262 | NEXT(); |
1263 | } |
1264 | |
1265 | LABEL(kStoreSwizzleGlobal) { |
1266 | int target = READ8(); |
1267 | int count = READ8(); |
1268 | for (int i = count - 1; i >= 0; --i) { |
1269 | globals[target + *(ip + i)] = skvx::if_then_else( |
1270 | mask(), POP().fFloat, globals[target + *(ip + i)].fFloat); |
1271 | } |
1272 | ip += count; |
1273 | NEXT(); |
1274 | } |
1275 | |
1276 | LABEL(kStoreSwizzleIndirect) { |
1277 | int count = READ8(); |
1278 | I32 target = POP().fSigned; |
1279 | I32 m = mask(); |
1280 | for (int i = count - 1; i >= 0; --i) { |
1281 | I32 v = POP().fSigned; |
1282 | for (int j = 0; j < VecWidth; ++j) { |
1283 | if (m[j]) { |
1284 | stack[target[j] + *(ip + i)].fSigned[j] = v[j]; |
1285 | } |
1286 | } |
1287 | } |
1288 | ip += count; |
1289 | NEXT(); |
1290 | } |
1291 | |
1292 | LABEL(kStoreSwizzleIndirectGlobal) { |
1293 | int count = READ8(); |
1294 | I32 target = POP().fSigned; |
1295 | I32 m = mask(); |
1296 | for (int i = count - 1; i >= 0; --i) { |
1297 | I32 v = POP().fSigned; |
1298 | for (int j = 0; j < VecWidth; ++j) { |
1299 | if (m[j]) { |
1300 | globals[target[j] + *(ip + i)].fSigned[j] = v[j]; |
1301 | } |
1302 | } |
1303 | } |
1304 | ip += count; |
1305 | NEXT(); |
1306 | } |
1307 | |
1308 | VECTOR_BINARY_OP(kSubtractI, fSigned, -) |
1309 | VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -) |
1310 | |
1311 | LABEL(kSwizzle) { |
1312 | VValue tmp[4]; |
1313 | for (int i = READ8() - 1; i >= 0; --i) { |
1314 | tmp[i] = POP(); |
1315 | } |
1316 | for (int i = READ8() - 1; i >= 0; --i) { |
1317 | PUSH(tmp[READ8()]); |
1318 | } |
1319 | NEXT(); |
1320 | } |
1321 | |
1322 | VECTOR_UNARY_FN_VEC(kTan, tanf) |
1323 | |
1324 | LABEL(kWriteExternal4) |
1325 | LABEL(kWriteExternal3) |
1326 | LABEL(kWriteExternal2) |
1327 | LABEL(kWriteExternal) { |
1328 | int count = READ8(); |
1329 | int target = READ8(); |
1330 | float tmp[4]; |
1331 | I32 m = mask(); |
1332 | sp -= count; |
1333 | for (int i = 0; i < VecWidth; ++i) { |
1334 | if (m[i]) { |
1335 | for (int j = 0; j < count; ++j) { |
1336 | tmp[j] = sp[j + 1].fFloat[i]; |
1337 | } |
1338 | byteCode->fExternalValues[target]->write(baseIndex + i, tmp); |
1339 | } |
1340 | } |
1341 | NEXT(); |
1342 | } |
1343 | |
1344 | LABEL(kMaskPush) |
1345 | condPtr[1] = POP().fSigned; |
1346 | maskPtr[1] = maskPtr[0] & condPtr[1]; |
1347 | ++condPtr; ++maskPtr; |
1348 | NEXT(); |
1349 | LABEL(kMaskPop) |
1350 | --condPtr; --maskPtr; |
1351 | NEXT(); |
1352 | LABEL(kMaskNegate) |
1353 | maskPtr[0] = maskPtr[-1] & ~condPtr[0]; |
1354 | NEXT(); |
1355 | LABEL(kMaskBlend) { |
1356 | int count = READ8(); |
1357 | I32 m = condPtr[0]; |
1358 | --condPtr; --maskPtr; |
1359 | for (int i = 0; i < count; ++i) { |
1360 | sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat); |
1361 | --sp; |
1362 | } |
1363 | NEXT(); |
1364 | } |
1365 | LABEL(kBranchIfAllFalse) { |
1366 | int target = READ16(); |
1367 | if (!skvx::any(mask())) { |
1368 | ip = code + target; |
1369 | } |
1370 | NEXT(); |
1371 | } |
1372 | |
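        // Loop masking: kLoopBegin pushes a fresh loop mask, and kLoopMask ANDs in the loop
        // condition. kLoopBreak clears lanes for the remainder of the loop, while kLoopContinue
        // only parks them in the continue mask until kLoopNext folds them back in.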
1373 | LABEL(kLoopBegin) |
1374 | contPtr[1] = 0; |
1375 | loopPtr[1] = loopPtr[0]; |
1376 | ++contPtr; ++loopPtr; |
1377 | NEXT(); |
1378 | LABEL(kLoopNext) |
1379 | *loopPtr |= *contPtr; |
1380 | *contPtr = 0; |
1381 | NEXT(); |
1382 | LABEL(kLoopMask) |
1383 | *loopPtr &= POP().fSigned; |
1384 | NEXT(); |
1385 | LABEL(kLoopEnd) |
1386 | --contPtr; --loopPtr; |
1387 | NEXT(); |
1388 | LABEL(kLoopBreak) |
1389 | *loopPtr &= ~mask(); |
1390 | NEXT(); |
1391 | LABEL(kLoopContinue) { |
1392 | I32 m = mask(); |
1393 | *contPtr |= m; |
1394 | *loopPtr &= ~m; |
1395 | NEXT(); |
1396 | } |
1397 | #ifdef SKSLC_THREADED_CODE |
1398 | #ifdef TRACE |
1399 | next: |
1400 | printf("at %3d (stack: %d) (disable threaded code for disassembly)\n" , |
1401 | (int) (ip - code), (int) (sp - stack) + 1); |
1402 | goto *READ_INST(); |
1403 | #endif |
1404 | #else |
1405 | } |
1406 | } |
1407 | #endif |
1408 | } |
1409 | |
1410 | }; // class Interpreter |
1411 | |
1412 | #endif // SK_ENABLE_SKSL_INTERPRETER |
1413 | |
1414 | #undef spf |
1415 | |
1416 | void ByteCodeFunction::disassemble() const { |
1417 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
1418 | const uint8_t* ip = fCode.data(); |
1419 | while (ip < fCode.data() + fCode.size()) { |
1420 | printf("%d: " , (int)(ip - fCode.data())); |
1421 | ip = Interpreter::DisassembleInstruction(ip); |
1422 | printf("\n" ); |
1423 | } |
1424 | #endif |
1425 | } |
1426 | |
1427 | #define VECTOR_PREPROCESS(base) \ |
1428 | case ByteCodeInstruction::base ## 4: \ |
1429 | case ByteCodeInstruction::base ## 3: \ |
1430 | case ByteCodeInstruction::base ## 2: \ |
1431 | case ByteCodeInstruction::base: READ8(); break; |
1432 | |
1433 | #define VECTOR_PREPROCESS_NO_COUNT(base) \ |
1434 | case ByteCodeInstruction::base ## 4: \ |
1435 | case ByteCodeInstruction::base ## 3: \ |
1436 | case ByteCodeInstruction::base ## 2: \ |
1437 | case ByteCodeInstruction::base: break; |
1438 | |
1439 | #define VECTOR_MATRIX_PREPROCESS(base) \ |
1440 | VECTOR_PREPROCESS(base) \ |
1441 | case ByteCodeInstruction::base ## N: READ8(); break; |
1442 | |
1443 | #define VECTOR_MATRIX_PREPROCESS_NO_COUNT(base) \ |
1444 | VECTOR_PREPROCESS_NO_COUNT(base) \ |
1445 | case ByteCodeInstruction::base ## N: READ8(); break; |
1446 | |
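// Rewrites the opcode at the head of each instruction into the address of its handler label, so
// the threaded interpreter can jump to it directly. The operand-skipping logic below must stay in
// lockstep with the instruction encodings consumed by InnerRun().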
1447 | void ByteCodeFunction::preprocess(const void* labels[]) { |
1448 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
1449 | #ifdef TRACE |
1450 | this->disassemble(); |
1451 | #endif |
1452 | uint8_t* ip = fCode.data(); |
1453 | while (ip < fCode.data() + fCode.size()) { |
1454 | ByteCodeInstruction inst = (ByteCodeInstruction) (intptr_t) READ_INST(); |
1455 | const void* label = labels[(int) inst]; |
1456 | memcpy(ip - sizeof(instruction), &label, sizeof(label)); |
1457 | switch (inst) { |
1458 | VECTOR_MATRIX_PREPROCESS(kAddF) |
1459 | VECTOR_PREPROCESS(kAddI) |
1460 | case ByteCodeInstruction::kAndB: break; |
1461 | case ByteCodeInstruction::kBranch: READ16(); break; |
1462 | case ByteCodeInstruction::kCall: READ8(); break; |
1463 | case ByteCodeInstruction::kCallExternal: { |
1464 | READ8(); |
1465 | READ8(); |
1466 | READ8(); |
1467 | break; |
1468 | } |
1469 | case ByteCodeInstruction::kClampIndex: READ8(); break; |
1470 | VECTOR_PREPROCESS(kCompareIEQ) |
1471 | VECTOR_PREPROCESS(kCompareINEQ) |
1472 | VECTOR_MATRIX_PREPROCESS(kCompareFEQ) |
1473 | VECTOR_MATRIX_PREPROCESS(kCompareFNEQ) |
1474 | VECTOR_PREPROCESS(kCompareFGT) |
1475 | VECTOR_PREPROCESS(kCompareFGTEQ) |
1476 | VECTOR_PREPROCESS(kCompareFLT) |
1477 | VECTOR_PREPROCESS(kCompareFLTEQ) |
1478 | VECTOR_PREPROCESS(kCompareSGT) |
1479 | VECTOR_PREPROCESS(kCompareSGTEQ) |
1480 | VECTOR_PREPROCESS(kCompareSLT) |
1481 | VECTOR_PREPROCESS(kCompareSLTEQ) |
1482 | VECTOR_PREPROCESS(kCompareUGT) |
1483 | VECTOR_PREPROCESS(kCompareUGTEQ) |
1484 | VECTOR_PREPROCESS(kCompareULT) |
1485 | VECTOR_PREPROCESS(kCompareULTEQ) |
1486 | VECTOR_PREPROCESS_NO_COUNT(kConvertFtoI) |
1487 | VECTOR_PREPROCESS_NO_COUNT(kConvertStoF) |
1488 | VECTOR_PREPROCESS_NO_COUNT(kConvertUtoF) |
1489 | VECTOR_PREPROCESS(kCos) |
1490 | VECTOR_MATRIX_PREPROCESS(kDivideF) |
1491 | VECTOR_PREPROCESS(kDivideS) |
1492 | VECTOR_PREPROCESS(kDivideU) |
1493 | VECTOR_MATRIX_PREPROCESS(kDup) |
1494 | |
1495 | case ByteCodeInstruction::kInverse2x2: |
1496 | case ByteCodeInstruction::kInverse3x3: |
1497 | case ByteCodeInstruction::kInverse4x4: break; |
1498 | |
1499 | case ByteCodeInstruction::kLoad: |
1500 | case ByteCodeInstruction::kLoad2: |
1501 | case ByteCodeInstruction::kLoad3: |
1502 | case ByteCodeInstruction::kLoad4: |
1503 | case ByteCodeInstruction::kLoadGlobal: |
1504 | case ByteCodeInstruction::kLoadGlobal2: |
1505 | case ByteCodeInstruction::kLoadGlobal3: |
1506 | case ByteCodeInstruction::kLoadGlobal4: |
1507 | case ByteCodeInstruction::kLoadUniform: |
1508 | case ByteCodeInstruction::kLoadUniform2: |
1509 | case ByteCodeInstruction::kLoadUniform3: |
1510 | case ByteCodeInstruction::kLoadUniform4: READ16(); break; |
1511 | |
1512 | case ByteCodeInstruction::kLoadSwizzle: |
1513 | case ByteCodeInstruction::kLoadSwizzleGlobal: |
1514 | case ByteCodeInstruction::kLoadSwizzleUniform: { |
1515 | READ8(); |
1516 | int count = READ8(); |
1517 | ip += count; |
1518 | break; |
1519 | } |
1520 | |
1521 | case ByteCodeInstruction::kLoadExtended: |
1522 | case ByteCodeInstruction::kLoadExtendedGlobal: |
1523 | case ByteCodeInstruction::kLoadExtendedUniform: |
1524 | READ8(); |
1525 | break; |
1526 | |
1527 | case ByteCodeInstruction::kMatrixToMatrix: { |
1528 | READ8(); |
1529 | READ8(); |
1530 | READ8(); |
1531 | READ8(); |
1532 | break; |
1533 | } |
1534 | case ByteCodeInstruction::kMatrixMultiply: { |
1535 | READ8(); |
1536 | READ8(); |
1537 | READ8(); |
1538 | break; |
1539 | } |
1540 | VECTOR_MATRIX_PREPROCESS(kMultiplyF) |
1541 | VECTOR_PREPROCESS(kMultiplyI) |
1542 | VECTOR_MATRIX_PREPROCESS_NO_COUNT(kNegateF) |
1543 | VECTOR_PREPROCESS_NO_COUNT(kNegateI) |
1544 | case ByteCodeInstruction::kNotB: break; |
1545 | case ByteCodeInstruction::kOrB: break; |
1546 | VECTOR_MATRIX_PREPROCESS_NO_COUNT(kPop) |
1547 | case ByteCodeInstruction::kPushImmediate: READ32(); break; |
1548 | |
1549 | case ByteCodeInstruction::kReadExternal: |
1550 | case ByteCodeInstruction::kReadExternal2: |
1551 | case ByteCodeInstruction::kReadExternal3: |
1552 | case ByteCodeInstruction::kReadExternal4: READ16(); break; |
1553 | |
1554 | VECTOR_PREPROCESS(kRemainderF) |
1555 | VECTOR_PREPROCESS(kRemainderS) |
1556 | VECTOR_PREPROCESS(kRemainderU) |
1557 | case ByteCodeInstruction::kReserve: READ8(); break; |
1558 | case ByteCodeInstruction::kReturn: READ8(); break; |
1559 | case ByteCodeInstruction::kScalarToMatrix: READ8(); READ8(); break; |
1560 | case ByteCodeInstruction::kShiftLeft: READ8(); break; |
1561 | case ByteCodeInstruction::kShiftRightS: READ8(); break; |
1562 | case ByteCodeInstruction::kShiftRightU: READ8(); break; |
1563 | VECTOR_PREPROCESS(kSin) |
1564 | VECTOR_PREPROCESS_NO_COUNT(kSqrt) |
1565 | |
1566 | case ByteCodeInstruction::kStore: |
1567 | case ByteCodeInstruction::kStore2: |
1568 | case ByteCodeInstruction::kStore3: |
1569 | case ByteCodeInstruction::kStore4: |
1570 | case ByteCodeInstruction::kStoreGlobal: |
1571 | case ByteCodeInstruction::kStoreGlobal2: |
1572 | case ByteCodeInstruction::kStoreGlobal3: |
1573 | case ByteCodeInstruction::kStoreGlobal4: READ8(); break; |
1574 | |
1575 | case ByteCodeInstruction::kStoreSwizzle: |
1576 | case ByteCodeInstruction::kStoreSwizzleGlobal: { |
1577 | READ8(); |
1578 | int count = READ8(); |
1579 | ip += count; |
1580 | break; |
1581 | } |
1582 | |
1583 | case ByteCodeInstruction::kStoreSwizzleIndirect: |
1584 | case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: { |
1585 | int count = READ8(); |
1586 | ip += count; |
1587 | break; |
1588 | } |
1589 | |
1590 | case ByteCodeInstruction::kStoreExtended: READ8(); break; |
1591 | case ByteCodeInstruction::kStoreExtendedGlobal: READ8(); break; |
1592 | |
1593 | VECTOR_MATRIX_PREPROCESS(kSubtractF) |
1594 | VECTOR_PREPROCESS(kSubtractI) |
1595 | |
1596 | case ByteCodeInstruction::kSwizzle: { |
1597 | READ8(); |
1598 | int count = READ8(); |
1599 | ip += count; |
1600 | break; |
1601 | } |
1602 | VECTOR_PREPROCESS(kTan) |
1603 | case ByteCodeInstruction::kWriteExternal: |
1604 | case ByteCodeInstruction::kWriteExternal2: |
1605 | case ByteCodeInstruction::kWriteExternal3: |
1606 | case ByteCodeInstruction::kWriteExternal4: READ16(); break; |
1607 | |
1608 | case ByteCodeInstruction::kXorB: break; |
1609 | case ByteCodeInstruction::kMaskPush: break; |
1610 | case ByteCodeInstruction::kMaskPop: break; |
1611 | case ByteCodeInstruction::kMaskNegate: break; |
1612 | case ByteCodeInstruction::kMaskBlend: READ8(); break; |
1613 | case ByteCodeInstruction::kBranchIfAllFalse: READ16(); break; |
1614 | case ByteCodeInstruction::kLoopBegin: break; |
1615 | case ByteCodeInstruction::kLoopNext: break; |
1616 | case ByteCodeInstruction::kLoopMask: break; |
1617 | case ByteCodeInstruction::kLoopEnd: break; |
1618 | case ByteCodeInstruction::kLoopContinue: break; |
1619 | case ByteCodeInstruction::kLoopBreak: break; |
1620 | default: |
1621 | ip -= 2; |
1622 | printf("unknown(%d)\n" , READ16()); |
1623 | SkASSERT(false); |
1624 | } |
1625 | } |
1626 | #endif |
1627 | } |
1628 | |
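// Runs a function once (a single invocation, occupying lane 0). A rough usage sketch (assuming a
// ByteCode produced by the SkSL compiler, and the getFunction() lookup declared in
// SkSLByteCode.h; arg/return counts are in slots):
//
//     std::unique_ptr<SkSL::ByteCode> code = ...;  // compiled elsewhere
//     const SkSL::ByteCodeFunction* main = code->getFunction("main");
//     float in[2] = { 0.25f, 0.75f };
//     float out[4];
//     bool ok = code->run(main, in, 2, out, 4, /*uniforms=*/nullptr, 0);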
1629 | bool ByteCode::run(const ByteCodeFunction* f, |
1630 | float* args, int argCount, |
1631 | float* outReturn, int returnCount, |
1632 | const float* uniforms, int uniformCount) const { |
1633 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
1634 | Interpreter::VValue stack[128]; |
1635 | int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount; |
1636 | if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) { |
1637 | return false; |
1638 | } |
1639 | |
1640 | if (argCount != f->fParameterCount || |
1641 | returnCount != f->fReturnCount || |
1642 | uniformCount != fUniformSlotCount) { |
1643 | return false; |
1644 | } |
1645 | |
1646 | Interpreter::VValue globals[32]; |
1647 | if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) { |
1648 | return false; |
1649 | } |
1650 | |
    // Transpose args into the stack: each scalar lands in lane 0 of its own slot (N == 1 here)
1652 | { |
1653 | float* src = args; |
1654 | float* dst = (float*)stack; |
1655 | for (int i = 0; i < argCount; ++i) { |
1656 | *dst = *src++; |
1657 | dst += VecWidth; |
1658 | } |
1659 | } |
1660 | |
1661 | bool stripedOutput = false; |
1662 | float** outArray = outReturn ? &outReturn : nullptr; |
1663 | if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) { |
1664 | return false; |
1665 | } |
1666 | |
1667 | // Transpose out parameters back |
1668 | { |
1669 | float* dst = args; |
1670 | float* src = (float*)stack; |
1671 | for (const auto& p : f->fParameters) { |
1672 | if (p.fIsOutParameter) { |
1673 | for (int i = p.fSlotCount; i > 0; --i) { |
1674 | *dst++ = *src; |
1675 | src += VecWidth; |
1676 | } |
1677 | } else { |
1678 | dst += p.fSlotCount; |
1679 | src += p.fSlotCount * VecWidth; |
1680 | } |
1681 | } |
1682 | } |
1683 | |
1684 | return true; |
1685 | #else |
1686 | SkDEBUGFAIL("ByteCode interpreter not enabled" ); |
1687 | return false; |
1688 | #endif |
1689 | } |
1690 | |
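// Runs a function over N invocations in structure-of-arrays form: args[i] points at N values for
// parameter slot i. Invocations are processed VecWidth at a time, with the trailing partial batch
// handled by the execution mask.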
1691 | bool ByteCode::runStriped(const ByteCodeFunction* f, int N, |
1692 | float* args[], int argCount, |
1693 | float* outReturn[], int returnCount, |
1694 | const float* uniforms, int uniformCount) const { |
1695 | #if defined(SK_ENABLE_SKSL_INTERPRETER) |
1696 | Interpreter::VValue stack[128]; |
1697 | int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount; |
1698 | if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) { |
1699 | return false; |
1700 | } |
1701 | |
1702 | if (argCount != f->fParameterCount || |
1703 | returnCount != f->fReturnCount || |
1704 | uniformCount != fUniformSlotCount) { |
1705 | return false; |
1706 | } |
1707 | |
1708 | Interpreter::VValue globals[32]; |
1709 | if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) { |
1710 | return false; |
1711 | } |
1712 | |
    // InnerRun() just null-checks outReturn, so clear it if the return count is zero
1714 | if (returnCount == 0) { |
1715 | outReturn = nullptr; |
1716 | } |
1717 | |
1718 | int baseIndex = 0; |
1719 | |
1720 | while (N) { |
1721 | int w = std::min(N, VecWidth); |
1722 | |
1723 | // Copy args into stack |
1724 | for (int i = 0; i < argCount; ++i) { |
1725 | memcpy((void*)(stack + i), args[i], w * sizeof(float)); |
1726 | } |
1727 | |
1728 | bool stripedOutput = true; |
1729 | if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w, |
1730 | baseIndex)) { |
1731 | return false; |
1732 | } |
1733 | |
1734 | // Copy out parameters back |
1735 | int slot = 0; |
1736 | for (const auto& p : f->fParameters) { |
1737 | if (p.fIsOutParameter) { |
1738 | for (int i = slot; i < slot + p.fSlotCount; ++i) { |
1739 | memcpy(args[i], stack + i, w * sizeof(float)); |
1740 | } |
1741 | } |
1742 | slot += p.fSlotCount; |
1743 | } |
1744 | |
1745 | // Step each argument pointer ahead |
1746 | for (int i = 0; i < argCount; ++i) { |
1747 | args[i] += w; |
1748 | } |
1749 | N -= w; |
1750 | baseIndex += w; |
1751 | } |
1752 | |
1753 | return true; |
1754 | #else |
1755 | SkDEBUGFAIL("ByteCode interpreter not enabled" ); |
1756 | return false; |
1757 | #endif |
1758 | } |
1759 | |
1760 | } // namespace SkSL |
1761 | |
1762 | #endif |
1763 | |