SkSLByteCode.cpp source code [engine/third_party/skia/src/sksl/SkSLByteCode.cpp]

1	/*
2	* Copyright 2018 Google Inc.
3	*
4	* Use of this source code is governed by a BSD-style license that can be
5	* found in the LICENSE file.
6	*/
7
8	#ifndef SKSL_STANDALONE
9
10	#include "include/core/SkPoint3.h"
11	#include "include/private/SkVx.h"
12	#include "src/core/SkUtils.h" // sk_unaligned_load
13	#include "src/sksl/SkSLByteCode.h"
14	#include "src/sksl/SkSLByteCodeGenerator.h"
15	#include "src/sksl/SkSLExternalValue.h"
16
17	#include <functional>
18	#include <vector>
19
20	namespace SkSL {
21
22	#if defined(SK_ENABLE_SKSL_INTERPRETER)
23
24	constexpr int VecWidth = ByteCode::kVecWidth;
25
26	struct Interpreter {
27
28	using F32 = skvx::Vec<VecWidth, float>;
29	using I32 = skvx::Vec<VecWidth, int32_t>;
30	using U32 = skvx::Vec<VecWidth, uint32_t>;
31
// Bytecode stream readers. Each one expects a `const uint8_t* ip` to be in scope, advances it
// past the operand, and evaluates to the operand that was just consumed. Multi-byte operands are
// fetched with sk_unaligned_load because the stream gives no alignment guarantee.
#define READ8() (*(ip++))
#define READ16() (ip += 2, sk_unaligned_load<uint16_t>(ip - 2))
#define READ32() (ip += 4, sk_unaligned_load<uint32_t>(ip - 4))
#define READ_INST() (ip += sizeof(ByteCodeInstruction), \
                     sk_unaligned_load<ByteCodeInstruction>(ip - sizeof(ByteCodeInstruction)))
37
// Disassembly helper: emits a `case` for an instruction followed by a single 8-bit count operand.
#define DISASSEMBLE_COUNT(op, text) \
    case ByteCodeInstruction::op: printf(text " %d", READ8()); break;

// Disassembly helper: emits a `case` for an instruction followed by an 8-bit count and an 8-bit
// slot operand (read in that order).
#define DISASSEMBLE_COUNT_SLOT(op, text)       \
    case ByteCodeInstruction::op: {            \
        int N = READ8(),                       \
            slot = READ8();                    \
        printf(text " %d [%d]", N, slot);      \
    } break;
47
48	static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
49	auto inst = READ_INST();
50	printf("%02x ", (int)inst);
51	switch (inst) {
52	DISASSEMBLE_COUNT(kAddF, "addf")
53	DISASSEMBLE_COUNT(kAddI, "addi")
54	DISASSEMBLE_COUNT(kAndB, "andb")
55	DISASSEMBLE_COUNT(kATan, "atan")
56	case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break;
57	case ByteCodeInstruction::kCall: printf("call %d", READ8()); break;
58	case ByteCodeInstruction::kCallExternal: {
59	int argumentCount = READ8();
60	int returnCount = READ8();
61	int externalValue = READ8();
62	printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue);
63	break;
64	}
65	DISASSEMBLE_COUNT(kCeil, "ceil")
66	case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break;
67	DISASSEMBLE_COUNT(kCompareIEQ, "compareieq")
68	DISASSEMBLE_COUNT(kCompareINEQ, "compareineq")
69	DISASSEMBLE_COUNT(kCompareFEQ, "comparefeq")
70	DISASSEMBLE_COUNT(kCompareFNEQ, "comparefneq")
71	DISASSEMBLE_COUNT(kCompareFGT, "comparefgt")
72	DISASSEMBLE_COUNT(kCompareFGTEQ, "comparefgteq")
73	DISASSEMBLE_COUNT(kCompareFLT, "compareflt")
74	DISASSEMBLE_COUNT(kCompareFLTEQ, "compareflteq")
75	DISASSEMBLE_COUNT(kCompareSGT, "comparesgt")
76	DISASSEMBLE_COUNT(kCompareSGTEQ, "comparesgteq")
77	DISASSEMBLE_COUNT(kCompareSLT, "compareslt")
78	DISASSEMBLE_COUNT(kCompareSLTEQ, "compareslteq")
79	DISASSEMBLE_COUNT(kCompareUGT, "compareugt")
80	DISASSEMBLE_COUNT(kCompareUGTEQ, "compareugteq")
81	DISASSEMBLE_COUNT(kCompareULT, "compareult")
82	DISASSEMBLE_COUNT(kCompareULTEQ, "compareulteq")
83	DISASSEMBLE_COUNT(kConvertFtoI, "convertftoi")
84	DISASSEMBLE_COUNT(kConvertStoF, "convertstof")
85	DISASSEMBLE_COUNT(kConvertUtoF, "convertutof")
86	DISASSEMBLE_COUNT(kCos, "cos")
87	DISASSEMBLE_COUNT(kDivideF, "dividef")
88	DISASSEMBLE_COUNT(kDivideS, "divideS")
89	DISASSEMBLE_COUNT(kDivideU, "divideu")
90	DISASSEMBLE_COUNT(kDup, "dup")
91	DISASSEMBLE_COUNT(kFloor, "floor")
92	DISASSEMBLE_COUNT(kFract, "fract")
93	case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break;
94	case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break;
95	case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break;
96	DISASSEMBLE_COUNT(kLerp, "lerp")
97	DISASSEMBLE_COUNT_SLOT(kLoad, "load")
98	DISASSEMBLE_COUNT_SLOT(kLoadGlobal, "loadglobal")
99	DISASSEMBLE_COUNT_SLOT(kLoadUniform, "loaduniform")
100	DISASSEMBLE_COUNT(kLoadExtended, "loadextended")
101	DISASSEMBLE_COUNT(kLoadExtendedGlobal, "loadextendedglobal")
102	DISASSEMBLE_COUNT(kLoadExtendedUniform, "loadextendeduniform")
103	case ByteCodeInstruction::kLoadFragCoord: printf("loadfragcoord"); break;
104	case ByteCodeInstruction::kMatrixToMatrix: {
105	int srcCols = READ8();
106	int srcRows = READ8();
107	int dstCols = READ8();
108	int dstRows = READ8();
109	printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows);
110	break;
111	}
112	case ByteCodeInstruction::kMatrixMultiply: {
113	int lCols = READ8();
114	int lRows = READ8();
115	int rCols = READ8();
116	printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols);
117	break;
118	}
119	DISASSEMBLE_COUNT(kMaxF, "maxf")
120	DISASSEMBLE_COUNT(kMaxS, "maxs")
121	DISASSEMBLE_COUNT(kMinF, "minf")
122	DISASSEMBLE_COUNT(kMinS, "mins")
123	DISASSEMBLE_COUNT(kMix, "mix")
124	DISASSEMBLE_COUNT(kMultiplyF, "multiplyf")
125	DISASSEMBLE_COUNT(kMultiplyI, "multiplyi")
126	DISASSEMBLE_COUNT(kNegateF, "negatef")
127	DISASSEMBLE_COUNT(kNegateI, "negatei")
128	DISASSEMBLE_COUNT(kNotB, "notb")
129	DISASSEMBLE_COUNT(kOrB, "orb")
130	DISASSEMBLE_COUNT(kPop, "pop")
131	DISASSEMBLE_COUNT(kPow, "pow")
132	case ByteCodeInstruction::kPushImmediate: {
133	uint32_t v = READ32();
134	union { uint32_t u; float f; } pun = { v };
135	printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str());
136	break;
137	}
138	DISASSEMBLE_COUNT_SLOT(kReadExternal, "readexternal")
139	DISASSEMBLE_COUNT(kRemainderF, "remainderf")
140	DISASSEMBLE_COUNT(kRemainderS, "remainders")
141	DISASSEMBLE_COUNT(kRemainderU, "remainderu")
142	DISASSEMBLE_COUNT(kReserve, "reserve")
143	DISASSEMBLE_COUNT(kReturn, "return")
144	case ByteCodeInstruction::kSample: printf("sample %d", READ8()); break;
145	case ByteCodeInstruction::kSampleExplicit: printf("sampleExplicit %d", READ8()); break;
146	case ByteCodeInstruction::kSampleMatrix: printf("sampleMatrix %d", READ8()); break;
147	case ByteCodeInstruction::kScalarToMatrix: {
148	int cols = READ8();
149	int rows = READ8();
150	printf("scalartomatrix %dx%d", cols, rows);
151	break;
152	}
153	case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break;
154	case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break;
155	case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break;
156	DISASSEMBLE_COUNT(kSin, "sin")
157	DISASSEMBLE_COUNT(kSqrt, "sqrt")
158	DISASSEMBLE_COUNT_SLOT(kStore, "store")
159	DISASSEMBLE_COUNT_SLOT(kStoreGlobal, "storeglobal")
160	DISASSEMBLE_COUNT(kStoreExtended, "storeextended")
161	DISASSEMBLE_COUNT(kStoreExtendedGlobal, "storeextendedglobal")
162	DISASSEMBLE_COUNT(kSubtractF, "subtractf")
163	DISASSEMBLE_COUNT(kSubtractI, "subtracti")
164	case ByteCodeInstruction::kSwizzle: {
165	printf("swizzle %d, ", READ8());
166	int count = READ8();
167	printf("%d", count);
168	for (int i = `0`; i < count; ++i) {
169	printf(", %d", READ8());
170	}
171	break;
172	}
173	DISASSEMBLE_COUNT(kTan, "tan")
174	DISASSEMBLE_COUNT_SLOT(kWriteExternal, "writeexternal")
175	DISASSEMBLE_COUNT(kXorB, "xorb")
176	case ByteCodeInstruction::kMaskPush: printf("maskpush"); break;
177	case ByteCodeInstruction::kMaskPop: printf("maskpop"); break;
178	case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break;
179	case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break;
180	case ByteCodeInstruction::kBranchIfAllFalse:
181	printf("branchifallfalse %d", READ16());
182	break;
183	case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break;
184	case ByteCodeInstruction::kLoopNext: printf("loopnext"); break;
185	case ByteCodeInstruction::kLoopMask: printf("loopmask"); break;
186	case ByteCodeInstruction::kLoopEnd: printf("loopend"); break;
187	case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break;
188	case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break;
189	default:
190	ip -= sizeof(ByteCodeInstruction);
191	printf("unknown(%d)\n", (int) (intptr_t) READ_INST());
192	SkASSERT(false);
193	}
194	return ip;
195	}
196
// The VECTOR_* macros below each expand to one `case` of the interpreter switch. They expect
// `ip`, `sp`, `mask()` and POP() to be in scope, read an 8-bit component count from the stream,
// and finish with `continue` to fetch the next instruction.

// A naive implementation of / or % using skvx operations will likely crash with a divide by zero
// in inactive vector lanes, so we need to be sure to avoid masked-off lanes.
// TODO: Would it be better to do this with a select of (lane, 1) based on mask?
#define VECTOR_BINARY_MASKED_OP(inst, field, op)                    \
    case ByteCodeInstruction::inst: {                               \
        int count = READ8();                                        \
        /* The mask cannot change while this instruction runs. */   \
        const I32 activeLanes = mask();                             \
        for (int i = count; i > 0; --i) {                           \
            for (int j = 0; j < VecWidth; ++j) {                    \
                if (activeLanes[j]) {                               \
                    sp[-count].field[j] op ## = sp[0].field[j];     \
                }                                                   \
            }                                                       \
            POP();                                                  \
        }                                                           \
    } continue;

// Component-wise `lhs op rhs` on two `count`-wide operands; the result replaces the left operand
// and the right operand is popped.
#define VECTOR_BINARY_OP(inst, field, op)                           \
    case ByteCodeInstruction::inst: {                               \
        int count = READ8();                                        \
        for (int i = count; i > 0; --i) {                           \
            sp[-count] = sp[-count].field op sp[0].field;           \
            POP();                                                  \
        }                                                           \
    } continue;

// Same stack discipline as VECTOR_BINARY_OP, but combines the operands with fn(lhs, rhs).
#define VECTOR_BINARY_FN(inst, field, fn)                           \
    case ByteCodeInstruction::inst: {                               \
        int count = READ8();                                        \
        for (int i = count; i > 0; --i) {                           \
            sp[-count] = fn(sp[-count].field, sp[0].field);         \
            POP();                                                  \
        }                                                           \
    } continue;

// Applies fn in place to each of the top `count` stack values.
#define VECTOR_UNARY_FN(inst, fn, field)                            \
    case ByteCodeInstruction::inst: {                               \
        int count = READ8();                                        \
        for (int i = count; i --> 0; ) {                            \
            sp[-i] = fn(sp[-i].field);                              \
        }                                                           \
    } continue;
238
239	union VValue {
240	VValue() {}
241	VValue(F32 f) : fFloat(f) {}
242	VValue(I32 s) : fSigned(s) {}
243	VValue(U32 u) : fUnsigned(u) {}
244
245	F32 fFloat;
246	I32 fSigned;
247	U32 fUnsigned;
248	};
249
250	struct StackFrame {
251	const uint8_t* fCode;
252	const uint8_t* fIP;
253	VValue* fStack;
254	int fParameterCount;
255	};
256
257	static F32 VecMod(F32 a, F32 b) {
258	return a - skvx::trunc(a / b) * b;
259	}
260
// Shorthand for the float view of the stack slot at `index` relative to `sp`.
#define spf(index) sp[index].fFloat
262
263	static void CallExternal(const ByteCode* byteCode, const uint8_t& ip, VValue& sp,
264	int baseIndex, I32 mask) {
265	int argumentCount = READ8();
266	int returnCount = READ8();
267	int target = READ8();
268	ExternalValue* v = byteCode->fExternalValues[target];
269	sp -= argumentCount - `1`;
270
271	float tmpArgs[`4`];
272	float tmpReturn[`4`];
273	SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs));
274	SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn));
275
276	for (int i = `0`; i < VecWidth; ++i) {
277	if (mask[i]) {
278	for (int j = `0`; j < argumentCount; ++j) {
279	tmpArgs[j] = sp[j].fFloat[i];
280	}
281	v->call(baseIndex + i, tmpArgs, tmpReturn);
282	for (int j = `0`; j < returnCount; ++j) {
283	sp[j].fFloat[i] = tmpReturn[j];
284	}
285	}
286	}
287	sp += returnCount - `1`;
288	}
289
290	static void Inverse2x2(VValue* sp) {
291	F32 a = sp[-`3`].fFloat,
292	b = sp[-`2`].fFloat,
293	c = sp[-`1`].fFloat,
294	d = sp[ `0`].fFloat;
295	F32 idet = F32(`1`) / (ad - bc);
296	sp[-`3`].fFloat = d * idet;
297	sp[-`2`].fFloat = -b * idet;
298	sp[-`1`].fFloat = -c * idet;
299	sp[ `0`].fFloat = a * idet;
300	}
301
302	static void Inverse3x3(VValue* sp) {
303	F32 a11 = sp[-`8`].fFloat, a12 = sp[-`5`].fFloat, a13 = sp[-`2`].fFloat,
304	a21 = sp[-`7`].fFloat, a22 = sp[-`4`].fFloat, a23 = sp[-`1`].fFloat,
305	a31 = sp[-`6`].fFloat, a32 = sp[-`3`].fFloat, a33 = sp[ `0`].fFloat;
306	F32 idet = F32(`1`) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 -
307	a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31);
308	sp[-`8`].fFloat = (a22 * a33 - a23 * a32) * idet;
309	sp[-`7`].fFloat = (a23 * a31 - a21 * a33) * idet;
310	sp[-`6`].fFloat = (a21 * a32 - a22 * a31) * idet;
311	sp[-`5`].fFloat = (a13 * a32 - a12 * a33) * idet;
312	sp[-`4`].fFloat = (a11 * a33 - a13 * a31) * idet;
313	sp[-`3`].fFloat = (a12 * a31 - a11 * a32) * idet;
314	sp[-`2`].fFloat = (a12 * a23 - a13 * a22) * idet;
315	sp[-`1`].fFloat = (a13 * a21 - a11 * a23) * idet;
316	sp[ `0`].fFloat = (a11 * a22 - a12 * a21) * idet;
317	}
318
319	static void Inverse4x4(VValue* sp) {
320	F32 a00 = spf(-`15`), a10 = spf(-`11`), a20 = spf( -`7`), a30 = spf( -`3`),
321	a01 = spf(-`14`), a11 = spf(-`10`), a21 = spf( -`6`), a31 = spf( -`2`),
322	a02 = spf(-`13`), a12 = spf( -`9`), a22 = spf( -`5`), a32 = spf( -`1`),
323	a03 = spf(-`12`), a13 = spf( -`8`), a23 = spf( -`4`), a33 = spf( `0`);
324
325	F32 b00 = a00 * a11 - a01 * a10,
326	b01 = a00 * a12 - a02 * a10,
327	b02 = a00 * a13 - a03 * a10,
328	b03 = a01 * a12 - a02 * a11,
329	b04 = a01 * a13 - a03 * a11,
330	b05 = a02 * a13 - a03 * a12,
331	b06 = a20 * a31 - a21 * a30,
332	b07 = a20 * a32 - a22 * a30,
333	b08 = a20 * a33 - a23 * a30,
334	b09 = a21 * a32 - a22 * a31,
335	b10 = a21 * a33 - a23 * a31,
336	b11 = a22 * a33 - a23 * a32;
337
338	F32 idet = F32(`1`) /
339	(b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
340
341	b00 *= idet;
342	b01 *= idet;
343	b02 *= idet;
344	b03 *= idet;
345	b04 *= idet;
346	b05 *= idet;
347	b06 *= idet;
348	b07 *= idet;
349	b08 *= idet;
350	b09 *= idet;
351	b10 *= idet;
352	b11 *= idet;
353
354	spf(-`15`) = a11 * b11 - a12 * b10 + a13 * b09;
355	spf(-`14`) = a02 * b10 - a01 * b11 - a03 * b09;
356	spf(-`13`) = a31 * b05 - a32 * b04 + a33 * b03;
357	spf(-`12`) = a22 * b04 - a21 * b05 - a23 * b03;
358	spf(-`11`) = a12 * b08 - a10 * b11 - a13 * b07;
359	spf(-`10`) = a00 * b11 - a02 * b08 + a03 * b07;
360	spf( -`9`) = a32 * b02 - a30 * b05 - a33 * b01;
361	spf( -`8`) = a20 * b05 - a22 * b02 + a23 * b01;
362	spf( -`7`) = a10 * b10 - a11 * b08 + a13 * b06;
363	spf( -`6`) = a01 * b08 - a00 * b10 - a03 * b06;
364	spf( -`5`) = a30 * b04 - a31 * b02 + a33 * b00;
365	spf( -`4`) = a21 * b02 - a20 * b04 - a23 * b00;
366	spf( -`3`) = a11 * b07 - a10 * b09 - a12 * b06;
367	spf( -`2`) = a00 * b09 - a01 * b07 + a02 * b06;
368	spf( -`1`) = a31 * b01 - a30 * b03 - a32 * b00;
369	spf( `0`) = a20 * b03 - a21 * b01 + a22 * b00;
370	}
371
372	static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack,
373	float* outReturn[], VValue globals[], const float uniforms[],
374	bool stripedOutput, int N, int baseIndex) {
375	// Needs to be the first N non-negative integers, at least as large as VecWidth
376	static const Interpreter::I32 gLanes = {
377	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`
378	};
379
380	VValue* sp = stack + f->fParameterCount + f->fLocalCount - `1`;
381
382	#define POP() (*(sp--))
383	#define PUSH(v) (sp[1] = v, ++sp)
384
385	const uint8_t* code = f->fCode.data();
386	const uint8_t* ip = code;
387	std::vector<StackFrame> frames;
388
389	I32 condStack[`16`]; // Independent condition masks
390	I32 maskStack[`16`]; // Combined masks (eg maskStack[0] & maskStack[1] & ...)
391	I32 contStack[`16`]; // Continue flags for loops
392	I32 loopStack[`16`]; // Loop execution masks
393	condStack[`0`] = maskStack[`0`] = (gLanes < N);
394	contStack[`0`] = I32( `0`);
395	loopStack[`0`] = I32(~`0`);
396	I32* condPtr = condStack;
397	I32* maskPtr = maskStack;
398	I32* contPtr = contStack;
399	I32* loopPtr = loopStack;
400
401	if (f->fConditionCount + `1` > (int)SK_ARRAY_COUNT(condStack) \|\|
402	f->fLoopCount + `1` > (int)SK_ARRAY_COUNT(loopStack)) {
403	return false;
404	}
405
406	auto mask = [&]() { return maskPtr & loopPtr; };
407
408	for (;;) {
409	#ifdef TRACE
410	printf("at %3d ", (int) (ip - code));
411	disassemble_instruction(ip);
412	printf(" (stack: %d)\n", (int) (sp - stack) + `1`);
413	#endif
414	ByteCodeInstruction inst = READ_INST();
415	switch (inst) {
416
417	VECTOR_BINARY_OP(kAddF, fFloat, +)
418	VECTOR_BINARY_OP(kAddI, fSigned, +)
419
420	// Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want:
421	VECTOR_BINARY_OP(kAndB, fSigned, &)
422	VECTOR_BINARY_OP(kOrB, fSigned, \|)
423	VECTOR_BINARY_OP(kXorB, fSigned, ^)
424	VECTOR_UNARY_FN(kNotB, std::bit_not<>{}, fSigned)
425
426	case ByteCodeInstruction::kBranch:
427	ip = code + READ16();
428	continue;
429
430	case ByteCodeInstruction::kCall: {
431	// Precursor code reserved space for the return value, and pushed all parameters to
432	// the stack. Update our bottom of stack to point at the first parameter, and our
433	// sp to point past those parameters (plus space for locals).
434	int target = READ8();
435	const ByteCodeFunction* f = byteCode->fFunctions[target].get();
436	if (skvx::any(mask())) {
437	frames.push_back({ code, ip, stack, f->fParameterCount });
438	ip = code = f->fCode.data();
439	stack = sp - f->fParameterCount + `1`;
440	sp = stack + f->fParameterCount + f->fLocalCount - `1`;
441	// As we did in runStriped(), zero locals so they're safe to mask-store into.
442	for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
443	stack[i].fFloat = `0.0f`;
444	}
445	}
446	} continue;
447
448	case ByteCodeInstruction::kCallExternal:
449	CallExternal(byteCode, ip, sp, baseIndex, mask());
450	continue;
451
452	VECTOR_UNARY_FN(kCeil, skvx::ceil, fFloat)
453
454	case ByteCodeInstruction::kClampIndex: {
455	int length = READ8();
456	if (skvx::any(mask() & ((sp[`0`].fSigned < `0`) \| (sp[`0`].fSigned >= length)))) {
457	return false;
458	}
459	} continue;
460
461	VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==)
462	VECTOR_BINARY_OP(kCompareFEQ, fFloat, ==)
463	VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=)
464	VECTOR_BINARY_OP(kCompareFNEQ, fFloat, !=)
465	VECTOR_BINARY_OP(kCompareSGT, fSigned, >)
466	VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >)
467	VECTOR_BINARY_OP(kCompareFGT, fFloat, >)
468	VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=)
469	VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=)
470	VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=)
471	VECTOR_BINARY_OP(kCompareSLT, fSigned, <)
472	VECTOR_BINARY_OP(kCompareULT, fUnsigned, <)
473	VECTOR_BINARY_OP(kCompareFLT, fFloat, <)
474	VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=)
475	VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=)
476	VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=)
477
478	VECTOR_UNARY_FN(kConvertFtoI, skvx::cast<int>, fFloat)
479	VECTOR_UNARY_FN(kConvertStoF, skvx::cast<float>, fSigned)
480	VECTOR_UNARY_FN(kConvertUtoF, skvx::cast<float>, fUnsigned)
481
482	VECTOR_UNARY_FN(kCos, skvx::cos, fFloat)
483
484	VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /)
485	VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /)
486	VECTOR_BINARY_OP(kDivideF, fFloat, /)
487
488	case ByteCodeInstruction::kDup: {
489	int count = READ8();
490	memcpy(sp + `1`, sp - count + `1`, count * sizeof(VValue));
491	sp += count;
492	} continue;
493
494	VECTOR_UNARY_FN(kFloor, skvx::floor, fFloat)
495	VECTOR_UNARY_FN(kFract, skvx::fract, fFloat)
496
497	case ByteCodeInstruction::kInverse2x2:
498	Inverse2x2(sp);
499	continue;
500	case ByteCodeInstruction::kInverse3x3:
501	Inverse3x3(sp);
502	continue;
503	case ByteCodeInstruction::kInverse4x4:
504	Inverse4x4(sp);
505	continue;
506
507	case ByteCodeInstruction::kLerp: {
508	int count = READ8();
509	VValue* T = sp - count + `1`,
510	* B = T - count,
511	* A = B - count;
512	for (int i = count; i --> `0`; ) {
513	A[i].fFloat += (B[i].fFloat - A[i].fFloat) * T[i].fFloat;
514	}
515	sp -= `2` * count;
516	} continue;
517
518	case ByteCodeInstruction::kLoad: {
519	int count = READ8(),
520	slot = READ8();
521	memcpy(sp + `1`, stack + slot, count * sizeof(VValue));
522	sp += count;
523	} continue;
524
525	case ByteCodeInstruction::kLoadGlobal: {
526	int count = READ8(),
527	slot = READ8();
528	memcpy(sp + `1`, globals + slot, count * sizeof(VValue));
529	sp += count;
530	} continue;
531
532	case ByteCodeInstruction::kLoadUniform: {
533	int count = READ8(),
534	slot = READ8();
535	for (int i = `0`; i < count; ++i) {
536	sp[i + `1`].fFloat = uniforms[slot + i];
537	}
538	sp += count;
539	} continue;
540
541	case ByteCodeInstruction::kLoadExtended: {
542	int count = READ8();
543	I32 src = POP().fSigned;
544	I32 m = mask();
545	for (int i = `0`; i < count; ++i) {
546	for (int j = `0`; j < VecWidth; ++j) {
547	if (m[j]) {
548	sp[i + `1`].fSigned[j] = stack[src[j] + i].fSigned[j];
549	}
550	}
551	}
552	sp += count;
553	} continue;
554
555	case ByteCodeInstruction::kLoadExtendedGlobal: {
556	int count = READ8();
557	I32 src = POP().fSigned;
558	I32 m = mask();
559	for (int i = `0`; i < count; ++i) {
560	for (int j = `0`; j < VecWidth; ++j) {
561	if (m[j]) {
562	sp[i + `1`].fSigned[j] = globals[src[j] + i].fSigned[j];
563	}
564	}
565	}
566	sp += count;
567	} continue;
568
569	case ByteCodeInstruction::kLoadExtendedUniform: {
570	int count = READ8();
571	I32 src = POP().fSigned;
572	I32 m = mask();
573	for (int i = `0`; i < count; ++i) {
574	for (int j = `0`; j < VecWidth; ++j) {
575	if (m[j]) {
576	sp[i + `1`].fFloat[j] = uniforms[src[j] + i];
577	}
578	}
579	}
580	sp += count;
581	} continue;
582
583	case ByteCodeInstruction::kMatrixToMatrix: {
584	int srcCols = READ8();
585	int srcRows = READ8();
586	int dstCols = READ8();
587	int dstRows = READ8();
588	SkASSERT(srcCols >= `2` && srcCols <= `4`);
589	SkASSERT(srcRows >= `2` && srcRows <= `4`);
590	SkASSERT(dstCols >= `2` && dstCols <= `4`);
591	SkASSERT(dstRows >= `2` && dstRows <= `4`);
592	F32 tmp[`16`];
593	memset(tmp, `0`, sizeof(tmp));
594	tmp[`0`] = tmp[`5`] = tmp[`10`] = tmp[`15`] = F32(`1.0f`);
595	for (int c = srcCols - `1`; c >= `0`; --c) {
596	for (int r = srcRows - `1`; r >= `0`; --r) {
597	tmp[c*`4` + r] = POP().fFloat;
598	}
599	}
600	for (int c = `0`; c < dstCols; ++c) {
601	for (int r = `0`; r < dstRows; ++r) {
602	PUSH(tmp[c*`4` + r]);
603	}
604	}
605	} continue;
606
607	case ByteCodeInstruction::kMatrixMultiply: {
608	int lCols = READ8();
609	int lRows = READ8();
610	int rCols = READ8();
611	int rRows = lCols;
612	F32 tmp[`16`] = { `0.0f` };
613	F32* B = &(sp - (rCols * rRows) + `1`)->fFloat;
614	F32* A = B - (lCols * lRows);
615	for (int c = `0`; c < rCols; ++c) {
616	for (int r = `0`; r < lRows; ++r) {
617	for (int j = `0`; j < lCols; ++j) {
618	tmp[clRows + r] += A[jlRows + r] * B[c*rRows + j];
619	}
620	}
621	}
622	sp -= (lCols * lRows) + (rCols * rRows);
623	memcpy(sp + `1`, tmp, rCols * lRows * sizeof(VValue));
624	sp += (rCols * lRows);
625	} continue;
626
627	VECTOR_BINARY_FN(kMaxF, fFloat, skvx::max)
628	VECTOR_BINARY_FN(kMaxS, fSigned, skvx::max)
629	VECTOR_BINARY_FN(kMinF, fFloat, skvx::min)
630	VECTOR_BINARY_FN(kMinS, fSigned, skvx::min)
631
632	case ByteCodeInstruction::kMix: {
633	int count = READ8();
634	for (int i = count; i --> `0`; ) {
635	// GLSL's arguments are mix(else, true, cond)
636	sp[-(`2`*count + i)] = skvx::if_then_else(sp[-( i)].fSigned,
637	sp[-( count + i)].fFloat,
638	sp[-(`2`*count + i)].fFloat);
639	}
640	sp -= `2` * count;
641	} continue;
642
643	VECTOR_BINARY_OP(kMultiplyI, fSigned, *)
644	VECTOR_BINARY_OP(kMultiplyF, fFloat, *)
645
646	VECTOR_UNARY_FN(kNegateF, std::negate<>{}, fFloat)
647	VECTOR_UNARY_FN(kNegateI, std::negate<>{}, fSigned)
648
649	case ByteCodeInstruction::kPop:
650	sp -= READ8();
651	continue;
652
653	VECTOR_BINARY_FN(kPow, fFloat, skvx::pow)
654
655	case ByteCodeInstruction::kPushImmediate:
656	PUSH(U32(READ32()));
657	continue;
658
659	case ByteCodeInstruction::kReadExternal: {
660	int count = READ8(),
661	slot = READ8();
662	SkASSERT(count <= `4`);
663	float tmp[`4`];
664	I32 m = mask();
665	for (int i = `0`; i < VecWidth; ++i) {
666	if (m[i]) {
667	byteCode->fExternalValues[slot]->read(baseIndex + i, tmp);
668	for (int j = `0`; j < count; ++j) {
669	sp[j + `1`].fFloat[i] = tmp[j];
670	}
671	}
672	}
673	sp += count;
674	} continue;
675
676	VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod)
677	VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %)
678	VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %)
679
680	case ByteCodeInstruction::kReserve:
681	sp += READ8();
682	continue;
683
684	case ByteCodeInstruction::kReturn: {
685	int count = READ8();
686	if (frames.empty()) {
687	if (outReturn) {
688	VValue* src = sp - count + `1`;
689	if (stripedOutput) {
690	for (int i = `0`; i < count; ++i) {
691	memcpy(outReturn[i], &src->fFloat, N * sizeof(float));
692	++src;
693	}
694	} else {
695	float* outPtr = outReturn[`0`];
696	for (int i = `0`; i < count; ++i) {
697	for (int j = `0`; j < N; ++j) {
698	outPtr[count * j] = src->fFloat[j];
699	}
700	++outPtr;
701	++src;
702	}
703	}
704	}
705	return true;
706	} else {
707	// When we were called, the caller reserved stack space for their copy of our
708	// return value, then 'stack' was positioned after that, where our parameters
709	// were placed. Copy our return values to their reserved area.
710	memcpy(stack - count, sp - count + `1`, count * sizeof(VValue));
711
712	// Now move the stack pointer to the end of the passed-in parameters. This odd
713	// calling convention requires the caller to pop the arguments after calling,
714	// but allows them to store any out-parameters back during that unwinding.
715	// After that sequence finishes, the return value will be the top of the stack.
716	const StackFrame& frame(frames.back());
717	sp = stack + frame.fParameterCount - `1`;
718	stack = frame.fStack;
719	code = frame.fCode;
720	ip = frame.fIP;
721	frames.pop_back();
722	}
723	} continue;
724
725	case ByteCodeInstruction::kScalarToMatrix: {
726	int cols = READ8();
727	int rows = READ8();
728	VValue v = POP();
729	for (int c = `0`; c < cols; ++c) {
730	for (int r = `0`; r < rows; ++r) {
731	PUSH(c == r ? v : F32(`0.0f`));
732	}
733	}
734	} continue;
735
736	case ByteCodeInstruction::kShiftLeft:
737	sp[`0`] = sp[`0`].fSigned << READ8();
738	continue;
739	case ByteCodeInstruction::kShiftRightS:
740	sp[`0`] = sp[`0`].fSigned >> READ8();
741	continue;
742	case ByteCodeInstruction::kShiftRightU:
743	sp[`0`] = sp[`0`].fUnsigned >> READ8();
744	continue;
745
746	VECTOR_UNARY_FN(kSin, skvx::sin, fFloat)
747	VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat)
748
749	case ByteCodeInstruction::kStore: {
750	int count = READ8(),
751	slot = READ8();
752	auto m = mask();
753	for (int i = count; i --> `0`; ) {
754	stack[slot+i] = skvx::if_then_else(m, POP().fFloat, stack[slot+i].fFloat);
755	}
756	} continue;
757
758	case ByteCodeInstruction::kStoreGlobal: {
759	int count = READ8(),
760	slot = READ8();
761	auto m = mask();
762	for (int i = count; i --> `0`; ) {
763	globals[slot+i] = skvx::if_then_else(m, POP().fFloat, globals[slot+i].fFloat);
764	}
765	} continue;
766
767	case ByteCodeInstruction::kStoreExtended: {
768	int count = READ8();
769	I32 target = POP().fSigned;
770	VValue* src = sp - count + `1`;
771	I32 m = mask();
772	for (int i = `0`; i < count; ++i) {
773	for (int j = `0`; j < VecWidth; ++j) {
774	if (m[j]) {
775	stack[target[j] + i].fSigned[j] = src[i].fSigned[j];
776	}
777	}
778	}
779	sp -= count;
780	} continue;
781
782	case ByteCodeInstruction::kStoreExtendedGlobal: {
783	int count = READ8();
784	I32 target = POP().fSigned;
785	VValue* src = sp - count + `1`;
786	I32 m = mask();
787	for (int i = `0`; i < count; ++i) {
788	for (int j = `0`; j < VecWidth; ++j) {
789	if (m[j]) {
790	globals[target[j] + i].fSigned[j] = src[i].fSigned[j];
791	}
792	}
793	}
794	sp -= count;
795	} continue;
796
797	VECTOR_BINARY_OP(kSubtractI, fSigned, -)
798	VECTOR_BINARY_OP(kSubtractF, fFloat, -)
799
800	case ByteCodeInstruction::kSwizzle: {
801	VValue tmp[`4`];
802	for (int i = READ8() - `1`; i >= `0`; --i) {
803	tmp[i] = POP();
804	}
805	for (int i = READ8() - `1`; i >= `0`; --i) {
806	PUSH(tmp[READ8()]);
807	}
808	} continue;
809
810	VECTOR_UNARY_FN(kATan, skvx::atan, fFloat)
811	VECTOR_UNARY_FN(kTan, skvx::tan, fFloat)
812
813	case ByteCodeInstruction::kWriteExternal: {
814	int count = READ8(),
815	slot = READ8();
816	SkASSERT(count <= `4`);
817	float tmp[`4`];
818	I32 m = mask();
819	sp -= count;
820	for (int i = `0`; i < VecWidth; ++i) {
821	if (m[i]) {
822	for (int j = `0`; j < count; ++j) {
823	tmp[j] = sp[j + `1`].fFloat[i];
824	}
825	byteCode->fExternalValues[slot]->write(baseIndex + i, tmp);
826	}
827	}
828	} continue;
829
830	case ByteCodeInstruction::kMaskPush:
831	condPtr[`1`] = POP().fSigned;
832	maskPtr[`1`] = maskPtr[`0`] & condPtr[`1`];
833	++condPtr; ++maskPtr;
834	continue;
835	case ByteCodeInstruction::kMaskPop:
836	--condPtr; --maskPtr;
837	continue;
838	case ByteCodeInstruction::kMaskNegate:
839	maskPtr[`0`] = maskPtr[-`1`] & ~condPtr[`0`];
840	continue;
841	case ByteCodeInstruction::kMaskBlend: {
842	int count = READ8();
843	I32 m = condPtr[`0`];
844	--condPtr; --maskPtr;
845	for (int i = `0`; i < count; ++i) {
846	sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[`0`].fFloat);
847	--sp;
848	}
849	} continue;
850	case ByteCodeInstruction::kBranchIfAllFalse: {
851	int target = READ16();
852	if (!skvx::any(mask())) {
853	ip = code + target;
854	}
855	} continue;
856
857	case ByteCodeInstruction::kLoopBegin:
858	contPtr[`1`] = `0`;
859	loopPtr[`1`] = loopPtr[`0`];
860	++contPtr; ++loopPtr;
861	continue;
862	case ByteCodeInstruction::kLoopNext:
863	loopPtr \|= contPtr;
864	*contPtr = `0`;
865	continue;
866	case ByteCodeInstruction::kLoopMask:
867	*loopPtr &= POP().fSigned;
868	continue;
869	case ByteCodeInstruction::kLoopEnd:
870	--contPtr; --loopPtr;
871	continue;
872	case ByteCodeInstruction::kLoopBreak:
873	*loopPtr &= ~mask();
874	continue;
875	case ByteCodeInstruction::kLoopContinue: {
876	I32 m = mask();
877	*contPtr \|= m;
878	*loopPtr &= ~m;
879	} continue;
880
881	case ByteCodeInstruction::kLoadFragCoord:
882	case ByteCodeInstruction::kSample:
883	case ByteCodeInstruction::kSampleExplicit:
884	case ByteCodeInstruction::kSampleMatrix:
885	default:
886	// TODO: Support these?
887	SkASSERT(false);
888	return false;
889	}
890	}
891	}
892
893	}; // class Interpreter
894
895	#endif // SK_ENABLE_SKSL_INTERPRETER
896
897	#undef spf
898
899	void ByteCodeFunction::disassemble() const {
900	#if defined(SK_ENABLE_SKSL_INTERPRETER)
901	const uint8_t* ip = fCode.data();
902	while (ip < fCode.data() + fCode.size()) {
903	printf("%d: ", (int)(ip - fCode.data()));
904	ip = Interpreter::DisassembleInstruction(ip);
905	printf("\n");
906	}
907	#endif
908	}
909
910	bool ByteCode::run(const ByteCodeFunction* f,
911	float* args, int argCount,
912	float* outReturn, int returnCount,
913	const float* uniforms, int uniformCount) const {
914	#if defined(SK_ENABLE_SKSL_INTERPRETER)
915	Interpreter::VValue stack[`128`];
916	int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
917	if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
918	return false;
919	}
920
921	if (argCount != f->fParameterCount \|\|
922	returnCount != f->fReturnCount \|\|
923	uniformCount != fUniformSlotCount) {
924	return false;
925	}
926
927	Interpreter::VValue globals[`32`];
928	if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
929	return false;
930	}
931
932	// Transpose args into stack
933	{
934	float* src = args;
935	float* dst = (float*)stack;
936	for (int i = `0`; i < argCount; ++i) {
937	dst = src++;
938	dst += VecWidth;
939	}
940	}
941
942	bool stripedOutput = false;
943	float outArray = outReturn ? &outReturn : nullptr**;
944	if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, `1`, `0`)) {
945	return false;
946	}
947
948	// Transpose out parameters back
949	{
950	float* dst = args;
951	float* src = (float*)stack;
952	for (const auto& p : f->fParameters) {
953	if (p.fIsOutParameter) {
954	for (int i = p.fSlotCount; i > `0`; --i) {
955	dst++ = src;
956	src += VecWidth;
957	}
958	} else {
959	dst += p.fSlotCount;
960	src += p.fSlotCount * VecWidth;
961	}
962	}
963	}
964
965	return true;
966	#else
967	SkDEBUGFAIL("ByteCode interpreter not enabled");
968	return false;
969	#endif
970	}
971
972	bool ByteCode::runStriped(const ByteCodeFunction* f, int N,
973	float* args[], int argCount,
974	float* outReturn[], int returnCount,
975	const float* uniforms, int uniformCount) const {
976	#if defined(SK_ENABLE_SKSL_INTERPRETER)
977	Interpreter::VValue stack[`192`];
978	int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount;
979	if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) {
980	return false;
981	}
982
983	if (argCount != f->fParameterCount \|\|
984	returnCount != f->fReturnCount \|\|
985	uniformCount != fUniformSlotCount) {
986	return false;
987	}
988
989	Interpreter::VValue globals[`32`];
990	if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) {
991	return false;
992	}
993
994	// innerRun just takes outArgs, so clear it if the count is zero
995	if (returnCount == `0`) {
996	outReturn = nullptr;
997	}
998
999	// The instructions to store to locals and globals mask in the original value,
1000	// so they technically need to be initialized (to any value).
1001	for (int i = f->fParameterCount; i < f->fParameterCount + f->fLocalCount; i++) {
1002	stack[i].fFloat = `0.0f`;
1003	}
1004	for (int i = `0`; i < fGlobalSlotCount; i++) {
1005	globals[i].fFloat = `0.0f`;
1006	}
1007
1008	int baseIndex = `0`;
1009
1010	while (N) {
1011	int w = std::min(N, VecWidth);
1012
1013	// Copy args into stack
1014	for (int i = `0`; i < argCount; ++i) {
1015	memcpy((void)(stack + i), args[i], w sizeof(float));
1016	}
1017
1018	bool stripedOutput = true;
1019	if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w,
1020	baseIndex)) {
1021	return false;
1022	}
1023
1024	// Copy out parameters back
1025	int slot = `0`;
1026	for (const auto& p : f->fParameters) {
1027	if (p.fIsOutParameter) {
1028	for (int i = slot; i < slot + p.fSlotCount; ++i) {
1029	memcpy(args[i], stack + i, w * sizeof(float));
1030	}
1031	}
1032	slot += p.fSlotCount;
1033	}
1034
1035	// Step each argument pointer ahead
1036	for (int i = `0`; i < argCount; ++i) {
1037	args[i] += w;
1038	}
1039	N -= w;
1040	baseIndex += w;
1041	}
1042
1043	return true;
1044	#else
1045	SkDEBUGFAIL("ByteCode interpreter not enabled");
1046	return false;
1047	#endif
1048	}
1049
1050	} // namespace SkSL
1051
1052	#endif
1053

Browse the source code of engine/third_party/skia/src/sksl/SkSLByteCode.cpp