SkVM.h source code [engine/third_party/skia/src/core/SkVM.h]

1	/*
2	* Copyright 2019 Google LLC
3	*
4	* Use of this source code is governed by a BSD-style license that can be
5	* found in the LICENSE file.
6	*/
7
8	#ifndef SkVM_DEFINED
9	#define SkVM_DEFINED
10
11	#include "include/core/SkBlendMode.h"
12	#include "include/core/SkColor.h"
13	#include "include/private/SkMacros.h"
14	#include "include/private/SkTArray.h"
15	#include "include/private/SkTHash.h"
16	#include "src/core/SkSpan.h"
17	#include "src/core/SkVM_fwd.h"
18	#include <vector> // std::vector
19
20	class SkWStream;
21
22	#if defined(SKVM_JIT_WHEN_POSSIBLE) // SKVM_JIT is only ever defined when the build opts in.
23	#if defined(__x86_64__) || defined(_M_X64)
24	#if defined(_WIN32) || defined(__linux) || defined(__APPLE__)
25	#define SKVM_JIT
26	#endif // _WIN32 || __linux || __APPLE__
27	#endif // __x86_64__ || _M_X64
28	#if defined(__aarch64__)
29	#if defined(__ANDROID__)
30	#define SKVM_JIT
31	#endif // __ANDROID__
32	#endif // __aarch64__
33	#endif // SKVM_JIT_WHEN_POSSIBLE
34
35	#if 0 // Change to 1 to define SKVM_LLVM.
36	#define SKVM_LLVM
37	#endif
38
39	#if 0 // Change to 1 to undefine SKVM_JIT regardless of the platform checks above.
40	#undef SKVM_JIT
41	#endif
42
43	namespace skvm {
44
45	bool fma_supported(); // NOTE(review): presumably reports whether fused multiply-add can be used on the host; defined elsewhere -- confirm.
46
47	class Assembler {
48	public:
49	explicit Assembler(void* buf);
50
51	size_t size() const;
52
53	// Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each.
54	enum GP64 {
55	rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
56	r8 , r9 , r10, r11, r12, r13, r14, r15,
57	};
58	enum Xmm {
59	xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 ,
60	xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
61	};
62	enum Ymm {
63	ymm0, ymm1, ymm2 , ymm3 , ymm4 , ymm5 , ymm6 , ymm7 ,
64	ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
65	};
66
67	// X and V values match 5-bit encoding for each (nothing tricky).
68	enum X {
69	x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 ,
70	x8 , x9 , x10, x11, x12, x13, x14, x15,
71	x16, x17, x18, x19, x20, x21, x22, x23,
72	x24, x25, x26, x27, x28, x29, x30, xzr, sp=xzr,
73	};
74	enum V {
75	v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 ,
76	v8 , v9 , v10, v11, v12, v13, v14, v15,
77	v16, v17, v18, v19, v20, v21, v22, v23,
78	v24, v25, v26, v27, v28, v29, v30, v31,
79	};
80
81	void bytes(const void, int*);
82	void byte(uint8_t);
83	void word(uint32_t);
84
85	struct Label {
86	int offset = `0`;
87	enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
88	SkSTArray<`2`, int> references;
89	};
90
91	// x86-64
92
93	void align(int mod);
94
95	void int3();
96	void vzeroupper();
97	void ret();
98
99	// Mem represents a value at base + disp + scaleindex,*
100	// or simply at base + disp if index=rsp.
101	enum Scale { ONE, TWO, FOUR, EIGHT };
102	struct Mem {
103	GP64 base;
104	int disp = `0`;
105	GP64 index = rsp;
106	Scale scale = ONE;
107	};
108
109	struct Operand {
110	union {
111	int reg;
112	Mem mem;
113	Label* label;
114	};
115	enum { REG, MEM, LABEL } kind;
116
117	Operand(GP64 r) : reg (r), kind(REG ) {}
118	Operand(Xmm r) : reg (r), kind(REG ) {}
119	Operand(Ymm r) : reg (r), kind(REG ) {}
120	Operand(Mem m) : mem (m), kind(MEM ) {}
121	Operand(Label* l) : label(l), kind(LABEL) {}
122	};
123
124	void vpand (Ymm dst, Ymm x, Operand y);
125	void vpandn(Ymm dst, Ymm x, Operand y);
126	void vpor (Ymm dst, Ymm x, Operand y);
127	void vpxor (Ymm dst, Ymm x, Operand y);
128
129	void vpaddd (Ymm dst, Ymm x, Operand y);
130	void vpsubd (Ymm dst, Ymm x, Operand y);
131	void vpmulld(Ymm dst, Ymm x, Operand y);
132
133	void vpsubw (Ymm dst, Ymm x, Operand y);
134	void vpmullw(Ymm dst, Ymm x, Operand y);
135
136	void vaddps(Ymm dst, Ymm x, Operand y);
137	void vsubps(Ymm dst, Ymm x, Operand y);
138	void vmulps(Ymm dst, Ymm x, Operand y);
139	void vdivps(Ymm dst, Ymm x, Operand y);
140	void vminps(Ymm dst, Ymm x, Operand y);
141	void vmaxps(Ymm dst, Ymm x, Operand y);
142
143	void vsqrtps(Ymm dst, Operand x);
144
145	void vfmadd132ps(Ymm dst, Ymm x, Operand y);
146	void vfmadd213ps(Ymm dst, Ymm x, Operand y);
147	void vfmadd231ps(Ymm dst, Ymm x, Operand y);
148
149	void vfmsub132ps(Ymm dst, Ymm x, Operand y);
150	void vfmsub213ps(Ymm dst, Ymm x, Operand y);
151	void vfmsub231ps(Ymm dst, Ymm x, Operand y);
152
153	void vfnmadd132ps(Ymm dst, Ymm x, Operand y);
154	void vfnmadd213ps(Ymm dst, Ymm x, Operand y);
155	void vfnmadd231ps(Ymm dst, Ymm x, Operand y);
156
157	void vpackusdw(Ymm dst, Ymm x, Operand y);
158	void vpackuswb(Ymm dst, Ymm x, Operand y);
159
160	void vpunpckldq(Ymm dst, Ymm x, Operand y);
161	void vpunpckhdq(Ymm dst, Ymm x, Operand y);
162
163	void vpcmpeqd(Ymm dst, Ymm x, Operand y);
164	void vpcmpgtd(Ymm dst, Ymm x, Operand y);
165
166	void vcmpps (Ymm dst, Ymm x, Operand y, int imm);
167	void vcmpeqps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,`0`); }
168	void vcmpltps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,`1`); }
169	void vcmpleps (Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,`2`); }
170	void vcmpneqps(Ymm dst, Ymm x, Operand y) { this->vcmpps(dst,x,y,`4`); }
171
172	// Sadly, the x parameter cannot be a general Operand for these shifts.
173	void vpslld(Ymm dst, Ymm x, int imm);
174	void vpsrld(Ymm dst, Ymm x, int imm);
175	void vpsrad(Ymm dst, Ymm x, int imm);
176	void vpsrlw(Ymm dst, Ymm x, int imm);
177
178	void vpermq (Ymm dst, Operand x, int imm);
179	void vperm2f128(Ymm dst, Ymm x, Operand y, int imm);
180	void vpermps (Ymm dst, Ymm ix, Operand src); // dst[i] = src[ix[i]]
181
182	enum Rounding { NEAREST, FLOOR, CEIL, TRUNC, CURRENT };
183	void vroundps(Ymm dst, Operand x, Rounding);
184
185	void vmovdqa(Ymm dst, Operand x);
186	void vmovups(Ymm dst, Operand x);
187	void vmovups(Xmm dst, Operand x);
188	void vmovups(Operand dst, Ymm x);
189	void vmovups(Operand dst, Xmm x);
190
191	void vcvtdq2ps (Ymm dst, Operand x);
192	void vcvttps2dq(Ymm dst, Operand x);
193	void vcvtps2dq (Ymm dst, Operand x);
194
195	void vcvtps2ph(Operand dst, Ymm x, Rounding);
196	void vcvtph2ps(Ymm dst, Operand x);
197
198	void vpblendvb(Ymm dst, Ymm x, Operand y, Ymm z);
199
200	void vpshufb(Ymm dst, Ymm x, Operand y);
201
202	void vptest(Ymm x, Operand y);
203
204	void vbroadcastss(Ymm dst, Operand y);
205
206	void vpmovzxwd(Ymm dst, Operand src); // dst = src, 128-bit, uint16_t -> int
207	void vpmovzxbd(Ymm dst, Operand src); // dst = src, 64-bit, uint8_t -> int
208
209	void vmovq(Operand dst, Xmm src); // dst = src, 64-bit
210	void vmovd(Operand dst, Xmm src); // dst = src, 32-bit
211	void vmovd(Xmm dst, Operand src); // dst = src, 32-bit
212
213	void vpinsrd(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 32-bit
214	void vpinsrw(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 16-bit
215	void vpinsrb(Xmm dst, Xmm src, Operand y, int imm); // dst = src; dst[imm] = y, 8-bit
216
217	void vextracti128(Operand dst, Ymm src, int imm); // dst = src[imm], 128-bit
218	void vpextrd (Operand dst, Xmm src, int imm); // dst = src[imm], 32-bit
219	void vpextrw (Operand dst, Xmm src, int imm); // dst = src[imm], 16-bit
220	void vpextrb (Operand dst, Xmm src, int imm); // dst = src[imm], 8-bit
221
222	// if (mask & 0x8000'0000) {
223	// dst = base[scaleix];*
224	// }
225	// mask = 0;
226	void vgatherdps(Ymm dst, Scale scale, Ymm ix, GP64 base, Ymm mask);
227
228
229	void label(Label*);
230
231	void jmp(Label*);
232	void je (Label*);
233	void jne(Label*);
234	void jl (Label*);
235	void jc (Label*);
236
237	void add (Operand dst, int imm);
238	void sub (Operand dst, int imm);
239	void cmp (Operand dst, int imm);
240	void mov (Operand dst, int imm);
241	void movb(Operand dst, int imm);
242
243	void add (Operand dst, GP64 x);
244	void sub (Operand dst, GP64 x);
245	void cmp (Operand dst, GP64 x);
246	void mov (Operand dst, GP64 x);
247	void movb(Operand dst, GP64 x);
248
249	void add (GP64 dst, Operand x);
250	void sub (GP64 dst, Operand x);
251	void cmp (GP64 dst, Operand x);
252	void mov (GP64 dst, Operand x);
253	void movb(GP64 dst, Operand x);
254
255	// Disambiguators... choice is arbitrary (but generates different code!).
256	void add (GP64 dst, GP64 x) { this->add (Operand (dst), x); }
257	void sub (GP64 dst, GP64 x) { this->sub (Operand (dst), x); }
258	void cmp (GP64 dst, GP64 x) { this->cmp (Operand (dst), x); }
259	void mov (GP64 dst, GP64 x) { this->mov (Operand (dst), x); }
260	void movb(GP64 dst, GP64 x) { this->movb(Operand (dst), x); }
261
262	void movzbq(GP64 dst, Operand x); // dst = x, uint8_t -> int
263	void movzwq(GP64 dst, Operand x); // dst = x, uint16_t -> int
264
265	// aarch64
266
267	// d = op(n,m)
268	using DOpNM = void(V d, V n, V m);
269	DOpNM and16b, orr16b, eor16b, bic16b, bsl16b,
270	add4s, sub4s, mul4s,
271	cmeq4s, cmgt4s,
272	sub8h, mul8h,
273	fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s,
274	fcmeq4s, fcmgt4s, fcmge4s,
275	tbl;
276
277	// TODO: there are also float ==,<,<=,>,>= instructions with an immediate 0.0f,
278	// and the register comparison > and >= can also compare absolute values. Interesting.
279
280	// d += nm*
281	void fmla4s(V d, V n, V m);
282
283	// d -= nm*
284	void fmls4s(V d, V n, V m);
285
286	// d = op(n,imm)
287	using DOpNImm = void(V d, V n, int imm);
288	DOpNImm sli4s,
289	shl4s, sshr4s, ushr4s,
290	ushr8h;
291
292	// d = op(n)
293	using DOpN = void(V d, V n);
294	DOpN not16b, // d = ~n
295	fneg4s, // d = -n
296	scvtf4s, // int -> float
297	fcvtzs4s, // truncate float -> int
298	fcvtns4s, // round float -> int (nearest even)
299	xtns2h, // u32 -> u16
300	xtnh2b, // u16 -> u8
301	uxtlb2h, // u8 -> u16
302	uxtlh2s, // u16 -> u32
303	uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned
304
305	void brk (int imm16);
306	void ret (X);
307	void add (X d, X n, int imm12);
308	void sub (X d, X n, int imm12);
309	void subs(X d, X n, int imm12); // subtract setting condition flags
310
311	// There's another encoding for unconditional branches that can jump further,
312	// but this one encoded as b.al is simple to implement and should be fine.
313	void b (Label* l) { this->b(Condition::al, l); }
314	void bne(Label* l) { this->b(Condition::ne, l); }
315	void blt(Label* l) { this->b(Condition::lt, l); }
316
317	// "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
318	void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }
319
320	// Compare and branch if zero/non-zero, as if
321	// cmp(t,0)
322	// beq/bne(l)
323	// but without setting condition flags.
324	void cbz (X t, Label* l);
325	void cbnz(X t, Label* l);
326
327	void ldrq(V dst, Label); // 128-bit PC-relative load*
328
329	void ldrq(V dst, X src, int imm12=`0`); // 128-bit dst = (src+imm1216)
330	void ldrs(V dst, X src, int imm12=`0`); // 32-bit dst = (src+imm124)
331	void ldrb(V dst, X src, int imm12=`0`); // 8-bit dst = (src+imm12)*
332
333	void strq(V src, X dst, int imm12=`0`); // 128-bit (dst+imm1216) = src
334	void strs(V src, X dst, int imm12=`0`); // 32-bit (dst+imm124) = src
335	void strb(V src, X dst, int imm12=`0`); // 8-bit (dst+imm12) = src*
336
337	void fmovs(X dst, V src); // dst = 32-bit src[0]
338
339	private:
340	// TODO: can probably track two of these three?
341	uint8_t* fCode;
342	uint8_t* fCurr;
343	size_t fSize;
344
345	// x86-64
346	enum W { W0, W1 }; // Are the lanes 64-bit (W1) or default (W0)? Intel Vol 2A 2.3.5.5
347	enum L { L128, L256 }; // Is this a 128- or 256-bit operation? Intel Vol 2A 2.3.6.2
348
349	// Helpers for vector instructions.
350	void op(int prefix, int map, int opcode, int dst, int x, Operand y, W,L);
351	void op(int p, int m, int o, Ymm d, Ymm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L256); }
352	void op(int p, int m, int o, Ymm d, Operand y, W w=W0) { op(p,m,o, d,`0`,y,w,L256); }
353	void op(int p, int m, int o, Xmm d, Xmm x, Operand y, W w=W0) { op(p,m,o, d,x,y,w,L128); }
354	void op(int p, int m, int o, Xmm d, Operand y, W w=W0) { op(p,m,o, d,`0`,y,w,L128); }
355
356	// Helpers for GP64 instructions.
357	void op(int opcode, Operand dst, GP64 x);
358	void op(int opcode, int opcode_ext, Operand dst, int imm);
359
360	void jump(uint8_t condition, Label*);
361	int disp32(Label*);
362	void imm_byte_after_operand(const Operand&, int byte);
363
364	// aarch64
365
366	// Opcode for 3-arguments ops is split between hi and lo:
367	// [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d]
368	void op(uint32_t hi, V m, uint32_t lo, V n, V d);
369
370	// 0,1,2-argument ops, with or without an immediate:
371	// [ 22 bits op ] [5 bits n] [5 bits d]
372	// Any immediate falls in the middle somewhere overlapping with either op, n, or both.
373	void op(uint32_t op22, V n, V d, int imm=`0`);
374	void op(uint32_t op22, X n, V d, int imm=`0`) { this->op(op22,(V)n, d,imm); }
375	void op(uint32_t op22, V n, X d, int imm=`0`) { this->op(op22, n,(V)d,imm); }
376	void op(uint32_t op22, X n, X d, int imm=`0`) { this->op(op22,(V)n,(V)d,imm); }
377	void op(uint32_t op22, int imm=`0`) { this->op(op22,(V)`0`,(V)`0`,imm); }
378	// (1-argument ops don't seem to have a consistent convention of passing as n or d.)
379
380
381	// Order matters... value is 4-bit encoding for condition code.
382	enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al };
383	void b(Condition, Label*);
384	int disp19(Label*);
385	};
386
387	// Order matters a little: has_side_effect() tests op <= store128 and is_always_varying() tests op <= gather32, so keep those Ops first.
388	#define SKVM_OPS(M) \
389	M(assert_true) \
390	M(store8) M(store16) M(store32) M(store64) M(store128) \
391	M(index) \
392	M(load8) M(load16) M(load32) M(load64) M(load128) \
393	M(gather8) M(gather16) M(gather32) \
394	M(uniform8) M(uniform16) M(uniform32) \
395	M(splat) \
396	M(add_f32) M(add_i32) \
397	M(sub_f32) M(sub_i32) \
398	M(mul_f32) M(mul_i32) \
399	M(div_f32) \
400	M(min_f32) \
401	M(max_f32) \
402	M(fma_f32) M(fms_f32) M(fnma_f32) \
403	M(sqrt_f32) \
404	M(shl_i32) M(shr_i32) M(sra_i32) \
405	M(ceil) M(floor) \
406	M(trunc) M(round) M(to_half) M(from_half) \
407	M(to_f32) \
408	M( eq_f32) M( eq_i32) \
409	M(neq_f32) \
410	M( gt_f32) M( gt_i32) \
411	M(gte_f32) \
412	M(bit_and) \
413	M(bit_or) \
414	M(bit_xor) \
415	M(bit_clear) \
416	M(select) M(pack) \
417	// End of SKVM_OPS
418
419	enum class Op : int { // One enumerator per SKVM_OPS entry, in the order listed there.
420	#define M(op) op,
421	SKVM_OPS(M)
422	#undef M
423	};
424
425	static inline bool has_side_effect(Op op) { // True for assert_true and the store* Ops, which lead SKVM_OPS.
426	return !(op > Op::store128);
427	}
428	static inline bool is_always_varying(Op op) { // True for every Op up to gather32 except assert_true.
429	return op != Op::assert_true && !(op > Op::gather32);
430	}
431
432	using Val = int;
433	// We reserve an impossible Val ID as a sentinel
434	// NA meaning none, n/a, null, nil, etc.
435	static const Val NA = -1;
436
437	struct Arg { int ix; }; // NOTE(review): ix is presumably the index of an argument declared via Builder::arg() -- confirm.
438
439	struct I32 { // Pairs a Builder pointer with a Val id.
440	Builder* builder = nullptr;
441	Val id = NA;
442	explicit operator bool() const { return !(id == NA); } // false while id is the NA sentinel
443	Builder* operator->() const { return this->builder; } // x->f(...) calls f on the Builder
444	};
445
446	struct F32 { // Pairs a Builder pointer with a Val id.
447	Builder* builder = nullptr;
448	Val id = NA;
449	explicit operator bool() const { return !(id == NA); } // false while id is the NA sentinel
450	Builder* operator->() const { return this->builder; } // x->f(...) calls f on the Builder
451	};
452
453	// Some operations make sense with immediate arguments,
454	// so we use I32a and F32a to receive them transparently.
455	//
456	// We omit overloads that may indicate a bug or performance issue.
457	// In general it does not make sense to pass immediates to unary operations,
458	// and even sometimes not for binary operations, e.g.
459	//
460	// div(x,y) -- normal every day divide
461	// div(3.0f,y) -- yep, makes sense
462	// div(x,3.0f) -- omitted as a reminder you probably want mul(x, 1/3.0f).
463	//
464	// You can of course always splat() to override these opinions.
465	struct I32a { // Either an existing I32 (id != NA) or an int immediate (id == NA, value in imm).
466	I32a(I32 v) : SkDEBUGCODE(builder(v.builder),) id(v.id) {}
467	I32a(int v) : imm(v) {}
468
469	SkDEBUGCODE(Builder* builder = nullptr;)
470	Val id = NA;
471	int imm = 0;
472	};
473
474	struct F32a { // Either an existing F32 (id != NA) or a float immediate (id == NA, value in imm).
475	F32a(F32 v) : SkDEBUGCODE(builder(v.builder),) id(v.id) {}
476	F32a(float v) : imm(v) {}
477
478	SkDEBUGCODE(Builder* builder = nullptr;)
479	Val id = NA;
480	float imm = 0;
481	};
482
483	struct Color { // Four F32 channels: r, g, b, a.
484	skvm::F32 r,g,b,a;
485	explicit operator bool() const { return bool(r) && bool(g) && bool(b) && bool(a); } // all four channels set
486	Builder* operator->() const { return a.builder; } // Builder taken from the a channel
487	};
488
489	struct HSLA { // Four F32 channels: h, s, l, a.
490	skvm::F32 h,s,l,a;
491	explicit operator bool() const { return bool(h) && bool(s) && bool(l) && bool(a); } // all four channels set
492	Builder* operator->() const { return a.builder; } // Builder taken from the a channel
493	};
494
495	struct Coord { // Two F32 components: x, y.
496	F32 x,y;
497	explicit operator bool() const { return bool(x) && bool(y); } // both components set
498	Builder* operator->() const { return x.builder; } // Builder taken from the x component
499	};
500
501	struct Uniform { // An (Arg, offset) pair, as returned by Uniforms::push*() and consumed by Builder::uniform*/gather*.
502	Arg ptr;
503	int offset; // Byte offset: Uniforms::push() computes it as sizeof(int) * index.
504	};
505	struct Uniforms { // Growable int buffer; each push*() returns the Uniform locating the pushed value.
506	Arg base;
507	std::vector<int> buf;
508
509	explicit Uniforms(int init) : base(Arg{0}), buf (init) {} // buf starts with init value-initialized ints.
510
511	Uniform push(int val) {
512	buf.push_back(val);
513	return {base, (int)( sizeof(int)*(buf.size() - 1) )}; // byte offset of the int just pushed
514	}
515
516	Uniform pushF(float val) {
517	int bits;
518	memcpy(&bits, &val, sizeof(int)); // copy the float's bit pattern into an int
519	return this->push(bits);
520	}
521
522	Uniform pushPtr(const void* ptr) {
523	// Jam the pointer into 1 or 2 ints.
524	int ints[sizeof(ptr) / sizeof(int)];
525	memcpy(ints, &ptr, sizeof(ptr));
526	for (int bits : ints) {
527	buf.push_back(bits);
528	}
529	return {base, (int)( sizeof(int)*(buf.size() - SK_ARRAY_COUNT(ints)) )}; // byte offset of the first int pushed
530	}
531	};
532
533	struct PixelFormat { // Per-channel bit counts and shifts plus an encoding tag.
534	enum { UNORM, FLOAT} encoding;
535	int r_bits, g_bits, b_bits, a_bits,
536	r_shift, g_shift, b_shift, a_shift;
537	};
538	bool SkColorType_to_PixelFormat(SkColorType, PixelFormat*); // NOTE(review): presumably fills the PixelFormat and returns false for unsupported types -- confirm.
539
540	SK_BEGIN_REQUIRE_DENSE
541	struct Instruction {
542	Op op; // v* = op(x,y,z,imm), where * == index of this Instruction.
543	Val x,y,z; // Enough arguments for mad().
544	int immy,immz; // Immediate bit pattern, shift count, argument index, etc.
545	};
546	SK_END_REQUIRE_DENSE
547
548	bool operator==(const Instruction&, const Instruction&);
549	struct InstructionHash { // Hash functor; Builder uses it as the hasher of its SkTHashMap of Instructions.
550	uint32_t operator()(const Instruction&, uint32_t seed=0) const;
551	};
552
553	struct OptimizedInstruction { // Same fields as Instruction plus death and can_hoist.
554	Op op;
555	Val x,y,z;
556	int immy,immz;
557
558	Val death; // NOTE(review): presumably the last instruction that uses this value -- confirm.
559	bool can_hoist; // NOTE(review): presumably whether this can be computed once outside the loop -- confirm.
560	};
561
562	class Builder { // Accumulates Instructions through push(); done() turns them into a Program.
563	public:
564
565	Program done(const char* debug_name = nullptr) const;
566
567	// Mostly for debugging, tests, etc.
568	std::vector<Instruction> program() const { return fProgram; }
569	std::vector<OptimizedInstruction> optimize() const;
570
571	// Declare an argument with given stride (use stride=0 for uniforms).
572	// TODO: different types for varying and uniforms?
573	Arg arg(int stride);
574
575	// Convenience arg() wrappers for most common strides, sizeof(T) and 0.
576	template <typename T>
577	Arg varying() { return this->arg(sizeof(T)); }
578	Arg uniform() { return this->arg(0); }
579
580	// TODO: allow uniform (i.e. Arg) offsets to store and load?
581	// TODO: sign extension (signed types) for <32-bit loads?
582	// TODO: unsigned integer operations where relevant (just comparisons?)?
583
584	// Assert cond is true, printing debug when not.
585	void assert_true(I32 cond, I32 debug);
586	void assert_true(I32 cond, F32 debug) { assert_true(cond, bit_cast(debug)); }
587	void assert_true(I32 cond) { assert_true(cond, cond); }
588
589	// Store {8,16,32,64,128}-bit varying.
590	void store8 (Arg ptr, I32 val);
591	void store16 (Arg ptr, I32 val);
592	void store32 (Arg ptr, I32 val);
593	void storeF (Arg ptr, F32 val) { store32(ptr, bit_cast(val)); }
594	void store64 (Arg ptr, I32 lo, I32 hi); // *ptr = lo|(hi<<32)
595	void store128(Arg ptr, I32 lo, I32 hi, int lane); // 64-bit lane 0-1 at ptr = lo|(hi<<32).
596
597	// Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
598	I32 index();
599
600	// Load {8,16,32,64,128}-bit varying.
601	I32 load8 (Arg ptr);
602	I32 load16 (Arg ptr);
603	I32 load32 (Arg ptr);
604	F32 loadF (Arg ptr) { return bit_cast(load32(ptr)); }
605	I32 load64 (Arg ptr, int lane); // Load 32-bit lane 0-1 of 64-bit value.
606	I32 load128(Arg ptr, int lane); // Load 32-bit lane 0-3 of 128-bit value.
607
608	// Load u8,u16,i32 uniform with byte-count offset.
609	I32 uniform8 (Arg ptr, int offset);
610	I32 uniform16(Arg ptr, int offset);
611	I32 uniform32(Arg ptr, int offset);
612	F32 uniformF (Arg ptr, int offset) { return this->bit_cast(this->uniform32(ptr,offset)); }
613
614	// Load this color as a uniform, premultiplied and converted to dst SkColorSpace.
615	Color uniformPremul(SkColor4f, SkColorSpace* src,
616	Uniforms*, SkColorSpace* dst);
617
618	// Gather u8,u16,i32 with varying element-count index from *(ptr + byte-count offset).
619	I32 gather8 (Arg ptr, int offset, I32 index);
620	I32 gather16(Arg ptr, int offset, I32 index);
621	I32 gather32(Arg ptr, int offset, I32 index);
622	F32 gatherF (Arg ptr, int offset, I32 index) {
623	return bit_cast(gather32(ptr, offset, index));
624	}
625
626	// Convenience methods for working with skvm::Uniform(s).
627	I32 uniform8 (Uniform u) { return this->uniform8 (u.ptr, u.offset); }
628	I32 uniform16(Uniform u) { return this->uniform16(u.ptr, u.offset); }
629	I32 uniform32(Uniform u) { return this->uniform32(u.ptr, u.offset); }
630	F32 uniformF (Uniform u) { return this->uniformF (u.ptr, u.offset); }
631	I32 gather8 (Uniform u, I32 index) { return this->gather8 (u.ptr, u.offset, index); }
632	I32 gather16 (Uniform u, I32 index) { return this->gather16 (u.ptr, u.offset, index); }
633	I32 gather32 (Uniform u, I32 index) { return this->gather32 (u.ptr, u.offset, index); }
634	F32 gatherF (Uniform u, I32 index) { return this->gatherF (u.ptr, u.offset, index); }
635
636	// Load an immediate constant.
637	I32 splat(int n);
638	I32 splat(unsigned u) { return splat((int)u); }
639	F32 splat(float f);
640
641	// float math, comparisons, etc.
642	F32 add(F32, F32); F32 add(F32a x, F32a y) { return add(_(x), _(y)); }
643	F32 sub(F32, F32); F32 sub(F32a x, F32a y) { return sub(_(x), _(y)); }
644	F32 mul(F32, F32); F32 mul(F32a x, F32a y) { return mul(_(x), _(y)); }
645	F32 div(F32, F32); F32 div(F32a x, F32 y) { return div(_(x), y ); }
646	F32 min(F32, F32); F32 min(F32a x, F32a y) { return min(_(x), _(y)); }
647	F32 max(F32, F32); F32 max(F32a x, F32a y) { return max(_(x), _(y)); }
648
649	F32 mad(F32 x, F32 y, F32 z) { return add(mul(x,y), z); }
650	F32 mad(F32a x, F32a y, F32a z) { return mad(_(x), _(y), _(z)); }
651
652	F32 sqrt(F32);
653	F32 approx_log2(F32);
654	F32 approx_pow2(F32);
655	F32 approx_log (F32 x) { return mul(0.69314718f, approx_log2(x)); }
656	F32 approx_exp (F32 x) { return approx_pow2(mul(x, 1.4426950408889634074f)); }
657
658	F32 approx_powf(F32 base, F32 exp);
659	F32 approx_powf(F32a base, F32a exp) { return approx_powf(_(base), _(exp)); }
660
661	F32 approx_sin(F32 radians);
662	F32 approx_cos(F32 radians) { return approx_sin(add(radians, SK_ScalarPI/2)); }
663	F32 approx_tan(F32 radians);
664
665	F32 approx_asin(F32 x);
666	F32 approx_acos(F32 x) { return sub(SK_ScalarPI/2, approx_asin(x)); }
667	F32 approx_atan(F32 x);
668	F32 approx_atan2(F32 y, F32 x);
669
670	F32 lerp(F32 lo, F32 hi, F32 t);
671	F32 lerp(F32a lo, F32a hi, F32a t) { return lerp(_(lo), _(hi), _(t)); }
672
673	F32 clamp(F32 x, F32 lo, F32 hi) { return max(lo, min(x, hi)); }
674	F32 clamp(F32a x, F32a lo, F32a hi) { return clamp(_(x), _(lo), _(hi)); }
675	F32 clamp01(F32 x) { return clamp(x, 0.0f, 1.0f); }
676
677	F32 abs(F32 x) { return bit_cast(bit_and(bit_cast(x), 0x7fff'ffff)); }
678	F32 fract(F32 x) { return sub(x, floor(x)); }
679	F32 ceil(F32);
680	F32 floor(F32);
681	I32 is_NaN (F32 x) { return neq(x,x); }
682	I32 is_finite(F32 x) { return lt(bit_and(bit_cast(x), 0x7f80'0000), 0x7f80'0000); }
683
684	I32 trunc(F32 x);
685	I32 round(F32 x); // Round to int using current rounding mode (as if lrintf()).
686	I32 bit_cast(F32 x) { return {x.builder, x.id}; }
687
688	I32 to_half(F32 x);
689	F32 from_half(I32 x);
690
691	F32 norm(F32 x, F32 y) {
692	return sqrt(add(mul(x,x),
693	mul(y,y)));
694	}
695	F32 norm(F32a x, F32a y) { return norm(_(x), _(y)); }
696
697	I32 eq(F32, F32); I32 eq(F32a x, F32a y) { return eq(_(x), _(y)); }
698	I32 neq(F32, F32); I32 neq(F32a x, F32a y) { return neq(_(x), _(y)); }
699	I32 lt (F32, F32); I32 lt (F32a x, F32a y) { return lt (_(x), _(y)); }
700	I32 lte(F32, F32); I32 lte(F32a x, F32a y) { return lte(_(x), _(y)); }
701	I32 gt (F32, F32); I32 gt (F32a x, F32a y) { return gt (_(x), _(y)); }
702	I32 gte(F32, F32); I32 gte(F32a x, F32a y) { return gte(_(x), _(y)); }
703
704	// int math, comparisons, etc.
705	I32 add(I32, I32); I32 add(I32a x, I32a y) { return add(_(x), _(y)); }
706	I32 sub(I32, I32); I32 sub(I32a x, I32a y) { return sub(_(x), _(y)); }
707	I32 mul(I32, I32); I32 mul(I32a x, I32a y) { return mul(_(x), _(y)); }
708
709	I32 shl(I32 x, int bits);
710	I32 shr(I32 x, int bits);
711	I32 sra(I32 x, int bits);
712
713	I32 eq (I32 x, I32 y); I32 eq(I32a x, I32a y) { return eq(_(x), _(y)); }
714	I32 neq(I32 x, I32 y); I32 neq(I32a x, I32a y) { return neq(_(x), _(y)); }
715	I32 lt (I32 x, I32 y); I32 lt (I32a x, I32a y) { return lt (_(x), _(y)); }
716	I32 lte(I32 x, I32 y); I32 lte(I32a x, I32a y) { return lte(_(x), _(y)); }
717	I32 gt (I32 x, I32 y); I32 gt (I32a x, I32a y) { return gt (_(x), _(y)); }
718	I32 gte(I32 x, I32 y); I32 gte(I32a x, I32a y) { return gte(_(x), _(y)); }
719
720	F32 to_f32(I32 x);
721	F32 bit_cast(I32 x) { return {x.builder, x.id}; }
722
723	// Bitwise operations.
724	I32 bit_and (I32, I32); I32 bit_and (I32a x, I32a y) { return bit_and (_(x), _(y)); }
725	I32 bit_or (I32, I32); I32 bit_or (I32a x, I32a y) { return bit_or (_(x), _(y)); }
726	I32 bit_xor (I32, I32); I32 bit_xor (I32a x, I32a y) { return bit_xor (_(x), _(y)); }
727	I32 bit_clear(I32, I32); I32 bit_clear(I32a x, I32a y) { return bit_clear(_(x), _(y)); }
728
729	I32 min(I32 x, I32 y) { return select(lte(x,y), x, y); }
730	I32 max(I32 x, I32 y) { return select(gte(x,y), x, y); }
731
732	I32 min(I32a x, I32a y) { return min(_(x), _(y)); }
733	I32 max(I32a x, I32a y) { return max(_(x), _(y)); }
734
735	I32 select(I32 cond, I32 t, I32 f); // cond ? t : f
736	F32 select(I32 cond, F32 t, F32 f) {
737	return bit_cast(select(cond, bit_cast(t)
738	, bit_cast(f)));
739	}
740
741	I32 select(I32a cond, I32a t, I32a f) { return select(_(cond), _(t), _(f)); }
742	F32 select(I32a cond, F32a t, F32a f) { return select(_(cond), _(t), _(f)); }
743
744	I32 extract(I32 x, int bits, I32 z); // (x>>bits) & z
745	I32 pack (I32 x, I32 y, int bits); // x | (y << bits), assuming (x & (y << bits)) == 0
746
747	I32 extract(I32a x, int bits, I32a z) { return extract(_(x), bits, _(z)); }
748	I32 pack (I32a x, I32a y, int bits) { return pack (_(x), _(y), bits); }
749
750
751	// Common idioms used in several places, worth centralizing for consistency.
752	F32 from_unorm(int bits, I32); // E.g. from_unorm(8, x) -> x * (1/255.0f)
753	I32 to_unorm(int bits, F32); // E.g. to_unorm(8, x) -> round(x * 255)
754
755	Color load(PixelFormat, Arg ptr);
756	bool store(PixelFormat, Arg ptr, Color);
757	Color gather(PixelFormat, Arg ptr, int offset, I32 index);
758	Color gather(PixelFormat f, Uniform u, I32 index) {
759	return gather(f, u.ptr, u.offset, index);
760	}
761
762	void premul(F32* r, F32* g, F32* b, F32 a);
763	void unpremul(F32* r, F32* g, F32* b, F32 a);
764
765	Color premul(Color c) { this->premul(&c.r, &c.g, &c.b, c.a); return c; }
766	Color unpremul(Color c) { this->unpremul(&c.r, &c.g, &c.b, c.a); return c; }
767	Color lerp(Color lo, Color hi, F32 t);
768	Color blend(SkBlendMode, Color src, Color dst);
769
770	HSLA to_hsla(Color);
771	Color to_rgba(HSLA);
772
773	void dump(SkWStream* = nullptr) const;
774	void dot (SkWStream* = nullptr) const;
775
776	uint64_t hash() const;
777
778	Val push(Instruction);
779	private:
780	Val push(Op op, Val x, Val y=NA, Val z=NA, int immy=0, int immz=0) {
781	return this->push(Instruction{op, x,y,z, immy,immz});
782	}
783
784	I32 _(I32a x) { // Existing value passes through; an immediate is materialized with splat().
785	if (x.id != NA) {
786	SkASSERT(x.builder == this);
787	return {this, x.id};
788	}
789	return splat(x.imm);
790	}
791
792	F32 _(F32a x) { // Existing value passes through; an immediate is materialized with splat().
793	if (x.id != NA) {
794	SkASSERT(x.builder == this);
795	return {this, x.id};
796	}
797	return splat(x.imm);
798	}
799
800	bool allImm() const;
801
802	template <typename T, typename... Rest>
803	bool allImm(Val, T* imm, Rest...) const;
804
805	template <typename T>
806	bool isImm(Val id, T want) const {
807	T imm = 0;
808	return this->allImm(id, &imm) && imm == want;
809	}
810
811	SkTHashMap<Instruction, Val, InstructionHash> fIndex;
812	std::vector<Instruction> fProgram;
813	std::vector<int> fStrides;
814	};
815
816	template <typename... Fs> // NOTE(review): the role of the trailing fs... is not visible in this header -- see the definition.
817	void dump_instructions(const std::vector<Instruction>& instructions,
818	SkWStream* o = nullptr,
819	Fs... fs);
820
821	// Optimization passes and data structures normally used by Builder::optimize(),
822	// extracted here so they can be unit tested. Each takes its input vector by value.
823	std::vector<Instruction> eliminate_dead_code(std::vector<Instruction>);
824	std::vector<Instruction> schedule (std::vector<Instruction>);
825	std::vector<OptimizedInstruction> finalize (std::vector<Instruction>);
826
827	class Usage { // Built from a program; operator[] maps an Instruction id to the Vals that use its result.
828	public:
829	Usage(const std::vector<Instruction>&);
830
831	// Return a sorted span of Vals which use result of Instruction id.
832	SkSpan<const Val> operator[](Val id) const;
833
834	private:
835	std::vector<int> fIndex; // NOTE(review): presumably per-id offsets into fTable -- confirm.
836	std::vector<Val> fTable;
837	};
838
839	using Reg = int;
840
841	// d = op(x, y/imm, z/imm)
842	struct InterpreterInstruction {
843	Op op;
844	Reg d,x;
845	union { Reg y; int immy; }; // second operand: either a register or an immediate
846	union { Reg z; int immz; }; // third operand: either a register or an immediate
847	};
848
849	class Program { // Move-only: copy construction and copy assignment are deleted.
850	public:
851	Program(const std::vector<OptimizedInstruction>& instructions,
852	const std::vector<int>& strides,
853	const char* debug_name);
854
855	Program();
856	~Program();
857
858	Program(Program&&);
859	Program& operator=(Program&&);
860
861	Program(const Program&) = delete;
862	Program& operator=(const Program&) = delete;
863
864	void eval(int n, void* args[]) const;
865
866	template <typename... T>
867	void eval(int n, T*... arg) const { // Packs typed pointers into a void*[] and forwards to the overload above.
868	SkASSERT(sizeof...(arg) == this->nargs());
869	// This nullptr isn't important except that it makes args[] non-empty if you pass none.
870	void* args[] = { (void*)arg..., nullptr };
871	this->eval(n, args);
872	}
873
874	std::vector<InterpreterInstruction> instructions() const;
875	int nargs() const;
876	int nregs() const;
877	int loop () const;
878	bool empty() const;
879
880	bool hasJIT() const; // Has this Program been JITted?
881	void dropJIT(); // If hasJIT(), drop it, forcing interpreter fallback.
882
883	void dump(SkWStream* = nullptr) const;
884
885	private:
886	void setupInterpreter(const std::vector<OptimizedInstruction>&);
887	void setupJIT (const std::vector<OptimizedInstruction>&, const char* debug_name);
888	void setupLLVM (const std::vector<OptimizedInstruction>&, const char* debug_name);
889
890	bool jit(const std::vector<OptimizedInstruction>&,
891	int* stack_hint, uint32_t* registers_used,
892	Assembler*) const;
893
894	void waitForLLVM() const;
895
896	struct Impl;
897	std::unique_ptr<Impl> fImpl;
898	};
899
900	// TODO: control flow
901	// TODO: 64-bit values?
902
903	static inline I32 operator+(I32 x, I32a y) { return x ->add(x,y); }
904	static inline I32 operator+(int x, I32 y) { return y ->add(x,y); }
905
906	static inline I32 operator-(I32 x, I32a y) { return x ->sub(x,y); }
907	static inline I32 operator-(int x, I32 y) { return y ->sub(x,y); }
908
909	static inline I32 operator*(I32 x, I32a y) { return x ->mul(x,y); }
910	static inline I32 operator*(int x, I32 y) { return y ->mul(x,y); }
911
912	static inline I32 min(I32 x, I32a y) { return x ->min(x,y); }
913	static inline I32 min(int x, I32 y) { return y ->min(x,y); }
914
915	static inline I32 max(I32 x, I32a y) { return x ->max(x,y); }
916	static inline I32 max(int x, I32 y) { return y ->max(x,y); }
917
918	static inline I32 operator==(I32 x, I32 y) { return x ->eq(x,y); }
919	static inline I32 operator==(I32 x, int y) { return x ->eq(x,y); }
920	static inline I32 operator==(int x, I32 y) { return y ->eq(x,y); }
921
922	static inline I32 operator!=(I32 x, I32 y) { return x ->neq(x,y); }
923	static inline I32 operator!=(I32 x, int y) { return x ->neq(x,y); }
924	static inline I32 operator!=(int x, I32 y) { return y ->neq(x,y); }
925
926	static inline I32 operator< (I32 x, I32a y) { return x ->lt(x,y); }
927	static inline I32 operator< (int x, I32 y) { return y ->lt(x,y); }
928
929	static inline I32 operator<=(I32 x, I32a y) { return x ->lte(x,y); }
930	static inline I32 operator<=(int x, I32 y) { return y ->lte(x,y); }
931
932	static inline I32 operator> (I32 x, I32a y) { return x ->gt(x,y); }
933	static inline I32 operator> (int x, I32 y) { return y ->gt(x,y); }
934
935	static inline I32 operator>=(I32 x, I32a y) { return x ->gte(x,y); }
936	static inline I32 operator>=(int x, I32 y) { return y ->gte(x,y); }
937
938
939	static inline F32 operator+(F32 x, F32a y) { return x ->add(x,y); }
940	static inline F32 operator+(float x, F32 y) { return y ->add(x,y); }
941
942	static inline F32 operator-(F32 x, F32a y) { return x ->sub(x,y); }
943	static inline F32 operator-(float x, F32 y) { return y ->sub(x,y); }
944
945	static inline F32 operator*(F32 x, F32a y) { return x ->mul(x,y); } // F32 multiply, forwarded to mul() through x.
946	static inline F32 operator*(float x, F32 y) { return y ->mul(x,y); } // float-on-the-left overload, forwarded through y.
947
948	static inline F32 operator/(F32 x, F32 y) { return x ->div(x,y); }
949	static inline F32 operator/(float x, F32 y) { return y ->div(x,y); }
950
951	static inline F32 min(F32 x, F32a y) { return x ->min(x,y); }
952	static inline F32 min(float x, F32 y) { return y ->min(x,y); }
953
954	static inline F32 max(F32 x, F32a y) { return x ->max(x,y); }
955	static inline F32 max(float x, F32 y) { return y ->max(x,y); }
956
957	static inline I32 operator==(F32 x, F32 y) { return x ->eq(x,y); }
958	static inline I32 operator==(F32 x, float y) { return x ->eq(x,y); }
959	static inline I32 operator==(float x, F32 y) { return y ->eq(x,y); }
960
961	static inline I32 operator!=(F32 x, F32 y) { return x ->neq(x,y); }
962	static inline I32 operator!=(F32 x, float y) { return x ->neq(x,y); }
963	static inline I32 operator!=(float x, F32 y) { return y ->neq(x,y); }
964
965	static inline I32 operator< (F32 x, F32a y) { return x ->lt(x,y); }
966	static inline I32 operator< (float x, F32 y) { return y ->lt(x,y); }
967
968	static inline I32 operator<=(F32 x, F32a y) { return x ->lte(x,y); }
969	static inline I32 operator<=(float x, F32 y) { return y ->lte(x,y); }
970
971	static inline I32 operator> (F32 x, F32a y) { return x ->gt(x,y); }
972	static inline I32 operator> (float x, F32 y) { return y ->gt(x,y); }
973
974	static inline I32 operator>=(F32 x, F32a y) { return x ->gte(x,y); }
975	static inline I32 operator>=(float x, F32 y) { return y ->gte(x,y); }
976
977
978	static inline I32& operator+=(I32& x, I32a y) { return (x = x + y); }
979	static inline I32& operator-=(I32& x, I32a y) { return (x = x - y); }
980	static inline I32& operator*=(I32& x, I32a y) { return (x = x * y); } // Compound multiply: assigns x*y back to x and returns x.
981
982	static inline F32& operator+=(F32& x, F32a y) { return (x = x + y); }
983	static inline F32& operator-=(F32& x, F32a y) { return (x = x - y); }
984	static inline F32& operator*=(F32& x, F32a y) { return (x = x * y); } // Compound multiply: assigns x*y back to x and returns x.
985
986	static inline void assert_true(I32 cond, I32 debug) { cond ->assert_true(cond,debug); }
987	static inline void assert_true(I32 cond, F32 debug) { cond ->assert_true(cond,debug); }
988	static inline void assert_true(I32 cond) { cond ->assert_true(cond); }
989
990	static inline void store8 (Arg ptr, I32 val) { val ->store8 (ptr, val); }
991	static inline void store16 (Arg ptr, I32 val) { val ->store16 (ptr, val); }
992	static inline void store32 (Arg ptr, I32 val) { val ->store32 (ptr, val); }
993	static inline void storeF (Arg ptr, F32 val) { val ->storeF (ptr, val); }
994	static inline void store64 (Arg ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); }
995	static inline void store128(Arg ptr, I32 lo, I32 hi, int ix) { lo ->store128(ptr, lo,hi, ix); }
996
997	static inline I32 gather8 (Arg ptr, int off, I32 ix) { return ix ->gather8 (ptr, off, ix); }
998	static inline I32 gather16(Arg ptr, int off, I32 ix) { return ix ->gather16(ptr, off, ix); }
999	static inline I32 gather32(Arg ptr, int off, I32 ix) { return ix ->gather32(ptr, off, ix); }
1000	static inline F32 gatherF (Arg ptr, int off, I32 ix) { return ix ->gatherF (ptr, off, ix); }
1001
1002	static inline I32 gather8 (Uniform u, I32 ix) { return ix ->gather8 (u, ix); }
1003	static inline I32 gather16(Uniform u, I32 ix) { return ix ->gather16(u, ix); }
1004	static inline I32 gather32(Uniform u, I32 ix) { return ix ->gather32(u, ix); }
1005	static inline F32 gatherF (Uniform u, I32 ix) { return ix ->gatherF (u, ix); }
1006
1007	static inline F32 sqrt(F32 x) { return x -> sqrt(x); }
1008	static inline F32 approx_log2(F32 x) { return x ->approx_log2(x); }
1009	static inline F32 approx_pow2(F32 x) { return x ->approx_pow2(x); }
1010	static inline F32 approx_log (F32 x) { return x ->approx_log (x); }
1011	static inline F32 approx_exp (F32 x) { return x ->approx_exp (x); }
1012
1013	static inline F32 approx_powf(F32 base, F32a exp) { return base ->approx_powf(base, exp); }
1014	static inline F32 approx_powf(float base, F32 exp) { return exp ->approx_powf(base, exp); }
1015
1016	static inline F32 approx_sin(F32 radians) { return radians ->approx_sin(radians); }
1017	static inline F32 approx_cos(F32 radians) { return radians ->approx_cos(radians); }
1018	static inline F32 approx_tan(F32 radians) { return radians ->approx_tan(radians); }
1019
1020	static inline F32 approx_asin(F32 x) { return x ->approx_asin(x); }
1021	static inline F32 approx_acos(F32 x) { return x ->approx_acos(x); }
1022	static inline F32 approx_atan(F32 x) { return x ->approx_atan(x); }
1023	static inline F32 approx_atan2(F32 y, F32 x) { return x ->approx_atan2(y, x); }
1024
1025	static inline F32 clamp01(F32 x) { return x -> clamp01(x); }
1026	static inline F32 abs(F32 x) { return x -> abs(x); }
1027	static inline F32 ceil(F32 x) { return x -> ceil(x); }
1028	static inline F32 fract(F32 x) { return x -> fract(x); }
1029	static inline F32 floor(F32 x) { return x -> floor(x); }
1030	static inline I32 is_NaN(F32 x) { return x -> is_NaN(x); }
1031	static inline I32 is_finite(F32 x) { return x ->is_finite(x); }
1032
1033	static inline I32 trunc(F32 x) { return x -> trunc(x); }
1034	static inline I32 round(F32 x) { return x -> round(x); }
1035	static inline I32 bit_cast(F32 x) { return x -> bit_cast(x); }
1036	static inline F32 bit_cast(I32 x) { return x -> bit_cast(x); }
1037	static inline F32 to_f32(I32 x) { return x -> to_f32(x); }
1038	static inline I32 to_half(F32 x) { return x -> to_half(x); }
1039	static inline F32 from_half(I32 x) { return x ->from_half(x); }
1040
1041	static inline F32 lerp(F32 lo, F32a hi, F32a t) { return lo ->lerp(lo,hi,t); }
1042	static inline F32 lerp(float lo, F32 hi, F32a t) { return hi ->lerp(lo,hi,t); }
1043	static inline F32 lerp(float lo, float hi, F32 t) { return t ->lerp(lo,hi,t); }
1044
1045	static inline F32 clamp(F32 x, F32a lo, F32a hi) { return x ->clamp(x,lo,hi); }
1046	static inline F32 clamp(float x, F32 lo, F32a hi) { return lo ->clamp(x,lo,hi); }
1047	static inline F32 clamp(float x, float lo, F32 hi) { return hi ->clamp(x,lo,hi); }
1048
1049	static inline F32 norm(F32 x, F32a y) { return x ->norm(x,y); }
1050	static inline F32 norm(float x, F32 y) { return y ->norm(x,y); }
1051
1052	static inline I32 operator<<(I32 x, int bits) { return x ->shl(x, bits); }
1053	static inline I32 shl(I32 x, int bits) { return x ->shl(x, bits); }
1054	static inline I32 shr(I32 x, int bits) { return x ->shr(x, bits); }
1055	static inline I32 sra(I32 x, int bits) { return x ->sra(x, bits); }
1056
1057	static inline I32 operator&(I32 x, I32a y) { return x ->bit_and(x,y); }
1058	static inline I32 operator&(int x, I32 y) { return y ->bit_and(x,y); }
1059
1060	static inline I32 operator|(I32 x, I32a y) { return x ->bit_or (x,y); } // Bitwise OR, forwarded to bit_or() through x.
1061	static inline I32 operator|(int x, I32 y) { return y ->bit_or (x,y); } // int-on-the-left overload, forwarded through y.
1062
1063	static inline I32 operator^(I32 x, I32a y) { return x ->bit_xor(x,y); }
1064	static inline I32 operator^(int x, I32 y) { return y ->bit_xor(x,y); }
1065
1066	static inline I32& operator&=(I32& x, I32a y) { return (x = x & y); }
1067	static inline I32& operator|=(I32& x, I32a y) { return (x = x | y); } // Compound OR: assigns x|y back to x and returns x.
1068	static inline I32& operator^=(I32& x, I32a y) { return (x = x ^ y); }
1069
1070	static inline I32 select(I32 cond, I32a t, I32a f) { return cond ->select(cond,t,f); }
1071	static inline F32 select(I32 cond, F32a t, F32a f) { return cond ->select(cond,t,f); }
1072
1073	static inline I32 extract(I32 x, int bits, I32a z) { return x ->extract(x,bits,z); }
1074	static inline I32 extract(int x, int bits, I32 z) { return z ->extract(x,bits,z); }
1075	static inline I32 pack (I32 x, I32a y, int bits) { return x ->pack (x,y,bits); }
1076	static inline I32 pack (int x, I32 y, int bits) { return y ->pack (x,y,bits); }
1077
1078	static inline I32 operator~(I32 x) { return ~0^x; } // Bitwise NOT expressed as all-ones XOR x.
1079	static inline I32 operator-(I32 x) { return 0-x; } // Integer negation expressed as 0 - x.
1080	static inline F32 operator-(F32 x) { return 0-x; } // Float negation expressed as 0 - x.
1081
1082	static inline F32 from_unorm(int bits, I32 x) { return x ->from_unorm(bits,x); }
1083	static inline I32 to_unorm(int bits, F32 x) { return x -> to_unorm(bits,x); }
1084
1085	static inline bool store(PixelFormat f, Arg p, Color c) { return c ->store(f,p,c); }
1086	static inline Color gather(PixelFormat f, Arg p, int off, I32 ix) {
1087	return ix ->gather(f,p,off,ix);
1088	}
1089	static inline Color gather(PixelFormat f, Uniform u, I32 ix) {
1090	return ix ->gather(f,u,ix);
1091	}
1092
1093	static inline void premul(F32* r, F32* g, F32* b, F32 a) { a -> premul(r,g,b,a); }
1094	static inline void unpremul(F32* r, F32* g, F32* b, F32 a) { a ->unpremul(r,g,b,a); }
1095
1096	static inline Color premul(Color c) { return c -> premul(c); }
1097	static inline Color unpremul(Color c) { return c ->unpremul(c); }
1098
1099	static inline Color lerp(Color lo, Color hi, F32 t) { return t ->lerp(lo,hi,t); }
1100
1101	static inline Color blend(SkBlendMode m, Color s, Color d) { return s ->blend(m,s,d); }
1102
1103	static inline HSLA to_hsla(Color c) { return c ->to_hsla(c); }
1104	static inline Color to_rgba(HSLA c) { return c ->to_rgba(c); }
1105
1106	// Evaluate polynomials: ax^n + bx^(n-1) + ... for n >= 1
1107	template <typename... Rest>
1108	static inline F32 poly(F32 x, F32a a, F32a b, Rest... rest) {
1109	if constexpr (sizeof...(rest) == 0) { // No coefficients left after a and b: finish with x*a+b.
1110	return x *a +b;
1111	} else { // Horner step: fold x*a+b into the new leading coefficient and recurse on the rest.
1112	return poly(x, x *a +b, rest...);
1113	}
1114	}
1115	} // namespace skvm
1116
1117	#endif//SkVM_DEFINED
1118

Browse the source code of engine/third_party/skia/src/core/SkVM.h