ShaderCore.hpp source code [engine/third_party/swiftshader/src/Shader/ShaderCore.hpp]

1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#ifndef sw_ShaderCore_hpp
16	#define sw_ShaderCore_hpp
17
18	#include "Shader.hpp"
19	#include "Reactor/Reactor.hpp"
20	#include "Common/Debug.hpp"
21
22	namespace sw
23	{
24	using namespace rr;
25
26	class Vector4s
27	{
28	public:
29	Vector4s();
30	Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
31	Vector4s(const Vector4s &rhs);
32
33	Short4 &operator[](int i);
34	Vector4s &operator=(const Vector4s &rhs);
35
36	Short4 x;
37	Short4 y;
38	Short4 z;
39	Short4 w;
40	};
41
42	class Vector4f
43	{
44	public:
45	Vector4f();
46	Vector4f(float x, float y, float z, float w);
47	Vector4f(const Vector4f &rhs);
48
49	Float4 &operator[](int i);
50	Vector4f &operator=(const Vector4f &rhs);
51
52	Float4 x;
53	Float4 y;
54	Float4 z;
55	Float4 w;
56	};
57
58	Float4 exponential2(RValue<Float4> x, bool pp = false);
59	Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
60	Float4 exponential(RValue<Float4> x, bool pp = false);
61	Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
62	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
63	Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
64	Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
65	Float4 modulo(RValue<Float4> x, RValue<Float4> y);
66	Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
67	Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
68	Float4 sine(RValue<Float4> x, bool pp = false);
69	Float4 cosine(RValue<Float4> x, bool pp = false);
70	Float4 tangent(RValue<Float4> x, bool pp = false);
71	Float4 arccos(RValue<Float4> x, bool pp = false);
72	Float4 arcsin(RValue<Float4> x, bool pp = false);
73	Float4 arctan(RValue<Float4> x, bool pp = false);
74	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
75	Float4 sineh(RValue<Float4> x, bool pp = false);
76	Float4 cosineh(RValue<Float4> x, bool pp = false);
77	Float4 tangenth(RValue<Float4> x, bool pp = false);
78	Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
79	Float4 arcsinh(RValue<Float4> x, bool pp = false);
80	Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
81
82	Float4 dot2(const Vector4f &v0, const Vector4f &v1);
83	Float4 dot3(const Vector4f &v0, const Vector4f &v1);
84	Float4 dot4(const Vector4f &v0, const Vector4f &v1);
85
86	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
87	void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
88	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
89	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
90	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
91	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
92	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
93	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
94
95	class Register
96	{
97	public:
98	Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x (x), y (y), z (z), w (w)
99	{
100	}
101
102	Reference<Float4> &operator[](int i)
103	{
104	switch(i)
105	{
106	default:
107	case `0`: return x;
108	case `1`: return y;
109	case `2`: return z;
110	case `3`: return w;
111	}
112	}
113
114	Register &operator=(const Register &rhs)
115	{
116	x = rhs.x;
117	y = rhs.y;
118	z = rhs.z;
119	w = rhs.w;
120
121	return *this;
122	}
123
124	Register &operator=(const Vector4f &rhs)
125	{
126	x = rhs.x;
127	y = rhs.y;
128	z = rhs.z;
129	w = rhs.w;
130
131	return *this;
132	}
133
134	operator Vector4f()
135	{
136	Vector4f v;
137
138	v.x = x;
139	v.y = y;
140	v.z = z;
141	v.w = w;
142
143	return v;
144	}
145
146	Reference<Float4> x;
147	Reference<Float4> y;
148	Reference<Float4> z;
149	Reference<Float4> w;
150	};
151
152	class RegisterFile
153	{
154	public:
155	RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
156	{
157	if(indirectAddressable)
158	{
159	x = new Array<Float4>(size);
160	y = new Array<Float4>(size);
161	z = new Array<Float4>(size);
162	w = new Array<Float4>(size);
163	}
164	else
165	{
166	x = new Array<Float4>[size];
167	y = new Array<Float4>[size];
168	z = new Array<Float4>[size];
169	w = new Array<Float4>[size];
170	}
171	}
172
173	~RegisterFile()
174	{
175	if(indirectAddressable)
176	{
177	delete x;
178	delete y;
179	delete z;
180	delete w;
181	}
182	else
183	{
184	delete[] x;
185	delete[] y;
186	delete[] z;
187	delete[] w;
188	}
189	}
190
191	Register operator[](int i)
192	{
193	ASSERT(i < size);
194	if(indirectAddressable)
195	{
196	return Register (x[`0`][i], y[`0`][i], z[`0`][i], w[`0`][i]);
197	}
198	else
199	{
200	return Register (x[i][`0`], y[i][`0`], z[i][`0`], w[i][`0`]);
201	}
202	}
203
204	Register operator[](RValue<Int> i)
205	{
206	ASSERT(indirectAddressable);
207
208	return Register (x[`0`][i], y[`0`][i], z[`0`][i], w[`0`][i]);
209	}
210
211	const Vector4f operator[](RValue<Int4> i); // Gather operation (read only).
212
213	void scatter_x(Int4 i, RValue<Float4> r);
214	void scatter_y(Int4 i, RValue<Float4> r);
215	void scatter_z(Int4 i, RValue<Float4> r);
216	void scatter_w(Int4 i, RValue<Float4> r);
217
218	protected:
219	const int size;
220	const bool indirectAddressable;
221	Array<Float4> *x;
222	Array<Float4> *y;
223	Array<Float4> *z;
224	Array<Float4> *w;
225	};
226
227	template<int S, bool I = false>
228	class RegisterArray : public RegisterFile
229	{
230	public:
231	RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable)
232	{
233	}
234	};
235
236	class ShaderCore
237	{
238	typedef Shader::Control Control;
239
240	public:
241	void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
242	void neg(Vector4f &dst, const Vector4f &src);
243	void ineg(Vector4f &dst, const Vector4f &src);
244	void f2b(Vector4f &dst, const Vector4f &src);
245	void b2f(Vector4f &dst, const Vector4f &src);
246	void f2i(Vector4f &dst, const Vector4f &src);
247	void i2f(Vector4f &dst, const Vector4f &src);
248	void f2u(Vector4f &dst, const Vector4f &src);
249	void u2f(Vector4f &dst, const Vector4f &src);
250	void i2b(Vector4f &dst, const Vector4f &src);
251	void b2i(Vector4f &dst, const Vector4f &src);
252	void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
253	void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
254	void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
255	void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
256	void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
257	void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
258	void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
259	void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
260	void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
261	void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
262	void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
263	void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
264	void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
265	void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
266	void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
267	void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
268	void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
269	void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
270	void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
271	void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
272	void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
273	void len2(Float4 &dst, const Vector4f &src, bool pp = false);
274	void len3(Float4 &dst, const Vector4f &src, bool pp = false);
275	void len4(Float4 &dst, const Vector4f &src, bool pp = false);
276	void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
277	void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
278	void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
279	void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
280	void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
281	void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
282	void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
283	void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
284	void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
285	void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
286	void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
287	void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
288	void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
289	void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
290	void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
291	void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
292	void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
293	void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
294	void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
295	void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
296	void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
297	void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
298	void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
299	void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
300	void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
301	void log(Vector4f &dst, const Vector4f &src, bool pp = false);
302	void lit(Vector4f &dst, const Vector4f &src);
303	void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
304	void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
305	void isinf(Vector4f &dst, const Vector4f &src);
306	void isnan(Vector4f &dst, const Vector4f &src);
307	void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
308	void packHalf2x16(Vector4f &dst, const Vector4f &src);
309	void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
310	void packSnorm2x16(Vector4f &dst, const Vector4f &src);
311	void packUnorm2x16(Vector4f &dst, const Vector4f &src);
312	void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
313	void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
314	void frc(Vector4f &dst, const Vector4f &src);
315	void trunc(Vector4f &dst, const Vector4f &src);
316	void floor(Vector4f &dst, const Vector4f &src);
317	void round(Vector4f &dst, const Vector4f &src);
318	void roundEven(Vector4f &dst, const Vector4f &src);
319	void ceil(Vector4f &dst, const Vector4f &src);
320	void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
321	void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
322	void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
323	void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
324	void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
325	void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
326	void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
327	void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
328	void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
329	void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
330	void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
331	void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
332	void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
333	void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
334	void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
335	void sgn(Vector4f &dst, const Vector4f &src);
336	void isgn(Vector4f &dst, const Vector4f &src);
337	void abs(Vector4f &dst, const Vector4f &src);
338	void iabs(Vector4f &dst, const Vector4f &src);
339	void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
340	void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
341	void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
342	void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
343	void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
344	void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
345	void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
346	void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
347	void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
348	void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
349	void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
350	void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
351	void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
352	void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
353	void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
354	void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
355	void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
356	void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
357	void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
358	void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
359	void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
360	void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
361	void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
362	void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
363	void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
364	void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
365	void all(Float4 &dst, const Vector4f &src);
366	void any(Float4 &dst, const Vector4f &src);
367	void bitwise_not(Vector4f &dst, const Vector4f &src);
368	void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
369	void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
370	void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
371	void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
372	void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
373
374	private:
375	void sgn(Float4 &dst, const Float4 &src);
376	void isgn(Float4 &dst, const Float4 &src);
377	void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
378	void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
379	void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
380	void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
381	void halfToFloatBits(Float4& dst, const Float4& halfBits);
382	};
383	}
384
385	#ifdef ENABLE_RR_PRINT
386	namespace rr {
387	template <> struct PrintValue::Ty<sw::Vector4f>
388	{
389	static std::string fmt(const sw::Vector4f& v)
390	{
391	return "[x: " + PrintValue::fmt(v.x) + ","
392	" y: " + PrintValue::fmt(v.y) + ","
393	" z: " + PrintValue::fmt(v.z) + ","
394	" w: " + PrintValue::fmt(v.w) + "]";
395	}
396
397	static std::vector<rr::Value> val(const* sw::Vector4f& v)
398	{
399	return PrintValue::vals(v.x, v.y, v.z, v.w);
400	}
401	};
402	template <> struct PrintValue::Ty<sw::Vector4s>
403	{
404	static std::string fmt(const sw::Vector4s& v)
405	{
406	return "[x: " + PrintValue::fmt(v.x) + ","
407	" y: " + PrintValue::fmt(v.y) + ","
408	" z: " + PrintValue::fmt(v.z) + ","
409	" w: " + PrintValue::fmt(v.w) + "]";
410	}
411
412	static std::vector<rr::Value> val(const* sw::Vector4s& v)
413	{
414	return PrintValue::vals(v.x, v.y, v.z, v.w);
415	}
416	};
417	}
418	#endif // ENABLE_RR_PRINT
419
420	#endif // sw_ShaderCore_hpp
421

Browse the source code of engine/third_party/swiftshader/src/Shader/ShaderCore.hpp