1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef sw_ShaderCore_hpp |
16 | #define sw_ShaderCore_hpp |
17 | |
18 | #include "Shader.hpp" |
19 | #include "Reactor/Reactor.hpp" |
20 | #include "Common/Debug.hpp" |
21 | |
22 | namespace sw |
23 | { |
24 | using namespace rr; |
25 | |
26 | class Vector4s |
27 | { |
28 | public: |
29 | Vector4s(); |
30 | Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w); |
31 | Vector4s(const Vector4s &rhs); |
32 | |
33 | Short4 &operator[](int i); |
34 | Vector4s &operator=(const Vector4s &rhs); |
35 | |
36 | Short4 x; |
37 | Short4 y; |
38 | Short4 z; |
39 | Short4 w; |
40 | }; |
41 | |
42 | class Vector4f |
43 | { |
44 | public: |
45 | Vector4f(); |
46 | Vector4f(float x, float y, float z, float w); |
47 | Vector4f(const Vector4f &rhs); |
48 | |
49 | Float4 &operator[](int i); |
50 | Vector4f &operator=(const Vector4f &rhs); |
51 | |
52 | Float4 x; |
53 | Float4 y; |
54 | Float4 z; |
55 | Float4 w; |
56 | }; |
57 | |
58 | Float4 exponential2(RValue<Float4> x, bool pp = false); |
59 | Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false); |
60 | Float4 exponential(RValue<Float4> x, bool pp = false); |
61 | Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false); |
62 | Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false); |
63 | Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false); |
64 | Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); |
65 | Float4 modulo(RValue<Float4> x, RValue<Float4> y); |
66 | Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range |
67 | Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range |
68 | Float4 sine(RValue<Float4> x, bool pp = false); |
69 | Float4 cosine(RValue<Float4> x, bool pp = false); |
70 | Float4 tangent(RValue<Float4> x, bool pp = false); |
71 | Float4 arccos(RValue<Float4> x, bool pp = false); |
72 | Float4 arcsin(RValue<Float4> x, bool pp = false); |
73 | Float4 arctan(RValue<Float4> x, bool pp = false); |
74 | Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false); |
75 | Float4 sineh(RValue<Float4> x, bool pp = false); |
76 | Float4 cosineh(RValue<Float4> x, bool pp = false); |
77 | Float4 tangenth(RValue<Float4> x, bool pp = false); |
78 | Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1 |
79 | Float4 arcsinh(RValue<Float4> x, bool pp = false); |
80 | Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range |
81 | |
82 | Float4 dot2(const Vector4f &v0, const Vector4f &v1); |
83 | Float4 dot3(const Vector4f &v0, const Vector4f &v1); |
84 | Float4 dot4(const Vector4f &v0, const Vector4f &v1); |
85 | |
86 | void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); |
87 | void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); |
88 | void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
89 | void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
90 | void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
91 | void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
92 | void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); |
93 | void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); |
94 | |
95 | class Register |
96 | { |
97 | public: |
98 | Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w) |
99 | { |
100 | } |
101 | |
102 | Reference<Float4> &operator[](int i) |
103 | { |
104 | switch(i) |
105 | { |
106 | default: |
107 | case 0: return x; |
108 | case 1: return y; |
109 | case 2: return z; |
110 | case 3: return w; |
111 | } |
112 | } |
113 | |
114 | Register &operator=(const Register &rhs) |
115 | { |
116 | x = rhs.x; |
117 | y = rhs.y; |
118 | z = rhs.z; |
119 | w = rhs.w; |
120 | |
121 | return *this; |
122 | } |
123 | |
124 | Register &operator=(const Vector4f &rhs) |
125 | { |
126 | x = rhs.x; |
127 | y = rhs.y; |
128 | z = rhs.z; |
129 | w = rhs.w; |
130 | |
131 | return *this; |
132 | } |
133 | |
134 | operator Vector4f() |
135 | { |
136 | Vector4f v; |
137 | |
138 | v.x = x; |
139 | v.y = y; |
140 | v.z = z; |
141 | v.w = w; |
142 | |
143 | return v; |
144 | } |
145 | |
146 | Reference<Float4> x; |
147 | Reference<Float4> y; |
148 | Reference<Float4> z; |
149 | Reference<Float4> w; |
150 | }; |
151 | |
152 | class RegisterFile |
153 | { |
154 | public: |
155 | RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable) |
156 | { |
157 | if(indirectAddressable) |
158 | { |
159 | x = new Array<Float4>(size); |
160 | y = new Array<Float4>(size); |
161 | z = new Array<Float4>(size); |
162 | w = new Array<Float4>(size); |
163 | } |
164 | else |
165 | { |
166 | x = new Array<Float4>[size]; |
167 | y = new Array<Float4>[size]; |
168 | z = new Array<Float4>[size]; |
169 | w = new Array<Float4>[size]; |
170 | } |
171 | } |
172 | |
173 | ~RegisterFile() |
174 | { |
175 | if(indirectAddressable) |
176 | { |
177 | delete x; |
178 | delete y; |
179 | delete z; |
180 | delete w; |
181 | } |
182 | else |
183 | { |
184 | delete[] x; |
185 | delete[] y; |
186 | delete[] z; |
187 | delete[] w; |
188 | } |
189 | } |
190 | |
191 | Register operator[](int i) |
192 | { |
193 | ASSERT(i < size); |
194 | if(indirectAddressable) |
195 | { |
196 | return Register(x[0][i], y[0][i], z[0][i], w[0][i]); |
197 | } |
198 | else |
199 | { |
200 | return Register(x[i][0], y[i][0], z[i][0], w[i][0]); |
201 | } |
202 | } |
203 | |
204 | Register operator[](RValue<Int> i) |
205 | { |
206 | ASSERT(indirectAddressable); |
207 | |
208 | return Register(x[0][i], y[0][i], z[0][i], w[0][i]); |
209 | } |
210 | |
211 | const Vector4f operator[](RValue<Int4> i); // Gather operation (read only). |
212 | |
213 | void scatter_x(Int4 i, RValue<Float4> r); |
214 | void scatter_y(Int4 i, RValue<Float4> r); |
215 | void scatter_z(Int4 i, RValue<Float4> r); |
216 | void scatter_w(Int4 i, RValue<Float4> r); |
217 | |
218 | protected: |
219 | const int size; |
220 | const bool indirectAddressable; |
221 | Array<Float4> *x; |
222 | Array<Float4> *y; |
223 | Array<Float4> *z; |
224 | Array<Float4> *w; |
225 | }; |
226 | |
227 | template<int S, bool I = false> |
228 | class RegisterArray : public RegisterFile |
229 | { |
230 | public: |
231 | RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable) |
232 | { |
233 | } |
234 | }; |
235 | |
236 | class ShaderCore |
237 | { |
238 | typedef Shader::Control Control; |
239 | |
240 | public: |
241 | void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false); |
242 | void neg(Vector4f &dst, const Vector4f &src); |
243 | void ineg(Vector4f &dst, const Vector4f &src); |
244 | void f2b(Vector4f &dst, const Vector4f &src); |
245 | void b2f(Vector4f &dst, const Vector4f &src); |
246 | void f2i(Vector4f &dst, const Vector4f &src); |
247 | void i2f(Vector4f &dst, const Vector4f &src); |
248 | void f2u(Vector4f &dst, const Vector4f &src); |
249 | void u2f(Vector4f &dst, const Vector4f &src); |
250 | void i2b(Vector4f &dst, const Vector4f &src); |
251 | void b2i(Vector4f &dst, const Vector4f &src); |
252 | void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
253 | void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
254 | void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
255 | void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
256 | void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
257 | void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
258 | void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
259 | void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
260 | void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false); |
261 | void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
262 | void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
263 | void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
264 | void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
265 | void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
266 | void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
267 | void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
268 | void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
269 | void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
270 | void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false); |
271 | void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false); |
272 | void rsq(Vector4f &dst, const Vector4f &src, bool pp = false); |
273 | void len2(Float4 &dst, const Vector4f &src, bool pp = false); |
274 | void len3(Float4 &dst, const Vector4f &src, bool pp = false); |
275 | void len4(Float4 &dst, const Vector4f &src, bool pp = false); |
276 | void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
277 | void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
278 | void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
279 | void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
280 | void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
281 | void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
282 | void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
283 | void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
284 | void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
285 | void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
286 | void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
287 | void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3); |
288 | void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
289 | void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
290 | void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
291 | void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
292 | void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
293 | void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
294 | void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
295 | void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
296 | void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false); |
297 | void exp2(Vector4f &dst, const Vector4f &src, bool pp = false); |
298 | void exp(Vector4f &dst, const Vector4f &src, bool pp = false); |
299 | void log2x(Vector4f &dst, const Vector4f &src, bool pp = false); |
300 | void log2(Vector4f &dst, const Vector4f &src, bool pp = false); |
301 | void log(Vector4f &dst, const Vector4f &src, bool pp = false); |
302 | void lit(Vector4f &dst, const Vector4f &src); |
303 | void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
304 | void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
305 | void isinf(Vector4f &dst, const Vector4f &src); |
306 | void isnan(Vector4f &dst, const Vector4f &src); |
307 | void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
308 | void packHalf2x16(Vector4f &dst, const Vector4f &src); |
309 | void unpackHalf2x16(Vector4f &dst, const Vector4f &src); |
310 | void packSnorm2x16(Vector4f &dst, const Vector4f &src); |
311 | void packUnorm2x16(Vector4f &dst, const Vector4f &src); |
312 | void unpackSnorm2x16(Vector4f &dst, const Vector4f &src); |
313 | void unpackUnorm2x16(Vector4f &dst, const Vector4f &src); |
314 | void frc(Vector4f &dst, const Vector4f &src); |
315 | void trunc(Vector4f &dst, const Vector4f &src); |
316 | void floor(Vector4f &dst, const Vector4f &src); |
317 | void round(Vector4f &dst, const Vector4f &src); |
318 | void roundEven(Vector4f &dst, const Vector4f &src); |
319 | void ceil(Vector4f &dst, const Vector4f &src); |
320 | void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
321 | void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
322 | void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
323 | void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
324 | void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
325 | void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
326 | void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
327 | void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
328 | void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
329 | void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
330 | void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
331 | void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); |
332 | void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); |
333 | void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); |
334 | void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); |
335 | void sgn(Vector4f &dst, const Vector4f &src); |
336 | void isgn(Vector4f &dst, const Vector4f &src); |
337 | void abs(Vector4f &dst, const Vector4f &src); |
338 | void iabs(Vector4f &dst, const Vector4f &src); |
339 | void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false); |
340 | void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false); |
341 | void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false); |
342 | void sincos(Vector4f &dst, const Vector4f &src, bool pp = false); |
343 | void cos(Vector4f &dst, const Vector4f &src, bool pp = false); |
344 | void sin(Vector4f &dst, const Vector4f &src, bool pp = false); |
345 | void tan(Vector4f &dst, const Vector4f &src, bool pp = false); |
346 | void acos(Vector4f &dst, const Vector4f &src, bool pp = false); |
347 | void asin(Vector4f &dst, const Vector4f &src, bool pp = false); |
348 | void atan(Vector4f &dst, const Vector4f &src, bool pp = false); |
349 | void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); |
350 | void cosh(Vector4f &dst, const Vector4f &src, bool pp = false); |
351 | void sinh(Vector4f &dst, const Vector4f &src, bool pp = false); |
352 | void tanh(Vector4f &dst, const Vector4f &src, bool pp = false); |
353 | void acosh(Vector4f &dst, const Vector4f &src, bool pp = false); |
354 | void asinh(Vector4f &dst, const Vector4f &src, bool pp = false); |
355 | void atanh(Vector4f &dst, const Vector4f &src, bool pp = false); |
356 | void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel); |
357 | void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel); |
358 | void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
359 | void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); |
360 | void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); |
361 | void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); |
362 | void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); |
363 | void (Float4 &dst, const Vector4f &src0, const Float4 &src1); |
364 | void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index); |
365 | void all(Float4 &dst, const Vector4f &src); |
366 | void any(Float4 &dst, const Vector4f &src); |
367 | void bitwise_not(Vector4f &dst, const Vector4f &src); |
368 | void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
369 | void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
370 | void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
371 | void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
372 | void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); |
373 | |
374 | private: |
375 | void sgn(Float4 &dst, const Float4 &src); |
376 | void isgn(Float4 &dst, const Float4 &src); |
377 | void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); |
378 | void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); |
379 | void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2); |
380 | void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits); |
381 | void halfToFloatBits(Float4& dst, const Float4& halfBits); |
382 | }; |
383 | } |
384 | |
385 | #ifdef ENABLE_RR_PRINT |
386 | namespace rr { |
387 | template <> struct PrintValue::Ty<sw::Vector4f> |
388 | { |
389 | static std::string fmt(const sw::Vector4f& v) |
390 | { |
391 | return "[x: " + PrintValue::fmt(v.x) + "," |
392 | " y: " + PrintValue::fmt(v.y) + "," |
393 | " z: " + PrintValue::fmt(v.z) + "," |
394 | " w: " + PrintValue::fmt(v.w) + "]" ; |
395 | } |
396 | |
397 | static std::vector<rr::Value*> val(const sw::Vector4f& v) |
398 | { |
399 | return PrintValue::vals(v.x, v.y, v.z, v.w); |
400 | } |
401 | }; |
402 | template <> struct PrintValue::Ty<sw::Vector4s> |
403 | { |
404 | static std::string fmt(const sw::Vector4s& v) |
405 | { |
406 | return "[x: " + PrintValue::fmt(v.x) + "," |
407 | " y: " + PrintValue::fmt(v.y) + "," |
408 | " z: " + PrintValue::fmt(v.z) + "," |
409 | " w: " + PrintValue::fmt(v.w) + "]" ; |
410 | } |
411 | |
412 | static std::vector<rr::Value*> val(const sw::Vector4s& v) |
413 | { |
414 | return PrintValue::vals(v.x, v.y, v.z, v.w); |
415 | } |
416 | }; |
417 | } |
418 | #endif // ENABLE_RR_PRINT |
419 | |
420 | #endif // sw_ShaderCore_hpp |
421 | |