// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#ifndef sw_ShaderCore_hpp
16#define sw_ShaderCore_hpp
17
18#include "Shader.hpp"
19#include "Reactor/Reactor.hpp"
20#include "Common/Debug.hpp"
21
22namespace sw
23{
24 using namespace rr;
25
26 class Vector4s
27 {
28 public:
29 Vector4s();
30 Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
31 Vector4s(const Vector4s &rhs);
32
33 Short4 &operator[](int i);
34 Vector4s &operator=(const Vector4s &rhs);
35
36 Short4 x;
37 Short4 y;
38 Short4 z;
39 Short4 w;
40 };
41
42 class Vector4f
43 {
44 public:
45 Vector4f();
46 Vector4f(float x, float y, float z, float w);
47 Vector4f(const Vector4f &rhs);
48
49 Float4 &operator[](int i);
50 Vector4f &operator=(const Vector4f &rhs);
51
52 Float4 x;
53 Float4 y;
54 Float4 z;
55 Float4 w;
56 };
57
58 Float4 exponential2(RValue<Float4> x, bool pp = false);
59 Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
60 Float4 exponential(RValue<Float4> x, bool pp = false);
61 Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
62 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
63 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
64 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
65 Float4 modulo(RValue<Float4> x, RValue<Float4> y);
66 Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
67 Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range
68 Float4 sine(RValue<Float4> x, bool pp = false);
69 Float4 cosine(RValue<Float4> x, bool pp = false);
70 Float4 tangent(RValue<Float4> x, bool pp = false);
71 Float4 arccos(RValue<Float4> x, bool pp = false);
72 Float4 arcsin(RValue<Float4> x, bool pp = false);
73 Float4 arctan(RValue<Float4> x, bool pp = false);
74 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
75 Float4 sineh(RValue<Float4> x, bool pp = false);
76 Float4 cosineh(RValue<Float4> x, bool pp = false);
77 Float4 tangenth(RValue<Float4> x, bool pp = false);
78 Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1
79 Float4 arcsinh(RValue<Float4> x, bool pp = false);
80 Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range
81
82 Float4 dot2(const Vector4f &v0, const Vector4f &v1);
83 Float4 dot3(const Vector4f &v0, const Vector4f &v1);
84 Float4 dot4(const Vector4f &v0, const Vector4f &v1);
85
86 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
87 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
88 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
89 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
90 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
91 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
92 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
93 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
94
95 class Register
96 {
97 public:
98 Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w)
99 {
100 }
101
102 Reference<Float4> &operator[](int i)
103 {
104 switch(i)
105 {
106 default:
107 case 0: return x;
108 case 1: return y;
109 case 2: return z;
110 case 3: return w;
111 }
112 }
113
114 Register &operator=(const Register &rhs)
115 {
116 x = rhs.x;
117 y = rhs.y;
118 z = rhs.z;
119 w = rhs.w;
120
121 return *this;
122 }
123
124 Register &operator=(const Vector4f &rhs)
125 {
126 x = rhs.x;
127 y = rhs.y;
128 z = rhs.z;
129 w = rhs.w;
130
131 return *this;
132 }
133
134 operator Vector4f()
135 {
136 Vector4f v;
137
138 v.x = x;
139 v.y = y;
140 v.z = z;
141 v.w = w;
142
143 return v;
144 }
145
146 Reference<Float4> x;
147 Reference<Float4> y;
148 Reference<Float4> z;
149 Reference<Float4> w;
150 };
151
152 class RegisterFile
153 {
154 public:
155 RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
156 {
157 if(indirectAddressable)
158 {
159 x = new Array<Float4>(size);
160 y = new Array<Float4>(size);
161 z = new Array<Float4>(size);
162 w = new Array<Float4>(size);
163 }
164 else
165 {
166 x = new Array<Float4>[size];
167 y = new Array<Float4>[size];
168 z = new Array<Float4>[size];
169 w = new Array<Float4>[size];
170 }
171 }
172
173 ~RegisterFile()
174 {
175 if(indirectAddressable)
176 {
177 delete x;
178 delete y;
179 delete z;
180 delete w;
181 }
182 else
183 {
184 delete[] x;
185 delete[] y;
186 delete[] z;
187 delete[] w;
188 }
189 }
190
191 Register operator[](int i)
192 {
193 ASSERT(i < size);
194 if(indirectAddressable)
195 {
196 return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
197 }
198 else
199 {
200 return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
201 }
202 }
203
204 Register operator[](RValue<Int> i)
205 {
206 ASSERT(indirectAddressable);
207
208 return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
209 }
210
211 const Vector4f operator[](RValue<Int4> i); // Gather operation (read only).
212
213 void scatter_x(Int4 i, RValue<Float4> r);
214 void scatter_y(Int4 i, RValue<Float4> r);
215 void scatter_z(Int4 i, RValue<Float4> r);
216 void scatter_w(Int4 i, RValue<Float4> r);
217
218 protected:
219 const int size;
220 const bool indirectAddressable;
221 Array<Float4> *x;
222 Array<Float4> *y;
223 Array<Float4> *z;
224 Array<Float4> *w;
225 };
226
227 template<int S, bool I = false>
228 class RegisterArray : public RegisterFile
229 {
230 public:
231 RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable)
232 {
233 }
234 };
235
236 class ShaderCore
237 {
238 typedef Shader::Control Control;
239
240 public:
241 void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
242 void neg(Vector4f &dst, const Vector4f &src);
243 void ineg(Vector4f &dst, const Vector4f &src);
244 void f2b(Vector4f &dst, const Vector4f &src);
245 void b2f(Vector4f &dst, const Vector4f &src);
246 void f2i(Vector4f &dst, const Vector4f &src);
247 void i2f(Vector4f &dst, const Vector4f &src);
248 void f2u(Vector4f &dst, const Vector4f &src);
249 void u2f(Vector4f &dst, const Vector4f &src);
250 void i2b(Vector4f &dst, const Vector4f &src);
251 void b2i(Vector4f &dst, const Vector4f &src);
252 void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
253 void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
254 void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
255 void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
256 void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
257 void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
258 void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
259 void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
260 void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
261 void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
262 void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
263 void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
264 void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
265 void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
266 void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
267 void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
268 void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
269 void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
270 void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
271 void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
272 void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
273 void len2(Float4 &dst, const Vector4f &src, bool pp = false);
274 void len3(Float4 &dst, const Vector4f &src, bool pp = false);
275 void len4(Float4 &dst, const Vector4f &src, bool pp = false);
276 void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
277 void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
278 void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
279 void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
280 void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
281 void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
282 void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
283 void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
284 void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
285 void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
286 void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
287 void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
288 void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
289 void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
290 void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
291 void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
292 void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
293 void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
294 void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
295 void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
296 void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
297 void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
298 void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
299 void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
300 void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
301 void log(Vector4f &dst, const Vector4f &src, bool pp = false);
302 void lit(Vector4f &dst, const Vector4f &src);
303 void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
304 void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
305 void isinf(Vector4f &dst, const Vector4f &src);
306 void isnan(Vector4f &dst, const Vector4f &src);
307 void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
308 void packHalf2x16(Vector4f &dst, const Vector4f &src);
309 void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
310 void packSnorm2x16(Vector4f &dst, const Vector4f &src);
311 void packUnorm2x16(Vector4f &dst, const Vector4f &src);
312 void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
313 void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
314 void frc(Vector4f &dst, const Vector4f &src);
315 void trunc(Vector4f &dst, const Vector4f &src);
316 void floor(Vector4f &dst, const Vector4f &src);
317 void round(Vector4f &dst, const Vector4f &src);
318 void roundEven(Vector4f &dst, const Vector4f &src);
319 void ceil(Vector4f &dst, const Vector4f &src);
320 void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
321 void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
322 void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
323 void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
324 void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
325 void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
326 void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
327 void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
328 void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
329 void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
330 void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
331 void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
332 void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
333 void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
334 void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
335 void sgn(Vector4f &dst, const Vector4f &src);
336 void isgn(Vector4f &dst, const Vector4f &src);
337 void abs(Vector4f &dst, const Vector4f &src);
338 void iabs(Vector4f &dst, const Vector4f &src);
339 void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
340 void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
341 void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
342 void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
343 void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
344 void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
345 void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
346 void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
347 void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
348 void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
349 void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
350 void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
351 void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
352 void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
353 void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
354 void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
355 void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
356 void expp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
357 void logp(Vector4f &dst, const Vector4f &src, unsigned short shaderModel);
358 void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
359 void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
360 void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
361 void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
362 void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
363 void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
364 void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
365 void all(Float4 &dst, const Vector4f &src);
366 void any(Float4 &dst, const Vector4f &src);
367 void bitwise_not(Vector4f &dst, const Vector4f &src);
368 void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
369 void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
370 void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
371 void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
372 void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
373
374 private:
375 void sgn(Float4 &dst, const Float4 &src);
376 void isgn(Float4 &dst, const Float4 &src);
377 void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
378 void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
379 void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
380 void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
381 void halfToFloatBits(Float4& dst, const Float4& halfBits);
382 };
383}
384
385#ifdef ENABLE_RR_PRINT
386namespace rr {
387 template <> struct PrintValue::Ty<sw::Vector4f>
388 {
389 static std::string fmt(const sw::Vector4f& v)
390 {
391 return "[x: " + PrintValue::fmt(v.x) + ","
392 " y: " + PrintValue::fmt(v.y) + ","
393 " z: " + PrintValue::fmt(v.z) + ","
394 " w: " + PrintValue::fmt(v.w) + "]";
395 }
396
397 static std::vector<rr::Value*> val(const sw::Vector4f& v)
398 {
399 return PrintValue::vals(v.x, v.y, v.z, v.w);
400 }
401 };
402 template <> struct PrintValue::Ty<sw::Vector4s>
403 {
404 static std::string fmt(const sw::Vector4s& v)
405 {
406 return "[x: " + PrintValue::fmt(v.x) + ","
407 " y: " + PrintValue::fmt(v.y) + ","
408 " z: " + PrintValue::fmt(v.z) + ","
409 " w: " + PrintValue::fmt(v.w) + "]";
410 }
411
412 static std::vector<rr::Value*> val(const sw::Vector4s& v)
413 {
414 return PrintValue::vals(v.x, v.y, v.z, v.w);
415 }
416 };
417}
418#endif // ENABLE_RR_PRINT
419
420#endif // sw_ShaderCore_hpp
421