1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Constants.hpp"
16
17#include "System/Math.hpp"
18#include "System/Half.hpp"
19
20#include <cstring>
21
22namespace sw
23{
24 Constants constants;
25
26 Constants::Constants()
27 {
28 static const unsigned int transposeBit0[16] =
29 {
30 0x00000000,
31 0x00000001,
32 0x00000010,
33 0x00000011,
34 0x00000100,
35 0x00000101,
36 0x00000110,
37 0x00000111,
38 0x00001000,
39 0x00001001,
40 0x00001010,
41 0x00001011,
42 0x00001100,
43 0x00001101,
44 0x00001110,
45 0x00001111
46 };
47
48 static const unsigned int transposeBit1[16] =
49 {
50 0x00000000,
51 0x00000002,
52 0x00000020,
53 0x00000022,
54 0x00000200,
55 0x00000202,
56 0x00000220,
57 0x00000222,
58 0x00002000,
59 0x00002002,
60 0x00002020,
61 0x00002022,
62 0x00002200,
63 0x00002202,
64 0x00002220,
65 0x00002222
66 };
67
68 static const unsigned int transposeBit2[16] =
69 {
70 0x00000000,
71 0x00000004,
72 0x00000040,
73 0x00000044,
74 0x00000400,
75 0x00000404,
76 0x00000440,
77 0x00000444,
78 0x00004000,
79 0x00004004,
80 0x00004040,
81 0x00004044,
82 0x00004400,
83 0x00004404,
84 0x00004440,
85 0x00004444
86 };
87
88 memcpy(&this->transposeBit0, transposeBit0, sizeof(transposeBit0));
89 memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1));
90 memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2));
91
92 static const ushort4 cWeight[17] =
93 {
94 {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF
95 {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}, // 0xFFFF / 1 = 0xFFFF
96 {0x8000, 0x8000, 0x8000, 0x8000}, // 0xFFFF / 2 = 0x8000
97 {0x5555, 0x5555, 0x5555, 0x5555}, // 0xFFFF / 3 = 0x5555
98 {0x4000, 0x4000, 0x4000, 0x4000}, // 0xFFFF / 4 = 0x4000
99 {0x3333, 0x3333, 0x3333, 0x3333}, // 0xFFFF / 5 = 0x3333
100 {0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA}, // 0xFFFF / 6 = 0x2AAA
101 {0x2492, 0x2492, 0x2492, 0x2492}, // 0xFFFF / 7 = 0x2492
102 {0x2000, 0x2000, 0x2000, 0x2000}, // 0xFFFF / 8 = 0x2000
103 {0x1C71, 0x1C71, 0x1C71, 0x1C71}, // 0xFFFF / 9 = 0x1C71
104 {0x1999, 0x1999, 0x1999, 0x1999}, // 0xFFFF / 10 = 0x1999
105 {0x1745, 0x1745, 0x1745, 0x1745}, // 0xFFFF / 11 = 0x1745
106 {0x1555, 0x1555, 0x1555, 0x1555}, // 0xFFFF / 12 = 0x1555
107 {0x13B1, 0x13B1, 0x13B1, 0x13B1}, // 0xFFFF / 13 = 0x13B1
108 {0x1249, 0x1249, 0x1249, 0x1249}, // 0xFFFF / 14 = 0x1249
109 {0x1111, 0x1111, 0x1111, 0x1111}, // 0xFFFF / 15 = 0x1111
110 {0x1000, 0x1000, 0x1000, 0x1000}, // 0xFFFF / 16 = 0x1000
111 };
112
113 static const float4 uvWeight[17] =
114 {
115 {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f},
116 {1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f},
117 {1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f},
118 {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f},
119 {1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f},
120 {1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f},
121 {1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f},
122 {1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f},
123 {1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f},
124 {1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f},
125 {1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f},
126 {1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f},
127 {1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f},
128 {1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f},
129 {1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f},
130 {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f},
131 {1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f},
132 };
133
134 static const float4 uvStart[17] =
135 {
136 {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f},
137 {-0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f},
138 {-1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f},
139 {-2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f},
140 {-3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f},
141 {-4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f},
142 {-5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f},
143 {-6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f},
144 {-7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f},
145 {-8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f},
146 {-9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f},
147 {-10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f},
148 {-11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f},
149 {-12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f},
150 {-13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f},
151 {-14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f},
152 {-15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f},
153 };
154
155 memcpy(&this->cWeight, cWeight, sizeof(cWeight));
156 memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight));
157 memcpy(&this->uvStart, uvStart, sizeof(uvStart));
158
159 static const unsigned int occlusionCount[16] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
160
161 memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount));
162
163 for(int i = 0; i < 16; i++)
164 {
165 maskB4Q[i][0] = -(i >> 0 & 1);
166 maskB4Q[i][1] = -(i >> 1 & 1);
167 maskB4Q[i][2] = -(i >> 2 & 1);
168 maskB4Q[i][3] = -(i >> 3 & 1);
169 maskB4Q[i][4] = -(i >> 0 & 1);
170 maskB4Q[i][5] = -(i >> 1 & 1);
171 maskB4Q[i][6] = -(i >> 2 & 1);
172 maskB4Q[i][7] = -(i >> 3 & 1);
173
174 invMaskB4Q[i][0] = ~maskB4Q[i][0];
175 invMaskB4Q[i][1] = ~maskB4Q[i][1];
176 invMaskB4Q[i][2] = ~maskB4Q[i][2];
177 invMaskB4Q[i][3] = ~maskB4Q[i][3];
178 invMaskB4Q[i][4] = ~maskB4Q[i][4];
179 invMaskB4Q[i][5] = ~maskB4Q[i][5];
180 invMaskB4Q[i][6] = ~maskB4Q[i][6];
181 invMaskB4Q[i][7] = ~maskB4Q[i][7];
182
183 maskW4Q[i][0] = -(i >> 0 & 1);
184 maskW4Q[i][1] = -(i >> 1 & 1);
185 maskW4Q[i][2] = -(i >> 2 & 1);
186 maskW4Q[i][3] = -(i >> 3 & 1);
187
188 invMaskW4Q[i][0] = ~maskW4Q[i][0];
189 invMaskW4Q[i][1] = ~maskW4Q[i][1];
190 invMaskW4Q[i][2] = ~maskW4Q[i][2];
191 invMaskW4Q[i][3] = ~maskW4Q[i][3];
192
193 maskD4X[i][0] = -(i >> 0 & 1);
194 maskD4X[i][1] = -(i >> 1 & 1);
195 maskD4X[i][2] = -(i >> 2 & 1);
196 maskD4X[i][3] = -(i >> 3 & 1);
197
198 invMaskD4X[i][0] = ~maskD4X[i][0];
199 invMaskD4X[i][1] = ~maskD4X[i][1];
200 invMaskD4X[i][2] = ~maskD4X[i][2];
201 invMaskD4X[i][3] = ~maskD4X[i][3];
202
203 maskQ0Q[i] = -(i >> 0 & 1);
204 maskQ1Q[i] = -(i >> 1 & 1);
205 maskQ2Q[i] = -(i >> 2 & 1);
206 maskQ3Q[i] = -(i >> 3 & 1);
207
208 invMaskQ0Q[i] = ~maskQ0Q[i];
209 invMaskQ1Q[i] = ~maskQ1Q[i];
210 invMaskQ2Q[i] = ~maskQ2Q[i];
211 invMaskQ3Q[i] = ~maskQ3Q[i];
212
213 maskX0X[i][0] = maskX0X[i][1] = maskX0X[i][2] = maskX0X[i][3] = -(i >> 0 & 1);
214 maskX1X[i][0] = maskX1X[i][1] = maskX1X[i][2] = maskX1X[i][3] = -(i >> 1 & 1);
215 maskX2X[i][0] = maskX2X[i][1] = maskX2X[i][2] = maskX2X[i][3] = -(i >> 2 & 1);
216 maskX3X[i][0] = maskX3X[i][1] = maskX3X[i][2] = maskX3X[i][3] = -(i >> 3 & 1);
217
218 invMaskX0X[i][0] = invMaskX0X[i][1] = invMaskX0X[i][2] = invMaskX0X[i][3] = ~maskX0X[i][0];
219 invMaskX1X[i][0] = invMaskX1X[i][1] = invMaskX1X[i][2] = invMaskX1X[i][3] = ~maskX1X[i][0];
220 invMaskX2X[i][0] = invMaskX2X[i][1] = invMaskX2X[i][2] = invMaskX2X[i][3] = ~maskX2X[i][0];
221 invMaskX3X[i][0] = invMaskX3X[i][1] = invMaskX3X[i][2] = invMaskX3X[i][3] = ~maskX3X[i][0];
222
223 maskD01Q[i][0] = -(i >> 0 & 1);
224 maskD01Q[i][1] = -(i >> 1 & 1);
225 maskD23Q[i][0] = -(i >> 2 & 1);
226 maskD23Q[i][1] = -(i >> 3 & 1);
227
228 invMaskD01Q[i][0] = ~maskD01Q[i][0];
229 invMaskD01Q[i][1] = ~maskD01Q[i][1];
230 invMaskD23Q[i][0] = ~maskD23Q[i][0];
231 invMaskD23Q[i][1] = ~maskD23Q[i][1];
232
233 maskQ01X[i][0] = -(i >> 0 & 1);
234 maskQ01X[i][1] = -(i >> 1 & 1);
235 maskQ23X[i][0] = -(i >> 2 & 1);
236 maskQ23X[i][1] = -(i >> 3 & 1);
237
238 invMaskQ01X[i][0] = ~maskQ01X[i][0];
239 invMaskQ01X[i][1] = ~maskQ01X[i][1];
240 invMaskQ23X[i][0] = ~maskQ23X[i][0];
241 invMaskQ23X[i][1] = ~maskQ23X[i][1];
242 }
243
244 for(int i = 0; i < 8; i++)
245 {
246 mask565Q[i][0] =
247 mask565Q[i][1] =
248 mask565Q[i][2] =
249 mask565Q[i][3] = (i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x07E0 : 0) | (i & 0x4 ? 0xF800 : 0);
250 }
251
252 for (int i = 0; i < 16; i++)
253 {
254 mask5551Q[i][0] =
255 mask5551Q[i][1] =
256 mask5551Q[i][2] =
257 mask5551Q[i][3] = (i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x03E0 : 0) | (i & 0x4 ? 0x7C00 : 0) | (i & 8 ? 0x8000 : 0);
258 }
259
260 for(int i = 0; i < 4; i++)
261 {
262 maskW01Q[i][0] = -(i >> 0 & 1);
263 maskW01Q[i][1] = -(i >> 1 & 1);
264 maskW01Q[i][2] = -(i >> 0 & 1);
265 maskW01Q[i][3] = -(i >> 1 & 1);
266
267 maskD01X[i][0] = -(i >> 0 & 1);
268 maskD01X[i][1] = -(i >> 1 & 1);
269 maskD01X[i][2] = -(i >> 0 & 1);
270 maskD01X[i][3] = -(i >> 1 & 1);
271 }
272
273 for (int i = 0; i < 16; i++)
274 {
275 mask10Q[i][0] = mask10Q[i][1] =
276 (i & 0x1 ? 0x3FF : 0) |
277 (i & 0x2 ? 0xFFC00 : 0) |
278 (i & 0x4 ? 0x3FF00000 : 0) |
279 (i & 0x8 ? 0xC0000000 : 0);
280 }
281
282 for(int i = 0; i < 256; i++)
283 {
284 sRGBtoLinear8_16[i] = (unsigned short)(sw::sRGBtoLinear((float)i / 0xFF) * 0xFFFF + 0.5f);
285 }
286
287 for(int i = 0; i < 0x1000; i++)
288 {
289 linearToSRGB12_16[i] = (unsigned short)(clamp(sw::linearToSRGB((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
290 sRGBtoLinear12_16[i] = (unsigned short)(clamp(sw::sRGBtoLinear((float)i / 0x0FFF) * 0xFFFF + 0.5f, 0.0f, (float)0xFFFF));
291 }
292
293 constexpr float4 X[4] = {
294 sw::replicate(SampleLocationsX[0]),
295 sw::replicate(SampleLocationsX[1]),
296 sw::replicate(SampleLocationsX[2]),
297 sw::replicate(SampleLocationsX[3]),
298 };
299
300 constexpr float4 Y[4] = {
301 sw::replicate(SampleLocationsY[0]),
302 sw::replicate(SampleLocationsY[1]),
303 sw::replicate(SampleLocationsY[2]),
304 sw::replicate(SampleLocationsY[3]),
305 };
306
307 for(int q = 0; q < 4; q++)
308 {
309 for(int c = 0; c < 16; c++)
310 {
311 for(int i = 0; i < 4; i++)
312 {
313 // Reorder sample points for centroid computation
314 const float Xs[4] = { X[1][0], X[2][0], X[0][0], X[3][0] };
315 const float Ys[4] = { Y[1][0], Y[2][0], Y[0][0], Y[3][0] };
316
317 sampleX[q][c][i] = c & (1 << i) ? Xs[q] : 0.0f;
318 sampleY[q][c][i] = c & (1 << i) ? Ys[q] : 0.0f;
319 weight[c][i] = c & (1 << i) ? 1.0f : 0.0f;
320 }
321 }
322 }
323
324 constexpr auto subPixB = vk::SUBPIXEL_PRECISION_BITS;
325
326 // Reorder sample points for fragment offset computation
327 const int Xf[4] = { toFixedPoint(X[2][0], subPixB), toFixedPoint(X[1][0], subPixB), toFixedPoint(X[3][0], subPixB), toFixedPoint(X[0][0], subPixB) };
328 const int Yf[4] = { toFixedPoint(Y[2][0], subPixB), toFixedPoint(Y[1][0], subPixB), toFixedPoint(Y[3][0], subPixB), toFixedPoint(Y[0][0], subPixB) };
329
330 memcpy(&this->Xf, &Xf, sizeof(Xf));
331 memcpy(&this->Yf, &Yf, sizeof(Yf));
332
333 memcpy(&this->X, &X, sizeof(X));
334 memcpy(&this->Y, &Y, sizeof(Y));
335
336 const dword maxX[16] = {0x00000000, 0x00000001, 0x00000100, 0x00000101, 0x00010000, 0x00010001, 0x00010100, 0x00010101, 0x01000000, 0x01000001, 0x01000100, 0x01000101, 0x01010000, 0x01010001, 0x01010100, 0x01010101};
337 const dword maxY[16] = {0x00000000, 0x00000002, 0x00000200, 0x00000202, 0x00020000, 0x00020002, 0x00020200, 0x00020202, 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02020000, 0x02020002, 0x02020200, 0x02020202};
338 const dword maxZ[16] = {0x00000000, 0x00000004, 0x00000400, 0x00000404, 0x00040000, 0x00040004, 0x00040400, 0x00040404, 0x04000000, 0x04000004, 0x04000400, 0x04000404, 0x04040000, 0x04040004, 0x04040400, 0x04040404};
339 const dword minX[16] = {0x00000000, 0x00000008, 0x00000800, 0x00000808, 0x00080000, 0x00080008, 0x00080800, 0x00080808, 0x08000000, 0x08000008, 0x08000800, 0x08000808, 0x08080000, 0x08080008, 0x08080800, 0x08080808};
340 const dword minY[16] = {0x00000000, 0x00000010, 0x00001000, 0x00001010, 0x00100000, 0x00100010, 0x00101000, 0x00101010, 0x10000000, 0x10000010, 0x10001000, 0x10001010, 0x10100000, 0x10100010, 0x10101000, 0x10101010};
341 const dword minZ[16] = {0x00000000, 0x00000020, 0x00002000, 0x00002020, 0x00200000, 0x00200020, 0x00202000, 0x00202020, 0x20000000, 0x20000020, 0x20002000, 0x20002020, 0x20200000, 0x20200020, 0x20202000, 0x20202020};
342 const dword fini[16] = {0x00000000, 0x00000080, 0x00008000, 0x00008080, 0x00800000, 0x00800080, 0x00808000, 0x00808080, 0x80000000, 0x80000080, 0x80008000, 0x80008080, 0x80800000, 0x80800080, 0x80808000, 0x80808080};
343
344 memcpy(&this->maxX, &maxX, sizeof(maxX));
345 memcpy(&this->maxY, &maxY, sizeof(maxY));
346 memcpy(&this->maxZ, &maxZ, sizeof(maxZ));
347 memcpy(&this->minX, &minX, sizeof(minX));
348 memcpy(&this->minY, &minY, sizeof(minY));
349 memcpy(&this->minZ, &minZ, sizeof(minZ));
350 memcpy(&this->fini, &fini, sizeof(fini));
351
352 static const dword4 maxPos = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFE};
353
354 memcpy(&this->maxPos, &maxPos, sizeof(maxPos));
355
356 static const float4 unscaleByte = {1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF, 1.0f / 0xFF};
357 static const float4 unscaleSByte = {1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F, 1.0f / 0x7F};
358 static const float4 unscaleShort = {1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF, 1.0f / 0x7FFF};
359 static const float4 unscaleUShort = {1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF};
360 static const float4 unscaleInt = {1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF, 1.0f / 0x7FFFFFFF};
361 static const float4 unscaleUInt = {1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF, 1.0f / 0xFFFFFFFF};
362 static const float4 unscaleFixed = {1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000, 1.0f / 0x00010000};
363
364 memcpy(&this->unscaleByte, &unscaleByte, sizeof(unscaleByte));
365 memcpy(&this->unscaleSByte, &unscaleSByte, sizeof(unscaleSByte));
366 memcpy(&this->unscaleShort, &unscaleShort, sizeof(unscaleShort));
367 memcpy(&this->unscaleUShort, &unscaleUShort, sizeof(unscaleUShort));
368 memcpy(&this->unscaleInt, &unscaleInt, sizeof(unscaleInt));
369 memcpy(&this->unscaleUInt, &unscaleUInt, sizeof(unscaleUInt));
370 memcpy(&this->unscaleFixed, &unscaleFixed, sizeof(unscaleFixed));
371
372 for(int i = 0; i <= 0xFFFF; i++)
373 {
374 half2float[i] = (float)reinterpret_cast<half&>(i);
375 }
376 }
377}