// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include "SamplerCore.hpp"
#include "Reactor/Coroutine.hpp"
#include "System/Math.hpp"
#include "Vulkan/VkBuffer.hpp"
#include "Vulkan/VkBufferView.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkDescriptorSet.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkRenderPass.hpp"
#include "Device/Config.hpp"

#include <spirv/unified1/spirv.hpp>
#include <spirv/unified1/GLSL.std.450.h>

#include <queue>
namespace
{
    constexpr float PI = 3.141592653589793f;

    rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
    {
        return rr::SignMask(ints) != 0;
    }

    rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
    {
        return rr::SignMask(~ints) != 0;
    }

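    // AndAll/OrAll below reduce a per-lane boolean mask to a single value
    // broadcast to every lane, using two swizzled combine steps so that the
    // reduction never has to leave the vector registers.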
    template <typename T>
    rr::RValue<T> AndAll(rr::RValue<T> const &mask)
    {
        T v1 = mask;              // [x]    [y]    [z]    [w]
        T v2 = v1.xzxz & v1.ywyw; // [xy]   [zw]   [xy]   [zw]
        return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
    }

    template <typename T>
    rr::RValue<T> OrAll(rr::RValue<T> const &mask)
    {
        T v1 = mask;              // [x]    [y]    [z]    [w]
        T v2 = v1.xzxz | v1.ywyw; // [xy]   [zw]   [xy]   [zw]
        return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
    }

    rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val)
    {
        return rr::As<sw::SIMD::Float>((rr::As<sw::SIMD::UInt>(val) & sw::SIMD::UInt(0x80000000)) | sw::SIMD::UInt(0x3f800000));
    }

    // Returns the <whole, frac> of val.
    // Both whole and frac will have the same sign as val.
    std::pair<rr::RValue<sw::SIMD::Float>, rr::RValue<sw::SIMD::Float>>
    Modf(rr::RValue<sw::SIMD::Float> const &val)
    {
        auto abs = Abs(val);
        auto sign = Sign(val);
        auto whole = Floor(abs) * sign;
        auto frac = Frac(abs) * sign;
        return std::make_pair(whole, frac);
    }

    // Returns the number of 1s in bits, per lane.
    sw::SIMD::UInt CountBits(rr::RValue<sw::SIMD::UInt> const &bits)
    {
        // TODO: Add an intrinsic to reactor. Even if there isn't a
        // single vector instruction, there may be target-dependent
        // ways to make this faster.
        // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
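        // SWAR popcount: each step sums adjacent bit-fields in parallel,
        // doubling the field width (1 -> 2 -> 4 -> 8 -> 16 -> 32 bits). For a
        // lane holding 0b1101, the first step leaves each 2-bit field holding
        // the popcount of its pair (10 and 01), and the remaining steps
        // accumulate those into the full count (3).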
        sw::SIMD::UInt c = bits - ((bits >> 1) & sw::SIMD::UInt(0x55555555));
        c = ((c >> 2) & sw::SIMD::UInt(0x33333333)) + (c & sw::SIMD::UInt(0x33333333));
        c = ((c >> 4) + c) & sw::SIMD::UInt(0x0F0F0F0F);
        c = ((c >> 8) + c) & sw::SIMD::UInt(0x00FF00FF);
        c = ((c >> 16) + c) & sw::SIMD::UInt(0x0000FFFF);
        return c;
    }

    // Returns 1 << bits.
    // If the resulting bit overflows a 32-bit integer, 0 is returned.
    rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits)
    {
        return ((sw::SIMD::UInt(1) << bits) & rr::CmpLT(bits, sw::SIMD::UInt(32)));
    }

    // Returns bitCount number of 1's starting from the LSB.
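    // e.g. Bitmask32(3) == 0b111, and Bitmask32(32) == 0xFFFFFFFF
    // (NthBit32(32) is 0, and the unsigned 0 - 1 wraps to all ones).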
    rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount)
    {
        return NthBit32(bitCount) - sw::SIMD::UInt(1);
    }

    // Performs a fused-multiply add, returning a * b + c.
    rr::RValue<sw::SIMD::Float> FMA(
            rr::RValue<sw::SIMD::Float> const &a,
            rr::RValue<sw::SIMD::Float> const &b,
            rr::RValue<sw::SIMD::Float> const &c)
    {
        return a * b + c;
    }

    // Returns the exponent of the floating point number f.
    // Assumes IEEE 754.
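    // Note that a bias of 126 rather than 127 is subtracted, producing the
    // frexp()-style exponent e for which f == significand * 2^e with the
    // significand in [0.5, 1.0), as GLSL.std.450 Frexp expects.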
    rr::RValue<sw::SIMD::Int> Exponent(rr::RValue<sw::SIMD::Float> f)
    {
        auto v = rr::As<sw::SIMD::UInt>(f);
        return (sw::SIMD::Int((v >> sw::SIMD::UInt(23)) & sw::SIMD::UInt(0xFF)) - sw::SIMD::Int(126));
    }

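    // NMin and NMax below implement the NaN-aware min/max branchlessly: each
    // IsNan() mask selects between the candidate results using the standard
    // SIMD select idiom, (mask & a) | (~mask & b).
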
    // Returns y if y < x; otherwise result is x.
    // If one operand is a NaN, the other operand is the result.
    // If both operands are NaN, the result is a NaN.
    rr::RValue<sw::SIMD::Float> NMin(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
    {
        using namespace rr;
        auto xIsNan = IsNan(x);
        auto yIsNan = IsNan(y);
        return As<sw::SIMD::Float>(
            // If neither is NaN, return min
            ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
            // If one operand is a NaN, the other operand is the result
            // If both operands are NaN, the result is a NaN.
            ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
            (( xIsNan          ) & As<sw::SIMD::Int>(y)));
    }

    // Returns y if y > x; otherwise result is x.
    // If one operand is a NaN, the other operand is the result.
    // If both operands are NaN, the result is a NaN.
    rr::RValue<sw::SIMD::Float> NMax(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
    {
        using namespace rr;
        auto xIsNan = IsNan(x);
        auto yIsNan = IsNan(y);
        return As<sw::SIMD::Float>(
            // If neither is NaN, return max
            ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
            // If one operand is a NaN, the other operand is the result
            // If both operands are NaN, the result is a NaN.
            ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
            (( xIsNan          ) & As<sw::SIMD::Int>(y)));
    }

    // Returns the determinant of a 2x2 matrix.
    rr::RValue<sw::SIMD::Float> Determinant(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
        rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
    {
        return a*d - b*c;
    }

    // Returns the determinant of a 3x3 matrix.
    rr::RValue<sw::SIMD::Float> Determinant(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
        rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
        rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
    {
        return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h;
    }

    // Returns the determinant of a 4x4 matrix.
    rr::RValue<sw::SIMD::Float> Determinant(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
        rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
        rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
        rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
    {
        return a * Determinant(f, g, h,
                               j, k, l,
                               n, o, p) -
               b * Determinant(e, g, h,
                               i, k, l,
                               m, o, p) +
               c * Determinant(e, f, h,
                               i, j, l,
                               m, n, p) -
               d * Determinant(e, f, g,
                               i, j, k,
                               m, n, o);
    }

    // Returns the inverse of a 2x2 matrix.
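    // This is the adjugate divided by the determinant,
    //   inverse([a b; c d]) = 1/(ad - bc) * [d -b; -c a],
    // with the result returned in row-major order.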
    std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
        rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
    {
        auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d);
        return {{s*d, -s*b, -s*c, s*a}};
    }

    // Returns the inverse of a 3x3 matrix.
    std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
        rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
        rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
    {
        auto s = sw::SIMD::Float(1.0f) / Determinant(
                a, b, c,
                d, e, f,
                g, h, i); // TODO: duplicate arithmetic calculating the det and below.

        return {{
            s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e),
            s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f),
            s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d),
        }};
    }

    // Returns the inverse of a 4x4 matrix.
    std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
        rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
        rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
        rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
    {
        auto s = sw::SIMD::Float(1.0f) / Determinant(
                a, b, c, d,
                e, f, g, h,
                i, j, k, l,
                m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.

        auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n;
        auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n;
        auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j;
        auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m;
        auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i;
        auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i;

        return {{
            s * ( f * kplo - g * jpln + h * jokn),
            s * (-b * kplo + c * jpln - d * jokn),
            s * ( b * gpho - c * fphn + d * fogn),
            s * (-b * glhk + c * flhj - d * fkgj),

            s * (-e * kplo + g * iplm - h * iokm),
            s * ( a * kplo - c * iplm + d * iokm),
            s * (-a * gpho + c * ephm - d * eogm),
            s * ( a * glhk - c * elhi + d * ekgi),

            s * ( e * jpln - f * iplm + h * injm),
            s * (-a * jpln + b * iplm - d * injm),
            s * ( a * fphn - b * ephm + d * enfm),
            s * (-a * flhj + b * elhi - d * ejfi),

            s * (-e * jokn + f * iokm - g * injm),
            s * ( a * jokn - b * iokm + c * injm),
            s * (-a * fogn + b * eogm - c * enfm),
            s * ( a * fkgj - b * ekgi + c * ejfi),
        }};
    }

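    // Converts a pointer to tightly packed data into one addressing the
    // lane-interleaved layout, where each scalar element is stored
    // SIMD::Width times, one copy per lane: element i of lane l ends up at
    // byte offset (i * SIMD::Width + l) * sizeof(float). This is the layout
    // used for storage classes for which IsStorageInterleavedByLane() holds.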
    sw::SIMD::Pointer interleaveByLane(sw::SIMD::Pointer p)
    {
        p *= sw::SIMD::Width;
        p.staticOffsets[0] += 0 * sizeof(float);
        p.staticOffsets[1] += 1 * sizeof(float);
        p.staticOffsets[2] += 2 * sizeof(float);
        p.staticOffsets[3] += 3 * sizeof(float);
        return p;
    }

    VkFormat SpirvFormatToVulkanFormat(spv::ImageFormat format)
    {
        switch (format)
        {
        case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
        case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
        case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
        case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
        case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
        case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
        case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
        case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
        case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
        case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
        case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
        case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
        case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
        case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
        case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
        case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;

        default:
            UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
            return VK_FORMAT_UNDEFINED;
        }
    }

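    // Decodes an sRGB-encoded component to linear color space:
    //   linear = c / 12.92                   for c < 0.04045
    //   linear = ((c + 0.055) / 1.055)^2.4   otherwise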
    sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
    {
        sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
        sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));

        sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));

        return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse()
    }

} // anonymous namespace

namespace sw
{
    namespace SIMD
    {

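        // Loads a vector of type T through the per-lane pointer, choosing the
        // cheapest path that is sound for the requested robustness behavior:
        //  - statically in-bounds, sequential offsets: a single vector load;
        //  - statically in-bounds, equal offsets: one scalar load, replicated;
        //  - otherwise, for non-atomic relaxed accesses: a masked load, gather,
        //    or replicated scalar load, with out-of-bounds lanes masked off;
        //  - atomic or ordered accesses: dynamic checks for equal or sequential
        //    offsets with all lanes enabled, falling back to per-lane scalar loads.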
        template<typename T>
        T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
        {
            using EL = typename Element<T>::type;

            if (ptr.isStaticallyInBounds(sizeof(float), robustness))
            {
                // All elements are statically known to be in-bounds.
                // This lets us avoid costly conditionals on the mask.

                if (ptr.hasStaticSequentialOffsets(sizeof(float)))
                {
                    // Offsets are sequential. Perform regular load.
                    return rr::Load(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), alignment, atomic, order);
                }
                if (ptr.hasStaticEqualOffsets())
                {
                    // Load one, replicate.
                    return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment));
                }
            }
            else
            {
                switch(robustness)
                {
                case OutOfBoundsBehavior::Nullify:
                case OutOfBoundsBehavior::RobustBufferAccess:
                case OutOfBoundsBehavior::UndefinedValue:
                    mask &= ptr.isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
                    break;
                case OutOfBoundsBehavior::UndefinedBehavior:
                    // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
                    break;
                }
            }

            auto offsets = ptr.offsets();

            if (!atomic && order == std::memory_order_relaxed)
            {
                if (ptr.hasStaticEqualOffsets())
                {
                    // Load one, replicate.
                    // Be careful of the case where the post-bounds-check mask
                    // is 0, in which case we must not load.
                    T out = T(0);
                    If(AnyTrue(mask))
                    {
                        EL el = *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment);
                        out = T(el);
                    }
                    return out;
                }

                bool zeroMaskedLanes = true;
                switch(robustness)
                {
                case OutOfBoundsBehavior::Nullify:
                case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
                    zeroMaskedLanes = true;
                    break;
                case OutOfBoundsBehavior::UndefinedValue:
                case OutOfBoundsBehavior::UndefinedBehavior:
                    zeroMaskedLanes = false;
                    break;
                }

                if (ptr.hasStaticSequentialOffsets(sizeof(float)))
                {
                    return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, zeroMaskedLanes);
                }

                return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, zeroMaskedLanes);
            }
            else
            {
                T out;
                auto anyLanesDisabled = AnyFalse(mask);
                If(ptr.hasEqualOffsets() && !anyLanesDisabled)
                {
                    // Load one, replicate.
                    auto offset = Extract(offsets, 0);
                    out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order));
                }
                Else If(ptr.hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
                {
                    // Load all elements in a single SIMD instruction.
                    auto offset = Extract(offsets, 0);
                    out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
                }
                Else
                {
                    // Divergent offsets or masked lanes.
                    out = T(0);
                    for (int i = 0; i < SIMD::Width; i++)
                    {
                        If(Extract(mask, i) != 0)
                        {
                            auto offset = Extract(offsets, i);
                            auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
                            out = Insert(out, el, i);
                        }
                    }
                }
                return out;
            }
        }

        template<typename T>
        void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
        {
            using EL = typename Element<T>::type;
            constexpr size_t alignment = sizeof(float);
            auto offsets = ptr.offsets();

            switch(robustness)
            {
            case OutOfBoundsBehavior::Nullify:
            case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
            case OutOfBoundsBehavior::UndefinedValue:     // Should not be used for store operations. Treat as robust buffer access.
                mask &= ptr.isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
                break;
            case OutOfBoundsBehavior::UndefinedBehavior:
                // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
                break;
            }

            if (!atomic && order == std::memory_order_relaxed)
            {
                if (ptr.hasStaticEqualOffsets())
                {
                    If (AnyTrue(mask))
                    {
                        // All equal. One of these writes will win -- elect the winning lane.
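                        // elect keeps only the lowest-numbered enabled lane:
                        // lane i is cleared if any lane below it is set in mask.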
                        auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
                        auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx));
                        auto maskedVal = As<SIMD::Int>(val) & elect;
                        auto scalarVal = Extract(maskedVal, 0) |
                                         Extract(maskedVal, 1) |
                                         Extract(maskedVal, 2) |
                                         Extract(maskedVal, 3);
                        *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment) = As<EL>(scalarVal);
                    }
                }
                else if (ptr.hasStaticSequentialOffsets(sizeof(float)))
                {
                    if (ptr.isStaticallyInBounds(sizeof(float), robustness))
                    {
                        // Pointer has no elements OOB, and the store is not atomic.
                        // Perform an RMW.
                        auto p = rr::Pointer<SIMD::Int>(ptr.base + ptr.staticOffsets[0], alignment);
                        auto prev = *p;
                        *p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
                    }
                    else
                    {
                        rr::MaskedStore(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), val, mask, alignment);
                    }
                }
                else
                {
                    rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, alignment);
                }
            }
            else
            {
                auto anyLanesDisabled = AnyFalse(mask);
                If(ptr.hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
                {
                    // Store all elements in a single SIMD instruction.
                    auto offset = Extract(offsets, 0);
                    Store(val, rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
                }
                Else
                {
                    // Divergent offsets or masked lanes.
                    for (int i = 0; i < SIMD::Width; i++)
                    {
                        If(Extract(mask, i) != 0)
                        {
                            auto offset = Extract(offsets, i);
                            rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
                        }
                    }
                }
            }
        }

    } // namespace SIMD

    SpirvShader::SpirvShader(
            uint32_t codeSerialID,
            VkShaderStageFlagBits pipelineStage,
            const char *entryPointName,
            InsnStore const &insns,
            const vk::RenderPass *renderPass,
            uint32_t subpassIndex,
            bool robustBufferAccess)
        : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
          outputs{MAX_INTERFACE_COMPONENTS},
          codeSerialID(codeSerialID),
          robustBufferAccess(robustBufferAccess)
    {
        ASSERT(insns.size() > 0);

        if (renderPass)
        {
            // capture formats of any input attachments present
            auto subpass = renderPass->getSubpass(subpassIndex);
            inputAttachmentFormats.reserve(subpass.inputAttachmentCount);
            for (auto i = 0u; i < subpass.inputAttachmentCount; i++)
            {
                auto attachmentIndex = subpass.pInputAttachments[i].attachment;
                inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
                        ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED);
            }
        }

        // Simplifying assumptions (to be satisfied by earlier transformations)
        // - The only input/output OpVariables present are those used by the entrypoint

        Function::ID currentFunction;
        Block::ID currentBlock;
        InsnIterator blockStart;

        for (auto insn : *this)
        {
            spv::Op opcode = insn.opcode();

            switch (opcode)
            {
            case spv::OpEntryPoint:
            {
                executionModel = spv::ExecutionModel(insn.word(1));
                auto id = Function::ID(insn.word(2));
                auto name = insn.string(3);
                auto stage = executionModelToStage(executionModel);
                if (stage == pipelineStage && strcmp(name, entryPointName) == 0)
                {
                    ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
                    entryPoint = id;
                }
                break;
            }

            case spv::OpExecutionMode:
                ProcessExecutionMode(insn);
                break;

            case spv::OpDecorate:
            {
                TypeOrObjectID targetId = insn.word(1);
                auto decoration = static_cast<spv::Decoration>(insn.word(2));
                uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;

                decorations[targetId].Apply(decoration, value);

                switch(decoration)
                {
                case spv::DecorationDescriptorSet:
                    descriptorDecorations[targetId].DescriptorSet = value;
                    break;
                case spv::DecorationBinding:
                    descriptorDecorations[targetId].Binding = value;
                    break;
                case spv::DecorationInputAttachmentIndex:
                    descriptorDecorations[targetId].InputAttachmentIndex = value;
                    break;
                default:
                    // Only handling descriptor decorations here.
                    break;
                }

                if (decoration == spv::DecorationCentroid)
                    modes.NeedsCentroid = true;
                break;
            }

            case spv::OpMemberDecorate:
            {
                Type::ID targetId = insn.word(1);
                auto memberIndex = insn.word(2);
                auto decoration = static_cast<spv::Decoration>(insn.word(3));
                uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;

                auto &d = memberDecorations[targetId];
                if (memberIndex >= d.size())
                    d.resize(memberIndex + 1); // on demand; exact size would require another pass...

                d[memberIndex].Apply(decoration, value);

                if (decoration == spv::DecorationCentroid)
                    modes.NeedsCentroid = true;
                break;
            }

            case spv::OpDecorationGroup:
                // Nothing to do here. We don't need to record the definition of the group; we'll just have
                // the bundle of decorations float around. If we were to ever walk the decorations directly,
                // we might think about introducing this as a real Object.
                break;

            case spv::OpGroupDecorate:
            {
                uint32_t group = insn.word(1);
                auto const &groupDecorations = decorations[group];
                auto const &descriptorGroupDecorations = descriptorDecorations[group];
                for (auto i = 2u; i < insn.wordCount(); i++)
                {
                    // Remaining operands are targets to apply the group to.
                    uint32_t target = insn.word(i);
                    decorations[target].Apply(groupDecorations);
                    descriptorDecorations[target].Apply(descriptorGroupDecorations);
                }

                break;
            }

            case spv::OpGroupMemberDecorate:
            {
                auto const &srcDecorations = decorations[insn.word(1)];
                for (auto i = 2u; i < insn.wordCount(); i += 2)
                {
                    // remaining operands are pairs of <id>, literal for members to apply to.
                    auto &d = memberDecorations[insn.word(i)];
                    auto memberIndex = insn.word(i + 1);
                    if (memberIndex >= d.size())
                        d.resize(memberIndex + 1); // on demand resize, see above...
                    d[memberIndex].Apply(srcDecorations);
                }
                break;
            }

            case spv::OpLabel:
            {
                ASSERT(currentBlock.value() == 0);
                currentBlock = Block::ID(insn.word(1));
                blockStart = insn;
                break;
            }

            // Branch Instructions (subset of Termination Instructions):
            case spv::OpBranch:
            case spv::OpBranchConditional:
            case spv::OpSwitch:
            case spv::OpReturn:
            // fallthrough

            // Termination instruction:
            case spv::OpKill:
            case spv::OpUnreachable:
            {
                ASSERT(currentBlock.value() != 0);
                ASSERT(currentFunction.value() != 0);

                auto blockEnd = insn; blockEnd++;
                functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
                currentBlock = Block::ID(0);

                if (opcode == spv::OpKill)
                {
                    modes.ContainsKill = true;
                }
                break;
            }

            case spv::OpLoopMerge:
            case spv::OpSelectionMerge:
                break; // Nothing to do in analysis pass.

            case spv::OpTypeVoid:
            case spv::OpTypeBool:
            case spv::OpTypeInt:
            case spv::OpTypeFloat:
            case spv::OpTypeVector:
            case spv::OpTypeMatrix:
            case spv::OpTypeImage:
            case spv::OpTypeSampler:
            case spv::OpTypeSampledImage:
            case spv::OpTypeArray:
            case spv::OpTypeRuntimeArray:
            case spv::OpTypeStruct:
            case spv::OpTypePointer:
            case spv::OpTypeFunction:
                DeclareType(insn);
                break;

            case spv::OpVariable:
            {
                Type::ID typeId = insn.word(1);
                Object::ID resultId = insn.word(2);
                auto storageClass = static_cast<spv::StorageClass>(insn.word(3));

                auto &object = defs[resultId];
                object.kind = Object::Kind::Pointer;
                object.definition = insn;
                object.type = typeId;

                ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
                ASSERT(getType(typeId).storageClass == storageClass);

                switch (storageClass)
                {
                case spv::StorageClassInput:
                case spv::StorageClassOutput:
                    ProcessInterfaceVariable(object);
                    break;

                case spv::StorageClassUniform:
                case spv::StorageClassStorageBuffer:
                    object.kind = Object::Kind::DescriptorSet;
                    break;

                case spv::StorageClassPushConstant:
                case spv::StorageClassPrivate:
                case spv::StorageClassFunction:
                case spv::StorageClassUniformConstant:
                    break; // Correctly handled.

                case spv::StorageClassWorkgroup:
                {
                    auto &elTy = getType(getType(typeId).element);
                    auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float));
                    workgroupMemory.allocate(resultId, sizeInBytes);
                    object.kind = Object::Kind::Pointer;
                    break;
                }
                case spv::StorageClassAtomicCounter:
                case spv::StorageClassImage:
                    UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
                    break;

                case spv::StorageClassCrossWorkgroup:
                    UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
                    break;

                case spv::StorageClassGeneric:
                    UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
                    break;

                default:
                    UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
                    break;
                }
                break;
            }

            case spv::OpConstant:
            case spv::OpSpecConstant:
                CreateConstant(insn).constantValue[0] = insn.word(3);
                break;
            case spv::OpConstantFalse:
            case spv::OpSpecConstantFalse:
                CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero.
                break;
            case spv::OpConstantTrue:
            case spv::OpSpecConstantTrue:
                CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set.
                break;
            case spv::OpConstantNull:
            case spv::OpUndef:
            {
                // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
                // OpConstantNull forms a constant of arbitrary type, all zeros.
                auto &object = CreateConstant(insn);
                auto &objectTy = getType(object.type);
                for (auto i = 0u; i < objectTy.sizeInComponents; i++)
                {
                    object.constantValue[i] = 0;
                }
                break;
            }
            case spv::OpConstantComposite:
            case spv::OpSpecConstantComposite:
            {
                auto &object = CreateConstant(insn);
                auto offset = 0u;
                for (auto i = 0u; i < insn.wordCount() - 3; i++)
                {
                    auto &constituent = getObject(insn.word(i + 3));
                    auto &constituentTy = getType(constituent.type);
                    for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
                    {
                        object.constantValue[offset++] = constituent.constantValue[j];
                    }
                }

                auto objectId = Object::ID(insn.word(2));
                auto decorationsIt = decorations.find(objectId);
                if (decorationsIt != decorations.end() &&
                    decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
                {
                    // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
                    // Decorating an object with the WorkgroupSize built-in
                    // decoration will make that object contain the dimensions
                    // of a local workgroup. If an object is decorated with the
                    // WorkgroupSize decoration, this must take precedence over
                    // any execution mode set for LocalSize.
                    // The object decorated with WorkgroupSize must be declared
                    // as a three-component vector of 32-bit integers.
                    ASSERT(getType(object.type).sizeInComponents == 3);
                    modes.WorkgroupSizeX = object.constantValue[0];
                    modes.WorkgroupSizeY = object.constantValue[1];
                    modes.WorkgroupSizeZ = object.constantValue[2];
                }
                break;
            }
            case spv::OpSpecConstantOp:
                EvalSpecConstantOp(insn);
                break;

            case spv::OpCapability:
            {
                auto capability = static_cast<spv::Capability>(insn.word(1));
                switch (capability)
                {
                case spv::CapabilityMatrix: capabilities.Matrix = true; break;
                case spv::CapabilityShader: capabilities.Shader = true; break;
                case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
                case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
                case spv::CapabilityImage1D: capabilities.Image1D = true; break;
                case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
                case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
                case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
                case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
                case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
                case spv::CapabilityMultiView: capabilities.MultiView = true; break;
                case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
                case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
                case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
                case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
                case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
                case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
                default:
                    UNSUPPORTED("Unsupported capability %u", insn.word(1));
                }
                break; // Various capabilities will be declared, but none affect our code generation at this point.
            }

            case spv::OpMemoryModel:
                break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.

            case spv::OpFunction:
            {
                auto functionId = Function::ID(insn.word(2));
                ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
                currentFunction = functionId;
                auto &function = functions[functionId];
                function.result = Type::ID(insn.word(1));
                function.type = Type::ID(insn.word(4));
                // Scan forward to find the function's label.
                for (auto it = insn; it != end() && function.entry == 0; it++)
                {
                    switch (it.opcode())
                    {
                    case spv::OpFunction:
                    case spv::OpFunctionParameter:
                        break;
                    case spv::OpLabel:
                        function.entry = Block::ID(it.word(1));
                        break;
                    default:
                        WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
                    }
                }
                ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
                break;
            }

            case spv::OpFunctionEnd:
                currentFunction = 0;
                break;

            case spv::OpExtInstImport:
            {
                // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
                // Valid shaders will not attempt to import any other instruction sets.
                auto ext = insn.string(2);
                if (0 != strcmp("GLSL.std.450", ext))
                {
                    UNSUPPORTED("SPIR-V Extension: %s", ext);
                }
                break;
            }
            case spv::OpName:
            case spv::OpMemberName:
            case spv::OpSource:
            case spv::OpSourceContinued:
            case spv::OpSourceExtension:
            case spv::OpLine:
            case spv::OpNoLine:
            case spv::OpModuleProcessed:
            case spv::OpString:
                // No semantic impact
                break;

            case spv::OpFunctionParameter:
                // These should have all been removed by preprocessing passes. If we see them here,
                // our assumptions are wrong and we will probably generate wrong code.
                UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
                break;

            case spv::OpFunctionCall:
                // TODO(b/141246700): Add full support for spv::OpFunctionCall
                break;

            case spv::OpFConvert:
                UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
                break;

            case spv::OpSConvert:
                UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
                break;

            case spv::OpUConvert:
                UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
                break;

            case spv::OpLoad:
            case spv::OpAccessChain:
            case spv::OpInBoundsAccessChain:
            case spv::OpSampledImage:
            case spv::OpImage:
            {
                // Propagate the descriptor decorations to the result.
                Object::ID resultId = insn.word(2);
                Object::ID pointerId = insn.word(3);
                const auto &d = descriptorDecorations.find(pointerId);

                if(d != descriptorDecorations.end())
                {
                    descriptorDecorations[resultId] = d->second;
                }

                DefineResult(insn);

                if (opcode == spv::OpAccessChain || opcode == spv::OpInBoundsAccessChain)
                {
                    Decorations dd{};
                    ApplyDecorationsForAccessChain(&dd, &descriptorDecorations[resultId], pointerId, insn.wordCount() - 4, insn.wordPointer(4));
                    // Note: offset is the one thing that does *not* propagate, as the access chain accounts for it.
                    dd.HasOffset = false;
                    decorations[resultId].Apply(dd);
                }
            }
            break;

            case spv::OpCompositeConstruct:
            case spv::OpCompositeInsert:
            case spv::OpCompositeExtract:
            case spv::OpVectorShuffle:
            case spv::OpVectorTimesScalar:
            case spv::OpMatrixTimesScalar:
            case spv::OpMatrixTimesVector:
            case spv::OpVectorTimesMatrix:
            case spv::OpMatrixTimesMatrix:
            case spv::OpOuterProduct:
            case spv::OpTranspose:
            case spv::OpVectorExtractDynamic:
            case spv::OpVectorInsertDynamic:
            // Unary ops
            case spv::OpNot:
            case spv::OpBitFieldInsert:
            case spv::OpBitFieldSExtract:
            case spv::OpBitFieldUExtract:
            case spv::OpBitReverse:
            case spv::OpBitCount:
            case spv::OpSNegate:
            case spv::OpFNegate:
            case spv::OpLogicalNot:
            case spv::OpQuantizeToF16:
            // Binary ops
            case spv::OpIAdd:
            case spv::OpISub:
            case spv::OpIMul:
            case spv::OpSDiv:
            case spv::OpUDiv:
            case spv::OpFAdd:
            case spv::OpFSub:
            case spv::OpFMul:
            case spv::OpFDiv:
            case spv::OpFMod:
            case spv::OpFRem:
            case spv::OpFOrdEqual:
            case spv::OpFUnordEqual:
            case spv::OpFOrdNotEqual:
            case spv::OpFUnordNotEqual:
            case spv::OpFOrdLessThan:
            case spv::OpFUnordLessThan:
            case spv::OpFOrdGreaterThan:
            case spv::OpFUnordGreaterThan:
            case spv::OpFOrdLessThanEqual:
            case spv::OpFUnordLessThanEqual:
            case spv::OpFOrdGreaterThanEqual:
            case spv::OpFUnordGreaterThanEqual:
            case spv::OpSMod:
            case spv::OpSRem:
            case spv::OpUMod:
            case spv::OpIEqual:
            case spv::OpINotEqual:
            case spv::OpUGreaterThan:
            case spv::OpSGreaterThan:
            case spv::OpUGreaterThanEqual:
            case spv::OpSGreaterThanEqual:
            case spv::OpULessThan:
            case spv::OpSLessThan:
            case spv::OpULessThanEqual:
            case spv::OpSLessThanEqual:
            case spv::OpShiftRightLogical:
            case spv::OpShiftRightArithmetic:
            case spv::OpShiftLeftLogical:
            case spv::OpBitwiseOr:
            case spv::OpBitwiseXor:
            case spv::OpBitwiseAnd:
            case spv::OpLogicalOr:
            case spv::OpLogicalAnd:
            case spv::OpLogicalEqual:
            case spv::OpLogicalNotEqual:
            case spv::OpUMulExtended:
            case spv::OpSMulExtended:
            case spv::OpIAddCarry:
            case spv::OpISubBorrow:
            case spv::OpDot:
            case spv::OpConvertFToU:
            case spv::OpConvertFToS:
            case spv::OpConvertSToF:
            case spv::OpConvertUToF:
            case spv::OpBitcast:
            case spv::OpSelect:
            case spv::OpExtInst:
            case spv::OpIsInf:
            case spv::OpIsNan:
            case spv::OpAny:
            case spv::OpAll:
            case spv::OpDPdx:
            case spv::OpDPdxCoarse:
            case spv::OpDPdy:
            case spv::OpDPdyCoarse:
            case spv::OpFwidth:
            case spv::OpFwidthCoarse:
            case spv::OpDPdxFine:
            case spv::OpDPdyFine:
            case spv::OpFwidthFine:
            case spv::OpAtomicLoad:
            case spv::OpAtomicIAdd:
            case spv::OpAtomicISub:
            case spv::OpAtomicSMin:
            case spv::OpAtomicSMax:
            case spv::OpAtomicUMin:
            case spv::OpAtomicUMax:
            case spv::OpAtomicAnd:
            case spv::OpAtomicOr:
            case spv::OpAtomicXor:
            case spv::OpAtomicIIncrement:
            case spv::OpAtomicIDecrement:
            case spv::OpAtomicExchange:
            case spv::OpAtomicCompareExchange:
            case spv::OpPhi:
            case spv::OpImageSampleImplicitLod:
            case spv::OpImageSampleExplicitLod:
            case spv::OpImageSampleDrefImplicitLod:
            case spv::OpImageSampleDrefExplicitLod:
            case spv::OpImageSampleProjImplicitLod:
            case spv::OpImageSampleProjExplicitLod:
            case spv::OpImageSampleProjDrefImplicitLod:
            case spv::OpImageSampleProjDrefExplicitLod:
            case spv::OpImageGather:
            case spv::OpImageDrefGather:
            case spv::OpImageFetch:
            case spv::OpImageQuerySizeLod:
            case spv::OpImageQuerySize:
            case spv::OpImageQueryLod:
            case spv::OpImageQueryLevels:
            case spv::OpImageQuerySamples:
            case spv::OpImageRead:
            case spv::OpImageTexelPointer:
            case spv::OpGroupNonUniformElect:
            case spv::OpGroupNonUniformAll:
            case spv::OpGroupNonUniformAny:
            case spv::OpGroupNonUniformAllEqual:
            case spv::OpGroupNonUniformBroadcast:
            case spv::OpGroupNonUniformBroadcastFirst:
            case spv::OpGroupNonUniformBallot:
            case spv::OpGroupNonUniformInverseBallot:
            case spv::OpGroupNonUniformBallotBitExtract:
            case spv::OpGroupNonUniformBallotBitCount:
            case spv::OpGroupNonUniformBallotFindLSB:
            case spv::OpGroupNonUniformBallotFindMSB:
            case spv::OpGroupNonUniformShuffle:
            case spv::OpGroupNonUniformShuffleXor:
            case spv::OpGroupNonUniformShuffleUp:
            case spv::OpGroupNonUniformShuffleDown:
            case spv::OpCopyObject:
            case spv::OpArrayLength:
                // Instructions that yield an intermediate value or divergent pointer
                DefineResult(insn);
                break;

            case spv::OpStore:
            case spv::OpAtomicStore:
            case spv::OpImageWrite:
            case spv::OpCopyMemory:
            case spv::OpMemoryBarrier:
                // Don't need to do anything during analysis pass
                break;

            case spv::OpControlBarrier:
                modes.ContainsControlBarriers = true;
                break;

            case spv::OpExtension:
            {
                auto ext = insn.string(1);
                // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
                // extension per Appendix A, `Vulkan Environment for SPIR-V`.
                if (!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
                if (!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
                if (!strcmp(ext, "SPV_KHR_16bit_storage")) break;
                if (!strcmp(ext, "SPV_KHR_variable_pointers")) break;
                if (!strcmp(ext, "SPV_KHR_device_group")) break;
                if (!strcmp(ext, "SPV_KHR_multiview")) break;
                UNSUPPORTED("SPIR-V Extension: %s", ext);
                break;
            }

            default:
                UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
            }
        }

        ASSERT_MSG(entryPoint != 0, "Entry point '%s' not found", entryPointName);
        for (auto &it : functions)
        {
            it.second.AssignBlockFields();
        }
    }

    void SpirvShader::DeclareType(InsnIterator insn)
    {
        Type::ID resultId = insn.word(1);

        auto &type = types[resultId];
        type.definition = insn;
        type.sizeInComponents = ComputeTypeSize(insn);

        // A structure is a builtin block if it has a builtin
        // member. All members of such a structure are builtins.
        switch (insn.opcode())
        {
        case spv::OpTypeStruct:
        {
            auto d = memberDecorations.find(resultId);
            if (d != memberDecorations.end())
            {
                for (auto &m : d->second)
                {
                    if (m.HasBuiltIn)
                    {
                        type.isBuiltInBlock = true;
                        break;
                    }
                }
            }
            break;
        }
        case spv::OpTypePointer:
        {
            Type::ID elementTypeId = insn.word(3);
            type.element = elementTypeId;
            type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
            type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
            break;
        }
        case spv::OpTypeVector:
        case spv::OpTypeMatrix:
        case spv::OpTypeArray:
        case spv::OpTypeRuntimeArray:
        {
            Type::ID elementTypeId = insn.word(2);
            type.element = elementTypeId;
            break;
        }
        default:
            break;
        }
    }

    SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
    {
        Type::ID typeId = insn.word(1);
        Object::ID resultId = insn.word(2);
        auto &object = defs[resultId];
        auto &objectTy = getType(typeId);
        object.type = typeId;
        object.kind = Object::Kind::Constant;
        object.definition = insn;
        object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
        return object;
    }

    void SpirvShader::ProcessInterfaceVariable(Object &object)
    {
        auto &objectTy = getType(object.type);
        ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);

        ASSERT(objectTy.opcode() == spv::OpTypePointer);
        auto pointeeTy = getType(objectTy.element);

        auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
        auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;

        ASSERT(object.opcode() == spv::OpVariable);
        Object::ID resultId = object.definition.word(2);

        if (objectTy.isBuiltInBlock)
        {
            // walk the builtin block, registering each of its members separately.
            auto m = memberDecorations.find(objectTy.element);
            ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
            auto &structType = pointeeTy.definition;
            auto offset = 0u;
            auto word = 2u;
            for (auto &member : m->second)
            {
                auto &memberType = getType(structType.word(word));

                if (member.HasBuiltIn)
                {
                    builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
                }

                offset += memberType.sizeInComponents;
                ++word;
            }
            return;
        }

        auto d = decorations.find(resultId);
        if (d != decorations.end() && d->second.HasBuiltIn)
        {
            builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
        }
        else
        {
            object.kind = Object::Kind::InterfaceVariable;
            VisitInterface(resultId,
                    [&userDefinedInterface](Decorations const &d, AttribType type) {
                        // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
                        auto scalarSlot = (d.Location << 2) | d.Component;
                        ASSERT(scalarSlot >= 0 &&
                               scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));

                        auto &slot = userDefinedInterface[scalarSlot];
                        slot.Type = type;
                        slot.Flat = d.Flat;
                        slot.NoPerspective = d.NoPerspective;
                        slot.Centroid = d.Centroid;
                    });
        }
    }

    void SpirvShader::ProcessExecutionMode(InsnIterator insn)
    {
        auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
        switch (mode)
        {
        case spv::ExecutionModeEarlyFragmentTests:
            modes.EarlyFragmentTests = true;
            break;
        case spv::ExecutionModeDepthReplacing:
            modes.DepthReplacing = true;
            break;
        case spv::ExecutionModeDepthGreater:
            modes.DepthGreater = true;
            break;
        case spv::ExecutionModeDepthLess:
            modes.DepthLess = true;
            break;
        case spv::ExecutionModeDepthUnchanged:
            modes.DepthUnchanged = true;
            break;
        case spv::ExecutionModeLocalSize:
            modes.WorkgroupSizeX = insn.word(3);
            modes.WorkgroupSizeY = insn.word(4);
            modes.WorkgroupSizeZ = insn.word(5);
            break;
        case spv::ExecutionModeOriginUpperLeft:
            // This is always the case for a Vulkan shader. Do nothing.
            break;
        default:
            UNREACHABLE("Execution mode: %d", int(mode));
        }
    }

    uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
    {
        // Types are always built from the bottom up (with the exception of
        // forward ptrs, which don't appear in Vulkan shaders). Therefore, we can
        // always assume our component parts have already been described (and so
        // their sizes determined).
        switch (insn.opcode())
        {
        case spv::OpTypeVoid:
        case spv::OpTypeSampler:
        case spv::OpTypeImage:
        case spv::OpTypeSampledImage:
        case spv::OpTypeFunction:
        case spv::OpTypeRuntimeArray:
            // Objects that don't consume any space.
            // Descriptor-backed objects currently only need to exist at compile-time.
            // Runtime arrays don't appear in places where their size would be interesting.
            return 0;

        case spv::OpTypeBool:
        case spv::OpTypeFloat:
        case spv::OpTypeInt:
            // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
            // we might need to change this, but only 32-bit components are required for Vulkan 1.1.
            return 1;

        case spv::OpTypeVector:
        case spv::OpTypeMatrix:
            // Vectors and matrices both consume element count * element size.
            return getType(insn.word(2)).sizeInComponents * insn.word(3);

        case spv::OpTypeArray:
        {
            // Element count * element size. Array sizes come from constant ids.
            auto arraySize = GetConstScalarInt(insn.word(3));
            return getType(insn.word(2)).sizeInComponents * arraySize;
        }

        case spv::OpTypeStruct:
        {
            uint32_t size = 0;
            for (uint32_t i = 2u; i < insn.wordCount(); i++)
            {
                size += getType(insn.word(i)).sizeInComponents;
            }
            return size;
        }

        case spv::OpTypePointer:
            // Runtime representation of a pointer is a per-lane index.
            // Note: clients are expected to look through the pointer if they want the pointee size instead.
            return 1;

        default:
            UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
            return 0;
        }
    }

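    // Returns whether stores to the given storage class are still performed in
    // helper invocations. Helper invocations must not have side effects that
    // are visible outside the quad, so stores to buffer and image memory are
    // suppressed, while per-invocation memory is still written so that
    // derivative computations see up-to-date values.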
    bool SpirvShader::StoresInHelperInvocation(spv::StorageClass storageClass)
    {
        switch (storageClass)
        {
        case spv::StorageClassUniform:
        case spv::StorageClassStorageBuffer:
        case spv::StorageClassImage:
            return false;
        default:
            return true;
        }
    }

    bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass)
    {
        switch (storageClass)
        {
        case spv::StorageClassUniform:
        case spv::StorageClassStorageBuffer:
        case spv::StorageClassPushConstant:
            return true;
        default:
            return false;
        }
    }

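    // Returns whether the given storage class stores its data interleaved by
    // lane (see interleaveByLane above). Register-like, per-invocation storage
    // is interleaved so each lane gets its own copy; memory with an externally
    // visible or shared layout (buffers, push constants, workgroup memory,
    // images) is not.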
    bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
    {
        switch (storageClass)
        {
        case spv::StorageClassUniform:
        case spv::StorageClassStorageBuffer:
        case spv::StorageClassPushConstant:
        case spv::StorageClassWorkgroup:
        case spv::StorageClassImage:
            return false;
        default:
            return true;
        }
    }

    template<typename F>
    int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
    {
        // Recursively walks variable definition and its type tree, taking into account
        // any explicit Location or Component decorations encountered; where explicit
        // Locations or Components are not specified, assigns them sequentially.
        // Collected decorations are carried down toward the leaves and across
        // siblings; the effect of decorations intentionally does not flow back up the tree.
        //
        // F is a functor to be called with the effective decoration set for every component.
        //
        // Returns the next available location, and calls f().

        // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.

        ApplyDecorationsForId(&d, id);

        auto const &obj = getType(id);
        switch(obj.opcode())
        {
        case spv::OpTypePointer:
            return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
        case spv::OpTypeMatrix:
            for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
            {
                // consumes same components of N consecutive locations
                VisitInterfaceInner<F>(obj.definition.word(2), d, f);
            }
            return d.Location;
        case spv::OpTypeVector:
            for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
            {
                // consumes N consecutive components in the same location
                VisitInterfaceInner<F>(obj.definition.word(2), d, f);
            }
            return d.Location + 1;
        case spv::OpTypeFloat:
            f(d, ATTRIBTYPE_FLOAT);
            return d.Location + 1;
        case spv::OpTypeInt:
            f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
            return d.Location + 1;
        case spv::OpTypeBool:
            f(d, ATTRIBTYPE_UINT);
            return d.Location + 1;
        case spv::OpTypeStruct:
        {
            // iterate over members, which may themselves have Location/Component decorations
            for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
            {
                ApplyDecorationsForIdMember(&d, id, i);
                d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
                d.Component = 0; // Implicit locations always have component=0
            }
            return d.Location;
        }
        case spv::OpTypeArray:
        {
            auto arraySize = GetConstScalarInt(obj.definition.word(3));
            for (auto i = 0u; i < arraySize; i++)
            {
                d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
            }
            return d.Location;
        }
        default:
            // Intentionally partial; most opcodes do not participate in type hierarchies
            return 0;
        }
    }

    template<typename F>
    void SpirvShader::VisitInterface(Object::ID id, F f) const
    {
        // Walk a variable definition and call f for each component in it.
        Decorations d{};
        ApplyDecorationsForId(&d, id);

        auto def = getObject(id).definition;
        ASSERT(def.opcode() == spv::OpVariable);
        VisitInterfaceInner<F>(def.word(1), d, f);
    }

    template<typename F>
    void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, F f) const
    {
        // Walk a type tree in an explicitly laid out storage class, calling
        // a functor for each scalar element within the object.

        // The functor's first parameter is the index of the scalar element;
        // the second parameter is the offset (in bytes) from the base of the
        // object.

        ApplyDecorationsForId(&d, id);
        auto const &type = getType(id);

        if (d.HasOffset)
        {
            offset += d.Offset;
            d.HasOffset = false;
        }

        switch (type.opcode())
        {
        case spv::OpTypePointer:
            VisitMemoryObjectInner<F>(type.definition.word(3), d, index, offset, f);
            break;
        case spv::OpTypeInt:
        case spv::OpTypeFloat:
            f(index++, offset);
            break;
        case spv::OpTypeVector:
        {
            auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
            for (auto i = 0u; i < type.definition.word(3); i++)
            {
                VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
            }
            break;
        }
        case spv::OpTypeMatrix:
        {
            auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
            d.InsideMatrix = true;
            for (auto i = 0u; i < type.definition.word(3); i++)
            {
                ASSERT(d.HasMatrixStride);
                VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
            }
            break;
        }
        case spv::OpTypeStruct:
            for (auto i = 0u; i < type.definition.wordCount() - 2; i++)
            {
                ApplyDecorationsForIdMember(&d, id, i);
                VisitMemoryObjectInner<F>(type.definition.word(i + 2), d, index, offset, f);
            }
            break;
        case spv::OpTypeArray:
        {
            auto arraySize = GetConstScalarInt(type.definition.word(3));
            for (auto i = 0u; i < arraySize; i++)
            {
                ASSERT(d.HasArrayStride);
                VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
            }
            break;
        }
        default:
            UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
        }
    }

    template<typename F>
    void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, F f) const
    {
        auto typeId = getObject(id).type;
        auto const &type = getType(typeId);
        if (IsExplicitLayout(type.storageClass))
        {
            Decorations d{};
            ApplyDecorationsForId(&d, id);
            uint32_t index = 0;
            VisitMemoryObjectInner<F>(typeId, d, index, 0, f);
        }
        else
        {
            // Objects without explicit layout are tightly packed.
            for (auto i = 0u; i < getType(type.element).sizeInComponents; i++)
            {
                f(i, i * sizeof(float));
            }
        }
    }

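    // Returns the base pointer and size of the memory backing the given
    // object. For descriptor-backed objects this resolves the binding through
    // the pipeline layout, dereferences the BufferDescriptor, and applies any
    // dynamic offset; the size is clamped so that robustness checks against
    // the returned pointer stay within the bound range.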
    SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, int arrayIndex, EmitState const *state) const
    {
        auto routine = state->routine;
        auto &object = getObject(id);
        switch (object.kind)
        {
        case Object::Kind::Pointer:
        case Object::Kind::InterfaceVariable:
            return state->getPointer(id);

        case Object::Kind::DescriptorSet:
        {
            const auto &d = descriptorDecorations.at(id);
            ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
            ASSERT(d.Binding >= 0);

            auto set = state->getPointer(id);

            auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
            ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding));
            int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));

            Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
            Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
            Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
            if (setLayout->isBindingDynamic(d.Binding))
            {
                uint32_t dynamicBindingIndex =
                        routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
                        setLayout->getDynamicDescriptorOffset(d.Binding) +
                        arrayIndex;
                Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex];
                Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
                return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
            }
            else
            {
                return SIMD::Pointer(data, size);
            }
        }

        default:
            UNREACHABLE("Invalid pointer kind %d", int(object.kind));
            return SIMD::Pointer(Pointer<Byte>(), 0);
        }
    }

    void SpirvShader::ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const
    {
        ApplyDecorationsForId(d, baseId);
        auto &baseObject = getObject(baseId);
        ApplyDecorationsForId(d, baseObject.type);
        auto typeId = getType(baseObject.type).element;

        for (auto i = 0u; i < numIndexes; i++)
        {
            ApplyDecorationsForId(d, typeId);
            auto &type = getType(typeId);
            switch (type.opcode())
            {
            case spv::OpTypeStruct:
            {
                int memberIndex = GetConstScalarInt(indexIds[i]);
                ApplyDecorationsForIdMember(d, typeId, memberIndex);
                typeId = type.definition.word(2u + memberIndex);
                break;
            }
            case spv::OpTypeArray:
            case spv::OpTypeRuntimeArray:
                if (dd->InputAttachmentIndex >= 0)
                {
                    dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
                }
                typeId = type.element;
                break;
            case spv::OpTypeVector:
                typeId = type.element;
                break;
            case spv::OpTypeMatrix:
                typeId = type.element;
                d->InsideMatrix = true;
                break;
            default:
                UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
            }
        }
    }

    SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const
    {
		// Produce an offset into external memory, in bytes.
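		//
		// For example (illustrative layout, not taken from any particular
		// module), given
		//     struct S { float a; /* Offset 0 */ vec4 b[8]; /* Offset 16, ArrayStride 16 */ };
		// the chain S.b[i].y contributes constantOffset = 16 (member offset)
		// + 4 (component y) and, when i is not a compile-time constant, a
		// per-lane dynamic offset of 16 * i added directly to ptr.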
1682
1683 auto &baseObject = getObject(baseId);
1684 Type::ID typeId = getType(baseObject.type).element;
1685 Decorations d = {};
1686 ApplyDecorationsForId(&d, baseObject.type);
1687
1688 uint32_t arrayIndex = 0;
1689 if (baseObject.kind == Object::Kind::DescriptorSet)
1690 {
			auto opcode = getType(typeId).definition.opcode();
			if (opcode == spv::OpTypeArray || opcode == spv::OpTypeRuntimeArray)
1693 {
1694 ASSERT(getObject(indexIds[0]).kind == Object::Kind::Constant);
1695 arrayIndex = GetConstScalarInt(indexIds[0]);
1696
1697 numIndexes--;
1698 indexIds++;
1699 typeId = getType(typeId).element;
1700 }
1701 }
1702
1703 auto ptr = GetPointerToData(baseId, arrayIndex, state);
1704
1705 int constantOffset = 0;
1706
1707 for (auto i = 0u; i < numIndexes; i++)
1708 {
1709 auto & type = getType(typeId);
1710 ApplyDecorationsForId(&d, typeId);
1711
1712 switch (type.definition.opcode())
1713 {
1714 case spv::OpTypeStruct:
1715 {
1716 int memberIndex = GetConstScalarInt(indexIds[i]);
1717 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
1718 ASSERT(d.HasOffset);
1719 constantOffset += d.Offset;
1720 typeId = type.definition.word(2u + memberIndex);
1721 break;
1722 }
1723 case spv::OpTypeArray:
1724 case spv::OpTypeRuntimeArray:
1725 {
1726 // TODO: b/127950082: Check bounds.
1727 ASSERT(d.HasArrayStride);
1728 auto & obj = getObject(indexIds[i]);
1729 if (obj.kind == Object::Kind::Constant)
1730 {
1731 constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
1732 }
1733 else
1734 {
1735 ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
1736 }
1737 typeId = type.element;
1738 break;
1739 }
1740 case spv::OpTypeMatrix:
1741 {
1742 // TODO: b/127950082: Check bounds.
1743 ASSERT(d.HasMatrixStride);
1744 d.InsideMatrix = true;
1745 auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
1746 auto & obj = getObject(indexIds[i]);
1747 if (obj.kind == Object::Kind::Constant)
1748 {
1749 constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
1750 }
1751 else
1752 {
1753 ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
1754 }
1755 typeId = type.element;
1756 break;
1757 }
1758 case spv::OpTypeVector:
1759 {
1760 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
1761 auto & obj = getObject(indexIds[i]);
1762 if (obj.kind == Object::Kind::Constant)
1763 {
1764 constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
1765 }
1766 else
1767 {
1768 ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
1769 }
1770 typeId = type.element;
1771 break;
1772 }
1773 default:
1774 UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
1775 }
1776 }
1777
1778 ptr += constantOffset;
1779 return ptr;
1780 }
1781
1782 SIMD::Pointer SpirvShader::WalkAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const
1783 {
1784 // TODO: avoid doing per-lane work in some cases if we can?
1785 auto routine = state->routine;
1786 auto &baseObject = getObject(baseId);
1787 Type::ID typeId = getType(baseObject.type).element;
1788
1789 auto ptr = state->getPointer(baseId);
1790
1791 int constantOffset = 0;
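		// Pointers in these storage classes have no explicit layout
		// decorations; strides are derived from the tightly packed element
		// size (sizeInComponents * sizeof(float) bytes). Descriptor arrays
		// are the exception, using the binding stride instead.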
1792
1793 for (auto i = 0u; i < numIndexes; i++)
1794 {
1795 auto & type = getType(typeId);
1796 switch(type.opcode())
1797 {
1798 case spv::OpTypeStruct:
1799 {
1800 int memberIndex = GetConstScalarInt(indexIds[i]);
1801 int offsetIntoStruct = 0;
				for (auto j = 0; j < memberIndex; j++)
				{
					auto memberType = type.definition.word(2u + j);
					offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
				}
1806 constantOffset += offsetIntoStruct;
1807 typeId = type.definition.word(2u + memberIndex);
1808 break;
1809 }
1810
1811 case spv::OpTypeVector:
1812 case spv::OpTypeMatrix:
1813 case spv::OpTypeArray:
1814 case spv::OpTypeRuntimeArray:
1815 {
1816 // TODO: b/127950082: Check bounds.
1817 if (getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
1818 {
					// Indexing into an array of descriptors.
1820 auto &obj = getObject(indexIds[i]);
1821 if (obj.kind != Object::Kind::Constant)
1822 {
1823 UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
1824 }
1825
1826 auto d = descriptorDecorations.at(baseId);
1827 ASSERT(d.DescriptorSet >= 0);
1828 ASSERT(d.Binding >= 0);
1829 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
1830 auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding));
1831 ptr.base += stride * GetConstScalarInt(indexIds[i]);
1832 }
1833 else
1834 {
1835 auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float));
1836 auto & obj = getObject(indexIds[i]);
1837 if (obj.kind == Object::Kind::Constant)
1838 {
1839 ptr += stride * GetConstScalarInt(indexIds[i]);
1840 }
1841 else
1842 {
1843 ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
1844 }
1845 }
1846 typeId = type.element;
1847 break;
1848 }
1849
1850 default:
1851 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
1852 }
1853 }
1854
1855 if (constantOffset != 0)
1856 {
1857 ptr += constantOffset;
1858 }
1859 return ptr;
1860 }
1861
1862 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
1863 {
1864 uint32_t componentOffset = 0;
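		// All indexes are literals, so the walk folds into a single constant
		// component offset. E.g. (illustrative) for struct { vec3 a; mat2 b; },
		// indexes {1, 1} select the second column of b: 3 (skip a) + 2 * 1 = 5.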
1865
1866 for (auto i = 0u; i < numIndexes; i++)
1867 {
1868 auto & type = getType(typeId);
1869 switch(type.opcode())
1870 {
1871 case spv::OpTypeStruct:
1872 {
1873 int memberIndex = indexes[i];
1874 int offsetIntoStruct = 0;
				for (auto j = 0; j < memberIndex; j++)
				{
					auto memberType = type.definition.word(2u + j);
					offsetIntoStruct += getType(memberType).sizeInComponents;
				}
1879 componentOffset += offsetIntoStruct;
1880 typeId = type.definition.word(2u + memberIndex);
1881 break;
1882 }
1883
1884 case spv::OpTypeVector:
1885 case spv::OpTypeMatrix:
1886 case spv::OpTypeArray:
1887 {
1888 auto elementType = type.definition.word(2);
1889 auto stride = getType(elementType).sizeInComponents;
1890 componentOffset += stride * indexes[i];
1891 typeId = elementType;
1892 break;
1893 }
1894
1895 default:
1896 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
1897 }
1898 }
1899
1900 return componentOffset;
1901 }
1902
1903 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
1904 {
1905 switch (decoration)
1906 {
1907 case spv::DecorationLocation:
1908 HasLocation = true;
1909 Location = static_cast<int32_t>(arg);
1910 break;
1911 case spv::DecorationComponent:
1912 HasComponent = true;
1913 Component = arg;
1914 break;
1915 case spv::DecorationBuiltIn:
1916 HasBuiltIn = true;
1917 BuiltIn = static_cast<spv::BuiltIn>(arg);
1918 break;
1919 case spv::DecorationFlat:
1920 Flat = true;
1921 break;
1922 case spv::DecorationNoPerspective:
1923 NoPerspective = true;
1924 break;
1925 case spv::DecorationCentroid:
1926 Centroid = true;
1927 break;
1928 case spv::DecorationBlock:
1929 Block = true;
1930 break;
1931 case spv::DecorationBufferBlock:
1932 BufferBlock = true;
1933 break;
1934 case spv::DecorationOffset:
1935 HasOffset = true;
1936 Offset = static_cast<int32_t>(arg);
1937 break;
1938 case spv::DecorationArrayStride:
1939 HasArrayStride = true;
1940 ArrayStride = static_cast<int32_t>(arg);
1941 break;
1942 case spv::DecorationMatrixStride:
1943 HasMatrixStride = true;
1944 MatrixStride = static_cast<int32_t>(arg);
1945 break;
1946 case spv::DecorationRelaxedPrecision:
1947 RelaxedPrecision = true;
1948 break;
1949 case spv::DecorationRowMajor:
1950 HasRowMajor = true;
1951 RowMajor = true;
1952 break;
		case spv::DecorationColMajor:
			HasRowMajor = true;
			RowMajor = false;
			break;
		default:
			// Intentionally partial; there are many decorations we just don't care about.
			break;
1959 }
1960 }
1961
1962 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1963 {
1964 // Apply a decoration group to this set of decorations
1965 if (src.HasBuiltIn)
1966 {
1967 HasBuiltIn = true;
1968 BuiltIn = src.BuiltIn;
1969 }
1970
1971 if (src.HasLocation)
1972 {
1973 HasLocation = true;
1974 Location = src.Location;
1975 }
1976
1977 if (src.HasComponent)
1978 {
1979 HasComponent = true;
1980 Component = src.Component;
1981 }
1982
1983 if (src.HasOffset)
1984 {
1985 HasOffset = true;
1986 Offset = src.Offset;
1987 }
1988
1989 if (src.HasArrayStride)
1990 {
1991 HasArrayStride = true;
1992 ArrayStride = src.ArrayStride;
1993 }
1994
1995 if (src.HasMatrixStride)
1996 {
1997 HasMatrixStride = true;
1998 MatrixStride = src.MatrixStride;
1999 }
2000
2001 if (src.HasRowMajor)
2002 {
2003 HasRowMajor = true;
2004 RowMajor = src.RowMajor;
2005 }
2006
2007 Flat |= src.Flat;
2008 NoPerspective |= src.NoPerspective;
2009 Centroid |= src.Centroid;
2010 Block |= src.Block;
2011 BufferBlock |= src.BufferBlock;
2012 RelaxedPrecision |= src.RelaxedPrecision;
2013 InsideMatrix |= src.InsideMatrix;
2014 }
2015
2016 void SpirvShader::DescriptorDecorations::Apply(const sw::SpirvShader::DescriptorDecorations &src)
2017 {
2018 if(src.DescriptorSet >= 0)
2019 {
2020 DescriptorSet = src.DescriptorSet;
2021 }
2022
2023 if(src.Binding >= 0)
2024 {
2025 Binding = src.Binding;
2026 }
2027
2028 if (src.InputAttachmentIndex >= 0)
2029 {
2030 InputAttachmentIndex = src.InputAttachmentIndex;
2031 }
2032 }
2033
2034 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
2035 {
2036 auto it = decorations.find(id);
2037 if (it != decorations.end())
2038 d->Apply(it->second);
2039 }
2040
2041 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
2042 {
2043 auto it = memberDecorations.find(id);
2044 if (it != memberDecorations.end() && member < it->second.size())
2045 {
2046 d->Apply(it->second[member]);
2047 }
2048 }
2049
2050 void SpirvShader::DefineResult(const InsnIterator &insn)
2051 {
2052 Type::ID typeId = insn.word(1);
2053 Object::ID resultId = insn.word(2);
2054 auto &object = defs[resultId];
2055 object.type = typeId;
2056
2057 switch (getType(typeId).opcode())
2058 {
2059 case spv::OpTypePointer:
2060 case spv::OpTypeImage:
2061 case spv::OpTypeSampledImage:
2062 case spv::OpTypeSampler:
2063 object.kind = Object::Kind::Pointer;
2064 break;
2065
2066 default:
2067 object.kind = Object::Kind::Intermediate;
2068 }
2069
2070 object.definition = insn;
2071 }
2072
2073 OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const
2074 {
2075 switch(storageClass)
2076 {
2077 case spv::StorageClassUniform:
2078 case spv::StorageClassStorageBuffer:
2079 // Buffer resource access. robustBufferAccess feature applies.
2080 return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
2081 : OutOfBoundsBehavior::UndefinedBehavior;
2082
2083 case spv::StorageClassImage:
2084 return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined"
2085
2086 case spv::StorageClassInput:
2087 if(executionModel == spv::ExecutionModelVertex)
2088 {
2089 // Vertex attributes follow robustBufferAccess rules.
2090 return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
2091 : OutOfBoundsBehavior::UndefinedBehavior;
2092 }
2093 // Fall through to default case.
2094 default:
2095 // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
2096 // TODO(b/131224163): Optimize cases statically known to be within bounds.
2097 return OutOfBoundsBehavior::UndefinedValue;
2098 }
2099
		return OutOfBoundsBehavior::Nullify;  // Unreachable: all switch cases return above.
2101 }
2102
2103 // emit-time
2104
2105 void SpirvShader::emitProlog(SpirvRoutine *routine) const
2106 {
2107 for (auto insn : *this)
2108 {
2109 switch (insn.opcode())
2110 {
2111 case spv::OpVariable:
2112 {
2113 Type::ID resultPointerTypeId = insn.word(1);
2114 auto resultPointerType = getType(resultPointerTypeId);
2115 auto pointeeType = getType(resultPointerType.element);
2116
2117 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
2118 {
2119 Object::ID resultId = insn.word(2);
2120 routine->createVariable(resultId, pointeeType.sizeInComponents);
2121 }
2122 break;
2123 }
2124 case spv::OpPhi:
2125 {
2126 auto type = getType(insn.word(1));
2127 Object::ID resultId = insn.word(2);
2128 routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
2129 break;
2130 }
2131
2132 case spv::OpImageDrefGather:
2133 case spv::OpImageFetch:
2134 case spv::OpImageGather:
2135 case spv::OpImageQueryLod:
2136 case spv::OpImageSampleDrefExplicitLod:
2137 case spv::OpImageSampleDrefImplicitLod:
2138 case spv::OpImageSampleExplicitLod:
2139 case spv::OpImageSampleImplicitLod:
2140 case spv::OpImageSampleProjDrefExplicitLod:
2141 case spv::OpImageSampleProjDrefImplicitLod:
2142 case spv::OpImageSampleProjExplicitLod:
2143 case spv::OpImageSampleProjImplicitLod:
2144 {
2145 Object::ID resultId = insn.word(2);
2146 routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
2147 break;
2148 }
2149
2150 default:
			// Nothing else produces interface variables, so they can all be safely ignored.
2152 break;
2153 }
2154 }
2155 }
2156
2157 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const
2158 {
2159 EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, executionModel);
2160
2161 // Emit everything up to the first label
2162 // TODO: Separate out dispatch of block from non-block instructions?
2163 for (auto insn : *this)
2164 {
2165 if (insn.opcode() == spv::OpLabel)
2166 {
2167 break;
2168 }
2169 EmitInstruction(insn, &state);
2170 }
2171
2172 // Emit all the blocks starting from entryPoint.
2173 EmitBlocks(getFunction(entryPoint).entry, &state);
2174 }
2175
2176 void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
2177 {
2178 auto oldPending = state->pending;
2179 auto &function = getFunction(state->function);
2180
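		// Blocks are emitted from a worklist: a block is only generated once
		// all of the blocks it depends on have been visited; otherwise its
		// dependencies are pushed ahead of it and it is retried later.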
2181 std::deque<Block::ID> pending;
2182 state->pending = &pending;
2183 pending.push_front(id);
		while (!pending.empty())
2185 {
2186 auto id = pending.front();
2187
2188 auto const &block = function.getBlock(id);
2189 if (id == ignore)
2190 {
2191 pending.pop_front();
2192 continue;
2193 }
2194
2195 // Ensure all dependency blocks have been generated.
2196 auto depsDone = true;
2197 function.ForeachBlockDependency(id, [&](Block::ID dep)
2198 {
2199 if (state->visited.count(dep) == 0)
2200 {
2201 state->pending->push_front(dep);
2202 depsDone = false;
2203 }
2204 });
2205
2206 if (!depsDone)
2207 {
2208 continue;
2209 }
2210
2211 pending.pop_front();
2212
2213 state->block = id;
2214
2215 switch (block.kind)
2216 {
2217 case Block::Simple:
2218 case Block::StructuredBranchConditional:
2219 case Block::UnstructuredBranchConditional:
2220 case Block::StructuredSwitch:
2221 case Block::UnstructuredSwitch:
2222 EmitNonLoop(state);
2223 break;
2224
2225 case Block::Loop:
2226 EmitLoop(state);
2227 break;
2228
2229 default:
2230 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
2231 }
2232 }
2233
2234 state->pending = oldPending;
2235 }
2236
2237 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
2238 {
2239 for (auto insn = begin; insn != end; insn++)
2240 {
2241 auto res = EmitInstruction(insn, state);
2242 switch (res)
2243 {
2244 case EmitResult::Continue:
2245 continue;
			case EmitResult::Terminator:
				// Note: this only exits the switch. A terminator is the last
				// instruction of a block, so the loop ends here regardless.
				break;
2248 default:
2249 UNREACHABLE("Unexpected EmitResult %d", int(res));
2250 break;
2251 }
2252 }
2253 }
2254
2255 void SpirvShader::EmitNonLoop(EmitState *state) const
2256 {
2257 auto &function = getFunction(state->function);
2258 auto blockId = state->block;
2259 auto block = function.getBlock(blockId);
2260
2261 if (!state->visited.emplace(blockId).second)
2262 {
2263 return; // Already generated this block.
2264 }
2265
2266 if (blockId != function.entry)
2267 {
2268 // Set the activeLaneMask.
2269 SIMD::Int activeLaneMask(0);
2270 for (auto in : block.ins)
2271 {
2272 auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
2273 activeLaneMask |= inMask;
2274 }
2275 state->setActiveLaneMask(activeLaneMask);
2276 }
2277
2278 EmitInstructions(block.begin(), block.end(), state);
2279
2280 for (auto out : block.outs)
2281 {
2282 if (state->visited.count(out) == 0)
2283 {
2284 state->pending->push_back(out);
2285 }
2286 }
2287 }
2288
2289 void SpirvShader::EmitLoop(EmitState *state) const
2290 {
2291 auto &function = getFunction(state->function);
2292 auto blockId = state->block;
2293 auto &block = function.getBlock(blockId);
2294 auto mergeBlockId = block.mergeBlock;
2295 auto &mergeBlock = function.getBlock(mergeBlockId);
2296
2297 if (!state->visited.emplace(blockId).second)
2298 {
2299 return; // Already emitted this loop.
2300 }
2301
		// Gather all the blocks that make up the loop. The merge block is
		// seeded into the set first so the traversal stops at the loop
		// boundary.
		std::unordered_set<Block::ID> loopBlocks;
		loopBlocks.emplace(block.mergeBlock);
2305 function.TraverseReachableBlocks(blockId, loopBlocks);
2306
2307 // incomingBlocks are block ins that are not back-edges.
2308 std::unordered_set<Block::ID> incomingBlocks;
2309 for (auto in : block.ins)
2310 {
2311 if (loopBlocks.count(in) == 0)
2312 {
2313 incomingBlocks.emplace(in);
2314 }
2315 }
2316
2317 // Emit the loop phi instructions, and initialize them with a value from
2318 // the incoming blocks.
2319 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
2320 {
2321 if (insn.opcode() == spv::OpPhi)
2322 {
2323 StorePhi(blockId, insn, state, incomingBlocks);
2324 }
2325 }
2326
2327 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
2328 // This is initialized with the incoming active lane masks.
2329 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
2330 for (auto in : incomingBlocks)
2331 {
2332 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
2333 }
2334
2335 // mergeActiveLaneMasks contains edge lane masks for the merge block.
2336 // This is the union of all edge masks across all iterations of the loop.
2337 std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
2338 for (auto in : function.getBlock(mergeBlockId).ins)
2339 {
2340 mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
2341 }
2342
2343 // Create the loop basic blocks
2344 auto headerBasicBlock = Nucleus::createBasicBlock();
2345 auto mergeBasicBlock = Nucleus::createBasicBlock();
2346
2347 // Start emitting code inside the loop.
2348 Nucleus::createBr(headerBasicBlock);
2349 Nucleus::setInsertBlock(headerBasicBlock);
2350
2351 // Load the active lane mask.
2352 state->setActiveLaneMask(loopActiveLaneMask);
2353
2354 // Emit the non-phi loop header block's instructions.
2355 for (auto insn = block.begin(); insn != block.end(); insn++)
2356 {
2357 if (insn.opcode() == spv::OpPhi)
2358 {
2359 LoadPhi(insn, state);
2360 }
2361 else
2362 {
2363 EmitInstruction(insn, state);
2364 }
2365 }
2366
2367 // Emit all blocks between the loop header and the merge block, but
2368 // don't emit the merge block yet.
2369 for (auto out : block.outs)
2370 {
2371 EmitBlocks(out, state, mergeBlockId);
2372 }
2373
2374 // Restore current block id after emitting loop blocks.
2375 state->block = blockId;
2376
2377 // Rebuild the loopActiveLaneMask from the loop back edges.
2378 loopActiveLaneMask = SIMD::Int(0);
2379 for (auto in : block.ins)
2380 {
2381 if (function.ExistsPath(blockId, in, mergeBlockId))
2382 {
2383 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
2384 }
2385 }
2386
2387 // Add active lanes to the merge lane mask.
2388 for (auto in : function.getBlock(mergeBlockId).ins)
2389 {
2390 auto edge = Block::Edge{in, mergeBlockId};
2391 auto it = state->edgeActiveLaneMasks.find(edge);
2392 if (it != state->edgeActiveLaneMasks.end())
2393 {
2394 mergeActiveLaneMasks[in] |= it->second;
2395 }
2396 }
2397
2398 // Update loop phi values.
2399 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
2400 {
2401 if (insn.opcode() == spv::OpPhi)
2402 {
2403 StorePhi(blockId, insn, state, loopBlocks);
2404 }
2405 }
2406
2407 // Use the [loop -> merge] active lane masks to update the phi values in
2408 // the merge block. We need to do this to handle divergent control flow
2409 // in the loop.
2410 //
2411 // Consider the following:
2412 //
2413 // int phi_source = 0;
2414 // for (uint i = 0; i < 4; i++)
2415 // {
2416 // phi_source = 0;
2417 // if (gl_GlobalInvocationID.x % 4 == i) // divergent control flow
2418 // {
2419 // phi_source = 42; // single lane assignment.
2420 // break; // activeLaneMask for [loop->merge] is active for a single lane.
2421 // }
2422 // // -- we are here --
2423 // }
2424 // // merge block
2425 // int phi = phi_source; // OpPhi
2426 //
		// In this example, with each iteration of the loop, phi_source will
		// only have a single lane assigned. However, the 'phi' value in the
		// merge block needs to be assigned the union of all the per-lane
		// assignments of phi_source from when each lane exited the loop.
2431 for (auto insn = mergeBlock.begin(); insn != mergeBlock.end(); insn++)
2432 {
2433 if (insn.opcode() == spv::OpPhi)
2434 {
2435 StorePhi(mergeBlockId, insn, state, loopBlocks);
2436 }
2437 }
2438
2439 // Loop body now done.
2440 // If any lanes are still active, jump back to the loop header,
2441 // otherwise jump to the merge block.
2442 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
2443
2444 // Continue emitting from the merge block.
2445 Nucleus::setInsertBlock(mergeBasicBlock);
2446 state->pending->push_back(mergeBlockId);
2447 for (auto it : mergeActiveLaneMasks)
2448 {
2449 state->addActiveLaneMaskEdge(it.first, mergeBlockId, it.second);
2450 }
2451 }
2452
2453 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
2454 {
2455 auto opcode = insn.opcode();
2456
2457 switch (opcode)
2458 {
2459 case spv::OpTypeVoid:
2460 case spv::OpTypeInt:
2461 case spv::OpTypeFloat:
2462 case spv::OpTypeBool:
2463 case spv::OpTypeVector:
2464 case spv::OpTypeArray:
2465 case spv::OpTypeRuntimeArray:
2466 case spv::OpTypeMatrix:
2467 case spv::OpTypeStruct:
2468 case spv::OpTypePointer:
2469 case spv::OpTypeFunction:
2470 case spv::OpTypeImage:
2471 case spv::OpTypeSampledImage:
2472 case spv::OpTypeSampler:
2473 case spv::OpExecutionMode:
2474 case spv::OpMemoryModel:
2475 case spv::OpFunction:
2476 case spv::OpFunctionEnd:
2477 case spv::OpConstant:
2478 case spv::OpConstantNull:
2479 case spv::OpConstantTrue:
2480 case spv::OpConstantFalse:
2481 case spv::OpConstantComposite:
2482 case spv::OpSpecConstant:
2483 case spv::OpSpecConstantTrue:
2484 case spv::OpSpecConstantFalse:
2485 case spv::OpSpecConstantComposite:
2486 case spv::OpSpecConstantOp:
2487 case spv::OpUndef:
2488 case spv::OpExtension:
2489 case spv::OpCapability:
2490 case spv::OpEntryPoint:
2491 case spv::OpExtInstImport:
2492 case spv::OpDecorate:
2493 case spv::OpMemberDecorate:
2494 case spv::OpGroupDecorate:
2495 case spv::OpGroupMemberDecorate:
2496 case spv::OpDecorationGroup:
2497 case spv::OpName:
2498 case spv::OpMemberName:
2499 case spv::OpSource:
2500 case spv::OpSourceContinued:
2501 case spv::OpSourceExtension:
2502 case spv::OpLine:
2503 case spv::OpNoLine:
2504 case spv::OpModuleProcessed:
2505 case spv::OpString:
2506 // Nothing to do at emit time. These are either fully handled at analysis time,
2507 // or don't require any work at all.
2508 return EmitResult::Continue;
2509
2510 case spv::OpLabel:
2511 return EmitResult::Continue;
2512
2513 case spv::OpVariable:
2514 return EmitVariable(insn, state);
2515
2516 case spv::OpLoad:
2517 case spv::OpAtomicLoad:
2518 return EmitLoad(insn, state);
2519
2520 case spv::OpStore:
2521 case spv::OpAtomicStore:
2522 return EmitStore(insn, state);
2523
2524 case spv::OpAtomicIAdd:
2525 case spv::OpAtomicISub:
2526 case spv::OpAtomicSMin:
2527 case spv::OpAtomicSMax:
2528 case spv::OpAtomicUMin:
2529 case spv::OpAtomicUMax:
2530 case spv::OpAtomicAnd:
2531 case spv::OpAtomicOr:
2532 case spv::OpAtomicXor:
2533 case spv::OpAtomicIIncrement:
2534 case spv::OpAtomicIDecrement:
2535 case spv::OpAtomicExchange:
2536 return EmitAtomicOp(insn, state);
2537
2538 case spv::OpAtomicCompareExchange:
2539 return EmitAtomicCompareExchange(insn, state);
2540
2541 case spv::OpAccessChain:
2542 case spv::OpInBoundsAccessChain:
2543 return EmitAccessChain(insn, state);
2544
2545 case spv::OpCompositeConstruct:
2546 return EmitCompositeConstruct(insn, state);
2547
2548 case spv::OpCompositeInsert:
2549 return EmitCompositeInsert(insn, state);
2550
2551 case spv::OpCompositeExtract:
2552 return EmitCompositeExtract(insn, state);
2553
2554 case spv::OpVectorShuffle:
2555 return EmitVectorShuffle(insn, state);
2556
2557 case spv::OpVectorExtractDynamic:
2558 return EmitVectorExtractDynamic(insn, state);
2559
2560 case spv::OpVectorInsertDynamic:
2561 return EmitVectorInsertDynamic(insn, state);
2562
2563 case spv::OpVectorTimesScalar:
2564 case spv::OpMatrixTimesScalar:
2565 return EmitVectorTimesScalar(insn, state);
2566
2567 case spv::OpMatrixTimesVector:
2568 return EmitMatrixTimesVector(insn, state);
2569
2570 case spv::OpVectorTimesMatrix:
2571 return EmitVectorTimesMatrix(insn, state);
2572
2573 case spv::OpMatrixTimesMatrix:
2574 return EmitMatrixTimesMatrix(insn, state);
2575
2576 case spv::OpOuterProduct:
2577 return EmitOuterProduct(insn, state);
2578
2579 case spv::OpTranspose:
2580 return EmitTranspose(insn, state);
2581
2582 case spv::OpNot:
2583 case spv::OpBitFieldInsert:
2584 case spv::OpBitFieldSExtract:
2585 case spv::OpBitFieldUExtract:
2586 case spv::OpBitReverse:
2587 case spv::OpBitCount:
2588 case spv::OpSNegate:
2589 case spv::OpFNegate:
2590 case spv::OpLogicalNot:
2591 case spv::OpConvertFToU:
2592 case spv::OpConvertFToS:
2593 case spv::OpConvertSToF:
2594 case spv::OpConvertUToF:
2595 case spv::OpBitcast:
2596 case spv::OpIsInf:
2597 case spv::OpIsNan:
2598 case spv::OpDPdx:
2599 case spv::OpDPdxCoarse:
2600 case spv::OpDPdy:
2601 case spv::OpDPdyCoarse:
2602 case spv::OpFwidth:
2603 case spv::OpFwidthCoarse:
2604 case spv::OpDPdxFine:
2605 case spv::OpDPdyFine:
2606 case spv::OpFwidthFine:
2607 case spv::OpQuantizeToF16:
2608 return EmitUnaryOp(insn, state);
2609
2610 case spv::OpIAdd:
2611 case spv::OpISub:
2612 case spv::OpIMul:
2613 case spv::OpSDiv:
2614 case spv::OpUDiv:
2615 case spv::OpFAdd:
2616 case spv::OpFSub:
2617 case spv::OpFMul:
2618 case spv::OpFDiv:
2619 case spv::OpFMod:
2620 case spv::OpFRem:
2621 case spv::OpFOrdEqual:
2622 case spv::OpFUnordEqual:
2623 case spv::OpFOrdNotEqual:
2624 case spv::OpFUnordNotEqual:
2625 case spv::OpFOrdLessThan:
2626 case spv::OpFUnordLessThan:
2627 case spv::OpFOrdGreaterThan:
2628 case spv::OpFUnordGreaterThan:
2629 case spv::OpFOrdLessThanEqual:
2630 case spv::OpFUnordLessThanEqual:
2631 case spv::OpFOrdGreaterThanEqual:
2632 case spv::OpFUnordGreaterThanEqual:
2633 case spv::OpSMod:
2634 case spv::OpSRem:
2635 case spv::OpUMod:
2636 case spv::OpIEqual:
2637 case spv::OpINotEqual:
2638 case spv::OpUGreaterThan:
2639 case spv::OpSGreaterThan:
2640 case spv::OpUGreaterThanEqual:
2641 case spv::OpSGreaterThanEqual:
2642 case spv::OpULessThan:
2643 case spv::OpSLessThan:
2644 case spv::OpULessThanEqual:
2645 case spv::OpSLessThanEqual:
2646 case spv::OpShiftRightLogical:
2647 case spv::OpShiftRightArithmetic:
2648 case spv::OpShiftLeftLogical:
2649 case spv::OpBitwiseOr:
2650 case spv::OpBitwiseXor:
2651 case spv::OpBitwiseAnd:
2652 case spv::OpLogicalOr:
2653 case spv::OpLogicalAnd:
2654 case spv::OpLogicalEqual:
2655 case spv::OpLogicalNotEqual:
2656 case spv::OpUMulExtended:
2657 case spv::OpSMulExtended:
2658 case spv::OpIAddCarry:
2659 case spv::OpISubBorrow:
2660 return EmitBinaryOp(insn, state);
2661
2662 case spv::OpDot:
2663 return EmitDot(insn, state);
2664
2665 case spv::OpSelect:
2666 return EmitSelect(insn, state);
2667
2668 case spv::OpExtInst:
2669 return EmitExtendedInstruction(insn, state);
2670
2671 case spv::OpAny:
2672 return EmitAny(insn, state);
2673
2674 case spv::OpAll:
2675 return EmitAll(insn, state);
2676
2677 case spv::OpBranch:
2678 return EmitBranch(insn, state);
2679
2680 case spv::OpPhi:
2681 return EmitPhi(insn, state);
2682
2683 case spv::OpSelectionMerge:
2684 case spv::OpLoopMerge:
2685 return EmitResult::Continue;
2686
2687 case spv::OpBranchConditional:
2688 return EmitBranchConditional(insn, state);
2689
2690 case spv::OpSwitch:
2691 return EmitSwitch(insn, state);
2692
2693 case spv::OpUnreachable:
2694 return EmitUnreachable(insn, state);
2695
2696 case spv::OpReturn:
2697 return EmitReturn(insn, state);
2698
2699 case spv::OpFunctionCall:
2700 return EmitFunctionCall(insn, state);
2701
2702 case spv::OpKill:
2703 return EmitKill(insn, state);
2704
2705 case spv::OpImageSampleImplicitLod:
2706 return EmitImageSampleImplicitLod(None, insn, state);
2707
2708 case spv::OpImageSampleExplicitLod:
2709 return EmitImageSampleExplicitLod(None, insn, state);
2710
2711 case spv::OpImageSampleDrefImplicitLod:
2712 return EmitImageSampleImplicitLod(Dref, insn, state);
2713
2714 case spv::OpImageSampleDrefExplicitLod:
2715 return EmitImageSampleExplicitLod(Dref, insn, state);
2716
2717 case spv::OpImageSampleProjImplicitLod:
2718 return EmitImageSampleImplicitLod(Proj, insn, state);
2719
2720 case spv::OpImageSampleProjExplicitLod:
2721 return EmitImageSampleExplicitLod(Proj, insn, state);
2722
2723 case spv::OpImageSampleProjDrefImplicitLod:
2724 return EmitImageSampleImplicitLod(ProjDref, insn, state);
2725
2726 case spv::OpImageSampleProjDrefExplicitLod:
2727 return EmitImageSampleExplicitLod(ProjDref, insn, state);
2728
2729 case spv::OpImageGather:
2730 return EmitImageGather(None, insn, state);
2731
2732 case spv::OpImageDrefGather:
2733 return EmitImageGather(Dref, insn, state);
2734
2735 case spv::OpImageFetch:
2736 return EmitImageFetch(insn, state);
2737
2738 case spv::OpImageQuerySizeLod:
2739 return EmitImageQuerySizeLod(insn, state);
2740
2741 case spv::OpImageQuerySize:
2742 return EmitImageQuerySize(insn, state);
2743
2744 case spv::OpImageQueryLod:
2745 return EmitImageQueryLod(insn, state);
2746
2747 case spv::OpImageQueryLevels:
2748 return EmitImageQueryLevels(insn, state);
2749
2750 case spv::OpImageQuerySamples:
2751 return EmitImageQuerySamples(insn, state);
2752
2753 case spv::OpImageRead:
2754 return EmitImageRead(insn, state);
2755
2756 case spv::OpImageWrite:
2757 return EmitImageWrite(insn, state);
2758
2759 case spv::OpImageTexelPointer:
2760 return EmitImageTexelPointer(insn, state);
2761
2762 case spv::OpSampledImage:
2763 case spv::OpImage:
2764 return EmitSampledImageCombineOrSplit(insn, state);
2765
2766 case spv::OpCopyObject:
2767 return EmitCopyObject(insn, state);
2768
2769 case spv::OpCopyMemory:
2770 return EmitCopyMemory(insn, state);
2771
2772 case spv::OpControlBarrier:
2773 return EmitControlBarrier(insn, state);
2774
2775 case spv::OpMemoryBarrier:
2776 return EmitMemoryBarrier(insn, state);
2777
2778 case spv::OpGroupNonUniformElect:
2779 case spv::OpGroupNonUniformAll:
2780 case spv::OpGroupNonUniformAny:
2781 case spv::OpGroupNonUniformAllEqual:
2782 case spv::OpGroupNonUniformBroadcast:
2783 case spv::OpGroupNonUniformBroadcastFirst:
2784 case spv::OpGroupNonUniformBallot:
2785 case spv::OpGroupNonUniformInverseBallot:
2786 case spv::OpGroupNonUniformBallotBitExtract:
2787 case spv::OpGroupNonUniformBallotBitCount:
2788 case spv::OpGroupNonUniformBallotFindLSB:
2789 case spv::OpGroupNonUniformBallotFindMSB:
2790 case spv::OpGroupNonUniformShuffle:
2791 case spv::OpGroupNonUniformShuffleXor:
2792 case spv::OpGroupNonUniformShuffleUp:
2793 case spv::OpGroupNonUniformShuffleDown:
2794 return EmitGroupNonUniform(insn, state);
2795
2796 case spv::OpArrayLength:
2797 return EmitArrayLength(insn, state);
2798
2799 default:
2800 UNREACHABLE("%s", OpcodeName(opcode).c_str());
2801 break;
2802 }
2803
2804 return EmitResult::Continue;
2805 }
2806
2807 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
2808 {
2809 auto routine = state->routine;
2810 Object::ID resultId = insn.word(2);
2811 auto &object = getObject(resultId);
2812 auto &objectTy = getType(object.type);
2813
2814 switch (objectTy.storageClass)
2815 {
2816 case spv::StorageClassOutput:
2817 case spv::StorageClassPrivate:
2818 case spv::StorageClassFunction:
2819 {
2820 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2821 auto base = &routine->getVariable(resultId)[0];
2822 auto elementTy = getType(objectTy.element);
2823 auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
2824 state->createPointer(resultId, SIMD::Pointer(base, size));
2825 break;
2826 }
2827 case spv::StorageClassWorkgroup:
2828 {
2829 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2830 auto base = &routine->workgroupMemory[0];
2831 auto size = workgroupMemory.size();
2832 state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
2833 break;
2834 }
2835 case spv::StorageClassInput:
2836 {
2837 if (object.kind == Object::Kind::InterfaceVariable)
2838 {
2839 auto &dst = routine->getVariable(resultId);
2840 int offset = 0;
2841 VisitInterface(resultId,
2842 [&](Decorations const &d, AttribType type) {
						auto scalarSlot = d.Location << 2 | d.Component;  // four scalar channels per location
2844 dst[offset++] = routine->inputs[scalarSlot];
2845 });
2846 }
2847 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2848 auto base = &routine->getVariable(resultId)[0];
2849 auto elementTy = getType(objectTy.element);
2850 auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
2851 state->createPointer(resultId, SIMD::Pointer(base, size));
2852 break;
2853 }
2854 case spv::StorageClassUniformConstant:
2855 {
2856 const auto &d = descriptorDecorations.at(resultId);
2857 ASSERT(d.DescriptorSet >= 0);
2858 ASSERT(d.Binding >= 0);
2859
2860 uint32_t arrayIndex = 0; // TODO(b/129523279)
2861 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
2862 if (setLayout->hasBinding(d.Binding))
2863 {
2864 uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex));
2865 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
2866 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
2867 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
2868 state->createPointer(resultId, SIMD::Pointer(binding, size));
2869 }
2870 else
2871 {
				// TODO: Error if the variable with the non-existent binding is
				// used? Or perhaps strip these unused variable declarations as
				// a preprocess on the SPIR-V?
2875 }
2876 break;
2877 }
2878 case spv::StorageClassUniform:
2879 case spv::StorageClassStorageBuffer:
2880 {
2881 const auto &d = descriptorDecorations.at(resultId);
2882 ASSERT(d.DescriptorSet >= 0);
2883 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
			// Note: the module may contain descriptor set references that are
			// not suitable for this implementation: a set index higher than the
			// number of descriptor set binding points we support. As long as the
			// selected entry point doesn't actually touch the out-of-range
			// binding points, this is valid. In this case make the value nullptr
			// to make it easier to diagnose an attempt to dereference it.
2887 if (d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
2888 {
2889 state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
2890 }
2891 else
2892 {
2893 state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
2894 }
2895 break;
2896 }
2897 case spv::StorageClassPushConstant:
2898 {
2899 state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
2900 break;
2901 }
2902 default:
2903 UNREACHABLE("Storage class %d", objectTy.storageClass);
2904 break;
2905 }
2906
2907 if (insn.wordCount() > 4)
2908 {
2909 Object::ID initializerId = insn.word(4);
2910 if (getObject(initializerId).kind != Object::Kind::Constant)
2911 {
2912 UNIMPLEMENTED("Non-constant initializers not yet implemented");
2913 }
2914 switch (objectTy.storageClass)
2915 {
2916 case spv::StorageClassOutput:
2917 case spv::StorageClassPrivate:
2918 case spv::StorageClassFunction:
2919 {
2920 bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
2921 auto ptr = GetPointerToData(resultId, 0, state);
2922 GenericValue initialValue(this, state, initializerId);
2923 VisitMemoryObject(resultId, [&](uint32_t i, uint32_t offset)
2924 {
2925 auto p = ptr + offset;
2926 if (interleavedByLane) { p = interleaveByLane(p); }
2927 auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
2928 SIMD::Store(p, initialValue.Float(i), robustness, state->activeLaneMask());
2929 });
2930 break;
2931 }
2932 default:
2933 ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
2934 }
2935 }
2936
2937 return EmitResult::Continue;
2938 }
2939
2940 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
2941 {
2942 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
2943 Object::ID resultId = insn.word(2);
2944 Object::ID pointerId = insn.word(3);
2945 auto &result = getObject(resultId);
2946 auto &resultTy = getType(result.type);
2947 auto &pointer = getObject(pointerId);
2948 auto &pointerTy = getType(pointer.type);
2949 std::memory_order memoryOrder = std::memory_order_relaxed;
2950
2951 ASSERT(getType(pointer.type).element == result.type);
2952 ASSERT(Type::ID(insn.word(1)) == result.type);
2953 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
2954
2955 if(pointerTy.storageClass == spv::StorageClassUniformConstant)
2956 {
2957 // Just propagate the pointer.
2958 auto &ptr = state->getPointer(pointerId);
2959 state->createPointer(resultId, ptr);
2960 return EmitResult::Continue;
2961 }
2962
2963 if(atomic)
2964 {
2965 Object::ID semanticsId = insn.word(5);
2966 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2967 memoryOrder = MemoryOrder(memorySemantics);
2968 }
2969
2970 auto ptr = GetPointerToData(pointerId, 0, state);
2971 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
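		// In lane-interleaved storage each scalar component occupies
		// SIMD::Width consecutive floats (one per lane), so the pointer is
		// spread with interleaveByLane before the vector load.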
2972 auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
2973 auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
2974
2975 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
2976 {
2977 auto p = ptr + offset;
2978 if (interleavedByLane) { p = interleaveByLane(p); } // TODO: Interleave once, then add offset?
2979 dst.move(i, SIMD::Load<SIMD::Float>(p, robustness, state->activeLaneMask(), atomic, memoryOrder));
2980 });
2981
2982 return EmitResult::Continue;
2983 }
2984
2985 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
2986 {
2987 bool atomic = (insn.opcode() == spv::OpAtomicStore);
2988 Object::ID pointerId = insn.word(1);
2989 Object::ID objectId = insn.word(atomic ? 4 : 2);
2990 auto &object = getObject(objectId);
2991 auto &pointer = getObject(pointerId);
2992 auto &pointerTy = getType(pointer.type);
2993 auto &elementTy = getType(pointerTy.element);
2994 std::memory_order memoryOrder = std::memory_order_relaxed;
2995
2996 if(atomic)
2997 {
2998 Object::ID semanticsId = insn.word(3);
2999 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
3000 memoryOrder = MemoryOrder(memorySemantics);
3001 }
3002
3003 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
3004
3005 auto ptr = GetPointerToData(pointerId, 0, state);
3006 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
3007 auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
3008
3009 SIMD::Int mask = state->activeLaneMask();
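		// Helper invocations must not have side effects visible outside their
		// quad, so their lanes are masked out of stores to external memory.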
3010 if (!StoresInHelperInvocation(pointerTy.storageClass))
3011 {
3012 mask = mask & state->storesAndAtomicsMask();
3013 }
3014
3015 if (object.kind == Object::Kind::Constant)
3016 {
3017 // Constant source data.
3018 const uint32_t *src = object.constantValue.get();
3019 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
3020 {
3021 auto p = ptr + offset;
3022 if (interleavedByLane) { p = interleaveByLane(p); }
3023 SIMD::Store(p, SIMD::Int(src[i]), robustness, mask, atomic, memoryOrder);
3024 });
3025 }
3026 else
3027 {
3028 // Intermediate source data.
3029 auto &src = state->getIntermediate(objectId);
3030 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
3031 {
3032 auto p = ptr + offset;
3033 if (interleavedByLane) { p = interleaveByLane(p); }
3034 SIMD::Store(p, src.Float(i), robustness, mask, atomic, memoryOrder);
3035 });
3036 }
3037
3038 return EmitResult::Continue;
3039 }
3040
3041 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
3042 {
3043 Type::ID typeId = insn.word(1);
3044 Object::ID resultId = insn.word(2);
3045 Object::ID baseId = insn.word(3);
3046 uint32_t numIndexes = insn.wordCount() - 4;
3047 const uint32_t *indexes = insn.wordPointer(4);
3048 auto &type = getType(typeId);
3049 ASSERT(type.sizeInComponents == 1);
3050 ASSERT(getObject(resultId).kind == Object::Kind::Pointer);
3051
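		// Push constants and uniform/storage buffers carry explicit layout
		// decorations (Offset, ArrayStride, MatrixStride); all other storage
		// classes are tightly packed and walked in whole components.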
3052 if(type.storageClass == spv::StorageClassPushConstant ||
3053 type.storageClass == spv::StorageClassUniform ||
3054 type.storageClass == spv::StorageClassStorageBuffer)
3055 {
3056 auto ptr = WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, state);
3057 state->createPointer(resultId, ptr);
3058 }
3059 else
3060 {
3061 auto ptr = WalkAccessChain(baseId, numIndexes, indexes, state);
3062 state->createPointer(resultId, ptr);
3063 }
3064
3065 return EmitResult::Continue;
3066 }
3067
3068 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
3069 {
3070 auto &type = getType(insn.word(1));
3071 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3072 auto offset = 0u;
3073
3074 for (auto i = 0u; i < insn.wordCount() - 3; i++)
3075 {
3076 Object::ID srcObjectId = insn.word(3u + i);
3077 auto & srcObject = getObject(srcObjectId);
3078 auto & srcObjectTy = getType(srcObject.type);
3079 GenericValue srcObjectAccess(this, state, srcObjectId);
3080
3081 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
3082 {
3083 dst.move(offset++, srcObjectAccess.Float(j));
3084 }
3085 }
3086
3087 return EmitResult::Continue;
3088 }
3089
3090 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
3091 {
3092 Type::ID resultTypeId = insn.word(1);
3093 auto &type = getType(resultTypeId);
3094 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3095 auto &newPartObject = getObject(insn.word(3));
3096 auto &newPartObjectTy = getType(newPartObject.type);
3097 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
3098
3099 GenericValue srcObjectAccess(this, state, insn.word(4));
3100 GenericValue newPartObjectAccess(this, state, insn.word(3));
3101
3102 // old components before
3103 for (auto i = 0u; i < firstNewComponent; i++)
3104 {
3105 dst.move(i, srcObjectAccess.Float(i));
3106 }
3107 // new part
3108 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
3109 {
3110 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
3111 }
3112 // old components after
3113 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
3114 {
3115 dst.move(i, srcObjectAccess.Float(i));
3116 }
3117
3118 return EmitResult::Continue;
3119 }
3120
3121 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
3122 {
3123 auto &type = getType(insn.word(1));
3124 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3125 auto &compositeObject = getObject(insn.word(3));
3126 Type::ID compositeTypeId = compositeObject.definition.word(1);
3127 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
3128
3129 GenericValue compositeObjectAccess(this, state, insn.word(3));
3130 for (auto i = 0u; i < type.sizeInComponents; i++)
3131 {
3132 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
3133 }
3134
3135 return EmitResult::Continue;
3136 }
3137
3138 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
3139 {
3140 auto &type = getType(insn.word(1));
3141 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3142
		// Note: the numbers of components in the result type, first half type
		// and second half type are all independent.
3145 auto &firstHalfType = getType(getObject(insn.word(3)).type);
3146
3147 GenericValue firstHalfAccess(this, state, insn.word(3));
3148 GenericValue secondHalfAccess(this, state, insn.word(4));
3149
3150 for (auto i = 0u; i < type.sizeInComponents; i++)
3151 {
3152 auto selector = insn.word(5 + i);
3153 if (selector == static_cast<uint32_t>(-1))
3154 {
				// Undefined value. Until we decide to implement real undef
				// values, zero is as good a value as any.
3157 dst.move(i, RValue<SIMD::Float>(0.0f));
3158 }
3159 else if (selector < firstHalfType.sizeInComponents)
3160 {
3161 dst.move(i, firstHalfAccess.Float(selector));
3162 }
3163 else
3164 {
3165 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
3166 }
3167 }
3168
3169 return EmitResult::Continue;
3170 }
3171
3172 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
3173 {
3174 auto &type = getType(insn.word(1));
3175 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3176 auto &srcType = getType(getObject(insn.word(3)).type);
3177
3178 GenericValue src(this, state, insn.word(3));
3179 GenericValue index(this, state, insn.word(4));
3180
3181 SIMD::UInt v = SIMD::UInt(0);
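		// Branchless dynamic extract: AND each candidate component with an
		// all-ones mask where the runtime index matches, then OR the results.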
3182
3183 for (auto i = 0u; i < srcType.sizeInComponents; i++)
3184 {
3185 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
3186 }
3187
3188 dst.move(0, v);
3189 return EmitResult::Continue;
3190 }
3191
3192 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
3193 {
3194 auto &type = getType(insn.word(1));
3195 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3196
3197 GenericValue src(this, state, insn.word(3));
3198 GenericValue component(this, state, insn.word(4));
3199 GenericValue index(this, state, insn.word(5));
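		// Same masking trick as OpVectorExtractDynamic: blend the new component
		// into the index-matched position without branching.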
3200
3201 for (auto i = 0u; i < type.sizeInComponents; i++)
3202 {
3203 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
3204 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
3205 }
3206 return EmitResult::Continue;
3207 }
3208
3209 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
3210 {
3211 auto &type = getType(insn.word(1));
3212 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3213 auto lhs = GenericValue(this, state, insn.word(3));
3214 auto rhs = GenericValue(this, state, insn.word(4));
3215
3216 for (auto i = 0u; i < type.sizeInComponents; i++)
3217 {
3218 dst.move(i, lhs.Float(i) * rhs.Float(0));
3219 }
3220
3221 return EmitResult::Continue;
3222 }
3223
3224 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
3225 {
3226 auto &type = getType(insn.word(1));
3227 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3228 auto lhs = GenericValue(this, state, insn.word(3));
3229 auto rhs = GenericValue(this, state, insn.word(4));
3230 auto rhsType = getType(rhs.type);
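		// Matrix components are stored column-major: element (row, col) of the
		// lhs matrix is at flat index row + numRows * col, where numRows equals
		// the result vector's component count.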
3231
3232 for (auto i = 0u; i < type.sizeInComponents; i++)
3233 {
3234 SIMD::Float v = lhs.Float(i) * rhs.Float(0);
3235 for (auto j = 1u; j < rhsType.sizeInComponents; j++)
3236 {
3237 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
3238 }
3239 dst.move(i, v);
3240 }
3241
3242 return EmitResult::Continue;
3243 }
3244
3245 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
3246 {
3247 auto &type = getType(insn.word(1));
3248 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3249 auto lhs = GenericValue(this, state, insn.word(3));
3250 auto rhs = GenericValue(this, state, insn.word(4));
3251 auto lhsType = getType(lhs.type);
3252
3253 for (auto i = 0u; i < type.sizeInComponents; i++)
3254 {
3255 SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
3256 for (auto j = 1u; j < lhsType.sizeInComponents; j++)
3257 {
3258 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
3259 }
3260 dst.move(i, v);
3261 }
3262
3263 return EmitResult::Continue;
3264 }
3265
3266 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
3267 {
3268 auto &type = getType(insn.word(1));
3269 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3270 auto lhs = GenericValue(this, state, insn.word(3));
3271 auto rhs = GenericValue(this, state, insn.word(4));
3272
3273 auto numColumns = type.definition.word(3);
3274 auto numRows = getType(type.definition.word(2)).definition.word(3);
3275 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
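		// numAdds is the inner dimension of the multiplication: the lhs column
		// count, which must match the rhs row count.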
3276
3277 for (auto row = 0u; row < numRows; row++)
3278 {
3279 for (auto col = 0u; col < numColumns; col++)
3280 {
3281 SIMD::Float v = SIMD::Float(0);
3282 for (auto i = 0u; i < numAdds; i++)
3283 {
3284 v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
3285 }
3286 dst.move(numRows * col + row, v);
3287 }
3288 }
3289
3290 return EmitResult::Continue;
3291 }
3292
3293 SpirvShader::EmitResult SpirvShader::EmitOuterProduct(InsnIterator insn, EmitState *state) const
3294 {
3295 auto &type = getType(insn.word(1));
3296 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3297 auto lhs = GenericValue(this, state, insn.word(3));
3298 auto rhs = GenericValue(this, state, insn.word(4));
3299 auto &lhsType = getType(lhs.type);
3300 auto &rhsType = getType(rhs.type);
3301
3302 ASSERT(type.definition.opcode() == spv::OpTypeMatrix);
3303 ASSERT(lhsType.definition.opcode() == spv::OpTypeVector);
3304 ASSERT(rhsType.definition.opcode() == spv::OpTypeVector);
3305 ASSERT(getType(lhsType.element).opcode() == spv::OpTypeFloat);
3306 ASSERT(getType(rhsType.element).opcode() == spv::OpTypeFloat);
3307
3308 auto numRows = lhsType.definition.word(3);
3309 auto numCols = rhsType.definition.word(3);
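		// The result is a numRows x numCols matrix with element (row, col) =
		// lhs[row] * rhs[col], stored column-major like every other matrix.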
3310
3311 for (auto col = 0u; col < numCols; col++)
3312 {
3313 for (auto row = 0u; row < numRows; row++)
3314 {
3315 dst.move(col * numRows + row, lhs.Float(row) * rhs.Float(col));
3316 }
3317 }
3318
3319 return EmitResult::Continue;
3320 }
3321
3322 SpirvShader::EmitResult SpirvShader::EmitTranspose(InsnIterator insn, EmitState *state) const
3323 {
3324 auto &type = getType(insn.word(1));
3325 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3326 auto mat = GenericValue(this, state, insn.word(3));
3327
3328 auto numCols = type.definition.word(3);
3329 auto numRows = getType(type.definition.word(2)).sizeInComponents;
3330
3331 for (auto col = 0u; col < numCols; col++)
3332 {
3333 for (auto row = 0u; row < numRows; row++)
3334 {
3335 dst.move(col * numRows + row, mat.Float(row * numCols + col));
3336 }
3337 }
3338
3339 return EmitResult::Continue;
3340 }
3341
3342 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
3343 {
3344 auto &type = getType(insn.word(1));
3345 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3346 auto src = GenericValue(this, state, insn.word(3));
3347
3348 for (auto i = 0u; i < type.sizeInComponents; i++)
3349 {
3350 switch (insn.opcode())
3351 {
3352 case spv::OpNot:
3353 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
3354 dst.move(i, ~src.UInt(i));
3355 break;
3356 case spv::OpBitFieldInsert:
3357 {
3358 auto insert = GenericValue(this, state, insn.word(4)).UInt(i);
3359 auto offset = GenericValue(this, state, insn.word(5)).UInt(0);
3360 auto count = GenericValue(this, state, insn.word(6)).UInt(0);
				auto v = src.UInt(i);
				// Mask with 'count' set bits starting at bit 'offset'.
				auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
3364 dst.move(i, (v & ~mask) | ((insert << offset) & mask));
3365 break;
3366 }
3367 case spv::OpBitFieldSExtract:
3368 case spv::OpBitFieldUExtract:
3369 {
3370 auto offset = GenericValue(this, state, insn.word(4)).UInt(0);
3371 auto count = GenericValue(this, state, insn.word(5)).UInt(0);
3372 auto one = SIMD::UInt(1);
3373 auto v = src.UInt(i);
3374 SIMD::UInt out = (v >> offset) & Bitmask32(count);
3375 if (insn.opcode() == spv::OpBitFieldSExtract)
3376 {
					auto sign = out & NthBit32(count - one);  // isolate the field's sign bit
					auto sext = ~(sign - one);                // ones in and above the sign bit when set, zero otherwise
3379 out |= sext;
3380 }
3381 dst.move(i, out);
3382 break;
3383 }
3384 case spv::OpBitReverse:
3385 {
3386 // TODO: Add an intrinsic to reactor. Even if there isn't a
3387 // single vector instruction, there may be target-dependent
3388 // ways to make this faster.
3389 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
3390 SIMD::UInt v = src.UInt(i);
3391 v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
3392 v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
3393 v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
3394 v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
3395 v = (v >> 16) | (v << 16);
3396 dst.move(i, v);
3397 break;
3398 }
3399 case spv::OpBitCount:
3400 dst.move(i, CountBits(src.UInt(i)));
3401 break;
3402 case spv::OpSNegate:
3403 dst.move(i, -src.Int(i));
3404 break;
3405 case spv::OpFNegate:
3406 dst.move(i, -src.Float(i));
3407 break;
3408 case spv::OpConvertFToU:
3409 dst.move(i, SIMD::UInt(src.Float(i)));
3410 break;
3411 case spv::OpConvertFToS:
3412 dst.move(i, SIMD::Int(src.Float(i)));
3413 break;
3414 case spv::OpConvertSToF:
3415 dst.move(i, SIMD::Float(src.Int(i)));
3416 break;
3417 case spv::OpConvertUToF:
3418 dst.move(i, SIMD::Float(src.UInt(i)));
3419 break;
			case spv::OpBitcast:
				dst.move(i, src.Float(i));  // bit-preserving copy; all components here are 32 bits wide
3422 break;
3423 case spv::OpIsInf:
3424 dst.move(i, IsInf(src.Float(i)));
3425 break;
3426 case spv::OpIsNan:
3427 dst.move(i, IsNan(src.Float(i)));
3428 break;
3429 case spv::OpDPdx:
3430 case spv::OpDPdxCoarse:
3431 // Derivative instructions: FS invocations are laid out like so:
3432 // 0 1
3433 // 2 3
3434 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
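				// The Coarse variants use one forward difference for the whole
				// quad; the Fine variants below compute a difference per
				// row/column of the quad.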
3435 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
3436 break;
3437 case spv::OpDPdy:
3438 case spv::OpDPdyCoarse:
3439 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
3440 break;
3441 case spv::OpFwidth:
3442 case spv::OpFwidthCoarse:
3443 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
3444 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
3445 break;
3446 case spv::OpDPdxFine:
3447 {
3448 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3449 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3450 SIMD::Float v = SIMD::Float(firstRow);
3451 v = Insert(v, secondRow, 2);
3452 v = Insert(v, secondRow, 3);
3453 dst.move(i, v);
3454 break;
3455 }
3456 case spv::OpDPdyFine:
3457 {
3458 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3459 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3460 SIMD::Float v = SIMD::Float(firstColumn);
3461 v = Insert(v, secondColumn, 1);
3462 v = Insert(v, secondColumn, 3);
3463 dst.move(i, v);
3464 break;
3465 }
3466 case spv::OpFwidthFine:
3467 {
3468 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3469 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3470 SIMD::Float dpdx = SIMD::Float(firstRow);
3471 dpdx = Insert(dpdx, secondRow, 2);
3472 dpdx = Insert(dpdx, secondRow, 3);
3473 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3474 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3475 SIMD::Float dpdy = SIMD::Float(firstColumn);
3476 dpdy = Insert(dpdy, secondColumn, 1);
3477 dpdy = Insert(dpdy, secondColumn, 3);
3478 dst.move(i, Abs(dpdx) + Abs(dpdy));
3479 break;
3480 }
3481 case spv::OpQuantizeToF16:
3482 {
3483 // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
3484 auto abs = Abs(src.Float(i));
3485 auto sign = src.Int(i) & SIMD::Int(0x80000000);
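				// 0.000061035f is roughly 2^-14, the smallest normal half-precision value,
				// and 65504.0f is the largest finite half. Smaller magnitudes flush to
				// (signed) zero; larger ones become infinity.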
3486 auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
3487 auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
3488 auto isNaN = IsNan(abs);
3489 auto isInfOrNan = isInf | isNaN;
3490 SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
3491 v &= ~isZero | SIMD::Int(0x80000000);
3492 v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
3493 v |= isNaN & SIMD::Int(0x400000);
3494 dst.move(i, v);
3495 break;
3496 }
3497 default:
3498 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
3499 }
3500 }
3501
3502 return EmitResult::Continue;
3503 }
3504
3505 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
3506 {
3507 auto &type = getType(insn.word(1));
3508 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3509 auto &lhsType = getType(getObject(insn.word(3)).type);
3510 auto lhs = GenericValue(this, state, insn.word(3));
3511 auto rhs = GenericValue(this, state, insn.word(4));
3512
3513 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
3514 {
3515 switch (insn.opcode())
3516 {
3517 case spv::OpIAdd:
3518 dst.move(i, lhs.Int(i) + rhs.Int(i));
3519 break;
3520 case spv::OpISub:
3521 dst.move(i, lhs.Int(i) - rhs.Int(i));
3522 break;
3523 case spv::OpIMul:
3524 dst.move(i, lhs.Int(i) * rhs.Int(i));
3525 break;
3526 case spv::OpSDiv:
3527 {
3528 SIMD::Int a = lhs.Int(i);
3529 SIMD::Int b = rhs.Int(i);
3530 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3531 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
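				// (x86 faults on INT_MIN / -1; forcing a to -1 in that case avoids it,
				// and the result of the overflowing division is undefined anyway.)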
3532 dst.move(i, a / b);
3533 break;
3534 }
3535 case spv::OpUDiv:
3536 {
3537 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
3538 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
3539 break;
3540 }
3541 case spv::OpSRem:
3542 {
3543 SIMD::Int a = lhs.Int(i);
3544 SIMD::Int b = rhs.Int(i);
3545 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3546 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
3547 dst.move(i, a % b);
3548 break;
3549 }
3550 case spv::OpSMod:
3551 {
3552 SIMD::Int a = lhs.Int(i);
3553 SIMD::Int b = rhs.Int(i);
3554 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3555 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
3556 auto mod = a % b;
3557 // If a and b have opposite signs, the remainder operation takes
3558 // the sign from a but OpSMod is supposed to take the sign of b.
3559 // Adding b will ensure that the result has the correct sign and
3560 // that it is still congruent to a modulo b.
3561 //
3562 // See also http://mathforum.org/library/drmath/view/52343.html
3563 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
3564 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
3565 dst.move(i, As<SIMD::Float>(fixedMod));
3566 break;
3567 }
3568 case spv::OpUMod:
3569 {
3570 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
3571 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
3572 break;
3573 }
3574 case spv::OpIEqual:
3575 case spv::OpLogicalEqual:
3576 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
3577 break;
3578 case spv::OpINotEqual:
3579 case spv::OpLogicalNotEqual:
3580 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
3581 break;
3582 case spv::OpUGreaterThan:
3583 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
3584 break;
3585 case spv::OpSGreaterThan:
3586 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
3587 break;
3588 case spv::OpUGreaterThanEqual:
3589 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
3590 break;
3591 case spv::OpSGreaterThanEqual:
3592 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
3593 break;
3594 case spv::OpULessThan:
3595 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
3596 break;
3597 case spv::OpSLessThan:
3598 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
3599 break;
3600 case spv::OpULessThanEqual:
3601 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
3602 break;
3603 case spv::OpSLessThanEqual:
3604 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
3605 break;
3606 case spv::OpFAdd:
3607 dst.move(i, lhs.Float(i) + rhs.Float(i));
3608 break;
3609 case spv::OpFSub:
3610 dst.move(i, lhs.Float(i) - rhs.Float(i));
3611 break;
3612 case spv::OpFMul:
3613 dst.move(i, lhs.Float(i) * rhs.Float(i));
3614 break;
3615 case spv::OpFDiv:
3616 dst.move(i, lhs.Float(i) / rhs.Float(i));
3617 break;
3618 case spv::OpFMod:
3619 // TODO(b/126873455): inaccurate for values greater than 2^24
3620 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
3621 break;
3622 case spv::OpFRem:
3623 dst.move(i, lhs.Float(i) % rhs.Float(i));
3624 break;
3625 case spv::OpFOrdEqual:
3626 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
3627 break;
3628 case spv::OpFUnordEqual:
3629 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
3630 break;
3631 case spv::OpFOrdNotEqual:
3632 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
3633 break;
3634 case spv::OpFUnordNotEqual:
3635 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
3636 break;
3637 case spv::OpFOrdLessThan:
3638 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
3639 break;
3640 case spv::OpFUnordLessThan:
3641 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
3642 break;
3643 case spv::OpFOrdGreaterThan:
3644 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
3645 break;
3646 case spv::OpFUnordGreaterThan:
3647 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
3648 break;
3649 case spv::OpFOrdLessThanEqual:
3650 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
3651 break;
3652 case spv::OpFUnordLessThanEqual:
3653 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
3654 break;
3655 case spv::OpFOrdGreaterThanEqual:
3656 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
3657 break;
3658 case spv::OpFUnordGreaterThanEqual:
3659 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
3660 break;
3661 case spv::OpShiftRightLogical:
3662 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
3663 break;
3664 case spv::OpShiftRightArithmetic:
3665 dst.move(i, lhs.Int(i) >> rhs.Int(i));
3666 break;
3667 case spv::OpShiftLeftLogical:
3668 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
3669 break;
3670 case spv::OpBitwiseOr:
3671 case spv::OpLogicalOr:
3672 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
3673 break;
3674 case spv::OpBitwiseXor:
3675 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
3676 break;
3677 case spv::OpBitwiseAnd:
3678 case spv::OpLogicalAnd:
3679 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
3680 break;
3681 case spv::OpSMulExtended:
3682 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
3683 // In our flat view then, component i is the i'th component of the first member;
3684 // component i + N is the i'th component of the second member.
3685 dst.move(i, lhs.Int(i) * rhs.Int(i));
3686 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
3687 break;
3688 case spv::OpUMulExtended:
3689 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
3690 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
3691 break;
3692 case spv::OpIAddCarry:
3693 dst.move(i, lhs.UInt(i) + rhs.UInt(i));
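			// The addition wrapped iff the truncated sum is less than an addend;
			// the logical shift turns that lane mask into a 0/1 carry.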
3694 dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
3695 break;
3696 case spv::OpISubBorrow:
3697 dst.move(i, lhs.UInt(i) - rhs.UInt(i));
3698 dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
3699 break;
3700 default:
3701 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
3702 }
3703 }
3704
3705 return EmitResult::Continue;
3706 }
3707
3708 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
3709 {
3710 auto &type = getType(insn.word(1));
3711 ASSERT(type.sizeInComponents == 1);
3712 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3713 auto &lhsType = getType(getObject(insn.word(3)).type);
3714 auto lhs = GenericValue(this, state, insn.word(3));
3715 auto rhs = GenericValue(this, state, insn.word(4));
3716
3717 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
3718 return EmitResult::Continue;
3719 }
3720
3721 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
3722 {
3723 auto &type = getType(insn.word(1));
3724 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3725 auto cond = GenericValue(this, state, insn.word(3));
3726 auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
3727 auto lhs = GenericValue(this, state, insn.word(4));
3728 auto rhs = GenericValue(this, state, insn.word(5));
3729
3730 for (auto i = 0u; i < type.sizeInComponents; i++)
3731 {
3732 auto sel = cond.Int(condIsScalar ? 0 : i);
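			// Boolean lanes are all-ones or all-zeros, so a bitwise blend implements
			// the per-lane select.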
3733 dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse()
3734 }
3735
3736 return EmitResult::Continue;
3737 }
3738
3739 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
3740 {
3741 auto &type = getType(insn.word(1));
3742 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3743 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
3744
3745 switch (extInstIndex)
3746 {
3747 case GLSLstd450FAbs:
3748 {
3749 auto src = GenericValue(this, state, insn.word(5));
3750 for (auto i = 0u; i < type.sizeInComponents; i++)
3751 {
3752 dst.move(i, Abs(src.Float(i)));
3753 }
3754 break;
3755 }
3756 case GLSLstd450SAbs:
3757 {
3758 auto src = GenericValue(this, state, insn.word(5));
3759 for (auto i = 0u; i < type.sizeInComponents; i++)
3760 {
3761 dst.move(i, Abs(src.Int(i)));
3762 }
3763 break;
3764 }
3765 case GLSLstd450Cross:
3766 {
3767 auto lhs = GenericValue(this, state, insn.word(5));
3768 auto rhs = GenericValue(this, state, insn.word(6));
3769 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
3770 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
3771 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
3772 break;
3773 }
3774 case GLSLstd450Floor:
3775 {
3776 auto src = GenericValue(this, state, insn.word(5));
3777 for (auto i = 0u; i < type.sizeInComponents; i++)
3778 {
3779 dst.move(i, Floor(src.Float(i)));
3780 }
3781 break;
3782 }
3783 case GLSLstd450Trunc:
3784 {
3785 auto src = GenericValue(this, state, insn.word(5));
3786 for (auto i = 0u; i < type.sizeInComponents; i++)
3787 {
3788 dst.move(i, Trunc(src.Float(i)));
3789 }
3790 break;
3791 }
3792 case GLSLstd450Ceil:
3793 {
3794 auto src = GenericValue(this, state, insn.word(5));
3795 for (auto i = 0u; i < type.sizeInComponents; i++)
3796 {
3797 dst.move(i, Ceil(src.Float(i)));
3798 }
3799 break;
3800 }
3801 case GLSLstd450Fract:
3802 {
3803 auto src = GenericValue(this, state, insn.word(5));
3804 for (auto i = 0u; i < type.sizeInComponents; i++)
3805 {
3806 dst.move(i, Frac(src.Float(i)));
3807 }
3808 break;
3809 }
3810 case GLSLstd450Round:
3811 {
3812 auto src = GenericValue(this, state, insn.word(5));
3813 for (auto i = 0u; i < type.sizeInComponents; i++)
3814 {
3815 dst.move(i, Round(src.Float(i)));
3816 }
3817 break;
3818 }
3819 case GLSLstd450RoundEven:
3820 {
3821 auto src = GenericValue(this, state, insn.word(5));
3822 for (auto i = 0u; i < type.sizeInComponents; i++)
3823 {
3824 auto x = Round(src.Float(i));
3825 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
3826 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
3827 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
3828 }
3829 break;
3830 }
3831 case GLSLstd450FMin:
3832 {
3833 auto lhs = GenericValue(this, state, insn.word(5));
3834 auto rhs = GenericValue(this, state, insn.word(6));
3835 for (auto i = 0u; i < type.sizeInComponents; i++)
3836 {
3837 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
3838 }
3839 break;
3840 }
3841 case GLSLstd450FMax:
3842 {
3843 auto lhs = GenericValue(this, state, insn.word(5));
3844 auto rhs = GenericValue(this, state, insn.word(6));
3845 for (auto i = 0u; i < type.sizeInComponents; i++)
3846 {
3847 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
3848 }
3849 break;
3850 }
3851 case GLSLstd450SMin:
3852 {
3853 auto lhs = GenericValue(this, state, insn.word(5));
3854 auto rhs = GenericValue(this, state, insn.word(6));
3855 for (auto i = 0u; i < type.sizeInComponents; i++)
3856 {
3857 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
3858 }
3859 break;
3860 }
3861 case GLSLstd450SMax:
3862 {
3863 auto lhs = GenericValue(this, state, insn.word(5));
3864 auto rhs = GenericValue(this, state, insn.word(6));
3865 for (auto i = 0u; i < type.sizeInComponents; i++)
3866 {
3867 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
3868 }
3869 break;
3870 }
3871 case GLSLstd450UMin:
3872 {
3873 auto lhs = GenericValue(this, state, insn.word(5));
3874 auto rhs = GenericValue(this, state, insn.word(6));
3875 for (auto i = 0u; i < type.sizeInComponents; i++)
3876 {
3877 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
3878 }
3879 break;
3880 }
3881 case GLSLstd450UMax:
3882 {
3883 auto lhs = GenericValue(this, state, insn.word(5));
3884 auto rhs = GenericValue(this, state, insn.word(6));
3885 for (auto i = 0u; i < type.sizeInComponents; i++)
3886 {
3887 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
3888 }
3889 break;
3890 }
3891 case GLSLstd450Step:
3892 {
3893 auto edge = GenericValue(this, state, insn.word(5));
3894 auto x = GenericValue(this, state, insn.word(6));
3895 for (auto i = 0u; i < type.sizeInComponents; i++)
3896 {
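				// CmpNLT yields an all-ones lane where x >= edge; ANDing with the bit
				// pattern of 1.0f turns that into 1.0 or 0.0.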
3897 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
3898 }
3899 break;
3900 }
3901 case GLSLstd450SmoothStep:
3902 {
3903 auto edge0 = GenericValue(this, state, insn.word(5));
3904 auto edge1 = GenericValue(this, state, insn.word(6));
3905 auto x = GenericValue(this, state, insn.word(7));
3906 for (auto i = 0u; i < type.sizeInComponents; i++)
3907 {
3908 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
3909 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
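				// Hermite smoothing: tx * tx * (3 - 2 * tx), with tx clamped to [0, 1].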
				dst.move(i, tx * tx * (SIMD::Float(3.0f) - SIMD::Float(2.0f) * tx));
3911 }
3912 break;
3913 }
3914 case GLSLstd450FMix:
3915 {
3916 auto x = GenericValue(this, state, insn.word(5));
3917 auto y = GenericValue(this, state, insn.word(6));
3918 auto a = GenericValue(this, state, insn.word(7));
3919 for (auto i = 0u; i < type.sizeInComponents; i++)
3920 {
3921 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
3922 }
3923 break;
3924 }
3925 case GLSLstd450FClamp:
3926 {
3927 auto x = GenericValue(this, state, insn.word(5));
3928 auto minVal = GenericValue(this, state, insn.word(6));
3929 auto maxVal = GenericValue(this, state, insn.word(7));
3930 for (auto i = 0u; i < type.sizeInComponents; i++)
3931 {
3932 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
3933 }
3934 break;
3935 }
3936 case GLSLstd450SClamp:
3937 {
3938 auto x = GenericValue(this, state, insn.word(5));
3939 auto minVal = GenericValue(this, state, insn.word(6));
3940 auto maxVal = GenericValue(this, state, insn.word(7));
3941 for (auto i = 0u; i < type.sizeInComponents; i++)
3942 {
3943 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
3944 }
3945 break;
3946 }
3947 case GLSLstd450UClamp:
3948 {
3949 auto x = GenericValue(this, state, insn.word(5));
3950 auto minVal = GenericValue(this, state, insn.word(6));
3951 auto maxVal = GenericValue(this, state, insn.word(7));
3952 for (auto i = 0u; i < type.sizeInComponents; i++)
3953 {
3954 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
3955 }
3956 break;
3957 }
3958 case GLSLstd450FSign:
3959 {
3960 auto src = GenericValue(this, state, insn.word(5));
3961 for (auto i = 0u; i < type.sizeInComponents; i++)
3962 {
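				// Build the result from the bit patterns of -1.0 and +1.0, masked by the
				// comparisons; both masks are zero for x == +/-0.0, yielding 0.0.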
3963 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
3964 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
3965 dst.move(i, neg | pos);
3966 }
3967 break;
3968 }
3969 case GLSLstd450SSign:
3970 {
3971 auto src = GenericValue(this, state, insn.word(5));
3972 for (auto i = 0u; i < type.sizeInComponents; i++)
3973 {
3974 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
3975 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
3976 dst.move(i, neg | pos);
3977 }
3978 break;
3979 }
3980 case GLSLstd450Reflect:
3981 {
3982 auto I = GenericValue(this, state, insn.word(5));
3983 auto N = GenericValue(this, state, insn.word(6));
3984
3985 SIMD::Float d = Dot(type.sizeInComponents, I, N);
3986
3987 for (auto i = 0u; i < type.sizeInComponents; i++)
3988 {
3989 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
3990 }
3991 break;
3992 }
3993 case GLSLstd450Refract:
3994 {
3995 auto I = GenericValue(this, state, insn.word(5));
3996 auto N = GenericValue(this, state, insn.word(6));
3997 auto eta = GenericValue(this, state, insn.word(7));
3998
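			// GLSL refract(): k = 1 - eta^2 * (1 - dot(N, I)^2). If k < 0 the result is
			// 0.0 (total internal reflection); otherwise it is
			// eta*I - (eta*dot(N, I) + sqrt(k))*N.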
3999 SIMD::Float d = Dot(type.sizeInComponents, I, N);
4000 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
4001 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
4002 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
4003
4004 for (auto i = 0u; i < type.sizeInComponents; i++)
4005 {
4006 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
4007 }
4008 break;
4009 }
4010 case GLSLstd450FaceForward:
4011 {
4012 auto N = GenericValue(this, state, insn.word(5));
4013 auto I = GenericValue(this, state, insn.word(6));
4014 auto Nref = GenericValue(this, state, insn.word(7));
4015
4016 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
4017 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
4018
4019 for (auto i = 0u; i < type.sizeInComponents; i++)
4020 {
4021 auto n = N.Float(i);
4022 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
4023 }
4024 break;
4025 }
4026 case GLSLstd450Length:
4027 {
4028 auto x = GenericValue(this, state, insn.word(5));
4029 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
4030
4031 dst.move(0, Sqrt(d));
4032 break;
4033 }
4034 case GLSLstd450Normalize:
4035 {
4036 auto x = GenericValue(this, state, insn.word(5));
4037 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
4038 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
4039
4040 for (auto i = 0u; i < type.sizeInComponents; i++)
4041 {
4042 dst.move(i, invLength * x.Float(i));
4043 }
4044 break;
4045 }
4046 case GLSLstd450Distance:
4047 {
4048 auto p0 = GenericValue(this, state, insn.word(5));
4049 auto p1 = GenericValue(this, state, insn.word(6));
4050 auto p0Type = getType(p0.type);
4051
4052 // sqrt(dot(p0-p1, p0-p1))
4053 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
4054
4055 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
4056 {
4057 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
4058 }
4059
4060 dst.move(0, Sqrt(d));
4061 break;
4062 }
4063 case GLSLstd450Modf:
4064 {
4065 auto val = GenericValue(this, state, insn.word(5));
4066 auto ptrId = Object::ID(insn.word(6));
4067 auto ptrTy = getType(getObject(ptrId).type);
4068 auto ptr = GetPointerToData(ptrId, 0, state);
4069 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
4070 // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
4071 // to be in bounds even for inactive lanes.
4072 // - Clarify the SPIR-V spec.
4073 // - Eliminate lane masking and assume interleaving.
4074 auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
4075
4076 for (auto i = 0u; i < type.sizeInComponents; i++)
4077 {
4078 SIMD::Float whole, frac;
4079 std::tie(whole, frac) = Modf(val.Float(i));
4080 dst.move(i, frac);
4081 auto p = ptr + (i * sizeof(float));
4082 if (interleavedByLane) { p = interleaveByLane(p); }
4083 SIMD::Store(p, whole, robustness, state->activeLaneMask());
4084 }
4085 break;
4086 }
4087 case GLSLstd450ModfStruct:
4088 {
4089 auto val = GenericValue(this, state, insn.word(5));
4090 auto valTy = getType(val.type);
4091
4092 for (auto i = 0u; i < valTy.sizeInComponents; i++)
4093 {
4094 SIMD::Float whole, frac;
4095 std::tie(whole, frac) = Modf(val.Float(i));
4096 dst.move(i, frac);
4097 dst.move(i + valTy.sizeInComponents, whole);
4098 }
4099 break;
4100 }
4101 case GLSLstd450PackSnorm4x8:
4102 {
4103 auto val = GenericValue(this, state, insn.word(5));
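			// packSnorm4x8: round(clamp(c, -1, +1) * 127.0) per component, with
			// component 0 packed into the least significant byte.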
4104 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4105 SIMD::Int(0xFF)) |
4106 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4107 SIMD::Int(0xFF)) << 8) |
4108 ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4109 SIMD::Int(0xFF)) << 16) |
4110 ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4111 SIMD::Int(0xFF)) << 24));
4112 break;
4113 }
4114 case GLSLstd450PackUnorm4x8:
4115 {
4116 auto val = GenericValue(this, state, insn.word(5));
4117 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
4118 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
4119 ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
4120 ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
4121 break;
4122 }
4123 case GLSLstd450PackSnorm2x16:
4124 {
4125 auto val = GenericValue(this, state, insn.word(5));
4126 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
4127 SIMD::Int(0xFFFF)) |
4128 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
4129 SIMD::Int(0xFFFF)) << 16));
4130 break;
4131 }
4132 case GLSLstd450PackUnorm2x16:
4133 {
4134 auto val = GenericValue(this, state, insn.word(5));
4135 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
4136 SIMD::UInt(0xFFFF)) |
4137 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
4138 SIMD::UInt(0xFFFF)) << 16));
4139 break;
4140 }
4141 case GLSLstd450PackHalf2x16:
4142 {
4143 auto val = GenericValue(this, state, insn.word(5));
4144 dst.move(0, FloatToHalfBits(val.UInt(0), false) | FloatToHalfBits(val.UInt(1), true));
4145 break;
4146 }
4147 case GLSLstd450UnpackSnorm4x8:
4148 {
4149 auto val = GenericValue(this, state, insn.word(5));
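			// Shift each byte into the top of the word so its sign bit lands in the
			// integer's sign bit, then scale by 1 / 0x7f000000 (i.e. divide by 127 * 2^24)
			// to map the signed byte to [-1, 1] before clamping.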
4150 dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4151 dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4152 dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4153 dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4154 break;
4155 }
4156 case GLSLstd450UnpackUnorm4x8:
4157 {
4158 auto val = GenericValue(this, state, insn.word(5));
4159 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4160 dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4161 dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4162 dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4163 break;
4164 }
4165 case GLSLstd450UnpackSnorm2x16:
4166 {
4167 auto val = GenericValue(this, state, insn.word(5));
4168 // clamp(f / 32767.0, -1.0, 1.0)
4169 dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
4170 SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4171 dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
4172 SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4173 break;
4174 }
4175 case GLSLstd450UnpackUnorm2x16:
4176 {
4177 auto val = GenericValue(this, state, insn.word(5));
4178 // f / 65535.0
4179 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
4180 dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
4181 break;
4182 }
4183 case GLSLstd450UnpackHalf2x16:
4184 {
4185 auto val = GenericValue(this, state, insn.word(5));
4186 dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
4187 dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
4188 break;
4189 }
4190 case GLSLstd450Fma:
4191 {
4192 auto a = GenericValue(this, state, insn.word(5));
4193 auto b = GenericValue(this, state, insn.word(6));
4194 auto c = GenericValue(this, state, insn.word(7));
4195 for (auto i = 0u; i < type.sizeInComponents; i++)
4196 {
4197 dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
4198 }
4199 break;
4200 }
4201 case GLSLstd450Frexp:
4202 {
4203 auto val = GenericValue(this, state, insn.word(5));
4204 auto ptrId = Object::ID(insn.word(6));
4205 auto ptrTy = getType(getObject(ptrId).type);
4206 auto ptr = GetPointerToData(ptrId, 0, state);
4207 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
4208 // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
4209 // to be in bounds even for inactive lanes.
4210 // - Clarify the SPIR-V spec.
4211 // - Eliminate lane masking and assume interleaving.
4212 auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
4213
4214 for (auto i = 0u; i < type.sizeInComponents; i++)
4215 {
4216 SIMD::Float significand;
4217 SIMD::Int exponent;
4218 std::tie(significand, exponent) = Frexp(val.Float(i));
4219
4220 dst.move(i, significand);
4221
4222 auto p = ptr + (i * sizeof(float));
4223 if (interleavedByLane) { p = interleaveByLane(p); }
4224 SIMD::Store(p, exponent, robustness, state->activeLaneMask());
4225 }
4226 break;
4227 }
4228 case GLSLstd450FrexpStruct:
4229 {
4230 auto val = GenericValue(this, state, insn.word(5));
4231 auto numComponents = getType(val.type).sizeInComponents;
4232 for (auto i = 0u; i < numComponents; i++)
4233 {
4234 auto significandAndExponent = Frexp(val.Float(i));
4235 dst.move(i, significandAndExponent.first);
4236 dst.move(i + numComponents, significandAndExponent.second);
4237 }
4238 break;
4239 }
4240 case GLSLstd450Ldexp:
4241 {
4242 auto significand = GenericValue(this, state, insn.word(5));
4243 auto exponent = GenericValue(this, state, insn.word(6));
4244 for (auto i = 0u; i < type.sizeInComponents; i++)
4245 {
4246 // Assumes IEEE 754
4247 auto in = significand.Float(i);
4248 auto significandExponent = Exponent(in);
4249 auto combinedExponent = exponent.Int(i) + significandExponent;
4250 auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
4251 auto isSignificandInf = SIMD::UInt(IsInf(in));
4252 auto isSignificandNaN = SIMD::UInt(IsNan(in));
4253 auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
4254 auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
4255 auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
4256
4257 SIMD::UInt v;
4258 v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
4259 v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
4260 v &= isExponentInBounds; // Clear v if the exponent is OOB.
4261
4262 v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
4263 v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
4264
4265 // If the input significand is zero, inf or nan, just return the
4266 // input significand.
4267 auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
4268 v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
4269
4270 dst.move(i, As<SIMD::Float>(v));
4271 }
4272 break;
4273 }
4274 case GLSLstd450Radians:
4275 {
4276 auto degrees = GenericValue(this, state, insn.word(5));
4277 for (auto i = 0u; i < type.sizeInComponents; i++)
4278 {
4279 dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
4280 }
4281 break;
4282 }
4283 case GLSLstd450Degrees:
4284 {
4285 auto radians = GenericValue(this, state, insn.word(5));
4286 for (auto i = 0u; i < type.sizeInComponents; i++)
4287 {
4288 dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
4289 }
4290 break;
4291 }
4292 case GLSLstd450Sin:
4293 {
4294 auto radians = GenericValue(this, state, insn.word(5));
4295 for (auto i = 0u; i < type.sizeInComponents; i++)
4296 {
4297 dst.move(i, Sin(radians.Float(i)));
4298 }
4299 break;
4300 }
4301 case GLSLstd450Cos:
4302 {
4303 auto radians = GenericValue(this, state, insn.word(5));
4304 for (auto i = 0u; i < type.sizeInComponents; i++)
4305 {
4306 dst.move(i, Cos(radians.Float(i)));
4307 }
4308 break;
4309 }
4310 case GLSLstd450Tan:
4311 {
4312 auto radians = GenericValue(this, state, insn.word(5));
4313 for (auto i = 0u; i < type.sizeInComponents; i++)
4314 {
4315 dst.move(i, Tan(radians.Float(i)));
4316 }
4317 break;
4318 }
4319 case GLSLstd450Asin:
4320 {
4321 auto val = GenericValue(this, state, insn.word(5));
4322 for (auto i = 0u; i < type.sizeInComponents; i++)
4323 {
4324 dst.move(i, Asin(val.Float(i)));
4325 }
4326 break;
4327 }
4328 case GLSLstd450Acos:
4329 {
4330 auto val = GenericValue(this, state, insn.word(5));
4331 for (auto i = 0u; i < type.sizeInComponents; i++)
4332 {
4333 dst.move(i, Acos(val.Float(i)));
4334 }
4335 break;
4336 }
4337 case GLSLstd450Atan:
4338 {
4339 auto val = GenericValue(this, state, insn.word(5));
4340 for (auto i = 0u; i < type.sizeInComponents; i++)
4341 {
4342 dst.move(i, Atan(val.Float(i)));
4343 }
4344 break;
4345 }
4346 case GLSLstd450Sinh:
4347 {
4348 auto val = GenericValue(this, state, insn.word(5));
4349 for (auto i = 0u; i < type.sizeInComponents; i++)
4350 {
4351 dst.move(i, Sinh(val.Float(i)));
4352 }
4353 break;
4354 }
4355 case GLSLstd450Cosh:
4356 {
4357 auto val = GenericValue(this, state, insn.word(5));
4358 for (auto i = 0u; i < type.sizeInComponents; i++)
4359 {
4360 dst.move(i, Cosh(val.Float(i)));
4361 }
4362 break;
4363 }
4364 case GLSLstd450Tanh:
4365 {
4366 auto val = GenericValue(this, state, insn.word(5));
4367 for (auto i = 0u; i < type.sizeInComponents; i++)
4368 {
4369 dst.move(i, Tanh(val.Float(i)));
4370 }
4371 break;
4372 }
4373 case GLSLstd450Asinh:
4374 {
4375 auto val = GenericValue(this, state, insn.word(5));
4376 for (auto i = 0u; i < type.sizeInComponents; i++)
4377 {
4378 dst.move(i, Asinh(val.Float(i)));
4379 }
4380 break;
4381 }
4382 case GLSLstd450Acosh:
4383 {
4384 auto val = GenericValue(this, state, insn.word(5));
4385 for (auto i = 0u; i < type.sizeInComponents; i++)
4386 {
4387 dst.move(i, Acosh(val.Float(i)));
4388 }
4389 break;
4390 }
4391 case GLSLstd450Atanh:
4392 {
4393 auto val = GenericValue(this, state, insn.word(5));
4394 for (auto i = 0u; i < type.sizeInComponents; i++)
4395 {
4396 dst.move(i, Atanh(val.Float(i)));
4397 }
4398 break;
4399 }
4400 case GLSLstd450Atan2:
4401 {
4402 auto x = GenericValue(this, state, insn.word(5));
4403 auto y = GenericValue(this, state, insn.word(6));
4404 for (auto i = 0u; i < type.sizeInComponents; i++)
4405 {
4406 dst.move(i, Atan2(x.Float(i), y.Float(i)));
4407 }
4408 break;
4409 }
4410 case GLSLstd450Pow:
4411 {
4412 auto x = GenericValue(this, state, insn.word(5));
4413 auto y = GenericValue(this, state, insn.word(6));
4414 for (auto i = 0u; i < type.sizeInComponents; i++)
4415 {
4416 dst.move(i, Pow(x.Float(i), y.Float(i)));
4417 }
4418 break;
4419 }
4420 case GLSLstd450Exp:
4421 {
4422 auto val = GenericValue(this, state, insn.word(5));
4423 for (auto i = 0u; i < type.sizeInComponents; i++)
4424 {
4425 dst.move(i, Exp(val.Float(i)));
4426 }
4427 break;
4428 }
4429 case GLSLstd450Log:
4430 {
4431 auto val = GenericValue(this, state, insn.word(5));
4432 for (auto i = 0u; i < type.sizeInComponents; i++)
4433 {
4434 dst.move(i, Log(val.Float(i)));
4435 }
4436 break;
4437 }
4438 case GLSLstd450Exp2:
4439 {
4440 auto val = GenericValue(this, state, insn.word(5));
4441 for (auto i = 0u; i < type.sizeInComponents; i++)
4442 {
4443 dst.move(i, Exp2(val.Float(i)));
4444 }
4445 break;
4446 }
4447 case GLSLstd450Log2:
4448 {
4449 auto val = GenericValue(this, state, insn.word(5));
4450 for (auto i = 0u; i < type.sizeInComponents; i++)
4451 {
4452 dst.move(i, Log2(val.Float(i)));
4453 }
4454 break;
4455 }
4456 case GLSLstd450Sqrt:
4457 {
4458 auto val = GenericValue(this, state, insn.word(5));
4459 for (auto i = 0u; i < type.sizeInComponents; i++)
4460 {
4461 dst.move(i, Sqrt(val.Float(i)));
4462 }
4463 break;
4464 }
4465 case GLSLstd450InverseSqrt:
4466 {
4467 auto val = GenericValue(this, state, insn.word(5));
4468 Decorations d;
4469 ApplyDecorationsForId(&d, insn.word(5));
4470 if (d.RelaxedPrecision)
4471 {
4472 for (auto i = 0u; i < type.sizeInComponents; i++)
4473 {
4474 dst.move(i, RcpSqrt_pp(val.Float(i)));
4475 }
4476 }
4477 else
4478 {
4479 for (auto i = 0u; i < type.sizeInComponents; i++)
4480 {
4481 dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
4482 }
4483 }
4484 break;
4485 }
4486 case GLSLstd450Determinant:
4487 {
4488 auto mat = GenericValue(this, state, insn.word(5));
4489 auto numComponents = getType(mat.type).sizeInComponents;
4490 switch (numComponents)
4491 {
4492 case 4: // 2x2
4493 dst.move(0, Determinant(
4494 mat.Float(0), mat.Float(1),
4495 mat.Float(2), mat.Float(3)));
4496 break;
4497 case 9: // 3x3
4498 dst.move(0, Determinant(
4499 mat.Float(0), mat.Float(1), mat.Float(2),
4500 mat.Float(3), mat.Float(4), mat.Float(5),
4501 mat.Float(6), mat.Float(7), mat.Float(8)));
4502 break;
4503 case 16: // 4x4
4504 dst.move(0, Determinant(
4505 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4506 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4507 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4508 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
4509 break;
4510 default:
4511 UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
4512 }
4513 break;
4514 }
4515 case GLSLstd450MatrixInverse:
4516 {
4517 auto mat = GenericValue(this, state, insn.word(5));
4518 auto numComponents = getType(mat.type).sizeInComponents;
4519 switch (numComponents)
4520 {
4521 case 4: // 2x2
4522 {
4523 auto inv = MatrixInverse(
4524 mat.Float(0), mat.Float(1),
4525 mat.Float(2), mat.Float(3));
4526 for (uint32_t i = 0; i < inv.size(); i++)
4527 {
4528 dst.move(i, inv[i]);
4529 }
4530 break;
4531 }
4532 case 9: // 3x3
4533 {
4534 auto inv = MatrixInverse(
4535 mat.Float(0), mat.Float(1), mat.Float(2),
4536 mat.Float(3), mat.Float(4), mat.Float(5),
4537 mat.Float(6), mat.Float(7), mat.Float(8));
4538 for (uint32_t i = 0; i < inv.size(); i++)
4539 {
4540 dst.move(i, inv[i]);
4541 }
4542 break;
4543 }
4544 case 16: // 4x4
4545 {
4546 auto inv = MatrixInverse(
4547 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4548 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4549 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4550 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
4551 for (uint32_t i = 0; i < inv.size(); i++)
4552 {
4553 dst.move(i, inv[i]);
4554 }
4555 break;
4556 }
4557 default:
4558 UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
4559 }
4560 break;
4561 }
4562 case GLSLstd450IMix:
4563 {
4564 UNREACHABLE("GLSLstd450IMix has been removed from the specification");
4565 break;
4566 }
4567 case GLSLstd450PackDouble2x32:
4568 {
4569 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
4570 break;
4571 }
4572 case GLSLstd450UnpackDouble2x32:
4573 {
4574 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
4575 break;
4576 }
4577 case GLSLstd450FindILsb:
4578 {
4579 auto val = GenericValue(this, state, insn.word(5));
4580 for (auto i = 0u; i < type.sizeInComponents; i++)
4581 {
4582 auto v = val.UInt(i);
4583 dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
4584 }
4585 break;
4586 }
4587 case GLSLstd450FindSMsb:
4588 {
4589 auto val = GenericValue(this, state, insn.word(5));
4590 for (auto i = 0u; i < type.sizeInComponents; i++)
4591 {
4592 auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
4593 dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
4594 }
4595 break;
4596 }
4597 case GLSLstd450FindUMsb:
4598 {
4599 auto val = GenericValue(this, state, insn.word(5));
4600 for (auto i = 0u; i < type.sizeInComponents; i++)
4601 {
4602 dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
4603 }
4604 break;
4605 }
4606 case GLSLstd450InterpolateAtCentroid:
4607 {
4608 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
4609 break;
4610 }
4611 case GLSLstd450InterpolateAtSample:
4612 {
			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtSample)");
4614 break;
4615 }
4616 case GLSLstd450InterpolateAtOffset:
4617 {
			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtOffset)");
4619 break;
4620 }
4621 case GLSLstd450NMin:
4622 {
4623 auto x = GenericValue(this, state, insn.word(5));
4624 auto y = GenericValue(this, state, insn.word(6));
4625 for (auto i = 0u; i < type.sizeInComponents; i++)
4626 {
4627 dst.move(i, NMin(x.Float(i), y.Float(i)));
4628 }
4629 break;
4630 }
4631 case GLSLstd450NMax:
4632 {
4633 auto x = GenericValue(this, state, insn.word(5));
4634 auto y = GenericValue(this, state, insn.word(6));
4635 for (auto i = 0u; i < type.sizeInComponents; i++)
4636 {
4637 dst.move(i, NMax(x.Float(i), y.Float(i)));
4638 }
4639 break;
4640 }
4641 case GLSLstd450NClamp:
4642 {
4643 auto x = GenericValue(this, state, insn.word(5));
4644 auto minVal = GenericValue(this, state, insn.word(6));
4645 auto maxVal = GenericValue(this, state, insn.word(7));
4646 for (auto i = 0u; i < type.sizeInComponents; i++)
4647 {
4648 auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
4649 dst.move(i, clamp);
4650 }
4651 break;
4652 }
4653 default:
4654 UNREACHABLE("ExtInst %d", int(extInstIndex));
4655 break;
4656 }
4657
4658 return EmitResult::Continue;
4659 }
4660
4661 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
4662 {
4663 auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
4664 spv::MemorySemanticsAcquireMask |
4665 spv::MemorySemanticsReleaseMask |
4666 spv::MemorySemanticsAcquireReleaseMask |
4667 spv::MemorySemanticsSequentiallyConsistentMask
4668 );
4669 switch (control)
4670 {
4671 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
4672 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
4673 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
4674 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
4675 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
4676 default:
4677 // "it is invalid for more than one of these four bits to be set:
4678 // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
4679 UNREACHABLE("MemorySemanticsMask: %x", int(control));
4680 return std::memory_order_acq_rel;
4681 }
4682 }
4683
4684 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
4685 {
4686 SIMD::Float d = x.Float(0) * y.Float(0);
4687
4688 for (auto i = 1u; i < numComponents; i++)
4689 {
4690 d += x.Float(i) * y.Float(i);
4691 }
4692
4693 return d;
4694 }
4695
4696 SIMD::UInt SpirvShader::FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const
4697 {
4698 static const uint32_t mask_sign = 0x80000000u;
4699 static const uint32_t mask_round = ~0xfffu;
4700 static const uint32_t c_f32infty = 255 << 23;
4701 static const uint32_t c_magic = 15 << 23;
4702 static const uint32_t c_nanbit = 0x200;
4703 static const uint32_t c_infty_as_fp16 = 0x7c00;
4704 static const uint32_t c_clamp = (31 << 23) - 0x1000;
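		// c_magic (15 << 23) is 2^-112 as float bits: multiplying by it rebiases the
		// exponent from float's 127 to half's 15, so the half value can then be
		// extracted with a 13-bit shift. c_clamp caps the scaled value at the largest
		// finite half.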
4705
4706 SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits;
4707 SIMD::UInt absf = floatBits ^ justsign;
4708 SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf);
4709
		// Note: this version does not round ties to nearest even as IEEE 754-2008 specifies;
		// it rounds towards +inf instead, which is acceptable for GLSL ES 3.0's needs
		// (see section 2.1.1, Floating-Point Computation).
4712 SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
4713 As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
4714 ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) |
4715 SIMD::UInt(c_infty_as_fp16)));
4716
4717 return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
4718 }
4719
4720 std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
4721 {
4722 // Assumes IEEE 754
4723 auto v = As<SIMD::UInt>(val);
4724 auto isNotZero = CmpNEQ(v & SIMD::UInt(0x7FFFFFFF), SIMD::UInt(0));
4725 auto zeroSign = v & SIMD::UInt(0x80000000) & ~isNotZero;
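		// Keep the mantissa and sign but force the exponent field to that of 0.5f
		// (0x3F000000), so the significand's magnitude falls in [0.5, 1.0); zero inputs
		// keep a zero significand with the original sign.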
4726 auto significand = As<SIMD::Float>((((v & SIMD::UInt(0x807FFFFF)) | SIMD::UInt(0x3F000000)) & isNotZero) | zeroSign);
4727 auto exponent = Exponent(val) & SIMD::Int(isNotZero);
4728 return std::make_pair(significand, exponent);
4729 }
4730
4731 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
4732 {
4733 auto &type = getType(insn.word(1));
4734 ASSERT(type.sizeInComponents == 1);
4735 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
4736 auto &srcType = getType(getObject(insn.word(3)).type);
4737 auto src = GenericValue(this, state, insn.word(3));
4738
4739 SIMD::UInt result = src.UInt(0);
4740
4741 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4742 {
4743 result |= src.UInt(i);
4744 }
4745
4746 dst.move(0, result);
4747 return EmitResult::Continue;
4748 }
4749
4750 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
4751 {
4752 auto &type = getType(insn.word(1));
4753 ASSERT(type.sizeInComponents == 1);
4754 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
4755 auto &srcType = getType(getObject(insn.word(3)).type);
4756 auto src = GenericValue(this, state, insn.word(3));
4757
4758 SIMD::UInt result = src.UInt(0);
4759
4760 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4761 {
4762 result &= src.UInt(i);
4763 }
4764
4765 dst.move(0, result);
4766 return EmitResult::Continue;
4767 }
4768
4769 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
4770 {
4771 auto target = Block::ID(insn.word(1));
4772 state->addActiveLaneMaskEdge(state->block, target, state->activeLaneMask());
4773 return EmitResult::Terminator;
4774 }
4775
4776 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
4777 {
4778 auto &function = getFunction(state->function);
4779 auto block = function.getBlock(state->block);
4780 ASSERT(block.branchInstruction == insn);
4781
4782 auto condId = Object::ID(block.branchInstruction.word(1));
4783 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
4784 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
4785
4786 auto cond = GenericValue(this, state, condId);
4787 ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
4788
4789 // TODO: Optimize for case where all lanes take same path.
4790
4791 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
4792 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
4793
4794 return EmitResult::Terminator;
4795 }
4796
4797 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
4798 {
4799 auto &function = getFunction(state->function);
4800 auto block = function.getBlock(state->block);
4801 ASSERT(block.branchInstruction == insn);
4802
4803 auto selId = Object::ID(block.branchInstruction.word(1));
4804
4805 auto sel = GenericValue(this, state, selId);
4806 ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");
4807
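		// OpSwitch operands: word 1 is the selector, word 2 the default label,
		// followed by (literal, label) pairs.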
4808 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
4809
4810 // TODO: Optimize for case where all lanes take same path.
4811
4812 SIMD::Int defaultLaneMask = state->activeLaneMask();
4813
4814 // Gather up the case label matches and calculate defaultLaneMask.
4815 std::vector<RValue<SIMD::Int>> caseLabelMatches;
4816 caseLabelMatches.reserve(numCases);
4817 for (uint32_t i = 0; i < numCases; i++)
4818 {
4819 auto label = block.branchInstruction.word(i * 2 + 3);
4820 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
4821 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
4822 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
4823 defaultLaneMask &= ~caseLabelMatch;
4824 }
4825
4826 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
4827 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
4828
4829 return EmitResult::Terminator;
4830 }
4831
4832 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
4833 {
4834 // TODO: Log something in this case?
4835 state->setActiveLaneMask(SIMD::Int(0));
4836 return EmitResult::Terminator;
4837 }
4838
4839 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
4840 {
4841 state->setActiveLaneMask(SIMD::Int(0));
4842 return EmitResult::Terminator;
4843 }
4844
4845 SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
4846 {
4847 state->routine->killMask |= SignMask(state->activeLaneMask());
4848 state->setActiveLaneMask(SIMD::Int(0));
4849 return EmitResult::Terminator;
4850 }
4851
4852 SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
4853 {
4854 auto functionId = Function::ID(insn.word(3));
4855 const auto& functionIt = functions.find(functionId);
4856 ASSERT(functionIt != functions.end());
4857 auto& function = functionIt->second;
4858
4859 // TODO(b/141246700): Add full support for spv::OpFunctionCall
		// The only supported function is a single OpKill wrapped in a function,
		// as produced by the "wrap OpKill" SPIRV-Tools pass.
4862 ASSERT(function.blocks.size() == 1);
4863 spv::Op wrapOpKill[] = { spv::OpLabel, spv::OpKill };
4864
4865 for (auto block : function.blocks)
4866 {
4867 int insnNumber = 0;
4868 for (auto blockInsn : block.second)
4869 {
4870 if (insnNumber > 1)
4871 {
4872 UNIMPLEMENTED("Function block number of instructions: %d", insnNumber);
4873 return EmitResult::Continue;
4874 }
4875 if (blockInsn.opcode() != wrapOpKill[insnNumber++])
4876 {
4877 UNIMPLEMENTED("Function block instruction %d : %s", insnNumber - 1, OpcodeName(blockInsn.opcode()).c_str());
4878 return EmitResult::Continue;
4879 }
4880 if (blockInsn.opcode() == spv::OpKill)
4881 {
4882 EmitInstruction(blockInsn, state);
4883 }
4884 }
4885 }
4886
4887 return EmitResult::Continue;
4888 }
4889
4890 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
4891 {
4892 auto &function = getFunction(state->function);
4893 auto currentBlock = function.getBlock(state->block);
4894 if (!currentBlock.isLoopMerge)
4895 {
			// Loop merge blocks are skipped here: EmitLoop() has already taken
			// special care of updating this phi's values in order to correctly
			// deal with divergent lanes, so don't update them from the ins.
4899 StorePhi(state->block, insn, state, currentBlock.ins);
4900 }
4901 LoadPhi(insn, state);
4902 return EmitResult::Continue;
4903 }
4904
4905 void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
4906 {
4907 auto typeId = Type::ID(insn.word(1));
4908 auto type = getType(typeId);
4909 auto objectId = Object::ID(insn.word(2));
4910
4911 auto storageIt = state->routine->phis.find(objectId);
4912 ASSERT(storageIt != state->routine->phis.end());
4913 auto &storage = storageIt->second;
4914
4915 auto &dst = state->createIntermediate(objectId, type.sizeInComponents);
4916 for(uint32_t i = 0; i < type.sizeInComponents; i++)
4917 {
4918 dst.move(i, storage[i]);
4919 }
4920 }
4921
4922 void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
4923 {
4924 auto typeId = Type::ID(insn.word(1));
4925 auto type = getType(typeId);
4926 auto objectId = Object::ID(insn.word(2));
4927
4928 auto storageIt = state->routine->phis.find(objectId);
4929 ASSERT(storageIt != state->routine->phis.end());
4930 auto &storage = storageIt->second;
4931
4932 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
4933 {
4934 auto varId = Object::ID(insn.word(w + 0));
4935 auto blockId = Block::ID(insn.word(w + 1));
4936
4937 if (filter.count(blockId) == 0)
4938 {
4939 continue;
4940 }
4941
4942 auto mask = GetActiveLaneMaskEdge(state, blockId, currentBlock);
4943 auto in = GenericValue(this, state, varId);
4944
4945 for (uint32_t i = 0; i < type.sizeInComponents; i++)
4946 {
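				// Blend the incoming value into the phi's storage on just the lanes
				// that arrived via this edge.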
4947 storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
4948 }
4949 }
4950 }
4951
4952 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
4953 {
4954 return EmitImageSample({variant, Implicit}, insn, state);
4955 }
4956
4957 SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
4958 {
4959 ImageInstruction instruction = {variant, Gather};
4960 instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;
4961
4962 return EmitImageSample(instruction, insn, state);
4963 }
4964
4965 SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
4966 {
4967 auto isDref = (variant == Dref) || (variant == ProjDref);
4968 uint32_t imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(isDref ? 6 : 5));
4969 imageOperands &= ~spv::ImageOperandsConstOffsetMask; // Dealt with later.
4970
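		// After stripping ConstOffset, either Lod or Grad must be the only remaining
		// image operand.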
4971 if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
4972 {
4973 return EmitImageSample({variant, Lod}, insn, state);
4974 }
4975 else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
4976 {
4977 return EmitImageSample({variant, Grad}, insn, state);
4978 }
4979 else UNIMPLEMENTED("Image Operands %x", imageOperands);
4980 return EmitResult::Continue;
4981 }
4982
4983 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
4984 {
4985 return EmitImageSample({None, Fetch}, insn, state);
4986 }
4987
4988 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
4989 {
4990 Type::ID resultTypeId = insn.word(1);
4991 Object::ID resultId = insn.word(2);
4992 Object::ID sampledImageId = insn.word(3); // For OpImageFetch this is just an Image, not a SampledImage.
4993 Object::ID coordinateId = insn.word(4);
4994 auto &resultType = getType(resultTypeId);
4995
4996 auto &result = state->createIntermediate(resultId, resultType.sizeInComponents);
4997 auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
4998
4999 // If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
5000 auto &sampledImage = getObject(sampledImageId);
5001 auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ?
5002 state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
5003
5004 auto coordinate = GenericValue(this, state, coordinateId);
5005 auto &coordinateType = getType(coordinate.type);
5006
5007 Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
5008 Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
5009
5010 // Above we assumed that if the SampledImage operand is not the result of an OpSampledImage,
5011 // it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch
5012 // it's just an Image operand, so there's no sampler descriptor data.
5013 if(getType(sampledImage.type).opcode() != spv::OpTypeSampledImage)
5014 {
5015 sampler = Pointer<Byte>(nullptr);
5016 }
5017
5018 uint32_t imageOperands = spv::ImageOperandsMaskNone;
5019 bool lodOrBias = false;
5020 Object::ID lodOrBiasId = 0;
5021 bool grad = false;
5022 Object::ID gradDxId = 0;
5023 Object::ID gradDyId = 0;
5024 bool constOffset = false;
5025 Object::ID offsetId = 0;
5026 bool sample = false;
5027 Object::ID sampleId = 0;
5028
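		// The optional image-operands word follows the Dref or gather-component
		// operand when one is present.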
5029 uint32_t operand = (instruction.isDref() || instruction.samplerMethod == Gather) ? 6 : 5;
5030
5031 if(insn.wordCount() > operand)
5032 {
5033 imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(operand++));
5034
5035 if(imageOperands & spv::ImageOperandsBiasMask)
5036 {
5037 lodOrBias = true;
5038 lodOrBiasId = insn.word(operand);
5039 operand++;
5040 imageOperands &= ~spv::ImageOperandsBiasMask;
5041
5042 ASSERT(instruction.samplerMethod == Implicit);
5043 instruction.samplerMethod = Bias;
5044 }
5045
5046 if(imageOperands & spv::ImageOperandsLodMask)
5047 {
5048 lodOrBias = true;
5049 lodOrBiasId = insn.word(operand);
5050 operand++;
5051 imageOperands &= ~spv::ImageOperandsLodMask;
5052 }
5053
5054 if(imageOperands & spv::ImageOperandsGradMask)
5055 {
5056 ASSERT(!lodOrBias); // SPIR-V 1.3: "It is invalid to set both the Lod and Grad bits." Bias is for ImplicitLod, Grad for ExplicitLod.
5057 grad = true;
5058 gradDxId = insn.word(operand + 0);
5059 gradDyId = insn.word(operand + 1);
5060 operand += 2;
5061 imageOperands &= ~spv::ImageOperandsGradMask;
5062 }
5063
5064 if(imageOperands & spv::ImageOperandsConstOffsetMask)
5065 {
5066 constOffset = true;
5067 offsetId = insn.word(operand);
5068 operand++;
5069 imageOperands &= ~spv::ImageOperandsConstOffsetMask;
5070 }
5071
5072 if(imageOperands & spv::ImageOperandsSampleMask)
5073 {
5074 sample = true;
5075 sampleId = insn.word(operand);
5076 imageOperands &= ~spv::ImageOperandsSampleMask;
5077
5078 ASSERT(instruction.samplerMethod == Fetch);
5079 instruction.sample = true;
5080 }
5081
5082 if(imageOperands != 0)
5083 {
5084 UNSUPPORTED("Image operand %x", imageOperands);
5085 }
5086 }
5087
5088 Array<SIMD::Float> in(16); // Maximum 16 input parameter components.
5089
5090 uint32_t coordinates = coordinateType.sizeInComponents - instruction.isProj();
5091 instruction.coordinates = coordinates;
5092
5093 uint32_t i = 0;
5094 for( ; i < coordinates; i++)
5095 {
5096 if(instruction.isProj())
5097 {
5098 in[i] = coordinate.Float(i) / coordinate.Float(coordinates); // TODO(b/129523279): Optimize using reciprocal.
5099 }
5100 else
5101 {
5102 in[i] = coordinate.Float(i);
5103 }
5104 }
5105
5106 if(instruction.isDref())
5107 {
5108 auto drefValue = GenericValue(this, state, insn.word(5));
5109
5110 if(instruction.isProj())
5111 {
5112 in[i] = drefValue.Float(0) / coordinate.Float(coordinates); // TODO(b/129523279): Optimize using reciprocal.
5113 }
5114 else
5115 {
5116 in[i] = drefValue.Float(0);
5117 }
5118
5119 i++;
5120 }
5121
5122 if(lodOrBias)
5123 {
5124 auto lodValue = GenericValue(this, state, lodOrBiasId);
5125 in[i] = lodValue.Float(0);
5126 i++;
5127 }
5128 else if(grad)
5129 {
5130 auto dxValue = GenericValue(this, state, gradDxId);
5131 auto dyValue = GenericValue(this, state, gradDyId);
5132 auto &dxyType = getType(dxValue.type);
5133 ASSERT(dxyType.sizeInComponents == getType(dyValue.type).sizeInComponents);
5134
5135 instruction.grad = dxyType.sizeInComponents;
5136
5137 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
5138 {
5139 in[i] = dxValue.Float(j);
5140 }
5141
5142 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
5143 {
5144 in[i] = dyValue.Float(j);
5145 }
5146 }
5147 else if (instruction.samplerMethod == Fetch)
5148 {
5149 // The instruction didn't provide a lod operand, but the sampler's Fetch
5150 // function requires one to be present. If no lod is supplied, the default
5151 // is zero.
5152 in[i] = As<SIMD::Float>(SIMD::Int(0));
5153 i++;
5154 }
5155
5156 if(constOffset)
5157 {
5158 auto offsetValue = GenericValue(this, state, offsetId);
5159 auto &offsetType = getType(offsetValue.type);
5160
5161 instruction.offset = offsetType.sizeInComponents;
5162
5163 for(uint32_t j = 0; j < offsetType.sizeInComponents; j++, i++)
5164 {
5165				in[i] = As<SIMD::Float>(offsetValue.Int(j)); // Integer values, but transferred as float.
5166 }
5167 }
5168
5169 if(sample)
5170 {
5171 auto sampleValue = GenericValue(this, state, sampleId);
5172 in[i] = sampleValue.Float(0);
5173 }
5174
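		// Each sampling call site owns a cache entry keyed on the descriptor
		// pointers: when the same image and sampler are seen again, the sampling
		// function built on the previous invocation is reused instead of calling
		// getImageSampler to obtain one.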
5175 auto cacheIt = state->routine->samplerCache.find(resultId);
5176 ASSERT(cacheIt != state->routine->samplerCache.end());
5177 auto &cache = cacheIt->second;
5178 auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
5179
5180 If(!cacheHit)
5181 {
5182 cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
5183 cache.imageDescriptor = imageDescriptor;
5184 cache.sampler = sampler;
5185 }
5186
5187 Array<SIMD::Float> out(4);
5188 Call<ImageSampler>(cache.function, texture, sampler, &in[0], &out[0], state->routine->constants);
5189
5190 for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
5191
5192 return EmitResult::Continue;
5193 }
5194
5195 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
5196 {
5197 auto &resultTy = getType(Type::ID(insn.word(1)));
5198 auto resultId = Object::ID(insn.word(2));
5199 auto imageId = Object::ID(insn.word(3));
5200 auto lodId = Object::ID(insn.word(4));
5201
5202 auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
5203 GetImageDimensions(state, resultTy, imageId, lodId, dst);
5204
5205 return EmitResult::Continue;
5206 }
5207
5208 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
5209 {
5210 auto &resultTy = getType(Type::ID(insn.word(1)));
5211 auto resultId = Object::ID(insn.word(2));
5212 auto imageId = Object::ID(insn.word(3));
5213 auto lodId = Object::ID(0);
5214
5215 auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
5216 GetImageDimensions(state, resultTy, imageId, lodId, dst);
5217
5218 return EmitResult::Continue;
5219 }
5220
5221 SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const
5222 {
5223 return EmitImageSample({None, Query}, insn, state);
5224 }
5225
5226 void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
5227 {
5228 auto routine = state->routine;
5229 auto &image = getObject(imageId);
5230 auto &imageType = getType(image.type);
5231
5232 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5233 bool isArrayed = imageType.definition.word(5) != 0;
5234 bool isCubeMap = imageType.definition.word(3) == spv::DimCube;
5235
5236 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5237 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
5238 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
5239
5240 Pointer<Byte> descriptor = state->getPointer(imageId).base;
5241
5242 Pointer<Int> extent;
5243 Int arrayLayers;
5244
5245 switch (bindingLayout.descriptorType)
5246 {
5247 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
5248 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
5249 {
5250 extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
5251 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
5252 break;
5253 }
5254 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
5255 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
5256 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
5257 {
5258 extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
5259 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
5260 break;
5261 }
5262 default:
5263 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
5264 }
5265
5266 auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
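		// Each successive mip level halves the extent, so level 'lod' has extent
		// max(baseExtent >> lod, 1) in each dimension.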
5268 if (lodId != 0)
5269 {
5270 auto lodVal = GenericValue(this, state, lodId);
5271 ASSERT(getType(lodVal.type).sizeInComponents == 1);
5272 auto lod = lodVal.Int(0);
5273 auto one = SIMD::Int(1);
5274 for (uint32_t i = 0; i < dimensions; i++)
5275 {
5276 dst.move(i, Max(SIMD::Int(extent[i]) >> lod, one));
5277 }
5278 }
5279 else
5280 {
5281 for (uint32_t i = 0; i < dimensions; i++)
5282 {
5283 dst.move(i, SIMD::Int(extent[i]));
5284 }
5285 }
5286
5287 if (isArrayed)
5288 {
5289 auto numElements = isCubeMap ? (arrayLayers / 6) : RValue<Int>(arrayLayers);
5290 dst.move(dimensions, SIMD::Int(numElements));
5291 }
5292 }
5293
5294 SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
5295 {
5296 auto &resultTy = getType(Type::ID(insn.word(1)));
5297 ASSERT(resultTy.sizeInComponents == 1);
5298 auto resultId = Object::ID(insn.word(2));
5299 auto imageId = Object::ID(insn.word(3));
5300
5301 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5302 auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
5303 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
5304
5305 Pointer<Byte> descriptor = state->getPointer(imageId).base;
5306 Int mipLevels = 0;
5307 switch (bindingLayout.descriptorType)
5308 {
5309 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
5310 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
5311 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
5312 mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
5313 break;
5314 default:
5315 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
5316 }
5317
5318 auto &dst = state->createIntermediate(resultId, 1);
5319 dst.move(0, SIMD::Int(mipLevels));
5320
5321 return EmitResult::Continue;
5322 }
5323
5324 SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
5325 {
5326 auto &resultTy = getType(Type::ID(insn.word(1)));
5327 ASSERT(resultTy.sizeInComponents == 1);
5328 auto resultId = Object::ID(insn.word(2));
5329 auto imageId = Object::ID(insn.word(3));
5330 auto imageTy = getType(getObject(imageId).type);
5331 ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
5332 ASSERT(imageTy.definition.word(3) == spv::Dim2D);
5333 ASSERT(imageTy.definition.word(6 /* MS */) == 1);
5334
5335 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5336 auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
5337 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
5338
5339 Pointer<Byte> descriptor = state->getPointer(imageId).base;
5340 Int sampleCount = 0;
5341 switch (bindingLayout.descriptorType)
5342 {
5343 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
5344 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
5345 break;
5346 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
5347 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
5348 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
5349 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
5350 break;
5351 default:
5352 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
5353 }
5354
5355 auto &dst = state->createIntermediate(resultId, 1);
5356 dst.move(0, SIMD::Int(sampleCount));
5357
5358 return EmitResult::Continue;
5359 }
5360
5361 SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
5362 {
5363 auto routine = state->routine;
5364 bool isArrayed = imageType.definition.word(5) != 0;
5365 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
5366 int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
5367
5368 SIMD::Int u = coordinate.Int(0);
5369 SIMD::Int v = SIMD::Int(0);
5370
5371 if (getType(coordinate.type).sizeInComponents > 1)
5372 {
5373 v = coordinate.Int(1);
5374 }
5375
5376 if (dim == spv::DimSubpassData)
5377 {
5378 u += routine->windowSpacePosition[0];
5379 v += routine->windowSpacePosition[1];
5380 }
5381
5382 if (useStencilAspect)
5383 {
5384 // Adjust addressing for quad layout. Pitches are already correct for the stencil aspect.
5385 // In the quad-layout block, pixel order is [x0,y0 x1,y0 x0,y1 x1,y1]
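			// For example, within one quad the (u,v) -> texel index mapping is:
			// (0,0)->0, (1,0)->1, (0,1)->2, (1,1)->3; both rows of the quad share
			// the same (even) v, so one pitch-stride row in memory holds two
			// source rows at double width.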
5386 u = ((v & SIMD::Int(1)) << 1) | ((u << 1) - (u & SIMD::Int(1)));
5387 v &= SIMD::Int(~1);
5388 }
5389
5390 auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
5391 ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
5392 : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
5393 auto slicePitch = SIMD::Int(
5394 *Pointer<Int>(descriptor + (useStencilAspect
5395 ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
5396 : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
5397 auto samplePitch = SIMD::Int(
5398 *Pointer<Int>(descriptor + (useStencilAspect
5399 ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
5400 : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
5401
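		// The texel address is base + u*texelSize + v*rowPitch + (z or layer)*slicePitch
		// (+ viewID*slicePitch for multiview subpass data) + sampleIndex*samplePitch.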
5402 ptr += u * SIMD::Int(texelSize);
5403 if (dims > 1)
5404 {
5405 ptr += v * rowPitch;
5406 }
5407 if (dims > 2)
5408 {
5409 ptr += coordinate.Int(2) * slicePitch;
5410 }
5411 if (isArrayed)
5412 {
5413 ptr += coordinate.Int(dims) * slicePitch;
5414 }
5415
5416 if (dim == spv::DimSubpassData)
5417 {
5418 // Multiview input attachment access is to the layer corresponding to the current view
5419 ptr += SIMD::Int(routine->viewID) * slicePitch;
5420 }
5421
5422 if (sampleId.value())
5423 {
5424 GenericValue sample(this, state, sampleId);
5425 ptr += sample.Int(0) * samplePitch;
5426 }
5427
5428 return ptr;
5429 }
5430
5431 void SpirvShader::Yield(YieldResult res) const
5432 {
5433 rr::Yield(RValue<Int>(int(res)));
5434 }
5435
5436 SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
5437 {
5438 auto &resultType = getType(Type::ID(insn.word(1)));
5439 auto imageId = Object::ID(insn.word(3));
5440 auto &image = getObject(imageId);
5441 auto &imageType = getType(image.type);
5442 Object::ID resultId = insn.word(2);
5443
5444 Object::ID sampleId = 0;
5445
5446 if (insn.wordCount() > 5)
5447 {
5448 int operand = 6;
5449 auto imageOperands = insn.word(5);
5450 if (imageOperands & spv::ImageOperandsSampleMask)
5451 {
5452 sampleId = insn.word(operand++);
5453 imageOperands &= ~spv::ImageOperandsSampleMask;
5454 }
5455
5456			// There should be no remaining image operands.
5457 ASSERT(!imageOperands);
5458 }
5459
5460 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5461 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
5462
5463 auto coordinate = GenericValue(this, state, insn.word(4));
5464 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5465
5466 // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
5467 // the renderpass data instead. In all other cases, we can use the format in the instruction.
5468 auto vkFormat = (dim == spv::DimSubpassData)
5469 ? inputAttachmentFormats[d.InputAttachmentIndex]
5470 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
5471
5472 // Depth+Stencil image attachments select aspect based on the Sampled Type of the
5473 // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
5474 auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
5475 getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
5476
5477 if (useStencilAspect)
5478 {
5479 vkFormat = VK_FORMAT_S8_UINT;
5480 }
5481
5482 auto pointer = state->getPointer(imageId);
5483 Pointer<Byte> binding = pointer.base;
5484 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
5485 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
5486 : OFFSET(vk::StorageImageDescriptor, ptr)));
5487
5488 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5489
5490 auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
5491
5492 auto texelSize = vk::Format(vkFormat).bytes();
5493 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
5494 auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
5495
5496 // "The value returned by a read of an invalid texel is undefined,
5497 // unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled."
5498 // TODO: Don't always assume a buffer resource.
5499 auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
5500
5501 SIMD::Int packed[4];
5502 // Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
5503 // of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
5504 // TODO: specialize for small formats?
5505 for (auto i = 0; i < (texelSize + 3)/4; i++)
5506 {
5507 packed[i] = SIMD::Load<SIMD::Int>(texelPtr, robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
5508 texelPtr += sizeof(float);
5509 }
5510
5511 // Format support requirements here come from two sources:
5512 // - Minimum required set of formats for loads from storage images
5513 // - Any format supported as a color or depth/stencil attachment, for input attachments
5514 switch(vkFormat)
5515 {
5516 case VK_FORMAT_R32G32B32A32_SFLOAT:
5517 case VK_FORMAT_R32G32B32A32_SINT:
5518 case VK_FORMAT_R32G32B32A32_UINT:
5519 dst.move(0, packed[0]);
5520 dst.move(1, packed[1]);
5521 dst.move(2, packed[2]);
5522 dst.move(3, packed[3]);
5523 break;
5524 case VK_FORMAT_R32_SINT:
5525 case VK_FORMAT_R32_UINT:
5526 dst.move(0, packed[0]);
5527 // Fill remaining channels with 0,0,1 (of the correct type)
5528 dst.move(1, SIMD::Int(0));
5529 dst.move(2, SIMD::Int(0));
5530 dst.move(3, SIMD::Int(1));
5531 break;
5532 case VK_FORMAT_R32_SFLOAT:
5533 case VK_FORMAT_D32_SFLOAT:
5534 case VK_FORMAT_D32_SFLOAT_S8_UINT:
5535 dst.move(0, packed[0]);
5536 // Fill remaining channels with 0,0,1 (of the correct type)
5537 dst.move(1, SIMD::Float(0));
5538 dst.move(2, SIMD::Float(0));
5539 dst.move(3, SIMD::Float(1));
5540 break;
5541 case VK_FORMAT_D16_UNORM:
5542 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
5543 dst.move(1, SIMD::Float(0));
5544 dst.move(2, SIMD::Float(0));
5545 dst.move(3, SIMD::Float(1));
5546 break;
5547 case VK_FORMAT_R16G16B16A16_SINT:
5548 dst.move(0, (packed[0] << 16) >> 16);
5549 dst.move(1, (packed[0]) >> 16);
5550 dst.move(2, (packed[1] << 16) >> 16);
5551 dst.move(3, (packed[1]) >> 16);
5552 break;
5553 case VK_FORMAT_R16G16B16A16_UINT:
5554 dst.move(0, packed[0] & SIMD::Int(0xffff));
5555 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5556 dst.move(2, packed[1] & SIMD::Int(0xffff));
5557 dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
5558 break;
5559 case VK_FORMAT_R16G16B16A16_SFLOAT:
5560 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5561 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
5562 dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
5563 dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
5564 break;
5565 case VK_FORMAT_R8G8B8A8_SNORM:
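			// Each signed byte is shifted into the top byte of a 32-bit lane before
			// the int->float conversion, so scaling by 1/0x7f000000 maps an 8-bit
			// value v to v/127; the clamp to [-1,1] handles the two's-complement
			// -128 case.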
5566 dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5567 dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5568 dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5569 dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5570 break;
5571 case VK_FORMAT_R8G8B8A8_UNORM:
5572 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
5573 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5574 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5575 dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5576 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5577 break;
5578 case VK_FORMAT_R8G8B8A8_SRGB:
5579 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
5580 dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5581 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5582 dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5583 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5584 break;
5585 case VK_FORMAT_B8G8R8A8_UNORM:
5586 dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5587 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5588 dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5589 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5590 break;
5591 case VK_FORMAT_B8G8R8A8_SRGB:
5592 dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5593 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5594 dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5595 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5596 break;
5597 case VK_FORMAT_R8G8B8A8_UINT:
5598 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
5599 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5600 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5601 dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF)));
5602 dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF)));
5603 break;
5604 case VK_FORMAT_R8G8B8A8_SINT:
5605 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
5606 dst.move(0, (packed[0] << 24) >> 24);
5607 dst.move(1, (packed[0] << 16) >> 24);
5608 dst.move(2, (packed[0] << 8) >> 24);
5609 dst.move(3, (packed[0]) >> 24);
5610 break;
5611 case VK_FORMAT_R8_UNORM:
5612 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5613 dst.move(1, SIMD::Float(0));
5614 dst.move(2, SIMD::Float(0));
5615 dst.move(3, SIMD::Float(1));
5616 break;
5617 case VK_FORMAT_R8_UINT:
5618 case VK_FORMAT_S8_UINT:
5619 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5620 dst.move(1, SIMD::UInt(0));
5621 dst.move(2, SIMD::UInt(0));
5622 dst.move(3, SIMD::UInt(1));
5623 break;
5624 case VK_FORMAT_R8_SINT:
5625 dst.move(0, (packed[0] << 24) >> 24);
5626 dst.move(1, SIMD::Int(0));
5627 dst.move(2, SIMD::Int(0));
5628 dst.move(3, SIMD::Int(1));
5629 break;
5630 case VK_FORMAT_R8G8_UNORM:
5631 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5632 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5633 dst.move(2, SIMD::Float(0));
5634 dst.move(3, SIMD::Float(1));
5635 break;
5636 case VK_FORMAT_R8G8_UINT:
5637 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5638 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5639 dst.move(2, SIMD::UInt(0));
5640 dst.move(3, SIMD::UInt(1));
5641 break;
5642 case VK_FORMAT_R8G8_SINT:
5643 dst.move(0, (packed[0] << 24) >> 24);
5644 dst.move(1, (packed[0] << 16) >> 24);
5645 dst.move(2, SIMD::Int(0));
5646 dst.move(3, SIMD::Int(1));
5647 break;
5648 case VK_FORMAT_R16_SFLOAT:
5649 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5650 dst.move(1, SIMD::Float(0));
5651 dst.move(2, SIMD::Float(0));
5652 dst.move(3, SIMD::Float(1));
5653 break;
5654 case VK_FORMAT_R16_UINT:
5655 dst.move(0, packed[0] & SIMD::Int(0xffff));
5656 dst.move(1, SIMD::UInt(0));
5657 dst.move(2, SIMD::UInt(0));
5658 dst.move(3, SIMD::UInt(1));
5659 break;
5660 case VK_FORMAT_R16_SINT:
5661 dst.move(0, (packed[0] << 16) >> 16);
5662 dst.move(1, SIMD::Int(0));
5663 dst.move(2, SIMD::Int(0));
5664 dst.move(3, SIMD::Int(1));
5665 break;
5666 case VK_FORMAT_R16G16_SFLOAT:
5667 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5668 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
5669 dst.move(2, SIMD::Float(0));
5670 dst.move(3, SIMD::Float(1));
5671 break;
5672 case VK_FORMAT_R16G16_UINT:
5673 dst.move(0, packed[0] & SIMD::Int(0xffff));
5674 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5675 dst.move(2, SIMD::UInt(0));
5676 dst.move(3, SIMD::UInt(1));
5677 break;
5678 case VK_FORMAT_R16G16_SINT:
5679 dst.move(0, (packed[0] << 16) >> 16);
5680 dst.move(1, (packed[0]) >> 16);
5681 dst.move(2, SIMD::Int(0));
5682 dst.move(3, SIMD::Int(1));
5683 break;
5684 case VK_FORMAT_R32G32_SINT:
5685 case VK_FORMAT_R32G32_UINT:
5686 dst.move(0, packed[0]);
5687 dst.move(1, packed[1]);
5688 dst.move(2, SIMD::Int(0));
5689 dst.move(3, SIMD::Int(1));
5690 break;
5691 case VK_FORMAT_R32G32_SFLOAT:
5692 dst.move(0, packed[0]);
5693 dst.move(1, packed[1]);
5694 dst.move(2, SIMD::Float(0));
5695 dst.move(3, SIMD::Float(1));
5696 break;
5697 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
5698 dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
5699 dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
5700 dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
5701 dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
5702 break;
5703 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
5704 dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5705 dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5706 dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5707 dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
5708 break;
5709 case VK_FORMAT_R5G6B5_UNORM_PACK16:
5710 dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5711 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
5712 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5713 dst.move(3, SIMD::Float(1));
5714 break;
5715 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
5716 dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5717 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5718 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5719 dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
5720 break;
5721 default:
5722 UNIMPLEMENTED("VkFormat %d", int(vkFormat));
5723 break;
5724 }
5725
5726 return EmitResult::Continue;
5727 }
5728
5729 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
5730 {
5731 auto imageId = Object::ID(insn.word(1));
5732 auto &image = getObject(imageId);
5733 auto &imageType = getType(image.type);
5734
5735 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5736
5737 // TODO(b/131171141): Not handling any image operands yet.
5738 ASSERT(insn.wordCount() == 4);
5739
5740 auto coordinate = GenericValue(this, state, insn.word(2));
5741 auto texel = GenericValue(this, state, insn.word(3));
5742
5743 Pointer<Byte> binding = state->getPointer(imageId).base;
5744 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
5745 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5746
5747 SIMD::Int packed[4];
5748 auto numPackedElements = 0u;
5749 int texelSize = 0;
5750 auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
5751 switch (format)
5752 {
5753 case spv::ImageFormatRgba32f:
5754 case spv::ImageFormatRgba32i:
5755 case spv::ImageFormatRgba32ui:
5756 texelSize = 16;
5757 packed[0] = texel.Int(0);
5758 packed[1] = texel.Int(1);
5759 packed[2] = texel.Int(2);
5760 packed[3] = texel.Int(3);
5761 numPackedElements = 4;
5762 break;
5763 case spv::ImageFormatR32f:
5764 case spv::ImageFormatR32i:
5765 case spv::ImageFormatR32ui:
5766 texelSize = 4;
5767 packed[0] = texel.Int(0);
5768 numPackedElements = 1;
5769 break;
5770 case spv::ImageFormatRgba8:
5771 texelSize = 4;
5772 packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
5773 ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
5774 ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
5775 ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
5776 numPackedElements = 1;
5777 break;
5778 case spv::ImageFormatRgba8Snorm:
5779 texelSize = 4;
5780 packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5781 SIMD::Int(0xFF)) |
5782 ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5783 SIMD::Int(0xFF)) << 8) |
5784 ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5785 SIMD::Int(0xFF)) << 16) |
5786 ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5787 SIMD::Int(0xFF)) << 24);
5788 numPackedElements = 1;
5789 break;
5790 case spv::ImageFormatRgba8i:
5791 case spv::ImageFormatRgba8ui:
5792 texelSize = 4;
5793 packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
5794 (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
5795 (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
5796 (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
5797 numPackedElements = 1;
5798 break;
5799 case spv::ImageFormatRgba16f:
5800 texelSize = 8;
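			// Pack four halves into two 32-bit words; FloatToHalfBits' second
			// argument, when true, places the converted half in the high 16 bits.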
5801 packed[0] = FloatToHalfBits(texel.UInt(0), false) | FloatToHalfBits(texel.UInt(1), true);
5802 packed[1] = FloatToHalfBits(texel.UInt(2), false) | FloatToHalfBits(texel.UInt(3), true);
5803 numPackedElements = 2;
5804 break;
5805 case spv::ImageFormatRgba16i:
5806 case spv::ImageFormatRgba16ui:
5807 texelSize = 8;
5808 packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
5809 packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
5810 numPackedElements = 2;
5811 break;
5812 case spv::ImageFormatRg32f:
5813 case spv::ImageFormatRg32i:
5814 case spv::ImageFormatRg32ui:
5815 texelSize = 8;
5816 packed[0] = texel.Int(0);
5817 packed[1] = texel.Int(1);
5818 numPackedElements = 2;
5819 break;
5820
5821 case spv::ImageFormatRg16f:
5822 case spv::ImageFormatR11fG11fB10f:
5823 case spv::ImageFormatR16f:
5824 case spv::ImageFormatRgba16:
5825 case spv::ImageFormatRgb10A2:
5826 case spv::ImageFormatRg16:
5827 case spv::ImageFormatRg8:
5828 case spv::ImageFormatR16:
5829 case spv::ImageFormatR8:
5830 case spv::ImageFormatRgba16Snorm:
5831 case spv::ImageFormatRg16Snorm:
5832 case spv::ImageFormatRg8Snorm:
5833 case spv::ImageFormatR16Snorm:
5834 case spv::ImageFormatR8Snorm:
5835 case spv::ImageFormatRg16i:
5836 case spv::ImageFormatRg8i:
5837 case spv::ImageFormatR16i:
5838 case spv::ImageFormatR8i:
5839 case spv::ImageFormatRgb10a2ui:
5840 case spv::ImageFormatRg16ui:
5841 case spv::ImageFormatRg8ui:
5842 case spv::ImageFormatR16ui:
5843 case spv::ImageFormatR8ui:
5844 UNIMPLEMENTED("spv::ImageFormat %d", int(format));
5845 break;
5846
5847 default:
5848 UNREACHABLE("spv::ImageFormat %d", int(format));
5849 break;
5850 }
5851
5852 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
5853 auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false);
5854
5855 // SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined."
5856 auto robustness = OutOfBoundsBehavior::UndefinedValue;
5857
5858 for (auto i = 0u; i < numPackedElements; i++)
5859 {
5860 SIMD::Store(texelPtr, packed[i], robustness, state->activeLaneMask());
5861 texelPtr += sizeof(float);
5862 }
5863
5864 return EmitResult::Continue;
5865 }
5866
5867 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(InsnIterator insn, EmitState *state) const
5868 {
5869 auto &resultType = getType(Type::ID(insn.word(1)));
5870 auto imageId = Object::ID(insn.word(3));
5871 auto &image = getObject(imageId);
5872 // Note: OpImageTexelPointer is unusual in that the image is passed by pointer.
5873 // Look through to get the actual image type.
5874 auto &imageType = getType(getType(image.type).element);
5875 Object::ID resultId = insn.word(2);
5876
5877 ASSERT(imageType.opcode() == spv::OpTypeImage);
5878 ASSERT(resultType.storageClass == spv::StorageClassImage);
5879 ASSERT(getType(resultType.element).opcode() == spv::OpTypeInt);
5880
5881 auto coordinate = GenericValue(this, state, insn.word(4));
5882
5883 Pointer<Byte> binding = state->getPointer(imageId).base;
5884 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
5885 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5886
5887 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
5888 auto ptr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, sizeof(uint32_t), 0, false);
5889
5890 state->createPointer(resultId, ptr);
5891
5892 return EmitResult::Continue;
5893 }
5894
5895 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
5896 {
5897 // Propagate the image pointer in both cases.
5898 // Consumers of OpSampledImage will look through to find the sampler pointer.
5899
5900 Object::ID resultId = insn.word(2);
5901 Object::ID imageId = insn.word(3);
5902
5903 state->createPointer(resultId, state->getPointer(imageId));
5904
5905 return EmitResult::Continue;
5906 }
5907
5908 SpirvShader::EmitResult SpirvShader::EmitAtomicOp(InsnIterator insn, EmitState *state) const
5909 {
5910 auto &resultType = getType(Type::ID(insn.word(1)));
5911 Object::ID resultId = insn.word(2);
5912 Object::ID semanticsId = insn.word(5);
5913 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
5914 auto memoryOrder = MemoryOrder(memorySemantics);
5915 // Where no value is provided (increment/decrement) use an implicit value of 1.
5916 auto value = (insn.wordCount() == 7) ? GenericValue(this, state, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
5917 auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
5918 auto ptr = state->getPointer(insn.word(3));
5919 auto ptrOffsets = ptr.offsets();
5920
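		// Reactor's atomic intrinsics operate on scalars, so serialize over the
		// SIMD lanes: for each lane that is active and enabled by
		// storesAndAtomicsMask(), extract the per-lane offset and operand, perform
		// the scalar atomic, and insert the old value back into the result vector.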
5921 SIMD::UInt x(0);
5922 auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
5923 for (int j = 0; j < SIMD::Width; j++)
5924 {
5925 If(Extract(mask, j) != 0)
5926 {
5927 auto offset = Extract(ptrOffsets, j);
5928 auto laneValue = Extract(value, j);
5929 UInt v;
5930 switch (insn.opcode())
5931 {
5932 case spv::OpAtomicIAdd:
5933 case spv::OpAtomicIIncrement:
5934 v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5935 break;
5936 case spv::OpAtomicISub:
5937 case spv::OpAtomicIDecrement:
5938 v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5939 break;
5940 case spv::OpAtomicAnd:
5941 v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5942 break;
5943 case spv::OpAtomicOr:
5944 v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5945 break;
5946 case spv::OpAtomicXor:
5947 v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5948 break;
5949 case spv::OpAtomicSMin:
5950 v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
5951 break;
5952 case spv::OpAtomicSMax:
5953 v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
5954 break;
5955 case spv::OpAtomicUMin:
5956 v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5957 break;
5958 case spv::OpAtomicUMax:
5959 v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5960 break;
5961 case spv::OpAtomicExchange:
5962 v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5963 break;
5964 default:
5965 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
5966 break;
5967 }
5968 x = Insert(x, v, j);
5969 }
5970 }
5971
5972 dst.move(0, x);
5973 return EmitResult::Continue;
5974 }
5975
5976 SpirvShader::EmitResult SpirvShader::EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const
5977 {
5978 // Separate from EmitAtomicOp due to different instruction encoding
5979 auto &resultType = getType(Type::ID(insn.word(1)));
5980 Object::ID resultId = insn.word(2);
5981
5982 auto memorySemanticsEqual = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(5)).constantValue[0]);
5983 auto memoryOrderEqual = MemoryOrder(memorySemanticsEqual);
5984 auto memorySemanticsUnequal = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(6)).constantValue[0]);
5985 auto memoryOrderUnequal = MemoryOrder(memorySemanticsUnequal);
5986
5987 auto value = GenericValue(this, state, insn.word(7));
5988 auto comparator = GenericValue(this, state, insn.word(8));
5989 auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
5990 auto ptr = state->getPointer(insn.word(3));
5991 auto ptrOffsets = ptr.offsets();
5992
5993 SIMD::UInt x(0);
5994 auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
5995 for (int j = 0; j < SIMD::Width; j++)
5996 {
5997 If(Extract(mask, j) != 0)
5998 {
5999 auto offset = Extract(ptrOffsets, j);
6000 auto laneValue = Extract(value.UInt(0), j);
6001 auto laneComparator = Extract(comparator.UInt(0), j);
6002 UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
6003 x = Insert(x, v, j);
6004 }
6005 }
6006
6007 dst.move(0, x);
6008 return EmitResult::Continue;
6009 }
6010
6011 SpirvShader::EmitResult SpirvShader::EmitCopyObject(InsnIterator insn, EmitState *state) const
6012 {
6013 auto ty = getType(insn.word(1));
6014 auto &dst = state->createIntermediate(insn.word(2), ty.sizeInComponents);
6015 auto src = GenericValue(this, state, insn.word(3));
6016 for (uint32_t i = 0; i < ty.sizeInComponents; i++)
6017 {
6018 dst.move(i, src.Int(i));
6019 }
6020 return EmitResult::Continue;
6021 }
6022
6023 SpirvShader::EmitResult SpirvShader::EmitCopyMemory(InsnIterator insn, EmitState *state) const
6024 {
6025 Object::ID dstPtrId = insn.word(1);
6026 Object::ID srcPtrId = insn.word(2);
6027 auto &dstPtrTy = getType(getObject(dstPtrId).type);
6028 auto &srcPtrTy = getType(getObject(srcPtrId).type);
6029 ASSERT(dstPtrTy.element == srcPtrTy.element);
6030
6031 bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass);
6032 bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass);
6033 auto dstPtr = GetPointerToData(dstPtrId, 0, state);
6034 auto srcPtr = GetPointerToData(srcPtrId, 0, state);
6035
6036 std::unordered_map<uint32_t, uint32_t> srcOffsets;
6037
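		// Visit the source object once to record each component's byte offset,
		// then walk the destination layout and copy component i from its recorded
		// source offset.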
6038 VisitMemoryObject(srcPtrId, [&](uint32_t i, uint32_t srcOffset) { srcOffsets[i] = srcOffset; });
6039
6040 VisitMemoryObject(dstPtrId, [&](uint32_t i, uint32_t dstOffset)
6041 {
6042 auto it = srcOffsets.find(i);
6043 ASSERT(it != srcOffsets.end());
6044 auto srcOffset = it->second;
6045
6046 auto dst = dstPtr + dstOffset;
6047 auto src = srcPtr + srcOffset;
6048 if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
6049 if (srcInterleavedByLane) { src = interleaveByLane(src); }
6050
6051 // TODO(b/131224163): Optimize based on src/dst storage classes.
6052 auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
6053
6054 auto value = SIMD::Load<SIMD::Float>(src, robustness, state->activeLaneMask());
6055 SIMD::Store(dst, value, robustness, state->activeLaneMask());
6056 });
6057 return EmitResult::Continue;
6058 }
6059
6060 SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
6061 {
6062 auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
6063 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
6064 // TODO: We probably want to consider the memory scope here. For now,
6065 // just always emit the full fence.
6066 Fence(semantics);
6067
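		// A workgroup barrier yields out of the shader's coroutine so the
		// scheduler can resume every invocation in the workgroup past this point.
		// Subgroup scope needs no further action here, since the lanes of a
		// subgroup execute in lockstep.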
6068 switch (executionScope)
6069 {
6070 case spv::ScopeWorkgroup:
6071 Yield(YieldResult::ControlBarrier);
6072 break;
6073 case spv::ScopeSubgroup:
6074 break;
6075 default:
6076 // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
6077 UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
6078 break;
6079 }
6080
6081 return EmitResult::Continue;
6082 }
6083
6084 SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
6085 {
6086 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
6087 // TODO: We probably want to consider the memory scope here. For now,
6088 // just always emit the full fence.
6089 Fence(semantics);
6090 return EmitResult::Continue;
6091 }
6092
6093 void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
6094 {
6095 if (semantics == spv::MemorySemanticsMaskNone)
6096 {
6097			return; // no-op
6098 }
6099 rr::Fence(MemoryOrder(semantics));
6100 }
6101
6102 SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
6103 {
6104 static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4");
6105
6106 auto &type = getType(Type::ID(insn.word(1)));
6107 Object::ID resultId = insn.word(2);
6108 auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
6109 ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");
6110
6111 auto &dst = state->createIntermediate(resultId, type.sizeInComponents);
6112
6113 switch (insn.opcode())
6114 {
6115 case spv::OpGroupNonUniformElect:
6116 {
6117 // Result is true only in the active invocation with the lowest id
6118 // in the group, otherwise result is false.
6119 SIMD::Int active = state->activeLaneMask();
6120 // TODO: Would be nice if we could write this as:
6121 // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
6122 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
6123 auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
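			// For example, with active = (0, ~0, ~0, 0) the prefix-OR term
			// v0111 & (...) evaluates to (0, 0, ~0, ~0), so elect = (0, ~0, 0, 0):
			// only the lowest active lane (lane 1) remains set.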
6124 dst.move(0, elect);
6125 break;
6126 }
6127
6128 case spv::OpGroupNonUniformAll:
6129 {
6130 GenericValue predicate(this, state, insn.word(4));
6131 dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask())));
6132 break;
6133 }
6134
6135 case spv::OpGroupNonUniformAny:
6136 {
6137 GenericValue predicate(this, state, insn.word(4));
6138 dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask())));
6139 break;
6140 }
6141
6142 case spv::OpGroupNonUniformAllEqual:
6143 {
6144 GenericValue value(this, state, insn.word(4));
6145 auto res = SIMD::UInt(0xffffffff);
6146 SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
6147 SIMD::UInt inactive = ~active;
6148 for (auto i = 0u; i < type.sizeInComponents; i++)
6149 {
6150 SIMD::UInt v = value.UInt(i) & active;
6151 SIMD::UInt filled = v;
6152 for (int j = 0; j < SIMD::Width - 1; j++)
6153 {
6154 filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value
6155 }
6156 res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
6157 }
6158 dst.move(0, res);
6159 break;
6160 }
6161
6162 case spv::OpGroupNonUniformBroadcast:
6163 {
6164 auto valueId = Object::ID(insn.word(4));
6165 auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
6166 GenericValue value(this, state, valueId);
6167 auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
6168 for (auto i = 0u; i < type.sizeInComponents; i++)
6169 {
6170 dst.move(i, OrAll(value.Int(i) & mask));
6171 }
6172 break;
6173 }
6174
6175 case spv::OpGroupNonUniformBroadcastFirst:
6176 {
6177 auto valueId = Object::ID(insn.word(4));
6178 GenericValue value(this, state, valueId);
6179				// Result is the value of the Value operand in the active invocation
6180				// with the lowest id in the group, broadcast to all active invocations.
6181 SIMD::Int active = state->activeLaneMask();
6182 // TODO: Would be nice if we could write this as:
6183 // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
6184 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
6185 auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
6186 for (auto i = 0u; i < type.sizeInComponents; i++)
6187 {
6188 dst.move(i, OrAll(value.Int(i) & elect));
6189 }
6190 break;
6191 }
6192
6193 case spv::OpGroupNonUniformBallot:
6194 {
6195 ASSERT(type.sizeInComponents == 4);
6196 GenericValue predicate(this, state, insn.word(4));
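			// SignMask gathers the sign bit of each of the four lanes into bits
			// [3:0] of a scalar; that scalar is the subgroup ballot, and components
			// 1..3 of the 4-component result stay zero.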
6197 dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
6198 dst.move(1, SIMD::Int(0));
6199 dst.move(2, SIMD::Int(0));
6200 dst.move(3, SIMD::Int(0));
6201 break;
6202 }
6203
6204 case spv::OpGroupNonUniformInverseBallot:
6205 {
6206 auto valueId = Object::ID(insn.word(4));
6207 ASSERT(type.sizeInComponents == 1);
6208 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6209 GenericValue value(this, state, valueId);
6210 auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
6211 dst.move(0, -bit);
6212 break;
6213 }
6214
6215 case spv::OpGroupNonUniformBallotBitExtract:
6216 {
6217 auto valueId = Object::ID(insn.word(4));
6218 auto indexId = Object::ID(insn.word(5));
6219 ASSERT(type.sizeInComponents == 1);
6220 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6221 ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
6222 GenericValue value(this, state, valueId);
6223 GenericValue index(this, state, indexId);
6224 auto vecIdx = index.Int(0) / SIMD::Int(32);
6225 auto bitIdx = index.Int(0) & SIMD::Int(31);
6226 auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
6227 (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
6228 (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
6229 (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
6230 dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
6231 break;
6232 }
6233
6234 case spv::OpGroupNonUniformBallotBitCount:
6235 {
6236 auto operation = spv::GroupOperation(insn.word(4));
6237 auto valueId = Object::ID(insn.word(5));
6238 ASSERT(type.sizeInComponents == 1);
6239 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6240 GenericValue value(this, state, valueId);
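			// Lane i of the mask selects which ballot bits that lane may count:
			// Reduce counts all four bits in every lane (mask 15), InclusiveScan
			// restricts lane i to bits [0..i] (masks 1,3,7,15), and ExclusiveScan
			// to bits [0..i-1] (masks 0,1,3,7).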
6241 switch (operation)
6242 {
6243 case spv::GroupOperationReduce:
6244 dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
6245 break;
6246 case spv::GroupOperationInclusiveScan:
6247 dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
6248 break;
6249 case spv::GroupOperationExclusiveScan:
6250 dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
6251 break;
6252 default:
6253 UNSUPPORTED("GroupOperation %d", int(operation));
6254 }
6255 break;
6256 }
6257
6258 case spv::OpGroupNonUniformBallotFindLSB:
6259 {
6260 auto valueId = Object::ID(insn.word(4));
6261 ASSERT(type.sizeInComponents == 1);
6262 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6263 GenericValue value(this, state, valueId);
6264 dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
6265 break;
6266 }
6267
6268 case spv::OpGroupNonUniformBallotFindMSB:
6269 {
6270 auto valueId = Object::ID(insn.word(4));
6271 ASSERT(type.sizeInComponents == 1);
6272 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6273 GenericValue value(this, state, valueId);
6274 dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
6275 break;
6276 }
6277
6278 case spv::OpGroupNonUniformShuffle:
6279 {
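			// With a subgroup width of 4, a dynamic shuffle reduces to a 4-way
			// select: compare the (per-lane) source id against each lane index and
			// OR together the matching broadcasts.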
6280 GenericValue value(this, state, insn.word(4));
6281 GenericValue id(this, state, insn.word(5));
6282 auto x = CmpEQ(SIMD::Int(0), id.Int(0));
6283 auto y = CmpEQ(SIMD::Int(1), id.Int(0));
6284 auto z = CmpEQ(SIMD::Int(2), id.Int(0));
6285 auto w = CmpEQ(SIMD::Int(3), id.Int(0));
6286 for (auto i = 0u; i < type.sizeInComponents; i++)
6287 {
6288 SIMD::Int v = value.Int(i);
6289 dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
6290 }
6291 break;
6292 }
6293
6294 case spv::OpGroupNonUniformShuffleXor:
6295 {
6296 GenericValue value(this, state, insn.word(4));
6297 GenericValue mask(this, state, insn.word(5));
6298 auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6299 auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6300 auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6301 auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6302 for (auto i = 0u; i < type.sizeInComponents; i++)
6303 {
6304 SIMD::Int v = value.Int(i);
6305 dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
6306 }
6307 break;
6308 }
6309
6310 case spv::OpGroupNonUniformShuffleUp:
6311 {
6312 GenericValue value(this, state, insn.word(4));
6313 GenericValue delta(this, state, insn.word(5));
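			// ShuffleUp by delta reads from lane (i - delta); each dN mask selects
			// the swizzle for that delta value. Lanes where i - delta < 0 reuse
			// lane 0's component, which is acceptable since the result there is
			// undefined anyway.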
6314 auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
6315 auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
6316 auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
6317 auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
6318 for (auto i = 0u; i < type.sizeInComponents; i++)
6319 {
6320 SIMD::Int v = value.Int(i);
6321 dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
6322 }
6323 break;
6324 }
6325
6326 case spv::OpGroupNonUniformShuffleDown:
6327 {
6328 GenericValue value(this, state, insn.word(4));
6329 GenericValue delta(this, state, insn.word(5));
6330 auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
6331 auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
6332 auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
6333 auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
6334 for (auto i = 0u; i < type.sizeInComponents; i++)
6335 {
6336 SIMD::Int v = value.Int(i);
6337 dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
6338 }
6339 break;
6340 }
6341
6342 default:
6343			UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(insn.opcode()).c_str());
6344 }
6345 return EmitResult::Continue;
6346 }
6347
6348 SpirvShader::EmitResult SpirvShader::EmitArrayLength(InsnIterator insn, EmitState *state) const
6349 {
6350 auto resultTyId = Type::ID(insn.word(1));
6351 auto resultId = Object::ID(insn.word(2));
6352 auto structPtrId = Object::ID(insn.word(3));
6353 auto arrayFieldIdx = insn.word(4);
6354
6355 auto &resultType = getType(resultTyId);
6356 ASSERT(resultType.sizeInComponents == 1);
6357 ASSERT(resultType.definition.opcode() == spv::OpTypeInt);
6358
6359 auto &structPtrTy = getType(getObject(structPtrId).type);
6360 auto &structTy = getType(structPtrTy.element);
6361 auto &arrayTy = getType(structTy.definition.word(2 + arrayFieldIdx));
6362 ASSERT(arrayTy.definition.opcode() == spv::OpTypeRuntimeArray);
6363 auto &arrayElTy = getType(arrayTy.element);
6364
6365 auto &result = state->createIntermediate(resultId, 1);
6366 auto structBase = GetPointerToData(structPtrId, 0, state);
6367
6368 Decorations d = {};
6369 ApplyDecorationsForIdMember(&d, structPtrTy.element, arrayFieldIdx);
6370 ASSERT(d.HasOffset);
6371
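		// The runtime array occupies the tail of the buffer, so its length is the
		// remaining byte count divided by the element stride; the stride here is
		// taken as sizeInComponents * sizeof(float), i.e. tightly packed 32-bit
		// components.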
6372 auto arrayBase = structBase + d.Offset;
6373 auto arraySizeInBytes = SIMD::Int(arrayBase.limit()) - arrayBase.offsets();
6374 auto arrayLength = arraySizeInBytes / SIMD::Int(arrayElTy.sizeInComponents * sizeof(float));
6375
6376 result.move(0, SIMD::Int(arrayLength));
6377
6378 return EmitResult::Continue;
6379 }
6380
6381 uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
6382 {
6383 auto &scopeObj = getObject(id);
6384 ASSERT(scopeObj.kind == Object::Kind::Constant);
6385 ASSERT(getType(scopeObj.type).sizeInComponents == 1);
6386 return scopeObj.constantValue[0];
6387 }
6388
6389 void SpirvShader::EvalSpecConstantOp(InsnIterator insn)
6390 {
6391 auto opcode = static_cast<spv::Op>(insn.word(3));
6392
6393 switch (opcode)
6394 {
6395 case spv::OpIAdd:
6396 case spv::OpISub:
6397 case spv::OpIMul:
6398 case spv::OpUDiv:
6399 case spv::OpSDiv:
6400 case spv::OpUMod:
6401 case spv::OpSMod:
6402 case spv::OpSRem:
6403 case spv::OpShiftRightLogical:
6404 case spv::OpShiftRightArithmetic:
6405 case spv::OpShiftLeftLogical:
6406 case spv::OpBitwiseOr:
6407 case spv::OpLogicalOr:
6408 case spv::OpBitwiseAnd:
6409 case spv::OpLogicalAnd:
6410 case spv::OpBitwiseXor:
6411 case spv::OpLogicalEqual:
6412 case spv::OpIEqual:
6413 case spv::OpLogicalNotEqual:
6414 case spv::OpINotEqual:
6415 case spv::OpULessThan:
6416 case spv::OpSLessThan:
6417 case spv::OpUGreaterThan:
6418 case spv::OpSGreaterThan:
6419 case spv::OpULessThanEqual:
6420 case spv::OpSLessThanEqual:
6421 case spv::OpUGreaterThanEqual:
6422 case spv::OpSGreaterThanEqual:
6423 EvalSpecConstantBinaryOp(insn);
6424 break;
6425
6426 case spv::OpSConvert:
6427 case spv::OpFConvert:
6428 case spv::OpUConvert:
6429 case spv::OpSNegate:
6430 case spv::OpNot:
6431 case spv::OpLogicalNot:
6432 case spv::OpQuantizeToF16:
6433 EvalSpecConstantUnaryOp(insn);
6434 break;
6435
6436 case spv::OpSelect:
6437 {
6438 auto &result = CreateConstant(insn);
6439 auto const &cond = getObject(insn.word(4));
6440 auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
6441 auto const &left = getObject(insn.word(5));
6442 auto const &right = getObject(insn.word(6));
6443
6444 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
6445 {
6446 auto sel = cond.constantValue[condIsScalar ? 0 : i];
6447 result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i];
6448 }
6449 break;
6450 }
6451
6452 case spv::OpCompositeExtract:
6453 {
6454 auto &result = CreateConstant(insn);
6455 auto const &compositeObject = getObject(insn.word(4));
6456 auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
6457
6458 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
6459 {
6460 result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
6461 }
6462 break;
6463 }
6464
6465 case spv::OpCompositeInsert:
6466 {
6467 auto &result = CreateConstant(insn);
6468 auto const &newPart = getObject(insn.word(4));
6469 auto const &oldObject = getObject(insn.word(5));
6470 auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
6471
6472 // old components before
6473 for (auto i = 0u; i < firstNewComponent; i++)
6474 {
6475 result.constantValue[i] = oldObject.constantValue[i];
6476 }
6477 // new part
6478 for (auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
6479 {
6480 result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
6481 }
6482 // old components after
6483 for (auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
6484 {
6485 result.constantValue[i] = oldObject.constantValue[i];
6486 }
6487 break;
6488 }
6489
6490 case spv::OpVectorShuffle:
6491 {
6492 auto &result = CreateConstant(insn);
6493 auto const &firstHalf = getObject(insn.word(4));
6494 auto const &secondHalf = getObject(insn.word(5));
6495
6496 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
6497 {
6498 auto selector = insn.word(6 + i);
6499 if (selector == static_cast<uint32_t>(-1))
6500 {
6501 // Undefined value, we'll use zero
6502 result.constantValue[i] = 0;
6503 }
6504 else if (selector < getType(firstHalf.type).sizeInComponents)
6505 {
6506 result.constantValue[i] = firstHalf.constantValue[selector];
6507 }
6508 else
6509 {
6510 result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
6511 }
6512 }
6513 break;
6514 }
6515
6516 default:
6517 // Other spec constant ops are possible, but require capabilities that are
6518		// not exposed in our Vulkan implementation (e.g. Kernel), so we should never
6519 // get here for correct shaders.
6520 UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
6521 }
6522 }
6523
6524 void SpirvShader::EvalSpecConstantUnaryOp(InsnIterator insn)
6525 {
6526 auto &result = CreateConstant(insn);
6527
6528 auto opcode = static_cast<spv::Op>(insn.word(3));
6529 auto const &lhs = getObject(insn.word(4));
6530 auto size = getType(lhs.type).sizeInComponents;
6531
6532 for (auto i = 0u; i < size; i++)
6533 {
6534 auto &v = result.constantValue[i];
6535 auto l = lhs.constantValue[i];
6536
6537 switch (opcode)
6538 {
6539 case spv::OpSConvert:
6540 case spv::OpFConvert:
6541 case spv::OpUConvert:
6542 UNREACHABLE("Not possible until we have multiple bit widths");
6543 break;
6544
6545 case spv::OpSNegate:
6546 v = -(int)l;
6547 break;
6548 case spv::OpNot:
6549 case spv::OpLogicalNot:
6550 v = ~l;
6551 break;
6552
6553 case spv::OpQuantizeToF16:
6554 {
6555 // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
6556 auto abs = bit_cast<float>(l & 0x7FFFFFFF);
6557 auto sign = l & 0x80000000;
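				// 0.000061035f is roughly 2^-14, the smallest normal half-float, and
				// 65504 is the largest finite half. Masking with 0xFFFFE000 clears the
				// 13 low mantissa bits that a float->half->float round trip discards.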
6558 auto isZero = abs < 0.000061035f ? ~0u : 0u;
6559 auto isInf = abs > 65504.0f ? ~0u : 0u;
6560 auto isNaN = (abs != abs) ? ~0u : 0u;
6561 auto isInfOrNan = isInf | isNaN;
6562 v = l & 0xFFFFE000;
6563 v &= ~isZero | 0x80000000;
6564 v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
6565 v |= isNaN & 0x400000;
6566 break;
6567 }
6568 default:
6569 UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
6570 }
6571 }
6572 }
6573
6574 void SpirvShader::EvalSpecConstantBinaryOp(InsnIterator insn)
6575 {
6576 auto &result = CreateConstant(insn);
6577
6578 auto opcode = static_cast<spv::Op>(insn.word(3));
6579 auto const &lhs = getObject(insn.word(4));
6580 auto const &rhs = getObject(insn.word(5));
6581 auto size = getType(lhs.type).sizeInComponents;
6582
6583 for (auto i = 0u; i < size; i++)
6584 {
6585 auto &v = result.constantValue[i];
6586 auto l = lhs.constantValue[i];
6587 auto r = rhs.constantValue[i];
6588
6589 switch (opcode)
6590 {
6591 case spv::OpIAdd:
6592 v = l + r;
6593 break;
6594 case spv::OpISub:
6595 v = l - r;
6596 break;
6597 case spv::OpIMul:
6598 v = l * r;
6599 break;
6600 case spv::OpUDiv:
6601 v = (r == 0) ? 0 : l / r;
6602 break;
6603 case spv::OpUMod:
6604 v = (r == 0) ? 0 : l % r;
6605 break;
6606 case spv::OpSDiv:
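			// Guard against host-side undefined behaviour: x/0 traps, and
			// INT32_MIN / -1 overflows. SPIR-V leaves these results undefined,
			// so substituting -1 (UINT32_MAX) for the offending operand is
			// permissible.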
6607 if (r == 0) r = UINT32_MAX;
6608 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
6609 v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
6610 break;
6611 case spv::OpSRem:
6612 if (r == 0) r = UINT32_MAX;
6613 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
6614 v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
6615 break;
6616 case spv::OpSMod:
6617 if (r == 0) r = UINT32_MAX;
6618 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
6619 // Test if a signed-multiply would be negative.
6620 v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
6621 if ((v & 0x80000000) != (r & 0x80000000))
6622 v += r;
6623 break;
			case spv::OpShiftRightLogical:
				v = l >> r;
				break;
			case spv::OpShiftRightArithmetic:
				v = static_cast<int32_t>(l) >> r;
				break;
			case spv::OpShiftLeftLogical:
				v = l << r;
				break;
			// Booleans are stored as 0 or ~0u, so the bitwise forms below double
			// as the logical ones.
			case spv::OpBitwiseOr:
			case spv::OpLogicalOr:
				v = l | r;
				break;
			case spv::OpBitwiseAnd:
			case spv::OpLogicalAnd:
				v = l & r;
				break;
			case spv::OpBitwiseXor:
				v = l ^ r;
				break;
			case spv::OpLogicalEqual:
			case spv::OpIEqual:
				v = (l == r) ? ~0u : 0u;
				break;
			case spv::OpLogicalNotEqual:
			case spv::OpINotEqual:
				v = (l != r) ? ~0u : 0u;
				break;
			case spv::OpULessThan:
				v = l < r ? ~0u : 0u;
				break;
			case spv::OpSLessThan:
				v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			case spv::OpUGreaterThan:
				v = l > r ? ~0u : 0u;
				break;
			case spv::OpSGreaterThan:
				v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			case spv::OpULessThanEqual:
				v = l <= r ? ~0u : 0u;
				break;
			case spv::OpSLessThanEqual:
				v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			case spv::OpUGreaterThanEqual:
				v = l >= r ? ~0u : 0u;
				break;
			case spv::OpSGreaterThanEqual:
				v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			default:
				UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
			}
		}
	}

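	// Copies the final values of output interface variables into the routine's
	// flat output array, using each variable's Location/Component decorations to
	// compute its scalar slot.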
	void SpirvShader::emitEpilog(SpirvRoutine *routine) const
	{
		for (auto insn : *this)
		{
			switch (insn.opcode())
			{
			case spv::OpVariable:
			{
				Object::ID resultId = insn.word(2);
				auto &object = getObject(resultId);
				auto &objectTy = getType(object.type);
				if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
				{
					auto &dst = routine->getVariable(resultId);
					int offset = 0;
					VisitInterface(resultId,
						[&](Decorations const &d, AttribType type) {
							auto scalarSlot = d.Location << 2 | d.Component;
							routine->outputs[scalarSlot] = dst[offset++];
						});
				}
				break;
			}
			default:
				break;
			}
		}

		// Clear phis that are no longer used. This serves two purposes:
		// (1) The phi rr::Variables are destructed, preventing pointless
		//     materialization.
		// (2) Frees memory that will never be used again.
		routine->phis.clear();
	}

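	// Classifies the block by inspecting its terminating branch instruction and
	// the merge instruction (if any) immediately preceding it, and records the
	// IDs of all successor blocks in 'outs'.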
	SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
	{
		// Default to a Simple block; the terminator inspected below may refine this.
		kind = Block::Simple;

		// Walk the instructions to find the last two instructions of the block.
		InsnIterator insns[2];
		for (auto insn : *this)
		{
			insns[0] = insns[1];
			insns[1] = insn;
		}

		switch (insns[1].opcode())
		{
		case spv::OpBranch:
			branchInstruction = insns[1];
			outs.emplace(Block::ID(branchInstruction.word(1)));

			switch (insns[0].opcode())
			{
			case spv::OpLoopMerge:
				kind = Loop;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				continueTarget = Block::ID(mergeInstruction.word(2));
				break;

			default:
				kind = Block::Simple;
				break;
			}
			break;

		case spv::OpBranchConditional:
			branchInstruction = insns[1];
			outs.emplace(Block::ID(branchInstruction.word(2)));
			outs.emplace(Block::ID(branchInstruction.word(3)));

			switch (insns[0].opcode())
			{
			case spv::OpSelectionMerge:
				kind = StructuredBranchConditional;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				break;

			case spv::OpLoopMerge:
				kind = Loop;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				continueTarget = Block::ID(mergeInstruction.word(2));
				break;

			default:
				kind = UnstructuredBranchConditional;
				break;
			}
			break;

		case spv::OpSwitch:
			branchInstruction = insns[1];
			outs.emplace(Block::ID(branchInstruction.word(2)));  // default target
			for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
			{
				outs.emplace(Block::ID(branchInstruction.word(w)));  // case target
			}

			switch (insns[0].opcode())
			{
			case spv::OpSelectionMerge:
				kind = StructuredSwitch;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				break;

			default:
				kind = UnstructuredSwitch;
				break;
			}
			break;

		default:
			break;
		}
	}

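	// Recursively collects into 'reachable' the IDs of all blocks reachable from
	// 'id' by following successor edges.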
	void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
	{
		if (reachable.count(id) == 0)
		{
			reachable.emplace(id);
			for (auto out : getBlock(id).outs)
			{
				TraverseReachableBlocks(out, reachable);
			}
		}
	}

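	// Populates each reachable block's predecessor set ('ins') from the successor
	// sets built by the Block constructor, and flags the merge block of every
	// reachable loop.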
	void SpirvShader::Function::AssignBlockFields()
	{
		Block::Set reachable;
		TraverseReachableBlocks(entry, reachable);

		for (auto &it : blocks)
		{
			auto &blockId = it.first;
			auto &block = it.second;
			if (reachable.count(blockId) > 0)
			{
				for (auto &outId : block.outs)
				{
					auto outIt = blocks.find(outId);
					ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
					auto &out = outIt->second;
					out.ins.emplace(blockId);
				}
				if (block.kind == Block::Loop)
				{
					auto mergeIt = blocks.find(block.mergeBlock);
					ASSERT_MSG(mergeIt != blocks.end(), "Loop block %d has a non-existent merge block %d", blockId.value(), block.mergeBlock.value());
					mergeIt->second.isLoopMerge = true;
				}
			}
		}
	}

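	// Calls f with each predecessor of blockId whose edge must be resolved before
	// the block itself: every 'in' edge, except loop back edges (predecessors
	// that the loop header can reach without passing through its merge block).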
	void SpirvShader::Function::ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const
	{
		auto const &block = getBlock(blockId);
		for (auto dep : block.ins)
		{
			if (block.kind != Block::Loop ||                 // if this is not a loop header...
				!ExistsPath(blockId, dep, block.mergeBlock)) // ... or dep is not reached via a back edge
			{
				f(dep);
			}
		}
	}

	bool SpirvShader::Function::ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
	{
		// TODO: Optimize: This can be cached on the block.
		Block::Set seen;
		seen.emplace(notPassingThrough);

		std::queue<Block::ID> pending;
		pending.emplace(from);

		while (!pending.empty())
		{
			auto id = pending.front();
			pending.pop();
			for (auto out : getBlock(id).outs)
			{
				if (seen.count(out) != 0) { continue; }
				if (out == to) { return true; }
				pending.emplace(out);
			}
			seen.emplace(id);
		}

		return false;
	}

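	// Active lane masks are tracked per CFG edge. When the same edge is recorded
	// more than once (e.g. when multiple switch cases target the same block), the
	// masks are ORed together; GetActiveLaneMaskEdge() later reads the combined
	// mask when computing a block's entry mask.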
	void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
	{
		addActiveLaneMaskEdge(block, to, mask & activeLaneMask());
	}

	void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
	{
		auto edge = Block::Edge{from, to};
		auto it = edgeActiveLaneMasks.find(edge);
		if (it == edgeActiveLaneMasks.end())
		{
			edgeActiveLaneMasks.emplace(edge, mask);
		}
		else
		{
			// RValue<> is not assignable, so replace the map entry with the
			// combined mask instead of assigning through the iterator.
			auto combined = it->second | mask;
			edgeActiveLaneMasks.erase(edge);
			edgeActiveLaneMasks.emplace(edge, combined);
		}
	}

	RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
	{
		auto edge = Block::Edge{from, to};
		auto it = state->edgeActiveLaneMasks.find(edge);
		ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
		return it->second;
	}

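	// Maps a SPIR-V execution model to the corresponding Vulkan shader stage.
	// Only the stages this implementation supports are handled; all others are
	// rejected as unsupported.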
	VkShaderStageFlagBits SpirvShader::executionModelToStage(spv::ExecutionModel model)
	{
		switch (model)
		{
		case spv::ExecutionModelVertex:     return VK_SHADER_STAGE_VERTEX_BIT;
		// case spv::ExecutionModelTessellationControl:    return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
		// case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
		// case spv::ExecutionModelGeometry:               return VK_SHADER_STAGE_GEOMETRY_BIT;
		case spv::ExecutionModelFragment:   return VK_SHADER_STAGE_FRAGMENT_BIT;
		case spv::ExecutionModelGLCompute:  return VK_SHADER_STAGE_COMPUTE_BIT;
		// case spv::ExecutionModelKernel:                 return VkShaderStageFlagBits(0); // Not supported by Vulkan.
		// case spv::ExecutionModelTaskNV:                 return VK_SHADER_STAGE_TASK_BIT_NV;
		// case spv::ExecutionModelMeshNV:                 return VK_SHADER_STAGE_MESH_BIT_NV;
		// case spv::ExecutionModelRayGenerationNV:        return VK_SHADER_STAGE_RAYGEN_BIT_NV;
		// case spv::ExecutionModelIntersectionNV:         return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
		// case spv::ExecutionModelAnyHitNV:               return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
		// case spv::ExecutionModelClosestHitNV:           return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
		// case spv::ExecutionModelMissNV:                 return VK_SHADER_STAGE_MISS_BIT_NV;
		// case spv::ExecutionModelCallableNV:             return VK_SHADER_STAGE_CALLABLE_BIT_NV;
		default:
			UNSUPPORTED("ExecutionModel: %d", int(model));
			return VkShaderStageFlagBits(0);
		}
	}

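	// A GenericValue provides uniform access to an object's value, whether it is
	// a constant (read from the Object itself) or an intermediate (read from the
	// EmitState).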
	SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId) :
			obj(shader->getObject(objId)),
			intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr),
			type(obj.type) {}

	SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
		pipelineLayout(pipelineLayout)
	{
	}

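	// With a subgroup size equal to the SIMD width (4), each lane's subgroup
	// built-ins are compile-time constants. The Eq/Ge/Gt/Le/Lt masks are 4-bit
	// lane masks stored in the low 32 bits of the 128-bit mask vector; for
	// example, lane 1 gets EqMask 0b0010 (2), GeMask 0b1110 (14) and
	// LtMask 0b0001 (1).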
	void SpirvRoutine::setImmutableInputBuiltins(SpirvShader const *shader)
	{
		setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 1);
			value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 1);
			// Only a single physical device is supported.
			value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});
	}
}
