// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include "SamplerCore.hpp"
#include "Reactor/Coroutine.hpp"
#include "System/Math.hpp"
#include "Vulkan/VkBuffer.hpp"
#include "Vulkan/VkBufferView.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkDescriptorSet.hpp"
#include "Vulkan/VkPipelineLayout.hpp"
#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkRenderPass.hpp"
#include "Device/Config.hpp"

#include <spirv/unified1/spirv.hpp>
#include <spirv/unified1/GLSL.std.450.h>

#include <queue>
namespace
{
    constexpr float PI = 3.141592653589793f;

    rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
    {
        return rr::SignMask(ints) != 0;
    }

    rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
    {
        return rr::SignMask(~ints) != 0;
    }

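    // AndAll/OrAll below reduce a per-lane boolean mask to a single value
    // broadcast to every lane, using two swizzled combine steps so that the
    // reduction never has to leave the vector registers.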
    template <typename T>
    rr::RValue<T> AndAll(rr::RValue<T> const &mask)
    {
        T v1 = mask;              // [x]    [y]    [z]    [w]
        T v2 = v1.xzxz & v1.ywyw; // [xy]   [zw]   [xy]   [zw]
        return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
    }

    template <typename T>
    rr::RValue<T> OrAll(rr::RValue<T> const &mask)
    {
        T v1 = mask;              // [x]    [y]    [z]    [w]
        T v2 = v1.xzxz | v1.ywyw; // [xy]   [zw]   [xy]   [zw]
        return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw]
    }

    rr::RValue<sw::SIMD::Float> Sign(rr::RValue<sw::SIMD::Float> const &val)
    {
        return rr::As<sw::SIMD::Float>((rr::As<sw::SIMD::UInt>(val) & sw::SIMD::UInt(0x80000000)) | sw::SIMD::UInt(0x3f800000));
    }

    // Returns the <whole, frac> of val.
    // Both whole and frac will have the same sign as val.
    std::pair<rr::RValue<sw::SIMD::Float>, rr::RValue<sw::SIMD::Float>>
    Modf(rr::RValue<sw::SIMD::Float> const &val)
    {
        auto abs = Abs(val);
        auto sign = Sign(val);
        auto whole = Floor(abs) * sign;
        auto frac = Frac(abs) * sign;
        return std::make_pair(whole, frac);
    }

    // Returns the number of 1s in bits, per lane.
    sw::SIMD::UInt CountBits(rr::RValue<sw::SIMD::UInt> const &bits)
    {
        // TODO: Add an intrinsic to reactor. Even if there isn't a
        // single vector instruction, there may be target-dependent
        // ways to make this faster.
        // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
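        // SWAR popcount: each step sums adjacent bit-fields in parallel,
        // doubling the field width (1 -> 2 -> 4 -> 8 -> 16 -> 32 bits). For a
        // lane holding 0b1101, the first step leaves each 2-bit field holding
        // the popcount of its pair (10 and 01), and the remaining steps
        // accumulate those into the full count (3).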
        sw::SIMD::UInt c = bits - ((bits >> 1) & sw::SIMD::UInt(0x55555555));
        c = ((c >> 2) & sw::SIMD::UInt(0x33333333)) + (c & sw::SIMD::UInt(0x33333333));
        c = ((c >> 4) + c) & sw::SIMD::UInt(0x0F0F0F0F);
        c = ((c >> 8) + c) & sw::SIMD::UInt(0x00FF00FF);
        c = ((c >> 16) + c) & sw::SIMD::UInt(0x0000FFFF);
        return c;
    }

    // Returns 1 << bits.
    // If the resulting bit overflows a 32-bit integer, 0 is returned.
    rr::RValue<sw::SIMD::UInt> NthBit32(rr::RValue<sw::SIMD::UInt> const &bits)
    {
        return ((sw::SIMD::UInt(1) << bits) & rr::CmpLT(bits, sw::SIMD::UInt(32)));
    }

    // Returns bitCount number of 1's starting from the LSB.
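    // e.g. Bitmask32(3) == 0b111, and Bitmask32(32) == 0xFFFFFFFF
    // (NthBit32(32) is 0, and the unsigned 0 - 1 wraps to all ones).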
    rr::RValue<sw::SIMD::UInt> Bitmask32(rr::RValue<sw::SIMD::UInt> const &bitCount)
    {
        return NthBit32(bitCount) - sw::SIMD::UInt(1);
    }

    // Performs a fused-multiply add, returning a * b + c.
    rr::RValue<sw::SIMD::Float> FMA(
            rr::RValue<sw::SIMD::Float> const &a,
            rr::RValue<sw::SIMD::Float> const &b,
            rr::RValue<sw::SIMD::Float> const &c)
    {
        return a * b + c;
    }

    // Returns the exponent of the floating point number f.
    // Assumes IEEE 754.
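    // Note that a bias of 126 rather than 127 is subtracted, producing the
    // frexp()-style exponent e for which f == significand * 2^e with the
    // significand in [0.5, 1.0), as GLSL.std.450 Frexp expects.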
    rr::RValue<sw::SIMD::Int> Exponent(rr::RValue<sw::SIMD::Float> f)
    {
        auto v = rr::As<sw::SIMD::UInt>(f);
        return (sw::SIMD::Int((v >> sw::SIMD::UInt(23)) & sw::SIMD::UInt(0xFF)) - sw::SIMD::Int(126));
    }

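    // NMin and NMax below implement the NaN-aware min/max branchlessly: each
    // IsNan() mask selects between the candidate results using the standard
    // SIMD select idiom, (mask & a) | (~mask & b).
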
    // Returns y if y < x; otherwise result is x.
    // If one operand is a NaN, the other operand is the result.
    // If both operands are NaN, the result is a NaN.
    rr::RValue<sw::SIMD::Float> NMin(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
    {
        using namespace rr;
        auto xIsNan = IsNan(x);
        auto yIsNan = IsNan(y);
        return As<sw::SIMD::Float>(
            // If neither is NaN, return min
            ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Min(x, y))) |
            // If one operand is a NaN, the other operand is the result
            // If both operands are NaN, the result is a NaN.
            ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
            (( xIsNan          ) & As<sw::SIMD::Int>(y)));
    }

    // Returns y if y > x; otherwise result is x.
    // If one operand is a NaN, the other operand is the result.
    // If both operands are NaN, the result is a NaN.
    rr::RValue<sw::SIMD::Float> NMax(rr::RValue<sw::SIMD::Float> const &x, rr::RValue<sw::SIMD::Float> const &y)
    {
        using namespace rr;
        auto xIsNan = IsNan(x);
        auto yIsNan = IsNan(y);
        return As<sw::SIMD::Float>(
            // If neither is NaN, return max
            ((~xIsNan & ~yIsNan) & As<sw::SIMD::Int>(Max(x, y))) |
            // If one operand is a NaN, the other operand is the result
            // If both operands are NaN, the result is a NaN.
            ((~xIsNan & yIsNan) & As<sw::SIMD::Int>(x)) |
            (( xIsNan          ) & As<sw::SIMD::Int>(y)));
    }

    // Returns the determinant of a 2x2 matrix.
    rr::RValue<sw::SIMD::Float> Determinant(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
        rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
    {
        return a*d - b*c;
    }

    // Returns the determinant of a 3x3 matrix.
    rr::RValue<sw::SIMD::Float> Determinant(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
        rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
        rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
    {
        return a*e*i + b*f*g + c*d*h - c*e*g - b*d*i - a*f*h;
    }

    // Returns the determinant of a 4x4 matrix.
    rr::RValue<sw::SIMD::Float> Determinant(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
        rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
        rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
        rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
    {
        return a * Determinant(f, g, h,
                               j, k, l,
                               n, o, p) -
               b * Determinant(e, g, h,
                               i, k, l,
                               m, o, p) +
               c * Determinant(e, f, h,
                               i, j, l,
                               m, n, p) -
               d * Determinant(e, f, g,
                               i, j, k,
                               m, n, o);
    }

    // Returns the inverse of a 2x2 matrix.
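    // This is the adjugate divided by the determinant,
    //   inverse([a b; c d]) = 1/(ad - bc) * [d -b; -c a],
    // with the result returned in row-major order.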
    std::array<rr::RValue<sw::SIMD::Float>, 4> MatrixInverse(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b,
        rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d)
    {
        auto s = sw::SIMD::Float(1.0f) / Determinant(a, b, c, d);
        return {{s*d, -s*b, -s*c, s*a}};
    }

    // Returns the inverse of a 3x3 matrix.
    std::array<rr::RValue<sw::SIMD::Float>, 9> MatrixInverse(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c,
        rr::RValue<sw::SIMD::Float> const &d, rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f,
        rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h, rr::RValue<sw::SIMD::Float> const &i)
    {
        auto s = sw::SIMD::Float(1.0f) / Determinant(
                a, b, c,
                d, e, f,
                g, h, i); // TODO: duplicate arithmetic calculating the det and below.

        return {{
            s * (e*i - f*h), s * (c*h - b*i), s * (b*f - c*e),
            s * (f*g - d*i), s * (a*i - c*g), s * (c*d - a*f),
            s * (d*h - e*g), s * (b*g - a*h), s * (a*e - b*d),
        }};
    }

    // Returns the inverse of a 4x4 matrix.
    std::array<rr::RValue<sw::SIMD::Float>, 16> MatrixInverse(
        rr::RValue<sw::SIMD::Float> const &a, rr::RValue<sw::SIMD::Float> const &b, rr::RValue<sw::SIMD::Float> const &c, rr::RValue<sw::SIMD::Float> const &d,
        rr::RValue<sw::SIMD::Float> const &e, rr::RValue<sw::SIMD::Float> const &f, rr::RValue<sw::SIMD::Float> const &g, rr::RValue<sw::SIMD::Float> const &h,
        rr::RValue<sw::SIMD::Float> const &i, rr::RValue<sw::SIMD::Float> const &j, rr::RValue<sw::SIMD::Float> const &k, rr::RValue<sw::SIMD::Float> const &l,
        rr::RValue<sw::SIMD::Float> const &m, rr::RValue<sw::SIMD::Float> const &n, rr::RValue<sw::SIMD::Float> const &o, rr::RValue<sw::SIMD::Float> const &p)
    {
        auto s = sw::SIMD::Float(1.0f) / Determinant(
                a, b, c, d,
                e, f, g, h,
                i, j, k, l,
                m, n, o, p); // TODO: duplicate arithmetic calculating the det and below.

        auto kplo = k*p - l*o, jpln = j*p - l*n, jokn = j*o - k*n;
        auto gpho = g*p - h*o, fphn = f*p - h*n, fogn = f*o - g*n;
        auto glhk = g*l - h*k, flhj = f*l - h*j, fkgj = f*k - g*j;
        auto iplm = i*p - l*m, iokm = i*o - k*m, ephm = e*p - h*m;
        auto eogm = e*o - g*m, elhi = e*l - h*i, ekgi = e*k - g*i;
        auto injm = i*n - j*m, enfm = e*n - f*m, ejfi = e*j - f*i;

        return {{
            s * ( f * kplo - g * jpln + h * jokn),
            s * (-b * kplo + c * jpln - d * jokn),
            s * ( b * gpho - c * fphn + d * fogn),
            s * (-b * glhk + c * flhj - d * fkgj),

            s * (-e * kplo + g * iplm - h * iokm),
            s * ( a * kplo - c * iplm + d * iokm),
            s * (-a * gpho + c * ephm - d * eogm),
            s * ( a * glhk - c * elhi + d * ekgi),

            s * ( e * jpln - f * iplm + h * injm),
            s * (-a * jpln + b * iplm - d * injm),
            s * ( a * fphn - b * ephm + d * enfm),
            s * (-a * flhj + b * elhi - d * ejfi),

            s * (-e * jokn + f * iokm - g * injm),
            s * ( a * jokn - b * iokm + c * injm),
            s * (-a * fogn + b * eogm - c * enfm),
            s * ( a * fkgj - b * ekgi + c * ejfi),
        }};
    }

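    // Converts a pointer to tightly packed data into one addressing the
    // lane-interleaved layout, where each scalar element is stored
    // SIMD::Width times, one copy per lane: element i of lane l ends up at
    // byte offset (i * SIMD::Width + l) * sizeof(float). This is the layout
    // used for storage classes for which IsStorageInterleavedByLane() holds.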
    sw::SIMD::Pointer interleaveByLane(sw::SIMD::Pointer p)
    {
        p *= sw::SIMD::Width;
        p.staticOffsets[0] += 0 * sizeof(float);
        p.staticOffsets[1] += 1 * sizeof(float);
        p.staticOffsets[2] += 2 * sizeof(float);
        p.staticOffsets[3] += 3 * sizeof(float);
        return p;
    }

    VkFormat SpirvFormatToVulkanFormat(spv::ImageFormat format)
    {
        switch (format)
        {
        case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
        case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
        case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
        case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
        case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
        case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
        case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
        case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
        case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
        case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
        case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
        case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
        case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
        case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
        case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
        case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;

        default:
            UNIMPLEMENTED("SPIR-V ImageFormat %u", format);
            return VK_FORMAT_UNDEFINED;
        }
    }

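    // Decodes an sRGB-encoded component to linear color space:
    //   linear = c / 12.92                   for c < 0.04045
    //   linear = ((c + 0.055) / 1.055)^2.4   otherwise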
    sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
    {
        sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
        sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));

        sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));

        return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec))); // TODO: IfThenElse()
    }

} // anonymous namespace

namespace sw
{
    namespace SIMD
    {

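        // Loads a vector of type T through the per-lane pointer, choosing the
        // cheapest path that is sound for the requested robustness behavior:
        //  - statically in-bounds, sequential offsets: a single vector load;
        //  - statically in-bounds, equal offsets: one scalar load, replicated;
        //  - otherwise, for non-atomic relaxed accesses: a masked load, gather,
        //    or replicated scalar load, with out-of-bounds lanes masked off;
        //  - atomic or ordered accesses: dynamic checks for equal or sequential
        //    offsets with all lanes enabled, falling back to per-lane scalar loads.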
        template<typename T>
        T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
        {
            using EL = typename Element<T>::type;

            if (ptr.isStaticallyInBounds(sizeof(float), robustness))
            {
                // All elements are statically known to be in-bounds.
                // This lets us avoid costly conditionals on the mask.

                if (ptr.hasStaticSequentialOffsets(sizeof(float)))
                {
                    // Offsets are sequential. Perform regular load.
                    return rr::Load(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), alignment, atomic, order);
                }
                if (ptr.hasStaticEqualOffsets())
                {
                    // Load one, replicate.
                    return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment));
                }
            }
            else
            {
                switch(robustness)
                {
                case OutOfBoundsBehavior::Nullify:
                case OutOfBoundsBehavior::RobustBufferAccess:
                case OutOfBoundsBehavior::UndefinedValue:
                    mask &= ptr.isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
                    break;
                case OutOfBoundsBehavior::UndefinedBehavior:
                    // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
                    break;
                }
            }

            auto offsets = ptr.offsets();

            if (!atomic && order == std::memory_order_relaxed)
            {
                if (ptr.hasStaticEqualOffsets())
                {
                    // Load one, replicate.
                    // Be careful of the case where the post-bounds-check mask
                    // is 0, in which case we must not load.
                    T out = T(0);
                    If(AnyTrue(mask))
                    {
                        EL el = *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment);
                        out = T(el);
                    }
                    return out;
                }

                bool zeroMaskedLanes = true;
                switch(robustness)
                {
                case OutOfBoundsBehavior::Nullify:
                case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
                    zeroMaskedLanes = true;
                    break;
                case OutOfBoundsBehavior::UndefinedValue:
                case OutOfBoundsBehavior::UndefinedBehavior:
                    zeroMaskedLanes = false;
                    break;
                }

                if (ptr.hasStaticSequentialOffsets(sizeof(float)))
                {
                    return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, zeroMaskedLanes);
                }

                return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, zeroMaskedLanes);
            }
            else
            {
                T out;
                auto anyLanesDisabled = AnyFalse(mask);
                If(ptr.hasEqualOffsets() && !anyLanesDisabled)
                {
                    // Load one, replicate.
                    auto offset = Extract(offsets, 0);
                    out = T(rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order));
                }
                Else If(ptr.hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
                {
                    // Load all elements in a single SIMD instruction.
                    auto offset = Extract(offsets, 0);
                    out = rr::Load(rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
                }
                Else
                {
                    // Divergent offsets or masked lanes.
                    out = T(0);
                    for (int i = 0; i < SIMD::Width; i++)
                    {
                        If(Extract(mask, i) != 0)
                        {
                            auto offset = Extract(offsets, i);
                            auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
                            out = Insert(out, el, i);
                        }
                    }
                }
                return out;
            }
        }

        template<typename T>
        void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
        {
            using EL = typename Element<T>::type;
            constexpr size_t alignment = sizeof(float);
            auto offsets = ptr.offsets();

            switch(robustness)
            {
            case OutOfBoundsBehavior::Nullify:
            case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
            case OutOfBoundsBehavior::UndefinedValue:     // Should not be used for store operations. Treat as robust buffer access.
                mask &= ptr.isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
                break;
            case OutOfBoundsBehavior::UndefinedBehavior:
                // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
                break;
            }

            if (!atomic && order == std::memory_order_relaxed)
            {
                if (ptr.hasStaticEqualOffsets())
                {
                    If (AnyTrue(mask))
                    {
                        // All equal. One of these writes will win -- elect the winning lane.
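                        // elect keeps only the lowest-numbered enabled lane:
                        // lane i is cleared if any lane below it is set in mask.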
                        auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
                        auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx));
                        auto maskedVal = As<SIMD::Int>(val) & elect;
                        auto scalarVal = Extract(maskedVal, 0) |
                                         Extract(maskedVal, 1) |
                                         Extract(maskedVal, 2) |
                                         Extract(maskedVal, 3);
                        *rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment) = As<EL>(scalarVal);
                    }
                }
                else if (ptr.hasStaticSequentialOffsets(sizeof(float)))
                {
                    if (ptr.isStaticallyInBounds(sizeof(float), robustness))
                    {
                        // Pointer has no elements OOB, and the store is not atomic.
                        // Perform an RMW.
                        auto p = rr::Pointer<SIMD::Int>(ptr.base + ptr.staticOffsets[0], alignment);
                        auto prev = *p;
                        *p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
                    }
                    else
                    {
                        rr::MaskedStore(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), val, mask, alignment);
                    }
                }
                else
                {
                    rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, alignment);
                }
            }
            else
            {
                auto anyLanesDisabled = AnyFalse(mask);
                If(ptr.hasSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
                {
                    // Store all elements in a single SIMD instruction.
                    auto offset = Extract(offsets, 0);
                    Store(val, rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
                }
                Else
                {
                    // Divergent offsets or masked lanes.
                    for (int i = 0; i < SIMD::Width; i++)
                    {
                        If(Extract(mask, i) != 0)
                        {
                            auto offset = Extract(offsets, i);
                            rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
                        }
                    }
                }
            }
        }

    } // namespace SIMD

    SpirvShader::SpirvShader(
            uint32_t codeSerialID,
            VkShaderStageFlagBits pipelineStage,
            const char *entryPointName,
            InsnStore const &insns,
            const vk::RenderPass *renderPass,
            uint32_t subpassIndex,
            bool robustBufferAccess)
        : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
          outputs{MAX_INTERFACE_COMPONENTS},
          codeSerialID(codeSerialID),
          robustBufferAccess(robustBufferAccess)
    {
        ASSERT(insns.size() > 0);

        if (renderPass)
        {
            // capture formats of any input attachments present
            auto subpass = renderPass->getSubpass(subpassIndex);
            inputAttachmentFormats.reserve(subpass.inputAttachmentCount);
            for (auto i = 0u; i < subpass.inputAttachmentCount; i++)
            {
                auto attachmentIndex = subpass.pInputAttachments[i].attachment;
                inputAttachmentFormats.push_back(attachmentIndex != VK_ATTACHMENT_UNUSED
                        ? renderPass->getAttachment(attachmentIndex).format : VK_FORMAT_UNDEFINED);
            }
        }

        // Simplifying assumptions (to be satisfied by earlier transformations)
        // - The only input/output OpVariables present are those used by the entrypoint

        Function::ID currentFunction;
        Block::ID currentBlock;
        InsnIterator blockStart;

        for (auto insn : *this)
        {
            spv::Op opcode = insn.opcode();

            switch (opcode)
            {
            case spv::OpEntryPoint:
            {
                executionModel = spv::ExecutionModel(insn.word(1));
                auto id = Function::ID(insn.word(2));
                auto name = insn.string(3);
                auto stage = executionModelToStage(executionModel);
                if (stage == pipelineStage && strcmp(name, entryPointName) == 0)
                {
                    ASSERT_MSG(entryPoint == 0, "Duplicate entry point with name '%s' and stage %d", name, int(stage));
                    entryPoint = id;
                }
                break;
            }

            case spv::OpExecutionMode:
                ProcessExecutionMode(insn);
                break;

            case spv::OpDecorate:
            {
                TypeOrObjectID targetId = insn.word(1);
                auto decoration = static_cast<spv::Decoration>(insn.word(2));
                uint32_t value = insn.wordCount() > 3 ? insn.word(3) : 0;

                decorations[targetId].Apply(decoration, value);

                switch(decoration)
                {
                case spv::DecorationDescriptorSet:
                    descriptorDecorations[targetId].DescriptorSet = value;
                    break;
                case spv::DecorationBinding:
                    descriptorDecorations[targetId].Binding = value;
                    break;
                case spv::DecorationInputAttachmentIndex:
                    descriptorDecorations[targetId].InputAttachmentIndex = value;
                    break;
                default:
                    // Only handling descriptor decorations here.
                    break;
                }

                if (decoration == spv::DecorationCentroid)
                    modes.NeedsCentroid = true;
                break;
            }

            case spv::OpMemberDecorate:
            {
                Type::ID targetId = insn.word(1);
                auto memberIndex = insn.word(2);
                auto decoration = static_cast<spv::Decoration>(insn.word(3));
                uint32_t value = insn.wordCount() > 4 ? insn.word(4) : 0;

                auto &d = memberDecorations[targetId];
                if (memberIndex >= d.size())
                    d.resize(memberIndex + 1); // on demand; exact size would require another pass...

                d[memberIndex].Apply(decoration, value);

                if (decoration == spv::DecorationCentroid)
                    modes.NeedsCentroid = true;
                break;
            }

            case spv::OpDecorationGroup:
                // Nothing to do here. We don't need to record the definition of the group; we'll just have
                // the bundle of decorations float around. If we were to ever walk the decorations directly,
                // we might think about introducing this as a real Object.
                break;

            case spv::OpGroupDecorate:
            {
                uint32_t group = insn.word(1);
                auto const &groupDecorations = decorations[group];
                auto const &descriptorGroupDecorations = descriptorDecorations[group];
                for (auto i = 2u; i < insn.wordCount(); i++)
                {
                    // Remaining operands are targets to apply the group to.
                    uint32_t target = insn.word(i);
                    decorations[target].Apply(groupDecorations);
                    descriptorDecorations[target].Apply(descriptorGroupDecorations);
                }

                break;
            }

            case spv::OpGroupMemberDecorate:
            {
                auto const &srcDecorations = decorations[insn.word(1)];
                for (auto i = 2u; i < insn.wordCount(); i += 2)
                {
                    // remaining operands are pairs of <id>, literal for members to apply to.
                    auto &d = memberDecorations[insn.word(i)];
                    auto memberIndex = insn.word(i + 1);
                    if (memberIndex >= d.size())
                        d.resize(memberIndex + 1); // on demand resize, see above...
                    d[memberIndex].Apply(srcDecorations);
                }
                break;
            }

            case spv::OpLabel:
            {
                ASSERT(currentBlock.value() == 0);
                currentBlock = Block::ID(insn.word(1));
                blockStart = insn;
                break;
            }

            // Branch Instructions (subset of Termination Instructions):
            case spv::OpBranch:
            case spv::OpBranchConditional:
            case spv::OpSwitch:
            case spv::OpReturn:
            // fallthrough

            // Termination instruction:
            case spv::OpKill:
            case spv::OpUnreachable:
            {
                ASSERT(currentBlock.value() != 0);
                ASSERT(currentFunction.value() != 0);

                auto blockEnd = insn; blockEnd++;
                functions[currentFunction].blocks[currentBlock] = Block(blockStart, blockEnd);
                currentBlock = Block::ID(0);

                if (opcode == spv::OpKill)
                {
                    modes.ContainsKill = true;
                }
                break;
            }

            case spv::OpLoopMerge:
            case spv::OpSelectionMerge:
                break; // Nothing to do in analysis pass.

            case spv::OpTypeVoid:
            case spv::OpTypeBool:
            case spv::OpTypeInt:
            case spv::OpTypeFloat:
            case spv::OpTypeVector:
            case spv::OpTypeMatrix:
            case spv::OpTypeImage:
            case spv::OpTypeSampler:
            case spv::OpTypeSampledImage:
            case spv::OpTypeArray:
            case spv::OpTypeRuntimeArray:
            case spv::OpTypeStruct:
            case spv::OpTypePointer:
            case spv::OpTypeFunction:
                DeclareType(insn);
                break;

            case spv::OpVariable:
            {
                Type::ID typeId = insn.word(1);
                Object::ID resultId = insn.word(2);
                auto storageClass = static_cast<spv::StorageClass>(insn.word(3));

                auto &object = defs[resultId];
                object.kind = Object::Kind::Pointer;
                object.definition = insn;
                object.type = typeId;

                ASSERT(getType(typeId).definition.opcode() == spv::OpTypePointer);
                ASSERT(getType(typeId).storageClass == storageClass);

                switch (storageClass)
                {
                case spv::StorageClassInput:
                case spv::StorageClassOutput:
                    ProcessInterfaceVariable(object);
                    break;

                case spv::StorageClassUniform:
                case spv::StorageClassStorageBuffer:
                    object.kind = Object::Kind::DescriptorSet;
                    break;

                case spv::StorageClassPushConstant:
                case spv::StorageClassPrivate:
                case spv::StorageClassFunction:
                case spv::StorageClassUniformConstant:
                    break; // Correctly handled.

                case spv::StorageClassWorkgroup:
                {
                    auto &elTy = getType(getType(typeId).element);
                    auto sizeInBytes = elTy.sizeInComponents * static_cast<uint32_t>(sizeof(float));
                    workgroupMemory.allocate(resultId, sizeInBytes);
                    object.kind = Object::Kind::Pointer;
                    break;
                }
                case spv::StorageClassAtomicCounter:
                case spv::StorageClassImage:
                    UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
                    break;

                case spv::StorageClassCrossWorkgroup:
                    UNSUPPORTED("SPIR-V OpenCL Execution Model (StorageClassCrossWorkgroup)");
                    break;

                case spv::StorageClassGeneric:
                    UNSUPPORTED("SPIR-V GenericPointer Capability (StorageClassGeneric)");
                    break;

                default:
                    UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
                    break;
                }
                break;
            }

            case spv::OpConstant:
            case spv::OpSpecConstant:
                CreateConstant(insn).constantValue[0] = insn.word(3);
                break;
            case spv::OpConstantFalse:
            case spv::OpSpecConstantFalse:
                CreateConstant(insn).constantValue[0] = 0; // Represent Boolean false as zero.
                break;
            case spv::OpConstantTrue:
            case spv::OpSpecConstantTrue:
                CreateConstant(insn).constantValue[0] = ~0u; // Represent Boolean true as all bits set.
                break;
            case spv::OpConstantNull:
            case spv::OpUndef:
            {
                // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
                // OpConstantNull forms a constant of arbitrary type, all zeros.
                auto &object = CreateConstant(insn);
                auto &objectTy = getType(object.type);
                for (auto i = 0u; i < objectTy.sizeInComponents; i++)
                {
                    object.constantValue[i] = 0;
                }
                break;
            }
            case spv::OpConstantComposite:
            case spv::OpSpecConstantComposite:
            {
                auto &object = CreateConstant(insn);
                auto offset = 0u;
                for (auto i = 0u; i < insn.wordCount() - 3; i++)
                {
                    auto &constituent = getObject(insn.word(i + 3));
                    auto &constituentTy = getType(constituent.type);
                    for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
                    {
                        object.constantValue[offset++] = constituent.constantValue[j];
                    }
                }

                auto objectId = Object::ID(insn.word(2));
                auto decorationsIt = decorations.find(objectId);
                if (decorationsIt != decorations.end() &&
                    decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
                {
                    // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
                    // Decorating an object with the WorkgroupSize built-in
                    // decoration will make that object contain the dimensions
                    // of a local workgroup. If an object is decorated with the
                    // WorkgroupSize decoration, this must take precedence over
                    // any execution mode set for LocalSize.
                    // The object decorated with WorkgroupSize must be declared
                    // as a three-component vector of 32-bit integers.
                    ASSERT(getType(object.type).sizeInComponents == 3);
                    modes.WorkgroupSizeX = object.constantValue[0];
                    modes.WorkgroupSizeY = object.constantValue[1];
                    modes.WorkgroupSizeZ = object.constantValue[2];
                }
                break;
            }
            case spv::OpSpecConstantOp:
                EvalSpecConstantOp(insn);
                break;

            case spv::OpCapability:
            {
                auto capability = static_cast<spv::Capability>(insn.word(1));
                switch (capability)
                {
                case spv::CapabilityMatrix: capabilities.Matrix = true; break;
                case spv::CapabilityShader: capabilities.Shader = true; break;
                case spv::CapabilityInputAttachment: capabilities.InputAttachment = true; break;
                case spv::CapabilitySampled1D: capabilities.Sampled1D = true; break;
                case spv::CapabilityImage1D: capabilities.Image1D = true; break;
                case spv::CapabilitySampledBuffer: capabilities.SampledBuffer = true; break;
                case spv::CapabilityImageBuffer: capabilities.ImageBuffer = true; break;
                case spv::CapabilityImageQuery: capabilities.ImageQuery = true; break;
                case spv::CapabilityDerivativeControl: capabilities.DerivativeControl = true; break;
                case spv::CapabilityGroupNonUniform: capabilities.GroupNonUniform = true; break;
                case spv::CapabilityMultiView: capabilities.MultiView = true; break;
                case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
                case spv::CapabilityGroupNonUniformVote: capabilities.GroupNonUniformVote = true; break;
                case spv::CapabilityGroupNonUniformBallot: capabilities.GroupNonUniformBallot = true; break;
                case spv::CapabilityGroupNonUniformShuffle: capabilities.GroupNonUniformShuffle = true; break;
                case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
                case spv::CapabilityStorageImageExtendedFormats: capabilities.StorageImageExtendedFormats = true; break;
                default:
                    UNSUPPORTED("Unsupported capability %u", insn.word(1));
                }
                break; // Various capabilities will be declared, but none affect our code generation at this point.
            }

            case spv::OpMemoryModel:
                break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.

            case spv::OpFunction:
            {
                auto functionId = Function::ID(insn.word(2));
                ASSERT_MSG(currentFunction == 0, "Functions %d and %d overlap", currentFunction.value(), functionId.value());
                currentFunction = functionId;
                auto &function = functions[functionId];
                function.result = Type::ID(insn.word(1));
                function.type = Type::ID(insn.word(4));
                // Scan forward to find the function's label.
                for (auto it = insn; it != end() && function.entry == 0; it++)
                {
                    switch (it.opcode())
                    {
                    case spv::OpFunction:
                    case spv::OpFunctionParameter:
                        break;
                    case spv::OpLabel:
                        function.entry = Block::ID(it.word(1));
                        break;
                    default:
                        WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
                    }
                }
                ASSERT_MSG(function.entry != 0, "Function<%d> has no label", currentFunction.value());
                break;
            }

            case spv::OpFunctionEnd:
                currentFunction = 0;
                break;

            case spv::OpExtInstImport:
            {
                // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
                // Valid shaders will not attempt to import any other instruction sets.
                auto ext = insn.string(2);
                if (0 != strcmp("GLSL.std.450", ext))
                {
                    UNSUPPORTED("SPIR-V Extension: %s", ext);
                }
                break;
            }
            case spv::OpName:
            case spv::OpMemberName:
            case spv::OpSource:
            case spv::OpSourceContinued:
            case spv::OpSourceExtension:
            case spv::OpLine:
            case spv::OpNoLine:
            case spv::OpModuleProcessed:
            case spv::OpString:
                // No semantic impact
                break;

            case spv::OpFunctionParameter:
                // These should have all been removed by preprocessing passes. If we see them here,
                // our assumptions are wrong and we will probably generate wrong code.
                UNREACHABLE("%s should have already been lowered.", OpcodeName(opcode).c_str());
                break;

            case spv::OpFunctionCall:
                // TODO(b/141246700): Add full support for spv::OpFunctionCall
                break;

            case spv::OpFConvert:
                UNSUPPORTED("SPIR-V Float16 or Float64 Capability (OpFConvert)");
                break;

            case spv::OpSConvert:
                UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpSConvert)");
                break;

            case spv::OpUConvert:
                UNSUPPORTED("SPIR-V Int16 or Int64 Capability (OpUConvert)");
                break;

            case spv::OpLoad:
            case spv::OpAccessChain:
            case spv::OpInBoundsAccessChain:
            case spv::OpSampledImage:
            case spv::OpImage:
            {
                // Propagate the descriptor decorations to the result.
                Object::ID resultId = insn.word(2);
                Object::ID pointerId = insn.word(3);
                const auto &d = descriptorDecorations.find(pointerId);

                if(d != descriptorDecorations.end())
                {
                    descriptorDecorations[resultId] = d->second;
                }

                DefineResult(insn);

                if (opcode == spv::OpAccessChain || opcode == spv::OpInBoundsAccessChain)
                {
                    Decorations dd{};
                    ApplyDecorationsForAccessChain(&dd, &descriptorDecorations[resultId], pointerId, insn.wordCount() - 4, insn.wordPointer(4));
                    // Note: offset is the one thing that does *not* propagate, as the access chain accounts for it.
                    dd.HasOffset = false;
                    decorations[resultId].Apply(dd);
                }
            }
            break;

            case spv::OpCompositeConstruct:
            case spv::OpCompositeInsert:
            case spv::OpCompositeExtract:
            case spv::OpVectorShuffle:
            case spv::OpVectorTimesScalar:
            case spv::OpMatrixTimesScalar:
            case spv::OpMatrixTimesVector:
            case spv::OpVectorTimesMatrix:
            case spv::OpMatrixTimesMatrix:
            case spv::OpOuterProduct:
            case spv::OpTranspose:
            case spv::OpVectorExtractDynamic:
            case spv::OpVectorInsertDynamic:
            // Unary ops
            case spv::OpNot:
            case spv::OpBitFieldInsert:
            case spv::OpBitFieldSExtract:
            case spv::OpBitFieldUExtract:
            case spv::OpBitReverse:
            case spv::OpBitCount:
            case spv::OpSNegate:
            case spv::OpFNegate:
            case spv::OpLogicalNot:
            case spv::OpQuantizeToF16:
            // Binary ops
            case spv::OpIAdd:
            case spv::OpISub:
            case spv::OpIMul:
            case spv::OpSDiv:
            case spv::OpUDiv:
            case spv::OpFAdd:
            case spv::OpFSub:
            case spv::OpFMul:
            case spv::OpFDiv:
            case spv::OpFMod:
            case spv::OpFRem:
            case spv::OpFOrdEqual:
            case spv::OpFUnordEqual:
            case spv::OpFOrdNotEqual:
            case spv::OpFUnordNotEqual:
            case spv::OpFOrdLessThan:
            case spv::OpFUnordLessThan:
            case spv::OpFOrdGreaterThan:
            case spv::OpFUnordGreaterThan:
            case spv::OpFOrdLessThanEqual:
            case spv::OpFUnordLessThanEqual:
            case spv::OpFOrdGreaterThanEqual:
            case spv::OpFUnordGreaterThanEqual:
            case spv::OpSMod:
            case spv::OpSRem:
            case spv::OpUMod:
            case spv::OpIEqual:
            case spv::OpINotEqual:
            case spv::OpUGreaterThan:
            case spv::OpSGreaterThan:
            case spv::OpUGreaterThanEqual:
            case spv::OpSGreaterThanEqual:
            case spv::OpULessThan:
            case spv::OpSLessThan:
            case spv::OpULessThanEqual:
            case spv::OpSLessThanEqual:
            case spv::OpShiftRightLogical:
            case spv::OpShiftRightArithmetic:
            case spv::OpShiftLeftLogical:
            case spv::OpBitwiseOr:
            case spv::OpBitwiseXor:
            case spv::OpBitwiseAnd:
            case spv::OpLogicalOr:
            case spv::OpLogicalAnd:
            case spv::OpLogicalEqual:
            case spv::OpLogicalNotEqual:
            case spv::OpUMulExtended:
            case spv::OpSMulExtended:
            case spv::OpIAddCarry:
            case spv::OpISubBorrow:
            case spv::OpDot:
            case spv::OpConvertFToU:
            case spv::OpConvertFToS:
            case spv::OpConvertSToF:
            case spv::OpConvertUToF:
            case spv::OpBitcast:
            case spv::OpSelect:
            case spv::OpExtInst:
            case spv::OpIsInf:
            case spv::OpIsNan:
            case spv::OpAny:
            case spv::OpAll:
            case spv::OpDPdx:
            case spv::OpDPdxCoarse:
            case spv::OpDPdy:
            case spv::OpDPdyCoarse:
            case spv::OpFwidth:
            case spv::OpFwidthCoarse:
            case spv::OpDPdxFine:
            case spv::OpDPdyFine:
            case spv::OpFwidthFine:
            case spv::OpAtomicLoad:
            case spv::OpAtomicIAdd:
            case spv::OpAtomicISub:
            case spv::OpAtomicSMin:
            case spv::OpAtomicSMax:
            case spv::OpAtomicUMin:
            case spv::OpAtomicUMax:
            case spv::OpAtomicAnd:
            case spv::OpAtomicOr:
            case spv::OpAtomicXor:
            case spv::OpAtomicIIncrement:
            case spv::OpAtomicIDecrement:
            case spv::OpAtomicExchange:
            case spv::OpAtomicCompareExchange:
            case spv::OpPhi:
            case spv::OpImageSampleImplicitLod:
            case spv::OpImageSampleExplicitLod:
            case spv::OpImageSampleDrefImplicitLod:
            case spv::OpImageSampleDrefExplicitLod:
            case spv::OpImageSampleProjImplicitLod:
            case spv::OpImageSampleProjExplicitLod:
            case spv::OpImageSampleProjDrefImplicitLod:
            case spv::OpImageSampleProjDrefExplicitLod:
            case spv::OpImageGather:
            case spv::OpImageDrefGather:
            case spv::OpImageFetch:
            case spv::OpImageQuerySizeLod:
            case spv::OpImageQuerySize:
            case spv::OpImageQueryLod:
            case spv::OpImageQueryLevels:
            case spv::OpImageQuerySamples:
            case spv::OpImageRead:
            case spv::OpImageTexelPointer:
            case spv::OpGroupNonUniformElect:
            case spv::OpGroupNonUniformAll:
            case spv::OpGroupNonUniformAny:
            case spv::OpGroupNonUniformAllEqual:
            case spv::OpGroupNonUniformBroadcast:
            case spv::OpGroupNonUniformBroadcastFirst:
            case spv::OpGroupNonUniformBallot:
            case spv::OpGroupNonUniformInverseBallot:
            case spv::OpGroupNonUniformBallotBitExtract:
            case spv::OpGroupNonUniformBallotBitCount:
            case spv::OpGroupNonUniformBallotFindLSB:
            case spv::OpGroupNonUniformBallotFindMSB:
            case spv::OpGroupNonUniformShuffle:
            case spv::OpGroupNonUniformShuffleXor:
            case spv::OpGroupNonUniformShuffleUp:
            case spv::OpGroupNonUniformShuffleDown:
            case spv::OpCopyObject:
            case spv::OpArrayLength:
                // Instructions that yield an intermediate value or divergent pointer
                DefineResult(insn);
                break;

            case spv::OpStore:
            case spv::OpAtomicStore:
            case spv::OpImageWrite:
            case spv::OpCopyMemory:
            case spv::OpMemoryBarrier:
                // Don't need to do anything during analysis pass
                break;

            case spv::OpControlBarrier:
                modes.ContainsControlBarriers = true;
                break;

            case spv::OpExtension:
            {
                auto ext = insn.string(1);
                // Part of core SPIR-V 1.3. Vulkan 1.1 implementations must also accept the pre-1.3
                // extension per Appendix A, `Vulkan Environment for SPIR-V`.
                if (!strcmp(ext, "SPV_KHR_storage_buffer_storage_class")) break;
                if (!strcmp(ext, "SPV_KHR_shader_draw_parameters")) break;
                if (!strcmp(ext, "SPV_KHR_16bit_storage")) break;
                if (!strcmp(ext, "SPV_KHR_variable_pointers")) break;
                if (!strcmp(ext, "SPV_KHR_device_group")) break;
                if (!strcmp(ext, "SPV_KHR_multiview")) break;
                UNSUPPORTED("SPIR-V Extension: %s", ext);
                break;
            }

            default:
                UNIMPLEMENTED("%s", OpcodeName(opcode).c_str());
            }
        }

        ASSERT_MSG(entryPoint != 0, "Entry point '%s' not found", entryPointName);
        for (auto &it : functions)
        {
            it.second.AssignBlockFields();
        }
    }

    void SpirvShader::DeclareType(InsnIterator insn)
    {
        Type::ID resultId = insn.word(1);

        auto &type = types[resultId];
        type.definition = insn;
        type.sizeInComponents = ComputeTypeSize(insn);

        // A structure is a builtin block if it has a builtin
        // member. All members of such a structure are builtins.
        switch (insn.opcode())
        {
        case spv::OpTypeStruct:
        {
            auto d = memberDecorations.find(resultId);
            if (d != memberDecorations.end())
            {
                for (auto &m : d->second)
                {
                    if (m.HasBuiltIn)
                    {
                        type.isBuiltInBlock = true;
                        break;
                    }
                }
            }
            break;
        }
        case spv::OpTypePointer:
        {
            Type::ID elementTypeId = insn.word(3);
            type.element = elementTypeId;
            type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
            type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
            break;
        }
        case spv::OpTypeVector:
        case spv::OpTypeMatrix:
        case spv::OpTypeArray:
        case spv::OpTypeRuntimeArray:
        {
            Type::ID elementTypeId = insn.word(2);
            type.element = elementTypeId;
            break;
        }
        default:
            break;
        }
    }

    SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
    {
        Type::ID typeId = insn.word(1);
        Object::ID resultId = insn.word(2);
        auto &object = defs[resultId];
        auto &objectTy = getType(typeId);
        object.type = typeId;
        object.kind = Object::Kind::Constant;
        object.definition = insn;
        object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
        return object;
    }

    void SpirvShader::ProcessInterfaceVariable(Object &object)
    {
        auto &objectTy = getType(object.type);
        ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);

        ASSERT(objectTy.opcode() == spv::OpTypePointer);
        auto pointeeTy = getType(objectTy.element);

        auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
        auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;

        ASSERT(object.opcode() == spv::OpVariable);
        Object::ID resultId = object.definition.word(2);

        if (objectTy.isBuiltInBlock)
        {
            // walk the builtin block, registering each of its members separately.
            auto m = memberDecorations.find(objectTy.element);
            ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
            auto &structType = pointeeTy.definition;
            auto offset = 0u;
            auto word = 2u;
            for (auto &member : m->second)
            {
                auto &memberType = getType(structType.word(word));

                if (member.HasBuiltIn)
                {
                    builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
                }

                offset += memberType.sizeInComponents;
                ++word;
            }
            return;
        }

        auto d = decorations.find(resultId);
        if (d != decorations.end() && d->second.HasBuiltIn)
        {
            builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
        }
        else
        {
            object.kind = Object::Kind::InterfaceVariable;
            VisitInterface(resultId,
                    [&userDefinedInterface](Decorations const &d, AttribType type) {
                        // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
                        auto scalarSlot = (d.Location << 2) | d.Component;
                        ASSERT(scalarSlot >= 0 &&
                               scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));

                        auto &slot = userDefinedInterface[scalarSlot];
                        slot.Type = type;
                        slot.Flat = d.Flat;
                        slot.NoPerspective = d.NoPerspective;
                        slot.Centroid = d.Centroid;
                    });
        }
    }

    void SpirvShader::ProcessExecutionMode(InsnIterator insn)
    {
        auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
        switch (mode)
        {
        case spv::ExecutionModeEarlyFragmentTests:
            modes.EarlyFragmentTests = true;
            break;
        case spv::ExecutionModeDepthReplacing:
            modes.DepthReplacing = true;
            break;
        case spv::ExecutionModeDepthGreater:
            modes.DepthGreater = true;
            break;
        case spv::ExecutionModeDepthLess:
            modes.DepthLess = true;
            break;
        case spv::ExecutionModeDepthUnchanged:
            modes.DepthUnchanged = true;
            break;
        case spv::ExecutionModeLocalSize:
            modes.WorkgroupSizeX = insn.word(3);
            modes.WorkgroupSizeY = insn.word(4);
            modes.WorkgroupSizeZ = insn.word(5);
            break;
        case spv::ExecutionModeOriginUpperLeft:
            // This is always the case for a Vulkan shader. Do nothing.
            break;
        default:
            UNREACHABLE("Execution mode: %d", int(mode));
        }
    }

    uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
    {
        // Types are always built from the bottom up (with the exception of
        // forward ptrs, which don't appear in Vulkan shaders). Therefore, we can
        // always assume our component parts have already been described (and so
        // their sizes determined).
        switch (insn.opcode())
        {
        case spv::OpTypeVoid:
        case spv::OpTypeSampler:
        case spv::OpTypeImage:
        case spv::OpTypeSampledImage:
        case spv::OpTypeFunction:
        case spv::OpTypeRuntimeArray:
            // Objects that don't consume any space.
            // Descriptor-backed objects currently only need to exist at compile-time.
            // Runtime arrays don't appear in places where their size would be interesting.
            return 0;

        case spv::OpTypeBool:
        case spv::OpTypeFloat:
        case spv::OpTypeInt:
            // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
            // we might need to change this, but only 32-bit components are required for Vulkan 1.1.
            return 1;

        case spv::OpTypeVector:
        case spv::OpTypeMatrix:
            // Vectors and matrices both consume element count * element size.
            return getType(insn.word(2)).sizeInComponents * insn.word(3);

        case spv::OpTypeArray:
        {
            // Element count * element size. Array sizes come from constant ids.
            auto arraySize = GetConstScalarInt(insn.word(3));
            return getType(insn.word(2)).sizeInComponents * arraySize;
        }

        case spv::OpTypeStruct:
        {
            uint32_t size = 0;
            for (uint32_t i = 2u; i < insn.wordCount(); i++)
            {
                size += getType(insn.word(i)).sizeInComponents;
            }
            return size;
        }

        case spv::OpTypePointer:
            // Runtime representation of a pointer is a per-lane index.
            // Note: clients are expected to look through the pointer if they want the pointee size instead.
            return 1;

        default:
            UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
            return 0;
        }
    }

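    // Returns whether stores to the given storage class are still performed in
    // helper invocations. Helper invocations must not have side effects that
    // are visible outside the quad, so stores to buffer and image memory are
    // suppressed, while per-invocation memory is still written so that
    // derivative computations see up-to-date values.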
    bool SpirvShader::StoresInHelperInvocation(spv::StorageClass storageClass)
    {
        switch (storageClass)
        {
        case spv::StorageClassUniform:
        case spv::StorageClassStorageBuffer:
        case spv::StorageClassImage:
            return false;
        default:
            return true;
        }
    }

    bool SpirvShader::IsExplicitLayout(spv::StorageClass storageClass)
    {
        switch (storageClass)
        {
        case spv::StorageClassUniform:
        case spv::StorageClassStorageBuffer:
        case spv::StorageClassPushConstant:
            return true;
        default:
            return false;
        }
    }

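    // Returns whether the given storage class stores its data interleaved by
    // lane (see interleaveByLane above). Register-like, per-invocation storage
    // is interleaved so each lane gets its own copy; memory with an externally
    // visible or shared layout (buffers, push constants, workgroup memory,
    // images) is not.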
    bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
    {
        switch (storageClass)
        {
        case spv::StorageClassUniform:
        case spv::StorageClassStorageBuffer:
        case spv::StorageClassPushConstant:
        case spv::StorageClassWorkgroup:
        case spv::StorageClassImage:
            return false;
        default:
            return true;
        }
    }

    template<typename F>
    int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
    {
        // Recursively walks variable definition and its type tree, taking into account
        // any explicit Location or Component decorations encountered; where explicit
        // Locations or Components are not specified, assigns them sequentially.
        // Collected decorations are carried down toward the leaves and across
        // siblings; the effect of decorations intentionally does not flow back up the tree.
        //
        // F is a functor to be called with the effective decoration set for every component.
        //
        // Returns the next available location, and calls f().

        // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.

        ApplyDecorationsForId(&d, id);

        auto const &obj = getType(id);
        switch(obj.opcode())
        {
        case spv::OpTypePointer:
            return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
        case spv::OpTypeMatrix:
            for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
            {
                // consumes same components of N consecutive locations
                VisitInterfaceInner<F>(obj.definition.word(2), d, f);
            }
            return d.Location;
        case spv::OpTypeVector:
            for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
            {
                // consumes N consecutive components in the same location
                VisitInterfaceInner<F>(obj.definition.word(2), d, f);
            }
            return d.Location + 1;
        case spv::OpTypeFloat:
            f(d, ATTRIBTYPE_FLOAT);
            return d.Location + 1;
        case spv::OpTypeInt:
            f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
            return d.Location + 1;
        case spv::OpTypeBool:
            f(d, ATTRIBTYPE_UINT);
            return d.Location + 1;
        case spv::OpTypeStruct:
        {
            // iterate over members, which may themselves have Location/Component decorations
            for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
            {
                ApplyDecorationsForIdMember(&d, id, i);
                d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
                d.Component = 0; // Implicit locations always have component=0
            }
            return d.Location;
        }
        case spv::OpTypeArray:
        {
            auto arraySize = GetConstScalarInt(obj.definition.word(3));
            for (auto i = 0u; i < arraySize; i++)
            {
                d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
            }
            return d.Location;
        }
        default:
            // Intentionally partial; most opcodes do not participate in type hierarchies
            return 0;
        }
    }

    template<typename F>
    void SpirvShader::VisitInterface(Object::ID id, F f) const
    {
        // Walk a variable definition and call f for each component in it.
        Decorations d{};
        ApplyDecorationsForId(&d, id);

        auto def = getObject(id).definition;
        ASSERT(def.opcode() == spv::OpVariable);
        VisitInterfaceInner<F>(def.word(1), d, f);
    }

    template<typename F>
    void SpirvShader::VisitMemoryObjectInner(sw::SpirvShader::Type::ID id, sw::SpirvShader::Decorations d, uint32_t& index, uint32_t offset, F f) const
    {
        // Walk a type tree in an explicitly laid out storage class, calling
        // a functor for each scalar element within the object.

        // The functor's first parameter is the index of the scalar element;
        // the second parameter is the offset (in bytes) from the base of the
        // object.

        ApplyDecorationsForId(&d, id);
        auto const &type = getType(id);

        if (d.HasOffset)
        {
            offset += d.Offset;
            d.HasOffset = false;
        }

        switch (type.opcode())
        {
        case spv::OpTypePointer:
            VisitMemoryObjectInner<F>(type.definition.word(3), d, index, offset, f);
            break;
        case spv::OpTypeInt:
        case spv::OpTypeFloat:
            f(index++, offset);
            break;
        case spv::OpTypeVector:
        {
            auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
            for (auto i = 0u; i < type.definition.word(3); i++)
            {
                VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
            }
            break;
        }
        case spv::OpTypeMatrix:
        {
            auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
            d.InsideMatrix = true;
            for (auto i = 0u; i < type.definition.word(3); i++)
            {
                ASSERT(d.HasMatrixStride);
                VisitMemoryObjectInner(type.definition.word(2), d, index, offset + columnStride * i, f);
            }
            break;
        }
        case spv::OpTypeStruct:
            for (auto i = 0u; i < type.definition.wordCount() - 2; i++)
            {
                ApplyDecorationsForIdMember(&d, id, i);
                VisitMemoryObjectInner<F>(type.definition.word(i + 2), d, index, offset, f);
            }
            break;
        case spv::OpTypeArray:
        {
            auto arraySize = GetConstScalarInt(type.definition.word(3));
            for (auto i = 0u; i < arraySize; i++)
            {
                ASSERT(d.HasArrayStride);
                VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
            }
            break;
        }
        default:
            UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
        }
    }

    template<typename F>
    void SpirvShader::VisitMemoryObject(sw::SpirvShader::Object::ID id, F f) const
    {
        auto typeId = getObject(id).type;
        auto const &type = getType(typeId);
        if (IsExplicitLayout(type.storageClass))
        {
            Decorations d{};
            ApplyDecorationsForId(&d, id);
            uint32_t index = 0;
            VisitMemoryObjectInner<F>(typeId, d, index, 0, f);
        }
        else
        {
            // Objects without explicit layout are tightly packed.
            for (auto i = 0u; i < getType(type.element).sizeInComponents; i++)
            {
                f(i, i * sizeof(float));
            }
        }
    }

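    // Returns the base pointer and size of the memory backing the given
    // object. For descriptor-backed objects this resolves the binding through
    // the pipeline layout, dereferences the BufferDescriptor, and applies any
    // dynamic offset; the size is clamped so that robustness checks against
    // the returned pointer stay within the bound range.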
    SIMD::Pointer SpirvShader::GetPointerToData(Object::ID id, int arrayIndex, EmitState const *state) const
    {
        auto routine = state->routine;
        auto &object = getObject(id);
        switch (object.kind)
        {
        case Object::Kind::Pointer:
        case Object::Kind::InterfaceVariable:
            return state->getPointer(id);

        case Object::Kind::DescriptorSet:
        {
            const auto &d = descriptorDecorations.at(id);
            ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
            ASSERT(d.Binding >= 0);

            auto set = state->getPointer(id);

            auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
            ASSERT_MSG(setLayout->hasBinding(d.Binding), "Descriptor set %d does not contain binding %d", int(d.DescriptorSet), int(d.Binding));
            int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));

            Pointer<Byte> descriptor = set.base + bindingOffset; // BufferDescriptor*
            Pointer<Byte> data = *Pointer<Pointer<Byte>>(descriptor + OFFSET(vk::BufferDescriptor, ptr)); // void*
            Int size = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, sizeInBytes));
            if (setLayout->isBindingDynamic(d.Binding))
            {
                uint32_t dynamicBindingIndex =
                        routine->pipelineLayout->getDynamicOffsetBase(d.DescriptorSet) +
                        setLayout->getDynamicDescriptorOffset(d.Binding) +
                        arrayIndex;
                Int offset = routine->descriptorDynamicOffsets[dynamicBindingIndex];
                Int robustnessSize = *Pointer<Int>(descriptor + OFFSET(vk::BufferDescriptor, robustnessSize));
                return SIMD::Pointer(data + offset, Min(size, robustnessSize - offset));
            }
            else
            {
                return SIMD::Pointer(data, size);
            }
        }

        default:
            UNREACHABLE("Invalid pointer kind %d", int(object.kind));
            return SIMD::Pointer(Pointer<Byte>(), 0);
        }
    }

    void SpirvShader::ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const
    {
        ApplyDecorationsForId(d, baseId);
        auto &baseObject = getObject(baseId);
        ApplyDecorationsForId(d, baseObject.type);
        auto typeId = getType(baseObject.type).element;

        for (auto i = 0u; i < numIndexes; i++)
        {
            ApplyDecorationsForId(d, typeId);
            auto &type = getType(typeId);
            switch (type.opcode())
            {
            case spv::OpTypeStruct:
            {
                int memberIndex = GetConstScalarInt(indexIds[i]);
                ApplyDecorationsForIdMember(d, typeId, memberIndex);
                typeId = type.definition.word(2u + memberIndex);
                break;
            }
            case spv::OpTypeArray:
            case spv::OpTypeRuntimeArray:
                if (dd->InputAttachmentIndex >= 0)
                {
                    dd->InputAttachmentIndex += GetConstScalarInt(indexIds[i]);
                }
                typeId = type.element;
                break;
            case spv::OpTypeVector:
                typeId = type.element;
                break;
            case spv::OpTypeMatrix:
                typeId = type.element;
                d->InsideMatrix = true;
                break;
            default:
                UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
            }
        }
    }

    SIMD::Pointer SpirvShader::WalkExplicitLayoutAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const
    {
		// Produce an offset into external memory, in bytes.
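		//
		// For example (illustrative layout, not taken from any particular
		// module), given
		//     struct S { float a; /* Offset 0 */ vec4 b[8]; /* Offset 16, ArrayStride 16 */ };
		// the chain S.b[i].y contributes constantOffset = 16 (member offset)
		// + 4 (component y) and, when i is not a compile-time constant, a
		// per-lane dynamic offset of 16 * i added directly to ptr.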
1682
1683 auto &baseObject = getObject(baseId);
1684 Type::ID typeId = getType(baseObject.type).element;
1685 Decorations d = {};
1686 ApplyDecorationsForId(&d, baseObject.type);
1687
1688 uint32_t arrayIndex = 0;
1689 if (baseObject.kind == Object::Kind::DescriptorSet)
1690 {
			auto opcode = getType(typeId).definition.opcode();
			if (opcode == spv::OpTypeArray || opcode == spv::OpTypeRuntimeArray)
1693 {
1694 ASSERT(getObject(indexIds[0]).kind == Object::Kind::Constant);
1695 arrayIndex = GetConstScalarInt(indexIds[0]);
1696
1697 numIndexes--;
1698 indexIds++;
1699 typeId = getType(typeId).element;
1700 }
1701 }
1702
1703 auto ptr = GetPointerToData(baseId, arrayIndex, state);
1704
1705 int constantOffset = 0;
1706
1707 for (auto i = 0u; i < numIndexes; i++)
1708 {
1709 auto & type = getType(typeId);
1710 ApplyDecorationsForId(&d, typeId);
1711
1712 switch (type.definition.opcode())
1713 {
1714 case spv::OpTypeStruct:
1715 {
1716 int memberIndex = GetConstScalarInt(indexIds[i]);
1717 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
1718 ASSERT(d.HasOffset);
1719 constantOffset += d.Offset;
1720 typeId = type.definition.word(2u + memberIndex);
1721 break;
1722 }
1723 case spv::OpTypeArray:
1724 case spv::OpTypeRuntimeArray:
1725 {
1726 // TODO: b/127950082: Check bounds.
1727 ASSERT(d.HasArrayStride);
1728 auto & obj = getObject(indexIds[i]);
1729 if (obj.kind == Object::Kind::Constant)
1730 {
1731 constantOffset += d.ArrayStride * GetConstScalarInt(indexIds[i]);
1732 }
1733 else
1734 {
1735 ptr += SIMD::Int(d.ArrayStride) * state->getIntermediate(indexIds[i]).Int(0);
1736 }
1737 typeId = type.element;
1738 break;
1739 }
1740 case spv::OpTypeMatrix:
1741 {
1742 // TODO: b/127950082: Check bounds.
1743 ASSERT(d.HasMatrixStride);
1744 d.InsideMatrix = true;
1745 auto columnStride = (d.HasRowMajor && d.RowMajor) ? static_cast<int32_t>(sizeof(float)) : d.MatrixStride;
1746 auto & obj = getObject(indexIds[i]);
1747 if (obj.kind == Object::Kind::Constant)
1748 {
1749 constantOffset += columnStride * GetConstScalarInt(indexIds[i]);
1750 }
1751 else
1752 {
1753 ptr += SIMD::Int(columnStride) * state->getIntermediate(indexIds[i]).Int(0);
1754 }
1755 typeId = type.element;
1756 break;
1757 }
1758 case spv::OpTypeVector:
1759 {
1760 auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : static_cast<int32_t>(sizeof(float));
1761 auto & obj = getObject(indexIds[i]);
1762 if (obj.kind == Object::Kind::Constant)
1763 {
1764 constantOffset += elemStride * GetConstScalarInt(indexIds[i]);
1765 }
1766 else
1767 {
1768 ptr += SIMD::Int(elemStride) * state->getIntermediate(indexIds[i]).Int(0);
1769 }
1770 typeId = type.element;
1771 break;
1772 }
1773 default:
1774 UNREACHABLE("%s", OpcodeName(type.definition.opcode()).c_str());
1775 }
1776 }
1777
1778 ptr += constantOffset;
1779 return ptr;
1780 }
1781
1782 SIMD::Pointer SpirvShader::WalkAccessChain(Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const
1783 {
1784 // TODO: avoid doing per-lane work in some cases if we can?
1785 auto routine = state->routine;
1786 auto &baseObject = getObject(baseId);
1787 Type::ID typeId = getType(baseObject.type).element;
1788
1789 auto ptr = state->getPointer(baseId);
1790
1791 int constantOffset = 0;
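		// Pointers in these storage classes have no explicit layout
		// decorations; strides are derived from the tightly packed element
		// size (sizeInComponents * sizeof(float) bytes). Descriptor arrays
		// are the exception, using the binding stride instead.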
1792
1793 for (auto i = 0u; i < numIndexes; i++)
1794 {
1795 auto & type = getType(typeId);
1796 switch(type.opcode())
1797 {
1798 case spv::OpTypeStruct:
1799 {
1800 int memberIndex = GetConstScalarInt(indexIds[i]);
1801 int offsetIntoStruct = 0;
				for (auto j = 0; j < memberIndex; j++)
				{
					auto memberType = type.definition.word(2u + j);
					offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
				}
1806 constantOffset += offsetIntoStruct;
1807 typeId = type.definition.word(2u + memberIndex);
1808 break;
1809 }
1810
1811 case spv::OpTypeVector:
1812 case spv::OpTypeMatrix:
1813 case spv::OpTypeArray:
1814 case spv::OpTypeRuntimeArray:
1815 {
1816 // TODO: b/127950082: Check bounds.
1817 if (getType(baseObject.type).storageClass == spv::StorageClassUniformConstant)
1818 {
					// Indexing into an array of descriptors.
1820 auto &obj = getObject(indexIds[i]);
1821 if (obj.kind != Object::Kind::Constant)
1822 {
1823 UNSUPPORTED("SPIR-V SampledImageArrayDynamicIndexing Capability");
1824 }
1825
1826 auto d = descriptorDecorations.at(baseId);
1827 ASSERT(d.DescriptorSet >= 0);
1828 ASSERT(d.Binding >= 0);
1829 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
1830 auto stride = static_cast<uint32_t>(setLayout->getBindingStride(d.Binding));
1831 ptr.base += stride * GetConstScalarInt(indexIds[i]);
1832 }
1833 else
1834 {
1835 auto stride = getType(type.element).sizeInComponents * static_cast<uint32_t>(sizeof(float));
1836 auto & obj = getObject(indexIds[i]);
1837 if (obj.kind == Object::Kind::Constant)
1838 {
1839 ptr += stride * GetConstScalarInt(indexIds[i]);
1840 }
1841 else
1842 {
1843 ptr += SIMD::Int(stride) * state->getIntermediate(indexIds[i]).Int(0);
1844 }
1845 }
1846 typeId = type.element;
1847 break;
1848 }
1849
1850 default:
1851 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
1852 }
1853 }
1854
1855 if (constantOffset != 0)
1856 {
1857 ptr += constantOffset;
1858 }
1859 return ptr;
1860 }
1861
1862 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
1863 {
1864 uint32_t componentOffset = 0;
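		// All indexes are literals, so the walk folds into a single constant
		// component offset. E.g. (illustrative) for struct { vec3 a; mat2 b; },
		// indexes {1, 1} select the second column of b: 3 (skip a) + 2 * 1 = 5.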
1865
1866 for (auto i = 0u; i < numIndexes; i++)
1867 {
1868 auto & type = getType(typeId);
1869 switch(type.opcode())
1870 {
1871 case spv::OpTypeStruct:
1872 {
1873 int memberIndex = indexes[i];
1874 int offsetIntoStruct = 0;
				for (auto j = 0; j < memberIndex; j++)
				{
					auto memberType = type.definition.word(2u + j);
					offsetIntoStruct += getType(memberType).sizeInComponents;
				}
1879 componentOffset += offsetIntoStruct;
1880 typeId = type.definition.word(2u + memberIndex);
1881 break;
1882 }
1883
1884 case spv::OpTypeVector:
1885 case spv::OpTypeMatrix:
1886 case spv::OpTypeArray:
1887 {
1888 auto elementType = type.definition.word(2);
1889 auto stride = getType(elementType).sizeInComponents;
1890 componentOffset += stride * indexes[i];
1891 typeId = elementType;
1892 break;
1893 }
1894
1895 default:
1896 UNREACHABLE("%s", OpcodeName(type.opcode()).c_str());
1897 }
1898 }
1899
1900 return componentOffset;
1901 }
1902
1903 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
1904 {
1905 switch (decoration)
1906 {
1907 case spv::DecorationLocation:
1908 HasLocation = true;
1909 Location = static_cast<int32_t>(arg);
1910 break;
1911 case spv::DecorationComponent:
1912 HasComponent = true;
1913 Component = arg;
1914 break;
1915 case spv::DecorationBuiltIn:
1916 HasBuiltIn = true;
1917 BuiltIn = static_cast<spv::BuiltIn>(arg);
1918 break;
1919 case spv::DecorationFlat:
1920 Flat = true;
1921 break;
1922 case spv::DecorationNoPerspective:
1923 NoPerspective = true;
1924 break;
1925 case spv::DecorationCentroid:
1926 Centroid = true;
1927 break;
1928 case spv::DecorationBlock:
1929 Block = true;
1930 break;
1931 case spv::DecorationBufferBlock:
1932 BufferBlock = true;
1933 break;
1934 case spv::DecorationOffset:
1935 HasOffset = true;
1936 Offset = static_cast<int32_t>(arg);
1937 break;
1938 case spv::DecorationArrayStride:
1939 HasArrayStride = true;
1940 ArrayStride = static_cast<int32_t>(arg);
1941 break;
1942 case spv::DecorationMatrixStride:
1943 HasMatrixStride = true;
1944 MatrixStride = static_cast<int32_t>(arg);
1945 break;
1946 case spv::DecorationRelaxedPrecision:
1947 RelaxedPrecision = true;
1948 break;
1949 case spv::DecorationRowMajor:
1950 HasRowMajor = true;
1951 RowMajor = true;
1952 break;
		case spv::DecorationColMajor:
			HasRowMajor = true;
			RowMajor = false;
			break;
		default:
			// Intentionally partial; there are many decorations we just don't care about.
			break;
1959 }
1960 }
1961
1962 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1963 {
1964 // Apply a decoration group to this set of decorations
1965 if (src.HasBuiltIn)
1966 {
1967 HasBuiltIn = true;
1968 BuiltIn = src.BuiltIn;
1969 }
1970
1971 if (src.HasLocation)
1972 {
1973 HasLocation = true;
1974 Location = src.Location;
1975 }
1976
1977 if (src.HasComponent)
1978 {
1979 HasComponent = true;
1980 Component = src.Component;
1981 }
1982
1983 if (src.HasOffset)
1984 {
1985 HasOffset = true;
1986 Offset = src.Offset;
1987 }
1988
1989 if (src.HasArrayStride)
1990 {
1991 HasArrayStride = true;
1992 ArrayStride = src.ArrayStride;
1993 }
1994
1995 if (src.HasMatrixStride)
1996 {
1997 HasMatrixStride = true;
1998 MatrixStride = src.MatrixStride;
1999 }
2000
2001 if (src.HasRowMajor)
2002 {
2003 HasRowMajor = true;
2004 RowMajor = src.RowMajor;
2005 }
2006
2007 Flat |= src.Flat;
2008 NoPerspective |= src.NoPerspective;
2009 Centroid |= src.Centroid;
2010 Block |= src.Block;
2011 BufferBlock |= src.BufferBlock;
2012 RelaxedPrecision |= src.RelaxedPrecision;
2013 InsideMatrix |= src.InsideMatrix;
2014 }
2015
2016 void SpirvShader::DescriptorDecorations::Apply(const sw::SpirvShader::DescriptorDecorations &src)
2017 {
2018 if(src.DescriptorSet >= 0)
2019 {
2020 DescriptorSet = src.DescriptorSet;
2021 }
2022
2023 if(src.Binding >= 0)
2024 {
2025 Binding = src.Binding;
2026 }
2027
2028 if (src.InputAttachmentIndex >= 0)
2029 {
2030 InputAttachmentIndex = src.InputAttachmentIndex;
2031 }
2032 }
2033
2034 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
2035 {
2036 auto it = decorations.find(id);
2037 if (it != decorations.end())
2038 d->Apply(it->second);
2039 }
2040
2041 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
2042 {
2043 auto it = memberDecorations.find(id);
2044 if (it != memberDecorations.end() && member < it->second.size())
2045 {
2046 d->Apply(it->second[member]);
2047 }
2048 }
2049
2050 void SpirvShader::DefineResult(const InsnIterator &insn)
2051 {
2052 Type::ID typeId = insn.word(1);
2053 Object::ID resultId = insn.word(2);
2054 auto &object = defs[resultId];
2055 object.type = typeId;
2056
2057 switch (getType(typeId).opcode())
2058 {
2059 case spv::OpTypePointer:
2060 case spv::OpTypeImage:
2061 case spv::OpTypeSampledImage:
2062 case spv::OpTypeSampler:
2063 object.kind = Object::Kind::Pointer;
2064 break;
2065
2066 default:
2067 object.kind = Object::Kind::Intermediate;
2068 }
2069
2070 object.definition = insn;
2071 }
2072
2073 OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const
2074 {
2075 switch(storageClass)
2076 {
2077 case spv::StorageClassUniform:
2078 case spv::StorageClassStorageBuffer:
2079 // Buffer resource access. robustBufferAccess feature applies.
2080 return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
2081 : OutOfBoundsBehavior::UndefinedBehavior;
2082
2083 case spv::StorageClassImage:
2084 return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined"
2085
2086 case spv::StorageClassInput:
2087 if(executionModel == spv::ExecutionModelVertex)
2088 {
2089 // Vertex attributes follow robustBufferAccess rules.
2090 return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
2091 : OutOfBoundsBehavior::UndefinedBehavior;
2092 }
2093 // Fall through to default case.
2094 default:
2095 // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
2096 // TODO(b/131224163): Optimize cases statically known to be within bounds.
2097 return OutOfBoundsBehavior::UndefinedValue;
2098 }
2099
		return OutOfBoundsBehavior::Nullify;  // Unreachable: all switch cases return above.
2101 }
2102
2103 // emit-time
2104
2105 void SpirvShader::emitProlog(SpirvRoutine *routine) const
2106 {
2107 for (auto insn : *this)
2108 {
2109 switch (insn.opcode())
2110 {
2111 case spv::OpVariable:
2112 {
2113 Type::ID resultPointerTypeId = insn.word(1);
2114 auto resultPointerType = getType(resultPointerTypeId);
2115 auto pointeeType = getType(resultPointerType.element);
2116
2117 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
2118 {
2119 Object::ID resultId = insn.word(2);
2120 routine->createVariable(resultId, pointeeType.sizeInComponents);
2121 }
2122 break;
2123 }
2124 case spv::OpPhi:
2125 {
2126 auto type = getType(insn.word(1));
2127 Object::ID resultId = insn.word(2);
2128 routine->phis.emplace(resultId, SpirvRoutine::Variable(type.sizeInComponents));
2129 break;
2130 }
2131
2132 case spv::OpImageDrefGather:
2133 case spv::OpImageFetch:
2134 case spv::OpImageGather:
2135 case spv::OpImageQueryLod:
2136 case spv::OpImageSampleDrefExplicitLod:
2137 case spv::OpImageSampleDrefImplicitLod:
2138 case spv::OpImageSampleExplicitLod:
2139 case spv::OpImageSampleImplicitLod:
2140 case spv::OpImageSampleProjDrefExplicitLod:
2141 case spv::OpImageSampleProjDrefImplicitLod:
2142 case spv::OpImageSampleProjExplicitLod:
2143 case spv::OpImageSampleProjImplicitLod:
2144 {
2145 Object::ID resultId = insn.word(2);
2146 routine->samplerCache.emplace(resultId, SpirvRoutine::SamplerCache{});
2147 break;
2148 }
2149
2150 default:
			// Nothing else produces interface variables, so they can all be safely ignored.
2152 break;
2153 }
2154 }
2155 }
2156
2157 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const
2158 {
2159 EmitState state(routine, entryPoint, activeLaneMask, storesAndAtomicsMask, descriptorSets, robustBufferAccess, executionModel);
2160
2161 // Emit everything up to the first label
2162 // TODO: Separate out dispatch of block from non-block instructions?
2163 for (auto insn : *this)
2164 {
2165 if (insn.opcode() == spv::OpLabel)
2166 {
2167 break;
2168 }
2169 EmitInstruction(insn, &state);
2170 }
2171
2172 // Emit all the blocks starting from entryPoint.
2173 EmitBlocks(getFunction(entryPoint).entry, &state);
2174 }
2175
2176 void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
2177 {
2178 auto oldPending = state->pending;
2179 auto &function = getFunction(state->function);
2180
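		// Blocks are emitted from a worklist: a block is only generated once
		// all of the blocks it depends on have been visited; otherwise its
		// dependencies are pushed ahead of it and it is retried later.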
2181 std::deque<Block::ID> pending;
2182 state->pending = &pending;
2183 pending.push_front(id);
		while (!pending.empty())
2185 {
2186 auto id = pending.front();
2187
2188 auto const &block = function.getBlock(id);
2189 if (id == ignore)
2190 {
2191 pending.pop_front();
2192 continue;
2193 }
2194
2195 // Ensure all dependency blocks have been generated.
2196 auto depsDone = true;
2197 function.ForeachBlockDependency(id, [&](Block::ID dep)
2198 {
2199 if (state->visited.count(dep) == 0)
2200 {
2201 state->pending->push_front(dep);
2202 depsDone = false;
2203 }
2204 });
2205
2206 if (!depsDone)
2207 {
2208 continue;
2209 }
2210
2211 pending.pop_front();
2212
2213 state->block = id;
2214
2215 switch (block.kind)
2216 {
2217 case Block::Simple:
2218 case Block::StructuredBranchConditional:
2219 case Block::UnstructuredBranchConditional:
2220 case Block::StructuredSwitch:
2221 case Block::UnstructuredSwitch:
2222 EmitNonLoop(state);
2223 break;
2224
2225 case Block::Loop:
2226 EmitLoop(state);
2227 break;
2228
2229 default:
2230 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
2231 }
2232 }
2233
2234 state->pending = oldPending;
2235 }
2236
2237 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
2238 {
2239 for (auto insn = begin; insn != end; insn++)
2240 {
2241 auto res = EmitInstruction(insn, state);
2242 switch (res)
2243 {
2244 case EmitResult::Continue:
2245 continue;
			case EmitResult::Terminator:
				// Note: this only exits the switch. A terminator is the last
				// instruction of a block, so the loop ends here regardless.
				break;
2248 default:
2249 UNREACHABLE("Unexpected EmitResult %d", int(res));
2250 break;
2251 }
2252 }
2253 }
2254
2255 void SpirvShader::EmitNonLoop(EmitState *state) const
2256 {
2257 auto &function = getFunction(state->function);
2258 auto blockId = state->block;
2259 auto block = function.getBlock(blockId);
2260
2261 if (!state->visited.emplace(blockId).second)
2262 {
2263 return; // Already generated this block.
2264 }
2265
2266 if (blockId != function.entry)
2267 {
2268 // Set the activeLaneMask.
2269 SIMD::Int activeLaneMask(0);
2270 for (auto in : block.ins)
2271 {
2272 auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
2273 activeLaneMask |= inMask;
2274 }
2275 state->setActiveLaneMask(activeLaneMask);
2276 }
2277
2278 EmitInstructions(block.begin(), block.end(), state);
2279
2280 for (auto out : block.outs)
2281 {
2282 if (state->visited.count(out) == 0)
2283 {
2284 state->pending->push_back(out);
2285 }
2286 }
2287 }
2288
2289 void SpirvShader::EmitLoop(EmitState *state) const
2290 {
2291 auto &function = getFunction(state->function);
2292 auto blockId = state->block;
2293 auto &block = function.getBlock(blockId);
2294 auto mergeBlockId = block.mergeBlock;
2295 auto &mergeBlock = function.getBlock(mergeBlockId);
2296
2297 if (!state->visited.emplace(blockId).second)
2298 {
2299 return; // Already emitted this loop.
2300 }
2301
		// Gather all the blocks that make up the loop. The merge block is
		// seeded into the set first so the traversal stops at the loop
		// boundary.
		std::unordered_set<Block::ID> loopBlocks;
		loopBlocks.emplace(block.mergeBlock);
2305 function.TraverseReachableBlocks(blockId, loopBlocks);
2306
2307 // incomingBlocks are block ins that are not back-edges.
2308 std::unordered_set<Block::ID> incomingBlocks;
2309 for (auto in : block.ins)
2310 {
2311 if (loopBlocks.count(in) == 0)
2312 {
2313 incomingBlocks.emplace(in);
2314 }
2315 }
2316
2317 // Emit the loop phi instructions, and initialize them with a value from
2318 // the incoming blocks.
2319 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
2320 {
2321 if (insn.opcode() == spv::OpPhi)
2322 {
2323 StorePhi(blockId, insn, state, incomingBlocks);
2324 }
2325 }
2326
2327 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
2328 // This is initialized with the incoming active lane masks.
2329 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
2330 for (auto in : incomingBlocks)
2331 {
2332 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
2333 }
2334
2335 // mergeActiveLaneMasks contains edge lane masks for the merge block.
2336 // This is the union of all edge masks across all iterations of the loop.
2337 std::unordered_map<Block::ID, SIMD::Int> mergeActiveLaneMasks;
2338 for (auto in : function.getBlock(mergeBlockId).ins)
2339 {
2340 mergeActiveLaneMasks.emplace(in, SIMD::Int(0));
2341 }
2342
2343 // Create the loop basic blocks
2344 auto headerBasicBlock = Nucleus::createBasicBlock();
2345 auto mergeBasicBlock = Nucleus::createBasicBlock();
2346
2347 // Start emitting code inside the loop.
2348 Nucleus::createBr(headerBasicBlock);
2349 Nucleus::setInsertBlock(headerBasicBlock);
2350
2351 // Load the active lane mask.
2352 state->setActiveLaneMask(loopActiveLaneMask);
2353
2354 // Emit the non-phi loop header block's instructions.
2355 for (auto insn = block.begin(); insn != block.end(); insn++)
2356 {
2357 if (insn.opcode() == spv::OpPhi)
2358 {
2359 LoadPhi(insn, state);
2360 }
2361 else
2362 {
2363 EmitInstruction(insn, state);
2364 }
2365 }
2366
2367 // Emit all blocks between the loop header and the merge block, but
2368 // don't emit the merge block yet.
2369 for (auto out : block.outs)
2370 {
2371 EmitBlocks(out, state, mergeBlockId);
2372 }
2373
2374 // Restore current block id after emitting loop blocks.
2375 state->block = blockId;
2376
2377 // Rebuild the loopActiveLaneMask from the loop back edges.
2378 loopActiveLaneMask = SIMD::Int(0);
2379 for (auto in : block.ins)
2380 {
2381 if (function.ExistsPath(blockId, in, mergeBlockId))
2382 {
2383 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
2384 }
2385 }
2386
2387 // Add active lanes to the merge lane mask.
2388 for (auto in : function.getBlock(mergeBlockId).ins)
2389 {
2390 auto edge = Block::Edge{in, mergeBlockId};
2391 auto it = state->edgeActiveLaneMasks.find(edge);
2392 if (it != state->edgeActiveLaneMasks.end())
2393 {
2394 mergeActiveLaneMasks[in] |= it->second;
2395 }
2396 }
2397
2398 // Update loop phi values.
2399 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
2400 {
2401 if (insn.opcode() == spv::OpPhi)
2402 {
2403 StorePhi(blockId, insn, state, loopBlocks);
2404 }
2405 }
2406
2407 // Use the [loop -> merge] active lane masks to update the phi values in
2408 // the merge block. We need to do this to handle divergent control flow
2409 // in the loop.
2410 //
2411 // Consider the following:
2412 //
2413 // int phi_source = 0;
2414 // for (uint i = 0; i < 4; i++)
2415 // {
2416 // phi_source = 0;
2417 // if (gl_GlobalInvocationID.x % 4 == i) // divergent control flow
2418 // {
2419 // phi_source = 42; // single lane assignment.
2420 // break; // activeLaneMask for [loop->merge] is active for a single lane.
2421 // }
2422 // // -- we are here --
2423 // }
2424 // // merge block
2425 // int phi = phi_source; // OpPhi
2426 //
		// In this example, with each iteration of the loop, phi_source will
		// only have a single lane assigned. However, the 'phi' value in the
		// merge block needs to be assigned the union of all the per-lane
		// assignments of phi_source from when each lane exited the loop.
2431 for (auto insn = mergeBlock.begin(); insn != mergeBlock.end(); insn++)
2432 {
2433 if (insn.opcode() == spv::OpPhi)
2434 {
2435 StorePhi(mergeBlockId, insn, state, loopBlocks);
2436 }
2437 }
2438
2439 // Loop body now done.
2440 // If any lanes are still active, jump back to the loop header,
2441 // otherwise jump to the merge block.
2442 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
2443
2444 // Continue emitting from the merge block.
2445 Nucleus::setInsertBlock(mergeBasicBlock);
2446 state->pending->push_back(mergeBlockId);
2447 for (auto it : mergeActiveLaneMasks)
2448 {
2449 state->addActiveLaneMaskEdge(it.first, mergeBlockId, it.second);
2450 }
2451 }
2452
2453 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
2454 {
2455 auto opcode = insn.opcode();
2456
2457 switch (opcode)
2458 {
2459 case spv::OpTypeVoid:
2460 case spv::OpTypeInt:
2461 case spv::OpTypeFloat:
2462 case spv::OpTypeBool:
2463 case spv::OpTypeVector:
2464 case spv::OpTypeArray:
2465 case spv::OpTypeRuntimeArray:
2466 case spv::OpTypeMatrix:
2467 case spv::OpTypeStruct:
2468 case spv::OpTypePointer:
2469 case spv::OpTypeFunction:
2470 case spv::OpTypeImage:
2471 case spv::OpTypeSampledImage:
2472 case spv::OpTypeSampler:
2473 case spv::OpExecutionMode:
2474 case spv::OpMemoryModel:
2475 case spv::OpFunction:
2476 case spv::OpFunctionEnd:
2477 case spv::OpConstant:
2478 case spv::OpConstantNull:
2479 case spv::OpConstantTrue:
2480 case spv::OpConstantFalse:
2481 case spv::OpConstantComposite:
2482 case spv::OpSpecConstant:
2483 case spv::OpSpecConstantTrue:
2484 case spv::OpSpecConstantFalse:
2485 case spv::OpSpecConstantComposite:
2486 case spv::OpSpecConstantOp:
2487 case spv::OpUndef:
2488 case spv::OpExtension:
2489 case spv::OpCapability:
2490 case spv::OpEntryPoint:
2491 case spv::OpExtInstImport:
2492 case spv::OpDecorate:
2493 case spv::OpMemberDecorate:
2494 case spv::OpGroupDecorate:
2495 case spv::OpGroupMemberDecorate:
2496 case spv::OpDecorationGroup:
2497 case spv::OpName:
2498 case spv::OpMemberName:
2499 case spv::OpSource:
2500 case spv::OpSourceContinued:
2501 case spv::OpSourceExtension:
2502 case spv::OpLine:
2503 case spv::OpNoLine:
2504 case spv::OpModuleProcessed:
2505 case spv::OpString:
2506 // Nothing to do at emit time. These are either fully handled at analysis time,
2507 // or don't require any work at all.
2508 return EmitResult::Continue;
2509
2510 case spv::OpLabel:
2511 return EmitResult::Continue;
2512
2513 case spv::OpVariable:
2514 return EmitVariable(insn, state);
2515
2516 case spv::OpLoad:
2517 case spv::OpAtomicLoad:
2518 return EmitLoad(insn, state);
2519
2520 case spv::OpStore:
2521 case spv::OpAtomicStore:
2522 return EmitStore(insn, state);
2523
2524 case spv::OpAtomicIAdd:
2525 case spv::OpAtomicISub:
2526 case spv::OpAtomicSMin:
2527 case spv::OpAtomicSMax:
2528 case spv::OpAtomicUMin:
2529 case spv::OpAtomicUMax:
2530 case spv::OpAtomicAnd:
2531 case spv::OpAtomicOr:
2532 case spv::OpAtomicXor:
2533 case spv::OpAtomicIIncrement:
2534 case spv::OpAtomicIDecrement:
2535 case spv::OpAtomicExchange:
2536 return EmitAtomicOp(insn, state);
2537
2538 case spv::OpAtomicCompareExchange:
2539 return EmitAtomicCompareExchange(insn, state);
2540
2541 case spv::OpAccessChain:
2542 case spv::OpInBoundsAccessChain:
2543 return EmitAccessChain(insn, state);
2544
2545 case spv::OpCompositeConstruct:
2546 return EmitCompositeConstruct(insn, state);
2547
2548 case spv::OpCompositeInsert:
2549 return EmitCompositeInsert(insn, state);
2550
2551 case spv::OpCompositeExtract:
2552 return EmitCompositeExtract(insn, state);
2553
2554 case spv::OpVectorShuffle:
2555 return EmitVectorShuffle(insn, state);
2556
2557 case spv::OpVectorExtractDynamic:
2558 return EmitVectorExtractDynamic(insn, state);
2559
2560 case spv::OpVectorInsertDynamic:
2561 return EmitVectorInsertDynamic(insn, state);
2562
2563 case spv::OpVectorTimesScalar:
2564 case spv::OpMatrixTimesScalar:
2565 return EmitVectorTimesScalar(insn, state);
2566
2567 case spv::OpMatrixTimesVector:
2568 return EmitMatrixTimesVector(insn, state);
2569
2570 case spv::OpVectorTimesMatrix:
2571 return EmitVectorTimesMatrix(insn, state);
2572
2573 case spv::OpMatrixTimesMatrix:
2574 return EmitMatrixTimesMatrix(insn, state);
2575
2576 case spv::OpOuterProduct:
2577 return EmitOuterProduct(insn, state);
2578
2579 case spv::OpTranspose:
2580 return EmitTranspose(insn, state);
2581
2582 case spv::OpNot:
2583 case spv::OpBitFieldInsert:
2584 case spv::OpBitFieldSExtract:
2585 case spv::OpBitFieldUExtract:
2586 case spv::OpBitReverse:
2587 case spv::OpBitCount:
2588 case spv::OpSNegate:
2589 case spv::OpFNegate:
2590 case spv::OpLogicalNot:
2591 case spv::OpConvertFToU:
2592 case spv::OpConvertFToS:
2593 case spv::OpConvertSToF:
2594 case spv::OpConvertUToF:
2595 case spv::OpBitcast:
2596 case spv::OpIsInf:
2597 case spv::OpIsNan:
2598 case spv::OpDPdx:
2599 case spv::OpDPdxCoarse:
2600 case spv::OpDPdy:
2601 case spv::OpDPdyCoarse:
2602 case spv::OpFwidth:
2603 case spv::OpFwidthCoarse:
2604 case spv::OpDPdxFine:
2605 case spv::OpDPdyFine:
2606 case spv::OpFwidthFine:
2607 case spv::OpQuantizeToF16:
2608 return EmitUnaryOp(insn, state);
2609
2610 case spv::OpIAdd:
2611 case spv::OpISub:
2612 case spv::OpIMul:
2613 case spv::OpSDiv:
2614 case spv::OpUDiv:
2615 case spv::OpFAdd:
2616 case spv::OpFSub:
2617 case spv::OpFMul:
2618 case spv::OpFDiv:
2619 case spv::OpFMod:
2620 case spv::OpFRem:
2621 case spv::OpFOrdEqual:
2622 case spv::OpFUnordEqual:
2623 case spv::OpFOrdNotEqual:
2624 case spv::OpFUnordNotEqual:
2625 case spv::OpFOrdLessThan:
2626 case spv::OpFUnordLessThan:
2627 case spv::OpFOrdGreaterThan:
2628 case spv::OpFUnordGreaterThan:
2629 case spv::OpFOrdLessThanEqual:
2630 case spv::OpFUnordLessThanEqual:
2631 case spv::OpFOrdGreaterThanEqual:
2632 case spv::OpFUnordGreaterThanEqual:
2633 case spv::OpSMod:
2634 case spv::OpSRem:
2635 case spv::OpUMod:
2636 case spv::OpIEqual:
2637 case spv::OpINotEqual:
2638 case spv::OpUGreaterThan:
2639 case spv::OpSGreaterThan:
2640 case spv::OpUGreaterThanEqual:
2641 case spv::OpSGreaterThanEqual:
2642 case spv::OpULessThan:
2643 case spv::OpSLessThan:
2644 case spv::OpULessThanEqual:
2645 case spv::OpSLessThanEqual:
2646 case spv::OpShiftRightLogical:
2647 case spv::OpShiftRightArithmetic:
2648 case spv::OpShiftLeftLogical:
2649 case spv::OpBitwiseOr:
2650 case spv::OpBitwiseXor:
2651 case spv::OpBitwiseAnd:
2652 case spv::OpLogicalOr:
2653 case spv::OpLogicalAnd:
2654 case spv::OpLogicalEqual:
2655 case spv::OpLogicalNotEqual:
2656 case spv::OpUMulExtended:
2657 case spv::OpSMulExtended:
2658 case spv::OpIAddCarry:
2659 case spv::OpISubBorrow:
2660 return EmitBinaryOp(insn, state);
2661
2662 case spv::OpDot:
2663 return EmitDot(insn, state);
2664
2665 case spv::OpSelect:
2666 return EmitSelect(insn, state);
2667
2668 case spv::OpExtInst:
2669 return EmitExtendedInstruction(insn, state);
2670
2671 case spv::OpAny:
2672 return EmitAny(insn, state);
2673
2674 case spv::OpAll:
2675 return EmitAll(insn, state);
2676
2677 case spv::OpBranch:
2678 return EmitBranch(insn, state);
2679
2680 case spv::OpPhi:
2681 return EmitPhi(insn, state);
2682
2683 case spv::OpSelectionMerge:
2684 case spv::OpLoopMerge:
2685 return EmitResult::Continue;
2686
2687 case spv::OpBranchConditional:
2688 return EmitBranchConditional(insn, state);
2689
2690 case spv::OpSwitch:
2691 return EmitSwitch(insn, state);
2692
2693 case spv::OpUnreachable:
2694 return EmitUnreachable(insn, state);
2695
2696 case spv::OpReturn:
2697 return EmitReturn(insn, state);
2698
2699 case spv::OpFunctionCall:
2700 return EmitFunctionCall(insn, state);
2701
2702 case spv::OpKill:
2703 return EmitKill(insn, state);
2704
2705 case spv::OpImageSampleImplicitLod:
2706 return EmitImageSampleImplicitLod(None, insn, state);
2707
2708 case spv::OpImageSampleExplicitLod:
2709 return EmitImageSampleExplicitLod(None, insn, state);
2710
2711 case spv::OpImageSampleDrefImplicitLod:
2712 return EmitImageSampleImplicitLod(Dref, insn, state);
2713
2714 case spv::OpImageSampleDrefExplicitLod:
2715 return EmitImageSampleExplicitLod(Dref, insn, state);
2716
2717 case spv::OpImageSampleProjImplicitLod:
2718 return EmitImageSampleImplicitLod(Proj, insn, state);
2719
2720 case spv::OpImageSampleProjExplicitLod:
2721 return EmitImageSampleExplicitLod(Proj, insn, state);
2722
2723 case spv::OpImageSampleProjDrefImplicitLod:
2724 return EmitImageSampleImplicitLod(ProjDref, insn, state);
2725
2726 case spv::OpImageSampleProjDrefExplicitLod:
2727 return EmitImageSampleExplicitLod(ProjDref, insn, state);
2728
2729 case spv::OpImageGather:
2730 return EmitImageGather(None, insn, state);
2731
2732 case spv::OpImageDrefGather:
2733 return EmitImageGather(Dref, insn, state);
2734
2735 case spv::OpImageFetch:
2736 return EmitImageFetch(insn, state);
2737
2738 case spv::OpImageQuerySizeLod:
2739 return EmitImageQuerySizeLod(insn, state);
2740
2741 case spv::OpImageQuerySize:
2742 return EmitImageQuerySize(insn, state);
2743
2744 case spv::OpImageQueryLod:
2745 return EmitImageQueryLod(insn, state);
2746
2747 case spv::OpImageQueryLevels:
2748 return EmitImageQueryLevels(insn, state);
2749
2750 case spv::OpImageQuerySamples:
2751 return EmitImageQuerySamples(insn, state);
2752
2753 case spv::OpImageRead:
2754 return EmitImageRead(insn, state);
2755
2756 case spv::OpImageWrite:
2757 return EmitImageWrite(insn, state);
2758
2759 case spv::OpImageTexelPointer:
2760 return EmitImageTexelPointer(insn, state);
2761
2762 case spv::OpSampledImage:
2763 case spv::OpImage:
2764 return EmitSampledImageCombineOrSplit(insn, state);
2765
2766 case spv::OpCopyObject:
2767 return EmitCopyObject(insn, state);
2768
2769 case spv::OpCopyMemory:
2770 return EmitCopyMemory(insn, state);
2771
2772 case spv::OpControlBarrier:
2773 return EmitControlBarrier(insn, state);
2774
2775 case spv::OpMemoryBarrier:
2776 return EmitMemoryBarrier(insn, state);
2777
2778 case spv::OpGroupNonUniformElect:
2779 case spv::OpGroupNonUniformAll:
2780 case spv::OpGroupNonUniformAny:
2781 case spv::OpGroupNonUniformAllEqual:
2782 case spv::OpGroupNonUniformBroadcast:
2783 case spv::OpGroupNonUniformBroadcastFirst:
2784 case spv::OpGroupNonUniformBallot:
2785 case spv::OpGroupNonUniformInverseBallot:
2786 case spv::OpGroupNonUniformBallotBitExtract:
2787 case spv::OpGroupNonUniformBallotBitCount:
2788 case spv::OpGroupNonUniformBallotFindLSB:
2789 case spv::OpGroupNonUniformBallotFindMSB:
2790 case spv::OpGroupNonUniformShuffle:
2791 case spv::OpGroupNonUniformShuffleXor:
2792 case spv::OpGroupNonUniformShuffleUp:
2793 case spv::OpGroupNonUniformShuffleDown:
2794 return EmitGroupNonUniform(insn, state);
2795
2796 case spv::OpArrayLength:
2797 return EmitArrayLength(insn, state);
2798
2799 default:
2800 UNREACHABLE("%s", OpcodeName(opcode).c_str());
2801 break;
2802 }
2803
2804 return EmitResult::Continue;
2805 }
2806
2807 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
2808 {
2809 auto routine = state->routine;
2810 Object::ID resultId = insn.word(2);
2811 auto &object = getObject(resultId);
2812 auto &objectTy = getType(object.type);
2813
2814 switch (objectTy.storageClass)
2815 {
2816 case spv::StorageClassOutput:
2817 case spv::StorageClassPrivate:
2818 case spv::StorageClassFunction:
2819 {
2820 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2821 auto base = &routine->getVariable(resultId)[0];
2822 auto elementTy = getType(objectTy.element);
2823 auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
2824 state->createPointer(resultId, SIMD::Pointer(base, size));
2825 break;
2826 }
2827 case spv::StorageClassWorkgroup:
2828 {
2829 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2830 auto base = &routine->workgroupMemory[0];
2831 auto size = workgroupMemory.size();
2832 state->createPointer(resultId, SIMD::Pointer(base, size, workgroupMemory.offsetOf(resultId)));
2833 break;
2834 }
2835 case spv::StorageClassInput:
2836 {
2837 if (object.kind == Object::Kind::InterfaceVariable)
2838 {
2839 auto &dst = routine->getVariable(resultId);
2840 int offset = 0;
2841 VisitInterface(resultId,
2842 [&](Decorations const &d, AttribType type) {
						auto scalarSlot = d.Location << 2 | d.Component;  // four scalar channels per location
2844 dst[offset++] = routine->inputs[scalarSlot];
2845 });
2846 }
2847 ASSERT(objectTy.opcode() == spv::OpTypePointer);
2848 auto base = &routine->getVariable(resultId)[0];
2849 auto elementTy = getType(objectTy.element);
2850 auto size = elementTy.sizeInComponents * static_cast<uint32_t>(sizeof(float)) * SIMD::Width;
2851 state->createPointer(resultId, SIMD::Pointer(base, size));
2852 break;
2853 }
2854 case spv::StorageClassUniformConstant:
2855 {
2856 const auto &d = descriptorDecorations.at(resultId);
2857 ASSERT(d.DescriptorSet >= 0);
2858 ASSERT(d.Binding >= 0);
2859
2860 uint32_t arrayIndex = 0; // TODO(b/129523279)
2861 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
2862 if (setLayout->hasBinding(d.Binding))
2863 {
2864 uint32_t bindingOffset = static_cast<uint32_t>(setLayout->getBindingOffset(d.Binding, arrayIndex));
2865 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
2866 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // vk::SampledImageDescriptor*
2867 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
2868 state->createPointer(resultId, SIMD::Pointer(binding, size));
2869 }
2870 else
2871 {
				// TODO: Error if the variable with the non-existent binding is
				// used? Or perhaps strip these unused variable declarations as
				// a preprocess on the SPIR-V?
2875 }
2876 break;
2877 }
2878 case spv::StorageClassUniform:
2879 case spv::StorageClassStorageBuffer:
2880 {
2881 const auto &d = descriptorDecorations.at(resultId);
2882 ASSERT(d.DescriptorSet >= 0);
2883 auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
			// Note: the module may contain descriptor set references that are
			// not suitable for this implementation: a set index higher than the
			// number of descriptor set binding points we support. As long as the
			// selected entry point doesn't actually touch the out-of-range
			// binding points, this is valid. In this case make the value nullptr
			// to make it easier to diagnose an attempt to dereference it.
2887 if (d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS)
2888 {
2889 state->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
2890 }
2891 else
2892 {
2893 state->createPointer(resultId, SIMD::Pointer(nullptr, 0));
2894 }
2895 break;
2896 }
2897 case spv::StorageClassPushConstant:
2898 {
2899 state->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
2900 break;
2901 }
2902 default:
2903 UNREACHABLE("Storage class %d", objectTy.storageClass);
2904 break;
2905 }
2906
2907 if (insn.wordCount() > 4)
2908 {
2909 Object::ID initializerId = insn.word(4);
2910 if (getObject(initializerId).kind != Object::Kind::Constant)
2911 {
2912 UNIMPLEMENTED("Non-constant initializers not yet implemented");
2913 }
2914 switch (objectTy.storageClass)
2915 {
2916 case spv::StorageClassOutput:
2917 case spv::StorageClassPrivate:
2918 case spv::StorageClassFunction:
2919 {
2920 bool interleavedByLane = IsStorageInterleavedByLane(objectTy.storageClass);
2921 auto ptr = GetPointerToData(resultId, 0, state);
2922 GenericValue initialValue(this, state, initializerId);
2923 VisitMemoryObject(resultId, [&](uint32_t i, uint32_t offset)
2924 {
2925 auto p = ptr + offset;
2926 if (interleavedByLane) { p = interleaveByLane(p); }
2927 auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
2928 SIMD::Store(p, initialValue.Float(i), robustness, state->activeLaneMask());
2929 });
2930 break;
2931 }
2932 default:
2933 ASSERT_MSG(initializerId == 0, "Vulkan does not permit variables of storage class %d to have initializers", int(objectTy.storageClass));
2934 }
2935 }
2936
2937 return EmitResult::Continue;
2938 }
2939
2940 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
2941 {
2942 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
2943 Object::ID resultId = insn.word(2);
2944 Object::ID pointerId = insn.word(3);
2945 auto &result = getObject(resultId);
2946 auto &resultTy = getType(result.type);
2947 auto &pointer = getObject(pointerId);
2948 auto &pointerTy = getType(pointer.type);
2949 std::memory_order memoryOrder = std::memory_order_relaxed;
2950
2951 ASSERT(getType(pointer.type).element == result.type);
2952 ASSERT(Type::ID(insn.word(1)) == result.type);
2953 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
2954
2955 if(pointerTy.storageClass == spv::StorageClassUniformConstant)
2956 {
2957 // Just propagate the pointer.
2958 auto &ptr = state->getPointer(pointerId);
2959 state->createPointer(resultId, ptr);
2960 return EmitResult::Continue;
2961 }
2962
2963 if(atomic)
2964 {
2965 Object::ID semanticsId = insn.word(5);
2966 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
2967 memoryOrder = MemoryOrder(memorySemantics);
2968 }
2969
2970 auto ptr = GetPointerToData(pointerId, 0, state);
2971 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
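		// In lane-interleaved storage each scalar component occupies
		// SIMD::Width consecutive floats (one per lane), so the pointer is
		// spread with interleaveByLane before the vector load.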
2972 auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
2973 auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
2974
2975 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
2976 {
2977 auto p = ptr + offset;
2978 if (interleavedByLane) { p = interleaveByLane(p); } // TODO: Interleave once, then add offset?
2979 dst.move(i, SIMD::Load<SIMD::Float>(p, robustness, state->activeLaneMask(), atomic, memoryOrder));
2980 });
2981
2982 return EmitResult::Continue;
2983 }
2984
2985 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
2986 {
2987 bool atomic = (insn.opcode() == spv::OpAtomicStore);
2988 Object::ID pointerId = insn.word(1);
2989 Object::ID objectId = insn.word(atomic ? 4 : 2);
2990 auto &object = getObject(objectId);
2991 auto &pointer = getObject(pointerId);
2992 auto &pointerTy = getType(pointer.type);
2993 auto &elementTy = getType(pointerTy.element);
2994 std::memory_order memoryOrder = std::memory_order_relaxed;
2995
2996 if(atomic)
2997 {
2998 Object::ID semanticsId = insn.word(3);
2999 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
3000 memoryOrder = MemoryOrder(memorySemantics);
3001 }
3002
3003 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
3004
3005 auto ptr = GetPointerToData(pointerId, 0, state);
3006 bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
3007 auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
3008
3009 SIMD::Int mask = state->activeLaneMask();
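		// Helper invocations must not have side effects visible outside their
		// quad, so their lanes are masked out of stores to external memory.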
3010 if (!StoresInHelperInvocation(pointerTy.storageClass))
3011 {
3012 mask = mask & state->storesAndAtomicsMask();
3013 }
3014
3015 if (object.kind == Object::Kind::Constant)
3016 {
3017 // Constant source data.
3018 const uint32_t *src = object.constantValue.get();
3019 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
3020 {
3021 auto p = ptr + offset;
3022 if (interleavedByLane) { p = interleaveByLane(p); }
3023 SIMD::Store(p, SIMD::Int(src[i]), robustness, mask, atomic, memoryOrder);
3024 });
3025 }
3026 else
3027 {
3028 // Intermediate source data.
3029 auto &src = state->getIntermediate(objectId);
3030 VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
3031 {
3032 auto p = ptr + offset;
3033 if (interleavedByLane) { p = interleaveByLane(p); }
3034 SIMD::Store(p, src.Float(i), robustness, mask, atomic, memoryOrder);
3035 });
3036 }
3037
3038 return EmitResult::Continue;
3039 }
3040
3041 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
3042 {
3043 Type::ID typeId = insn.word(1);
3044 Object::ID resultId = insn.word(2);
3045 Object::ID baseId = insn.word(3);
3046 uint32_t numIndexes = insn.wordCount() - 4;
3047 const uint32_t *indexes = insn.wordPointer(4);
3048 auto &type = getType(typeId);
3049 ASSERT(type.sizeInComponents == 1);
3050 ASSERT(getObject(resultId).kind == Object::Kind::Pointer);
3051
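		// Push constants and uniform/storage buffers carry explicit layout
		// decorations (Offset, ArrayStride, MatrixStride); all other storage
		// classes are tightly packed and walked in whole components.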
3052 if(type.storageClass == spv::StorageClassPushConstant ||
3053 type.storageClass == spv::StorageClassUniform ||
3054 type.storageClass == spv::StorageClassStorageBuffer)
3055 {
3056 auto ptr = WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, state);
3057 state->createPointer(resultId, ptr);
3058 }
3059 else
3060 {
3061 auto ptr = WalkAccessChain(baseId, numIndexes, indexes, state);
3062 state->createPointer(resultId, ptr);
3063 }
3064
3065 return EmitResult::Continue;
3066 }
3067
3068 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
3069 {
3070 auto &type = getType(insn.word(1));
3071 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3072 auto offset = 0u;
3073
3074 for (auto i = 0u; i < insn.wordCount() - 3; i++)
3075 {
3076 Object::ID srcObjectId = insn.word(3u + i);
3077 auto & srcObject = getObject(srcObjectId);
3078 auto & srcObjectTy = getType(srcObject.type);
3079 GenericValue srcObjectAccess(this, state, srcObjectId);
3080
3081 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
3082 {
3083 dst.move(offset++, srcObjectAccess.Float(j));
3084 }
3085 }
3086
3087 return EmitResult::Continue;
3088 }
3089
3090 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
3091 {
3092 Type::ID resultTypeId = insn.word(1);
3093 auto &type = getType(resultTypeId);
3094 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3095 auto &newPartObject = getObject(insn.word(3));
3096 auto &newPartObjectTy = getType(newPartObject.type);
3097 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
3098
3099 GenericValue srcObjectAccess(this, state, insn.word(4));
3100 GenericValue newPartObjectAccess(this, state, insn.word(3));
3101
3102 // old components before
3103 for (auto i = 0u; i < firstNewComponent; i++)
3104 {
3105 dst.move(i, srcObjectAccess.Float(i));
3106 }
3107 // new part
3108 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
3109 {
3110 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
3111 }
3112 // old components after
3113 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
3114 {
3115 dst.move(i, srcObjectAccess.Float(i));
3116 }
3117
3118 return EmitResult::Continue;
3119 }
3120
3121 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
3122 {
3123 auto &type = getType(insn.word(1));
3124 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3125 auto &compositeObject = getObject(insn.word(3));
3126 Type::ID compositeTypeId = compositeObject.definition.word(1);
3127 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
3128
3129 GenericValue compositeObjectAccess(this, state, insn.word(3));
3130 for (auto i = 0u; i < type.sizeInComponents; i++)
3131 {
3132 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
3133 }
3134
3135 return EmitResult::Continue;
3136 }
3137
3138 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
3139 {
3140 auto &type = getType(insn.word(1));
3141 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3142
		// Note: the numbers of components in the result type, first half type
		// and second half type are all independent.
3145 auto &firstHalfType = getType(getObject(insn.word(3)).type);
3146
3147 GenericValue firstHalfAccess(this, state, insn.word(3));
3148 GenericValue secondHalfAccess(this, state, insn.word(4));
3149
3150 for (auto i = 0u; i < type.sizeInComponents; i++)
3151 {
3152 auto selector = insn.word(5 + i);
3153 if (selector == static_cast<uint32_t>(-1))
3154 {
				// Undefined value. Until we decide to implement real undef
				// values, zero is as good a value as any.
3157 dst.move(i, RValue<SIMD::Float>(0.0f));
3158 }
3159 else if (selector < firstHalfType.sizeInComponents)
3160 {
3161 dst.move(i, firstHalfAccess.Float(selector));
3162 }
3163 else
3164 {
3165 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
3166 }
3167 }
3168
3169 return EmitResult::Continue;
3170 }
3171
3172 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
3173 {
3174 auto &type = getType(insn.word(1));
3175 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3176 auto &srcType = getType(getObject(insn.word(3)).type);
3177
3178 GenericValue src(this, state, insn.word(3));
3179 GenericValue index(this, state, insn.word(4));
3180
3181 SIMD::UInt v = SIMD::UInt(0);
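		// Branchless dynamic extract: AND each candidate component with an
		// all-ones mask where the runtime index matches, then OR the results.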
3182
3183 for (auto i = 0u; i < srcType.sizeInComponents; i++)
3184 {
3185 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
3186 }
3187
3188 dst.move(0, v);
3189 return EmitResult::Continue;
3190 }
3191
3192 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
3193 {
3194 auto &type = getType(insn.word(1));
3195 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3196
3197 GenericValue src(this, state, insn.word(3));
3198 GenericValue component(this, state, insn.word(4));
3199 GenericValue index(this, state, insn.word(5));
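		// Same masking trick as OpVectorExtractDynamic: blend the new component
		// into the index-matched position without branching.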
3200
3201 for (auto i = 0u; i < type.sizeInComponents; i++)
3202 {
3203 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
3204 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
3205 }
3206 return EmitResult::Continue;
3207 }
3208
3209 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
3210 {
3211 auto &type = getType(insn.word(1));
3212 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3213 auto lhs = GenericValue(this, state, insn.word(3));
3214 auto rhs = GenericValue(this, state, insn.word(4));
3215
3216 for (auto i = 0u; i < type.sizeInComponents; i++)
3217 {
3218 dst.move(i, lhs.Float(i) * rhs.Float(0));
3219 }
3220
3221 return EmitResult::Continue;
3222 }
3223
3224 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
3225 {
3226 auto &type = getType(insn.word(1));
3227 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3228 auto lhs = GenericValue(this, state, insn.word(3));
3229 auto rhs = GenericValue(this, state, insn.word(4));
3230 auto rhsType = getType(rhs.type);
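		// Matrix components are stored column-major: element (row, col) of the
		// lhs matrix is at flat index row + numRows * col, where numRows equals
		// the result vector's component count.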
3231
3232 for (auto i = 0u; i < type.sizeInComponents; i++)
3233 {
3234 SIMD::Float v = lhs.Float(i) * rhs.Float(0);
3235 for (auto j = 1u; j < rhsType.sizeInComponents; j++)
3236 {
3237 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
3238 }
3239 dst.move(i, v);
3240 }
3241
3242 return EmitResult::Continue;
3243 }
3244
3245 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
3246 {
3247 auto &type = getType(insn.word(1));
3248 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3249 auto lhs = GenericValue(this, state, insn.word(3));
3250 auto rhs = GenericValue(this, state, insn.word(4));
3251 auto lhsType = getType(lhs.type);
3252
3253 for (auto i = 0u; i < type.sizeInComponents; i++)
3254 {
3255 SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
3256 for (auto j = 1u; j < lhsType.sizeInComponents; j++)
3257 {
3258 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
3259 }
3260 dst.move(i, v);
3261 }
3262
3263 return EmitResult::Continue;
3264 }
3265
3266 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
3267 {
3268 auto &type = getType(insn.word(1));
3269 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3270 auto lhs = GenericValue(this, state, insn.word(3));
3271 auto rhs = GenericValue(this, state, insn.word(4));
3272
3273 auto numColumns = type.definition.word(3);
3274 auto numRows = getType(type.definition.word(2)).definition.word(3);
3275 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
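		// numAdds is the inner dimension of the multiplication: the lhs column
		// count, which must match the rhs row count.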
3276
3277 for (auto row = 0u; row < numRows; row++)
3278 {
3279 for (auto col = 0u; col < numColumns; col++)
3280 {
3281 SIMD::Float v = SIMD::Float(0);
3282 for (auto i = 0u; i < numAdds; i++)
3283 {
3284 v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
3285 }
3286 dst.move(numRows * col + row, v);
3287 }
3288 }
3289
3290 return EmitResult::Continue;
3291 }
3292
3293 SpirvShader::EmitResult SpirvShader::EmitOuterProduct(InsnIterator insn, EmitState *state) const
3294 {
3295 auto &type = getType(insn.word(1));
3296 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3297 auto lhs = GenericValue(this, state, insn.word(3));
3298 auto rhs = GenericValue(this, state, insn.word(4));
3299 auto &lhsType = getType(lhs.type);
3300 auto &rhsType = getType(rhs.type);
3301
3302 ASSERT(type.definition.opcode() == spv::OpTypeMatrix);
3303 ASSERT(lhsType.definition.opcode() == spv::OpTypeVector);
3304 ASSERT(rhsType.definition.opcode() == spv::OpTypeVector);
3305 ASSERT(getType(lhsType.element).opcode() == spv::OpTypeFloat);
3306 ASSERT(getType(rhsType.element).opcode() == spv::OpTypeFloat);
3307
3308 auto numRows = lhsType.definition.word(3);
3309 auto numCols = rhsType.definition.word(3);
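		// The result is a numRows x numCols matrix with element (row, col) =
		// lhs[row] * rhs[col], stored column-major like every other matrix.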
3310
3311 for (auto col = 0u; col < numCols; col++)
3312 {
3313 for (auto row = 0u; row < numRows; row++)
3314 {
3315 dst.move(col * numRows + row, lhs.Float(row) * rhs.Float(col));
3316 }
3317 }
3318
3319 return EmitResult::Continue;
3320 }
3321
3322 SpirvShader::EmitResult SpirvShader::EmitTranspose(InsnIterator insn, EmitState *state) const
3323 {
3324 auto &type = getType(insn.word(1));
3325 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3326 auto mat = GenericValue(this, state, insn.word(3));
3327
3328 auto numCols = type.definition.word(3);
3329 auto numRows = getType(type.definition.word(2)).sizeInComponents;
3330
3331 for (auto col = 0u; col < numCols; col++)
3332 {
3333 for (auto row = 0u; row < numRows; row++)
3334 {
3335 dst.move(col * numRows + row, mat.Float(row * numCols + col));
3336 }
3337 }
3338
3339 return EmitResult::Continue;
3340 }
3341
3342 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
3343 {
3344 auto &type = getType(insn.word(1));
3345 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3346 auto src = GenericValue(this, state, insn.word(3));
3347
3348 for (auto i = 0u; i < type.sizeInComponents; i++)
3349 {
3350 switch (insn.opcode())
3351 {
3352 case spv::OpNot:
3353 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
3354 dst.move(i, ~src.UInt(i));
3355 break;
3356 case spv::OpBitFieldInsert:
3357 {
3358 auto insert = GenericValue(this, state, insn.word(4)).UInt(i);
3359 auto offset = GenericValue(this, state, insn.word(5)).UInt(0);
3360 auto count = GenericValue(this, state, insn.word(6)).UInt(0);
				auto v = src.UInt(i);
				// Mask with 'count' set bits starting at bit 'offset'.
				auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
3364 dst.move(i, (v & ~mask) | ((insert << offset) & mask));
3365 break;
3366 }
3367 case spv::OpBitFieldSExtract:
3368 case spv::OpBitFieldUExtract:
3369 {
3370 auto offset = GenericValue(this, state, insn.word(4)).UInt(0);
3371 auto count = GenericValue(this, state, insn.word(5)).UInt(0);
3372 auto one = SIMD::UInt(1);
3373 auto v = src.UInt(i);
3374 SIMD::UInt out = (v >> offset) & Bitmask32(count);
3375 if (insn.opcode() == spv::OpBitFieldSExtract)
3376 {
					auto sign = out & NthBit32(count - one);  // isolate the field's sign bit
					auto sext = ~(sign - one);                // ones in and above the sign bit when set, zero otherwise
3379 out |= sext;
3380 }
3381 dst.move(i, out);
3382 break;
3383 }
3384 case spv::OpBitReverse:
3385 {
3386 // TODO: Add an intrinsic to reactor. Even if there isn't a
3387 // single vector instruction, there may be target-dependent
3388 // ways to make this faster.
3389 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
3390 SIMD::UInt v = src.UInt(i);
3391 v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
3392 v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
3393 v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
3394 v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
3395 v = (v >> 16) | (v << 16);
3396 dst.move(i, v);
3397 break;
3398 }
3399 case spv::OpBitCount:
3400 dst.move(i, CountBits(src.UInt(i)));
3401 break;
3402 case spv::OpSNegate:
3403 dst.move(i, -src.Int(i));
3404 break;
3405 case spv::OpFNegate:
3406 dst.move(i, -src.Float(i));
3407 break;
3408 case spv::OpConvertFToU:
3409 dst.move(i, SIMD::UInt(src.Float(i)));
3410 break;
3411 case spv::OpConvertFToS:
3412 dst.move(i, SIMD::Int(src.Float(i)));
3413 break;
3414 case spv::OpConvertSToF:
3415 dst.move(i, SIMD::Float(src.Int(i)));
3416 break;
3417 case spv::OpConvertUToF:
3418 dst.move(i, SIMD::Float(src.UInt(i)));
3419 break;
			case spv::OpBitcast:
				dst.move(i, src.Float(i));  // bit-preserving copy; all components here are 32 bits wide
3422 break;
3423 case spv::OpIsInf:
3424 dst.move(i, IsInf(src.Float(i)));
3425 break;
3426 case spv::OpIsNan:
3427 dst.move(i, IsNan(src.Float(i)));
3428 break;
3429 case spv::OpDPdx:
3430 case spv::OpDPdxCoarse:
3431 // Derivative instructions: FS invocations are laid out like so:
3432 // 0 1
3433 // 2 3
3434 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
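				// The Coarse variants use one forward difference for the whole
				// quad; the Fine variants below compute a difference per
				// row/column of the quad.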
3435 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
3436 break;
3437 case spv::OpDPdy:
3438 case spv::OpDPdyCoarse:
3439 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
3440 break;
3441 case spv::OpFwidth:
3442 case spv::OpFwidthCoarse:
3443 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
3444 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
3445 break;
3446 case spv::OpDPdxFine:
3447 {
3448 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3449 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3450 SIMD::Float v = SIMD::Float(firstRow);
3451 v = Insert(v, secondRow, 2);
3452 v = Insert(v, secondRow, 3);
3453 dst.move(i, v);
3454 break;
3455 }
3456 case spv::OpDPdyFine:
3457 {
3458 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3459 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3460 SIMD::Float v = SIMD::Float(firstColumn);
3461 v = Insert(v, secondColumn, 1);
3462 v = Insert(v, secondColumn, 3);
3463 dst.move(i, v);
3464 break;
3465 }
3466 case spv::OpFwidthFine:
3467 {
3468 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
3469 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
3470 SIMD::Float dpdx = SIMD::Float(firstRow);
3471 dpdx = Insert(dpdx, secondRow, 2);
3472 dpdx = Insert(dpdx, secondRow, 3);
3473 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
3474 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
3475 SIMD::Float dpdy = SIMD::Float(firstColumn);
3476 dpdy = Insert(dpdy, secondColumn, 1);
3477 dpdy = Insert(dpdy, secondColumn, 3);
3478 dst.move(i, Abs(dpdx) + Abs(dpdy));
3479 break;
3480 }
3481 case spv::OpQuantizeToF16:
3482 {
3483 // Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
3484 auto abs = Abs(src.Float(i));
3485 auto sign = src.Int(i) & SIMD::Int(0x80000000);
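				// 0.000061035f is roughly 2^-14, the smallest normal half-precision value,
				// and 65504.0f is the largest finite half. Smaller magnitudes flush to
				// (signed) zero; larger ones become infinity.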
3486 auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
3487 auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
3488 auto isNaN = IsNan(abs);
3489 auto isInfOrNan = isInf | isNaN;
3490 SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
3491 v &= ~isZero | SIMD::Int(0x80000000);
3492 v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
3493 v |= isNaN & SIMD::Int(0x400000);
3494 dst.move(i, v);
3495 break;
3496 }
3497 default:
3498 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
3499 }
3500 }
3501
3502 return EmitResult::Continue;
3503 }
3504
3505 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
3506 {
3507 auto &type = getType(insn.word(1));
3508 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3509 auto &lhsType = getType(getObject(insn.word(3)).type);
3510 auto lhs = GenericValue(this, state, insn.word(3));
3511 auto rhs = GenericValue(this, state, insn.word(4));
3512
3513 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
3514 {
3515 switch (insn.opcode())
3516 {
3517 case spv::OpIAdd:
3518 dst.move(i, lhs.Int(i) + rhs.Int(i));
3519 break;
3520 case spv::OpISub:
3521 dst.move(i, lhs.Int(i) - rhs.Int(i));
3522 break;
3523 case spv::OpIMul:
3524 dst.move(i, lhs.Int(i) * rhs.Int(i));
3525 break;
3526 case spv::OpSDiv:
3527 {
3528 SIMD::Int a = lhs.Int(i);
3529 SIMD::Int b = rhs.Int(i);
3530 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3531 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
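				// (x86 faults on INT_MIN / -1; forcing a to -1 in that case avoids it,
				// and the result of the overflowing division is undefined anyway.)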
3532 dst.move(i, a / b);
3533 break;
3534 }
3535 case spv::OpUDiv:
3536 {
3537 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
3538 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
3539 break;
3540 }
3541 case spv::OpSRem:
3542 {
3543 SIMD::Int a = lhs.Int(i);
3544 SIMD::Int b = rhs.Int(i);
3545 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3546 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
3547 dst.move(i, a % b);
3548 break;
3549 }
3550 case spv::OpSMod:
3551 {
3552 SIMD::Int a = lhs.Int(i);
3553 SIMD::Int b = rhs.Int(i);
3554 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
3555 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
3556 auto mod = a % b;
3557 // If a and b have opposite signs, the remainder operation takes
3558 // the sign from a but OpSMod is supposed to take the sign of b.
3559 // Adding b will ensure that the result has the correct sign and
3560 // that it is still congruent to a modulo b.
3561 //
3562 // See also http://mathforum.org/library/drmath/view/52343.html
3563 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
3564 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
3565 dst.move(i, As<SIMD::Float>(fixedMod));
3566 break;
3567 }
3568 case spv::OpUMod:
3569 {
3570 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
3571 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
3572 break;
3573 }
3574 case spv::OpIEqual:
3575 case spv::OpLogicalEqual:
3576 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
3577 break;
3578 case spv::OpINotEqual:
3579 case spv::OpLogicalNotEqual:
3580 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
3581 break;
3582 case spv::OpUGreaterThan:
3583 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
3584 break;
3585 case spv::OpSGreaterThan:
3586 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
3587 break;
3588 case spv::OpUGreaterThanEqual:
3589 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
3590 break;
3591 case spv::OpSGreaterThanEqual:
3592 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
3593 break;
3594 case spv::OpULessThan:
3595 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
3596 break;
3597 case spv::OpSLessThan:
3598 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
3599 break;
3600 case spv::OpULessThanEqual:
3601 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
3602 break;
3603 case spv::OpSLessThanEqual:
3604 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
3605 break;
3606 case spv::OpFAdd:
3607 dst.move(i, lhs.Float(i) + rhs.Float(i));
3608 break;
3609 case spv::OpFSub:
3610 dst.move(i, lhs.Float(i) - rhs.Float(i));
3611 break;
3612 case spv::OpFMul:
3613 dst.move(i, lhs.Float(i) * rhs.Float(i));
3614 break;
3615 case spv::OpFDiv:
3616 dst.move(i, lhs.Float(i) / rhs.Float(i));
3617 break;
3618 case spv::OpFMod:
3619 // TODO(b/126873455): inaccurate for values greater than 2^24
3620 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
3621 break;
3622 case spv::OpFRem:
3623 dst.move(i, lhs.Float(i) % rhs.Float(i));
3624 break;
3625 case spv::OpFOrdEqual:
3626 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
3627 break;
3628 case spv::OpFUnordEqual:
3629 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
3630 break;
3631 case spv::OpFOrdNotEqual:
3632 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
3633 break;
3634 case spv::OpFUnordNotEqual:
3635 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
3636 break;
3637 case spv::OpFOrdLessThan:
3638 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
3639 break;
3640 case spv::OpFUnordLessThan:
3641 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
3642 break;
3643 case spv::OpFOrdGreaterThan:
3644 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
3645 break;
3646 case spv::OpFUnordGreaterThan:
3647 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
3648 break;
3649 case spv::OpFOrdLessThanEqual:
3650 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
3651 break;
3652 case spv::OpFUnordLessThanEqual:
3653 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
3654 break;
3655 case spv::OpFOrdGreaterThanEqual:
3656 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
3657 break;
3658 case spv::OpFUnordGreaterThanEqual:
3659 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
3660 break;
3661 case spv::OpShiftRightLogical:
3662 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
3663 break;
3664 case spv::OpShiftRightArithmetic:
3665 dst.move(i, lhs.Int(i) >> rhs.Int(i));
3666 break;
3667 case spv::OpShiftLeftLogical:
3668 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
3669 break;
3670 case spv::OpBitwiseOr:
3671 case spv::OpLogicalOr:
3672 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
3673 break;
3674 case spv::OpBitwiseXor:
3675 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
3676 break;
3677 case spv::OpBitwiseAnd:
3678 case spv::OpLogicalAnd:
3679 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
3680 break;
3681 case spv::OpSMulExtended:
3682 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
3683 // In our flat view then, component i is the i'th component of the first member;
3684 // component i + N is the i'th component of the second member.
3685 dst.move(i, lhs.Int(i) * rhs.Int(i));
3686 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
3687 break;
3688 case spv::OpUMulExtended:
3689 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
3690 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
3691 break;
3692 case spv::OpIAddCarry:
3693 dst.move(i, lhs.UInt(i) + rhs.UInt(i));
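			// The addition wrapped iff the truncated sum is less than an addend;
			// the logical shift turns that lane mask into a 0/1 carry.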
3694 dst.move(i + lhsType.sizeInComponents, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
3695 break;
3696 case spv::OpISubBorrow:
3697 dst.move(i, lhs.UInt(i) - rhs.UInt(i));
3698 dst.move(i + lhsType.sizeInComponents, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
3699 break;
3700 default:
3701 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
3702 }
3703 }
3704
3705 return EmitResult::Continue;
3706 }
3707
3708 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
3709 {
3710 auto &type = getType(insn.word(1));
3711 ASSERT(type.sizeInComponents == 1);
3712 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3713 auto &lhsType = getType(getObject(insn.word(3)).type);
3714 auto lhs = GenericValue(this, state, insn.word(3));
3715 auto rhs = GenericValue(this, state, insn.word(4));
3716
3717 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
3718 return EmitResult::Continue;
3719 }
3720
3721 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
3722 {
3723 auto &type = getType(insn.word(1));
3724 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3725 auto cond = GenericValue(this, state, insn.word(3));
3726 auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
3727 auto lhs = GenericValue(this, state, insn.word(4));
3728 auto rhs = GenericValue(this, state, insn.word(5));
3729
3730 for (auto i = 0u; i < type.sizeInComponents; i++)
3731 {
3732 auto sel = cond.Int(condIsScalar ? 0 : i);
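			// Boolean lanes are all-ones or all-zeros, so a bitwise blend implements
			// the per-lane select.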
3733 dst.move(i, (sel & lhs.Int(i)) | (~sel & rhs.Int(i))); // TODO: IfThenElse()
3734 }
3735
3736 return EmitResult::Continue;
3737 }
3738
3739 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
3740 {
3741 auto &type = getType(insn.word(1));
3742 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
3743 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
3744
3745 switch (extInstIndex)
3746 {
3747 case GLSLstd450FAbs:
3748 {
3749 auto src = GenericValue(this, state, insn.word(5));
3750 for (auto i = 0u; i < type.sizeInComponents; i++)
3751 {
3752 dst.move(i, Abs(src.Float(i)));
3753 }
3754 break;
3755 }
3756 case GLSLstd450SAbs:
3757 {
3758 auto src = GenericValue(this, state, insn.word(5));
3759 for (auto i = 0u; i < type.sizeInComponents; i++)
3760 {
3761 dst.move(i, Abs(src.Int(i)));
3762 }
3763 break;
3764 }
3765 case GLSLstd450Cross:
3766 {
3767 auto lhs = GenericValue(this, state, insn.word(5));
3768 auto rhs = GenericValue(this, state, insn.word(6));
3769 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
3770 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
3771 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
3772 break;
3773 }
3774 case GLSLstd450Floor:
3775 {
3776 auto src = GenericValue(this, state, insn.word(5));
3777 for (auto i = 0u; i < type.sizeInComponents; i++)
3778 {
3779 dst.move(i, Floor(src.Float(i)));
3780 }
3781 break;
3782 }
3783 case GLSLstd450Trunc:
3784 {
3785 auto src = GenericValue(this, state, insn.word(5));
3786 for (auto i = 0u; i < type.sizeInComponents; i++)
3787 {
3788 dst.move(i, Trunc(src.Float(i)));
3789 }
3790 break;
3791 }
3792 case GLSLstd450Ceil:
3793 {
3794 auto src = GenericValue(this, state, insn.word(5));
3795 for (auto i = 0u; i < type.sizeInComponents; i++)
3796 {
3797 dst.move(i, Ceil(src.Float(i)));
3798 }
3799 break;
3800 }
3801 case GLSLstd450Fract:
3802 {
3803 auto src = GenericValue(this, state, insn.word(5));
3804 for (auto i = 0u; i < type.sizeInComponents; i++)
3805 {
3806 dst.move(i, Frac(src.Float(i)));
3807 }
3808 break;
3809 }
3810 case GLSLstd450Round:
3811 {
3812 auto src = GenericValue(this, state, insn.word(5));
3813 for (auto i = 0u; i < type.sizeInComponents; i++)
3814 {
3815 dst.move(i, Round(src.Float(i)));
3816 }
3817 break;
3818 }
3819 case GLSLstd450RoundEven:
3820 {
3821 auto src = GenericValue(this, state, insn.word(5));
3822 for (auto i = 0u; i < type.sizeInComponents; i++)
3823 {
3824 auto x = Round(src.Float(i));
3825 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
3826 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
3827 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
3828 }
3829 break;
3830 }
3831 case GLSLstd450FMin:
3832 {
3833 auto lhs = GenericValue(this, state, insn.word(5));
3834 auto rhs = GenericValue(this, state, insn.word(6));
3835 for (auto i = 0u; i < type.sizeInComponents; i++)
3836 {
3837 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
3838 }
3839 break;
3840 }
3841 case GLSLstd450FMax:
3842 {
3843 auto lhs = GenericValue(this, state, insn.word(5));
3844 auto rhs = GenericValue(this, state, insn.word(6));
3845 for (auto i = 0u; i < type.sizeInComponents; i++)
3846 {
3847 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
3848 }
3849 break;
3850 }
3851 case GLSLstd450SMin:
3852 {
3853 auto lhs = GenericValue(this, state, insn.word(5));
3854 auto rhs = GenericValue(this, state, insn.word(6));
3855 for (auto i = 0u; i < type.sizeInComponents; i++)
3856 {
3857 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
3858 }
3859 break;
3860 }
3861 case GLSLstd450SMax:
3862 {
3863 auto lhs = GenericValue(this, state, insn.word(5));
3864 auto rhs = GenericValue(this, state, insn.word(6));
3865 for (auto i = 0u; i < type.sizeInComponents; i++)
3866 {
3867 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
3868 }
3869 break;
3870 }
3871 case GLSLstd450UMin:
3872 {
3873 auto lhs = GenericValue(this, state, insn.word(5));
3874 auto rhs = GenericValue(this, state, insn.word(6));
3875 for (auto i = 0u; i < type.sizeInComponents; i++)
3876 {
3877 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
3878 }
3879 break;
3880 }
3881 case GLSLstd450UMax:
3882 {
3883 auto lhs = GenericValue(this, state, insn.word(5));
3884 auto rhs = GenericValue(this, state, insn.word(6));
3885 for (auto i = 0u; i < type.sizeInComponents; i++)
3886 {
3887 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
3888 }
3889 break;
3890 }
3891 case GLSLstd450Step:
3892 {
3893 auto edge = GenericValue(this, state, insn.word(5));
3894 auto x = GenericValue(this, state, insn.word(6));
3895 for (auto i = 0u; i < type.sizeInComponents; i++)
3896 {
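				// CmpNLT yields an all-ones lane where x >= edge; ANDing with the bit
				// pattern of 1.0f turns that into 1.0 or 0.0.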
3897 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
3898 }
3899 break;
3900 }
3901 case GLSLstd450SmoothStep:
3902 {
3903 auto edge0 = GenericValue(this, state, insn.word(5));
3904 auto edge1 = GenericValue(this, state, insn.word(6));
3905 auto x = GenericValue(this, state, insn.word(7));
3906 for (auto i = 0u; i < type.sizeInComponents; i++)
3907 {
3908 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
3909 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
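				// Hermite smoothing: tx * tx * (3 - 2 * tx), with tx clamped to [0, 1].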
				dst.move(i, tx * tx * (SIMD::Float(3.0f) - SIMD::Float(2.0f) * tx));
3911 }
3912 break;
3913 }
3914 case GLSLstd450FMix:
3915 {
3916 auto x = GenericValue(this, state, insn.word(5));
3917 auto y = GenericValue(this, state, insn.word(6));
3918 auto a = GenericValue(this, state, insn.word(7));
3919 for (auto i = 0u; i < type.sizeInComponents; i++)
3920 {
3921 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
3922 }
3923 break;
3924 }
3925 case GLSLstd450FClamp:
3926 {
3927 auto x = GenericValue(this, state, insn.word(5));
3928 auto minVal = GenericValue(this, state, insn.word(6));
3929 auto maxVal = GenericValue(this, state, insn.word(7));
3930 for (auto i = 0u; i < type.sizeInComponents; i++)
3931 {
3932 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
3933 }
3934 break;
3935 }
3936 case GLSLstd450SClamp:
3937 {
3938 auto x = GenericValue(this, state, insn.word(5));
3939 auto minVal = GenericValue(this, state, insn.word(6));
3940 auto maxVal = GenericValue(this, state, insn.word(7));
3941 for (auto i = 0u; i < type.sizeInComponents; i++)
3942 {
3943 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
3944 }
3945 break;
3946 }
3947 case GLSLstd450UClamp:
3948 {
3949 auto x = GenericValue(this, state, insn.word(5));
3950 auto minVal = GenericValue(this, state, insn.word(6));
3951 auto maxVal = GenericValue(this, state, insn.word(7));
3952 for (auto i = 0u; i < type.sizeInComponents; i++)
3953 {
3954 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
3955 }
3956 break;
3957 }
3958 case GLSLstd450FSign:
3959 {
3960 auto src = GenericValue(this, state, insn.word(5));
3961 for (auto i = 0u; i < type.sizeInComponents; i++)
3962 {
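				// Build the result from the bit patterns of -1.0 and +1.0, masked by the
				// comparisons; both masks are zero for x == +/-0.0, yielding 0.0.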
3963 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
3964 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
3965 dst.move(i, neg | pos);
3966 }
3967 break;
3968 }
3969 case GLSLstd450SSign:
3970 {
3971 auto src = GenericValue(this, state, insn.word(5));
3972 for (auto i = 0u; i < type.sizeInComponents; i++)
3973 {
3974 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
3975 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
3976 dst.move(i, neg | pos);
3977 }
3978 break;
3979 }
3980 case GLSLstd450Reflect:
3981 {
3982 auto I = GenericValue(this, state, insn.word(5));
3983 auto N = GenericValue(this, state, insn.word(6));
3984
3985 SIMD::Float d = Dot(type.sizeInComponents, I, N);
3986
3987 for (auto i = 0u; i < type.sizeInComponents; i++)
3988 {
3989 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
3990 }
3991 break;
3992 }
3993 case GLSLstd450Refract:
3994 {
3995 auto I = GenericValue(this, state, insn.word(5));
3996 auto N = GenericValue(this, state, insn.word(6));
3997 auto eta = GenericValue(this, state, insn.word(7));
3998
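			// GLSL refract(): k = 1 - eta^2 * (1 - dot(N, I)^2). If k < 0 the result is
			// 0.0 (total internal reflection); otherwise it is
			// eta*I - (eta*dot(N, I) + sqrt(k))*N.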
3999 SIMD::Float d = Dot(type.sizeInComponents, I, N);
4000 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
4001 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
4002 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
4003
4004 for (auto i = 0u; i < type.sizeInComponents; i++)
4005 {
4006 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
4007 }
4008 break;
4009 }
4010 case GLSLstd450FaceForward:
4011 {
4012 auto N = GenericValue(this, state, insn.word(5));
4013 auto I = GenericValue(this, state, insn.word(6));
4014 auto Nref = GenericValue(this, state, insn.word(7));
4015
4016 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
4017 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
4018
4019 for (auto i = 0u; i < type.sizeInComponents; i++)
4020 {
4021 auto n = N.Float(i);
4022 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
4023 }
4024 break;
4025 }
4026 case GLSLstd450Length:
4027 {
4028 auto x = GenericValue(this, state, insn.word(5));
4029 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
4030
4031 dst.move(0, Sqrt(d));
4032 break;
4033 }
4034 case GLSLstd450Normalize:
4035 {
4036 auto x = GenericValue(this, state, insn.word(5));
4037 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
4038 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
4039
4040 for (auto i = 0u; i < type.sizeInComponents; i++)
4041 {
4042 dst.move(i, invLength * x.Float(i));
4043 }
4044 break;
4045 }
4046 case GLSLstd450Distance:
4047 {
4048 auto p0 = GenericValue(this, state, insn.word(5));
4049 auto p1 = GenericValue(this, state, insn.word(6));
4050 auto p0Type = getType(p0.type);
4051
4052 // sqrt(dot(p0-p1, p0-p1))
4053 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
4054
4055 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
4056 {
4057 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
4058 }
4059
4060 dst.move(0, Sqrt(d));
4061 break;
4062 }
4063 case GLSLstd450Modf:
4064 {
4065 auto val = GenericValue(this, state, insn.word(5));
4066 auto ptrId = Object::ID(insn.word(6));
4067 auto ptrTy = getType(getObject(ptrId).type);
4068 auto ptr = GetPointerToData(ptrId, 0, state);
4069 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
4070 // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
4071 // to be in bounds even for inactive lanes.
4072 // - Clarify the SPIR-V spec.
4073 // - Eliminate lane masking and assume interleaving.
4074 auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
4075
4076 for (auto i = 0u; i < type.sizeInComponents; i++)
4077 {
4078 SIMD::Float whole, frac;
4079 std::tie(whole, frac) = Modf(val.Float(i));
4080 dst.move(i, frac);
4081 auto p = ptr + (i * sizeof(float));
4082 if (interleavedByLane) { p = interleaveByLane(p); }
4083 SIMD::Store(p, whole, robustness, state->activeLaneMask());
4084 }
4085 break;
4086 }
4087 case GLSLstd450ModfStruct:
4088 {
4089 auto val = GenericValue(this, state, insn.word(5));
4090 auto valTy = getType(val.type);
4091
4092 for (auto i = 0u; i < valTy.sizeInComponents; i++)
4093 {
4094 SIMD::Float whole, frac;
4095 std::tie(whole, frac) = Modf(val.Float(i));
4096 dst.move(i, frac);
4097 dst.move(i + valTy.sizeInComponents, whole);
4098 }
4099 break;
4100 }
4101 case GLSLstd450PackSnorm4x8:
4102 {
4103 auto val = GenericValue(this, state, insn.word(5));
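			// packSnorm4x8: round(clamp(c, -1, +1) * 127.0) per component, with
			// component 0 packed into the least significant byte.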
4104 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4105 SIMD::Int(0xFF)) |
4106 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4107 SIMD::Int(0xFF)) << 8) |
4108 ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4109 SIMD::Int(0xFF)) << 16) |
4110 ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
4111 SIMD::Int(0xFF)) << 24));
4112 break;
4113 }
4114 case GLSLstd450PackUnorm4x8:
4115 {
4116 auto val = GenericValue(this, state, insn.word(5));
4117 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
4118 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
4119 ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
4120 ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
4121 break;
4122 }
4123 case GLSLstd450PackSnorm2x16:
4124 {
4125 auto val = GenericValue(this, state, insn.word(5));
4126 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
4127 SIMD::Int(0xFFFF)) |
4128 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
4129 SIMD::Int(0xFFFF)) << 16));
4130 break;
4131 }
4132 case GLSLstd450PackUnorm2x16:
4133 {
4134 auto val = GenericValue(this, state, insn.word(5));
4135 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
4136 SIMD::UInt(0xFFFF)) |
4137 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
4138 SIMD::UInt(0xFFFF)) << 16));
4139 break;
4140 }
4141 case GLSLstd450PackHalf2x16:
4142 {
4143 auto val = GenericValue(this, state, insn.word(5));
4144 dst.move(0, FloatToHalfBits(val.UInt(0), false) | FloatToHalfBits(val.UInt(1), true));
4145 break;
4146 }
4147 case GLSLstd450UnpackSnorm4x8:
4148 {
4149 auto val = GenericValue(this, state, insn.word(5));
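			// Shift each byte into the top of the word so its sign bit lands in the
			// integer's sign bit, then scale by 1 / 0x7f000000 (i.e. divide by 127 * 2^24)
			// to map the signed byte to [-1, 1] before clamping.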
4150 dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4151 dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4152 dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4153 dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4154 break;
4155 }
4156 case GLSLstd450UnpackUnorm4x8:
4157 {
4158 auto val = GenericValue(this, state, insn.word(5));
4159 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4160 dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4161 dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4162 dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
4163 break;
4164 }
4165 case GLSLstd450UnpackSnorm2x16:
4166 {
4167 auto val = GenericValue(this, state, insn.word(5));
4168 // clamp(f / 32767.0, -1.0, 1.0)
4169 dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
4170 SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4171 dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
4172 SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
4173 break;
4174 }
4175 case GLSLstd450UnpackUnorm2x16:
4176 {
4177 auto val = GenericValue(this, state, insn.word(5));
4178 // f / 65535.0
4179 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
4180 dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
4181 break;
4182 }
4183 case GLSLstd450UnpackHalf2x16:
4184 {
4185 auto val = GenericValue(this, state, insn.word(5));
4186 dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
4187 dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
4188 break;
4189 }
4190 case GLSLstd450Fma:
4191 {
4192 auto a = GenericValue(this, state, insn.word(5));
4193 auto b = GenericValue(this, state, insn.word(6));
4194 auto c = GenericValue(this, state, insn.word(7));
4195 for (auto i = 0u; i < type.sizeInComponents; i++)
4196 {
4197 dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
4198 }
4199 break;
4200 }
4201 case GLSLstd450Frexp:
4202 {
4203 auto val = GenericValue(this, state, insn.word(5));
4204 auto ptrId = Object::ID(insn.word(6));
4205 auto ptrTy = getType(getObject(ptrId).type);
4206 auto ptr = GetPointerToData(ptrId, 0, state);
4207 bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
4208 // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
4209 // to be in bounds even for inactive lanes.
4210 // - Clarify the SPIR-V spec.
4211 // - Eliminate lane masking and assume interleaving.
4212 auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
4213
4214 for (auto i = 0u; i < type.sizeInComponents; i++)
4215 {
4216 SIMD::Float significand;
4217 SIMD::Int exponent;
4218 std::tie(significand, exponent) = Frexp(val.Float(i));
4219
4220 dst.move(i, significand);
4221
4222 auto p = ptr + (i * sizeof(float));
4223 if (interleavedByLane) { p = interleaveByLane(p); }
4224 SIMD::Store(p, exponent, robustness, state->activeLaneMask());
4225 }
4226 break;
4227 }
4228 case GLSLstd450FrexpStruct:
4229 {
4230 auto val = GenericValue(this, state, insn.word(5));
4231 auto numComponents = getType(val.type).sizeInComponents;
4232 for (auto i = 0u; i < numComponents; i++)
4233 {
4234 auto significandAndExponent = Frexp(val.Float(i));
4235 dst.move(i, significandAndExponent.first);
4236 dst.move(i + numComponents, significandAndExponent.second);
4237 }
4238 break;
4239 }
4240 case GLSLstd450Ldexp:
4241 {
4242 auto significand = GenericValue(this, state, insn.word(5));
4243 auto exponent = GenericValue(this, state, insn.word(6));
4244 for (auto i = 0u; i < type.sizeInComponents; i++)
4245 {
4246 // Assumes IEEE 754
4247 auto in = significand.Float(i);
4248 auto significandExponent = Exponent(in);
4249 auto combinedExponent = exponent.Int(i) + significandExponent;
4250 auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
4251 auto isSignificandInf = SIMD::UInt(IsInf(in));
4252 auto isSignificandNaN = SIMD::UInt(IsNan(in));
4253 auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
4254 auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
4255 auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
4256
4257 SIMD::UInt v;
4258 v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
4259 v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
4260 v &= isExponentInBounds; // Clear v if the exponent is OOB.
4261
4262 v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
4263 v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
4264
4265 // If the input significand is zero, inf or nan, just return the
4266 // input significand.
4267 auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
4268 v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
4269
4270 dst.move(i, As<SIMD::Float>(v));
4271 }
4272 break;
4273 }
4274 case GLSLstd450Radians:
4275 {
4276 auto degrees = GenericValue(this, state, insn.word(5));
4277 for (auto i = 0u; i < type.sizeInComponents; i++)
4278 {
4279 dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
4280 }
4281 break;
4282 }
4283 case GLSLstd450Degrees:
4284 {
4285 auto radians = GenericValue(this, state, insn.word(5));
4286 for (auto i = 0u; i < type.sizeInComponents; i++)
4287 {
4288 dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
4289 }
4290 break;
4291 }
4292 case GLSLstd450Sin:
4293 {
4294 auto radians = GenericValue(this, state, insn.word(5));
4295 for (auto i = 0u; i < type.sizeInComponents; i++)
4296 {
4297 dst.move(i, Sin(radians.Float(i)));
4298 }
4299 break;
4300 }
4301 case GLSLstd450Cos:
4302 {
4303 auto radians = GenericValue(this, state, insn.word(5));
4304 for (auto i = 0u; i < type.sizeInComponents; i++)
4305 {
4306 dst.move(i, Cos(radians.Float(i)));
4307 }
4308 break;
4309 }
4310 case GLSLstd450Tan:
4311 {
4312 auto radians = GenericValue(this, state, insn.word(5));
4313 for (auto i = 0u; i < type.sizeInComponents; i++)
4314 {
4315 dst.move(i, Tan(radians.Float(i)));
4316 }
4317 break;
4318 }
4319 case GLSLstd450Asin:
4320 {
4321 auto val = GenericValue(this, state, insn.word(5));
4322 for (auto i = 0u; i < type.sizeInComponents; i++)
4323 {
4324 dst.move(i, Asin(val.Float(i)));
4325 }
4326 break;
4327 }
4328 case GLSLstd450Acos:
4329 {
4330 auto val = GenericValue(this, state, insn.word(5));
4331 for (auto i = 0u; i < type.sizeInComponents; i++)
4332 {
4333 dst.move(i, Acos(val.Float(i)));
4334 }
4335 break;
4336 }
4337 case GLSLstd450Atan:
4338 {
4339 auto val = GenericValue(this, state, insn.word(5));
4340 for (auto i = 0u; i < type.sizeInComponents; i++)
4341 {
4342 dst.move(i, Atan(val.Float(i)));
4343 }
4344 break;
4345 }
4346 case GLSLstd450Sinh:
4347 {
4348 auto val = GenericValue(this, state, insn.word(5));
4349 for (auto i = 0u; i < type.sizeInComponents; i++)
4350 {
4351 dst.move(i, Sinh(val.Float(i)));
4352 }
4353 break;
4354 }
4355 case GLSLstd450Cosh:
4356 {
4357 auto val = GenericValue(this, state, insn.word(5));
4358 for (auto i = 0u; i < type.sizeInComponents; i++)
4359 {
4360 dst.move(i, Cosh(val.Float(i)));
4361 }
4362 break;
4363 }
4364 case GLSLstd450Tanh:
4365 {
4366 auto val = GenericValue(this, state, insn.word(5));
4367 for (auto i = 0u; i < type.sizeInComponents; i++)
4368 {
4369 dst.move(i, Tanh(val.Float(i)));
4370 }
4371 break;
4372 }
4373 case GLSLstd450Asinh:
4374 {
4375 auto val = GenericValue(this, state, insn.word(5));
4376 for (auto i = 0u; i < type.sizeInComponents; i++)
4377 {
4378 dst.move(i, Asinh(val.Float(i)));
4379 }
4380 break;
4381 }
4382 case GLSLstd450Acosh:
4383 {
4384 auto val = GenericValue(this, state, insn.word(5));
4385 for (auto i = 0u; i < type.sizeInComponents; i++)
4386 {
4387 dst.move(i, Acosh(val.Float(i)));
4388 }
4389 break;
4390 }
4391 case GLSLstd450Atanh:
4392 {
4393 auto val = GenericValue(this, state, insn.word(5));
4394 for (auto i = 0u; i < type.sizeInComponents; i++)
4395 {
4396 dst.move(i, Atanh(val.Float(i)));
4397 }
4398 break;
4399 }
4400 case GLSLstd450Atan2:
4401 {
4402 auto x = GenericValue(this, state, insn.word(5));
4403 auto y = GenericValue(this, state, insn.word(6));
4404 for (auto i = 0u; i < type.sizeInComponents; i++)
4405 {
4406 dst.move(i, Atan2(x.Float(i), y.Float(i)));
4407 }
4408 break;
4409 }
4410 case GLSLstd450Pow:
4411 {
4412 auto x = GenericValue(this, state, insn.word(5));
4413 auto y = GenericValue(this, state, insn.word(6));
4414 for (auto i = 0u; i < type.sizeInComponents; i++)
4415 {
4416 dst.move(i, Pow(x.Float(i), y.Float(i)));
4417 }
4418 break;
4419 }
4420 case GLSLstd450Exp:
4421 {
4422 auto val = GenericValue(this, state, insn.word(5));
4423 for (auto i = 0u; i < type.sizeInComponents; i++)
4424 {
4425 dst.move(i, Exp(val.Float(i)));
4426 }
4427 break;
4428 }
4429 case GLSLstd450Log:
4430 {
4431 auto val = GenericValue(this, state, insn.word(5));
4432 for (auto i = 0u; i < type.sizeInComponents; i++)
4433 {
4434 dst.move(i, Log(val.Float(i)));
4435 }
4436 break;
4437 }
4438 case GLSLstd450Exp2:
4439 {
4440 auto val = GenericValue(this, state, insn.word(5));
4441 for (auto i = 0u; i < type.sizeInComponents; i++)
4442 {
4443 dst.move(i, Exp2(val.Float(i)));
4444 }
4445 break;
4446 }
4447 case GLSLstd450Log2:
4448 {
4449 auto val = GenericValue(this, state, insn.word(5));
4450 for (auto i = 0u; i < type.sizeInComponents; i++)
4451 {
4452 dst.move(i, Log2(val.Float(i)));
4453 }
4454 break;
4455 }
4456 case GLSLstd450Sqrt:
4457 {
4458 auto val = GenericValue(this, state, insn.word(5));
4459 for (auto i = 0u; i < type.sizeInComponents; i++)
4460 {
4461 dst.move(i, Sqrt(val.Float(i)));
4462 }
4463 break;
4464 }
4465 case GLSLstd450InverseSqrt:
4466 {
4467 auto val = GenericValue(this, state, insn.word(5));
4468 Decorations d;
4469 ApplyDecorationsForId(&d, insn.word(5));
4470 if (d.RelaxedPrecision)
4471 {
4472 for (auto i = 0u; i < type.sizeInComponents; i++)
4473 {
4474 dst.move(i, RcpSqrt_pp(val.Float(i)));
4475 }
4476 }
4477 else
4478 {
4479 for (auto i = 0u; i < type.sizeInComponents; i++)
4480 {
4481 dst.move(i, SIMD::Float(1.0f) / Sqrt(val.Float(i)));
4482 }
4483 }
4484 break;
4485 }
4486 case GLSLstd450Determinant:
4487 {
4488 auto mat = GenericValue(this, state, insn.word(5));
4489 auto numComponents = getType(mat.type).sizeInComponents;
4490 switch (numComponents)
4491 {
4492 case 4: // 2x2
4493 dst.move(0, Determinant(
4494 mat.Float(0), mat.Float(1),
4495 mat.Float(2), mat.Float(3)));
4496 break;
4497 case 9: // 3x3
4498 dst.move(0, Determinant(
4499 mat.Float(0), mat.Float(1), mat.Float(2),
4500 mat.Float(3), mat.Float(4), mat.Float(5),
4501 mat.Float(6), mat.Float(7), mat.Float(8)));
4502 break;
4503 case 16: // 4x4
4504 dst.move(0, Determinant(
4505 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4506 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4507 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4508 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
4509 break;
4510 default:
4511 UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(numComponents));
4512 }
4513 break;
4514 }
4515 case GLSLstd450MatrixInverse:
4516 {
4517 auto mat = GenericValue(this, state, insn.word(5));
4518 auto numComponents = getType(mat.type).sizeInComponents;
4519 switch (numComponents)
4520 {
4521 case 4: // 2x2
4522 {
4523 auto inv = MatrixInverse(
4524 mat.Float(0), mat.Float(1),
4525 mat.Float(2), mat.Float(3));
4526 for (uint32_t i = 0; i < inv.size(); i++)
4527 {
4528 dst.move(i, inv[i]);
4529 }
4530 break;
4531 }
4532 case 9: // 3x3
4533 {
4534 auto inv = MatrixInverse(
4535 mat.Float(0), mat.Float(1), mat.Float(2),
4536 mat.Float(3), mat.Float(4), mat.Float(5),
4537 mat.Float(6), mat.Float(7), mat.Float(8));
4538 for (uint32_t i = 0; i < inv.size(); i++)
4539 {
4540 dst.move(i, inv[i]);
4541 }
4542 break;
4543 }
4544 case 16: // 4x4
4545 {
4546 auto inv = MatrixInverse(
4547 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
4548 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
4549 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
4550 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
4551 for (uint32_t i = 0; i < inv.size(); i++)
4552 {
4553 dst.move(i, inv[i]);
4554 }
4555 break;
4556 }
4557 default:
4558 UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(numComponents));
4559 }
4560 break;
4561 }
4562 case GLSLstd450IMix:
4563 {
4564 UNREACHABLE("GLSLstd450IMix has been removed from the specification");
4565 break;
4566 }
4567 case GLSLstd450PackDouble2x32:
4568 {
4569 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
4570 break;
4571 }
4572 case GLSLstd450UnpackDouble2x32:
4573 {
4574 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
4575 break;
4576 }
4577 case GLSLstd450FindILsb:
4578 {
4579 auto val = GenericValue(this, state, insn.word(5));
4580 for (auto i = 0u; i < type.sizeInComponents; i++)
4581 {
4582 auto v = val.UInt(i);
4583 dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
4584 }
4585 break;
4586 }
4587 case GLSLstd450FindSMsb:
4588 {
4589 auto val = GenericValue(this, state, insn.word(5));
4590 for (auto i = 0u; i < type.sizeInComponents; i++)
4591 {
4592 auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
4593 dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
4594 }
4595 break;
4596 }
4597 case GLSLstd450FindUMsb:
4598 {
4599 auto val = GenericValue(this, state, insn.word(5));
4600 for (auto i = 0u; i < type.sizeInComponents; i++)
4601 {
4602 dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
4603 }
4604 break;
4605 }
4606 case GLSLstd450InterpolateAtCentroid:
4607 {
4608 UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtCentroid)");
4609 break;
4610 }
4611 case GLSLstd450InterpolateAtSample:
4612 {
			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtSample)");
4614 break;
4615 }
4616 case GLSLstd450InterpolateAtOffset:
4617 {
			UNSUPPORTED("SPIR-V SampleRateShading Capability (GLSLstd450InterpolateAtOffset)");
4619 break;
4620 }
4621 case GLSLstd450NMin:
4622 {
4623 auto x = GenericValue(this, state, insn.word(5));
4624 auto y = GenericValue(this, state, insn.word(6));
4625 for (auto i = 0u; i < type.sizeInComponents; i++)
4626 {
4627 dst.move(i, NMin(x.Float(i), y.Float(i)));
4628 }
4629 break;
4630 }
4631 case GLSLstd450NMax:
4632 {
4633 auto x = GenericValue(this, state, insn.word(5));
4634 auto y = GenericValue(this, state, insn.word(6));
4635 for (auto i = 0u; i < type.sizeInComponents; i++)
4636 {
4637 dst.move(i, NMax(x.Float(i), y.Float(i)));
4638 }
4639 break;
4640 }
4641 case GLSLstd450NClamp:
4642 {
4643 auto x = GenericValue(this, state, insn.word(5));
4644 auto minVal = GenericValue(this, state, insn.word(6));
4645 auto maxVal = GenericValue(this, state, insn.word(7));
4646 for (auto i = 0u; i < type.sizeInComponents; i++)
4647 {
4648 auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
4649 dst.move(i, clamp);
4650 }
4651 break;
4652 }
4653 default:
4654 UNREACHABLE("ExtInst %d", int(extInstIndex));
4655 break;
4656 }
4657
4658 return EmitResult::Continue;
4659 }
4660
4661 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
4662 {
4663 auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
4664 spv::MemorySemanticsAcquireMask |
4665 spv::MemorySemanticsReleaseMask |
4666 spv::MemorySemanticsAcquireReleaseMask |
4667 spv::MemorySemanticsSequentiallyConsistentMask
4668 );
4669 switch (control)
4670 {
4671 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
4672 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
4673 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
4674 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
4675 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
4676 default:
4677 // "it is invalid for more than one of these four bits to be set:
4678 // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
4679 UNREACHABLE("MemorySemanticsMask: %x", int(control));
4680 return std::memory_order_acq_rel;
4681 }
4682 }
4683
4684 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
4685 {
4686 SIMD::Float d = x.Float(0) * y.Float(0);
4687
4688 for (auto i = 1u; i < numComponents; i++)
4689 {
4690 d += x.Float(i) * y.Float(i);
4691 }
4692
4693 return d;
4694 }
4695
4696 SIMD::UInt SpirvShader::FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const
4697 {
4698 static const uint32_t mask_sign = 0x80000000u;
4699 static const uint32_t mask_round = ~0xfffu;
4700 static const uint32_t c_f32infty = 255 << 23;
4701 static const uint32_t c_magic = 15 << 23;
4702 static const uint32_t c_nanbit = 0x200;
4703 static const uint32_t c_infty_as_fp16 = 0x7c00;
4704 static const uint32_t c_clamp = (31 << 23) - 0x1000;
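		// c_magic (15 << 23) is 2^-112 as float bits: multiplying by it rebiases the
		// exponent from float's 127 to half's 15, so the half value can then be
		// extracted with a 13-bit shift. c_clamp caps the scaled value at the largest
		// finite half.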
4705
4706 SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits;
4707 SIMD::UInt absf = floatBits ^ justsign;
4708 SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf);
4709
		// Note: this version does not round ties to nearest even as IEEE 754-2008 specifies;
		// it rounds towards +inf instead, which is acceptable for GLSL ES 3.0's needs
		// (see section 2.1.1, Floating-Point Computation).
4712 SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)),
4713 As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) |
4714 ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) |
4715 SIMD::UInt(c_infty_as_fp16)));
4716
4717 return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16);
4718 }
4719
4720 std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
4721 {
4722 // Assumes IEEE 754
4723 auto v = As<SIMD::UInt>(val);
4724 auto isNotZero = CmpNEQ(v & SIMD::UInt(0x7FFFFFFF), SIMD::UInt(0));
4725 auto zeroSign = v & SIMD::UInt(0x80000000) & ~isNotZero;
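		// Keep the mantissa and sign but force the exponent field to that of 0.5f
		// (0x3F000000), so the significand's magnitude falls in [0.5, 1.0); zero inputs
		// keep a zero significand with the original sign.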
4726 auto significand = As<SIMD::Float>((((v & SIMD::UInt(0x807FFFFF)) | SIMD::UInt(0x3F000000)) & isNotZero) | zeroSign);
4727 auto exponent = Exponent(val) & SIMD::Int(isNotZero);
4728 return std::make_pair(significand, exponent);
4729 }
4730
4731 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
4732 {
4733 auto &type = getType(insn.word(1));
4734 ASSERT(type.sizeInComponents == 1);
4735 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
4736 auto &srcType = getType(getObject(insn.word(3)).type);
4737 auto src = GenericValue(this, state, insn.word(3));
4738
4739 SIMD::UInt result = src.UInt(0);
4740
4741 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4742 {
4743 result |= src.UInt(i);
4744 }
4745
4746 dst.move(0, result);
4747 return EmitResult::Continue;
4748 }
4749
4750 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
4751 {
4752 auto &type = getType(insn.word(1));
4753 ASSERT(type.sizeInComponents == 1);
4754 auto &dst = state->createIntermediate(insn.word(2), type.sizeInComponents);
4755 auto &srcType = getType(getObject(insn.word(3)).type);
4756 auto src = GenericValue(this, state, insn.word(3));
4757
4758 SIMD::UInt result = src.UInt(0);
4759
4760 for (auto i = 1u; i < srcType.sizeInComponents; i++)
4761 {
4762 result &= src.UInt(i);
4763 }
4764
4765 dst.move(0, result);
4766 return EmitResult::Continue;
4767 }
4768
4769 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
4770 {
4771 auto target = Block::ID(insn.word(1));
4772 state->addActiveLaneMaskEdge(state->block, target, state->activeLaneMask());
4773 return EmitResult::Terminator;
4774 }
4775
4776 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
4777 {
4778 auto &function = getFunction(state->function);
4779 auto block = function.getBlock(state->block);
4780 ASSERT(block.branchInstruction == insn);
4781
4782 auto condId = Object::ID(block.branchInstruction.word(1));
4783 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
4784 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
4785
4786 auto cond = GenericValue(this, state, condId);
4787 ASSERT_MSG(getType(cond.type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
4788
4789 // TODO: Optimize for case where all lanes take same path.
4790
4791 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
4792 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
4793
4794 return EmitResult::Terminator;
4795 }
4796
4797 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
4798 {
4799 auto &function = getFunction(state->function);
4800 auto block = function.getBlock(state->block);
4801 ASSERT(block.branchInstruction == insn);
4802
4803 auto selId = Object::ID(block.branchInstruction.word(1));
4804
4805 auto sel = GenericValue(this, state, selId);
4806 ASSERT_MSG(getType(sel.type).sizeInComponents == 1, "Selector must be a scalar");
4807
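		// OpSwitch operands: word 1 is the selector, word 2 the default label,
		// followed by (literal, label) pairs.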
4808 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
4809
4810 // TODO: Optimize for case where all lanes take same path.
4811
4812 SIMD::Int defaultLaneMask = state->activeLaneMask();
4813
4814 // Gather up the case label matches and calculate defaultLaneMask.
4815 std::vector<RValue<SIMD::Int>> caseLabelMatches;
4816 caseLabelMatches.reserve(numCases);
4817 for (uint32_t i = 0; i < numCases; i++)
4818 {
4819 auto label = block.branchInstruction.word(i * 2 + 3);
4820 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
4821 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
4822 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
4823 defaultLaneMask &= ~caseLabelMatch;
4824 }
4825
4826 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
4827 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
4828
4829 return EmitResult::Terminator;
4830 }
4831
4832 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
4833 {
4834 // TODO: Log something in this case?
4835 state->setActiveLaneMask(SIMD::Int(0));
4836 return EmitResult::Terminator;
4837 }
4838
4839 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
4840 {
4841 state->setActiveLaneMask(SIMD::Int(0));
4842 return EmitResult::Terminator;
4843 }
4844
4845 SpirvShader::EmitResult SpirvShader::EmitKill(InsnIterator insn, EmitState *state) const
4846 {
4847 state->routine->killMask |= SignMask(state->activeLaneMask());
4848 state->setActiveLaneMask(SIMD::Int(0));
4849 return EmitResult::Terminator;
4850 }
4851
4852 SpirvShader::EmitResult SpirvShader::EmitFunctionCall(InsnIterator insn, EmitState *state) const
4853 {
4854 auto functionId = Function::ID(insn.word(3));
4855 const auto& functionIt = functions.find(functionId);
4856 ASSERT(functionIt != functions.end());
4857 auto& function = functionIt->second;
4858
4859 // TODO(b/141246700): Add full support for spv::OpFunctionCall
		// The only supported function is a single OpKill wrapped in a function,
		// as produced by the "wrap OpKill" SPIRV-Tools pass.
4862 ASSERT(function.blocks.size() == 1);
4863 spv::Op wrapOpKill[] = { spv::OpLabel, spv::OpKill };
4864
4865 for (auto block : function.blocks)
4866 {
4867 int insnNumber = 0;
4868 for (auto blockInsn : block.second)
4869 {
4870 if (insnNumber > 1)
4871 {
4872 UNIMPLEMENTED("Function block number of instructions: %d", insnNumber);
4873 return EmitResult::Continue;
4874 }
4875 if (blockInsn.opcode() != wrapOpKill[insnNumber++])
4876 {
4877 UNIMPLEMENTED("Function block instruction %d : %s", insnNumber - 1, OpcodeName(blockInsn.opcode()).c_str());
4878 return EmitResult::Continue;
4879 }
4880 if (blockInsn.opcode() == spv::OpKill)
4881 {
4882 EmitInstruction(blockInsn, state);
4883 }
4884 }
4885 }
4886
4887 return EmitResult::Continue;
4888 }
4889
4890 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
4891 {
4892 auto &function = getFunction(state->function);
4893 auto currentBlock = function.getBlock(state->block);
4894 if (!currentBlock.isLoopMerge)
4895 {
			// Loop merge blocks are skipped here: EmitLoop() has already taken
			// special care of updating this phi's values in order to correctly
			// deal with divergent lanes, so don't update them from the ins.
4899 StorePhi(state->block, insn, state, currentBlock.ins);
4900 }
4901 LoadPhi(insn, state);
4902 return EmitResult::Continue;
4903 }
4904
4905 void SpirvShader::LoadPhi(InsnIterator insn, EmitState *state) const
4906 {
4907 auto typeId = Type::ID(insn.word(1));
4908 auto type = getType(typeId);
4909 auto objectId = Object::ID(insn.word(2));
4910
4911 auto storageIt = state->routine->phis.find(objectId);
4912 ASSERT(storageIt != state->routine->phis.end());
4913 auto &storage = storageIt->second;
4914
4915 auto &dst = state->createIntermediate(objectId, type.sizeInComponents);
4916 for(uint32_t i = 0; i < type.sizeInComponents; i++)
4917 {
4918 dst.move(i, storage[i]);
4919 }
4920 }
4921
4922 void SpirvShader::StorePhi(Block::ID currentBlock, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const
4923 {
4924 auto typeId = Type::ID(insn.word(1));
4925 auto type = getType(typeId);
4926 auto objectId = Object::ID(insn.word(2));
4927
4928 auto storageIt = state->routine->phis.find(objectId);
4929 ASSERT(storageIt != state->routine->phis.end());
4930 auto &storage = storageIt->second;
4931
4932 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
4933 {
4934 auto varId = Object::ID(insn.word(w + 0));
4935 auto blockId = Block::ID(insn.word(w + 1));
4936
4937 if (filter.count(blockId) == 0)
4938 {
4939 continue;
4940 }
4941
4942 auto mask = GetActiveLaneMaskEdge(state, blockId, currentBlock);
4943 auto in = GenericValue(this, state, varId);
4944
4945 for (uint32_t i = 0; i < type.sizeInComponents; i++)
4946 {
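				// Blend the incoming value into the phi's storage on just the lanes
				// that arrived via this edge.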
4947 storage[i] = As<SIMD::Float>((As<SIMD::Int>(storage[i]) & ~mask) | (in.Int(i) & mask));
4948 }
4949 }
4950 }
4951
4952 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
4953 {
4954 return EmitImageSample({variant, Implicit}, insn, state);
4955 }
4956
4957 SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
4958 {
4959 ImageInstruction instruction = {variant, Gather};
4960 instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;
4961
4962 return EmitImageSample(instruction, insn, state);
4963 }
4964
4965 SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
4966 {
4967 auto isDref = (variant == Dref) || (variant == ProjDref);
4968 uint32_t imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(isDref ? 6 : 5));
4969 imageOperands &= ~spv::ImageOperandsConstOffsetMask; // Dealt with later.
4970
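		// After stripping ConstOffset, either Lod or Grad must be the only remaining
		// image operand.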
4971 if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
4972 {
4973 return EmitImageSample({variant, Lod}, insn, state);
4974 }
4975 else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
4976 {
4977 return EmitImageSample({variant, Grad}, insn, state);
4978 }
4979 else UNIMPLEMENTED("Image Operands %x", imageOperands);
4980 return EmitResult::Continue;
4981 }
4982
4983 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
4984 {
4985 return EmitImageSample({None, Fetch}, insn, state);
4986 }
4987
4988 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
4989 {
4990 Type::ID resultTypeId = insn.word(1);
4991 Object::ID resultId = insn.word(2);
4992 Object::ID sampledImageId = insn.word(3); // For OpImageFetch this is just an Image, not a SampledImage.
4993 Object::ID coordinateId = insn.word(4);
4994 auto &resultType = getType(resultTypeId);
4995
4996 auto &result = state->createIntermediate(resultId, resultType.sizeInComponents);
4997 auto imageDescriptor = state->getPointer(sampledImageId).base; // vk::SampledImageDescriptor*
4998
4999 // If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
5000 auto &sampledImage = getObject(sampledImageId);
5001 auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ?
5002 state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
5003
5004 auto coordinate = GenericValue(this, state, coordinateId);
5005 auto &coordinateType = getType(coordinate.type);
5006
5007 Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler); // vk::Sampler*
5008 Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
5009
5010 // Above we assumed that if the SampledImage operand is not the result of an OpSampledImage,
5011 // it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch
5012 // it's just an Image operand, so there's no sampler descriptor data.
5013 if(getType(sampledImage.type).opcode() != spv::OpTypeSampledImage)
5014 {
5015 sampler = Pointer<Byte>(nullptr);
5016 }
5017
5018 uint32_t imageOperands = spv::ImageOperandsMaskNone;
5019 bool lodOrBias = false;
5020 Object::ID lodOrBiasId = 0;
5021 bool grad = false;
5022 Object::ID gradDxId = 0;
5023 Object::ID gradDyId = 0;
5024 bool constOffset = false;
5025 Object::ID offsetId = 0;
5026 bool sample = false;
5027 Object::ID sampleId = 0;
5028
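		// The optional image-operands word follows the Dref or gather-component
		// operand when one is present.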
5029 uint32_t operand = (instruction.isDref() || instruction.samplerMethod == Gather) ? 6 : 5;
5030
5031 if(insn.wordCount() > operand)
5032 {
5033 imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(operand++));
5034
5035 if(imageOperands & spv::ImageOperandsBiasMask)
5036 {
5037 lodOrBias = true;
5038 lodOrBiasId = insn.word(operand);
5039 operand++;
5040 imageOperands &= ~spv::ImageOperandsBiasMask;
5041
5042 ASSERT(instruction.samplerMethod == Implicit);
5043 instruction.samplerMethod = Bias;
5044 }
5045
5046 if(imageOperands & spv::ImageOperandsLodMask)
5047 {
5048 lodOrBias = true;
5049 lodOrBiasId = insn.word(operand);
5050 operand++;
5051 imageOperands &= ~spv::ImageOperandsLodMask;
5052 }
5053
5054 if(imageOperands & spv::ImageOperandsGradMask)
5055 {
5056 ASSERT(!lodOrBias); // SPIR-V 1.3: "It is invalid to set both the Lod and Grad bits." Bias is for ImplicitLod, Grad for ExplicitLod.
5057 grad = true;
5058 gradDxId = insn.word(operand + 0);
5059 gradDyId = insn.word(operand + 1);
5060 operand += 2;
5061 imageOperands &= ~spv::ImageOperandsGradMask;
5062 }
5063
5064 if(imageOperands & spv::ImageOperandsConstOffsetMask)
5065 {
5066 constOffset = true;
5067 offsetId = insn.word(operand);
5068 operand++;
5069 imageOperands &= ~spv::ImageOperandsConstOffsetMask;
5070 }
5071
5072 if(imageOperands & spv::ImageOperandsSampleMask)
5073 {
5074 sample = true;
5075 sampleId = insn.word(operand);
5076 imageOperands &= ~spv::ImageOperandsSampleMask;
5077
5078 ASSERT(instruction.samplerMethod == Fetch);
5079 instruction.sample = true;
5080 }
5081
5082 if(imageOperands != 0)
5083 {
5084 UNSUPPORTED("Image operand %x", imageOperands);
5085 }
5086 }
5087
5088 Array<SIMD::Float> in(16); // Maximum 16 input parameter components.
5089
5090 uint32_t coordinates = coordinateType.sizeInComponents - instruction.isProj();
5091 instruction.coordinates = coordinates;
5092
5093 uint32_t i = 0;
5094 for( ; i < coordinates; i++)
5095 {
5096 if(instruction.isProj())
5097 {
5098 in[i] = coordinate.Float(i) / coordinate.Float(coordinates); // TODO(b/129523279): Optimize using reciprocal.
5099 }
5100 else
5101 {
5102 in[i] = coordinate.Float(i);
5103 }
5104 }
5105
5106 if(instruction.isDref())
5107 {
5108 auto drefValue = GenericValue(this, state, insn.word(5));
5109
5110 if(instruction.isProj())
5111 {
5112 in[i] = drefValue.Float(0) / coordinate.Float(coordinates); // TODO(b/129523279): Optimize using reciprocal.
5113 }
5114 else
5115 {
5116 in[i] = drefValue.Float(0);
5117 }
5118
5119 i++;
5120 }
5121
5122 if(lodOrBias)
5123 {
5124 auto lodValue = GenericValue(this, state, lodOrBiasId);
5125 in[i] = lodValue.Float(0);
5126 i++;
5127 }
5128 else if(grad)
5129 {
5130 auto dxValue = GenericValue(this, state, gradDxId);
5131 auto dyValue = GenericValue(this, state, gradDyId);
5132 auto &dxyType = getType(dxValue.type);
5133 ASSERT(dxyType.sizeInComponents == getType(dyValue.type).sizeInComponents);
5134
5135 instruction.grad = dxyType.sizeInComponents;
5136
5137 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
5138 {
5139 in[i] = dxValue.Float(j);
5140 }
5141
5142 for(uint32_t j = 0; j < dxyType.sizeInComponents; j++, i++)
5143 {
5144 in[i] = dyValue.Float(j);
5145 }
5146 }
5147 else if (instruction.samplerMethod == Fetch)
5148 {
5149 // The instruction didn't provide a lod operand, but the sampler's Fetch
5150 // function requires one to be present. If no lod is supplied, the default
5151 // is zero.
5152 in[i] = As<SIMD::Float>(SIMD::Int(0));
5153 i++;
5154 }
5155
5156 if(constOffset)
5157 {
5158 auto offsetValue = GenericValue(this, state, offsetId);
5159 auto &offsetType = getType(offsetValue.type);
5160
5161 instruction.offset = offsetType.sizeInComponents;
5162
5163 for(uint32_t j = 0; j < offsetType.sizeInComponents; j++, i++)
5164 {
5165				in[i] = As<SIMD::Float>(offsetValue.Int(j)); // Integer values, but transferred as float.
5166 }
5167 }
5168
5169 if(sample)
5170 {
5171 auto sampleValue = GenericValue(this, state, sampleId);
5172 in[i] = sampleValue.Float(0);
5173 }
5174
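		// Each sampling call site owns a cache entry keyed on the descriptor
		// pointers: when the same image and sampler are seen again, the sampling
		// function built on the previous invocation is reused instead of calling
		// getImageSampler to obtain one.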
5175 auto cacheIt = state->routine->samplerCache.find(resultId);
5176 ASSERT(cacheIt != state->routine->samplerCache.end());
5177 auto &cache = cacheIt->second;
5178 auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
5179
5180 If(!cacheHit)
5181 {
5182 cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
5183 cache.imageDescriptor = imageDescriptor;
5184 cache.sampler = sampler;
5185 }
5186
5187 Array<SIMD::Float> out(4);
5188 Call<ImageSampler>(cache.function, texture, sampler, &in[0], &out[0], state->routine->constants);
5189
5190 for (auto i = 0u; i < resultType.sizeInComponents; i++) { result.move(i, out[i]); }
5191
5192 return EmitResult::Continue;
5193 }
5194
5195 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
5196 {
5197 auto &resultTy = getType(Type::ID(insn.word(1)));
5198 auto resultId = Object::ID(insn.word(2));
5199 auto imageId = Object::ID(insn.word(3));
5200 auto lodId = Object::ID(insn.word(4));
5201
5202 auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
5203 GetImageDimensions(state, resultTy, imageId, lodId, dst);
5204
5205 return EmitResult::Continue;
5206 }
5207
5208 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
5209 {
5210 auto &resultTy = getType(Type::ID(insn.word(1)));
5211 auto resultId = Object::ID(insn.word(2));
5212 auto imageId = Object::ID(insn.word(3));
5213 auto lodId = Object::ID(0);
5214
5215 auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
5216 GetImageDimensions(state, resultTy, imageId, lodId, dst);
5217
5218 return EmitResult::Continue;
5219 }
5220
5221 SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const
5222 {
5223 return EmitImageSample({None, Query}, insn, state);
5224 }
5225
5226 void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
5227 {
5228 auto routine = state->routine;
5229 auto &image = getObject(imageId);
5230 auto &imageType = getType(image.type);
5231
5232 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5233 bool isArrayed = imageType.definition.word(5) != 0;
5234 bool isCubeMap = imageType.definition.word(3) == spv::DimCube;
5235
5236 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5237 auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
5238 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
5239
5240 Pointer<Byte> descriptor = state->getPointer(imageId).base;
5241
5242 Pointer<Int> extent;
5243 Int arrayLayers;
5244
5245 switch (bindingLayout.descriptorType)
5246 {
5247 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
5248 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
5249 {
5250 extent = descriptor + OFFSET(vk::StorageImageDescriptor, extent); // int[3]*
5251 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, arrayLayers)); // uint32_t
5252 break;
5253 }
5254 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
5255 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
5256 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
5257 {
5258 extent = descriptor + OFFSET(vk::SampledImageDescriptor, extent); // int[3]*
5259 arrayLayers = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, arrayLayers)); // uint32_t
5260 break;
5261 }
5262 default:
5263 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
5264 }
5265
5266 auto dimensions = resultTy.sizeInComponents - (isArrayed ? 1 : 0);
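		// Each successive mip level halves the extent, so level 'lod' has extent
		// max(baseExtent >> lod, 1) in each dimension.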
5268 if (lodId != 0)
5269 {
5270 auto lodVal = GenericValue(this, state, lodId);
5271 ASSERT(getType(lodVal.type).sizeInComponents == 1);
5272 auto lod = lodVal.Int(0);
5273 auto one = SIMD::Int(1);
5274 for (uint32_t i = 0; i < dimensions; i++)
5275 {
5276 dst.move(i, Max(SIMD::Int(extent[i]) >> lod, one));
5277 }
5278 }
5279 else
5280 {
5281 for (uint32_t i = 0; i < dimensions; i++)
5282 {
5283 dst.move(i, SIMD::Int(extent[i]));
5284 }
5285 }
5286
5287 if (isArrayed)
5288 {
5289 auto numElements = isCubeMap ? (arrayLayers / 6) : RValue<Int>(arrayLayers);
5290 dst.move(dimensions, SIMD::Int(numElements));
5291 }
5292 }
5293
5294 SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
5295 {
5296 auto &resultTy = getType(Type::ID(insn.word(1)));
5297 ASSERT(resultTy.sizeInComponents == 1);
5298 auto resultId = Object::ID(insn.word(2));
5299 auto imageId = Object::ID(insn.word(3));
5300
5301 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5302 auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
5303 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
5304
5305 Pointer<Byte> descriptor = state->getPointer(imageId).base;
5306 Int mipLevels = 0;
5307 switch (bindingLayout.descriptorType)
5308 {
5309 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
5310 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
5311 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
5312 mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
5313 break;
5314 default:
5315 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
5316 }
5317
5318 auto &dst = state->createIntermediate(resultId, 1);
5319 dst.move(0, SIMD::Int(mipLevels));
5320
5321 return EmitResult::Continue;
5322 }
5323
5324 SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
5325 {
5326 auto &resultTy = getType(Type::ID(insn.word(1)));
5327 ASSERT(resultTy.sizeInComponents == 1);
5328 auto resultId = Object::ID(insn.word(2));
5329 auto imageId = Object::ID(insn.word(3));
5330 auto imageTy = getType(getObject(imageId).type);
5331 ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
5332 ASSERT(imageTy.definition.word(3) == spv::Dim2D);
5333 ASSERT(imageTy.definition.word(6 /* MS */) == 1);
5334
5335 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5336 auto setLayout = state->routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
5337 auto &bindingLayout = setLayout->getBindingLayout(d.Binding);
5338
5339 Pointer<Byte> descriptor = state->getPointer(imageId).base;
5340 Int sampleCount = 0;
5341 switch (bindingLayout.descriptorType)
5342 {
5343 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
5344 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
5345 break;
5346 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
5347 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
5348 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
5349 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
5350 break;
5351 default:
5352 UNREACHABLE("Image descriptorType: %d", int(bindingLayout.descriptorType));
5353 }
5354
5355 auto &dst = state->createIntermediate(resultId, 1);
5356 dst.move(0, SIMD::Int(sampleCount));
5357
5358 return EmitResult::Continue;
5359 }
5360
5361 SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const
5362 {
5363 auto routine = state->routine;
5364 bool isArrayed = imageType.definition.word(5) != 0;
5365 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
5366 int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
5367
5368 SIMD::Int u = coordinate.Int(0);
5369 SIMD::Int v = SIMD::Int(0);
5370
5371 if (getType(coordinate.type).sizeInComponents > 1)
5372 {
5373 v = coordinate.Int(1);
5374 }
5375
5376 if (dim == spv::DimSubpassData)
5377 {
5378 u += routine->windowSpacePosition[0];
5379 v += routine->windowSpacePosition[1];
5380 }
5381
5382 if (useStencilAspect)
5383 {
5384 // Adjust addressing for quad layout. Pitches are already correct for the stencil aspect.
5385 // In the quad-layout block, pixel order is [x0,y0 x1,y0 x0,y1 x1,y1]
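			// For example, within one quad the (u,v) -> texel index mapping is:
			// (0,0)->0, (1,0)->1, (0,1)->2, (1,1)->3; both rows of the quad share
			// the same (even) v, so one pitch-stride row in memory holds two
			// source rows at double width.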
5386 u = ((v & SIMD::Int(1)) << 1) | ((u << 1) - (u & SIMD::Int(1)));
5387 v &= SIMD::Int(~1);
5388 }
5389
5390 auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
5391 ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
5392 : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
5393 auto slicePitch = SIMD::Int(
5394 *Pointer<Int>(descriptor + (useStencilAspect
5395 ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
5396 : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
5397 auto samplePitch = SIMD::Int(
5398 *Pointer<Int>(descriptor + (useStencilAspect
5399 ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
5400 : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
5401
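		// The texel address is base + u*texelSize + v*rowPitch + (z or layer)*slicePitch
		// (+ viewID*slicePitch for multiview subpass data) + sampleIndex*samplePitch.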
5402 ptr += u * SIMD::Int(texelSize);
5403 if (dims > 1)
5404 {
5405 ptr += v * rowPitch;
5406 }
5407 if (dims > 2)
5408 {
5409 ptr += coordinate.Int(2) * slicePitch;
5410 }
5411 if (isArrayed)
5412 {
5413 ptr += coordinate.Int(dims) * slicePitch;
5414 }
5415
5416 if (dim == spv::DimSubpassData)
5417 {
5418 // Multiview input attachment access is to the layer corresponding to the current view
5419 ptr += SIMD::Int(routine->viewID) * slicePitch;
5420 }
5421
5422 if (sampleId.value())
5423 {
5424 GenericValue sample(this, state, sampleId);
5425 ptr += sample.Int(0) * samplePitch;
5426 }
5427
5428 return ptr;
5429 }
5430
5431 void SpirvShader::Yield(YieldResult res) const
5432 {
5433 rr::Yield(RValue<Int>(int(res)));
5434 }
5435
5436 SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
5437 {
5438 auto &resultType = getType(Type::ID(insn.word(1)));
5439 auto imageId = Object::ID(insn.word(3));
5440 auto &image = getObject(imageId);
5441 auto &imageType = getType(image.type);
5442 Object::ID resultId = insn.word(2);
5443
5444 Object::ID sampleId = 0;
5445
5446 if (insn.wordCount() > 5)
5447 {
5448 int operand = 6;
5449 auto imageOperands = insn.word(5);
5450 if (imageOperands & spv::ImageOperandsSampleMask)
5451 {
5452 sampleId = insn.word(operand++);
5453 imageOperands &= ~spv::ImageOperandsSampleMask;
5454 }
5455
5456			// There should be no remaining image operands.
5457 ASSERT(!imageOperands);
5458 }
5459
5460 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5461 auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
5462
5463 auto coordinate = GenericValue(this, state, insn.word(4));
5464 const DescriptorDecorations &d = descriptorDecorations.at(imageId);
5465
5466 // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
5467 // the renderpass data instead. In all other cases, we can use the format in the instruction.
5468 auto vkFormat = (dim == spv::DimSubpassData)
5469 ? inputAttachmentFormats[d.InputAttachmentIndex]
5470 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
5471
5472 // Depth+Stencil image attachments select aspect based on the Sampled Type of the
5473 // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
5474 auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
5475 getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
5476
5477 if (useStencilAspect)
5478 {
5479 vkFormat = VK_FORMAT_S8_UINT;
5480 }
5481
5482 auto pointer = state->getPointer(imageId);
5483 Pointer<Byte> binding = pointer.base;
5484 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
5485 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
5486 : OFFSET(vk::StorageImageDescriptor, ptr)));
5487
5488 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5489
5490 auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
5491
5492 auto texelSize = vk::Format(vkFormat).bytes();
5493 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
5494 auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
5495
5496 // "The value returned by a read of an invalid texel is undefined,
5497 // unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled."
5498 // TODO: Don't always assume a buffer resource.
5499 auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
5500
5501 SIMD::Int packed[4];
5502 // Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
5503 // of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
5504 // TODO: specialize for small formats?
5505 for (auto i = 0; i < (texelSize + 3)/4; i++)
5506 {
5507 packed[i] = SIMD::Load<SIMD::Int>(texelPtr, robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
5508 texelPtr += sizeof(float);
5509 }
5510
5511 // Format support requirements here come from two sources:
5512 // - Minimum required set of formats for loads from storage images
5513 // - Any format supported as a color or depth/stencil attachment, for input attachments
5514 switch(vkFormat)
5515 {
5516 case VK_FORMAT_R32G32B32A32_SFLOAT:
5517 case VK_FORMAT_R32G32B32A32_SINT:
5518 case VK_FORMAT_R32G32B32A32_UINT:
5519 dst.move(0, packed[0]);
5520 dst.move(1, packed[1]);
5521 dst.move(2, packed[2]);
5522 dst.move(3, packed[3]);
5523 break;
5524 case VK_FORMAT_R32_SINT:
5525 case VK_FORMAT_R32_UINT:
5526 dst.move(0, packed[0]);
5527 // Fill remaining channels with 0,0,1 (of the correct type)
5528 dst.move(1, SIMD::Int(0));
5529 dst.move(2, SIMD::Int(0));
5530 dst.move(3, SIMD::Int(1));
5531 break;
5532 case VK_FORMAT_R32_SFLOAT:
5533 case VK_FORMAT_D32_SFLOAT:
5534 case VK_FORMAT_D32_SFLOAT_S8_UINT:
5535 dst.move(0, packed[0]);
5536 // Fill remaining channels with 0,0,1 (of the correct type)
5537 dst.move(1, SIMD::Float(0));
5538 dst.move(2, SIMD::Float(0));
5539 dst.move(3, SIMD::Float(1));
5540 break;
5541 case VK_FORMAT_D16_UNORM:
5542 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xffff)) * SIMD::Float(1.0f / 65535.0f));
5543 dst.move(1, SIMD::Float(0));
5544 dst.move(2, SIMD::Float(0));
5545 dst.move(3, SIMD::Float(1));
5546 break;
5547 case VK_FORMAT_R16G16B16A16_SINT:
5548 dst.move(0, (packed[0] << 16) >> 16);
5549 dst.move(1, (packed[0]) >> 16);
5550 dst.move(2, (packed[1] << 16) >> 16);
5551 dst.move(3, (packed[1]) >> 16);
5552 break;
5553 case VK_FORMAT_R16G16B16A16_UINT:
5554 dst.move(0, packed[0] & SIMD::Int(0xffff));
5555 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5556 dst.move(2, packed[1] & SIMD::Int(0xffff));
5557 dst.move(3, (packed[1] >> 16) & SIMD::Int(0xffff));
5558 break;
5559 case VK_FORMAT_R16G16B16A16_SFLOAT:
5560 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5561 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
5562 dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
5563 dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
5564 break;
5565 case VK_FORMAT_R8G8B8A8_SNORM:
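			// Each signed byte is shifted into the top byte of a 32-bit lane before
			// the int->float conversion, so scaling by 1/0x7f000000 maps an 8-bit
			// value v to v/127; the clamp to [-1,1] handles the two's-complement
			// -128 case.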
5566 dst.move(0, Min(Max(SIMD::Float(((packed[0]<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5567 dst.move(1, Min(Max(SIMD::Float(((packed[0]<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5568 dst.move(2, Min(Max(SIMD::Float(((packed[0]<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5569 dst.move(3, Min(Max(SIMD::Float(((packed[0]) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
5570 break;
5571 case VK_FORMAT_R8G8B8A8_UNORM:
5572 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
5573 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5574 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5575 dst.move(2, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5576 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5577 break;
5578 case VK_FORMAT_R8G8B8A8_SRGB:
5579 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
5580 dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5581 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5582 dst.move(2, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5583 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5584 break;
5585 case VK_FORMAT_B8G8R8A8_UNORM:
5586 dst.move(0, SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5587 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5588 dst.move(2, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5589 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5590 break;
5591 case VK_FORMAT_B8G8R8A8_SRGB:
5592 dst.move(0, ::sRGBtoLinear(SIMD::Float(((packed[0]>>16) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5593 dst.move(1, ::sRGBtoLinear(SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5594 dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f)));
5595 dst.move(3, SIMD::Float(((packed[0]>>24) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5596 break;
5597 case VK_FORMAT_R8G8B8A8_UINT:
5598 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
5599 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5600 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5601 dst.move(2, ((As<SIMD::UInt>(packed[0])>>16) & SIMD::UInt(0xFF)));
5602 dst.move(3, ((As<SIMD::UInt>(packed[0])>>24) & SIMD::UInt(0xFF)));
5603 break;
5604 case VK_FORMAT_R8G8B8A8_SINT:
5605 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
5606 dst.move(0, (packed[0] << 24) >> 24);
5607 dst.move(1, (packed[0] << 16) >> 24);
5608 dst.move(2, (packed[0] << 8) >> 24);
5609 dst.move(3, (packed[0]) >> 24);
5610 break;
5611 case VK_FORMAT_R8_UNORM:
5612 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5613 dst.move(1, SIMD::Float(0));
5614 dst.move(2, SIMD::Float(0));
5615 dst.move(3, SIMD::Float(1));
5616 break;
5617 case VK_FORMAT_R8_UINT:
5618 case VK_FORMAT_S8_UINT:
5619 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5620 dst.move(1, SIMD::UInt(0));
5621 dst.move(2, SIMD::UInt(0));
5622 dst.move(3, SIMD::UInt(1));
5623 break;
5624 case VK_FORMAT_R8_SINT:
5625 dst.move(0, (packed[0] << 24) >> 24);
5626 dst.move(1, SIMD::Int(0));
5627 dst.move(2, SIMD::Int(0));
5628 dst.move(3, SIMD::Int(1));
5629 break;
5630 case VK_FORMAT_R8G8_UNORM:
5631 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5632 dst.move(1, SIMD::Float(((packed[0]>>8) & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 255.f));
5633 dst.move(2, SIMD::Float(0));
5634 dst.move(3, SIMD::Float(1));
5635 break;
5636 case VK_FORMAT_R8G8_UINT:
5637 dst.move(0, (As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF)));
5638 dst.move(1, ((As<SIMD::UInt>(packed[0])>>8) & SIMD::UInt(0xFF)));
5639 dst.move(2, SIMD::UInt(0));
5640 dst.move(3, SIMD::UInt(1));
5641 break;
5642 case VK_FORMAT_R8G8_SINT:
5643 dst.move(0, (packed[0] << 24) >> 24);
5644 dst.move(1, (packed[0] << 16) >> 24);
5645 dst.move(2, SIMD::Int(0));
5646 dst.move(3, SIMD::Int(1));
5647 break;
5648 case VK_FORMAT_R16_SFLOAT:
5649 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5650 dst.move(1, SIMD::Float(0));
5651 dst.move(2, SIMD::Float(0));
5652 dst.move(3, SIMD::Float(1));
5653 break;
5654 case VK_FORMAT_R16_UINT:
5655 dst.move(0, packed[0] & SIMD::Int(0xffff));
5656 dst.move(1, SIMD::UInt(0));
5657 dst.move(2, SIMD::UInt(0));
5658 dst.move(3, SIMD::UInt(1));
5659 break;
5660 case VK_FORMAT_R16_SINT:
5661 dst.move(0, (packed[0] << 16) >> 16);
5662 dst.move(1, SIMD::Int(0));
5663 dst.move(2, SIMD::Int(0));
5664 dst.move(3, SIMD::Int(1));
5665 break;
5666 case VK_FORMAT_R16G16_SFLOAT:
5667 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
5668 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
5669 dst.move(2, SIMD::Float(0));
5670 dst.move(3, SIMD::Float(1));
5671 break;
5672 case VK_FORMAT_R16G16_UINT:
5673 dst.move(0, packed[0] & SIMD::Int(0xffff));
5674 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xffff));
5675 dst.move(2, SIMD::UInt(0));
5676 dst.move(3, SIMD::UInt(1));
5677 break;
5678 case VK_FORMAT_R16G16_SINT:
5679 dst.move(0, (packed[0] << 16) >> 16);
5680 dst.move(1, (packed[0]) >> 16);
5681 dst.move(2, SIMD::Int(0));
5682 dst.move(3, SIMD::Int(1));
5683 break;
5684 case VK_FORMAT_R32G32_SINT:
5685 case VK_FORMAT_R32G32_UINT:
5686 dst.move(0, packed[0]);
5687 dst.move(1, packed[1]);
5688 dst.move(2, SIMD::Int(0));
5689 dst.move(3, SIMD::Int(1));
5690 break;
5691 case VK_FORMAT_R32G32_SFLOAT:
5692 dst.move(0, packed[0]);
5693 dst.move(1, packed[1]);
5694 dst.move(2, SIMD::Float(0));
5695 dst.move(3, SIMD::Float(1));
5696 break;
5697 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
5698 dst.move(0, (packed[0]) & SIMD::Int(0x3FF));
5699 dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
5700 dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
5701 dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
5702 break;
5703 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
5704 dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5705 dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5706 dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
5707 dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
5708 break;
5709 case VK_FORMAT_R5G6B5_UNORM_PACK16:
5710 dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5711 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
5712 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5713 dst.move(3, SIMD::Float(1));
5714 break;
5715 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
5716 dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5717 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5718 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
5719 dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
5720 break;
5721 default:
5722 UNIMPLEMENTED("VkFormat %d", int(vkFormat));
5723 break;
5724 }
5725
5726 return EmitResult::Continue;
5727 }
5728
5729 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
5730 {
5731 auto imageId = Object::ID(insn.word(1));
5732 auto &image = getObject(imageId);
5733 auto &imageType = getType(image.type);
5734
5735 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
5736
5737 // TODO(b/131171141): Not handling any image operands yet.
5738 ASSERT(insn.wordCount() == 4);
5739
5740 auto coordinate = GenericValue(this, state, insn.word(2));
5741 auto texel = GenericValue(this, state, insn.word(3));
5742
5743 Pointer<Byte> binding = state->getPointer(imageId).base;
5744 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
5745 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5746
5747 SIMD::Int packed[4];
5748 auto numPackedElements = 0u;
5749 int texelSize = 0;
5750 auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
5751 switch (format)
5752 {
5753 case spv::ImageFormatRgba32f:
5754 case spv::ImageFormatRgba32i:
5755 case spv::ImageFormatRgba32ui:
5756 texelSize = 16;
5757 packed[0] = texel.Int(0);
5758 packed[1] = texel.Int(1);
5759 packed[2] = texel.Int(2);
5760 packed[3] = texel.Int(3);
5761 numPackedElements = 4;
5762 break;
5763 case spv::ImageFormatR32f:
5764 case spv::ImageFormatR32i:
5765 case spv::ImageFormatR32ui:
5766 texelSize = 4;
5767 packed[0] = texel.Int(0);
5768 numPackedElements = 1;
5769 break;
5770 case spv::ImageFormatRgba8:
5771 texelSize = 4;
5772 packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
5773 ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
5774 ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
5775 ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
5776 numPackedElements = 1;
5777 break;
5778 case spv::ImageFormatRgba8Snorm:
5779 texelSize = 4;
5780 packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5781 SIMD::Int(0xFF)) |
5782 ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5783 SIMD::Int(0xFF)) << 8) |
5784 ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5785 SIMD::Int(0xFF)) << 16) |
5786 ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
5787 SIMD::Int(0xFF)) << 24);
5788 numPackedElements = 1;
5789 break;
5790 case spv::ImageFormatRgba8i:
5791 case spv::ImageFormatRgba8ui:
5792 texelSize = 4;
5793 packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
5794 (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
5795 (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
5796 (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
5797 numPackedElements = 1;
5798 break;
5799 case spv::ImageFormatRgba16f:
5800 texelSize = 8;
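			// Pack four halves into two 32-bit words; FloatToHalfBits' second
			// argument, when true, places the converted half in the high 16 bits.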
5801 packed[0] = FloatToHalfBits(texel.UInt(0), false) | FloatToHalfBits(texel.UInt(1), true);
5802 packed[1] = FloatToHalfBits(texel.UInt(2), false) | FloatToHalfBits(texel.UInt(3), true);
5803 numPackedElements = 2;
5804 break;
5805 case spv::ImageFormatRgba16i:
5806 case spv::ImageFormatRgba16ui:
5807 texelSize = 8;
5808 packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xffff)) << 16);
5809 packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xffff)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xffff)) << 16);
5810 numPackedElements = 2;
5811 break;
5812 case spv::ImageFormatRg32f:
5813 case spv::ImageFormatRg32i:
5814 case spv::ImageFormatRg32ui:
5815 texelSize = 8;
5816 packed[0] = texel.Int(0);
5817 packed[1] = texel.Int(1);
5818 numPackedElements = 2;
5819 break;
5820
5821 case spv::ImageFormatRg16f:
5822 case spv::ImageFormatR11fG11fB10f:
5823 case spv::ImageFormatR16f:
5824 case spv::ImageFormatRgba16:
5825 case spv::ImageFormatRgb10A2:
5826 case spv::ImageFormatRg16:
5827 case spv::ImageFormatRg8:
5828 case spv::ImageFormatR16:
5829 case spv::ImageFormatR8:
5830 case spv::ImageFormatRgba16Snorm:
5831 case spv::ImageFormatRg16Snorm:
5832 case spv::ImageFormatRg8Snorm:
5833 case spv::ImageFormatR16Snorm:
5834 case spv::ImageFormatR8Snorm:
5835 case spv::ImageFormatRg16i:
5836 case spv::ImageFormatRg8i:
5837 case spv::ImageFormatR16i:
5838 case spv::ImageFormatR8i:
5839 case spv::ImageFormatRgb10a2ui:
5840 case spv::ImageFormatRg16ui:
5841 case spv::ImageFormatRg8ui:
5842 case spv::ImageFormatR16ui:
5843 case spv::ImageFormatR8ui:
5844 UNIMPLEMENTED("spv::ImageFormat %d", int(format));
5845 break;
5846
5847 default:
5848 UNREACHABLE("spv::ImageFormat %d", int(format));
5849 break;
5850 }
5851
5852 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
5853 auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false);
5854
5855 // SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined."
5856 auto robustness = OutOfBoundsBehavior::UndefinedValue;
5857
5858 for (auto i = 0u; i < numPackedElements; i++)
5859 {
5860 SIMD::Store(texelPtr, packed[i], robustness, state->activeLaneMask());
5861 texelPtr += sizeof(float);
5862 }
5863
5864 return EmitResult::Continue;
5865 }
5866
5867 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(InsnIterator insn, EmitState *state) const
5868 {
5869 auto &resultType = getType(Type::ID(insn.word(1)));
5870 auto imageId = Object::ID(insn.word(3));
5871 auto &image = getObject(imageId);
5872 // Note: OpImageTexelPointer is unusual in that the image is passed by pointer.
5873 // Look through to get the actual image type.
5874 auto &imageType = getType(getType(image.type).element);
5875 Object::ID resultId = insn.word(2);
5876
5877 ASSERT(imageType.opcode() == spv::OpTypeImage);
5878 ASSERT(resultType.storageClass == spv::StorageClassImage);
5879 ASSERT(getType(resultType.element).opcode() == spv::OpTypeInt);
5880
5881 auto coordinate = GenericValue(this, state, insn.word(4));
5882
5883 Pointer<Byte> binding = state->getPointer(imageId).base;
5884 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
5885 auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
5886
5887 auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
5888 auto ptr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, sizeof(uint32_t), 0, false);
5889
5890 state->createPointer(resultId, ptr);
5891
5892 return EmitResult::Continue;
5893 }
5894
5895 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
5896 {
5897 // Propagate the image pointer in both cases.
5898 // Consumers of OpSampledImage will look through to find the sampler pointer.
5899
5900 Object::ID resultId = insn.word(2);
5901 Object::ID imageId = insn.word(3);
5902
5903 state->createPointer(resultId, state->getPointer(imageId));
5904
5905 return EmitResult::Continue;
5906 }
5907
5908 SpirvShader::EmitResult SpirvShader::EmitAtomicOp(InsnIterator insn, EmitState *state) const
5909 {
5910 auto &resultType = getType(Type::ID(insn.word(1)));
5911 Object::ID resultId = insn.word(2);
5912 Object::ID semanticsId = insn.word(5);
5913 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
5914 auto memoryOrder = MemoryOrder(memorySemantics);
5915 // Where no value is provided (increment/decrement) use an implicit value of 1.
5916 auto value = (insn.wordCount() == 7) ? GenericValue(this, state, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
5917 auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
5918 auto ptr = state->getPointer(insn.word(3));
5919 auto ptrOffsets = ptr.offsets();
5920
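		// Reactor's atomic intrinsics operate on scalars, so serialize over the
		// SIMD lanes: for each lane that is active and enabled by
		// storesAndAtomicsMask(), extract the per-lane offset and operand, perform
		// the scalar atomic, and insert the old value back into the result vector.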
5921 SIMD::UInt x(0);
5922 auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
5923 for (int j = 0; j < SIMD::Width; j++)
5924 {
5925 If(Extract(mask, j) != 0)
5926 {
5927 auto offset = Extract(ptrOffsets, j);
5928 auto laneValue = Extract(value, j);
5929 UInt v;
5930 switch (insn.opcode())
5931 {
5932 case spv::OpAtomicIAdd:
5933 case spv::OpAtomicIIncrement:
5934 v = AddAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5935 break;
5936 case spv::OpAtomicISub:
5937 case spv::OpAtomicIDecrement:
5938 v = SubAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5939 break;
5940 case spv::OpAtomicAnd:
5941 v = AndAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5942 break;
5943 case spv::OpAtomicOr:
5944 v = OrAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5945 break;
5946 case spv::OpAtomicXor:
5947 v = XorAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5948 break;
5949 case spv::OpAtomicSMin:
5950 v = As<UInt>(MinAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
5951 break;
5952 case spv::OpAtomicSMax:
5953 v = As<UInt>(MaxAtomic(Pointer<Int>(&ptr.base[offset]), As<Int>(laneValue), memoryOrder));
5954 break;
5955 case spv::OpAtomicUMin:
5956 v = MinAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5957 break;
5958 case spv::OpAtomicUMax:
5959 v = MaxAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5960 break;
5961 case spv::OpAtomicExchange:
5962 v = ExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, memoryOrder);
5963 break;
5964 default:
5965 UNREACHABLE("%s", OpcodeName(insn.opcode()).c_str());
5966 break;
5967 }
5968 x = Insert(x, v, j);
5969 }
5970 }
5971
5972 dst.move(0, x);
5973 return EmitResult::Continue;
5974 }
5975
5976 SpirvShader::EmitResult SpirvShader::EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const
5977 {
5978 // Separate from EmitAtomicOp due to different instruction encoding
5979 auto &resultType = getType(Type::ID(insn.word(1)));
5980 Object::ID resultId = insn.word(2);
5981
5982 auto memorySemanticsEqual = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(5)).constantValue[0]);
5983 auto memoryOrderEqual = MemoryOrder(memorySemanticsEqual);
5984 auto memorySemanticsUnequal = static_cast<spv::MemorySemanticsMask>(getObject(insn.word(6)).constantValue[0]);
5985 auto memoryOrderUnequal = MemoryOrder(memorySemanticsUnequal);
5986
5987 auto value = GenericValue(this, state, insn.word(7));
5988 auto comparator = GenericValue(this, state, insn.word(8));
5989 auto &dst = state->createIntermediate(resultId, resultType.sizeInComponents);
5990 auto ptr = state->getPointer(insn.word(3));
5991 auto ptrOffsets = ptr.offsets();
5992
5993 SIMD::UInt x(0);
5994 auto mask = state->activeLaneMask() & state->storesAndAtomicsMask();
5995 for (int j = 0; j < SIMD::Width; j++)
5996 {
5997 If(Extract(mask, j) != 0)
5998 {
5999 auto offset = Extract(ptrOffsets, j);
6000 auto laneValue = Extract(value.UInt(0), j);
6001 auto laneComparator = Extract(comparator.UInt(0), j);
6002 UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
6003 x = Insert(x, v, j);
6004 }
6005 }
6006
6007 dst.move(0, x);
6008 return EmitResult::Continue;
6009 }
6010
6011 SpirvShader::EmitResult SpirvShader::EmitCopyObject(InsnIterator insn, EmitState *state) const
6012 {
6013 auto ty = getType(insn.word(1));
6014 auto &dst = state->createIntermediate(insn.word(2), ty.sizeInComponents);
6015 auto src = GenericValue(this, state, insn.word(3));
6016 for (uint32_t i = 0; i < ty.sizeInComponents; i++)
6017 {
6018 dst.move(i, src.Int(i));
6019 }
6020 return EmitResult::Continue;
6021 }
6022
6023 SpirvShader::EmitResult SpirvShader::EmitCopyMemory(InsnIterator insn, EmitState *state) const
6024 {
6025 Object::ID dstPtrId = insn.word(1);
6026 Object::ID srcPtrId = insn.word(2);
6027 auto &dstPtrTy = getType(getObject(dstPtrId).type);
6028 auto &srcPtrTy = getType(getObject(srcPtrId).type);
6029 ASSERT(dstPtrTy.element == srcPtrTy.element);
6030
6031 bool dstInterleavedByLane = IsStorageInterleavedByLane(dstPtrTy.storageClass);
6032 bool srcInterleavedByLane = IsStorageInterleavedByLane(srcPtrTy.storageClass);
6033 auto dstPtr = GetPointerToData(dstPtrId, 0, state);
6034 auto srcPtr = GetPointerToData(srcPtrId, 0, state);
6035
6036 std::unordered_map<uint32_t, uint32_t> srcOffsets;
6037
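		// Visit the source object once to record each component's byte offset,
		// then walk the destination layout and copy component i from its recorded
		// source offset.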
6038 VisitMemoryObject(srcPtrId, [&](uint32_t i, uint32_t srcOffset) { srcOffsets[i] = srcOffset; });
6039
6040 VisitMemoryObject(dstPtrId, [&](uint32_t i, uint32_t dstOffset)
6041 {
6042 auto it = srcOffsets.find(i);
6043 ASSERT(it != srcOffsets.end());
6044 auto srcOffset = it->second;
6045
6046 auto dst = dstPtr + dstOffset;
6047 auto src = srcPtr + srcOffset;
6048 if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
6049 if (srcInterleavedByLane) { src = interleaveByLane(src); }
6050
6051 // TODO(b/131224163): Optimize based on src/dst storage classes.
6052 auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
6053
6054 auto value = SIMD::Load<SIMD::Float>(src, robustness, state->activeLaneMask());
6055 SIMD::Store(dst, value, robustness, state->activeLaneMask());
6056 });
6057 return EmitResult::Continue;
6058 }
6059
6060 SpirvShader::EmitResult SpirvShader::EmitControlBarrier(InsnIterator insn, EmitState *state) const
6061 {
6062 auto executionScope = spv::Scope(GetConstScalarInt(insn.word(1)));
6063 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(3)));
6064 // TODO: We probably want to consider the memory scope here. For now,
6065 // just always emit the full fence.
6066 Fence(semantics);
6067
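		// A workgroup barrier yields out of the shader's coroutine so the
		// scheduler can resume every invocation in the workgroup past this point.
		// Subgroup scope needs no further action here, since the lanes of a
		// subgroup execute in lockstep.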
6068 switch (executionScope)
6069 {
6070 case spv::ScopeWorkgroup:
6071 Yield(YieldResult::ControlBarrier);
6072 break;
6073 case spv::ScopeSubgroup:
6074 break;
6075 default:
6076 // See Vulkan 1.1 spec, Appendix A, Validation Rules within a Module.
6077 UNREACHABLE("Scope for execution must be limited to Workgroup or Subgroup");
6078 break;
6079 }
6080
6081 return EmitResult::Continue;
6082 }
6083
6084 SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
6085 {
6086 auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
6087 // TODO: We probably want to consider the memory scope here. For now,
6088 // just always emit the full fence.
6089 Fence(semantics);
6090 return EmitResult::Continue;
6091 }
6092
6093 void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
6094 {
6095 if (semantics == spv::MemorySemanticsMaskNone)
6096 {
6097			return; // no-op
6098 }
6099 rr::Fence(MemoryOrder(semantics));
6100 }
6101
6102 SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
6103 {
6104 static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4");
6105
6106 auto &type = getType(Type::ID(insn.word(1)));
6107 Object::ID resultId = insn.word(2);
6108 auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
6109 ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");
6110
6111 auto &dst = state->createIntermediate(resultId, type.sizeInComponents);
6112
6113 switch (insn.opcode())
6114 {
6115 case spv::OpGroupNonUniformElect:
6116 {
6117 // Result is true only in the active invocation with the lowest id
6118 // in the group, otherwise result is false.
6119 SIMD::Int active = state->activeLaneMask();
6120 // TODO: Would be nice if we could write this as:
6121 // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
6122 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
6123 auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
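			// For example, with active = (0, ~0, ~0, 0) the prefix-OR term
			// v0111 & (...) evaluates to (0, 0, ~0, ~0), so elect = (0, ~0, 0, 0):
			// only the lowest active lane (lane 1) remains set.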
6124 dst.move(0, elect);
6125 break;
6126 }
6127
6128 case spv::OpGroupNonUniformAll:
6129 {
6130 GenericValue predicate(this, state, insn.word(4));
6131 dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(state->activeLaneMask())));
6132 break;
6133 }
6134
6135 case spv::OpGroupNonUniformAny:
6136 {
6137 GenericValue predicate(this, state, insn.word(4));
6138 dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(state->activeLaneMask())));
6139 break;
6140 }
6141
6142 case spv::OpGroupNonUniformAllEqual:
6143 {
6144 GenericValue value(this, state, insn.word(4));
6145 auto res = SIMD::UInt(0xffffffff);
6146 SIMD::UInt active = As<SIMD::UInt>(state->activeLaneMask());
6147 SIMD::UInt inactive = ~active;
6148 for (auto i = 0u; i < type.sizeInComponents; i++)
6149 {
6150 SIMD::UInt v = value.UInt(i) & active;
6151 SIMD::UInt filled = v;
6152 for (int j = 0; j < SIMD::Width - 1; j++)
6153 {
6154 filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value
6155 }
6156 res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
6157 }
6158 dst.move(0, res);
6159 break;
6160 }
6161
6162 case spv::OpGroupNonUniformBroadcast:
6163 {
6164 auto valueId = Object::ID(insn.word(4));
6165 auto id = SIMD::Int(GetConstScalarInt(insn.word(5)));
6166 GenericValue value(this, state, valueId);
6167 auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
6168 for (auto i = 0u; i < type.sizeInComponents; i++)
6169 {
6170 dst.move(i, OrAll(value.Int(i) & mask));
6171 }
6172 break;
6173 }
6174
6175 case spv::OpGroupNonUniformBroadcastFirst:
6176 {
6177 auto valueId = Object::ID(insn.word(4));
6178 GenericValue value(this, state, valueId);
6179				// Result is the value of the Value operand in the active invocation
6180				// with the lowest id in the group, broadcast to all active invocations.
6181 SIMD::Int active = state->activeLaneMask();
6182 // TODO: Would be nice if we could write this as:
6183 // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
6184 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
6185 auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
6186 for (auto i = 0u; i < type.sizeInComponents; i++)
6187 {
6188 dst.move(i, OrAll(value.Int(i) & elect));
6189 }
6190 break;
6191 }
6192
6193 case spv::OpGroupNonUniformBallot:
6194 {
6195 ASSERT(type.sizeInComponents == 4);
6196 GenericValue predicate(this, state, insn.word(4));
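			// SignMask gathers the sign bit of each of the four lanes into bits
			// [3:0] of a scalar; that scalar is the subgroup ballot, and components
			// 1..3 of the 4-component result stay zero.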
6197 dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0))));
6198 dst.move(1, SIMD::Int(0));
6199 dst.move(2, SIMD::Int(0));
6200 dst.move(3, SIMD::Int(0));
6201 break;
6202 }
6203
6204 case spv::OpGroupNonUniformInverseBallot:
6205 {
6206 auto valueId = Object::ID(insn.word(4));
6207 ASSERT(type.sizeInComponents == 1);
6208 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6209 GenericValue value(this, state, valueId);
6210 auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
6211 dst.move(0, -bit);
6212 break;
6213 }
6214
6215 case spv::OpGroupNonUniformBallotBitExtract:
6216 {
6217 auto valueId = Object::ID(insn.word(4));
6218 auto indexId = Object::ID(insn.word(5));
6219 ASSERT(type.sizeInComponents == 1);
6220 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6221 ASSERT(getType(getObject(indexId).type).sizeInComponents == 1);
6222 GenericValue value(this, state, valueId);
6223 GenericValue index(this, state, indexId);
6224 auto vecIdx = index.Int(0) / SIMD::Int(32);
6225 auto bitIdx = index.Int(0) & SIMD::Int(31);
6226 auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
6227 (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
6228 (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
6229 (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
6230 dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
6231 break;
6232 }
6233
6234 case spv::OpGroupNonUniformBallotBitCount:
6235 {
6236 auto operation = spv::GroupOperation(insn.word(4));
6237 auto valueId = Object::ID(insn.word(5));
6238 ASSERT(type.sizeInComponents == 1);
6239 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6240 GenericValue value(this, state, valueId);
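			// Lane i of the mask selects which ballot bits that lane may count:
			// Reduce counts all four bits in every lane (mask 15), InclusiveScan
			// restricts lane i to bits [0..i] (masks 1,3,7,15), and ExclusiveScan
			// to bits [0..i-1] (masks 0,1,3,7).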
6241 switch (operation)
6242 {
6243 case spv::GroupOperationReduce:
6244 dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
6245 break;
6246 case spv::GroupOperationInclusiveScan:
6247 dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
6248 break;
6249 case spv::GroupOperationExclusiveScan:
6250 dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
6251 break;
6252 default:
6253 UNSUPPORTED("GroupOperation %d", int(operation));
6254 }
6255 break;
6256 }
6257
6258 case spv::OpGroupNonUniformBallotFindLSB:
6259 {
6260 auto valueId = Object::ID(insn.word(4));
6261 ASSERT(type.sizeInComponents == 1);
6262 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6263 GenericValue value(this, state, valueId);
6264 dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true));
6265 break;
6266 }
6267
6268 case spv::OpGroupNonUniformBallotFindMSB:
6269 {
6270 auto valueId = Object::ID(insn.word(4));
6271 ASSERT(type.sizeInComponents == 1);
6272 ASSERT(getType(getObject(valueId).type).sizeInComponents == 4);
6273 GenericValue value(this, state, valueId);
6274 dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
6275 break;
6276 }
6277
6278 case spv::OpGroupNonUniformShuffle:
6279 {
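			// With a subgroup width of 4, a dynamic shuffle reduces to a 4-way
			// select: compare the (per-lane) source id against each lane index and
			// OR together the matching broadcasts.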
6280 GenericValue value(this, state, insn.word(4));
6281 GenericValue id(this, state, insn.word(5));
6282 auto x = CmpEQ(SIMD::Int(0), id.Int(0));
6283 auto y = CmpEQ(SIMD::Int(1), id.Int(0));
6284 auto z = CmpEQ(SIMD::Int(2), id.Int(0));
6285 auto w = CmpEQ(SIMD::Int(3), id.Int(0));
6286 for (auto i = 0u; i < type.sizeInComponents; i++)
6287 {
6288 SIMD::Int v = value.Int(i);
6289 dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
6290 }
6291 break;
6292 }
6293
6294 case spv::OpGroupNonUniformShuffleXor:
6295 {
6296 GenericValue value(this, state, insn.word(4));
6297 GenericValue mask(this, state, insn.word(5));
6298 auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6299 auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6300 auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6301 auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
6302 for (auto i = 0u; i < type.sizeInComponents; i++)
6303 {
6304 SIMD::Int v = value.Int(i);
6305 dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
6306 }
6307 break;
6308 }
6309
6310 case spv::OpGroupNonUniformShuffleUp:
6311 {
6312 GenericValue value(this, state, insn.word(4));
6313 GenericValue delta(this, state, insn.word(5));
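			// ShuffleUp by delta reads from lane (i - delta); each dN mask selects
			// the swizzle for that delta value. Lanes where i - delta < 0 reuse
			// lane 0's component, which is acceptable since the result there is
			// undefined anyway.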
6314 auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
6315 auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
6316 auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
6317 auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
6318 for (auto i = 0u; i < type.sizeInComponents; i++)
6319 {
6320 SIMD::Int v = value.Int(i);
6321 dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
6322 }
6323 break;
6324 }
6325
6326 case spv::OpGroupNonUniformShuffleDown:
6327 {
6328 GenericValue value(this, state, insn.word(4));
6329 GenericValue delta(this, state, insn.word(5));
6330 auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
6331 auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
6332 auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
6333 auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
6334 for (auto i = 0u; i < type.sizeInComponents; i++)
6335 {
6336 SIMD::Int v = value.Int(i);
6337 dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
6338 }
6339 break;
6340 }
6341
6342 default:
6343			UNIMPLEMENTED("EmitGroupNonUniform op: %s", OpcodeName(insn.opcode()).c_str());
6344 }
6345 return EmitResult::Continue;
6346 }
6347
6348 SpirvShader::EmitResult SpirvShader::EmitArrayLength(InsnIterator insn, EmitState *state) const
6349 {
6350 auto resultTyId = Type::ID(insn.word(1));
6351 auto resultId = Object::ID(insn.word(2));
6352 auto structPtrId = Object::ID(insn.word(3));
6353 auto arrayFieldIdx = insn.word(4);
6354
6355 auto &resultType = getType(resultTyId);
6356 ASSERT(resultType.sizeInComponents == 1);
6357 ASSERT(resultType.definition.opcode() == spv::OpTypeInt);
6358
6359 auto &structPtrTy = getType(getObject(structPtrId).type);
6360 auto &structTy = getType(structPtrTy.element);
6361 auto &arrayTy = getType(structTy.definition.word(2 + arrayFieldIdx));
6362 ASSERT(arrayTy.definition.opcode() == spv::OpTypeRuntimeArray);
6363 auto &arrayElTy = getType(arrayTy.element);
6364
6365 auto &result = state->createIntermediate(resultId, 1);
6366 auto structBase = GetPointerToData(structPtrId, 0, state);
6367
6368 Decorations d = {};
6369 ApplyDecorationsForIdMember(&d, structPtrTy.element, arrayFieldIdx);
6370 ASSERT(d.HasOffset);
6371
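		// The runtime array occupies the tail of the buffer, so its length is the
		// remaining byte count divided by the element stride; the stride here is
		// taken as sizeInComponents * sizeof(float), i.e. tightly packed 32-bit
		// components.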
6372 auto arrayBase = structBase + d.Offset;
6373 auto arraySizeInBytes = SIMD::Int(arrayBase.limit()) - arrayBase.offsets();
6374 auto arrayLength = arraySizeInBytes / SIMD::Int(arrayElTy.sizeInComponents * sizeof(float));
6375
6376 result.move(0, SIMD::Int(arrayLength));
6377
6378 return EmitResult::Continue;
6379 }
6380
6381 uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
6382 {
6383 auto &scopeObj = getObject(id);
6384 ASSERT(scopeObj.kind == Object::Kind::Constant);
6385 ASSERT(getType(scopeObj.type).sizeInComponents == 1);
6386 return scopeObj.constantValue[0];
6387 }
6388
6389 void SpirvShader::EvalSpecConstantOp(InsnIterator insn)
6390 {
6391 auto opcode = static_cast<spv::Op>(insn.word(3));
6392
6393 switch (opcode)
6394 {
6395 case spv::OpIAdd:
6396 case spv::OpISub:
6397 case spv::OpIMul:
6398 case spv::OpUDiv:
6399 case spv::OpSDiv:
6400 case spv::OpUMod:
6401 case spv::OpSMod:
6402 case spv::OpSRem:
6403 case spv::OpShiftRightLogical:
6404 case spv::OpShiftRightArithmetic:
6405 case spv::OpShiftLeftLogical:
6406 case spv::OpBitwiseOr:
6407 case spv::OpLogicalOr:
6408 case spv::OpBitwiseAnd:
6409 case spv::OpLogicalAnd:
6410 case spv::OpBitwiseXor:
6411 case spv::OpLogicalEqual:
6412 case spv::OpIEqual:
6413 case spv::OpLogicalNotEqual:
6414 case spv::OpINotEqual:
6415 case spv::OpULessThan:
6416 case spv::OpSLessThan:
6417 case spv::OpUGreaterThan:
6418 case spv::OpSGreaterThan:
6419 case spv::OpULessThanEqual:
6420 case spv::OpSLessThanEqual:
6421 case spv::OpUGreaterThanEqual:
6422 case spv::OpSGreaterThanEqual:
6423 EvalSpecConstantBinaryOp(insn);
6424 break;
6425
6426 case spv::OpSConvert:
6427 case spv::OpFConvert:
6428 case spv::OpUConvert:
6429 case spv::OpSNegate:
6430 case spv::OpNot:
6431 case spv::OpLogicalNot:
6432 case spv::OpQuantizeToF16:
6433 EvalSpecConstantUnaryOp(insn);
6434 break;
6435
6436 case spv::OpSelect:
6437 {
6438 auto &result = CreateConstant(insn);
6439 auto const &cond = getObject(insn.word(4));
6440 auto condIsScalar = (getType(cond.type).sizeInComponents == 1);
6441 auto const &left = getObject(insn.word(5));
6442 auto const &right = getObject(insn.word(6));
6443
6444 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
6445 {
6446 auto sel = cond.constantValue[condIsScalar ? 0 : i];
6447 result.constantValue[i] = sel ? left.constantValue[i] : right.constantValue[i];
6448 }
6449 break;
6450 }
6451
6452 case spv::OpCompositeExtract:
6453 {
6454 auto &result = CreateConstant(insn);
6455 auto const &compositeObject = getObject(insn.word(4));
6456 auto firstComponent = WalkLiteralAccessChain(compositeObject.type, insn.wordCount() - 5, insn.wordPointer(5));
6457
6458 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
6459 {
6460 result.constantValue[i] = compositeObject.constantValue[firstComponent + i];
6461 }
6462 break;
6463 }
6464
6465 case spv::OpCompositeInsert:
6466 {
6467 auto &result = CreateConstant(insn);
6468 auto const &newPart = getObject(insn.word(4));
6469 auto const &oldObject = getObject(insn.word(5));
6470 auto firstNewComponent = WalkLiteralAccessChain(result.type, insn.wordCount() - 6, insn.wordPointer(6));
6471
6472 // old components before
6473 for (auto i = 0u; i < firstNewComponent; i++)
6474 {
6475 result.constantValue[i] = oldObject.constantValue[i];
6476 }
6477 // new part
6478 for (auto i = 0u; i < getType(newPart.type).sizeInComponents; i++)
6479 {
6480 result.constantValue[firstNewComponent + i] = newPart.constantValue[i];
6481 }
6482 // old components after
6483 for (auto i = firstNewComponent + getType(newPart.type).sizeInComponents; i < getType(result.type).sizeInComponents; i++)
6484 {
6485 result.constantValue[i] = oldObject.constantValue[i];
6486 }
6487 break;
6488 }
6489
6490 case spv::OpVectorShuffle:
6491 {
6492 auto &result = CreateConstant(insn);
6493 auto const &firstHalf = getObject(insn.word(4));
6494 auto const &secondHalf = getObject(insn.word(5));
6495
6496 for (auto i = 0u; i < getType(result.type).sizeInComponents; i++)
6497 {
6498 auto selector = insn.word(6 + i);
6499 if (selector == static_cast<uint32_t>(-1))
6500 {
6501 // Undefined value, we'll use zero
6502 result.constantValue[i] = 0;
6503 }
6504 else if (selector < getType(firstHalf.type).sizeInComponents)
6505 {
6506 result.constantValue[i] = firstHalf.constantValue[selector];
6507 }
6508 else
6509 {
6510 result.constantValue[i] = secondHalf.constantValue[selector - getType(firstHalf.type).sizeInComponents];
6511 }
6512 }
6513 break;
6514 }
6515
6516 default:
6517 // Other spec constant ops are possible, but require capabilities that are
6518		// not exposed in our Vulkan implementation (e.g. Kernel), so we should never
6519 // get here for correct shaders.
6520 UNSUPPORTED("EvalSpecConstantOp op: %s", OpcodeName(opcode).c_str());
6521 }
6522 }
6523
6524 void SpirvShader::EvalSpecConstantUnaryOp(InsnIterator insn)
6525 {
6526 auto &result = CreateConstant(insn);
6527
6528 auto opcode = static_cast<spv::Op>(insn.word(3));
6529 auto const &lhs = getObject(insn.word(4));
6530 auto size = getType(lhs.type).sizeInComponents;
6531
6532 for (auto i = 0u; i < size; i++)
6533 {
6534 auto &v = result.constantValue[i];
6535 auto l = lhs.constantValue[i];
6536
6537 switch (opcode)
6538 {
6539 case spv::OpSConvert:
6540 case spv::OpFConvert:
6541 case spv::OpUConvert:
6542 UNREACHABLE("Not possible until we have multiple bit widths");
6543 break;
6544
6545 case spv::OpSNegate:
6546 v = -(int)l;
6547 break;
6548 case spv::OpNot:
6549 case spv::OpLogicalNot:
6550 v = ~l;
6551 break;
6552
6553 case spv::OpQuantizeToF16:
6554 {
6555 // Can do this nicer with host code, but want to perfectly mirror the reactor code we emit.
6556 auto abs = bit_cast<float>(l & 0x7FFFFFFF);
6557 auto sign = l & 0x80000000;
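				// 0.000061035f is roughly 2^-14, the smallest normal half-float, and
				// 65504 is the largest finite half. Masking with 0xFFFFE000 clears the
				// 13 low mantissa bits that a float->half->float round trip discards.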
6558 auto isZero = abs < 0.000061035f ? ~0u : 0u;
6559 auto isInf = abs > 65504.0f ? ~0u : 0u;
6560 auto isNaN = (abs != abs) ? ~0u : 0u;
6561 auto isInfOrNan = isInf | isNaN;
6562 v = l & 0xFFFFE000;
6563 v &= ~isZero | 0x80000000;
6564 v = sign | (isInfOrNan & 0x7F800000) | (~isInfOrNan & v);
6565 v |= isNaN & 0x400000;
6566 break;
6567 }
6568 default:
6569 UNREACHABLE("EvalSpecConstantUnaryOp op: %s", OpcodeName(opcode).c_str());
6570 }
6571 }
6572 }
6573
6574 void SpirvShader::EvalSpecConstantBinaryOp(InsnIterator insn)
6575 {
6576 auto &result = CreateConstant(insn);
6577
6578 auto opcode = static_cast<spv::Op>(insn.word(3));
6579 auto const &lhs = getObject(insn.word(4));
6580 auto const &rhs = getObject(insn.word(5));
6581 auto size = getType(lhs.type).sizeInComponents;
6582
6583 for (auto i = 0u; i < size; i++)
6584 {
6585 auto &v = result.constantValue[i];
6586 auto l = lhs.constantValue[i];
6587 auto r = rhs.constantValue[i];
6588
6589 switch (opcode)
6590 {
6591 case spv::OpIAdd:
6592 v = l + r;
6593 break;
6594 case spv::OpISub:
6595 v = l - r;
6596 break;
6597 case spv::OpIMul:
6598 v = l * r;
6599 break;
6600 case spv::OpUDiv:
6601 v = (r == 0) ? 0 : l / r;
6602 break;
6603 case spv::OpUMod:
6604 v = (r == 0) ? 0 : l % r;
6605 break;
6606 case spv::OpSDiv:
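			// Guard against host-side undefined behaviour: x/0 traps, and
			// INT32_MIN / -1 overflows. SPIR-V leaves these results undefined,
			// so substituting -1 (UINT32_MAX) for the offending operand is
			// permissible.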
6607 if (r == 0) r = UINT32_MAX;
6608 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
6609 v = static_cast<int32_t>(l) / static_cast<int32_t>(r);
6610 break;
6611 case spv::OpSRem:
6612 if (r == 0) r = UINT32_MAX;
6613 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
6614 v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
6615 break;
6616 case spv::OpSMod:
6617 if (r == 0) r = UINT32_MAX;
6618 if (l == static_cast<uint32_t>(INT32_MIN)) l = UINT32_MAX;
6619 // Test if a signed-multiply would be negative.
6620 v = static_cast<int32_t>(l) % static_cast<int32_t>(r);
6621 if ((v & 0x80000000) != (r & 0x80000000))
6622 v += r;
6623 break;
			case spv::OpShiftRightLogical:
				v = l >> r;
				break;
			case spv::OpShiftRightArithmetic:
				v = static_cast<int32_t>(l) >> r;
				break;
			case spv::OpShiftLeftLogical:
				v = l << r;
				break;
			// Booleans are stored as 0 or ~0u, so the bitwise forms below double
			// as the logical ones.
			case spv::OpBitwiseOr:
			case spv::OpLogicalOr:
				v = l | r;
				break;
			case spv::OpBitwiseAnd:
			case spv::OpLogicalAnd:
				v = l & r;
				break;
			case spv::OpBitwiseXor:
				v = l ^ r;
				break;
			case spv::OpLogicalEqual:
			case spv::OpIEqual:
				v = (l == r) ? ~0u : 0u;
				break;
			case spv::OpLogicalNotEqual:
			case spv::OpINotEqual:
				v = (l != r) ? ~0u : 0u;
				break;
			case spv::OpULessThan:
				v = l < r ? ~0u : 0u;
				break;
			case spv::OpSLessThan:
				v = static_cast<int32_t>(l) < static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			case spv::OpUGreaterThan:
				v = l > r ? ~0u : 0u;
				break;
			case spv::OpSGreaterThan:
				v = static_cast<int32_t>(l) > static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			case spv::OpULessThanEqual:
				v = l <= r ? ~0u : 0u;
				break;
			case spv::OpSLessThanEqual:
				v = static_cast<int32_t>(l) <= static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			case spv::OpUGreaterThanEqual:
				v = l >= r ? ~0u : 0u;
				break;
			case spv::OpSGreaterThanEqual:
				v = static_cast<int32_t>(l) >= static_cast<int32_t>(r) ? ~0u : 0u;
				break;
			default:
				UNREACHABLE("EvalSpecConstantBinaryOp op: %s", OpcodeName(opcode).c_str());
			}
		}
	}

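	// Copies the final values of output interface variables into the routine's
	// flat output array, using each variable's Location/Component decorations to
	// compute its scalar slot.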
	void SpirvShader::emitEpilog(SpirvRoutine *routine) const
	{
		for (auto insn : *this)
		{
			switch (insn.opcode())
			{
			case spv::OpVariable:
			{
				Object::ID resultId = insn.word(2);
				auto &object = getObject(resultId);
				auto &objectTy = getType(object.type);
				if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
				{
					auto &dst = routine->getVariable(resultId);
					int offset = 0;
					VisitInterface(resultId,
						[&](Decorations const &d, AttribType type) {
							auto scalarSlot = d.Location << 2 | d.Component;
							routine->outputs[scalarSlot] = dst[offset++];
						});
				}
				break;
			}
			default:
				break;
			}
		}

		// Clear phis that are no longer used. This serves two purposes:
		// (1) The phi rr::Variables are destructed, preventing pointless
		//     materialization.
		// (2) Frees memory that will never be used again.
		routine->phis.clear();
	}

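	// Classifies the block by inspecting its terminating branch instruction and
	// the merge instruction (if any) immediately preceding it, and records the
	// IDs of all successor blocks in 'outs'.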
	SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
	{
		// Default to a Simple block; the terminator inspected below may refine this.
		kind = Block::Simple;

		// Walk the instructions to find the last two instructions of the block.
		InsnIterator insns[2];
		for (auto insn : *this)
		{
			insns[0] = insns[1];
			insns[1] = insn;
		}

		switch (insns[1].opcode())
		{
		case spv::OpBranch:
			branchInstruction = insns[1];
			outs.emplace(Block::ID(branchInstruction.word(1)));

			switch (insns[0].opcode())
			{
			case spv::OpLoopMerge:
				kind = Loop;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				continueTarget = Block::ID(mergeInstruction.word(2));
				break;

			default:
				kind = Block::Simple;
				break;
			}
			break;

		case spv::OpBranchConditional:
			branchInstruction = insns[1];
			outs.emplace(Block::ID(branchInstruction.word(2)));
			outs.emplace(Block::ID(branchInstruction.word(3)));

			switch (insns[0].opcode())
			{
			case spv::OpSelectionMerge:
				kind = StructuredBranchConditional;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				break;

			case spv::OpLoopMerge:
				kind = Loop;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				continueTarget = Block::ID(mergeInstruction.word(2));
				break;

			default:
				kind = UnstructuredBranchConditional;
				break;
			}
			break;

		case spv::OpSwitch:
			branchInstruction = insns[1];
			outs.emplace(Block::ID(branchInstruction.word(2)));  // default target
			for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
			{
				outs.emplace(Block::ID(branchInstruction.word(w)));  // case target
			}

			switch (insns[0].opcode())
			{
			case spv::OpSelectionMerge:
				kind = StructuredSwitch;
				mergeInstruction = insns[0];
				mergeBlock = Block::ID(mergeInstruction.word(1));
				break;

			default:
				kind = UnstructuredSwitch;
				break;
			}
			break;

		default:
			break;
		}
	}

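	// Recursively collects into 'reachable' the IDs of all blocks reachable from
	// 'id' by following successor edges.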
	void SpirvShader::Function::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) const
	{
		if (reachable.count(id) == 0)
		{
			reachable.emplace(id);
			for (auto out : getBlock(id).outs)
			{
				TraverseReachableBlocks(out, reachable);
			}
		}
	}

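	// Populates each reachable block's predecessor set ('ins') from the successor
	// sets built by the Block constructor, and flags the merge block of every
	// reachable loop.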
	void SpirvShader::Function::AssignBlockFields()
	{
		Block::Set reachable;
		TraverseReachableBlocks(entry, reachable);

		for (auto &it : blocks)
		{
			auto &blockId = it.first;
			auto &block = it.second;
			if (reachable.count(blockId) > 0)
			{
				for (auto &outId : block.outs)
				{
					auto outIt = blocks.find(outId);
					ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
					auto &out = outIt->second;
					out.ins.emplace(blockId);
				}
				if (block.kind == Block::Loop)
				{
					auto mergeIt = blocks.find(block.mergeBlock);
					ASSERT_MSG(mergeIt != blocks.end(), "Loop block %d has a non-existent merge block %d", blockId.value(), block.mergeBlock.value());
					mergeIt->second.isLoopMerge = true;
				}
			}
		}
	}

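	// Calls f with each predecessor of blockId whose edge must be resolved before
	// the block itself: every 'in' edge, except loop back edges (predecessors
	// that the loop header can reach without passing through its merge block).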
	void SpirvShader::Function::ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const
	{
		auto const &block = getBlock(blockId);
		for (auto dep : block.ins)
		{
			if (block.kind != Block::Loop ||                 // if this is not a loop header...
				!ExistsPath(blockId, dep, block.mergeBlock)) // ... or dep is not reached via a back edge
			{
				f(dep);
			}
		}
	}

	bool SpirvShader::Function::ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
	{
		// TODO: Optimize: This can be cached on the block.
		Block::Set seen;
		seen.emplace(notPassingThrough);

		std::queue<Block::ID> pending;
		pending.emplace(from);

		while (!pending.empty())
		{
			auto id = pending.front();
			pending.pop();
			for (auto out : getBlock(id).outs)
			{
				if (seen.count(out) != 0) { continue; }
				if (out == to) { return true; }
				pending.emplace(out);
			}
			seen.emplace(id);
		}

		return false;
	}

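	// Active lane masks are tracked per CFG edge. When the same edge is recorded
	// more than once (e.g. when multiple switch cases target the same block), the
	// masks are ORed together; GetActiveLaneMaskEdge() later reads the combined
	// mask when computing a block's entry mask.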
	void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
	{
		addActiveLaneMaskEdge(block, to, mask & activeLaneMask());
	}

	void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
	{
		auto edge = Block::Edge{from, to};
		auto it = edgeActiveLaneMasks.find(edge);
		if (it == edgeActiveLaneMasks.end())
		{
			edgeActiveLaneMasks.emplace(edge, mask);
		}
		else
		{
			// RValue<> is not assignable, so replace the map entry with the
			// combined mask instead of assigning through the iterator.
			auto combined = it->second | mask;
			edgeActiveLaneMasks.erase(edge);
			edgeActiveLaneMasks.emplace(edge, combined);
		}
	}

	RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
	{
		auto edge = Block::Edge{from, to};
		auto it = state->edgeActiveLaneMasks.find(edge);
		ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
		return it->second;
	}

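	// Maps a SPIR-V execution model to the corresponding Vulkan shader stage.
	// Only the stages this implementation supports are handled; all others are
	// rejected as unsupported.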
	VkShaderStageFlagBits SpirvShader::executionModelToStage(spv::ExecutionModel model)
	{
		switch (model)
		{
		case spv::ExecutionModelVertex:     return VK_SHADER_STAGE_VERTEX_BIT;
		// case spv::ExecutionModelTessellationControl:    return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
		// case spv::ExecutionModelTessellationEvaluation: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
		// case spv::ExecutionModelGeometry:               return VK_SHADER_STAGE_GEOMETRY_BIT;
		case spv::ExecutionModelFragment:   return VK_SHADER_STAGE_FRAGMENT_BIT;
		case spv::ExecutionModelGLCompute:  return VK_SHADER_STAGE_COMPUTE_BIT;
		// case spv::ExecutionModelKernel:                 return VkShaderStageFlagBits(0); // Not supported by Vulkan.
		// case spv::ExecutionModelTaskNV:                 return VK_SHADER_STAGE_TASK_BIT_NV;
		// case spv::ExecutionModelMeshNV:                 return VK_SHADER_STAGE_MESH_BIT_NV;
		// case spv::ExecutionModelRayGenerationNV:        return VK_SHADER_STAGE_RAYGEN_BIT_NV;
		// case spv::ExecutionModelIntersectionNV:         return VK_SHADER_STAGE_INTERSECTION_BIT_NV;
		// case spv::ExecutionModelAnyHitNV:               return VK_SHADER_STAGE_ANY_HIT_BIT_NV;
		// case spv::ExecutionModelClosestHitNV:           return VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV;
		// case spv::ExecutionModelMissNV:                 return VK_SHADER_STAGE_MISS_BIT_NV;
		// case spv::ExecutionModelCallableNV:             return VK_SHADER_STAGE_CALLABLE_BIT_NV;
		default:
			UNSUPPORTED("ExecutionModel: %d", int(model));
			return VkShaderStageFlagBits(0);
		}
	}

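	// A GenericValue provides uniform access to an object's value, whether it is
	// a constant (read from the Object itself) or an intermediate (read from the
	// EmitState).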
	SpirvShader::GenericValue::GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId) :
			obj(shader->getObject(objId)),
			intermediate(obj.kind == SpirvShader::Object::Kind::Intermediate ? &state->getIntermediate(objId) : nullptr),
			type(obj.type) {}

	SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
		pipelineLayout(pipelineLayout)
	{
	}

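	// With a subgroup size equal to the SIMD width (4), each lane's subgroup
	// built-ins are compile-time constants. The Eq/Ge/Gt/Le/Lt masks are 4-bit
	// lane masks stored in the low 32 bits of the 128-bit mask vector; for
	// example, lane 1 gets EqMask 0b0010 (2), GeMask 0b1110 (14) and
	// LtMask 0b0001 (1).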
	void SpirvRoutine::setImmutableInputBuiltins(SpirvShader const *shader)
	{
		setInputBuiltin(shader, spv::BuiltInSubgroupLocalInvocationId, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 1);
			value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 1, 2, 3));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupEqMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 2, 4, 8));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupGeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(15, 14, 12, 8));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupGtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(14, 12, 8, 0));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupLeMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(1, 3, 7, 15));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInSubgroupLtMask, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 4);
			value[builtin.FirstComponent + 0] = As<SIMD::Float>(SIMD::Int(0, 1, 3, 7));
			value[builtin.FirstComponent + 1] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 2] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
			value[builtin.FirstComponent + 3] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});

		setInputBuiltin(shader, spv::BuiltInDeviceIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		{
			ASSERT(builtin.SizeInComponents == 1);
			// Only a single physical device is supported.
			value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(0, 0, 0, 0));
		});
	}
}
