1 | // Copyright 2018 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef sw_SpirvShader_hpp |
16 | #define sw_SpirvShader_hpp |
17 | |
18 | #include "ShaderCore.hpp" |
19 | #include "SamplerCore.hpp" |
20 | #include "SpirvID.hpp" |
21 | #include "System/Types.hpp" |
22 | #include "Vulkan/VkDebug.hpp" |
23 | #include "Vulkan/VkConfig.h" |
24 | #include "Vulkan/VkDescriptorSet.hpp" |
25 | #include "Common/Types.hpp" |
26 | #include "Device/Config.hpp" |
27 | #include "Device/Sampler.hpp" |
28 | |
29 | #include <spirv/unified1/spirv.hpp> |
30 | |
31 | #include <array> |
32 | #include <atomic> |
33 | #include <cstdint> |
34 | #include <cstring> |
35 | #include <functional> |
36 | #include <memory> |
37 | #include <deque> |
38 | #include <string> |
39 | #include <type_traits> |
40 | #include <unordered_map> |
41 | #include <unordered_set> |
42 | #include <vector> |
43 | |
44 | #undef Yield // b/127920555 |
45 | |
46 | namespace vk |
47 | { |
48 | class PipelineLayout; |
49 | class ImageView; |
50 | class Sampler; |
51 | class RenderPass; |
52 | struct SampledImageDescriptor; |
53 | } // namespace vk |
54 | |
55 | namespace sw |
56 | { |
57 | // Forward declarations. |
58 | class SpirvRoutine; |
59 | |
60 | enum class OutOfBoundsBehavior |
61 | { |
62 | Nullify, // Loads become zero, stores are elided. |
63 | RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing). |
64 | UndefinedValue, // Only for load operations. Not secure. No program termination. |
65 | UndefinedBehavior, // Program may terminate. |
66 | }; |
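// For example (illustrative): consider a per-lane load through a SIMD::Pointer
// where one lane's offset lies past the buffer limit.
//  - Nullify: the out-of-bounds lane loads zero, and stores to it are dropped.
//  - RobustBufferAccess: the out-of-bounds lane loads zero or a value from within
//    the buffer, per the Vulkan robustness rules.
//  - UndefinedValue: the loaded value for that lane is arbitrary (not secure),
//    but execution continues.
//  - UndefinedBehavior: no guarantees; the access may terminate the program.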
67 | |
68 | // SIMD contains types that represent multiple scalars packed into a single |
69 | // vector data type. Types in the SIMD namespace provide a semantic hint |
70 | // that the data should be treated as a per-execution-lane scalar instead of |
71 | // a typical euclidean-style vector type. |
72 | namespace SIMD |
73 | { |
74 | // Width is the number of per-lane scalars packed into each SIMD vector. |
75 | static constexpr int Width = 4; |
76 | |
77 | using Float = rr::Float4; |
78 | using Int = rr::Int4; |
79 | using UInt = rr::UInt4; |
80 | |
81 | struct Pointer |
82 | { |
83 | Pointer(rr::Pointer<Byte> base, rr::Int limit) |
84 | : base(base), |
85 | dynamicLimit(limit), staticLimit(0), |
86 | dynamicOffsets(0), staticOffsets{}, |
87 | hasDynamicLimit(true), hasDynamicOffsets(false) {} |
88 | |
89 | Pointer(rr::Pointer<Byte> base, unsigned int limit) |
90 | : base(base), |
91 | dynamicLimit(0), staticLimit(limit), |
92 | dynamicOffsets(0), staticOffsets{}, |
93 | hasDynamicLimit(false), hasDynamicOffsets(false) {} |
94 | |
95 | Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset) |
96 | : base(base), |
97 | dynamicLimit(limit), staticLimit(0), |
98 | dynamicOffsets(offset), staticOffsets{}, |
99 | hasDynamicLimit(true), hasDynamicOffsets(true) {} |
100 | |
101 | Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset) |
102 | : base(base), |
103 | dynamicLimit(0), staticLimit(limit), |
104 | dynamicOffsets(offset), staticOffsets{}, |
105 | hasDynamicLimit(false), hasDynamicOffsets(true) {} |
106 | |
107 | inline Pointer& operator += (Int i) |
108 | { |
109 | dynamicOffsets += i; |
110 | hasDynamicOffsets = true; |
111 | return *this; |
112 | } |
113 | |
114 | inline Pointer& operator *= (Int i) |
115 | { |
116 | dynamicOffsets = offsets() * i; |
117 | staticOffsets = {}; |
118 | hasDynamicOffsets = true; |
119 | return *this; |
120 | } |
121 | |
122 | inline Pointer operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; } |
123 | inline Pointer operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; } |
124 | |
125 | inline Pointer& operator += (int i) |
126 | { |
127 | for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; } |
128 | return *this; |
129 | } |
130 | |
131 | inline Pointer& operator *= (int i) |
132 | { |
133 | for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; } |
134 | if (hasDynamicOffsets) |
135 | { |
136 | dynamicOffsets *= SIMD::Int(i); |
137 | } |
138 | return *this; |
139 | } |
140 | |
141 | inline Pointer operator + (int i) { Pointer p = *this; p += i; return p; } |
142 | inline Pointer operator * (int i) { Pointer p = *this; p *= i; return p; } |
143 | |
144 | inline SIMD::Int offsets() const |
145 | { |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
147 | return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]); |
148 | } |
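// Illustrative example (a sketch): starting from a pointer p with all-zero offsets,
//   p += 8;                      // static:  offsets() == {8, 8, 8, 8}
//   p += SIMD::Int(0, 4, 8, 12); // dynamic: offsets() == {8, 12, 16, 20}
// offsets() always returns the per-lane sum of the static and dynamic parts.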
149 | |
150 | inline SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const |
151 | { |
152 | ASSERT(accessSize > 0); |
153 | |
154 | if (isStaticallyInBounds(accessSize, robustness)) |
155 | { |
156 | return SIMD::Int(0xffffffff); |
157 | } |
158 | |
159 | if (!hasDynamicOffsets && !hasDynamicLimit) |
160 | { |
161 | // Common fast paths. |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
163 | return SIMD::Int( |
164 | (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, |
165 | (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, |
166 | (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, |
167 | (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0); |
168 | } |
169 | |
170 | return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit())); |
171 | } |
172 | |
173 | inline bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const |
174 | { |
175 | if (hasDynamicOffsets) |
176 | { |
177 | return false; |
178 | } |
179 | |
180 | if (hasDynamicLimit) |
181 | { |
182 | if (hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize)) |
183 | { |
184 | switch(robustness) |
185 | { |
186 | case OutOfBoundsBehavior::UndefinedBehavior: |
187 | // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes, |
// but since it can't know in advance which branches are taken, this must be true even for inactive lanes.
189 | return true; |
190 | case OutOfBoundsBehavior::Nullify: |
191 | case OutOfBoundsBehavior::RobustBufferAccess: |
192 | case OutOfBoundsBehavior::UndefinedValue: |
193 | return false; |
194 | } |
195 | } |
196 | } |
197 | |
198 | for (int i = 0; i < SIMD::Width; i++) |
199 | { |
200 | if (staticOffsets[i] + accessSize - 1 >= staticLimit) |
201 | { |
202 | return false; |
203 | } |
204 | } |
205 | |
206 | return true; |
207 | } |
208 | |
209 | inline Int limit() const |
210 | { |
211 | return dynamicLimit + staticLimit; |
212 | } |
213 | |
214 | // Returns true if all offsets are sequential |
215 | // (N+0*step, N+1*step, N+2*step, N+3*step) |
216 | inline rr::Bool hasSequentialOffsets(unsigned int step) const |
217 | { |
218 | if (hasDynamicOffsets) |
219 | { |
220 | auto o = offsets(); |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
222 | return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 2*step, 3*step, 0))) == 0; |
223 | } |
224 | return hasStaticSequentialOffsets(step); |
225 | } |
226 | |
// Returns true if all offsets are compile-time static and
228 | // sequential (N+0*step, N+1*step, N+2*step, N+3*step) |
229 | inline bool hasStaticSequentialOffsets(unsigned int step) const |
230 | { |
231 | if (hasDynamicOffsets) |
232 | { |
233 | return false; |
234 | } |
235 | for (int i = 1; i < SIMD::Width; i++) |
236 | { |
237 | if (staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; } |
238 | } |
239 | return true; |
240 | } |
241 | |
242 | // Returns true if all offsets are equal (N, N, N, N) |
243 | inline rr::Bool hasEqualOffsets() const |
244 | { |
245 | if (hasDynamicOffsets) |
246 | { |
247 | auto o = offsets(); |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
249 | return rr::SignMask(~CmpEQ(o, o.yzwx)) == 0; |
250 | } |
251 | return hasStaticEqualOffsets(); |
252 | } |
253 | |
254 | // Returns true if all offsets are compile-time static and are equal |
255 | // (N, N, N, N) |
256 | inline bool hasStaticEqualOffsets() const |
257 | { |
258 | if (hasDynamicOffsets) |
259 | { |
260 | return false; |
261 | } |
262 | for (int i = 1; i < SIMD::Width; i++) |
263 | { |
264 | if (staticOffsets[i-1] != staticOffsets[i]) { return false; } |
265 | } |
266 | return true; |
267 | } |
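// For example, static offsets {64, 64, 64, 64} satisfy hasStaticEqualOffsets(),
// while {64, 68, 72, 76} satisfy hasStaticSequentialOffsets(4), the typical
// layout when the four lanes access consecutive 4-byte words.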
268 | |
269 | // Base address for the pointer, common across all lanes. |
270 | rr::Pointer<rr::Byte> base; |
271 | |
272 | // Upper (non-inclusive) limit for offsets from base. |
273 | rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero. |
274 | unsigned int staticLimit; |
275 | |
276 | // Per lane offsets from base. |
277 | SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero. |
278 | std::array<int32_t, SIMD::Width> staticOffsets; |
279 | |
280 | bool hasDynamicLimit; // True if dynamicLimit is non-zero. |
281 | bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero. |
282 | }; |
283 | |
284 | template <typename T> struct Element {}; |
285 | template <> struct Element<Float> { using type = rr::Float; }; |
286 | template <> struct Element<Int> { using type = rr::Int; }; |
287 | template <> struct Element<UInt> { using type = rr::UInt; }; |
288 | |
289 | template<typename T> |
290 | void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); |
291 | |
292 | template<typename T> |
293 | void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed) |
294 | { |
295 | Store(ptr, T(val), robustness, mask, atomic, order); |
296 | } |
297 | |
298 | template<typename T> |
299 | T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float)); |
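// Illustrative usage (a sketch; 'ptr', 'robustness' and 'mask' are assumed to be
// provided by the surrounding emit code):
//   auto value = Load<SIMD::Float>(ptr, robustness, mask);
//   Store(ptr, value * SIMD::Float(2.0f), robustness, mask);
// The mask selects the active lanes; stores are suppressed for inactive lanes.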
} // namespace SIMD
301 | |
302 | // Incrementally constructed complex bundle of rvalues |
303 | // Effectively a restricted vector, supporting only: |
304 | // - allocation to a (runtime-known) fixed size |
305 | // - in-place construction of elements |
// - const element access (via Float(), Int() and UInt())
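//
// Example use (a sketch): building a 4-component result one scalar at a time:
//   Intermediate result(4);
//   for (uint32_t i = 0; i < 4; i++) { result.move(i, someSimdFloat); }
//   RValue<SIMD::Float> x = result.Float(0);
// where 'someSimdFloat' stands for any RValue<SIMD::Float> expression.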
307 | class Intermediate |
308 | { |
309 | public: |
310 | Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size) { |
311 | memset(scalar, 0, sizeof(rr::Value*) * size); |
312 | } |
313 | |
314 | ~Intermediate() |
315 | { |
316 | delete[] scalar; |
317 | } |
318 | |
319 | void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); } |
320 | void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); } |
321 | void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); } |
322 | |
323 | void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); } |
324 | void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); } |
325 | void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); } |
326 | |
327 | // Value retrieval functions. |
328 | RValue<SIMD::Float> Float(uint32_t i) const |
329 | { |
330 | ASSERT(i < size); |
331 | ASSERT(scalar[i] != nullptr); |
332 | return As<SIMD::Float>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Float>(scalar) |
333 | } |
334 | |
335 | RValue<SIMD::Int> Int(uint32_t i) const |
336 | { |
337 | ASSERT(i < size); |
338 | ASSERT(scalar[i] != nullptr); |
339 | return As<SIMD::Int>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Int>(scalar) |
340 | } |
341 | |
342 | RValue<SIMD::UInt> UInt(uint32_t i) const |
343 | { |
344 | ASSERT(i < size); |
345 | ASSERT(scalar[i] != nullptr); |
346 | return As<SIMD::UInt>(scalar[i]); // TODO(b/128539387): RValue<SIMD::UInt>(scalar) |
347 | } |
348 | |
349 | // No copy/move construction or assignment |
350 | Intermediate(Intermediate const &) = delete; |
351 | Intermediate(Intermediate &&) = delete; |
352 | Intermediate & operator=(Intermediate const &) = delete; |
353 | Intermediate & operator=(Intermediate &&) = delete; |
354 | |
355 | private: |
356 | void emplace(uint32_t i, rr::Value *value) |
357 | { |
358 | ASSERT(i < size); |
359 | ASSERT(scalar[i] == nullptr); |
360 | scalar[i] = value; |
361 | } |
362 | |
363 | rr::Value **const scalar; |
364 | uint32_t size; |
365 | }; |
366 | |
367 | class SpirvShader |
368 | { |
369 | public: |
370 | using InsnStore = std::vector<uint32_t>; |
371 | InsnStore insns; |
372 | |
373 | using ImageSampler = void(void* texture, void *sampler, void* uvsIn, void* texelOut, void* constants); |
374 | |
375 | enum class YieldResult |
376 | { |
377 | ControlBarrier, |
378 | }; |
379 | |
/* Pseudo-iterator over SPIR-V instructions, designed to support range-based-for. */
381 | class InsnIterator |
382 | { |
383 | InsnStore::const_iterator iter; |
384 | |
385 | public: |
386 | spv::Op opcode() const |
387 | { |
388 | return static_cast<spv::Op>(*iter & spv::OpCodeMask); |
389 | } |
390 | |
391 | uint32_t wordCount() const |
392 | { |
393 | return *iter >> spv::WordCountShift; |
394 | } |
395 | |
396 | uint32_t word(uint32_t n) const |
397 | { |
398 | ASSERT(n < wordCount()); |
399 | return iter[n]; |
400 | } |
401 | |
402 | uint32_t const * wordPointer(uint32_t n) const |
403 | { |
404 | ASSERT(n < wordCount()); |
405 | return &iter[n]; |
406 | } |
407 | |
408 | const char* string(uint32_t n) const |
409 | { |
410 | return reinterpret_cast<const char*>(wordPointer(n)); |
411 | } |
412 | |
413 | bool operator==(InsnIterator const &other) const |
414 | { |
415 | return iter == other.iter; |
416 | } |
417 | |
418 | bool operator!=(InsnIterator const &other) const |
419 | { |
420 | return iter != other.iter; |
421 | } |
422 | |
423 | InsnIterator operator*() const |
424 | { |
425 | return *this; |
426 | } |
427 | |
428 | InsnIterator &operator++() |
429 | { |
430 | iter += wordCount(); |
431 | return *this; |
432 | } |
433 | |
434 | InsnIterator const operator++(int) |
435 | { |
436 | InsnIterator ret{*this}; |
437 | iter += wordCount(); |
438 | return ret; |
439 | } |
440 | |
441 | InsnIterator(InsnIterator const &other) = default; |
442 | |
443 | InsnIterator() = default; |
444 | |
445 | explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter} |
446 | { |
447 | } |
448 | }; |
449 | |
450 | /* range-based-for interface */ |
451 | InsnIterator begin() const |
452 | { |
453 | return InsnIterator{insns.cbegin() + 5}; |
454 | } |
455 | |
456 | InsnIterator end() const |
457 | { |
458 | return InsnIterator{insns.cend()}; |
459 | } |
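// Example (a sketch): begin() skips the five-word SPIR-V module header, so
// iteration yields instructions only. Finding the entry point declarations
// might look like:
//   for (auto insn : *this)
//   {
//       if (insn.opcode() == spv::OpEntryPoint)
//       {
//           const char *name = insn.string(3); // entry point name operand
//       }
//   }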
460 | |
461 | class Type |
462 | { |
463 | public: |
464 | using ID = SpirvID<Type>; |
465 | |
466 | spv::Op opcode() const { return definition.opcode(); } |
467 | |
468 | InsnIterator definition; |
469 | spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1); |
470 | uint32_t sizeInComponents = 0; |
471 | bool isBuiltInBlock = false; |
472 | |
473 | // Inner element type for pointers, arrays, vectors and matrices. |
474 | ID element; |
475 | }; |
476 | |
477 | class Object |
478 | { |
479 | public: |
480 | using ID = SpirvID<Object>; |
481 | |
482 | spv::Op opcode() const { return definition.opcode(); } |
483 | |
484 | InsnIterator definition; |
485 | Type::ID type; |
486 | std::unique_ptr<uint32_t[]> constantValue = nullptr; |
487 | |
488 | enum class Kind |
489 | { |
490 | // Invalid default kind. |
491 | // If we get left with an object in this state, the module was |
492 | // broken. |
493 | Unknown, |
494 | |
495 | // TODO: Better document this kind. |
496 | // A shader interface variable pointer. |
497 | // Pointer with uniform address across all lanes. |
498 | // Pointer held by SpirvRoutine::pointers |
499 | InterfaceVariable, |
500 | |
501 | // Constant value held by Object::constantValue. |
502 | Constant, |
503 | |
504 | // Value held by SpirvRoutine::intermediates. |
505 | Intermediate, |
506 | |
507 | // Pointer held by SpirvRoutine::pointers |
508 | Pointer, |
509 | |
510 | // A pointer to a vk::DescriptorSet*. |
511 | // Pointer held by SpirvRoutine::pointers. |
512 | DescriptorSet, |
513 | }; |
514 | |
515 | Kind kind = Kind::Unknown; |
516 | }; |
517 | |
518 | // Block is an interval of SPIR-V instructions, starting with the |
519 | // opening OpLabel, and ending with a termination instruction. |
520 | class Block |
521 | { |
522 | public: |
523 | using ID = SpirvID<Block>; |
524 | using Set = std::unordered_set<ID>; |
525 | |
526 | // Edge represents the graph edge between two blocks. |
527 | struct Edge |
528 | { |
529 | ID from; |
530 | ID to; |
531 | |
532 | bool operator == (const Edge& other) const { return from == other.from && to == other.to; } |
533 | |
534 | struct Hash |
535 | { |
536 | std::size_t operator()(const Edge& edge) const noexcept |
537 | { |
538 | return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value()); |
539 | } |
540 | }; |
541 | }; |
542 | |
543 | Block() = default; |
544 | Block(const Block& other) = default; |
545 | explicit Block(InsnIterator begin, InsnIterator end); |
546 | |
547 | /* range-based-for interface */ |
548 | inline InsnIterator begin() const { return begin_; } |
549 | inline InsnIterator end() const { return end_; } |
550 | |
551 | enum Kind |
552 | { |
553 | Simple, // OpBranch or other simple terminator. |
554 | StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional |
555 | UnstructuredBranchConditional, // OpBranchConditional |
556 | StructuredSwitch, // OpSelectionMerge + OpSwitch |
557 | UnstructuredSwitch, // OpSwitch |
558 | Loop, // OpLoopMerge + [OpBranchConditional | OpBranch] |
559 | }; |
560 | |
561 | Kind kind = Simple; |
562 | InsnIterator mergeInstruction; // Structured control flow merge instruction. |
563 | InsnIterator branchInstruction; // Branch instruction. |
564 | ID mergeBlock; // Structured flow merge block. |
565 | ID continueTarget; // Loop continue block. |
566 | Set ins; // Blocks that branch into this block. |
567 | Set outs; // Blocks that this block branches to. |
568 | bool isLoopMerge = false; |
569 | private: |
570 | InsnIterator begin_; |
571 | InsnIterator end_; |
572 | }; |
573 | |
574 | class Function |
575 | { |
576 | public: |
577 | using ID = SpirvID<Function>; |
578 | |
// Walks all the blocks reachable from id, adding them to reachable.
581 | void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const; |
582 | |
583 | // AssignBlockFields() performs the following for all reachable blocks: |
584 | // * Assigns Block::ins with the identifiers of all blocks that contain |
585 | // this block in their Block::outs. |
// * Sets Block::isLoopMerge to true if the block is the merge block of
//   another loop.
588 | void AssignBlockFields(); |
589 | |
590 | // ForeachBlockDependency calls f with each dependency of the given |
591 | // block. A dependency is an incoming block that is not a loop-back |
592 | // edge. |
593 | void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const; |
594 | |
595 | // ExistsPath returns true if there's a direct or indirect flow from |
596 | // the 'from' block to the 'to' block that does not pass through |
597 | // notPassingThrough. |
598 | bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const; |
599 | |
600 | Block const &getBlock(Block::ID id) const |
601 | { |
602 | auto it = blocks.find(id); |
ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
604 | return it->second; |
605 | } |
606 | |
607 | Block::ID entry; // function entry point block. |
608 | HandleMap<Block> blocks; // blocks belonging to this function. |
609 | Type::ID type; // type of the function. |
610 | Type::ID result; // return type. |
611 | }; |
612 | |
613 | struct TypeOrObject {}; // Dummy struct to represent a Type or Object. |
614 | |
615 | // TypeOrObjectID is an identifier that represents a Type or an Object, |
616 | // and supports implicit casting to and from Type::ID or Object::ID. |
617 | class TypeOrObjectID : public SpirvID<TypeOrObject> |
618 | { |
619 | public: |
620 | using Hash = std::hash<SpirvID<TypeOrObject>>; |
621 | |
622 | inline TypeOrObjectID(uint32_t id) : SpirvID(id) {} |
623 | inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {} |
624 | inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {} |
625 | inline operator Type::ID() const { return Type::ID(value()); } |
626 | inline operator Object::ID() const { return Object::ID(value()); } |
627 | }; |
628 | |
629 | // OpImageSample variants |
630 | enum Variant |
631 | { |
632 | None, // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod. |
633 | Dref, |
634 | Proj, |
635 | ProjDref, |
636 | VARIANT_LAST = ProjDref |
637 | }; |
638 | |
639 | // Compact representation of image instruction parameters that is passed to the |
640 | // trampoline function for retrieving/generating the corresponding sampling routine. |
641 | struct ImageInstruction |
642 | { |
643 | ImageInstruction(Variant variant, SamplerMethod samplerMethod) |
644 | : parameters(0) |
645 | { |
646 | this->variant = variant; |
647 | this->samplerMethod = samplerMethod; |
648 | } |
649 | |
650 | // Unmarshal from raw 32-bit data |
651 | ImageInstruction(uint32_t parameters) : parameters(parameters) {} |
652 | |
653 | SamplerFunction getSamplerFunction() const |
654 | { |
655 | return { static_cast<SamplerMethod>(samplerMethod), offset != 0, sample != 0 }; |
656 | } |
657 | |
658 | bool isDref() const |
659 | { |
660 | return (variant == Dref) || (variant == ProjDref); |
661 | } |
662 | |
663 | bool isProj() const |
664 | { |
665 | return (variant == Proj) || (variant == ProjDref); |
666 | } |
667 | |
668 | union |
669 | { |
670 | struct |
671 | { |
672 | uint32_t variant : BITS(VARIANT_LAST); |
673 | uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST); |
674 | uint32_t gatherComponent : 2; |
675 | |
676 | // Parameters are passed to the sampling routine in this order: |
677 | uint32_t coordinates : 3; // 1-4 (does not contain projection component) |
678 | // uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref |
679 | // uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch |
680 | uint32_t grad : 2; // 0-3 components (for each of dx / dy) |
681 | uint32_t offset : 2; // 0-3 components |
682 | uint32_t sample : 1; // 0-1 scalar integer |
683 | }; |
684 | |
685 | uint32_t parameters; |
686 | }; |
687 | }; |
688 | |
static_assert(sizeof(ImageInstruction) == sizeof(uint32_t), "ImageInstruction must be 32-bit");
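// Example (a sketch): packing an implicit-lod 2D sample and recovering its fields
// from the raw 32-bit value passed through the sampling-routine trampoline
// (assumes Implicit is a SamplerMethod enumerator):
//   ImageInstruction packed(None, Implicit);
//   packed.coordinates = 2;                    // u, v
//   ImageInstruction unpacked(packed.parameters);
//   bool dref = unpacked.isDref();             // false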
690 | |
691 | // This method is for retrieving an ID that uniquely identifies the |
692 | // shader entry point represented by this object. |
693 | uint64_t getSerialID() const |
694 | { |
695 | return ((uint64_t)entryPoint.value() << 32) | codeSerialID; |
696 | } |
697 | |
698 | SpirvShader(uint32_t codeSerialID, |
699 | VkShaderStageFlagBits stage, |
700 | const char *entryPointName, |
701 | InsnStore const &insns, |
702 | const vk::RenderPass *renderPass, |
703 | uint32_t subpassIndex, |
704 | bool robustBufferAccess); |
705 | |
706 | struct Modes |
707 | { |
708 | bool EarlyFragmentTests : 1; |
709 | bool DepthReplacing : 1; |
710 | bool DepthGreater : 1; |
711 | bool DepthLess : 1; |
712 | bool DepthUnchanged : 1; |
713 | bool ContainsKill : 1; |
714 | bool ContainsControlBarriers : 1; |
715 | bool NeedsCentroid : 1; |
716 | |
717 | // Compute workgroup dimensions |
718 | int WorkgroupSizeX = 1, WorkgroupSizeY = 1, WorkgroupSizeZ = 1; |
719 | }; |
720 | |
721 | Modes const &getModes() const |
722 | { |
723 | return modes; |
724 | } |
725 | |
726 | struct Capabilities |
727 | { |
728 | bool Matrix : 1; |
729 | bool Shader : 1; |
730 | bool InputAttachment : 1; |
731 | bool Sampled1D : 1; |
732 | bool Image1D : 1; |
733 | bool SampledBuffer : 1; |
734 | bool ImageBuffer : 1; |
735 | bool ImageQuery : 1; |
736 | bool DerivativeControl : 1; |
737 | bool GroupNonUniform : 1; |
738 | bool MultiView : 1; |
739 | bool DeviceGroup : 1; |
740 | bool GroupNonUniformVote : 1; |
741 | bool GroupNonUniformBallot : 1; |
742 | bool GroupNonUniformShuffle : 1; |
743 | bool GroupNonUniformShuffleRelative : 1; |
744 | bool StorageImageExtendedFormats : 1; |
745 | }; |
746 | |
747 | Capabilities const &getUsedCapabilities() const |
748 | { |
749 | return capabilities; |
750 | } |
751 | |
752 | enum AttribType : unsigned char |
753 | { |
754 | ATTRIBTYPE_FLOAT, |
755 | ATTRIBTYPE_INT, |
756 | ATTRIBTYPE_UINT, |
757 | ATTRIBTYPE_UNUSED, |
758 | |
759 | ATTRIBTYPE_LAST = ATTRIBTYPE_UINT |
760 | }; |
761 | |
762 | bool hasBuiltinInput(spv::BuiltIn b) const |
763 | { |
764 | return inputBuiltins.find(b) != inputBuiltins.end(); |
765 | } |
766 | |
767 | bool hasBuiltinOutput(spv::BuiltIn b) const |
768 | { |
769 | return outputBuiltins.find(b) != outputBuiltins.end(); |
770 | } |
771 | |
772 | struct Decorations |
773 | { |
774 | int32_t Location = -1; |
775 | int32_t Component = 0; |
776 | spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1); |
777 | int32_t Offset = -1; |
778 | int32_t ArrayStride = -1; |
779 | int32_t MatrixStride = 1; |
780 | |
781 | bool HasLocation : 1; |
782 | bool HasComponent : 1; |
783 | bool HasBuiltIn : 1; |
784 | bool HasOffset : 1; |
785 | bool HasArrayStride : 1; |
786 | bool HasMatrixStride : 1; |
787 | bool HasRowMajor : 1; // whether RowMajor bit is valid. |
788 | |
789 | bool Flat : 1; |
790 | bool Centroid : 1; |
791 | bool NoPerspective : 1; |
792 | bool Block : 1; |
793 | bool BufferBlock : 1; |
794 | bool RelaxedPrecision : 1; |
795 | bool RowMajor : 1; // RowMajor if true; ColMajor if false |
796 | bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. |
797 | |
798 | Decorations() |
799 | : Location{-1}, Component{0}, |
800 | BuiltIn{static_cast<spv::BuiltIn>(-1)}, |
801 | Offset{-1}, ArrayStride{-1}, MatrixStride{-1}, |
802 | HasLocation{false}, HasComponent{false}, |
803 | HasBuiltIn{false}, HasOffset{false}, |
804 | HasArrayStride{false}, HasMatrixStride{false}, |
805 | HasRowMajor{false}, |
806 | Flat{false}, Centroid{false}, NoPerspective{false}, |
807 | Block{false}, BufferBlock{false}, |
808 | RelaxedPrecision{false}, RowMajor{false}, |
809 | InsideMatrix{false} |
810 | { |
811 | } |
812 | |
813 | Decorations(Decorations const &) = default; |
814 | |
815 | void Apply(Decorations const &src); |
816 | |
817 | void Apply(spv::Decoration decoration, uint32_t arg); |
818 | }; |
819 | |
820 | std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations; |
821 | std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations; |
822 | |
823 | struct DescriptorDecorations |
824 | { |
825 | int32_t DescriptorSet = -1; |
826 | int32_t Binding = -1; |
827 | int32_t InputAttachmentIndex = -1; |
828 | |
829 | void Apply(DescriptorDecorations const &src); |
830 | }; |
831 | |
832 | std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations; |
833 | std::vector<VkFormat> inputAttachmentFormats; |
834 | |
835 | struct InterfaceComponent |
836 | { |
837 | AttribType Type; |
838 | |
839 | union |
840 | { |
841 | struct |
842 | { |
843 | bool Flat : 1; |
844 | bool Centroid : 1; |
845 | bool NoPerspective : 1; |
846 | }; |
847 | |
848 | uint8_t DecorationBits; |
849 | }; |
850 | |
851 | InterfaceComponent() |
852 | : Type{ATTRIBTYPE_UNUSED}, DecorationBits{0} |
853 | { |
854 | } |
855 | }; |
856 | |
857 | struct BuiltinMapping |
858 | { |
859 | Object::ID Id; |
860 | uint32_t FirstComponent; |
861 | uint32_t SizeInComponents; |
862 | }; |
863 | |
864 | struct WorkgroupMemory |
865 | { |
866 | // allocates a new variable of size bytes with the given identifier. |
867 | inline void allocate(Object::ID id, uint32_t size) |
868 | { |
869 | uint32_t offset = totalSize; |
870 | auto it = offsets.emplace(id, offset); |
ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
872 | totalSize += size; |
873 | } |
874 | // returns the byte offset of the variable with the given identifier. |
875 | inline uint32_t offsetOf(Object::ID id) const |
876 | { |
877 | auto it = offsets.find(id); |
ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
879 | return it->second; |
880 | } |
881 | // returns the total allocated size in bytes. |
882 | inline uint32_t size() const { return totalSize; } |
883 | private: |
884 | uint32_t totalSize = 0; // in bytes |
885 | std::unordered_map<Object::ID, uint32_t> offsets; // in bytes |
886 | }; |
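// Example use (a sketch, with hypothetical IDs): laying out two Workgroup-storage
// variables during analysis, then querying their offsets at emit time:
//   WorkgroupMemory wm;
//   wm.allocate(Object::ID(7), 64);
//   wm.allocate(Object::ID(9), 16);
//   uint32_t off = wm.offsetOf(Object::ID(9)); // == 64
//   uint32_t total = wm.size();                // == 80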
887 | |
888 | std::vector<InterfaceComponent> inputs; |
889 | std::vector<InterfaceComponent> outputs; |
890 | |
891 | void emitProlog(SpirvRoutine *routine) const; |
892 | void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const; |
893 | void emitEpilog(SpirvRoutine *routine) const; |
894 | |
895 | using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>; |
896 | std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins; |
897 | std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins; |
898 | WorkgroupMemory workgroupMemory; |
899 | |
900 | private: |
901 | const uint32_t codeSerialID; |
902 | Modes modes = {}; |
903 | Capabilities capabilities = {}; |
904 | HandleMap<Type> types; |
905 | HandleMap<Object> defs; |
906 | HandleMap<Function> functions; |
907 | Function::ID entryPoint; |
908 | |
909 | const bool robustBufferAccess = true; |
910 | spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing. |
911 | |
912 | // DeclareType creates a Type for the given OpTypeX instruction, storing |
913 | // it into the types map. It is called from the analysis pass (constructor). |
914 | void DeclareType(InsnIterator insn); |
915 | |
916 | void ProcessExecutionMode(InsnIterator it); |
917 | |
918 | uint32_t ComputeTypeSize(InsnIterator insn); |
919 | void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const; |
920 | void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const; |
921 | void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const; |
922 | |
923 | // Creates an Object for the instruction's result in 'defs'. |
924 | void DefineResult(const InsnIterator &insn); |
925 | |
926 | // Returns true if data in the given storage class is word-interleaved |
// by each SIMD vector lane; otherwise data is stored linearly.
928 | // |
929 | // Each lane addresses a single word, picked by a base pointer and an |
930 | // integer offset. |
931 | // |
932 | // A word is currently 32 bits (single float, int32_t, uint32_t). |
933 | // A lane is a single element of a SIMD vector register. |
934 | // |
935 | // Storage interleaved by lane - (IsStorageInterleavedByLane() == true): |
936 | // --------------------------------------------------------------------- |
937 | // |
938 | // Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex) |
939 | // |
940 | // Assuming SIMD::Width == 4: |
941 | // |
942 | // Lane[0] | Lane[1] | Lane[2] | Lane[3] |
943 | // ===========+===========+===========+========== |
944 | // LaneOffset=0: | Word[0] | Word[1] | Word[2] | Word[3] |
945 | // ---------------+-----------+-----------+-----------+---------- |
946 | // LaneOffset=1: | Word[4] | Word[5] | Word[6] | Word[7] |
947 | // ---------------+-----------+-----------+-----------+---------- |
948 | // LaneOffset=2: | Word[8] | Word[9] | Word[a] | Word[b] |
949 | // ---------------+-----------+-----------+-----------+---------- |
950 | // LaneOffset=3: | Word[c] | Word[d] | Word[e] | Word[f] |
951 | // |
952 | // |
953 | // Linear storage - (IsStorageInterleavedByLane() == false): |
954 | // --------------------------------------------------------- |
955 | // |
956 | // Address = PtrBase + sizeof(Word) * LaneOffset |
957 | // |
958 | // Lane[0] | Lane[1] | Lane[2] | Lane[3] |
959 | // ===========+===========+===========+========== |
960 | // LaneOffset=0: | Word[0] | Word[0] | Word[0] | Word[0] |
961 | // ---------------+-----------+-----------+-----------+---------- |
962 | // LaneOffset=1: | Word[1] | Word[1] | Word[1] | Word[1] |
963 | // ---------------+-----------+-----------+-----------+---------- |
964 | // LaneOffset=2: | Word[2] | Word[2] | Word[2] | Word[2] |
965 | // ---------------+-----------+-----------+-----------+---------- |
966 | // LaneOffset=3: | Word[3] | Word[3] | Word[3] | Word[3] |
967 | // |
968 | static bool IsStorageInterleavedByLane(spv::StorageClass storageClass); |
969 | static bool IsExplicitLayout(spv::StorageClass storageClass); |
970 | |
971 | // Output storage buffers and images should not be affected by helper invocations |
972 | static bool StoresInHelperInvocation(spv::StorageClass storageClass); |
973 | |
974 | template<typename F> |
975 | int VisitInterfaceInner(Type::ID id, Decorations d, F f) const; |
976 | |
977 | template<typename F> |
978 | void VisitInterface(Object::ID id, F f) const; |
979 | |
980 | template<typename F> |
981 | void VisitMemoryObject(Object::ID id, F f) const; |
982 | |
983 | template<typename F> |
984 | void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, F f) const; |
985 | |
986 | Object& CreateConstant(InsnIterator it); |
987 | |
988 | void ProcessInterfaceVariable(Object &object); |
989 | |
990 | // EmitState holds control-flow state for the emit() pass. |
991 | class EmitState |
992 | { |
993 | public: |
994 | EmitState(SpirvRoutine *routine, |
995 | Function::ID function, |
996 | RValue<SIMD::Int> activeLaneMask, |
997 | RValue<SIMD::Int> storesAndAtomicsMask, |
998 | const vk::DescriptorSet::Bindings &descriptorSets, |
999 | bool robustBufferAccess, |
1000 | spv::ExecutionModel executionModel) |
1001 | : routine(routine), |
1002 | function(function), |
1003 | activeLaneMaskValue(activeLaneMask.value), |
1004 | storesAndAtomicsMaskValue(storesAndAtomicsMask.value), |
1005 | descriptorSets(descriptorSets), |
1006 | robustBufferAccess(robustBufferAccess), |
1007 | executionModel(executionModel) |
1008 | { |
1009 | ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting. |
1010 | } |
1011 | |
1012 | RValue<SIMD::Int> activeLaneMask() const |
1013 | { |
1014 | ASSERT(activeLaneMaskValue != nullptr); |
1015 | return RValue<SIMD::Int>(activeLaneMaskValue); |
1016 | } |
1017 | |
1018 | RValue<SIMD::Int> storesAndAtomicsMask() const |
1019 | { |
1020 | ASSERT(storesAndAtomicsMaskValue != nullptr); |
1021 | return RValue<SIMD::Int>(storesAndAtomicsMaskValue); |
1022 | } |
1023 | |
1024 | void setActiveLaneMask(RValue<SIMD::Int> mask) |
1025 | { |
1026 | activeLaneMaskValue = mask.value; |
1027 | } |
1028 | |
1029 | // Add a new active lane mask edge from the current block to out. |
1030 | // The edge mask value will be (mask AND activeLaneMaskValue). |
1031 | // If multiple active lane masks are added for the same edge, then |
1032 | // they will be ORed together. |
1033 | void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask); |
1034 | |
1035 | // Add a new active lane mask for the edge from -> to. |
1036 | // If multiple active lane masks are added for the same edge, then |
1037 | // they will be ORed together. |
1038 | void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask); |
1039 | |
1040 | SpirvRoutine *routine = nullptr; // The current routine being built. |
1041 | Function::ID function; // The current function being built. |
1042 | Block::ID block; // The current block being built. |
1043 | rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask. |
1044 | rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask. |
1045 | Block::Set visited; // Blocks already built. |
1046 | std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks; |
1047 | std::deque<Block::ID> *pending; |
1048 | |
1049 | const vk::DescriptorSet::Bindings &descriptorSets; |
1050 | |
1051 | OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const; |
1052 | |
1053 | Intermediate& createIntermediate(Object::ID id, uint32_t size) |
1054 | { |
1055 | auto it = intermediates.emplace(std::piecewise_construct, |
1056 | std::forward_as_tuple(id), |
1057 | std::forward_as_tuple(size)); |
ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
1059 | return it.first->second; |
1060 | } |
1061 | |
1062 | Intermediate const& getIntermediate(Object::ID id) const |
1063 | { |
1064 | auto it = intermediates.find(id); |
ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
1066 | return it->second; |
1067 | } |
1068 | |
1069 | void createPointer(Object::ID id, SIMD::Pointer ptr) |
1070 | { |
1071 | bool added = pointers.emplace(id, ptr).second; |
ASSERT_MSG(added, "Pointer %d created twice", id.value());
1073 | } |
1074 | |
1075 | SIMD::Pointer const& getPointer(Object::ID id) const |
1076 | { |
1077 | auto it = pointers.find(id); |
ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
1079 | return it->second; |
1080 | } |
1081 | |
1082 | private: |
1083 | std::unordered_map<Object::ID, Intermediate> intermediates; |
1084 | std::unordered_map<Object::ID, SIMD::Pointer> pointers; |
1085 | |
1086 | const bool robustBufferAccess = true; // Emit robustBufferAccess safe code. |
1087 | const spv::ExecutionModel executionModel = spv::ExecutionModelMax; |
1088 | }; |
1089 | |
1090 | // EmitResult is an enumerator of result values from the Emit functions. |
1091 | enum class EmitResult |
1092 | { |
1093 | Continue, // No termination instructions. |
1094 | Terminator, // Reached a termination instruction. |
1095 | }; |
1096 | |
// Generic wrapper over either a per-lane intermediate value or a constant.
// Constants are transparently widened to per-lane values by the accessors
// (Float, Int and UInt). This is appropriate in most cases, i.e. when we're
// not going to do something significantly different based on whether the
// value is uniform across lanes.
1101 | class GenericValue |
1102 | { |
1103 | SpirvShader::Object const &obj; |
1104 | Intermediate const *intermediate; |
1105 | |
1106 | public: |
1107 | GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId); |
1108 | |
1109 | RValue<SIMD::Float> Float(uint32_t i) const |
1110 | { |
1111 | if (intermediate) |
1112 | { |
1113 | return intermediate->Float(i); |
1114 | } |
1115 | |
1116 | // Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact |
1117 | // bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant". |
1118 | // Thus we must first construct an integer constant, and bitcast to float. |
1119 | auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get()); |
1120 | return As<SIMD::Float>(SIMD::UInt(constantValue[i])); |
1121 | } |
1122 | |
1123 | RValue<SIMD::Int> Int(uint32_t i) const |
1124 | { |
1125 | if (intermediate) |
1126 | { |
1127 | return intermediate->Int(i); |
1128 | } |
1129 | auto constantValue = reinterpret_cast<int *>(obj.constantValue.get()); |
1130 | return SIMD::Int(constantValue[i]); |
1131 | } |
1132 | |
1133 | RValue<SIMD::UInt> UInt(uint32_t i) const |
1134 | { |
1135 | if (intermediate) |
1136 | { |
1137 | return intermediate->UInt(i); |
1138 | } |
1139 | auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get()); |
1140 | return SIMD::UInt(constantValue[i]); |
1141 | } |
1142 | |
1143 | SpirvShader::Type::ID const type; |
1144 | }; |
1145 | |
1146 | Type const &getType(Type::ID id) const |
1147 | { |
1148 | auto it = types.find(id); |
ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
1150 | return it->second; |
1151 | } |
1152 | |
1153 | Object const &getObject(Object::ID id) const |
1154 | { |
1155 | auto it = defs.find(id); |
ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
1157 | return it->second; |
1158 | } |
1159 | |
1160 | Function const &getFunction(Function::ID id) const |
1161 | { |
1162 | auto it = functions.find(id); |
ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
1164 | return it->second; |
1165 | } |
1166 | |
1167 | // Returns a SIMD::Pointer to the underlying data for the given pointer |
1168 | // object. |
1169 | // Handles objects of the following kinds: |
// • DescriptorSet
// • InterfaceVariable
// • Pointer
1174 | // Calling GetPointerToData with objects of any other kind will assert. |
1175 | SIMD::Pointer GetPointerToData(Object::ID id, int arrayIndex, EmitState const *state) const; |
1176 | |
1177 | SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const; |
1178 | SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const; |
1179 | |
1180 | // Returns the *component* offset in the literal for the given access chain. |
1181 | uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const; |
1182 | |
1183 | // Lookup the active lane mask for the edge from -> to. |
1184 | // If from is unreachable, then a mask of all zeros is returned. |
1185 | // Asserts if from is reachable and the edge does not exist. |
1186 | RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const; |
1187 | |
1188 | // Emit all the unvisited blocks (except for ignore) in DFS order, |
1189 | // starting with id. |
1190 | void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const; |
1191 | void EmitNonLoop(EmitState *state) const; |
1192 | void EmitLoop(EmitState *state) const; |
1193 | |
1194 | void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const; |
1195 | EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const; |
1196 | |
1197 | // Emit pass instructions: |
1198 | EmitResult EmitVariable(InsnIterator insn, EmitState *state) const; |
1199 | EmitResult EmitLoad(InsnIterator insn, EmitState *state) const; |
1200 | EmitResult EmitStore(InsnIterator insn, EmitState *state) const; |
1201 | EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const; |
1202 | EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const; |
1203 | EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const; |
EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
1205 | EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const; |
1206 | EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const; |
1207 | EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const; |
1208 | EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const; |
1209 | EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const; |
1210 | EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const; |
1211 | EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const; |
EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
1213 | EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const; |
1214 | EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const; |
1215 | EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const; |
1216 | EmitResult EmitDot(InsnIterator insn, EmitState *state) const; |
1217 | EmitResult EmitSelect(InsnIterator insn, EmitState *state) const; |
1218 | EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const; |
1219 | EmitResult EmitAny(InsnIterator insn, EmitState *state) const; |
1220 | EmitResult EmitAll(InsnIterator insn, EmitState *state) const; |
1221 | EmitResult EmitBranch(InsnIterator insn, EmitState *state) const; |
1222 | EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const; |
1223 | EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const; |
1224 | EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const; |
1225 | EmitResult EmitReturn(InsnIterator insn, EmitState *state) const; |
1226 | EmitResult EmitKill(InsnIterator insn, EmitState *state) const; |
1227 | EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const; |
1228 | EmitResult EmitPhi(InsnIterator insn, EmitState *state) const; |
1229 | EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const; |
1230 | EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const; |
1231 | EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const; |
1232 | EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const; |
1233 | EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const; |
1234 | EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const; |
1235 | EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const; |
1236 | EmitResult EmitImageQueryLod(InsnIterator insn, EmitState *state) const; |
1237 | EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const; |
1238 | EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const; |
1239 | EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const; |
1240 | EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const; |
1241 | EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const; |
1242 | EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const; |
1243 | EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const; |
1244 | EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const; |
1245 | EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const; |
1246 | EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const; |
1247 | EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const; |
1248 | EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const; |
1249 | EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const; |
1250 | EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const; |
1251 | |
1252 | void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const; |
1253 | SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const; |
1254 | uint32_t GetConstScalarInt(Object::ID id) const; |
1255 | void EvalSpecConstantOp(InsnIterator insn); |
1256 | void EvalSpecConstantUnaryOp(InsnIterator insn); |
1257 | void EvalSpecConstantBinaryOp(InsnIterator insn); |
1258 | |
1259 | // LoadPhi loads the phi values from the alloca storage and places the |
1260 | // load values into the intermediate with the phi's result id. |
1261 | void LoadPhi(InsnIterator insn, EmitState *state) const; |
1262 | |
1263 | // StorePhi updates the phi's alloca storage value using the incoming |
1264 | // values from blocks that are both in the OpPhi instruction and in |
1265 | // filter. |
1266 | void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const; |
1267 | |
1268 | // Emits a rr::Fence for the given MemorySemanticsMask. |
1269 | void Fence(spv::MemorySemanticsMask semantics) const; |
1270 | |
1271 | // Helper for calling rr::Yield with res cast to an rr::Int. |
1272 | void Yield(YieldResult res) const; |
1273 | |
1274 | // OpcodeName() returns the name of the opcode op. |
1275 | // If NDEBUG is defined, then OpcodeName() will only return the numerical code. |
1276 | static std::string OpcodeName(spv::Op op); |
1277 | static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics); |
1278 | |
1279 | // Helper as we often need to take dot products as part of doing other things. |
1280 | SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const; |
1281 | |
1282 | SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const; |
1283 | |
1284 | // Splits x into a floating-point significand in the range [0.5, 1.0) |
1285 | // and an integral exponent of two, such that: |
1286 | // x = significand * 2^exponent |
1287 | // Returns the pair <significand, exponent> |
1288 | std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const; |
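// For example, Frexp(SIMD::Float(8.0f)) yields <0.5f, 4>, since 8.0 == 0.5 * 2^4;
// the decomposition is applied independently to each SIMD lane.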
1289 | |
1290 | static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler); |
1291 | static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState); |
1292 | |
1293 | // TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly. |
1294 | static sw::FilterType convertFilterMode(const vk::Sampler *sampler); |
1295 | static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler); |
1296 | static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::Sampler *sampler, VkImageViewType imageViewType); |
1297 | |
1298 | // Returns 0 when invalid. |
1299 | static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model); |
1300 | }; |
1301 | |
1302 | class SpirvRoutine |
1303 | { |
1304 | public: |
1305 | SpirvRoutine(vk::PipelineLayout const *pipelineLayout); |
1306 | |
1307 | using Variable = Array<SIMD::Float>; |
1308 | |
1309 | struct SamplerCache |
1310 | { |
1311 | Pointer<Byte> imageDescriptor = nullptr; |
1312 | Pointer<Byte> sampler; |
1313 | Pointer<Byte> function; |
1314 | }; |
1315 | |
1316 | vk::PipelineLayout const * const pipelineLayout; |
1317 | |
1318 | std::unordered_map<SpirvShader::Object::ID, Variable> variables; |
1319 | std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache; |
1320 | Variable inputs = Variable{MAX_INTERFACE_COMPONENTS}; |
1321 | Variable outputs = Variable{MAX_INTERFACE_COMPONENTS}; |
1322 | |
1323 | Pointer<Byte> workgroupMemory; |
1324 | Pointer<Pointer<Byte>> descriptorSets; |
1325 | Pointer<Int> descriptorDynamicOffsets; |
1326 | Pointer<Byte> pushConstants; |
1327 | Pointer<Byte> constants; |
1328 | Int killMask = Int{0}; |
1329 | SIMD::Int windowSpacePosition[2]; |
1330 | Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex |
1331 | |
1332 | void createVariable(SpirvShader::Object::ID id, uint32_t size) |
1333 | { |
1334 | bool added = variables.emplace(id, Variable(size)).second; |
ASSERT_MSG(added, "Variable %d created twice", id.value());
1336 | } |
1337 | |
1338 | Variable& getVariable(SpirvShader::Object::ID id) |
1339 | { |
1340 | auto it = variables.find(id); |
ASSERT_MSG(it != variables.end(), "Unknown variable %d", id.value());
1342 | return it->second; |
1343 | } |
1344 | |
1345 | // setImmutableInputBuiltins() sets all the immutable input builtins, |
1346 | // common for all shader types. |
1347 | void setImmutableInputBuiltins(SpirvShader const *shader); |
1348 | |
1349 | // setInputBuiltin() calls f() with the builtin and value if the shader |
1350 | // uses the input builtin, otherwise the call is a no-op. |
1351 | // F is a function with the signature: |
1352 | // void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
1353 | template <typename F> |
1354 | inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f) |
1355 | { |
1356 | auto it = shader->inputBuiltins.find(id); |
1357 | if (it != shader->inputBuiltins.end()) |
1358 | { |
1359 | const auto& builtin = it->second; |
1360 | f(builtin, getVariable(builtin.Id)); |
1361 | } |
1362 | } |
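// Example (a sketch): a fragment-stage caller populating the FragCoord
// built-in, if the shader declares it:
//   routine->setInputBuiltin(shader, spv::BuiltInFragCoord,
//       [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
//       {
//           // Write builtin.SizeInComponents per-lane values, starting at
//           // value[builtin.FirstComponent].
//       });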
1363 | |
1364 | private: |
1365 | // The phis are only accessible to SpirvShader as they are only used and |
1366 | // exist between calls to SpirvShader::emitProlog() and |
1367 | // SpirvShader::emitEpilog(). |
1368 | friend class SpirvShader; |
1369 | |
1370 | std::unordered_map<SpirvShader::Object::ID, Variable> phis; |
1371 | |
1372 | }; |
1373 | |
} // namespace sw
1375 | |
1376 | #endif // sw_SpirvShader_hpp |
1377 | |