// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef sw_SpirvShader_hpp
#define sw_SpirvShader_hpp

#include "ShaderCore.hpp"
#include "SamplerCore.hpp"
#include "SpirvID.hpp"
#include "System/Types.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkConfig.h"
#include "Vulkan/VkDescriptorSet.hpp"
#include "Common/Types.hpp"
#include "Device/Config.hpp"
#include "Device/Sampler.hpp"

#include <spirv/unified1/spirv.hpp>

#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <deque>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#undef Yield // b/127920555

namespace vk
{
	class PipelineLayout;
	class ImageView;
	class Sampler;
	class RenderPass;
	struct SampledImageDescriptor;
} // namespace vk

namespace sw
{
	// Forward declarations.
	class SpirvRoutine;

	enum class OutOfBoundsBehavior
	{
		Nullify,            // Loads become zero, stores are elided.
		RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing).
		UndefinedValue,     // Only for load operations. Not secure. No program termination.
		UndefinedBehavior,  // Program may terminate.
	};
	// SIMD contains types that represent multiple scalars packed into a single
	// vector data type. Types in the SIMD namespace provide a semantic hint
	// that the data should be treated as a per-execution-lane scalar rather
	// than as a typical Euclidean-style vector type.
	namespace SIMD
	{
		// Width is the number of per-lane scalars packed into each SIMD vector.
		static constexpr int Width = 4;

		using Float = rr::Float4;
		using Int = rr::Int4;
		using UInt = rr::UInt4;
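
		// For illustration (not part of this header): a SIMD::Float holds one
		// scalar per lane, so with SIMD::Width == 4 the broadcast below yields
		// 1.0f in each of the four lanes:
		//
		//   SIMD::Float x(1.0f);        // lanes: { 1.0f, 1.0f, 1.0f, 1.0f }
		//   SIMD::Int lane(0, 1, 2, 3); // lanes: { 0, 1, 2, 3 }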

		struct Pointer
		{
			Pointer(rr::Pointer<Byte> base, rr::Int limit)
				: base(base),
				  dynamicLimit(limit), staticLimit(0),
				  dynamicOffsets(0), staticOffsets{},
				  hasDynamicLimit(true), hasDynamicOffsets(false) {}

			Pointer(rr::Pointer<Byte> base, unsigned int limit)
				: base(base),
				  dynamicLimit(0), staticLimit(limit),
				  dynamicOffsets(0), staticOffsets{},
				  hasDynamicLimit(false), hasDynamicOffsets(false) {}

			Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
				: base(base),
				  dynamicLimit(limit), staticLimit(0),
				  dynamicOffsets(offset), staticOffsets{},
				  hasDynamicLimit(true), hasDynamicOffsets(true) {}

			Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
				: base(base),
				  dynamicLimit(0), staticLimit(limit),
				  dynamicOffsets(offset), staticOffsets{},
				  hasDynamicLimit(false), hasDynamicOffsets(true) {}

			inline Pointer& operator += (Int i)
			{
				dynamicOffsets += i;
				hasDynamicOffsets = true;
				return *this;
			}

			inline Pointer& operator *= (Int i)
			{
				dynamicOffsets = offsets() * i;
				staticOffsets = {};
				hasDynamicOffsets = true;
				return *this;
			}

			inline Pointer operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; }
			inline Pointer operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; }

			inline Pointer& operator += (int i)
			{
				for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
				return *this;
			}

			inline Pointer& operator *= (int i)
			{
				for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; }
				if (hasDynamicOffsets)
				{
					dynamicOffsets *= SIMD::Int(i);
				}
				return *this;
			}

			inline Pointer operator + (int i) { Pointer p = *this; p += i; return p; }
			inline Pointer operator * (int i) { Pointer p = *this; p *= i; return p; }

			inline SIMD::Int offsets() const
			{
				static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
				return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]);
			}

			inline SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
			{
				ASSERT(accessSize > 0);

				if (isStaticallyInBounds(accessSize, robustness))
				{
					return SIMD::Int(0xffffffff);
				}

				if (!hasDynamicOffsets && !hasDynamicLimit)
				{
					// Common fast paths.
					static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
					return SIMD::Int(
						(staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
						(staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
						(staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
						(staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
				}

				return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
			}

			inline bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
			{
				if (hasDynamicOffsets)
				{
					return false;
				}

				if (hasDynamicLimit)
				{
					if (hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
					{
						switch(robustness)
						{
						case OutOfBoundsBehavior::UndefinedBehavior:
							// With this robustness setting the application/compiler guarantees in-bounds
							// accesses on active lanes, but since it can't know in advance which branches
							// are taken, this must be true even for inactive lanes.
							return true;
						case OutOfBoundsBehavior::Nullify:
						case OutOfBoundsBehavior::RobustBufferAccess:
						case OutOfBoundsBehavior::UndefinedValue:
							return false;
						}
					}
				}

				for (int i = 0; i < SIMD::Width; i++)
				{
					if (staticOffsets[i] + accessSize - 1 >= staticLimit)
					{
						return false;
					}
				}

				return true;
			}

			inline Int limit() const
			{
				return dynamicLimit + staticLimit;
			}

			// Returns true if all offsets are sequential
			// (N+0*step, N+1*step, N+2*step, N+3*step)
			inline rr::Bool hasSequentialOffsets(unsigned int step) const
			{
				if (hasDynamicOffsets)
				{
					auto o = offsets();
					static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
					return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 2*step, 3*step, 0))) == 0;
				}
				return hasStaticSequentialOffsets(step);
			}

			// Returns true if all offsets are compile-time static and
			// sequential (N+0*step, N+1*step, N+2*step, N+3*step).
			inline bool hasStaticSequentialOffsets(unsigned int step) const
			{
				if (hasDynamicOffsets)
				{
					return false;
				}
				for (int i = 1; i < SIMD::Width; i++)
				{
					if (staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; }
				}
				return true;
			}

			// Returns true if all offsets are equal (N, N, N, N)
			inline rr::Bool hasEqualOffsets() const
			{
				if (hasDynamicOffsets)
				{
					auto o = offsets();
					static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
					return rr::SignMask(~CmpEQ(o, o.yzwx)) == 0;
				}
				return hasStaticEqualOffsets();
			}

			// Returns true if all offsets are compile-time static and are equal
			// (N, N, N, N)
			inline bool hasStaticEqualOffsets() const
			{
				if (hasDynamicOffsets)
				{
					return false;
				}
				for (int i = 1; i < SIMD::Width; i++)
				{
					if (staticOffsets[i-1] != staticOffsets[i]) { return false; }
				}
				return true;
			}

			// Base address for the pointer, common across all lanes.
			rr::Pointer<rr::Byte> base;

			// Upper (non-inclusive) limit for offsets from base.
			rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
			unsigned int staticLimit;

			// Per lane offsets from base.
			SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
			std::array<int32_t, SIMD::Width> staticOffsets;

			bool hasDynamicLimit; // True if dynamicLimit is non-zero.
			bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero.
		};
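
		// For illustration (not part of this header): a Pointer pairs one
		// base address, shared by all lanes, with per-lane offsets. Four
		// lanes can address four consecutive 16-byte elements like this:
		//
		//   SIMD::Pointer p(buffer, 256u);  // 'buffer' is an rr::Pointer<Byte>; 256-byte static limit
		//   p += SIMD::Int(0, 16, 32, 48);  // per-lane dynamic offsets
		//   SIMD::Int mask = p.isInBounds(16, OutOfBoundsBehavior::Nullify);
		//
		// Because the offsets above are dynamic, hasSequentialOffsets(16)
		// would evaluate at shader run time rather than at compile time.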

		template <typename T> struct Element {};
		template <> struct Element<Float> { using type = rr::Float; };
		template <> struct Element<Int> { using type = rr::Int; };
		template <> struct Element<UInt> { using type = rr::UInt; };

		template<typename T>
		void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);

		template<typename T>
		void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
		{
			Store(ptr, T(val), robustness, mask, atomic, order);
		}

		template<typename T>
		T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
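
		// For illustration (not part of this header): a masked, robust load
		// through a SIMD::Pointer 'ptr', followed by a masked store of the
		// same value ('mask' selects the lanes that may access memory):
		//
		//   SIMD::Float v = Load<SIMD::Float>(ptr, OutOfBoundsBehavior::Nullify, mask);
		//   Store(ptr, v, OutOfBoundsBehavior::Nullify, mask);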
	}

	// Incrementally constructed complex bundle of rvalues
	// Effectively a restricted vector, supporting only:
	// - allocation to a (runtime-known) fixed size
	// - in-place construction of elements
	// - const operator[]
	class Intermediate
	{
	public:
		Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size) {
			memset(scalar, 0, sizeof(rr::Value*) * size);
		}

		~Intermediate()
		{
			delete[] scalar;
		}

		void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); }

		void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); }

		// Value retrieval functions.
		RValue<SIMD::Float> Float(uint32_t i) const
		{
			ASSERT(i < size);
			ASSERT(scalar[i] != nullptr);
			return As<SIMD::Float>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Float>(scalar)
		}

		RValue<SIMD::Int> Int(uint32_t i) const
		{
			ASSERT(i < size);
			ASSERT(scalar[i] != nullptr);
			return As<SIMD::Int>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Int>(scalar)
		}

		RValue<SIMD::UInt> UInt(uint32_t i) const
		{
			ASSERT(i < size);
			ASSERT(scalar[i] != nullptr);
			return As<SIMD::UInt>(scalar[i]); // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
		}

		// No copy/move construction or assignment
		Intermediate(Intermediate const &) = delete;
		Intermediate(Intermediate &&) = delete;
		Intermediate & operator=(Intermediate const &) = delete;
		Intermediate & operator=(Intermediate &&) = delete;

	private:
		void emplace(uint32_t i, rr::Value *value)
		{
			ASSERT(i < size);
			ASSERT(scalar[i] == nullptr);
			scalar[i] = value;
		}

		rr::Value **const scalar;
		uint32_t size;
	};
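
	// For illustration (not part of this header): an Intermediate is sized
	// once, each component is constructed in place exactly once, and values
	// are then read back through the typed accessors:
	//
	//   Intermediate vec4(4);                   // four SIMD scalars
	//   vec4.move(0, SIMD::Float(1.0f));        // construct component 0
	//   RValue<SIMD::Float> x = vec4.Float(0);  // read component 0
	//
	// Re-assigning a component, or reading an unwritten one, trips an ASSERT.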

	class SpirvShader
	{
	public:
		using InsnStore = std::vector<uint32_t>;
		InsnStore insns;

		using ImageSampler = void(void *texture, void *sampler, void *uvsIn, void *texelOut, void *constants);

		enum class YieldResult
		{
			ControlBarrier,
		};

		/* Pseudo-iterator over SPIR-V instructions, designed to support range-based-for. */
		class InsnIterator
		{
			InsnStore::const_iterator iter;

		public:
			spv::Op opcode() const
			{
				return static_cast<spv::Op>(*iter & spv::OpCodeMask);
			}

			uint32_t wordCount() const
			{
				return *iter >> spv::WordCountShift;
			}

			uint32_t word(uint32_t n) const
			{
				ASSERT(n < wordCount());
				return iter[n];
			}

			uint32_t const * wordPointer(uint32_t n) const
			{
				ASSERT(n < wordCount());
				return &iter[n];
			}

			const char* string(uint32_t n) const
			{
				return reinterpret_cast<const char*>(wordPointer(n));
			}

			bool operator==(InsnIterator const &other) const
			{
				return iter == other.iter;
			}

			bool operator!=(InsnIterator const &other) const
			{
				return iter != other.iter;
			}

			InsnIterator operator*() const
			{
				return *this;
			}

			InsnIterator &operator++()
			{
				iter += wordCount();
				return *this;
			}

			InsnIterator const operator++(int)
			{
				InsnIterator ret{*this};
				iter += wordCount();
				return ret;
			}

			InsnIterator(InsnIterator const &other) = default;

			InsnIterator() = default;

			explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter}
			{
			}
		};

		/* range-based-for interface */
		InsnIterator begin() const
		{
			return InsnIterator{insns.cbegin() + 5};
		}

		InsnIterator end() const
		{
			return InsnIterator{insns.cend()};
		}
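
		// For illustration (not part of this header): begin() skips the
		// five-word SPIR-V module header (magic number, version, generator,
		// bound, schema), so range-based-for visits instructions directly:
		//
		//   for (auto insn : shader)  // 'shader' is a SpirvShader
		//   {
		//       if (insn.opcode() == spv::OpName) { /* insn.word(1) is the target id */ }
		//   }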

		class Type
		{
		public:
			using ID = SpirvID<Type>;

			spv::Op opcode() const { return definition.opcode(); }

			InsnIterator definition;
			spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
			uint32_t sizeInComponents = 0;
			bool isBuiltInBlock = false;

			// Inner element type for pointers, arrays, vectors and matrices.
			ID element;
		};

		class Object
		{
		public:
			using ID = SpirvID<Object>;

			spv::Op opcode() const { return definition.opcode(); }

			InsnIterator definition;
			Type::ID type;
			std::unique_ptr<uint32_t[]> constantValue = nullptr;

			enum class Kind
			{
				// Invalid default kind.
				// If we get left with an object in this state, the module was
				// broken.
				Unknown,

				// TODO: Better document this kind.
				// A shader interface variable pointer.
				// Pointer with uniform address across all lanes.
				// Pointer held by SpirvRoutine::pointers
				InterfaceVariable,

				// Constant value held by Object::constantValue.
				Constant,

				// Value held by SpirvRoutine::intermediates.
				Intermediate,

				// Pointer held by SpirvRoutine::pointers
				Pointer,

				// A pointer to a vk::DescriptorSet*.
				// Pointer held by SpirvRoutine::pointers.
				DescriptorSet,
			};

			Kind kind = Kind::Unknown;
		};

		// Block is an interval of SPIR-V instructions, starting with the
		// opening OpLabel, and ending with a termination instruction.
		class Block
		{
		public:
			using ID = SpirvID<Block>;
			using Set = std::unordered_set<ID>;

			// Edge represents the graph edge between two blocks.
			struct Edge
			{
				ID from;
				ID to;

				bool operator == (const Edge& other) const { return from == other.from && to == other.to; }

				struct Hash
				{
					std::size_t operator()(const Edge& edge) const noexcept
					{
						return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
					}
				};
			};

			Block() = default;
			Block(const Block& other) = default;
			explicit Block(InsnIterator begin, InsnIterator end);

			/* range-based-for interface */
			inline InsnIterator begin() const { return begin_; }
			inline InsnIterator end() const { return end_; }

			enum Kind
			{
				Simple, // OpBranch or other simple terminator.
				StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional
				UnstructuredBranchConditional, // OpBranchConditional
				StructuredSwitch, // OpSelectionMerge + OpSwitch
				UnstructuredSwitch, // OpSwitch
				Loop, // OpLoopMerge + [OpBranchConditional | OpBranch]
			};

			Kind kind = Simple;
			InsnIterator mergeInstruction; // Structured control flow merge instruction.
			InsnIterator branchInstruction; // Branch instruction.
			ID mergeBlock; // Structured flow merge block.
			ID continueTarget; // Loop continue block.
			Set ins; // Blocks that branch into this block.
			Set outs; // Blocks that this block branches to.
			bool isLoopMerge = false;
		private:
			InsnIterator begin_;
			InsnIterator end_;
		};

		class Function
		{
		public:
			using ID = SpirvID<Function>;

			// Walks all the reachable blocks starting from id, adding them to
			// reachable.
			void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const;

			// AssignBlockFields() performs the following for all reachable blocks:
			// * Assigns Block::ins with the identifiers of all blocks that contain
			//   this block in their Block::outs.
			// * Sets Block::isLoopMerge to true if the block is the merge block
			//   of another loop block.
			void AssignBlockFields();

			// ForeachBlockDependency calls f with each dependency of the given
			// block. A dependency is an incoming block that is not a loop-back
			// edge.
			void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;

			// ExistsPath returns true if there's a direct or indirect flow from
			// the 'from' block to the 'to' block that does not pass through
			// notPassingThrough.
			bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;

			Block const &getBlock(Block::ID id) const
			{
				auto it = blocks.find(id);
				ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
				return it->second;
			}

			Block::ID entry; // function entry point block.
			HandleMap<Block> blocks; // blocks belonging to this function.
			Type::ID type; // type of the function.
			Type::ID result; // return type.
		};

		struct TypeOrObject {}; // Dummy struct to represent a Type or Object.

		// TypeOrObjectID is an identifier that represents a Type or an Object,
		// and supports implicit casting to and from Type::ID or Object::ID.
		class TypeOrObjectID : public SpirvID<TypeOrObject>
		{
		public:
			using Hash = std::hash<SpirvID<TypeOrObject>>;

			inline TypeOrObjectID(uint32_t id) : SpirvID(id) {}
			inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {}
			inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {}
			inline operator Type::ID() const { return Type::ID(value()); }
			inline operator Object::ID() const { return Object::ID(value()); }
		};

		// OpImageSample variants
		enum Variant
		{
			None, // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
			Dref,
			Proj,
			ProjDref,
			VARIANT_LAST = ProjDref
		};

		// Compact representation of image instruction parameters that is passed to the
		// trampoline function for retrieving/generating the corresponding sampling routine.
		struct ImageInstruction
		{
			ImageInstruction(Variant variant, SamplerMethod samplerMethod)
				: parameters(0)
			{
				this->variant = variant;
				this->samplerMethod = samplerMethod;
			}

			// Unmarshal from raw 32-bit data
			ImageInstruction(uint32_t parameters) : parameters(parameters) {}

			SamplerFunction getSamplerFunction() const
			{
				return { static_cast<SamplerMethod>(samplerMethod), offset != 0, sample != 0 };
			}

			bool isDref() const
			{
				return (variant == Dref) || (variant == ProjDref);
			}

			bool isProj() const
			{
				return (variant == Proj) || (variant == ProjDref);
			}

			union
			{
				struct
				{
					uint32_t variant : BITS(VARIANT_LAST);
					uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST);
					uint32_t gatherComponent : 2;

					// Parameters are passed to the sampling routine in this order:
					uint32_t coordinates : 3; // 1-4 (does not contain projection component)
					// uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref
					// uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch
					uint32_t grad : 2; // 0-3 components (for each of dx / dy)
					uint32_t offset : 2; // 0-3 components
					uint32_t sample : 1; // 0-1 scalar integer
				};

				uint32_t parameters;
			};
		};

		static_assert(sizeof(ImageInstruction) == sizeof(uint32_t), "ImageInstruction must be 32-bit");
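
		// For illustration (not part of this header): an ImageInstruction is
		// built from its variant and sampling method, the operand counts are
		// filled in, and the whole description round-trips through one
		// 32-bit word (this assumes SamplerMethod has an 'Implicit' value):
		//
		//   ImageInstruction instr(Dref, Implicit);
		//   instr.coordinates = 3;            // three coordinate components
		//   uint32_t raw = instr.parameters;  // marshal
		//   ImageInstruction copy(raw);       // unmarshal
		//   ASSERT(copy.isDref());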

		// Returns an ID that uniquely identifies the shader entry point
		// represented by this object.
		uint64_t getSerialID() const
		{
			return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
		}

		SpirvShader(uint32_t codeSerialID,
		            VkShaderStageFlagBits stage,
		            const char *entryPointName,
		            InsnStore const &insns,
		            const vk::RenderPass *renderPass,
		            uint32_t subpassIndex,
		            bool robustBufferAccess);

		struct Modes
		{
			bool EarlyFragmentTests : 1;
			bool DepthReplacing : 1;
			bool DepthGreater : 1;
			bool DepthLess : 1;
			bool DepthUnchanged : 1;
			bool ContainsKill : 1;
			bool ContainsControlBarriers : 1;
			bool NeedsCentroid : 1;

			// Compute workgroup dimensions
			int WorkgroupSizeX = 1, WorkgroupSizeY = 1, WorkgroupSizeZ = 1;
		};

		Modes const &getModes() const
		{
			return modes;
		}

		struct Capabilities
		{
			bool Matrix : 1;
			bool Shader : 1;
			bool InputAttachment : 1;
			bool Sampled1D : 1;
			bool Image1D : 1;
			bool SampledBuffer : 1;
			bool ImageBuffer : 1;
			bool ImageQuery : 1;
			bool DerivativeControl : 1;
			bool GroupNonUniform : 1;
			bool MultiView : 1;
			bool DeviceGroup : 1;
			bool GroupNonUniformVote : 1;
			bool GroupNonUniformBallot : 1;
			bool GroupNonUniformShuffle : 1;
			bool GroupNonUniformShuffleRelative : 1;
			bool StorageImageExtendedFormats : 1;
		};

		Capabilities const &getUsedCapabilities() const
		{
			return capabilities;
		}

		enum AttribType : unsigned char
		{
			ATTRIBTYPE_FLOAT,
			ATTRIBTYPE_INT,
			ATTRIBTYPE_UINT,
			ATTRIBTYPE_UNUSED,

			ATTRIBTYPE_LAST = ATTRIBTYPE_UINT
		};

		bool hasBuiltinInput(spv::BuiltIn b) const
		{
			return inputBuiltins.find(b) != inputBuiltins.end();
		}

		bool hasBuiltinOutput(spv::BuiltIn b) const
		{
			return outputBuiltins.find(b) != outputBuiltins.end();
		}

		struct Decorations
		{
			int32_t Location = -1;
			int32_t Component = 0;
			spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1);
			int32_t Offset = -1;
			int32_t ArrayStride = -1;
			int32_t MatrixStride = -1;

			bool HasLocation : 1;
			bool HasComponent : 1;
			bool HasBuiltIn : 1;
			bool HasOffset : 1;
			bool HasArrayStride : 1;
			bool HasMatrixStride : 1;
			bool HasRowMajor : 1; // whether RowMajor bit is valid.

			bool Flat : 1;
			bool Centroid : 1;
			bool NoPerspective : 1;
			bool Block : 1;
			bool BufferBlock : 1;
			bool RelaxedPrecision : 1;
			bool RowMajor : 1; // RowMajor if true; ColMajor if false
			bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix.

			Decorations()
				: Location{-1}, Component{0},
				  BuiltIn{static_cast<spv::BuiltIn>(-1)},
				  Offset{-1}, ArrayStride{-1}, MatrixStride{-1},
				  HasLocation{false}, HasComponent{false},
				  HasBuiltIn{false}, HasOffset{false},
				  HasArrayStride{false}, HasMatrixStride{false},
				  HasRowMajor{false},
				  Flat{false}, Centroid{false}, NoPerspective{false},
				  Block{false}, BufferBlock{false},
				  RelaxedPrecision{false}, RowMajor{false},
				  InsideMatrix{false}
			{
			}

			Decorations(Decorations const &) = default;

			void Apply(Decorations const &src);

			void Apply(spv::Decoration decoration, uint32_t arg);
		};

		std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations;
		std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations;

		struct DescriptorDecorations
		{
			int32_t DescriptorSet = -1;
			int32_t Binding = -1;
			int32_t InputAttachmentIndex = -1;

			void Apply(DescriptorDecorations const &src);
		};

		std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations;
		std::vector<VkFormat> inputAttachmentFormats;

		struct InterfaceComponent
		{
			AttribType Type;

			union
			{
				struct
				{
					bool Flat : 1;
					bool Centroid : 1;
					bool NoPerspective : 1;
				};

				uint8_t DecorationBits;
			};

			InterfaceComponent()
				: Type{ATTRIBTYPE_UNUSED}, DecorationBits{0}
			{
			}
		};

		struct BuiltinMapping
		{
			Object::ID Id;
			uint32_t FirstComponent;
			uint32_t SizeInComponents;
		};

		struct WorkgroupMemory
		{
			// allocates a new variable of size bytes with the given identifier.
			inline void allocate(Object::ID id, uint32_t size)
			{
				uint32_t offset = totalSize;
				auto it = offsets.emplace(id, offset);
				ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
				totalSize += size;
			}
			// returns the byte offset of the variable with the given identifier.
			inline uint32_t offsetOf(Object::ID id) const
			{
				auto it = offsets.find(id);
				ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
				return it->second;
			}
			// returns the total allocated size in bytes.
			inline uint32_t size() const { return totalSize; }
		private:
			uint32_t totalSize = 0; // in bytes
			std::unordered_map<Object::ID, uint32_t> offsets; // in bytes
		};
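
		// For illustration (not part of this header): workgroup variables
		// receive densely packed byte offsets in allocation order:
		//
		//   WorkgroupMemory mem;
		//   mem.allocate(Object::ID(1), 64);  // offsetOf(ID(1)) == 0
		//   mem.allocate(Object::ID(2), 16);  // offsetOf(ID(2)) == 64
		//   uint32_t total = mem.size();      // == 80 bytes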

		std::vector<InterfaceComponent> inputs;
		std::vector<InterfaceComponent> outputs;

		void emitProlog(SpirvRoutine *routine) const;
		void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
		void emitEpilog(SpirvRoutine *routine) const;

		using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
		std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
		std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
		WorkgroupMemory workgroupMemory;

	private:
		const uint32_t codeSerialID;
		Modes modes = {};
		Capabilities capabilities = {};
		HandleMap<Type> types;
		HandleMap<Object> defs;
		HandleMap<Function> functions;
		Function::ID entryPoint;

		const bool robustBufferAccess = true;
		spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.

		// DeclareType creates a Type for the given OpTypeX instruction, storing
		// it into the types map. It is called from the analysis pass (constructor).
		void DeclareType(InsnIterator insn);

		void ProcessExecutionMode(InsnIterator it);

		uint32_t ComputeTypeSize(InsnIterator insn);
		void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
		void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
		void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const;

		// Creates an Object for the instruction's result in 'defs'.
		void DefineResult(const InsnIterator &insn);

		// Returns true if data in the given storage class is word-interleaved
		// by each SIMD vector lane; otherwise data is stored linearly.
		//
		// Each lane addresses a single word, picked by a base pointer and an
		// integer offset.
		//
		// A word is currently 32 bits (single float, int32_t, uint32_t).
		// A lane is a single element of a SIMD vector register.
		//
		// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
		// ---------------------------------------------------------------------
		//
		// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
		//
		// Assuming SIMD::Width == 4:
		//
		//                 |  Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
		// ================+===========+===========+===========+===========
		//   LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
		// ----------------+-----------+-----------+-----------+-----------
		//   LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
		// ----------------+-----------+-----------+-----------+-----------
		//   LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
		// ----------------+-----------+-----------+-----------+-----------
		//   LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
		//
		//
		// Linear storage - (IsStorageInterleavedByLane() == false):
		// ---------------------------------------------------------
		//
		// Address = PtrBase + sizeof(Word) * LaneOffset
		//
		//                 |  Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
		// ================+===========+===========+===========+===========
		//   LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
		// ----------------+-----------+-----------+-----------+-----------
		//   LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
		// ----------------+-----------+-----------+-----------+-----------
		//   LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
		// ----------------+-----------+-----------+-----------+-----------
		//   LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
		//
		static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
		static bool IsExplicitLayout(spv::StorageClass storageClass);
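
		// For illustration (not part of this header): a host-side sketch of
		// the two addressing schemes documented above, assuming 32-bit words;
		// both helper names are hypothetical:
		//
		//   uint32_t InterleavedByteOffset(int laneOffset, int laneIndex)
		//   {
		//       return sizeof(uint32_t) * (SIMD::Width * laneOffset + laneIndex);
		//   }
		//
		//   uint32_t LinearByteOffset(int laneOffset)
		//   {
		//       return sizeof(uint32_t) * laneOffset;
		//   }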

		// Output storage buffers and images should not be affected by helper invocations
		static bool StoresInHelperInvocation(spv::StorageClass storageClass);

		template<typename F>
		int VisitInterfaceInner(Type::ID id, Decorations d, F f) const;

		template<typename F>
		void VisitInterface(Object::ID id, F f) const;

		template<typename F>
		void VisitMemoryObject(Object::ID id, F f) const;

		template<typename F>
		void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, F f) const;

		Object& CreateConstant(InsnIterator it);

		void ProcessInterfaceVariable(Object &object);

		// EmitState holds control-flow state for the emit() pass.
		class EmitState
		{
		public:
			EmitState(SpirvRoutine *routine,
			          Function::ID function,
			          RValue<SIMD::Int> activeLaneMask,
			          RValue<SIMD::Int> storesAndAtomicsMask,
			          const vk::DescriptorSet::Bindings &descriptorSets,
			          bool robustBufferAccess,
			          spv::ExecutionModel executionModel)
				: routine(routine),
				  function(function),
				  activeLaneMaskValue(activeLaneMask.value),
				  storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
				  descriptorSets(descriptorSets),
				  robustBufferAccess(robustBufferAccess),
				  executionModel(executionModel)
			{
				ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting.
			}

			RValue<SIMD::Int> activeLaneMask() const
			{
				ASSERT(activeLaneMaskValue != nullptr);
				return RValue<SIMD::Int>(activeLaneMaskValue);
			}

			RValue<SIMD::Int> storesAndAtomicsMask() const
			{
				ASSERT(storesAndAtomicsMaskValue != nullptr);
				return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
			}

			void setActiveLaneMask(RValue<SIMD::Int> mask)
			{
				activeLaneMaskValue = mask.value;
			}

			// Add a new active lane mask edge from the current block to out.
			// The edge mask value will be (mask AND activeLaneMaskValue).
			// If multiple active lane masks are added for the same edge, then
			// they will be ORed together.
			void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);

			// Add a new active lane mask for the edge from -> to.
			// If multiple active lane masks are added for the same edge, then
			// they will be ORed together.
			void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);

			SpirvRoutine *routine = nullptr; // The current routine being built.
			Function::ID function; // The current function being built.
			Block::ID block; // The current block being built.
			rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask.
			rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask.
			Block::Set visited; // Blocks already built.
			std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
			std::deque<Block::ID> *pending;

			const vk::DescriptorSet::Bindings &descriptorSets;

			OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;

			Intermediate& createIntermediate(Object::ID id, uint32_t size)
			{
				auto it = intermediates.emplace(std::piecewise_construct,
				                                std::forward_as_tuple(id),
				                                std::forward_as_tuple(size));
				ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
				return it.first->second;
			}

			Intermediate const& getIntermediate(Object::ID id) const
			{
				auto it = intermediates.find(id);
				ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
				return it->second;
			}

			void createPointer(Object::ID id, SIMD::Pointer ptr)
			{
				bool added = pointers.emplace(id, ptr).second;
				ASSERT_MSG(added, "Pointer %d created twice", id.value());
			}

			SIMD::Pointer const& getPointer(Object::ID id) const
			{
				auto it = pointers.find(id);
				ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
				return it->second;
			}

		private:
			std::unordered_map<Object::ID, Intermediate> intermediates;
			std::unordered_map<Object::ID, SIMD::Pointer> pointers;

			const bool robustBufferAccess = true; // Emit robustBufferAccess safe code.
			const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
		};

		// EmitResult is an enumerator of result values from the Emit functions.
		enum class EmitResult
		{
			Continue, // No termination instructions.
			Terminator, // Reached a termination instruction.
		};

		// Generic wrapper over either a per-lane intermediate value or a constant.
		// Constants are transparently widened to per-lane values in the typed
		// accessors. This is appropriate in most cases, i.e. when we're not going
		// to do anything significantly different based on whether the value is
		// uniform across lanes.
		class GenericValue
		{
			SpirvShader::Object const &obj;
			Intermediate const *intermediate;

		public:
			GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId);

			RValue<SIMD::Float> Float(uint32_t i) const
			{
				if (intermediate)
				{
					return intermediate->Float(i);
				}

				// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
				// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
				// Thus we must first construct an integer constant, and bitcast to float.
				auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get());
				return As<SIMD::Float>(SIMD::UInt(constantValue[i]));
			}

			RValue<SIMD::Int> Int(uint32_t i) const
			{
				if (intermediate)
				{
					return intermediate->Int(i);
				}
				auto constantValue = reinterpret_cast<int *>(obj.constantValue.get());
				return SIMD::Int(constantValue[i]);
			}

			RValue<SIMD::UInt> UInt(uint32_t i) const
			{
				if (intermediate)
				{
					return intermediate->UInt(i);
				}
				auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get());
				return SIMD::UInt(constantValue[i]);
			}

			SpirvShader::Type::ID const type;
		};
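
		// For illustration (not part of this header): a GenericValue reads
		// the same way whether the operand is an intermediate or a constant;
		// a constant component is transparently broadcast to all lanes.
		// 'insn.word(3)' here is just a hypothetical operand id:
		//
		//   GenericValue src(this, state, insn.word(3));
		//   RValue<SIMD::Float> x = src.Float(0);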

		Type const &getType(Type::ID id) const
		{
			auto it = types.find(id);
			ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
			return it->second;
		}

		Object const &getObject(Object::ID id) const
		{
			auto it = defs.find(id);
			ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
			return it->second;
		}

		Function const &getFunction(Function::ID id) const
		{
			auto it = functions.find(id);
			ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
			return it->second;
		}

		// Returns a SIMD::Pointer to the underlying data for the given pointer
		// object.
		// Handles objects of the following kinds:
		// • DescriptorSet
		// • InterfaceVariable
		// • Pointer
		// Calling GetPointerToData with objects of any other kind will assert.
		SIMD::Pointer GetPointerToData(Object::ID id, int arrayIndex, EmitState const *state) const;

		SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
		SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;

		// Returns the *component* offset in the literal for the given access chain.
		uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;

		// Lookup the active lane mask for the edge from -> to.
		// If from is unreachable, then a mask of all zeros is returned.
		// Asserts if from is reachable and the edge does not exist.
		RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;

		// Emit all the unvisited blocks (except for ignore) in DFS order,
		// starting with id.
		void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
		void EmitNonLoop(EmitState *state) const;
		void EmitLoop(EmitState *state) const;

		void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
		EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;

		// Emit pass instructions:
		EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
		EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
		EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
		EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
		EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
		EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
		EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
		EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
		EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
		EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
		EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
		EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
		EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
		EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
		EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
		EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
		EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
		EmitResult EmitKill(InsnIterator insn, EmitState *state) const;
		EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
		EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQueryLod(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
		EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
		EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
		EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
		EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
		EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;

		void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
		SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
		uint32_t GetConstScalarInt(Object::ID id) const;
		void EvalSpecConstantOp(InsnIterator insn);
		void EvalSpecConstantUnaryOp(InsnIterator insn);
		void EvalSpecConstantBinaryOp(InsnIterator insn);

		// LoadPhi loads the phi values from the alloca storage and places the
		// load values into the intermediate with the phi's result id.
		void LoadPhi(InsnIterator insn, EmitState *state) const;

		// StorePhi updates the phi's alloca storage value using the incoming
		// values from blocks that are both in the OpPhi instruction and in
		// filter.
		void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const;

		// Emits a rr::Fence for the given MemorySemanticsMask.
		void Fence(spv::MemorySemanticsMask semantics) const;

		// Helper for calling rr::Yield with res cast to an rr::Int.
		void Yield(YieldResult res) const;

		// OpcodeName() returns the name of the opcode op.
		// If NDEBUG is defined, then OpcodeName() will only return the numerical code.
		static std::string OpcodeName(spv::Op op);
		static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

		// Helper as we often need to take dot products as part of doing other things.
		SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;

		SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const;

		// Splits x into a floating-point significand in the range [0.5, 1.0)
		// and an integral exponent of two, such that:
		//   x = significand * 2^exponent
		// Returns the pair <significand, exponent>
		std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
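
		// For illustration (not part of this header): Frexp(8.0f) yields the
		// pair <0.5f, 4>, since 8.0 == 0.5 * 2^4, and Frexp(1.0f) yields
		// <0.5f, 1>, since 1.0 == 0.5 * 2^1.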

		static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
		static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);

		// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
		static sw::FilterType convertFilterMode(const vk::Sampler *sampler);
		static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler);
		static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::Sampler *sampler, VkImageViewType imageViewType);

		// Returns 0 when invalid.
		static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
	};

	class SpirvRoutine
	{
	public:
		SpirvRoutine(vk::PipelineLayout const *pipelineLayout);

		using Variable = Array<SIMD::Float>;

		struct SamplerCache
		{
			Pointer<Byte> imageDescriptor = nullptr;
			Pointer<Byte> sampler;
			Pointer<Byte> function;
		};

		vk::PipelineLayout const * const pipelineLayout;

		std::unordered_map<SpirvShader::Object::ID, Variable> variables;
		std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
		Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
		Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};

		Pointer<Byte> workgroupMemory;
		Pointer<Pointer<Byte>> descriptorSets;
		Pointer<Int> descriptorDynamicOffsets;
		Pointer<Byte> pushConstants;
		Pointer<Byte> constants;
		Int killMask = Int{0};
		SIMD::Int windowSpacePosition[2];
		Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex

		void createVariable(SpirvShader::Object::ID id, uint32_t size)
		{
			bool added = variables.emplace(id, Variable(size)).second;
			ASSERT_MSG(added, "Variable %d created twice", id.value());
		}

		Variable& getVariable(SpirvShader::Object::ID id)
		{
			auto it = variables.find(id);
			ASSERT_MSG(it != variables.end(), "Unknown variable %d", id.value());
			return it->second;
		}

		// setImmutableInputBuiltins() sets all the immutable input builtins,
		// common for all shader types.
		void setImmutableInputBuiltins(SpirvShader const *shader);

		// setInputBuiltin() calls f() with the builtin and value if the shader
		// uses the input builtin, otherwise the call is a no-op.
		// F is a function with the signature:
		// void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		template <typename F>
		inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f)
		{
			auto it = shader->inputBuiltins.find(id);
			if (it != shader->inputBuiltins.end())
			{
				const auto& builtin = it->second;
				f(builtin, getVariable(builtin.Id));
			}
		}
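
		// For illustration (not part of this header): populating a builtin
		// input only when the shader declares it, using a lambda matching
		// the documented signature ('routine->viewID' stands in for the
		// source value):
		//
		//   routine->setInputBuiltin(shader, spv::BuiltInViewIndex,
		//       [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		//       {
		//           value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine->viewID));
		//       });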

	private:
		// The phis are only accessible to SpirvShader, as they are only used
		// and exist between calls to SpirvShader::emitProlog() and
		// SpirvShader::emitEpilog().
		friend class SpirvShader;

		std::unordered_map<SpirvShader::Object::ID, Variable> phis;

	};

}

#endif // sw_SpirvShader_hpp