1 | // Copyright 2018 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef sw_SpirvShader_hpp |
16 | #define sw_SpirvShader_hpp |
17 | |
18 | #include "ShaderCore.hpp" |
19 | #include "SamplerCore.hpp" |
20 | #include "SpirvID.hpp" |
21 | #include "System/Types.hpp" |
22 | #include "Vulkan/VkDebug.hpp" |
23 | #include "Vulkan/VkConfig.h" |
24 | #include "Vulkan/VkDescriptorSet.hpp" |
25 | #include "Common/Types.hpp" |
26 | #include "Device/Config.hpp" |
27 | #include "Device/Sampler.hpp" |
28 | |
29 | #include <spirv/unified1/spirv.hpp> |
30 | |
31 | #include <array> |
32 | #include <atomic> |
33 | #include <cstdint> |
34 | #include <cstring> |
35 | #include <functional> |
36 | #include <memory> |
37 | #include <deque> |
38 | #include <string> |
39 | #include <type_traits> |
40 | #include <unordered_map> |
41 | #include <unordered_set> |
42 | #include <vector> |
43 | |
44 | #undef Yield // b/127920555 |
45 | |
46 | namespace vk |
47 | { |
48 | class PipelineLayout; |
49 | class ImageView; |
50 | class Sampler; |
51 | class RenderPass; |
52 | struct SampledImageDescriptor; |
53 | } // namespace vk |
54 | |
55 | namespace sw |
56 | { |
57 | // Forward declarations. |
58 | class SpirvRoutine; |
59 | |
60 | enum class OutOfBoundsBehavior |
61 | { |
62 | Nullify, // Loads become zero, stores are elided. |
63 | RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing). |
64 | UndefinedValue, // Only for load operations. Not secure. No program termination. |
65 | UndefinedBehavior, // Program may terminate. |
66 | }; |
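// For example (illustrative): consider a per-lane load through a SIMD::Pointer
// where one lane's offset lies past the buffer limit.
//  - Nullify: the out-of-bounds lane loads zero, and stores to it are dropped.
//  - RobustBufferAccess: the out-of-bounds lane loads zero or a value from within
//    the buffer, per the Vulkan robustness rules.
//  - UndefinedValue: the loaded value for that lane is arbitrary (not secure),
//    but execution continues.
//  - UndefinedBehavior: no guarantees; the access may terminate the program.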
67 | |
68 | // SIMD contains types that represent multiple scalars packed into a single |
69 | // vector data type. Types in the SIMD namespace provide a semantic hint |
70 | // that the data should be treated as a per-execution-lane scalar instead of |
71 | // a typical euclidean-style vector type. |
72 | namespace SIMD |
73 | { |
74 | // Width is the number of per-lane scalars packed into each SIMD vector. |
75 | static constexpr int Width = 4; |
76 | |
77 | using Float = rr::Float4; |
78 | using Int = rr::Int4; |
79 | using UInt = rr::UInt4; |
80 | |
81 | struct Pointer |
82 | { |
83 | Pointer(rr::Pointer<Byte> base, rr::Int limit) |
84 | : base(base), |
85 | dynamicLimit(limit), staticLimit(0), |
86 | dynamicOffsets(0), staticOffsets{}, |
87 | hasDynamicLimit(true), hasDynamicOffsets(false) {} |
88 | |
89 | Pointer(rr::Pointer<Byte> base, unsigned int limit) |
90 | : base(base), |
91 | dynamicLimit(0), staticLimit(limit), |
92 | dynamicOffsets(0), staticOffsets{}, |
93 | hasDynamicLimit(false), hasDynamicOffsets(false) {} |
94 | |
95 | Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset) |
96 | : base(base), |
97 | dynamicLimit(limit), staticLimit(0), |
98 | dynamicOffsets(offset), staticOffsets{}, |
99 | hasDynamicLimit(true), hasDynamicOffsets(true) {} |
100 | |
101 | Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset) |
102 | : base(base), |
103 | dynamicLimit(0), staticLimit(limit), |
104 | dynamicOffsets(offset), staticOffsets{}, |
105 | hasDynamicLimit(false), hasDynamicOffsets(true) {} |
106 | |
107 | inline Pointer& operator += (Int i) |
108 | { |
109 | dynamicOffsets += i; |
110 | hasDynamicOffsets = true; |
111 | return *this; |
112 | } |
113 | |
114 | inline Pointer& operator *= (Int i) |
115 | { |
116 | dynamicOffsets = offsets() * i; |
117 | staticOffsets = {}; |
118 | hasDynamicOffsets = true; |
119 | return *this; |
120 | } |
121 | |
122 | inline Pointer operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; } |
123 | inline Pointer operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; } |
124 | |
125 | inline Pointer& operator += (int i) |
126 | { |
127 | for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; } |
128 | return *this; |
129 | } |
130 | |
131 | inline Pointer& operator *= (int i) |
132 | { |
133 | for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; } |
134 | if (hasDynamicOffsets) |
135 | { |
136 | dynamicOffsets *= SIMD::Int(i); |
137 | } |
138 | return *this; |
139 | } |
140 | |
141 | inline Pointer operator + (int i) { Pointer p = *this; p += i; return p; } |
142 | inline Pointer operator * (int i) { Pointer p = *this; p *= i; return p; } |
143 | |
144 | inline SIMD::Int offsets() const |
145 | { |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
147 | return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]); |
148 | } |
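// Illustrative example (a sketch): starting from a pointer p with all-zero offsets,
//   p += 8;                      // static:  offsets() == {8, 8, 8, 8}
//   p += SIMD::Int(0, 4, 8, 12); // dynamic: offsets() == {8, 12, 16, 20}
// offsets() always returns the per-lane sum of the static and dynamic parts.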
149 | |
150 | inline SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const |
151 | { |
152 | ASSERT(accessSize > 0); |
153 | |
154 | if (isStaticallyInBounds(accessSize, robustness)) |
155 | { |
156 | return SIMD::Int(0xffffffff); |
157 | } |
158 | |
159 | if (!hasDynamicOffsets && !hasDynamicLimit) |
160 | { |
161 | // Common fast paths. |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
163 | return SIMD::Int( |
164 | (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, |
165 | (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, |
166 | (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0, |
167 | (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0); |
168 | } |
169 | |
170 | return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit())); |
171 | } |
172 | |
173 | inline bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const |
174 | { |
175 | if (hasDynamicOffsets) |
176 | { |
177 | return false; |
178 | } |
179 | |
180 | if (hasDynamicLimit) |
181 | { |
182 | if (hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize)) |
183 | { |
184 | switch(robustness) |
185 | { |
186 | case OutOfBoundsBehavior::UndefinedBehavior: |
187 | // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes, |
// but since it can't know in advance which branches are taken, this must be true even for inactive lanes.
189 | return true; |
190 | case OutOfBoundsBehavior::Nullify: |
191 | case OutOfBoundsBehavior::RobustBufferAccess: |
192 | case OutOfBoundsBehavior::UndefinedValue: |
193 | return false; |
194 | } |
195 | } |
196 | } |
197 | |
198 | for (int i = 0; i < SIMD::Width; i++) |
199 | { |
200 | if (staticOffsets[i] + accessSize - 1 >= staticLimit) |
201 | { |
202 | return false; |
203 | } |
204 | } |
205 | |
206 | return true; |
207 | } |
208 | |
209 | inline Int limit() const |
210 | { |
211 | return dynamicLimit + staticLimit; |
212 | } |
213 | |
214 | // Returns true if all offsets are sequential |
215 | // (N+0*step, N+1*step, N+2*step, N+3*step) |
216 | inline rr::Bool hasSequentialOffsets(unsigned int step) const |
217 | { |
218 | if (hasDynamicOffsets) |
219 | { |
220 | auto o = offsets(); |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
222 | return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 2*step, 3*step, 0))) == 0; |
223 | } |
224 | return hasStaticSequentialOffsets(step); |
225 | } |
226 | |
// Returns true if all offsets are compile-time static and
228 | // sequential (N+0*step, N+1*step, N+2*step, N+3*step) |
229 | inline bool hasStaticSequentialOffsets(unsigned int step) const |
230 | { |
231 | if (hasDynamicOffsets) |
232 | { |
233 | return false; |
234 | } |
235 | for (int i = 1; i < SIMD::Width; i++) |
236 | { |
237 | if (staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; } |
238 | } |
239 | return true; |
240 | } |
241 | |
242 | // Returns true if all offsets are equal (N, N, N, N) |
243 | inline rr::Bool hasEqualOffsets() const |
244 | { |
245 | if (hasDynamicOffsets) |
246 | { |
247 | auto o = offsets(); |
static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
249 | return rr::SignMask(~CmpEQ(o, o.yzwx)) == 0; |
250 | } |
251 | return hasStaticEqualOffsets(); |
252 | } |
253 | |
254 | // Returns true if all offsets are compile-time static and are equal |
255 | // (N, N, N, N) |
256 | inline bool hasStaticEqualOffsets() const |
257 | { |
258 | if (hasDynamicOffsets) |
259 | { |
260 | return false; |
261 | } |
262 | for (int i = 1; i < SIMD::Width; i++) |
263 | { |
264 | if (staticOffsets[i-1] != staticOffsets[i]) { return false; } |
265 | } |
266 | return true; |
267 | } |
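// For example, static offsets {64, 64, 64, 64} satisfy hasStaticEqualOffsets(),
// while {64, 68, 72, 76} satisfy hasStaticSequentialOffsets(4), the typical
// layout when the four lanes access consecutive 4-byte words.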
268 | |
269 | // Base address for the pointer, common across all lanes. |
270 | rr::Pointer<rr::Byte> base; |
271 | |
272 | // Upper (non-inclusive) limit for offsets from base. |
273 | rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero. |
274 | unsigned int staticLimit; |
275 | |
276 | // Per lane offsets from base. |
277 | SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero. |
278 | std::array<int32_t, SIMD::Width> staticOffsets; |
279 | |
280 | bool hasDynamicLimit; // True if dynamicLimit is non-zero. |
281 | bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero. |
282 | }; |
283 | |
284 | template <typename T> struct Element {}; |
285 | template <> struct Element<Float> { using type = rr::Float; }; |
286 | template <> struct Element<Int> { using type = rr::Int; }; |
287 | template <> struct Element<UInt> { using type = rr::UInt; }; |
288 | |
289 | template<typename T> |
290 | void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); |
291 | |
292 | template<typename T> |
293 | void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed) |
294 | { |
295 | Store(ptr, T(val), robustness, mask, atomic, order); |
296 | } |
297 | |
298 | template<typename T> |
299 | T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float)); |
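// Illustrative usage (a sketch; 'ptr', 'robustness' and 'mask' are assumed to be
// provided by the surrounding emit code):
//   auto value = Load<SIMD::Float>(ptr, robustness, mask);
//   Store(ptr, value * SIMD::Float(2.0f), robustness, mask);
// The mask selects the active lanes; stores are suppressed for inactive lanes.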
} // namespace SIMD
301 | |
302 | // Incrementally constructed complex bundle of rvalues |
303 | // Effectively a restricted vector, supporting only: |
304 | // - allocation to a (runtime-known) fixed size |
305 | // - in-place construction of elements |
// - const element access (via Float(), Int() and UInt())
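//
// Example use (a sketch): building a 4-component result one scalar at a time:
//   Intermediate result(4);
//   for (uint32_t i = 0; i < 4; i++) { result.move(i, someSimdFloat); }
//   RValue<SIMD::Float> x = result.Float(0);
// where 'someSimdFloat' stands for any RValue<SIMD::Float> expression.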
307 | class Intermediate |
308 | { |
309 | public: |
310 | Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size) { |
311 | memset(scalar, 0, sizeof(rr::Value*) * size); |
312 | } |
313 | |
314 | ~Intermediate() |
315 | { |
316 | delete[] scalar; |
317 | } |
318 | |
319 | void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); } |
320 | void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); } |
321 | void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); } |
322 | |
323 | void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); } |
324 | void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); } |
325 | void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); } |
326 | |
327 | // Value retrieval functions. |
328 | RValue<SIMD::Float> Float(uint32_t i) const |
329 | { |
330 | ASSERT(i < size); |
331 | ASSERT(scalar[i] != nullptr); |
332 | return As<SIMD::Float>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Float>(scalar) |
333 | } |
334 | |
335 | RValue<SIMD::Int> Int(uint32_t i) const |
336 | { |
337 | ASSERT(i < size); |
338 | ASSERT(scalar[i] != nullptr); |
339 | return As<SIMD::Int>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Int>(scalar) |
340 | } |
341 | |
342 | RValue<SIMD::UInt> UInt(uint32_t i) const |
343 | { |
344 | ASSERT(i < size); |
345 | ASSERT(scalar[i] != nullptr); |
346 | return As<SIMD::UInt>(scalar[i]); // TODO(b/128539387): RValue<SIMD::UInt>(scalar) |
347 | } |
348 | |
349 | // No copy/move construction or assignment |
350 | Intermediate(Intermediate const &) = delete; |
351 | Intermediate(Intermediate &&) = delete; |
352 | Intermediate & operator=(Intermediate const &) = delete; |
353 | Intermediate & operator=(Intermediate &&) = delete; |
354 | |
355 | private: |
356 | void emplace(uint32_t i, rr::Value *value) |
357 | { |
358 | ASSERT(i < size); |
359 | ASSERT(scalar[i] == nullptr); |
360 | scalar[i] = value; |
361 | } |
362 | |
363 | rr::Value **const scalar; |
364 | uint32_t size; |
365 | }; |
366 | |
367 | class SpirvShader |
368 | { |
369 | public: |
370 | using InsnStore = std::vector<uint32_t>; |
371 | InsnStore insns; |
372 | |
373 | using ImageSampler = void(void* texture, void *sampler, void* uvsIn, void* texelOut, void* constants); |
374 | |
375 | enum class YieldResult |
376 | { |
377 | ControlBarrier, |
378 | }; |
379 | |
/* Pseudo-iterator over SPIR-V instructions, designed to support range-based-for. */
381 | class InsnIterator |
382 | { |
383 | InsnStore::const_iterator iter; |
384 | |
385 | public: |
386 | spv::Op opcode() const |
387 | { |
388 | return static_cast<spv::Op>(*iter & spv::OpCodeMask); |
389 | } |
390 | |
391 | uint32_t wordCount() const |
392 | { |
393 | return *iter >> spv::WordCountShift; |
394 | } |
395 | |
396 | uint32_t word(uint32_t n) const |
397 | { |
398 | ASSERT(n < wordCount()); |
399 | return iter[n]; |
400 | } |
401 | |
402 | uint32_t const * wordPointer(uint32_t n) const |
403 | { |
404 | ASSERT(n < wordCount()); |
405 | return &iter[n]; |
406 | } |
407 | |
408 | const char* string(uint32_t n) const |
409 | { |
410 | return reinterpret_cast<const char*>(wordPointer(n)); |
411 | } |
412 | |
413 | bool operator==(InsnIterator const &other) const |
414 | { |
415 | return iter == other.iter; |
416 | } |
417 | |
418 | bool operator!=(InsnIterator const &other) const |
419 | { |
420 | return iter != other.iter; |
421 | } |
422 | |
423 | InsnIterator operator*() const |
424 | { |
425 | return *this; |
426 | } |
427 | |
428 | InsnIterator &operator++() |
429 | { |
430 | iter += wordCount(); |
431 | return *this; |
432 | } |
433 | |
434 | InsnIterator const operator++(int) |
435 | { |
436 | InsnIterator ret{*this}; |
437 | iter += wordCount(); |
438 | return ret; |
439 | } |
440 | |
441 | InsnIterator(InsnIterator const &other) = default; |
442 | |
443 | InsnIterator() = default; |
444 | |
445 | explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter} |
446 | { |
447 | } |
448 | }; |
449 | |
450 | /* range-based-for interface */ |
451 | InsnIterator begin() const |
452 | { |
453 | return InsnIterator{insns.cbegin() + 5}; |
454 | } |
455 | |
456 | InsnIterator end() const |
457 | { |
458 | return InsnIterator{insns.cend()}; |
459 | } |
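// Example (a sketch): begin() skips the five-word SPIR-V module header, so
// iteration yields instructions only. Finding the entry point declarations
// might look like:
//   for (auto insn : *this)
//   {
//       if (insn.opcode() == spv::OpEntryPoint)
//       {
//           const char *name = insn.string(3); // entry point name operand
//       }
//   }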
460 | |
461 | class Type |
462 | { |
463 | public: |
464 | using ID = SpirvID<Type>; |
465 | |
466 | spv::Op opcode() const { return definition.opcode(); } |
467 | |
468 | InsnIterator definition; |
469 | spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1); |
470 | uint32_t sizeInComponents = 0; |
471 | bool isBuiltInBlock = false; |
472 | |
473 | // Inner element type for pointers, arrays, vectors and matrices. |
474 | ID element; |
475 | }; |
476 | |
477 | class Object |
478 | { |
479 | public: |
480 | using ID = SpirvID<Object>; |
481 | |
482 | spv::Op opcode() const { return definition.opcode(); } |
483 | |
484 | InsnIterator definition; |
485 | Type::ID type; |
486 | std::unique_ptr<uint32_t[]> constantValue = nullptr; |
487 | |
488 | enum class Kind |
489 | { |
490 | // Invalid default kind. |
491 | // If we get left with an object in this state, the module was |
492 | // broken. |
493 | Unknown, |
494 | |
495 | // TODO: Better document this kind. |
496 | // A shader interface variable pointer. |
497 | // Pointer with uniform address across all lanes. |
498 | // Pointer held by SpirvRoutine::pointers |
499 | InterfaceVariable, |
500 | |
501 | // Constant value held by Object::constantValue. |
502 | Constant, |
503 | |
504 | // Value held by SpirvRoutine::intermediates. |
505 | Intermediate, |
506 | |
507 | // Pointer held by SpirvRoutine::pointers |
508 | Pointer, |
509 | |
510 | // A pointer to a vk::DescriptorSet*. |
511 | // Pointer held by SpirvRoutine::pointers. |
512 | DescriptorSet, |
513 | }; |
514 | |
515 | Kind kind = Kind::Unknown; |
516 | }; |
517 | |
518 | // Block is an interval of SPIR-V instructions, starting with the |
519 | // opening OpLabel, and ending with a termination instruction. |
520 | class Block |
521 | { |
522 | public: |
523 | using ID = SpirvID<Block>; |
524 | using Set = std::unordered_set<ID>; |
525 | |
526 | // Edge represents the graph edge between two blocks. |
527 | struct Edge |
528 | { |
529 | ID from; |
530 | ID to; |
531 | |
532 | bool operator == (const Edge& other) const { return from == other.from && to == other.to; } |
533 | |
534 | struct Hash |
535 | { |
536 | std::size_t operator()(const Edge& edge) const noexcept |
537 | { |
538 | return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value()); |
539 | } |
540 | }; |
541 | }; |
542 | |
543 | Block() = default; |
544 | Block(const Block& other) = default; |
545 | explicit Block(InsnIterator begin, InsnIterator end); |
546 | |
547 | /* range-based-for interface */ |
548 | inline InsnIterator begin() const { return begin_; } |
549 | inline InsnIterator end() const { return end_; } |
550 | |
551 | enum Kind |
552 | { |
553 | Simple, // OpBranch or other simple terminator. |
554 | StructuredBranchConditional, // OpSelectionMerge + OpBranchConditional |
555 | UnstructuredBranchConditional, // OpBranchConditional |
556 | StructuredSwitch, // OpSelectionMerge + OpSwitch |
557 | UnstructuredSwitch, // OpSwitch |
558 | Loop, // OpLoopMerge + [OpBranchConditional | OpBranch] |
559 | }; |
560 | |
561 | Kind kind = Simple; |
562 | InsnIterator mergeInstruction; // Structured control flow merge instruction. |
563 | InsnIterator branchInstruction; // Branch instruction. |
564 | ID mergeBlock; // Structured flow merge block. |
565 | ID continueTarget; // Loop continue block. |
566 | Set ins; // Blocks that branch into this block. |
567 | Set outs; // Blocks that this block branches to. |
568 | bool isLoopMerge = false; |
569 | private: |
570 | InsnIterator begin_; |
571 | InsnIterator end_; |
572 | }; |
573 | |
574 | class Function |
575 | { |
576 | public: |
577 | using ID = SpirvID<Function>; |
578 | |
// Walks all the blocks reachable from id, adding them to reachable.
581 | void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const; |
582 | |
583 | // AssignBlockFields() performs the following for all reachable blocks: |
584 | // * Assigns Block::ins with the identifiers of all blocks that contain |
585 | // this block in their Block::outs. |
// * Sets Block::isLoopMerge to true if the block is the merge block of
//   another loop.
588 | void AssignBlockFields(); |
589 | |
590 | // ForeachBlockDependency calls f with each dependency of the given |
591 | // block. A dependency is an incoming block that is not a loop-back |
592 | // edge. |
593 | void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const; |
594 | |
595 | // ExistsPath returns true if there's a direct or indirect flow from |
596 | // the 'from' block to the 'to' block that does not pass through |
597 | // notPassingThrough. |
598 | bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const; |
599 | |
600 | Block const &getBlock(Block::ID id) const |
601 | { |
602 | auto it = blocks.find(id); |
ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
604 | return it->second; |
605 | } |
606 | |
607 | Block::ID entry; // function entry point block. |
608 | HandleMap<Block> blocks; // blocks belonging to this function. |
609 | Type::ID type; // type of the function. |
610 | Type::ID result; // return type. |
611 | }; |
612 | |
613 | struct TypeOrObject {}; // Dummy struct to represent a Type or Object. |
614 | |
615 | // TypeOrObjectID is an identifier that represents a Type or an Object, |
616 | // and supports implicit casting to and from Type::ID or Object::ID. |
617 | class TypeOrObjectID : public SpirvID<TypeOrObject> |
618 | { |
619 | public: |
620 | using Hash = std::hash<SpirvID<TypeOrObject>>; |
621 | |
622 | inline TypeOrObjectID(uint32_t id) : SpirvID(id) {} |
623 | inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {} |
624 | inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {} |
625 | inline operator Type::ID() const { return Type::ID(value()); } |
626 | inline operator Object::ID() const { return Object::ID(value()); } |
627 | }; |
628 | |
629 | // OpImageSample variants |
630 | enum Variant |
631 | { |
632 | None, // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod. |
633 | Dref, |
634 | Proj, |
635 | ProjDref, |
636 | VARIANT_LAST = ProjDref |
637 | }; |
638 | |
639 | // Compact representation of image instruction parameters that is passed to the |
640 | // trampoline function for retrieving/generating the corresponding sampling routine. |
641 | struct ImageInstruction |
642 | { |
643 | ImageInstruction(Variant variant, SamplerMethod samplerMethod) |
644 | : parameters(0) |
645 | { |
646 | this->variant = variant; |
647 | this->samplerMethod = samplerMethod; |
648 | } |
649 | |
650 | // Unmarshal from raw 32-bit data |
651 | ImageInstruction(uint32_t parameters) : parameters(parameters) {} |
652 | |
653 | SamplerFunction getSamplerFunction() const |
654 | { |
655 | return { static_cast<SamplerMethod>(samplerMethod), offset != 0, sample != 0 }; |
656 | } |
657 | |
658 | bool isDref() const |
659 | { |
660 | return (variant == Dref) || (variant == ProjDref); |
661 | } |
662 | |
663 | bool isProj() const |
664 | { |
665 | return (variant == Proj) || (variant == ProjDref); |
666 | } |
667 | |
668 | union |
669 | { |
670 | struct |
671 | { |
672 | uint32_t variant : BITS(VARIANT_LAST); |
673 | uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST); |
674 | uint32_t gatherComponent : 2; |
675 | |
676 | // Parameters are passed to the sampling routine in this order: |
677 | uint32_t coordinates : 3; // 1-4 (does not contain projection component) |
678 | // uint32_t dref : 1; // Indicated by Variant::ProjDref|Dref |
679 | // uint32_t lodOrBias : 1; // Indicated by SamplerMethod::Lod|Bias|Fetch |
680 | uint32_t grad : 2; // 0-3 components (for each of dx / dy) |
681 | uint32_t offset : 2; // 0-3 components |
682 | uint32_t sample : 1; // 0-1 scalar integer |
683 | }; |
684 | |
685 | uint32_t parameters; |
686 | }; |
687 | }; |
688 | |
static_assert(sizeof(ImageInstruction) == sizeof(uint32_t), "ImageInstruction must be 32-bit");
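// Example (a sketch): packing an implicit-lod 2D sample and recovering its fields
// from the raw 32-bit value passed through the sampling-routine trampoline
// (assumes Implicit is a SamplerMethod enumerator):
//   ImageInstruction packed(None, Implicit);
//   packed.coordinates = 2;                    // u, v
//   ImageInstruction unpacked(packed.parameters);
//   bool dref = unpacked.isDref();             // false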
690 | |
691 | // This method is for retrieving an ID that uniquely identifies the |
692 | // shader entry point represented by this object. |
693 | uint64_t getSerialID() const |
694 | { |
695 | return ((uint64_t)entryPoint.value() << 32) | codeSerialID; |
696 | } |
697 | |
698 | SpirvShader(uint32_t codeSerialID, |
699 | VkShaderStageFlagBits stage, |
700 | const char *entryPointName, |
701 | InsnStore const &insns, |
702 | const vk::RenderPass *renderPass, |
703 | uint32_t subpassIndex, |
704 | bool robustBufferAccess); |
705 | |
706 | struct Modes |
707 | { |
708 | bool EarlyFragmentTests : 1; |
709 | bool DepthReplacing : 1; |
710 | bool DepthGreater : 1; |
711 | bool DepthLess : 1; |
712 | bool DepthUnchanged : 1; |
713 | bool ContainsKill : 1; |
714 | bool ContainsControlBarriers : 1; |
715 | bool NeedsCentroid : 1; |
716 | |
717 | // Compute workgroup dimensions |
718 | int WorkgroupSizeX = 1, WorkgroupSizeY = 1, WorkgroupSizeZ = 1; |
719 | }; |
720 | |
721 | Modes const &getModes() const |
722 | { |
723 | return modes; |
724 | } |
725 | |
726 | struct Capabilities |
727 | { |
728 | bool Matrix : 1; |
729 | bool Shader : 1; |
730 | bool InputAttachment : 1; |
731 | bool Sampled1D : 1; |
732 | bool Image1D : 1; |
733 | bool SampledBuffer : 1; |
734 | bool ImageBuffer : 1; |
735 | bool ImageQuery : 1; |
736 | bool DerivativeControl : 1; |
737 | bool GroupNonUniform : 1; |
738 | bool MultiView : 1; |
739 | bool DeviceGroup : 1; |
740 | bool GroupNonUniformVote : 1; |
741 | bool GroupNonUniformBallot : 1; |
742 | bool GroupNonUniformShuffle : 1; |
743 | bool GroupNonUniformShuffleRelative : 1; |
744 | bool StorageImageExtendedFormats : 1; |
745 | }; |
746 | |
747 | Capabilities const &getUsedCapabilities() const |
748 | { |
749 | return capabilities; |
750 | } |
751 | |
752 | enum AttribType : unsigned char |
753 | { |
754 | ATTRIBTYPE_FLOAT, |
755 | ATTRIBTYPE_INT, |
756 | ATTRIBTYPE_UINT, |
757 | ATTRIBTYPE_UNUSED, |
758 | |
759 | ATTRIBTYPE_LAST = ATTRIBTYPE_UINT |
760 | }; |
761 | |
762 | bool hasBuiltinInput(spv::BuiltIn b) const |
763 | { |
764 | return inputBuiltins.find(b) != inputBuiltins.end(); |
765 | } |
766 | |
767 | bool hasBuiltinOutput(spv::BuiltIn b) const |
768 | { |
769 | return outputBuiltins.find(b) != outputBuiltins.end(); |
770 | } |
771 | |
772 | struct Decorations |
773 | { |
774 | int32_t Location = -1; |
775 | int32_t Component = 0; |
776 | spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1); |
777 | int32_t Offset = -1; |
778 | int32_t ArrayStride = -1; |
779 | int32_t MatrixStride = 1; |
780 | |
781 | bool HasLocation : 1; |
782 | bool HasComponent : 1; |
783 | bool HasBuiltIn : 1; |
784 | bool HasOffset : 1; |
785 | bool HasArrayStride : 1; |
786 | bool HasMatrixStride : 1; |
787 | bool HasRowMajor : 1; // whether RowMajor bit is valid. |
788 | |
789 | bool Flat : 1; |
790 | bool Centroid : 1; |
791 | bool NoPerspective : 1; |
792 | bool Block : 1; |
793 | bool BufferBlock : 1; |
794 | bool RelaxedPrecision : 1; |
795 | bool RowMajor : 1; // RowMajor if true; ColMajor if false |
796 | bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. |
797 | |
798 | Decorations() |
799 | : Location{-1}, Component{0}, |
800 | BuiltIn{static_cast<spv::BuiltIn>(-1)}, |
801 | Offset{-1}, ArrayStride{-1}, MatrixStride{-1}, |
802 | HasLocation{false}, HasComponent{false}, |
803 | HasBuiltIn{false}, HasOffset{false}, |
804 | HasArrayStride{false}, HasMatrixStride{false}, |
805 | HasRowMajor{false}, |
806 | Flat{false}, Centroid{false}, NoPerspective{false}, |
807 | Block{false}, BufferBlock{false}, |
808 | RelaxedPrecision{false}, RowMajor{false}, |
809 | InsideMatrix{false} |
810 | { |
811 | } |
812 | |
813 | Decorations(Decorations const &) = default; |
814 | |
815 | void Apply(Decorations const &src); |
816 | |
817 | void Apply(spv::Decoration decoration, uint32_t arg); |
818 | }; |
819 | |
820 | std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations; |
821 | std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations; |
822 | |
823 | struct DescriptorDecorations |
824 | { |
825 | int32_t DescriptorSet = -1; |
826 | int32_t Binding = -1; |
827 | int32_t InputAttachmentIndex = -1; |
828 | |
829 | void Apply(DescriptorDecorations const &src); |
830 | }; |
831 | |
832 | std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations; |
833 | std::vector<VkFormat> inputAttachmentFormats; |
834 | |
835 | struct InterfaceComponent |
836 | { |
837 | AttribType Type; |
838 | |
839 | union |
840 | { |
841 | struct |
842 | { |
843 | bool Flat : 1; |
844 | bool Centroid : 1; |
845 | bool NoPerspective : 1; |
846 | }; |
847 | |
848 | uint8_t DecorationBits; |
849 | }; |
850 | |
851 | InterfaceComponent() |
852 | : Type{ATTRIBTYPE_UNUSED}, DecorationBits{0} |
853 | { |
854 | } |
855 | }; |
856 | |
857 | struct BuiltinMapping |
858 | { |
859 | Object::ID Id; |
860 | uint32_t FirstComponent; |
861 | uint32_t SizeInComponents; |
862 | }; |
863 | |
864 | struct WorkgroupMemory |
865 | { |
866 | // allocates a new variable of size bytes with the given identifier. |
867 | inline void allocate(Object::ID id, uint32_t size) |
868 | { |
869 | uint32_t offset = totalSize; |
870 | auto it = offsets.emplace(id, offset); |
ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
872 | totalSize += size; |
873 | } |
874 | // returns the byte offset of the variable with the given identifier. |
875 | inline uint32_t offsetOf(Object::ID id) const |
876 | { |
877 | auto it = offsets.find(id); |
ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
879 | return it->second; |
880 | } |
881 | // returns the total allocated size in bytes. |
882 | inline uint32_t size() const { return totalSize; } |
883 | private: |
884 | uint32_t totalSize = 0; // in bytes |
885 | std::unordered_map<Object::ID, uint32_t> offsets; // in bytes |
886 | }; |
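// Example use (a sketch, with hypothetical IDs): laying out two Workgroup-storage
// variables during analysis, then querying their offsets at emit time:
//   WorkgroupMemory wm;
//   wm.allocate(Object::ID(7), 64);
//   wm.allocate(Object::ID(9), 16);
//   uint32_t off = wm.offsetOf(Object::ID(9)); // == 64
//   uint32_t total = wm.size();                // == 80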
887 | |
888 | std::vector<InterfaceComponent> inputs; |
889 | std::vector<InterfaceComponent> outputs; |
890 | |
891 | void emitProlog(SpirvRoutine *routine) const; |
892 | void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const; |
893 | void emitEpilog(SpirvRoutine *routine) const; |
894 | |
895 | using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>; |
896 | std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins; |
897 | std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins; |
898 | WorkgroupMemory workgroupMemory; |
899 | |
900 | private: |
901 | const uint32_t codeSerialID; |
902 | Modes modes = {}; |
903 | Capabilities capabilities = {}; |
904 | HandleMap<Type> types; |
905 | HandleMap<Object> defs; |
906 | HandleMap<Function> functions; |
907 | Function::ID entryPoint; |
908 | |
909 | const bool robustBufferAccess = true; |
910 | spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing. |
911 | |
912 | // DeclareType creates a Type for the given OpTypeX instruction, storing |
913 | // it into the types map. It is called from the analysis pass (constructor). |
914 | void DeclareType(InsnIterator insn); |
915 | |
916 | void ProcessExecutionMode(InsnIterator it); |
917 | |
918 | uint32_t ComputeTypeSize(InsnIterator insn); |
919 | void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const; |
920 | void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const; |
921 | void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const; |
922 | |
923 | // Creates an Object for the instruction's result in 'defs'. |
924 | void DefineResult(const InsnIterator &insn); |
925 | |
926 | // Returns true if data in the given storage class is word-interleaved |
// by each SIMD vector lane; otherwise data is stored linearly.
928 | // |
929 | // Each lane addresses a single word, picked by a base pointer and an |
930 | // integer offset. |
931 | // |
932 | // A word is currently 32 bits (single float, int32_t, uint32_t). |
933 | // A lane is a single element of a SIMD vector register. |
934 | // |
935 | // Storage interleaved by lane - (IsStorageInterleavedByLane() == true): |
936 | // --------------------------------------------------------------------- |
937 | // |
938 | // Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex) |
939 | // |
940 | // Assuming SIMD::Width == 4: |
941 | // |
942 | // Lane[0] | Lane[1] | Lane[2] | Lane[3] |
943 | // ===========+===========+===========+========== |
944 | // LaneOffset=0: | Word[0] | Word[1] | Word[2] | Word[3] |
945 | // ---------------+-----------+-----------+-----------+---------- |
946 | // LaneOffset=1: | Word[4] | Word[5] | Word[6] | Word[7] |
947 | // ---------------+-----------+-----------+-----------+---------- |
948 | // LaneOffset=2: | Word[8] | Word[9] | Word[a] | Word[b] |
949 | // ---------------+-----------+-----------+-----------+---------- |
950 | // LaneOffset=3: | Word[c] | Word[d] | Word[e] | Word[f] |
951 | // |
952 | // |
953 | // Linear storage - (IsStorageInterleavedByLane() == false): |
954 | // --------------------------------------------------------- |
955 | // |
956 | // Address = PtrBase + sizeof(Word) * LaneOffset |
957 | // |
958 | // Lane[0] | Lane[1] | Lane[2] | Lane[3] |
959 | // ===========+===========+===========+========== |
960 | // LaneOffset=0: | Word[0] | Word[0] | Word[0] | Word[0] |
961 | // ---------------+-----------+-----------+-----------+---------- |
962 | // LaneOffset=1: | Word[1] | Word[1] | Word[1] | Word[1] |
963 | // ---------------+-----------+-----------+-----------+---------- |
964 | // LaneOffset=2: | Word[2] | Word[2] | Word[2] | Word[2] |
965 | // ---------------+-----------+-----------+-----------+---------- |
966 | // LaneOffset=3: | Word[3] | Word[3] | Word[3] | Word[3] |
967 | // |
968 | static bool IsStorageInterleavedByLane(spv::StorageClass storageClass); |
969 | static bool IsExplicitLayout(spv::StorageClass storageClass); |
970 | |
971 | // Output storage buffers and images should not be affected by helper invocations |
972 | static bool StoresInHelperInvocation(spv::StorageClass storageClass); |
973 | |
974 | template<typename F> |
975 | int VisitInterfaceInner(Type::ID id, Decorations d, F f) const; |
976 | |
977 | template<typename F> |
978 | void VisitInterface(Object::ID id, F f) const; |
979 | |
980 | template<typename F> |
981 | void VisitMemoryObject(Object::ID id, F f) const; |
982 | |
983 | template<typename F> |
984 | void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, F f) const; |
985 | |
986 | Object& CreateConstant(InsnIterator it); |
987 | |
988 | void ProcessInterfaceVariable(Object &object); |
989 | |
990 | // EmitState holds control-flow state for the emit() pass. |
991 | class EmitState |
992 | { |
993 | public: |
994 | EmitState(SpirvRoutine *routine, |
995 | Function::ID function, |
996 | RValue<SIMD::Int> activeLaneMask, |
997 | RValue<SIMD::Int> storesAndAtomicsMask, |
998 | const vk::DescriptorSet::Bindings &descriptorSets, |
999 | bool robustBufferAccess, |
1000 | spv::ExecutionModel executionModel) |
1001 | : routine(routine), |
1002 | function(function), |
1003 | activeLaneMaskValue(activeLaneMask.value), |
1004 | storesAndAtomicsMaskValue(storesAndAtomicsMask.value), |
1005 | descriptorSets(descriptorSets), |
1006 | robustBufferAccess(robustBufferAccess), |
1007 | executionModel(executionModel) |
1008 | { |
1009 | ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting. |
1010 | } |
1011 | |
1012 | RValue<SIMD::Int> activeLaneMask() const |
1013 | { |
1014 | ASSERT(activeLaneMaskValue != nullptr); |
1015 | return RValue<SIMD::Int>(activeLaneMaskValue); |
1016 | } |
1017 | |
1018 | RValue<SIMD::Int> storesAndAtomicsMask() const |
1019 | { |
1020 | ASSERT(storesAndAtomicsMaskValue != nullptr); |
1021 | return RValue<SIMD::Int>(storesAndAtomicsMaskValue); |
1022 | } |
1023 | |
1024 | void setActiveLaneMask(RValue<SIMD::Int> mask) |
1025 | { |
1026 | activeLaneMaskValue = mask.value; |
1027 | } |
1028 | |
1029 | // Add a new active lane mask edge from the current block to out. |
1030 | // The edge mask value will be (mask AND activeLaneMaskValue). |
1031 | // If multiple active lane masks are added for the same edge, then |
1032 | // they will be ORed together. |
1033 | void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask); |
1034 | |
1035 | // Add a new active lane mask for the edge from -> to. |
1036 | // If multiple active lane masks are added for the same edge, then |
1037 | // they will be ORed together. |
1038 | void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask); |
1039 | |
1040 | SpirvRoutine *routine = nullptr; // The current routine being built. |
1041 | Function::ID function; // The current function being built. |
1042 | Block::ID block; // The current block being built. |
1043 | rr::Value *activeLaneMaskValue = nullptr; // The current active lane mask. |
1044 | rr::Value *storesAndAtomicsMaskValue = nullptr; // The current atomics mask. |
1045 | Block::Set visited; // Blocks already built. |
1046 | std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks; |
1047 | std::deque<Block::ID> *pending; |
1048 | |
1049 | const vk::DescriptorSet::Bindings &descriptorSets; |
1050 | |
1051 | OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const; |
1052 | |
1053 | Intermediate& createIntermediate(Object::ID id, uint32_t size) |
1054 | { |
1055 | auto it = intermediates.emplace(std::piecewise_construct, |
1056 | std::forward_as_tuple(id), |
1057 | std::forward_as_tuple(size)); |
ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
1059 | return it.first->second; |
1060 | } |
1061 | |
1062 | Intermediate const& getIntermediate(Object::ID id) const |
1063 | { |
1064 | auto it = intermediates.find(id); |
ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
1066 | return it->second; |
1067 | } |
1068 | |
1069 | void createPointer(Object::ID id, SIMD::Pointer ptr) |
1070 | { |
1071 | bool added = pointers.emplace(id, ptr).second; |
ASSERT_MSG(added, "Pointer %d created twice", id.value());
1073 | } |
1074 | |
1075 | SIMD::Pointer const& getPointer(Object::ID id) const |
1076 | { |
1077 | auto it = pointers.find(id); |
ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
1079 | return it->second; |
1080 | } |
1081 | |
1082 | private: |
1083 | std::unordered_map<Object::ID, Intermediate> intermediates; |
1084 | std::unordered_map<Object::ID, SIMD::Pointer> pointers; |
1085 | |
1086 | const bool robustBufferAccess = true; // Emit robustBufferAccess safe code. |
1087 | const spv::ExecutionModel executionModel = spv::ExecutionModelMax; |
1088 | }; |
1089 | |
1090 | // EmitResult is an enumerator of result values from the Emit functions. |
1091 | enum class EmitResult |
1092 | { |
1093 | Continue, // No termination instructions. |
1094 | Terminator, // Reached a termination instruction. |
1095 | }; |
1096 | |
// Generic wrapper over either a per-lane intermediate value or a constant.
// Constants are transparently widened to per-lane values by the accessors
// (Float, Int and UInt). This is appropriate in most cases, i.e. when we're
// not going to do something significantly different based on whether the
// value is uniform across lanes.
1101 | class GenericValue |
1102 | { |
1103 | SpirvShader::Object const &obj; |
1104 | Intermediate const *intermediate; |
1105 | |
1106 | public: |
1107 | GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId); |
1108 | |
1109 | RValue<SIMD::Float> Float(uint32_t i) const |
1110 | { |
1111 | if (intermediate) |
1112 | { |
1113 | return intermediate->Float(i); |
1114 | } |
1115 | |
1116 | // Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact |
1117 | // bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant". |
1118 | // Thus we must first construct an integer constant, and bitcast to float. |
1119 | auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get()); |
1120 | return As<SIMD::Float>(SIMD::UInt(constantValue[i])); |
1121 | } |
1122 | |
1123 | RValue<SIMD::Int> Int(uint32_t i) const |
1124 | { |
1125 | if (intermediate) |
1126 | { |
1127 | return intermediate->Int(i); |
1128 | } |
1129 | auto constantValue = reinterpret_cast<int *>(obj.constantValue.get()); |
1130 | return SIMD::Int(constantValue[i]); |
1131 | } |
1132 | |
1133 | RValue<SIMD::UInt> UInt(uint32_t i) const |
1134 | { |
1135 | if (intermediate) |
1136 | { |
1137 | return intermediate->UInt(i); |
1138 | } |
1139 | auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get()); |
1140 | return SIMD::UInt(constantValue[i]); |
1141 | } |
1142 | |
1143 | SpirvShader::Type::ID const type; |
1144 | }; |
1145 | |
1146 | Type const &getType(Type::ID id) const |
1147 | { |
1148 | auto it = types.find(id); |
ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
1150 | return it->second; |
1151 | } |
1152 | |
1153 | Object const &getObject(Object::ID id) const |
1154 | { |
1155 | auto it = defs.find(id); |
ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
1157 | return it->second; |
1158 | } |
1159 | |
1160 | Function const &getFunction(Function::ID id) const |
1161 | { |
1162 | auto it = functions.find(id); |
ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
1164 | return it->second; |
1165 | } |
1166 | |
1167 | // Returns a SIMD::Pointer to the underlying data for the given pointer |
1168 | // object. |
1169 | // Handles objects of the following kinds: |
// • DescriptorSet
// • InterfaceVariable
// • Pointer
1174 | // Calling GetPointerToData with objects of any other kind will assert. |
1175 | SIMD::Pointer GetPointerToData(Object::ID id, int arrayIndex, EmitState const *state) const; |
1176 | |
1177 | SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const; |
1178 | SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const; |
1179 | |
1180 | // Returns the *component* offset in the literal for the given access chain. |
1181 | uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const; |
1182 | |
1183 | // Lookup the active lane mask for the edge from -> to. |
1184 | // If from is unreachable, then a mask of all zeros is returned. |
1185 | // Asserts if from is reachable and the edge does not exist. |
1186 | RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const; |
1187 | |
1188 | // Emit all the unvisited blocks (except for ignore) in DFS order, |
1189 | // starting with id. |
1190 | void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const; |
1191 | void EmitNonLoop(EmitState *state) const; |
1192 | void EmitLoop(EmitState *state) const; |
1193 | |
1194 | void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const; |
1195 | EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const; |
1196 | |
1197 | // Emit pass instructions: |
1198 | EmitResult EmitVariable(InsnIterator insn, EmitState *state) const; |
1199 | EmitResult EmitLoad(InsnIterator insn, EmitState *state) const; |
1200 | EmitResult EmitStore(InsnIterator insn, EmitState *state) const; |
1201 | EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const; |
1202 | EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const; |
1203 | EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const; |
EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
1205 | EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const; |
1206 | EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const; |
1207 | EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const; |
1208 | EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const; |
1209 | EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const; |
1210 | EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const; |
1211 | EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const; |
EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
1213 | EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const; |
1214 | EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const; |
1215 | EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const; |
1216 | EmitResult EmitDot(InsnIterator insn, EmitState *state) const; |
1217 | EmitResult EmitSelect(InsnIterator insn, EmitState *state) const; |
1218 | EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const; |
1219 | EmitResult EmitAny(InsnIterator insn, EmitState *state) const; |
1220 | EmitResult EmitAll(InsnIterator insn, EmitState *state) const; |
1221 | EmitResult EmitBranch(InsnIterator insn, EmitState *state) const; |
1222 | EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const; |
1223 | EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const; |
1224 | EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const; |
1225 | EmitResult EmitReturn(InsnIterator insn, EmitState *state) const; |
1226 | EmitResult EmitKill(InsnIterator insn, EmitState *state) const; |
1227 | EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const; |
1228 | EmitResult EmitPhi(InsnIterator insn, EmitState *state) const; |
1229 | EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const; |
1230 | EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const; |
1231 | EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const; |
1232 | EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const; |
1233 | EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const; |
1234 | EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const; |
1235 | EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const; |
1236 | EmitResult EmitImageQueryLod(InsnIterator insn, EmitState *state) const; |
1237 | EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const; |
1238 | EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const; |
1239 | EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const; |
1240 | EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const; |
1241 | EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const; |
1242 | EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const; |
1243 | EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const; |
1244 | EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const; |
1245 | EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const; |
1246 | EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const; |
1247 | EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const; |
1248 | EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const; |
1249 | EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const; |
1250 | EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const; |
1251 | |
1252 | void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const; |
1253 | SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const; |
1254 | uint32_t GetConstScalarInt(Object::ID id) const; |
1255 | void EvalSpecConstantOp(InsnIterator insn); |
1256 | void EvalSpecConstantUnaryOp(InsnIterator insn); |
1257 | void EvalSpecConstantBinaryOp(InsnIterator insn); |
1258 | |
1259 | // LoadPhi loads the phi values from the alloca storage and places the |
1260 | // load values into the intermediate with the phi's result id. |
1261 | void LoadPhi(InsnIterator insn, EmitState *state) const; |
1262 | |
1263 | // StorePhi updates the phi's alloca storage value using the incoming |
1264 | // values from blocks that are both in the OpPhi instruction and in |
1265 | // filter. |
1266 | void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const& filter) const; |
1267 | |
1268 | // Emits a rr::Fence for the given MemorySemanticsMask. |
1269 | void Fence(spv::MemorySemanticsMask semantics) const; |
1270 | |
1271 | // Helper for calling rr::Yield with res cast to an rr::Int. |
1272 | void Yield(YieldResult res) const; |
1273 | |
1274 | // OpcodeName() returns the name of the opcode op. |
1275 | // If NDEBUG is defined, then OpcodeName() will only return the numerical code. |
1276 | static std::string OpcodeName(spv::Op op); |
1277 | static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics); |
1278 | |
1279 | // Helper as we often need to take dot products as part of doing other things. |
1280 | SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const; |
1281 | |
1282 | SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const; |
1283 | |
1284 | // Splits x into a floating-point significand in the range [0.5, 1.0) |
1285 | // and an integral exponent of two, such that: |
1286 | // x = significand * 2^exponent |
1287 | // Returns the pair <significand, exponent> |
1288 | std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const; |
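// For example, Frexp(SIMD::Float(8.0f)) yields <0.5f, 4>, since 8.0 == 0.5 * 2^4;
// the decomposition is applied independently to each SIMD lane.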
1289 | |
1290 | static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler); |
1291 | static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState); |
1292 | |
1293 | // TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly. |
1294 | static sw::FilterType convertFilterMode(const vk::Sampler *sampler); |
1295 | static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler); |
1296 | static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::Sampler *sampler, VkImageViewType imageViewType); |
1297 | |
1298 | // Returns 0 when invalid. |
1299 | static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model); |
1300 | }; |
1301 | |
1302 | class SpirvRoutine |
1303 | { |
1304 | public: |
1305 | SpirvRoutine(vk::PipelineLayout const *pipelineLayout); |
1306 | |
1307 | using Variable = Array<SIMD::Float>; |
1308 | |
1309 | struct SamplerCache |
1310 | { |
1311 | Pointer<Byte> imageDescriptor = nullptr; |
1312 | Pointer<Byte> sampler; |
1313 | Pointer<Byte> function; |
1314 | }; |
1315 | |
1316 | vk::PipelineLayout const * const pipelineLayout; |
1317 | |
1318 | std::unordered_map<SpirvShader::Object::ID, Variable> variables; |
1319 | std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache; |
1320 | Variable inputs = Variable{MAX_INTERFACE_COMPONENTS}; |
1321 | Variable outputs = Variable{MAX_INTERFACE_COMPONENTS}; |
1322 | |
1323 | Pointer<Byte> workgroupMemory; |
1324 | Pointer<Pointer<Byte>> descriptorSets; |
1325 | Pointer<Int> descriptorDynamicOffsets; |
1326 | Pointer<Byte> pushConstants; |
1327 | Pointer<Byte> constants; |
1328 | Int killMask = Int{0}; |
1329 | SIMD::Int windowSpacePosition[2]; |
1330 | Int viewID; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex |
1331 | |
1332 | void createVariable(SpirvShader::Object::ID id, uint32_t size) |
1333 | { |
1334 | bool added = variables.emplace(id, Variable(size)).second; |
ASSERT_MSG(added, "Variable %d created twice", id.value());
1336 | } |
1337 | |
1338 | Variable& getVariable(SpirvShader::Object::ID id) |
1339 | { |
1340 | auto it = variables.find(id); |
ASSERT_MSG(it != variables.end(), "Unknown variable %d", id.value());
1342 | return it->second; |
1343 | } |
1344 | |
1345 | // setImmutableInputBuiltins() sets all the immutable input builtins, |
1346 | // common for all shader types. |
1347 | void setImmutableInputBuiltins(SpirvShader const *shader); |
1348 | |
1349 | // setInputBuiltin() calls f() with the builtin and value if the shader |
1350 | // uses the input builtin, otherwise the call is a no-op. |
1351 | // F is a function with the signature: |
1352 | // void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
1353 | template <typename F> |
1354 | inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f) |
1355 | { |
1356 | auto it = shader->inputBuiltins.find(id); |
1357 | if (it != shader->inputBuiltins.end()) |
1358 | { |
1359 | const auto& builtin = it->second; |
1360 | f(builtin, getVariable(builtin.Id)); |
1361 | } |
1362 | } |
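// Example (a sketch): a fragment-stage caller populating the FragCoord
// built-in, if the shader declares it:
//   routine->setInputBuiltin(shader, spv::BuiltInFragCoord,
//       [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
//       {
//           // Write builtin.SizeInComponents per-lane values, starting at
//           // value[builtin.FirstComponent].
//       });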
1363 | |
1364 | private: |
1365 | // The phis are only accessible to SpirvShader as they are only used and |
1366 | // exist between calls to SpirvShader::emitProlog() and |
1367 | // SpirvShader::emitEpilog(). |
1368 | friend class SpirvShader; |
1369 | |
1370 | std::unordered_map<SpirvShader::Object::ID, Variable> phis; |
1371 | |
1372 | }; |
1373 | |
} // namespace sw
1375 | |
1376 | #endif // sw_SpirvShader_hpp |
1377 | |