// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef sw_SpirvShader_hpp
#define sw_SpirvShader_hpp

#include "ShaderCore.hpp"
#include "SamplerCore.hpp"
#include "SpirvID.hpp"
#include "System/Types.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkConfig.h"
#include "Vulkan/VkDescriptorSet.hpp"
#include "Common/Types.hpp"
#include "Device/Config.hpp"
#include "Device/Sampler.hpp"

#include <spirv/unified1/spirv.hpp>

#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <deque>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#undef Yield // b/127920555

namespace vk
{
	class PipelineLayout;
	class ImageView;
	class Sampler;
	class RenderPass;
	struct SampledImageDescriptor;
} // namespace vk

namespace sw
{
	// Forward declarations.
	class SpirvRoutine;

	enum class OutOfBoundsBehavior
	{
		Nullify,             // Loads become zero, stores are elided.
		RobustBufferAccess,  // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing).
		UndefinedValue,      // Only for load operations. Not secure. No program termination.
		UndefinedBehavior,   // Program may terminate.
	};

	// SIMD contains types that represent multiple scalars packed into a single
	// vector data type. Types in the SIMD namespace provide a semantic hint
	// that the data should be treated as a per-execution-lane scalar instead of
	// a typical Euclidean-style vector type.
	namespace SIMD
	{
		// Width is the number of per-lane scalars packed into each SIMD vector.
		static constexpr int Width = 4;

		using Float = rr::Float4;
		using Int = rr::Int4;
		using UInt = rr::UInt4;

		struct Pointer
		{
			Pointer(rr::Pointer<Byte> base, rr::Int limit)
				: base(base),
				  dynamicLimit(limit), staticLimit(0),
				  dynamicOffsets(0), staticOffsets{},
				  hasDynamicLimit(true), hasDynamicOffsets(false) {}

			Pointer(rr::Pointer<Byte> base, unsigned int limit)
				: base(base),
				  dynamicLimit(0), staticLimit(limit),
				  dynamicOffsets(0), staticOffsets{},
				  hasDynamicLimit(false), hasDynamicOffsets(false) {}

			Pointer(rr::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
				: base(base),
				  dynamicLimit(limit), staticLimit(0),
				  dynamicOffsets(offset), staticOffsets{},
				  hasDynamicLimit(true), hasDynamicOffsets(true) {}

			Pointer(rr::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
				: base(base),
				  dynamicLimit(0), staticLimit(limit),
				  dynamicOffsets(offset), staticOffsets{},
				  hasDynamicLimit(false), hasDynamicOffsets(true) {}

			inline Pointer& operator += (Int i)
			{
				dynamicOffsets += i;
				hasDynamicOffsets = true;
				return *this;
			}

			inline Pointer& operator *= (Int i)
			{
				dynamicOffsets = offsets() * i;
				staticOffsets = {};
				hasDynamicOffsets = true;
				return *this;
			}

			inline Pointer operator + (SIMD::Int i) { Pointer p = *this; p += i; return p; }
			inline Pointer operator * (SIMD::Int i) { Pointer p = *this; p *= i; return p; }

			inline Pointer& operator += (int i)
			{
				for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
				return *this;
			}

			inline Pointer& operator *= (int i)
			{
				for (int el = 0; el < SIMD::Width; el++) { staticOffsets[el] *= i; }
				if (hasDynamicOffsets)
				{
					dynamicOffsets *= SIMD::Int(i);
				}
				return *this;
			}

			inline Pointer operator + (int i) { Pointer p = *this; p += i; return p; }
			inline Pointer operator * (int i) { Pointer p = *this; p *= i; return p; }

			inline SIMD::Int offsets() const
			{
				static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
				return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]);
			}

			inline SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
			{
				ASSERT(accessSize > 0);

				if (isStaticallyInBounds(accessSize, robustness))
				{
					return SIMD::Int(0xffffffff);
				}

				if (!hasDynamicOffsets && !hasDynamicLimit)
				{
					// Common fast paths.
					static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
					return SIMD::Int(
						(staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
						(staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
						(staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
						(staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
				}

				return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
			}

			inline bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
			{
				if (hasDynamicOffsets)
				{
					return false;
				}

				if (hasDynamicLimit)
				{
					if (hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
					{
						switch(robustness)
						{
						case OutOfBoundsBehavior::UndefinedBehavior:
							// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
							// but since it can't know in advance which branches are taken this must be true even for inactive lanes.
							return true;
						case OutOfBoundsBehavior::Nullify:
						case OutOfBoundsBehavior::RobustBufferAccess:
						case OutOfBoundsBehavior::UndefinedValue:
							return false;
						}
					}
				}

				for (int i = 0; i < SIMD::Width; i++)
				{
					if (staticOffsets[i] + accessSize - 1 >= staticLimit)
					{
						return false;
					}
				}

				return true;
			}

			inline Int limit() const
			{
				return dynamicLimit + staticLimit;
			}

			// Returns true if all offsets are sequential
			// (N+0*step, N+1*step, N+2*step, N+3*step)
			inline rr::Bool hasSequentialOffsets(unsigned int step) const
			{
				if (hasDynamicOffsets)
				{
					auto o = offsets();
					static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
					return rr::SignMask(~CmpEQ(o.yzww, o + SIMD::Int(1*step, 1*step, 1*step, 0))) == 0;
				}
				return hasStaticSequentialOffsets(step);
			}

			// Returns true if all offsets are compile-time static and
			// sequential (N+0*step, N+1*step, N+2*step, N+3*step)
			inline bool hasStaticSequentialOffsets(unsigned int step) const
			{
				if (hasDynamicOffsets)
				{
					return false;
				}
				for (int i = 1; i < SIMD::Width; i++)
				{
					if (staticOffsets[i-1] + int32_t(step) != staticOffsets[i]) { return false; }
				}
				return true;
			}

			// Returns true if all offsets are equal (N, N, N, N)
			inline rr::Bool hasEqualOffsets() const
			{
				if (hasDynamicOffsets)
				{
					auto o = offsets();
					static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
					return rr::SignMask(~CmpEQ(o, o.yzwx)) == 0;
				}
				return hasStaticEqualOffsets();
			}

			// Returns true if all offsets are compile-time static and are equal
			// (N, N, N, N)
			inline bool hasStaticEqualOffsets() const
			{
				if (hasDynamicOffsets)
				{
					return false;
				}
				for (int i = 1; i < SIMD::Width; i++)
				{
					if (staticOffsets[i-1] != staticOffsets[i]) { return false; }
				}
				return true;
			}

			// Base address for the pointer, common across all lanes.
			rr::Pointer<rr::Byte> base;

			// Upper (non-inclusive) limit for offsets from base.
			rr::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
			unsigned int staticLimit;

			// Per lane offsets from base.
			SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
			std::array<int32_t, SIMD::Width> staticOffsets;

			bool hasDynamicLimit;   // True if dynamicLimit is non-zero.
			bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero.
		};
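
		// Illustrative sketch (hypothetical values, not part of the interface):
		// a pointer covering a 64-byte buffer, advanced to per-lane offsets.
		//
		//   SIMD::Pointer p(buffer, 64u);  // static limit of 64 bytes
		//   p += 16;                       // uniform static advance of all lanes
		//   p += SIMD::Int(0, 4, 8, 12);   // per-lane dynamic offsets
		//   SIMD::Int mask = p.isInBounds(4, OutOfBoundsBehavior::Nullify);
		//
		// Here offsets() yields (16, 20, 24, 28), hasSequentialOffsets(4) holds,
		// and all four lanes of the 4-byte access are in bounds, so mask is all ones.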

		template <typename T> struct Element {};
		template <> struct Element<Float> { using type = rr::Float; };
		template <> struct Element<Int> { using type = rr::Int; };
		template <> struct Element<UInt> { using type = rr::UInt; };

		template<typename T>
		void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);

		template<typename T>
		void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
		{
			Store(ptr, T(val), robustness, mask, atomic, order);
		}

		template<typename T>
		T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
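
		// Illustrative sketch (hypothetical call site): a masked, robust load of
		// one SIMD word through a SIMD::Pointer, followed by a masked store.
		// 'ptr' and 'activeLaneMask' are assumed to be in scope.
		//
		//   auto v = SIMD::Load<SIMD::Float>(ptr, OutOfBoundsBehavior::RobustBufferAccess, activeLaneMask);
		//   SIMD::Store(ptr, v, OutOfBoundsBehavior::RobustBufferAccess, activeLaneMask);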
	} // namespace SIMD

	// Incrementally constructed complex bundle of rvalues.
	// Effectively a restricted vector, supporting only:
	// - allocation to a (runtime-known) fixed size
	// - in-place construction of elements
	// - const operator[]
	class Intermediate
	{
	public:
		Intermediate(uint32_t size) : scalar(new rr::Value*[size]), size(size)
		{
			memset(scalar, 0, sizeof(rr::Value*) * size);
		}

		~Intermediate()
		{
			delete[] scalar;
		}

		void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value); }

		void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value); }
		void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value); }

		// Value retrieval functions.
		RValue<SIMD::Float> Float(uint32_t i) const
		{
			ASSERT(i < size);
			ASSERT(scalar[i] != nullptr);
			return As<SIMD::Float>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Float>(scalar)
		}

		RValue<SIMD::Int> Int(uint32_t i) const
		{
			ASSERT(i < size);
			ASSERT(scalar[i] != nullptr);
			return As<SIMD::Int>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Int>(scalar)
		}

		RValue<SIMD::UInt> UInt(uint32_t i) const
		{
			ASSERT(i < size);
			ASSERT(scalar[i] != nullptr);
			return As<SIMD::UInt>(scalar[i]); // TODO(b/128539387): RValue<SIMD::UInt>(scalar)
		}

		// No copy/move construction or assignment
		Intermediate(Intermediate const &) = delete;
		Intermediate(Intermediate &&) = delete;
		Intermediate & operator=(Intermediate const &) = delete;
		Intermediate & operator=(Intermediate &&) = delete;

	private:
		void emplace(uint32_t i, rr::Value *value)
		{
			ASSERT(i < size);
			ASSERT(scalar[i] == nullptr);
			scalar[i] = value;
		}

		rr::Value **const scalar;
		uint32_t size;
	};
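
	// Illustrative sketch (hypothetical values): building a two-component
	// intermediate and reading it back. Each element must be written exactly
	// once before it is read.
	//
	//   Intermediate result(2);
	//   result.move(0, a);        // a : RValue<SIMD::Float>
	//   result.move(1, b);        // b : RValue<SIMD::Float>
	//   auto x = result.Float(0); // read back as SIMD::Float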

	class SpirvShader
	{
	public:
		using InsnStore = std::vector<uint32_t>;
		InsnStore insns;

		using ImageSampler = void(void *texture, void *sampler, void *uvsIn, void *texelOut, void *constants);

		enum class YieldResult
		{
			ControlBarrier,
		};

		/* Pseudo-iterator over SPIR-V instructions, designed to support range-based-for. */
		class InsnIterator
		{
			InsnStore::const_iterator iter;

		public:
			spv::Op opcode() const
			{
				return static_cast<spv::Op>(*iter & spv::OpCodeMask);
			}

			uint32_t wordCount() const
			{
				return *iter >> spv::WordCountShift;
			}

			uint32_t word(uint32_t n) const
			{
				ASSERT(n < wordCount());
				return iter[n];
			}

			uint32_t const *wordPointer(uint32_t n) const
			{
				ASSERT(n < wordCount());
				return &iter[n];
			}

			const char *string(uint32_t n) const
			{
				return reinterpret_cast<const char*>(wordPointer(n));
			}

			bool operator==(InsnIterator const &other) const
			{
				return iter == other.iter;
			}

			bool operator!=(InsnIterator const &other) const
			{
				return iter != other.iter;
			}

			InsnIterator operator*() const
			{
				return *this;
			}

			InsnIterator &operator++()
			{
				iter += wordCount();
				return *this;
			}

			InsnIterator const operator++(int)
			{
				InsnIterator ret{*this};
				iter += wordCount();
				return ret;
			}

			InsnIterator(InsnIterator const &other) = default;

			InsnIterator() = default;

			explicit InsnIterator(InsnStore::const_iterator iter) : iter{iter}
			{
			}
		};

		/* range-based-for interface */
		InsnIterator begin() const
		{
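			// A SPIR-V module begins with a five-word header: magic number,
			// version, generator, bound, and schema. Instructions start at word 5.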
			return InsnIterator{insns.cbegin() + 5};
		}

		InsnIterator end() const
		{
			return InsnIterator{insns.cend()};
		}

		class Type
		{
		public:
			using ID = SpirvID<Type>;

			spv::Op opcode() const { return definition.opcode(); }

			InsnIterator definition;
			spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);
			uint32_t sizeInComponents = 0;
			bool isBuiltInBlock = false;

			// Inner element type for pointers, arrays, vectors and matrices.
			ID element;
		};

		class Object
		{
		public:
			using ID = SpirvID<Object>;

			spv::Op opcode() const { return definition.opcode(); }

			InsnIterator definition;
			Type::ID type;
			std::unique_ptr<uint32_t[]> constantValue = nullptr;

			enum class Kind
			{
				// Invalid default kind.
				// If we get left with an object in this state, the module was
				// broken.
				Unknown,

				// TODO: Better document this kind.
				// A shader interface variable pointer.
				// Pointer with uniform address across all lanes.
				// Pointer held by SpirvRoutine::pointers
				InterfaceVariable,

				// Constant value held by Object::constantValue.
				Constant,

				// Value held by SpirvRoutine::intermediates.
				Intermediate,

				// Pointer held by SpirvRoutine::pointers
				Pointer,

				// A pointer to a vk::DescriptorSet*.
				// Pointer held by SpirvRoutine::pointers.
				DescriptorSet,
			};

			Kind kind = Kind::Unknown;
		};

		// Block is an interval of SPIR-V instructions, starting with the
		// opening OpLabel, and ending with a termination instruction.
		class Block
		{
		public:
			using ID = SpirvID<Block>;
			using Set = std::unordered_set<ID>;

			// Edge represents the graph edge between two blocks.
			struct Edge
			{
				ID from;
				ID to;

				bool operator == (const Edge& other) const { return from == other.from && to == other.to; }

				struct Hash
				{
					std::size_t operator()(const Edge& edge) const noexcept
					{
						return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
					}
				};
			};

			Block() = default;
			Block(const Block& other) = default;
			explicit Block(InsnIterator begin, InsnIterator end);

			/* range-based-for interface */
			inline InsnIterator begin() const { return begin_; }
			inline InsnIterator end() const { return end_; }

			enum Kind
			{
				Simple,                        // OpBranch or other simple terminator.
				StructuredBranchConditional,   // OpSelectionMerge + OpBranchConditional
				UnstructuredBranchConditional, // OpBranchConditional
				StructuredSwitch,              // OpSelectionMerge + OpSwitch
				UnstructuredSwitch,            // OpSwitch
				Loop,                          // OpLoopMerge + [OpBranchConditional | OpBranch]
			};

			Kind kind = Simple;
			InsnIterator mergeInstruction;  // Structured control flow merge instruction.
			InsnIterator branchInstruction; // Branch instruction.
			ID mergeBlock;                  // Structured flow merge block.
			ID continueTarget;              // Loop continue block.
			Set ins;                        // Blocks that branch into this block.
			Set outs;                       // Blocks that this block branches to.
			bool isLoopMerge = false;

		private:
			InsnIterator begin_;
			InsnIterator end_;
		};

		class Function
		{
		public:
			using ID = SpirvID<Function>;

			// Walks all the blocks reachable from id, adding them to
			// reachable.
			void TraverseReachableBlocks(Block::ID id, Block::Set& reachable) const;

			// AssignBlockFields() performs the following for all reachable blocks:
			// * Assigns Block::ins with the identifiers of all blocks that contain
			//   this block in their Block::outs.
			// * Sets Block::isLoopMerge to true if the block is the merge block
			//   of a loop.
			void AssignBlockFields();

			// ForeachBlockDependency calls f with each dependency of the given
			// block. A dependency is an incoming block that is not a loop-back
			// edge.
			void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;

			// ExistsPath returns true if there's a direct or indirect flow from
			// the 'from' block to the 'to' block that does not pass through
			// notPassingThrough.
			bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;

			Block const &getBlock(Block::ID id) const
			{
				auto it = blocks.find(id);
				ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
				return it->second;
			}

			Block::ID entry;         // Function entry point block.
			HandleMap<Block> blocks; // Blocks belonging to this function.
			Type::ID type;           // Type of the function.
			Type::ID result;         // Return type.
		};

		struct TypeOrObject {}; // Dummy struct to represent a Type or Object.

		// TypeOrObjectID is an identifier that represents a Type or an Object,
		// and supports implicit casting to and from Type::ID or Object::ID.
		class TypeOrObjectID : public SpirvID<TypeOrObject>
		{
		public:
			using Hash = std::hash<SpirvID<TypeOrObject>>;

			inline TypeOrObjectID(uint32_t id) : SpirvID(id) {}
			inline TypeOrObjectID(Type::ID id) : SpirvID(id.value()) {}
			inline TypeOrObjectID(Object::ID id) : SpirvID(id.value()) {}
			inline operator Type::ID() const { return Type::ID(value()); }
			inline operator Object::ID() const { return Object::ID(value()); }
		};

		// OpImageSample variants
		enum Variant
		{
			None, // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
			Dref,
			Proj,
			ProjDref,
			VARIANT_LAST = ProjDref
		};

		// Compact representation of image instruction parameters that is passed to the
		// trampoline function for retrieving/generating the corresponding sampling routine.
		struct ImageInstruction
		{
			ImageInstruction(Variant variant, SamplerMethod samplerMethod)
				: parameters(0)
			{
				this->variant = variant;
				this->samplerMethod = samplerMethod;
			}

			// Unmarshal from raw 32-bit data
			ImageInstruction(uint32_t parameters) : parameters(parameters) {}

			SamplerFunction getSamplerFunction() const
			{
				return { static_cast<SamplerMethod>(samplerMethod), offset != 0, sample != 0 };
			}

			bool isDref() const
			{
				return (variant == Dref) || (variant == ProjDref);
			}

			bool isProj() const
			{
				return (variant == Proj) || (variant == ProjDref);
			}

			union
			{
				struct
				{
					uint32_t variant : BITS(VARIANT_LAST);
					uint32_t samplerMethod : BITS(SAMPLER_METHOD_LAST);
					uint32_t gatherComponent : 2;

					// Parameters are passed to the sampling routine in this order:
					uint32_t coordinates : 3; // 1-4 (does not contain projection component)
				//	uint32_t dref : 1;        // Indicated by Variant::ProjDref|Dref
				//	uint32_t lodOrBias : 1;   // Indicated by SamplerMethod::Lod|Bias|Fetch
					uint32_t grad : 2;        // 0-3 components (for each of dx / dy)
					uint32_t offset : 2;      // 0-3 components
					uint32_t sample : 1;      // 0-1 scalar integer
				};

				uint32_t parameters;
			};
		};

		static_assert(sizeof(ImageInstruction) == sizeof(uint32_t), "ImageInstruction must be 32-bit");
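
		// Illustrative round trip (hypothetical values; assumes a SamplerMethod
		// enumerator such as Implicit): the bitfield state survives marshalling
		// through the raw 32-bit representation.
		//
		//   ImageInstruction a(Dref, Implicit);
		//   a.coordinates = 2;
		//   ImageInstruction b(a.parameters); // unmarshal from raw data
		//   ASSERT(b.isDref() && b.coordinates == 2);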

		// Returns an ID that uniquely identifies the shader entry point
		// represented by this object.
		uint64_t getSerialID() const
		{
			return ((uint64_t)entryPoint.value() << 32) | codeSerialID;
		}

		SpirvShader(uint32_t codeSerialID,
			VkShaderStageFlagBits stage,
			const char *entryPointName,
			InsnStore const &insns,
			const vk::RenderPass *renderPass,
			uint32_t subpassIndex,
			bool robustBufferAccess);

		struct Modes
		{
			bool EarlyFragmentTests : 1;
			bool DepthReplacing : 1;
			bool DepthGreater : 1;
			bool DepthLess : 1;
			bool DepthUnchanged : 1;
			bool ContainsKill : 1;
			bool ContainsControlBarriers : 1;
			bool NeedsCentroid : 1;

			// Compute workgroup dimensions
			int WorkgroupSizeX = 1, WorkgroupSizeY = 1, WorkgroupSizeZ = 1;
		};

		Modes const &getModes() const
		{
			return modes;
		}

		struct Capabilities
		{
			bool Matrix : 1;
			bool Shader : 1;
			bool InputAttachment : 1;
			bool Sampled1D : 1;
			bool Image1D : 1;
			bool SampledBuffer : 1;
			bool ImageBuffer : 1;
			bool ImageQuery : 1;
			bool DerivativeControl : 1;
			bool GroupNonUniform : 1;
			bool MultiView : 1;
			bool DeviceGroup : 1;
			bool GroupNonUniformVote : 1;
			bool GroupNonUniformBallot : 1;
			bool GroupNonUniformShuffle : 1;
			bool GroupNonUniformShuffleRelative : 1;
			bool StorageImageExtendedFormats : 1;
		};

		Capabilities const &getUsedCapabilities() const
		{
			return capabilities;
		}

		enum AttribType : unsigned char
		{
			ATTRIBTYPE_FLOAT,
			ATTRIBTYPE_INT,
			ATTRIBTYPE_UINT,
			ATTRIBTYPE_UNUSED,

			ATTRIBTYPE_LAST = ATTRIBTYPE_UINT
		};

		bool hasBuiltinInput(spv::BuiltIn b) const
		{
			return inputBuiltins.find(b) != inputBuiltins.end();
		}

		bool hasBuiltinOutput(spv::BuiltIn b) const
		{
			return outputBuiltins.find(b) != outputBuiltins.end();
		}

		struct Decorations
		{
			int32_t Location = -1;
			int32_t Component = 0;
			spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1);
			int32_t Offset = -1;
			int32_t ArrayStride = -1;
			int32_t MatrixStride = -1;

			bool HasLocation : 1;
			bool HasComponent : 1;
			bool HasBuiltIn : 1;
			bool HasOffset : 1;
			bool HasArrayStride : 1;
			bool HasMatrixStride : 1;
			bool HasRowMajor : 1; // Whether the RowMajor bit is valid.

			bool Flat : 1;
			bool Centroid : 1;
			bool NoPerspective : 1;
			bool Block : 1;
			bool BufferBlock : 1;
			bool RelaxedPrecision : 1;
			bool RowMajor : 1;     // RowMajor if true; ColMajor if false
			bool InsideMatrix : 1; // Pseudo-decoration for whether we're inside a matrix.

			Decorations()
				: Location{-1}, Component{0},
				  BuiltIn{static_cast<spv::BuiltIn>(-1)},
				  Offset{-1}, ArrayStride{-1}, MatrixStride{-1},
				  HasLocation{false}, HasComponent{false},
				  HasBuiltIn{false}, HasOffset{false},
				  HasArrayStride{false}, HasMatrixStride{false},
				  HasRowMajor{false},
				  Flat{false}, Centroid{false}, NoPerspective{false},
				  Block{false}, BufferBlock{false},
				  RelaxedPrecision{false}, RowMajor{false},
				  InsideMatrix{false}
			{
			}

			Decorations(Decorations const &) = default;

			void Apply(Decorations const &src);

			void Apply(spv::Decoration decoration, uint32_t arg);
		};

		std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations;
		std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations;

		struct DescriptorDecorations
		{
			int32_t DescriptorSet = -1;
			int32_t Binding = -1;
			int32_t InputAttachmentIndex = -1;

			void Apply(DescriptorDecorations const &src);
		};

		std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations;
		std::vector<VkFormat> inputAttachmentFormats;

		struct InterfaceComponent
		{
			AttribType Type;

			union
			{
				struct
				{
					bool Flat : 1;
					bool Centroid : 1;
					bool NoPerspective : 1;
				};

				uint8_t DecorationBits;
			};

			InterfaceComponent()
				: Type{ATTRIBTYPE_UNUSED}, DecorationBits{0}
			{
			}
		};

		struct BuiltinMapping
		{
			Object::ID Id;
			uint32_t FirstComponent;
			uint32_t SizeInComponents;
		};

		struct WorkgroupMemory
		{
			// Allocates a new variable of 'size' bytes with the given identifier.
			inline void allocate(Object::ID id, uint32_t size)
			{
				uint32_t offset = totalSize;
				auto it = offsets.emplace(id, offset);
				ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
				totalSize += size;
			}
			// Returns the byte offset of the variable with the given identifier.
			inline uint32_t offsetOf(Object::ID id) const
			{
				auto it = offsets.find(id);
				ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
				return it->second;
			}
			// Returns the total allocated size in bytes.
			inline uint32_t size() const { return totalSize; }

		private:
			uint32_t totalSize = 0;                           // in bytes
			std::unordered_map<Object::ID, uint32_t> offsets; // in bytes
		};
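
		// Illustrative sketch (hypothetical ids): allocations are laid out
		// back to back, bump-allocator style.
		//
		//   WorkgroupMemory memory;
		//   memory.allocate(idA, 64);       // idA at byte offset 0
		//   memory.allocate(idB, 16);       // idB at byte offset 64
		//   uint32_t total = memory.size(); // 80 bytes in total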

		std::vector<InterfaceComponent> inputs;
		std::vector<InterfaceComponent> outputs;

		void emitProlog(SpirvRoutine *routine) const;
		void emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, RValue<SIMD::Int> const &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets) const;
		void emitEpilog(SpirvRoutine *routine) const;

		using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
		std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
		std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
		WorkgroupMemory workgroupMemory;

	private:
		const uint32_t codeSerialID;
		Modes modes = {};
		Capabilities capabilities = {};
		HandleMap<Type> types;
		HandleMap<Object> defs;
		HandleMap<Function> functions;
		Function::ID entryPoint;

		const bool robustBufferAccess = true;
		spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.

		// DeclareType creates a Type for the given OpTypeX instruction, storing
		// it into the types map. It is called from the analysis pass (constructor).
		void DeclareType(InsnIterator insn);

		void ProcessExecutionMode(InsnIterator it);

		uint32_t ComputeTypeSize(InsnIterator insn);
		void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
		void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
		void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, uint32_t numIndexes, uint32_t const *indexIds) const;

		// Creates an Object for the instruction's result in 'defs'.
		void DefineResult(const InsnIterator &insn);

		// Returns true if data in the given storage class is word-interleaved
		// by each SIMD vector lane; otherwise data is stored linearly.
		//
		// Each lane addresses a single word, picked by a base pointer and an
		// integer offset.
		//
		// A word is currently 32 bits (single float, int32_t, uint32_t).
		// A lane is a single element of a SIMD vector register.
		//
		// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
		// ---------------------------------------------------------------------
		//
		// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
		//
		// Assuming SIMD::Width == 4:
		//
		//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
		//                 ===========+===========+===========+==========
		//  LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
		// ---------------+-----------+-----------+-----------+----------
		//  LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
		// ---------------+-----------+-----------+-----------+----------
		//  LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
		// ---------------+-----------+-----------+-----------+----------
		//  LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
		//
		//
		// Linear storage - (IsStorageInterleavedByLane() == false):
		// ---------------------------------------------------------
		//
		// Address = PtrBase + sizeof(Word) * LaneOffset
		//
		//                   Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
		//                 ===========+===========+===========+==========
		//  LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
		// ---------------+-----------+-----------+-----------+----------
		//  LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
		// ---------------+-----------+-----------+-----------+----------
		//  LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
		// ---------------+-----------+-----------+-----------+----------
		//  LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
		//
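		// Worked example for the interleaved case (SIMD::Width == 4): the word
		// for LaneOffset == 2 in Lane[1] lives at PtrBase + 4 * (4 * 2 + 1) =
		// PtrBase + 36, which is Word[9] in the first table above.
		//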
		static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
		static bool IsExplicitLayout(spv::StorageClass storageClass);

		// Output storage buffers and images should not be affected by helper invocations
		static bool StoresInHelperInvocation(spv::StorageClass storageClass);

		template<typename F>
		int VisitInterfaceInner(Type::ID id, Decorations d, F f) const;

		template<typename F>
		void VisitInterface(Object::ID id, F f) const;

		template<typename F>
		void VisitMemoryObject(Object::ID id, F f) const;

		template<typename F>
		void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, F f) const;

		Object& CreateConstant(InsnIterator it);

		void ProcessInterfaceVariable(Object &object);

		// EmitState holds control-flow state for the emit() pass.
		class EmitState
		{
		public:
			EmitState(SpirvRoutine *routine,
				Function::ID function,
				RValue<SIMD::Int> activeLaneMask,
				RValue<SIMD::Int> storesAndAtomicsMask,
				const vk::DescriptorSet::Bindings &descriptorSets,
				bool robustBufferAccess,
				spv::ExecutionModel executionModel)
				: routine(routine),
				  function(function),
				  activeLaneMaskValue(activeLaneMask.value),
				  storesAndAtomicsMaskValue(storesAndAtomicsMask.value),
				  descriptorSets(descriptorSets),
				  robustBufferAccess(robustBufferAccess),
				  executionModel(executionModel)
			{
				ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting.
			}

			RValue<SIMD::Int> activeLaneMask() const
			{
				ASSERT(activeLaneMaskValue != nullptr);
				return RValue<SIMD::Int>(activeLaneMaskValue);
			}

			RValue<SIMD::Int> storesAndAtomicsMask() const
			{
				ASSERT(storesAndAtomicsMaskValue != nullptr);
				return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
			}

			void setActiveLaneMask(RValue<SIMD::Int> mask)
			{
				activeLaneMaskValue = mask.value;
			}

			// Add a new active lane mask edge from the current block to out.
			// The edge mask value will be (mask AND activeLaneMaskValue).
			// If multiple active lane masks are added for the same edge, then
			// they will be ORed together.
			void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);

			// Add a new active lane mask for the edge from -> to.
			// If multiple active lane masks are added for the same edge, then
			// they will be ORed together.
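			// (Multiple masks for one edge arise, for example, when an OpSwitch
			// has several case labels that all branch to the same block.)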
			void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);

			SpirvRoutine *routine = nullptr;                // The current routine being built.
			Function::ID function;                          // The current function being built.
			Block::ID block;                                // The current block being built.
			rr::Value *activeLaneMaskValue = nullptr;       // The current active lane mask.
			rr::Value *storesAndAtomicsMaskValue = nullptr; // The current stores and atomics mask.
			Block::Set visited;                             // Blocks already built.
			std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
			std::deque<Block::ID> *pending;

			const vk::DescriptorSet::Bindings &descriptorSets;

			OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;

			Intermediate& createIntermediate(Object::ID id, uint32_t size)
			{
				auto it = intermediates.emplace(std::piecewise_construct,
					std::forward_as_tuple(id),
					std::forward_as_tuple(size));
				ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
				return it.first->second;
			}

			Intermediate const& getIntermediate(Object::ID id) const
			{
				auto it = intermediates.find(id);
				ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
				return it->second;
			}

			void createPointer(Object::ID id, SIMD::Pointer ptr)
			{
				bool added = pointers.emplace(id, ptr).second;
				ASSERT_MSG(added, "Pointer %d created twice", id.value());
			}

			SIMD::Pointer const& getPointer(Object::ID id) const
			{
				auto it = pointers.find(id);
				ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
				return it->second;
			}

		private:
			std::unordered_map<Object::ID, Intermediate> intermediates;
			std::unordered_map<Object::ID, SIMD::Pointer> pointers;

			const bool robustBufferAccess = true; // Emit robustBufferAccess safe code.
			const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
		};

		// EmitResult is an enumerator of result values from the Emit functions.
		enum class EmitResult
		{
			Continue,   // No termination instructions.
			Terminator, // Reached a termination instruction.
		};

		// Generic wrapper over either a per-lane intermediate value, or a constant.
		// Constants are transparently widened to per-lane values in the accessor
		// functions. This is appropriate in most cases, i.e. when we're not going
		// to do something significantly different based on whether the value is
		// uniform across lanes.
		class GenericValue
		{
			SpirvShader::Object const &obj;
			Intermediate const *intermediate;

		public:
			GenericValue(SpirvShader const *shader, EmitState const *state, SpirvShader::Object::ID objId);

			RValue<SIMD::Float> Float(uint32_t i) const
			{
				if (intermediate)
				{
					return intermediate->Float(i);
				}

				// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
				// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
				// Thus we must first construct an integer constant, and bitcast to float.
				auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get());
				return As<SIMD::Float>(SIMD::UInt(constantValue[i]));
			}

			RValue<SIMD::Int> Int(uint32_t i) const
			{
				if (intermediate)
				{
					return intermediate->Int(i);
				}
				auto constantValue = reinterpret_cast<int *>(obj.constantValue.get());
				return SIMD::Int(constantValue[i]);
			}

			RValue<SIMD::UInt> UInt(uint32_t i) const
			{
				if (intermediate)
				{
					return intermediate->UInt(i);
				}
				auto constantValue = reinterpret_cast<uint32_t *>(obj.constantValue.get());
				return SIMD::UInt(constantValue[i]);
			}

			SpirvShader::Type::ID const type;
		};

		Type const &getType(Type::ID id) const
		{
			auto it = types.find(id);
			ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
			return it->second;
		}

		Object const &getObject(Object::ID id) const
		{
			auto it = defs.find(id);
			ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
			return it->second;
		}

		Function const &getFunction(Function::ID id) const
		{
			auto it = functions.find(id);
			ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
			return it->second;
		}

		// Returns a SIMD::Pointer to the underlying data for the given pointer
		// object.
		// Handles objects of the following kinds:
		//  • DescriptorSet
		//  • InterfaceVariable
		//  • Pointer
		// Calling GetPointerToData with objects of any other kind will assert.
		SIMD::Pointer GetPointerToData(Object::ID id, int arrayIndex, EmitState const *state) const;

		SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;
		SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, EmitState const *state) const;

		// Returns the *component* offset in the literal for the given access chain.
		uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;

		// Looks up the active lane mask for the edge from -> to.
		// If from is unreachable, then a mask of all zeros is returned.
		// Asserts if from is reachable and the edge does not exist.
		RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;

		// Emit all the unvisited blocks (except for ignore) in DFS order,
		// starting with id.
		void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
		void EmitNonLoop(EmitState *state) const;
		void EmitLoop(EmitState *state) const;

		void EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const;
		EmitResult EmitInstruction(InsnIterator insn, EmitState *state) const;

		// Emit pass instructions:
		EmitResult EmitVariable(InsnIterator insn, EmitState *state) const;
		EmitResult EmitLoad(InsnIterator insn, EmitState *state) const;
		EmitResult EmitStore(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAccessChain(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCompositeConstruct(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCompositeInsert(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCompositeExtract(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorShuffle(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const;
		EmitResult EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const;
		EmitResult EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const;
		EmitResult EmitOuterProduct(InsnIterator insn, EmitState *state) const;
		EmitResult EmitTranspose(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const;
		EmitResult EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const;
		EmitResult EmitUnaryOp(InsnIterator insn, EmitState *state) const;
		EmitResult EmitBinaryOp(InsnIterator insn, EmitState *state) const;
		EmitResult EmitDot(InsnIterator insn, EmitState *state) const;
		EmitResult EmitSelect(InsnIterator insn, EmitState *state) const;
		EmitResult EmitExtendedInstruction(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAny(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAll(InsnIterator insn, EmitState *state) const;
		EmitResult EmitBranch(InsnIterator insn, EmitState *state) const;
		EmitResult EmitBranchConditional(InsnIterator insn, EmitState *state) const;
		EmitResult EmitSwitch(InsnIterator insn, EmitState *state) const;
		EmitResult EmitUnreachable(InsnIterator insn, EmitState *state) const;
		EmitResult EmitReturn(InsnIterator insn, EmitState *state) const;
		EmitResult EmitKill(InsnIterator insn, EmitState *state) const;
		EmitResult EmitFunctionCall(InsnIterator insn, EmitState *state) const;
		EmitResult EmitPhi(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageFetch(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQuerySize(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQueryLod(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQueryLevels(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageQuerySamples(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const;
		EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
		EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
		EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCopyObject(InsnIterator insn, EmitState *state) const;
		EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
		EmitResult EmitControlBarrier(InsnIterator insn, EmitState *state) const;
		EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
		EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
		EmitResult EmitArrayLength(InsnIterator insn, EmitState *state) const;

		void GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
		SIMD::Pointer GetTexelAddress(EmitState const *state, SIMD::Pointer base, GenericValue const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
		uint32_t GetConstScalarInt(Object::ID id) const;
		void EvalSpecConstantOp(InsnIterator insn);
		void EvalSpecConstantUnaryOp(InsnIterator insn);
		void EvalSpecConstantBinaryOp(InsnIterator insn);

		// LoadPhi loads the phi values from the alloca storage and places the
		// load values into the intermediate with the phi's result id.
		void LoadPhi(InsnIterator insn, EmitState *state) const;

		// StorePhi updates the phi's alloca storage value using the incoming
		// values from blocks that are both in the OpPhi instruction and in
		// filter.
		void StorePhi(Block::ID blockID, InsnIterator insn, EmitState *state, std::unordered_set<SpirvShader::Block::ID> const &filter) const;

		// Emits a rr::Fence for the given MemorySemanticsMask.
		void Fence(spv::MemorySemanticsMask semantics) const;

		// Helper for calling rr::Yield with res cast to an rr::Int.
		void Yield(YieldResult res) const;

		// OpcodeName() returns the name of the opcode op.
		// If NDEBUG is defined, then OpcodeName() will only return the numerical code.
		static std::string OpcodeName(spv::Op op);
		static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

		// Helper as we often need to take dot products as part of doing other things.
		SIMD::Float Dot(unsigned numComponents, GenericValue const &x, GenericValue const &y) const;

		SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const;

		// Splits x into a floating-point significand in the range [0.5, 1.0)
		// and an integral exponent of two, such that:
		//   x = significand * 2^exponent
		// Returns the pair <significand, exponent>
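		// For example, Frexp(6.0f) yields <0.75f, 3>, since 6.0 = 0.75 * 2^3.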
		std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;

		static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
		static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);

		// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
		static sw::FilterType convertFilterMode(const vk::Sampler *sampler);
		static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler);
		static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::Sampler *sampler, VkImageViewType imageViewType);

		// Returns 0 when invalid.
		static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
	};

	class SpirvRoutine
	{
	public:
		SpirvRoutine(vk::PipelineLayout const *pipelineLayout);

		using Variable = Array<SIMD::Float>;

		struct SamplerCache
		{
			Pointer<Byte> imageDescriptor = nullptr;
			Pointer<Byte> sampler;
			Pointer<Byte> function;
		};

		vk::PipelineLayout const * const pipelineLayout;

		std::unordered_map<SpirvShader::Object::ID, Variable> variables;
		std::unordered_map<SpirvShader::Object::ID, SamplerCache> samplerCache;
		Variable inputs = Variable{MAX_INTERFACE_COMPONENTS};
		Variable outputs = Variable{MAX_INTERFACE_COMPONENTS};

		Pointer<Byte> workgroupMemory;
		Pointer<Pointer<Byte>> descriptorSets;
		Pointer<Int> descriptorDynamicOffsets;
		Pointer<Byte> pushConstants;
		Pointer<Byte> constants;
		Int killMask = Int{0};
		SIMD::Int windowSpacePosition[2];
		Int viewID; // Slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex.

		void createVariable(SpirvShader::Object::ID id, uint32_t size)
		{
			bool added = variables.emplace(id, Variable(size)).second;
			ASSERT_MSG(added, "Variable %d created twice", id.value());
		}

		Variable& getVariable(SpirvShader::Object::ID id)
		{
			auto it = variables.find(id);
			ASSERT_MSG(it != variables.end(), "Unknown variable %d", id.value());
			return it->second;
		}

		// setImmutableInputBuiltins() sets all the immutable input builtins,
		// common for all shader types.
		void setImmutableInputBuiltins(SpirvShader const *shader);

		// setInputBuiltin() calls f() with the builtin and value if the shader
		// uses the input builtin, otherwise the call is a no-op.
		// F is a function with the signature:
		//   void(const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
		template <typename F>
		inline void setInputBuiltin(SpirvShader const *shader, spv::BuiltIn id, F&& f)
		{
			auto it = shader->inputBuiltins.find(id);
			if (it != shader->inputBuiltins.end())
			{
				const auto& builtin = it->second;
				f(builtin, getVariable(builtin.Id));
			}
		}

	private:
		// The phis are only accessible to SpirvShader, as they only exist and
		// are used between calls to SpirvShader::emitProlog() and
		// SpirvShader::emitEpilog().
		friend class SpirvShader;

		std::unordered_map<SpirvShader::Object::ID, Variable> phis;
	};

} // namespace sw

#endif // sw_SpirvShader_hpp