| 1 | /**************************************************************************/ |
| 2 | /* rendering_device_vulkan.h */ |
| 3 | /**************************************************************************/ |
| 4 | /* This file is part of: */ |
| 5 | /* GODOT ENGINE */ |
| 6 | /* https://godotengine.org */ |
| 7 | /**************************************************************************/ |
| 8 | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ |
| 9 | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ |
| 10 | /* */ |
| 11 | /* Permission is hereby granted, free of charge, to any person obtaining */ |
| 12 | /* a copy of this software and associated documentation files (the */ |
| 13 | /* "Software"), to deal in the Software without restriction, including */ |
| 14 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
| 15 | /* distribute, sublicense, and/or sell copies of the Software, and to */ |
| 16 | /* permit persons to whom the Software is furnished to do so, subject to */ |
| 17 | /* the following conditions: */ |
| 18 | /* */ |
| 19 | /* The above copyright notice and this permission notice shall be */ |
| 20 | /* included in all copies or substantial portions of the Software. */ |
| 21 | /* */ |
| 22 | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ |
| 23 | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ |
| 24 | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ |
| 25 | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ |
| 26 | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ |
| 27 | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ |
| 28 | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
| 29 | /**************************************************************************/ |
| 30 | |
| 31 | #ifndef RENDERING_DEVICE_VULKAN_H |
| 32 | #define RENDERING_DEVICE_VULKAN_H |
| 33 | |
| 34 | #include "core/object/worker_thread_pool.h" |
| 35 | #include "core/os/thread_safe.h" |
| 36 | #include "core/templates/local_vector.h" |
| 37 | #include "core/templates/oa_hash_map.h" |
| 38 | #include "core/templates/rid_owner.h" |
| 39 | #include "servers/rendering/rendering_device.h" |
| 40 | |
| 41 | #ifdef DEBUG_ENABLED |
| 42 | #ifndef _DEBUG |
| 43 | #define _DEBUG |
| 44 | #endif |
| 45 | #endif |
| 46 | #include "vk_mem_alloc.h" |
| 47 | |
| 48 | #ifdef USE_VOLK |
| 49 | #include <volk.h> |
| 50 | #else |
| 51 | #include <vulkan/vulkan.h> |
| 52 | #endif |
| 53 | |
| 54 | class VulkanContext; |
| 55 | |
| 56 | class RenderingDeviceVulkan : public RenderingDevice { |
| 57 | _THREAD_SAFE_CLASS_ |
| 58 | |
// Miscellaneous tables that map our enums to
// the enums used by Vulkan.
| 62 | |
| 63 | VkPhysicalDeviceLimits limits; |
| 64 | static const VkFormat vulkan_formats[DATA_FORMAT_MAX]; |
| 65 | static const char *named_formats[DATA_FORMAT_MAX]; |
| 66 | static const VkCompareOp compare_operators[COMPARE_OP_MAX]; |
| 67 | static const VkStencilOp stencil_operations[STENCIL_OP_MAX]; |
| 68 | static const VkSampleCountFlagBits rasterization_sample_count[TEXTURE_SAMPLES_MAX]; |
| 69 | static const VkLogicOp logic_operations[RenderingDevice::LOGIC_OP_MAX]; |
| 70 | static const VkBlendFactor blend_factors[RenderingDevice::BLEND_FACTOR_MAX]; |
| 71 | static const VkBlendOp blend_operations[RenderingDevice::BLEND_OP_MAX]; |
| 72 | static const VkSamplerAddressMode address_modes[SAMPLER_REPEAT_MODE_MAX]; |
| 73 | static const VkBorderColor sampler_border_colors[SAMPLER_BORDER_COLOR_MAX]; |
| 74 | static const VkImageType vulkan_image_type[TEXTURE_TYPE_MAX]; |
| 75 | |
// Functions used for format validation,
// ensuring the user passes valid data.
| 79 | |
| 80 | static int get_format_vertex_size(DataFormat p_format); |
| 81 | static uint32_t get_image_format_pixel_size(DataFormat p_format); |
| 82 | static void get_compressed_image_format_block_dimensions(DataFormat p_format, uint32_t &r_w, uint32_t &r_h); |
| 83 | uint32_t get_compressed_image_format_block_byte_size(DataFormat p_format); |
| 84 | static uint32_t get_compressed_image_format_pixel_rshift(DataFormat p_format); |
| 85 | static uint32_t get_image_format_required_size(DataFormat p_format, uint32_t p_width, uint32_t p_height, uint32_t p_depth, uint32_t p_mipmaps, uint32_t *r_blockw = nullptr, uint32_t *r_blockh = nullptr, uint32_t *r_depth = nullptr); |
| 86 | static uint32_t get_image_required_mipmaps(uint32_t p_width, uint32_t p_height, uint32_t p_depth); |
| 87 | static bool format_has_stencil(DataFormat p_format); |
| 88 | |
| 89 | /***************************/ |
| 90 | /**** ID INFRASTRUCTURE ****/ |
| 91 | /***************************/ |
| 92 | |
| 93 | enum IDType { |
| 94 | ID_TYPE_FRAMEBUFFER_FORMAT, |
| 95 | ID_TYPE_VERTEX_FORMAT, |
| 96 | ID_TYPE_DRAW_LIST, |
| 97 | ID_TYPE_SPLIT_DRAW_LIST, |
| 98 | ID_TYPE_COMPUTE_LIST, |
| 99 | ID_TYPE_MAX, |
| 100 | ID_BASE_SHIFT = 58 // 5 bits for ID types. |
| 101 | }; |
| 102 | |
| 103 | VkDevice device = VK_NULL_HANDLE; |
| 104 | |
| 105 | HashMap<RID, HashSet<RID>> dependency_map; // IDs to IDs that depend on it. |
| 106 | HashMap<RID, HashSet<RID>> reverse_dependency_map; // Same as above, but in reverse. |
| 107 | |
| 108 | void _add_dependency(RID p_id, RID p_depends_on); |
| 109 | void _free_dependencies(RID p_id); |
| 110 | |
| 111 | /*****************/ |
| 112 | /**** TEXTURE ****/ |
| 113 | /*****************/ |
| 114 | |
// In Vulkan, the concept of textures does not exist;
// instead there is the image (the memory, pretty much),
// the view (how the memory is interpreted) and the
// sampler (how it's sampled from the shader).
//
// Texture here includes the first two stages, and
// it's possible to create textures sharing the image
// but with different views. The main use case for this
// is textures that can be read as both SRGB/Linear,
// or slices of a texture (a mipmap, a layer, a 3D slice)
// for a framebuffer to render into.
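//
// For illustration (hypothetical code, not part of this class): an SRGB
// view sharing the VkImage of a linear (UNORM) texture. Note the image
// must be created with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT for this to
// be valid:
//
//     VkImageViewCreateInfo view_info = {};
//     view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
//     view_info.image = shared_image; // Same VkImage as the base texture.
//     view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
//     view_info.format = VK_FORMAT_R8G8B8A8_SRGB; // Base was ..._UNORM.
//     view_info.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
//     vkCreateImageView(device, &view_info, nullptr, &srgb_view);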
| 126 | |
| 127 | struct Texture { |
| 128 | VkImage image = VK_NULL_HANDLE; |
| 129 | VmaAllocation allocation = nullptr; |
| 130 | VmaAllocationInfo allocation_info; |
| 131 | VkImageView view = VK_NULL_HANDLE; |
| 132 | |
| 133 | TextureType type; |
| 134 | DataFormat format; |
| 135 | TextureSamples samples; |
| 136 | uint32_t width = 0; |
| 137 | uint32_t height = 0; |
| 138 | uint32_t depth = 0; |
| 139 | uint32_t layers = 0; |
| 140 | uint32_t mipmaps = 0; |
| 141 | uint32_t usage_flags = 0; |
| 142 | uint32_t base_mipmap = 0; |
| 143 | uint32_t base_layer = 0; |
| 144 | |
| 145 | Vector<DataFormat> allowed_shared_formats; |
| 146 | |
| 147 | VkImageLayout layout; |
| 148 | |
| 149 | uint64_t used_in_frame = 0; |
| 150 | bool used_in_transfer = false; |
| 151 | bool used_in_raster = false; |
| 152 | bool used_in_compute = false; |
| 153 | |
| 154 | bool is_resolve_buffer = false; |
| 155 | |
| 156 | uint32_t read_aspect_mask = 0; |
| 157 | uint32_t barrier_aspect_mask = 0; |
bool bound = false; // Bound to framebuffer.
| 159 | RID owner; |
| 160 | }; |
| 161 | |
| 162 | RID_Owner<Texture, true> texture_owner; |
| 163 | uint32_t texture_upload_region_size_px = 0; |
| 164 | |
| 165 | Vector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d = false); |
| 166 | Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier, bool p_use_setup_queue); |
| 167 | |
| 168 | /*****************/ |
| 169 | /**** SAMPLER ****/ |
| 170 | /*****************/ |
| 171 | |
| 172 | RID_Owner<VkSampler> sampler_owner; |
| 173 | |
| 174 | /***************************/ |
| 175 | /**** BUFFER MANAGEMENT ****/ |
| 176 | /***************************/ |
| 177 | |
// These are temporary buffers in CPU memory that hold
// the data until the GPU fetches it and places it
// either in GPU buffers or in images (textures). This
// ensures updates are properly synchronized with
// whatever the GPU is doing.
//
// The logic here is as follows: only 3 of these
// blocks are created at the beginning (one per frame).
// Each can belong to a frame (assigned to the current
// one when used) and can only be reused after that
// same frame is recycled.
//
// When the CPU needs to allocate more than what is
// available, more of these buffers are created. If a
// limit is reached, a fence waits until blocks allocated
// in previous frames have been processed. If that fails,
// another fence waits until everything pending for the
// current frame is processed (effectively stalling).
//
// See the comments in the code to understand better how it works.
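//
// A rough sketch (assumed flow; the authoritative logic is in
// _staging_buffer_allocate in the .cpp) of the strategy described above:
//
//     if (current block has enough free space) {
//         allocate from it;
//     } else if (total allocated < staging_buffer_max_size) {
//         _insert_staging_block(); // Grow with a fresh block.
//     } else {
//         wait on a fence until a block from a previous frame recycles;
//         // Worst case: flush everything pending this frame (stall).
//     }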
| 198 | |
| 199 | struct StagingBufferBlock { |
| 200 | VkBuffer buffer = VK_NULL_HANDLE; |
| 201 | VmaAllocation allocation = nullptr; |
| 202 | uint64_t frame_used = 0; |
| 203 | uint32_t fill_amount = 0; |
| 204 | }; |
| 205 | |
| 206 | Vector<StagingBufferBlock> staging_buffer_blocks; |
| 207 | int staging_buffer_current = 0; |
| 208 | uint32_t staging_buffer_block_size = 0; |
| 209 | uint64_t staging_buffer_max_size = 0; |
| 210 | bool staging_buffer_used = false; |
| 211 | |
| 212 | Error _staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, bool p_can_segment = true); |
| 213 | Error _insert_staging_block(); |
| 214 | |
| 215 | struct Buffer { |
| 216 | uint32_t size = 0; |
| 217 | uint32_t usage = 0; |
| 218 | VkBuffer buffer = VK_NULL_HANDLE; |
| 219 | VmaAllocation allocation = nullptr; |
| 220 | VkDescriptorBufferInfo buffer_info; // Used for binding. |
| 221 | Buffer() { |
| 222 | } |
| 223 | }; |
| 224 | |
| 225 | Error _buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mem_usage, VmaAllocationCreateFlags p_mem_flags); |
| 226 | Error _buffer_free(Buffer *p_buffer); |
| 227 | Error _buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer = false, uint32_t p_required_align = 32); |
| 228 | |
| 229 | void _full_barrier(bool p_sync_with_draw); |
| 230 | void _memory_barrier(VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_access, bool p_sync_with_draw); |
| 231 | void _buffer_memory_barrier(VkBuffer buffer, uint64_t p_from, uint64_t p_size, VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_access, bool p_sync_with_draw); |
| 232 | |
| 233 | /*********************/ |
| 234 | /**** FRAMEBUFFER ****/ |
| 235 | /*********************/ |
| 236 | |
// In Vulkan, framebuffers work similarly to how
// they do in OpenGL, with the exception that
// the "format" (VkRenderPass) is not dynamic
// and must be more or less the same as the one
// used for the render pipelines.
| 242 | |
| 243 | struct FramebufferFormatKey { |
| 244 | Vector<AttachmentFormat> attachments; |
| 245 | Vector<FramebufferPass> passes; |
| 246 | uint32_t view_count = 1; |
| 247 | |
| 248 | bool operator<(const FramebufferFormatKey &p_key) const { |
| 249 | if (view_count != p_key.view_count) { |
| 250 | return view_count < p_key.view_count; |
| 251 | } |
| 252 | |
| 253 | uint32_t pass_size = passes.size(); |
| 254 | uint32_t key_pass_size = p_key.passes.size(); |
| 255 | if (pass_size != key_pass_size) { |
| 256 | return pass_size < key_pass_size; |
| 257 | } |
| 258 | const FramebufferPass *pass_ptr = passes.ptr(); |
| 259 | const FramebufferPass *key_pass_ptr = p_key.passes.ptr(); |
| 260 | |
| 261 | for (uint32_t i = 0; i < pass_size; i++) { |
| 262 | { // Compare color attachments. |
| 263 | uint32_t attachment_size = pass_ptr[i].color_attachments.size(); |
| 264 | uint32_t key_attachment_size = key_pass_ptr[i].color_attachments.size(); |
| 265 | if (attachment_size != key_attachment_size) { |
| 266 | return attachment_size < key_attachment_size; |
| 267 | } |
| 268 | const int32_t *pass_attachment_ptr = pass_ptr[i].color_attachments.ptr(); |
| 269 | const int32_t *key_pass_attachment_ptr = key_pass_ptr[i].color_attachments.ptr(); |
| 270 | |
| 271 | for (uint32_t j = 0; j < attachment_size; j++) { |
| 272 | if (pass_attachment_ptr[j] != key_pass_attachment_ptr[j]) { |
| 273 | return pass_attachment_ptr[j] < key_pass_attachment_ptr[j]; |
| 274 | } |
| 275 | } |
| 276 | } |
| 277 | { // Compare input attachments. |
| 278 | uint32_t attachment_size = pass_ptr[i].input_attachments.size(); |
| 279 | uint32_t key_attachment_size = key_pass_ptr[i].input_attachments.size(); |
| 280 | if (attachment_size != key_attachment_size) { |
| 281 | return attachment_size < key_attachment_size; |
| 282 | } |
| 283 | const int32_t *pass_attachment_ptr = pass_ptr[i].input_attachments.ptr(); |
| 284 | const int32_t *key_pass_attachment_ptr = key_pass_ptr[i].input_attachments.ptr(); |
| 285 | |
| 286 | for (uint32_t j = 0; j < attachment_size; j++) { |
| 287 | if (pass_attachment_ptr[j] != key_pass_attachment_ptr[j]) { |
| 288 | return pass_attachment_ptr[j] < key_pass_attachment_ptr[j]; |
| 289 | } |
| 290 | } |
| 291 | } |
| 292 | { // Compare resolve attachments. |
| 293 | uint32_t attachment_size = pass_ptr[i].resolve_attachments.size(); |
| 294 | uint32_t key_attachment_size = key_pass_ptr[i].resolve_attachments.size(); |
| 295 | if (attachment_size != key_attachment_size) { |
| 296 | return attachment_size < key_attachment_size; |
| 297 | } |
| 298 | const int32_t *pass_attachment_ptr = pass_ptr[i].resolve_attachments.ptr(); |
| 299 | const int32_t *key_pass_attachment_ptr = key_pass_ptr[i].resolve_attachments.ptr(); |
| 300 | |
| 301 | for (uint32_t j = 0; j < attachment_size; j++) { |
| 302 | if (pass_attachment_ptr[j] != key_pass_attachment_ptr[j]) { |
| 303 | return pass_attachment_ptr[j] < key_pass_attachment_ptr[j]; |
| 304 | } |
| 305 | } |
| 306 | } |
| 307 | { // Compare preserve attachments. |
| 308 | uint32_t attachment_size = pass_ptr[i].preserve_attachments.size(); |
| 309 | uint32_t key_attachment_size = key_pass_ptr[i].preserve_attachments.size(); |
| 310 | if (attachment_size != key_attachment_size) { |
| 311 | return attachment_size < key_attachment_size; |
| 312 | } |
| 313 | const int32_t *pass_attachment_ptr = pass_ptr[i].preserve_attachments.ptr(); |
| 314 | const int32_t *key_pass_attachment_ptr = key_pass_ptr[i].preserve_attachments.ptr(); |
| 315 | |
| 316 | for (uint32_t j = 0; j < attachment_size; j++) { |
| 317 | if (pass_attachment_ptr[j] != key_pass_attachment_ptr[j]) { |
| 318 | return pass_attachment_ptr[j] < key_pass_attachment_ptr[j]; |
| 319 | } |
| 320 | } |
| 321 | } |
| 322 | if (pass_ptr[i].depth_attachment != key_pass_ptr[i].depth_attachment) { |
| 323 | return pass_ptr[i].depth_attachment < key_pass_ptr[i].depth_attachment; |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | int as = attachments.size(); |
| 328 | int bs = p_key.attachments.size(); |
| 329 | if (as != bs) { |
| 330 | return as < bs; |
| 331 | } |
| 332 | |
| 333 | const AttachmentFormat *af_a = attachments.ptr(); |
| 334 | const AttachmentFormat *af_b = p_key.attachments.ptr(); |
| 335 | for (int i = 0; i < as; i++) { |
| 336 | const AttachmentFormat &a = af_a[i]; |
| 337 | const AttachmentFormat &b = af_b[i]; |
| 338 | if (a.format != b.format) { |
| 339 | return a.format < b.format; |
| 340 | } |
| 341 | if (a.samples != b.samples) { |
| 342 | return a.samples < b.samples; |
| 343 | } |
| 344 | if (a.usage_flags != b.usage_flags) { |
| 345 | return a.usage_flags < b.usage_flags; |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | return false; // Equal. |
| 350 | } |
| 351 | }; |
| 352 | |
| 353 | VkRenderPass _render_pass_create(const Vector<AttachmentFormat> &p_attachments, const Vector<FramebufferPass> &p_passes, InitialAction p_initial_action, FinalAction p_final_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, uint32_t p_view_count = 1, Vector<TextureSamples> *r_samples = nullptr); |
// This is a cache that is never freed; it ensures
// IDs for a given format are always unique.
| 356 | RBMap<FramebufferFormatKey, FramebufferFormatID> framebuffer_format_cache; |
| 357 | struct FramebufferFormat { |
| 358 | const RBMap<FramebufferFormatKey, FramebufferFormatID>::Element *E; |
VkRenderPass render_pass = VK_NULL_HANDLE; // Here for constructing shaders, never used for rendering; see "Render Pass Compatibility" (section 7.2) in the Vulkan spec.
| 360 | Vector<TextureSamples> pass_samples; |
| 361 | uint32_t view_count = 1; // Number of views. |
| 362 | }; |
| 363 | |
| 364 | HashMap<FramebufferFormatID, FramebufferFormat> framebuffer_formats; |
| 365 | |
| 366 | struct Framebuffer { |
| 367 | FramebufferFormatID format_id = 0; |
| 368 | struct VersionKey { |
| 369 | InitialAction initial_color_action; |
| 370 | FinalAction final_color_action; |
| 371 | InitialAction initial_depth_action; |
| 372 | FinalAction final_depth_action; |
| 373 | uint32_t view_count; |
| 374 | |
| 375 | bool operator<(const VersionKey &p_key) const { |
| 376 | if (initial_color_action == p_key.initial_color_action) { |
| 377 | if (final_color_action == p_key.final_color_action) { |
| 378 | if (initial_depth_action == p_key.initial_depth_action) { |
| 379 | if (final_depth_action == p_key.final_depth_action) { |
| 380 | return view_count < p_key.view_count; |
| 381 | } else { |
| 382 | return final_depth_action < p_key.final_depth_action; |
| 383 | } |
| 384 | } else { |
| 385 | return initial_depth_action < p_key.initial_depth_action; |
| 386 | } |
| 387 | } else { |
| 388 | return final_color_action < p_key.final_color_action; |
| 389 | } |
| 390 | } else { |
| 391 | return initial_color_action < p_key.initial_color_action; |
| 392 | } |
| 393 | } |
| 394 | }; |
| 395 | |
| 396 | uint32_t storage_mask = 0; |
| 397 | Vector<RID> texture_ids; |
| 398 | InvalidationCallback invalidated_callback = nullptr; |
| 399 | void *invalidated_callback_userdata = nullptr; |
| 400 | |
| 401 | struct Version { |
| 402 | VkFramebuffer framebuffer = VK_NULL_HANDLE; |
| 403 | VkRenderPass render_pass = VK_NULL_HANDLE; // This one is owned. |
| 404 | uint32_t subpass_count = 1; |
| 405 | }; |
| 406 | |
| 407 | RBMap<VersionKey, Version> framebuffers; |
| 408 | Size2 size; |
| 409 | uint32_t view_count; |
| 410 | }; |
| 411 | |
| 412 | RID_Owner<Framebuffer, true> framebuffer_owner; |
| 413 | |
| 414 | /***********************/ |
| 415 | /**** VERTEX BUFFER ****/ |
| 416 | /***********************/ |
| 417 | |
// Vertex buffers in Vulkan are similar to how
// they work in OpenGL, except that instead of
// an attribute index, there is a buffer binding
// index (used for binding the buffers at draw time)
// and a location index (what the shader uses).
//
// This mapping is done here internally, and it's not
// exposed.
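//
// For illustration (hypothetical values): the two indices are declared
// independently and tied together by the attribute description:
//
//     VkVertexInputBindingDescription binding = {};
//     binding.binding = 0; // Index used when binding the VkBuffer.
//     binding.stride = sizeof(float) * 3;
//     binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
//
//     VkVertexInputAttributeDescription attribute = {};
//     attribute.location = 4; // Index the shader refers to.
//     attribute.binding = 0;  // Which bound buffer it reads from.
//     attribute.format = VK_FORMAT_R32G32B32_SFLOAT;
//     attribute.offset = 0;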
| 426 | |
| 427 | RID_Owner<Buffer, true> vertex_buffer_owner; |
| 428 | |
| 429 | struct VertexDescriptionKey { |
| 430 | Vector<VertexAttribute> vertex_formats; |
| 431 | bool operator==(const VertexDescriptionKey &p_key) const { |
| 432 | int vdc = vertex_formats.size(); |
| 433 | int vdck = p_key.vertex_formats.size(); |
| 434 | |
| 435 | if (vdc != vdck) { |
| 436 | return false; |
| 437 | } else { |
| 438 | const VertexAttribute *a_ptr = vertex_formats.ptr(); |
| 439 | const VertexAttribute *b_ptr = p_key.vertex_formats.ptr(); |
| 440 | for (int i = 0; i < vdc; i++) { |
| 441 | const VertexAttribute &a = a_ptr[i]; |
| 442 | const VertexAttribute &b = b_ptr[i]; |
| 443 | |
| 444 | if (a.location != b.location) { |
| 445 | return false; |
| 446 | } |
| 447 | if (a.offset != b.offset) { |
| 448 | return false; |
| 449 | } |
| 450 | if (a.format != b.format) { |
| 451 | return false; |
| 452 | } |
| 453 | if (a.stride != b.stride) { |
| 454 | return false; |
| 455 | } |
| 456 | if (a.frequency != b.frequency) { |
| 457 | return false; |
| 458 | } |
| 459 | } |
| 460 | return true; // They are equal. |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | uint32_t hash() const { |
| 465 | int vdc = vertex_formats.size(); |
| 466 | uint32_t h = hash_murmur3_one_32(vdc); |
| 467 | const VertexAttribute *ptr = vertex_formats.ptr(); |
| 468 | for (int i = 0; i < vdc; i++) { |
| 469 | const VertexAttribute &vd = ptr[i]; |
| 470 | h = hash_murmur3_one_32(vd.location, h); |
| 471 | h = hash_murmur3_one_32(vd.offset, h); |
| 472 | h = hash_murmur3_one_32(vd.format, h); |
| 473 | h = hash_murmur3_one_32(vd.stride, h); |
| 474 | h = hash_murmur3_one_32(vd.frequency, h); |
| 475 | } |
| 476 | return hash_fmix32(h); |
| 477 | } |
| 478 | }; |
| 479 | |
| 480 | struct VertexDescriptionHash { |
| 481 | static _FORCE_INLINE_ uint32_t hash(const VertexDescriptionKey &p_key) { |
| 482 | return p_key.hash(); |
| 483 | } |
| 484 | }; |
| 485 | |
// This is a cache that is never freed; it ensures that
// the ID used for a specific format always remains the same.
| 488 | HashMap<VertexDescriptionKey, VertexFormatID, VertexDescriptionHash> vertex_format_cache; |
| 489 | |
| 490 | struct VertexDescriptionCache { |
| 491 | Vector<VertexAttribute> vertex_formats; |
| 492 | VkVertexInputBindingDescription *bindings = nullptr; |
| 493 | VkVertexInputAttributeDescription *attributes = nullptr; |
| 494 | VkPipelineVertexInputStateCreateInfo create_info; |
| 495 | }; |
| 496 | |
| 497 | HashMap<VertexFormatID, VertexDescriptionCache> vertex_formats; |
| 498 | |
| 499 | struct VertexArray { |
| 500 | RID buffer; |
| 501 | VertexFormatID description = 0; |
| 502 | int vertex_count = 0; |
| 503 | uint32_t max_instances_allowed = 0; |
| 504 | |
| 505 | Vector<VkBuffer> buffers; // Not owned, just referenced. |
| 506 | Vector<VkDeviceSize> offsets; |
| 507 | }; |
| 508 | |
| 509 | RID_Owner<VertexArray, true> vertex_array_owner; |
| 510 | |
| 511 | struct IndexBuffer : public Buffer { |
| 512 | uint32_t max_index = 0; // Used for validation. |
| 513 | uint32_t index_count = 0; |
| 514 | VkIndexType index_type = VK_INDEX_TYPE_NONE_NV; |
| 515 | bool supports_restart_indices = false; |
| 516 | }; |
| 517 | |
| 518 | RID_Owner<IndexBuffer, true> index_buffer_owner; |
| 519 | |
| 520 | struct IndexArray { |
| 521 | uint32_t max_index = 0; // Remember the maximum index here too, for validation. |
| 522 | VkBuffer buffer; // Not owned, inherited from index buffer. |
| 523 | uint32_t offset = 0; |
| 524 | uint32_t indices = 0; |
| 525 | VkIndexType index_type = VK_INDEX_TYPE_NONE_NV; |
| 526 | bool supports_restart_indices = false; |
| 527 | }; |
| 528 | |
| 529 | RID_Owner<IndexArray, true> index_array_owner; |
| 530 | |
| 531 | /****************/ |
| 532 | /**** SHADER ****/ |
| 533 | /****************/ |
| 534 | |
| 535 | // Vulkan specifies a really complex behavior for the application |
| 536 | // in order to tell when descriptor sets need to be re-bound (or not). |
| 537 | // "When binding a descriptor set (see Descriptor Set Binding) to set |
| 538 | // number N, if the previously bound descriptor sets for sets zero |
| 539 | // through N-1 were all bound using compatible pipeline layouts, |
| 540 | // then performing this binding does not disturb any of the lower numbered sets. |
// If, additionally, the previously bound descriptor set for set N was
// bound using a pipeline layout compatible for set N, then the bindings
// in sets numbered greater than N are also not disturbed."
// As a result, we need to figure out quickly when something is no longer
// "compatible" in order to avoid costly rebinds.
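//
// A minimal sketch (assumed; this is what the per-set state tracking below
// enables) of finding the first disturbed set when a new pipeline is bound:
//
//     for (uint32_t i = 0; i < set_count; i++) {
//         if (sets[i].uniform_set_format != new_pipeline_set_formats[i]) {
//             first_invalid_set = i; // Sets i..N-1 must be re-bound.
//             break;
//         }
//     }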
| 546 | |
| 547 | struct UniformInfo { |
| 548 | UniformType type = UniformType::UNIFORM_TYPE_MAX; |
| 549 | bool writable = false; |
| 550 | int binding = 0; |
| 551 | uint32_t stages = 0; |
int length = 0; // Size of arrays (in total elements), or UBOs (in bytes * total elements).
| 553 | |
| 554 | bool operator!=(const UniformInfo &p_info) const { |
| 555 | return (binding != p_info.binding || type != p_info.type || writable != p_info.writable || stages != p_info.stages || length != p_info.length); |
| 556 | } |
| 557 | |
| 558 | bool operator<(const UniformInfo &p_info) const { |
| 559 | if (binding != p_info.binding) { |
| 560 | return binding < p_info.binding; |
| 561 | } |
| 562 | if (type != p_info.type) { |
| 563 | return type < p_info.type; |
| 564 | } |
| 565 | if (writable != p_info.writable) { |
| 566 | return writable < p_info.writable; |
| 567 | } |
| 568 | if (stages != p_info.stages) { |
| 569 | return stages < p_info.stages; |
| 570 | } |
| 571 | return length < p_info.length; |
| 572 | } |
| 573 | }; |
| 574 | |
| 575 | struct UniformSetFormat { |
| 576 | Vector<UniformInfo> uniform_info; |
| 577 | bool operator<(const UniformSetFormat &p_format) const { |
| 578 | uint32_t size = uniform_info.size(); |
| 579 | uint32_t psize = p_format.uniform_info.size(); |
| 580 | |
| 581 | if (size != psize) { |
| 582 | return size < psize; |
| 583 | } |
| 584 | |
| 585 | const UniformInfo *infoptr = uniform_info.ptr(); |
| 586 | const UniformInfo *pinfoptr = p_format.uniform_info.ptr(); |
| 587 | |
| 588 | for (uint32_t i = 0; i < size; i++) { |
| 589 | if (infoptr[i] != pinfoptr[i]) { |
| 590 | return infoptr[i] < pinfoptr[i]; |
| 591 | } |
| 592 | } |
| 593 | |
| 594 | return false; |
| 595 | } |
| 596 | }; |
| 597 | |
// Always grows, never shrinks, ensuring unique IDs, but we assume
// the number of formats will never be a problem, as the number of
// shaders in a game is limited.
| 601 | RBMap<UniformSetFormat, uint32_t> uniform_set_format_cache; |
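
// A rough sketch (hypothetical helper; the real lookup happens during
// shader creation) of how a set layout maps to a stable format ID via
// this cache:
//
//     uint32_t format_id;
//     const RBMap<UniformSetFormat, uint32_t>::Element *E = uniform_set_format_cache.find(usformat);
//     if (E) {
//         format_id = E->get();
//     } else {
//         format_id = uniform_set_format_cache.size() + 1; // IDs are never reused.
//         uniform_set_format_cache.insert(usformat, format_id);
//     }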
| 602 | |
// Shaders in Vulkan are pretty much just
// precompiled blocks of SPIR-V bytecode. They
// are most likely not compiled to native GPU
// code until a pipeline is created.
| 607 | // |
| 608 | // When supplying the shaders, this implementation |
| 609 | // will use the reflection abilities of glslang to |
| 610 | // understand and cache everything required to |
| 611 | // create and use the descriptor sets (Vulkan's |
| 612 | // biggest pain). |
| 613 | // |
// Additionally, hashes are created for every set
// to do quick validation and ensure the user
// does not submit something invalid.
| 617 | |
| 618 | struct Shader { |
| 619 | struct Set { |
| 620 | Vector<UniformInfo> uniform_info; |
| 621 | VkDescriptorSetLayout descriptor_set_layout = VK_NULL_HANDLE; |
| 622 | }; |
| 623 | |
| 624 | uint32_t vertex_input_mask = 0; // Inputs used, this is mostly for validation. |
| 625 | uint32_t fragment_output_mask = 0; |
| 626 | |
| 627 | struct PushConstant { |
| 628 | uint32_t size = 0; |
| 629 | uint32_t vk_stages_mask = 0; |
| 630 | }; |
| 631 | |
| 632 | PushConstant push_constant; |
| 633 | |
| 634 | uint32_t compute_local_size[3] = { 0, 0, 0 }; |
| 635 | |
| 636 | struct SpecializationConstant { |
| 637 | PipelineSpecializationConstant constant; |
| 638 | uint32_t stage_flags = 0; |
| 639 | }; |
| 640 | |
| 641 | bool is_compute = false; |
| 642 | Vector<Set> sets; |
| 643 | Vector<uint32_t> set_formats; |
| 644 | Vector<VkPipelineShaderStageCreateInfo> pipeline_stages; |
| 645 | Vector<SpecializationConstant> specialization_constants; |
| 646 | VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; |
| 647 | String name; // Used for debug. |
| 648 | }; |
| 649 | |
| 650 | String _shader_uniform_debug(RID p_shader, int p_set = -1); |
| 651 | |
| 652 | RID_Owner<Shader, true> shader_owner; |
| 653 | |
| 654 | /******************/ |
| 655 | /**** UNIFORMS ****/ |
| 656 | /******************/ |
| 657 | |
| 658 | // Descriptor sets require allocation from a pool. |
// The documentation on how to use pools properly
// is scarce, and what does exist is strange.
| 661 | // |
| 662 | // Basically, you can mix and match pools as you |
| 663 | // like, but you'll run into fragmentation issues. |
| 664 | // Because of this, the recommended approach is to |
| 665 | // create a pool for every descriptor set type, as |
| 666 | // this prevents fragmentation. |
| 667 | // |
// This is implemented here as a list of
// pools (each can contain up to 64 sets) for each
// set layout. The number of sets for each type
// is used as the key.
| 672 | |
| 673 | enum { |
| 674 | MAX_DESCRIPTOR_POOL_ELEMENT = 65535 |
| 675 | }; |
| 676 | |
| 677 | struct DescriptorPoolKey { |
| 678 | union { |
| 679 | struct { |
| 680 | uint16_t uniform_type[UNIFORM_TYPE_MAX]; // Using 16 bits because, for sending arrays, each element is a pool set. |
| 681 | }; |
| 682 | struct { |
| 683 | uint64_t key1; |
| 684 | uint64_t key2; |
| 685 | uint64_t key3; |
| 686 | }; |
| 687 | }; |
| 688 | bool operator<(const DescriptorPoolKey &p_key) const { |
| 689 | if (key1 != p_key.key1) { |
| 690 | return key1 < p_key.key1; |
| 691 | } |
| 692 | if (key2 != p_key.key2) { |
| 693 | return key2 < p_key.key2; |
| 694 | } |
| 695 | |
| 696 | return key3 < p_key.key3; |
| 697 | } |
| 698 | DescriptorPoolKey() { |
| 699 | key1 = 0; |
| 700 | key2 = 0; |
| 701 | key3 = 0; |
| 702 | } |
| 703 | }; |
| 704 | |
| 705 | struct DescriptorPool { |
| 706 | VkDescriptorPool pool; |
| 707 | uint32_t usage; |
| 708 | }; |
| 709 | |
| 710 | RBMap<DescriptorPoolKey, HashSet<DescriptorPool *>> descriptor_pools; |
| 711 | uint32_t max_descriptors_per_pool = 0; |
| 712 | |
| 713 | DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key); |
| 714 | void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool); |
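
// A minimal sketch (assumed; see _descriptor_pool_allocate in the .cpp) of
// creating a pool sized from a DescriptorPoolKey's per-type counts:
//
//     Vector<VkDescriptorPoolSize> sizes;
//     for (int i = 0; i < UNIFORM_TYPE_MAX; i++) {
//         if (p_key.uniform_type[i] > 0) {
//             VkDescriptorPoolSize s;
//             s.type = /* the VkDescriptorType matching UniformType i */;
//             s.descriptorCount = p_key.uniform_type[i] * max_descriptors_per_pool;
//             sizes.push_back(s);
//         }
//     }
//     VkDescriptorPoolCreateInfo info = {};
//     info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
//     info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
//     info.maxSets = max_descriptors_per_pool;
//     info.poolSizeCount = sizes.size();
//     info.pPoolSizes = sizes.ptr();
//     vkCreateDescriptorPool(device, &info, nullptr, &pool->pool);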
| 715 | |
| 716 | RID_Owner<Buffer, true> uniform_buffer_owner; |
| 717 | RID_Owner<Buffer, true> storage_buffer_owner; |
| 718 | |
| 719 | // Texture buffer needs a view. |
| 720 | struct TextureBuffer { |
| 721 | Buffer buffer; |
| 722 | VkBufferView view = VK_NULL_HANDLE; |
| 723 | }; |
| 724 | |
| 725 | RID_Owner<TextureBuffer, true> texture_buffer_owner; |
| 726 | |
// This structure contains the descriptor set. Descriptor sets _need_ to be
// allocated for a shader (and will be erased when that shader is erased),
// but should work for other shaders as long as the hash matches. This
// covers using them in shader variants.
//
// Keep in mind also that you can share buffers between descriptor sets, so
// the above restriction is not too serious.
| 734 | |
| 735 | struct UniformSet { |
| 736 | uint32_t format = 0; |
| 737 | RID shader_id; |
| 738 | uint32_t shader_set = 0; |
| 739 | DescriptorPool *pool = nullptr; |
| 740 | DescriptorPoolKey pool_key; |
| 741 | VkDescriptorSet descriptor_set = VK_NULL_HANDLE; |
| 742 | //VkPipelineLayout pipeline_layout; // Not owned, inherited from shader. |
| 743 | struct AttachableTexture { |
| 744 | uint32_t bind; |
| 745 | RID texture; |
| 746 | }; |
| 747 | |
| 748 | LocalVector<AttachableTexture> attachable_textures; // Used for validation. |
| 749 | Vector<Texture *> mutable_sampled_textures; // Used for layout change. |
| 750 | Vector<Texture *> mutable_storage_textures; // Used for layout change. |
| 751 | InvalidationCallback invalidated_callback = nullptr; |
| 752 | void *invalidated_callback_userdata = nullptr; |
| 753 | }; |
| 754 | |
| 755 | RID_Owner<UniformSet, true> uniform_set_owner; |
| 756 | |
| 757 | /*******************/ |
| 758 | /**** PIPELINES ****/ |
| 759 | /*******************/ |
| 760 | |
// A render pipeline contains ALL the
// information required for drawing.
// This includes all the rasterizer state,
// as well as the shader used, framebuffer format,
// etc.
// While the pipeline is just a single object
// (VkPipeline), a lot of values are also saved
// here to do validation (Vulkan does none by
// default) and warn the user if something
// was not supplied as intended.
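//
// For example (a hedged sketch of the kind of check this enables), binding
// a pipeline to a draw list can cheaply verify that formats match:
//
//     #ifdef DEBUG_ENABLED
//     ERR_FAIL_COND(pipeline->validation.framebuffer_format != draw_list_framebuffer_format);
//     #endif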
| 771 | |
| 772 | struct RenderPipeline { |
| 773 | // Cached values for validation. |
| 774 | #ifdef DEBUG_ENABLED |
| 775 | struct Validation { |
| 776 | FramebufferFormatID framebuffer_format = 0; |
| 777 | uint32_t render_pass = 0; |
| 778 | uint32_t dynamic_state = 0; |
| 779 | VertexFormatID vertex_format = 0; |
| 780 | bool uses_restart_indices = false; |
| 781 | uint32_t primitive_minimum = 0; |
| 782 | uint32_t primitive_divisor = 0; |
| 783 | } validation; |
| 784 | #endif |
| 785 | // Actual pipeline. |
| 786 | RID shader; |
| 787 | Vector<uint32_t> set_formats; |
| 788 | VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; // Not owned, needed for push constants. |
| 789 | VkPipeline pipeline = VK_NULL_HANDLE; |
| 790 | uint32_t push_constant_size = 0; |
| 791 | uint32_t push_constant_stages_mask = 0; |
| 792 | }; |
| 793 | |
| 794 | RID_Owner<RenderPipeline, true> render_pipeline_owner; |
| 795 | |
struct PipelineCacheHeader {
uint32_t magic;
uint32_t data_size;
uint64_t data_hash;
uint32_t vendor_id;
uint32_t device_id;
uint32_t driver_version;
uint8_t uuid[VK_UUID_SIZE];
uint8_t driver_abi;
};
| 806 | |
| 807 | struct PipelineCache { |
| 808 | String file_path; |
PipelineCacheHeader header = {};
| 810 | size_t current_size = 0; |
| 811 | LocalVector<uint8_t> buffer; |
| 812 | VkPipelineCache cache_object = VK_NULL_HANDLE; |
| 813 | }; |
| 814 | |
| 815 | PipelineCache pipelines_cache; |
| 816 | |
| 817 | WorkerThreadPool::TaskID pipelines_cache_save_task = WorkerThreadPool::INVALID_TASK_ID; |
| 818 | |
| 819 | void _load_pipeline_cache(); |
| 820 | void _update_pipeline_cache(bool p_closing = false); |
| 821 | static void _save_pipeline_cache(void *p_data); |
| 822 | |
| 823 | struct ComputePipeline { |
| 824 | RID shader; |
| 825 | Vector<uint32_t> set_formats; |
| 826 | VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; // Not owned, needed for push constants. |
| 827 | VkPipeline pipeline = VK_NULL_HANDLE; |
| 828 | uint32_t push_constant_size = 0; |
| 829 | uint32_t push_constant_stages_mask = 0; |
| 830 | uint32_t local_group_size[3] = { 0, 0, 0 }; |
| 831 | }; |
| 832 | |
| 833 | RID_Owner<ComputePipeline, true> compute_pipeline_owner; |
| 834 | |
| 835 | /*******************/ |
| 836 | /**** DRAW LIST ****/ |
| 837 | /*******************/ |
| 838 | |
// A draw list contains both the command buffer
// used for drawing and a LOT of
// information used for validation. This
// validation is cheap, so most of it can
// also run in release builds.

// When using split draw lists, this is
// implemented internally using secondary command
// buffers. As they can be recorded from threads,
// each needs its own command pool.
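//
// For illustration (hypothetical): a secondary command buffer recorded
// inside a render pass must inherit that pass, roughly:
//
//     VkCommandBufferInheritanceInfo inheritance = {};
//     inheritance.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
//     inheritance.renderPass = draw_list_render_pass;
//     inheritance.framebuffer = draw_list_vkframebuffer;
//
//     VkCommandBufferBeginInfo begin = {};
//     begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
//     begin.flags = VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
//     begin.pInheritanceInfo = &inheritance;
//     vkBeginCommandBuffer(secondary_command_buffer, &begin);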
| 849 | |
| 850 | struct SplitDrawListAllocator { |
| 851 | VkCommandPool command_pool = VK_NULL_HANDLE; |
| 852 | Vector<VkCommandBuffer> command_buffers; // One for each frame. |
| 853 | }; |
| 854 | |
| 855 | Vector<SplitDrawListAllocator> split_draw_list_allocators; |
| 856 | |
| 857 | struct DrawList { |
| 858 | VkCommandBuffer command_buffer = VK_NULL_HANDLE; // If persistent, this is owned, otherwise it's shared with the ringbuffer. |
| 859 | Rect2i viewport; |
| 860 | bool viewport_set = false; |
| 861 | |
| 862 | struct SetState { |
| 863 | uint32_t pipeline_expected_format = 0; |
| 864 | uint32_t uniform_set_format = 0; |
| 865 | VkDescriptorSet descriptor_set = VK_NULL_HANDLE; |
| 866 | RID uniform_set; |
| 867 | bool bound = false; |
| 868 | }; |
| 869 | |
| 870 | struct State { |
| 871 | SetState sets[MAX_UNIFORM_SETS]; |
| 872 | uint32_t set_count = 0; |
| 873 | RID pipeline; |
| 874 | RID pipeline_shader; |
| 875 | VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; |
| 876 | RID vertex_array; |
| 877 | RID index_array; |
| 878 | uint32_t pipeline_push_constant_stages = 0; |
| 879 | } state; |
| 880 | |
| 881 | #ifdef DEBUG_ENABLED |
| 882 | struct Validation { |
| 883 | bool active = true; // Means command buffer was not closed, so you can keep adding things. |
| 884 | // Actual render pass values. |
| 885 | uint32_t dynamic_state = 0; |
| 886 | VertexFormatID vertex_format = INVALID_ID; |
| 887 | uint32_t vertex_array_size = 0; |
| 888 | uint32_t vertex_max_instances_allowed = 0xFFFFFFFF; |
| 889 | bool index_buffer_uses_restart_indices = false; |
| 890 | uint32_t index_array_size = 0; |
| 891 | uint32_t index_array_max_index = 0; |
| 892 | uint32_t index_array_offset = 0; |
| 893 | Vector<uint32_t> set_formats; |
| 894 | Vector<bool> set_bound; |
| 895 | Vector<RID> set_rids; |
| 896 | // Last pipeline set values. |
| 897 | bool pipeline_active = false; |
| 898 | uint32_t pipeline_dynamic_state = 0; |
| 899 | VertexFormatID pipeline_vertex_format = INVALID_ID; |
| 900 | RID pipeline_shader; |
| 901 | bool pipeline_uses_restart_indices = false; |
| 902 | uint32_t pipeline_primitive_divisor = 0; |
| 903 | uint32_t pipeline_primitive_minimum = 0; |
| 904 | uint32_t pipeline_push_constant_size = 0; |
| 905 | bool pipeline_push_constant_supplied = false; |
| 906 | } validation; |
| 907 | #else |
| 908 | struct Validation { |
| 909 | uint32_t vertex_array_size = 0; |
| 910 | uint32_t index_array_size = 0; |
| 911 | uint32_t index_array_offset; |
| 912 | } validation; |
| 913 | #endif |
| 914 | }; |
| 915 | |
| 916 | DrawList *draw_list = nullptr; // One for regular draw lists, multiple for split. |
| 917 | uint32_t draw_list_subpass_count = 0; |
| 918 | uint32_t draw_list_count = 0; |
| 919 | VkRenderPass draw_list_render_pass = VK_NULL_HANDLE; |
| 920 | VkFramebuffer draw_list_vkframebuffer = VK_NULL_HANDLE; |
| 921 | #ifdef DEBUG_ENABLED |
| 922 | FramebufferFormatID draw_list_framebuffer_format = INVALID_ID; |
| 923 | #endif |
| 924 | uint32_t draw_list_current_subpass = 0; |
| 925 | |
| 926 | bool draw_list_split = false; |
| 927 | Vector<RID> draw_list_bound_textures; |
| 928 | Vector<RID> draw_list_storage_textures; |
| 929 | bool draw_list_unbind_color_textures = false; |
| 930 | bool draw_list_unbind_depth_textures = false; |
| 931 | |
| 932 | void _draw_list_insert_clear_region(DrawList *p_draw_list, Framebuffer *p_framebuffer, Point2i p_viewport_offset, Point2i p_viewport_size, bool p_clear_color, const Vector<Color> &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil); |
| 933 | Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass, uint32_t *r_subpass_count); |
| 934 | Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures); |
| 935 | _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); |
| 936 | Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, BitField<BarrierMask> p_post_barrier); |
| 937 | Error _draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass); |
| 938 | void _draw_list_free(Rect2i *r_last_viewport = nullptr); |
| 939 | |
| 940 | /**********************/ |
| 941 | /**** COMPUTE LIST ****/ |
| 942 | /**********************/ |
| 943 | |
| 944 | struct ComputeList { |
| 945 | VkCommandBuffer command_buffer = VK_NULL_HANDLE; // If persistent, this is owned, otherwise it's shared with the ringbuffer. |
| 946 | |
| 947 | struct SetState { |
| 948 | uint32_t pipeline_expected_format = 0; |
| 949 | uint32_t uniform_set_format = 0; |
| 950 | VkDescriptorSet descriptor_set = VK_NULL_HANDLE; |
| 951 | RID uniform_set; |
| 952 | bool bound = false; |
| 953 | }; |
| 954 | |
| 955 | struct State { |
| 956 | HashSet<Texture *> textures_to_sampled_layout; |
| 957 | SetState sets[MAX_UNIFORM_SETS]; |
| 958 | uint32_t set_count = 0; |
| 959 | RID pipeline; |
| 960 | RID pipeline_shader; |
| 961 | uint32_t local_group_size[3] = { 0, 0, 0 }; |
| 962 | VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; |
| 963 | uint32_t pipeline_push_constant_stages = 0; |
| 964 | bool allow_draw_overlap; |
| 965 | } state; |
| 966 | |
| 967 | #ifdef DEBUG_ENABLED |
| 968 | struct Validation { |
| 969 | bool active = true; // Means command buffer was not closed, so you can keep adding things. |
| 970 | Vector<uint32_t> set_formats; |
| 971 | Vector<bool> set_bound; |
| 972 | Vector<RID> set_rids; |
| 973 | // Last pipeline set values. |
| 974 | bool pipeline_active = false; |
| 975 | RID pipeline_shader; |
| 976 | uint32_t invalid_set_from = 0; |
| 977 | uint32_t pipeline_push_constant_size = 0; |
| 978 | bool pipeline_push_constant_supplied = false; |
| 979 | } validation; |
| 980 | #endif |
| 981 | }; |
| 982 | |
| 983 | ComputeList *compute_list = nullptr; |
| 984 | |
| 985 | void _compute_list_add_barrier(BitField<BarrierMask> p_post_barrier, uint32_t p_barrier_flags, uint32_t p_access_flags); |
| 986 | |
| 987 | /**************************/ |
| 988 | /**** FRAME MANAGEMENT ****/ |
| 989 | /**************************/ |
| 990 | |
// This is the frame structure. There are normally
// 3 of these (used for triple buffering), or 2
// (double buffering). They are cycled constantly.
//
// Each contains two command buffers, one that is
// used internally for setting up (creating stuff)
// and another used mostly for drawing.
//
// Each also contains a list of things that need
// to be disposed of when deleted, which can't
// happen immediately due to the asynchronous
// nature of the GPU. They will get deleted
// when the frame is cycled.
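//
// A rough sketch (assumed flow) of the deferral: freeing a buffer while the
// GPU may still be using it only moves it to the current frame's dispose
// list; the actual vkDestroy*/vmaDestroy* calls happen once that frame
// cycles around again (see _free_pending_resources):
//
//     frames[frame].buffers_to_dispose_of.push_back(buffer); // In free().
//     // ...frame_count frames later, in _free_pending_resources(p_frame):
//     vmaDestroyBuffer(allocator, b.buffer, b.allocation);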
| 1004 | |
| 1005 | struct Frame { |
// Lists in usage order, from last to be freed to first to be freed.
| 1007 | List<Buffer> buffers_to_dispose_of; |
| 1008 | List<Texture> textures_to_dispose_of; |
| 1009 | List<Framebuffer> framebuffers_to_dispose_of; |
| 1010 | List<VkSampler> samplers_to_dispose_of; |
| 1011 | List<Shader> shaders_to_dispose_of; |
| 1012 | List<VkBufferView> buffer_views_to_dispose_of; |
| 1013 | List<UniformSet> uniform_sets_to_dispose_of; |
| 1014 | List<RenderPipeline> render_pipelines_to_dispose_of; |
| 1015 | List<ComputePipeline> compute_pipelines_to_dispose_of; |
| 1016 | |
| 1017 | VkCommandPool command_pool = VK_NULL_HANDLE; |
| 1018 | VkCommandBuffer setup_command_buffer = VK_NULL_HANDLE; // Used at the beginning of every frame for set-up. |
VkCommandBuffer draw_command_buffer = VK_NULL_HANDLE; // Used during the frame, mostly for drawing.
| 1020 | |
| 1021 | struct Timestamp { |
| 1022 | String description; |
| 1023 | uint64_t value = 0; |
| 1024 | }; |
| 1025 | |
| 1026 | VkQueryPool timestamp_pool; |
| 1027 | |
| 1028 | TightLocalVector<String> timestamp_names; |
| 1029 | TightLocalVector<uint64_t> timestamp_cpu_values; |
| 1030 | uint32_t timestamp_count = 0; |
| 1031 | TightLocalVector<String> timestamp_result_names; |
| 1032 | TightLocalVector<uint64_t> timestamp_cpu_result_values; |
| 1033 | TightLocalVector<uint64_t> timestamp_result_values; |
| 1034 | uint32_t timestamp_result_count = 0; |
| 1035 | uint64_t index = 0; |
| 1036 | }; |
| 1037 | |
| 1038 | uint32_t max_timestamp_query_elements = 0; |
| 1039 | |
| 1040 | TightLocalVector<Frame> frames; // Frames available, for main device they are cycled (usually 3), for local devices only 1. |
| 1041 | int frame = 0; // Current frame. |
| 1042 | int frame_count = 0; // Total amount of frames. |
| 1043 | uint64_t frames_drawn = 0; |
| 1044 | RID local_device; |
| 1045 | bool local_device_processing = false; |
| 1046 | |
| 1047 | void _free_pending_resources(int p_frame); |
| 1048 | |
| 1049 | VmaAllocator allocator = nullptr; |
| 1050 | HashMap<uint32_t, VmaPool> small_allocs_pools; |
| 1051 | VmaPool _find_or_create_small_allocs_pool(uint32_t p_mem_type_index); |
| 1052 | |
| 1053 | VulkanContext *context = nullptr; |
| 1054 | |
| 1055 | uint64_t image_memory = 0; |
| 1056 | uint64_t buffer_memory = 0; |
| 1057 | |
| 1058 | void _free_internal(RID p_id); |
| 1059 | void _flush(bool p_current_frame); |
| 1060 | |
| 1061 | bool screen_prepared = false; |
| 1062 | |
| 1063 | template <class T> |
| 1064 | void _free_rids(T &p_owner, const char *p_type); |
| 1065 | |
| 1066 | void _finalize_command_bufers(); |
| 1067 | void _begin_frame(); |
| 1068 | |
| 1069 | #ifdef DEV_ENABLED |
| 1070 | HashMap<RID, String> resource_names; |
| 1071 | #endif |
| 1072 | |
| 1073 | VkSampleCountFlagBits _ensure_supported_sample_count(TextureSamples p_requested_sample_count) const; |
| 1074 | |
| 1075 | public: |
| 1076 | virtual RID texture_create(const TextureFormat &p_format, const TextureView &p_view, const Vector<Vector<uint8_t>> &p_data = Vector<Vector<uint8_t>>()); |
| 1077 | virtual RID texture_create_shared(const TextureView &p_view, RID p_with_texture); |
| 1078 | virtual RID texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, uint64_t p_flags, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers); |
| 1079 | |
| 1080 | virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps = 1, TextureSliceType p_slice_type = TEXTURE_SLICE_2D, uint32_t p_layers = 0); |
| 1081 | virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1082 | virtual Vector<uint8_t> texture_get_data(RID p_texture, uint32_t p_layer); |
| 1083 | |
| 1084 | virtual bool texture_is_format_supported_for_usage(DataFormat p_format, BitField<RenderingDevice::TextureUsageBits> p_usage) const; |
| 1085 | virtual bool texture_is_shared(RID p_texture); |
| 1086 | virtual bool texture_is_valid(RID p_texture); |
| 1087 | virtual TextureFormat texture_get_format(RID p_texture); |
| 1088 | virtual Size2i texture_size(RID p_texture); |
| 1089 | virtual uint64_t texture_get_native_handle(RID p_texture); |
| 1090 | |
| 1091 | virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1092 | virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1093 | virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1094 | |
| 1095 | /*********************/ |
| 1096 | /**** FRAMEBUFFER ****/ |
| 1097 | /*********************/ |
| 1098 | |
| 1099 | virtual FramebufferFormatID framebuffer_format_create(const Vector<AttachmentFormat> &p_format, uint32_t p_view_count = 1); |
| 1100 | virtual FramebufferFormatID framebuffer_format_create_multipass(const Vector<AttachmentFormat> &p_attachments, const Vector<FramebufferPass> &p_passes, uint32_t p_view_count = 1); |
| 1101 | virtual FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1); |
| 1102 | virtual TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format, uint32_t p_pass = 0); |
| 1103 | |
| 1104 | virtual RID framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check = INVALID_ID, uint32_t p_view_count = 1); |
| 1105 | virtual RID framebuffer_create_multipass(const Vector<RID> &p_texture_attachments, const Vector<FramebufferPass> &p_passes, FramebufferFormatID p_format_check = INVALID_ID, uint32_t p_view_count = 1); |
| 1106 | virtual RID framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples = TEXTURE_SAMPLES_1, FramebufferFormatID p_format_check = INVALID_ID); |
| 1107 | virtual bool framebuffer_is_valid(RID p_framebuffer) const; |
| 1108 | virtual void framebuffer_set_invalidation_callback(RID p_framebuffer, InvalidationCallback p_callback, void *p_userdata); |
| 1109 | |
| 1110 | virtual FramebufferFormatID framebuffer_get_format(RID p_framebuffer); |
| 1111 | |
| 1112 | /*****************/ |
| 1113 | /**** SAMPLER ****/ |
| 1114 | /*****************/ |
| 1115 | |
| 1116 | virtual RID sampler_create(const SamplerState &p_state); |
| 1117 | virtual bool sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_sampler_filter) const; |
| 1118 | |
| 1119 | /**********************/ |
| 1120 | /**** VERTEX ARRAY ****/ |
| 1121 | /**********************/ |
| 1122 | |
| 1123 | virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_as_storage = false); |
| 1124 | |
// Internally reference counted, this ID is guaranteed to be unique for the same description, but needs to be freed as many times as it was allocated.
| 1126 | virtual VertexFormatID vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats); |
| 1127 | virtual RID vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers, const Vector<uint64_t> &p_offsets = Vector<uint64_t>()); |
| 1128 | |
| 1129 | virtual RID index_buffer_create(uint32_t p_size_indices, IndexBufferFormat p_format, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_restart_indices = false); |
| 1130 | |
| 1131 | virtual RID index_array_create(RID p_index_buffer, uint32_t p_index_offset, uint32_t p_index_count); |
| 1132 | |
| 1133 | /****************/ |
| 1134 | /**** SHADER ****/ |
| 1135 | /****************/ |
| 1136 | |
| 1137 | virtual String shader_get_binary_cache_key() const; |
virtual Vector<uint8_t> shader_compile_binary_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name = "");
| 1139 | |
| 1140 | virtual RID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, RID p_placeholder = RID()); |
| 1141 | virtual RID shader_create_placeholder(); |
| 1142 | |
| 1143 | virtual uint32_t shader_get_vertex_input_attribute_mask(RID p_shader); |
| 1144 | |
| 1145 | /*****************/ |
| 1146 | /**** UNIFORM ****/ |
| 1147 | /*****************/ |
| 1148 | |
| 1149 | virtual RID uniform_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>()); |
| 1150 | virtual RID storage_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>(), BitField<StorageBufferUsage> p_usage = 0); |
| 1151 | virtual RID texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector<uint8_t> &p_data = Vector<uint8_t>()); |
| 1152 | |
| 1153 | virtual RID uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set); |
| 1154 | virtual bool uniform_set_is_valid(RID p_uniform_set); |
| 1155 | virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata); |
| 1156 | |
| 1157 | virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1158 | virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); // Works for any buffer. |
| 1159 | virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1160 | virtual Vector<uint8_t> buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0); |
| 1161 | |
| 1162 | /*************************/ |
| 1163 | /**** RENDER PIPELINE ****/ |
| 1164 | /*************************/ |
| 1165 | |
| 1166 | virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, BitField<PipelineDynamicStateFlags> p_dynamic_state_flags = 0, uint32_t p_for_render_pass = 0, const Vector<PipelineSpecializationConstant> &p_specialization_constants = Vector<PipelineSpecializationConstant>()); |
| 1167 | virtual bool render_pipeline_is_valid(RID p_pipeline); |
| 1168 | |
| 1169 | /**************************/ |
| 1170 | /**** COMPUTE PIPELINE ****/ |
| 1171 | /**************************/ |
| 1172 | |
| 1173 | virtual RID compute_pipeline_create(RID p_shader, const Vector<PipelineSpecializationConstant> &p_specialization_constants = Vector<PipelineSpecializationConstant>()); |
| 1174 | virtual bool compute_pipeline_is_valid(RID p_pipeline); |
| 1175 | |
| 1176 | /****************/ |
| 1177 | /**** SCREEN ****/ |
| 1178 | /****************/ |
| 1179 | |
| 1180 | virtual int screen_get_width(DisplayServer::WindowID p_screen = 0) const; |
| 1181 | virtual int screen_get_height(DisplayServer::WindowID p_screen = 0) const; |
| 1182 | virtual FramebufferFormatID screen_get_framebuffer_format() const; |
| 1183 | |
| 1184 | /********************/ |
| 1185 | /**** DRAW LISTS ****/ |
| 1186 | /********************/ |
| 1187 | |
| 1188 | virtual DrawListID draw_list_begin_for_screen(DisplayServer::WindowID p_screen = 0, const Color &p_clear_color = Color()); |
| 1189 | |
| 1190 | virtual DrawListID draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>()); |
| 1191 | virtual Error draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>()); |
| 1192 | |
| 1193 | virtual void draw_list_set_blend_constants(DrawListID p_list, const Color &p_color); |
| 1194 | virtual void draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline); |
| 1195 | virtual void draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index); |
| 1196 | virtual void draw_list_bind_vertex_array(DrawListID p_list, RID p_vertex_array); |
| 1197 | virtual void draw_list_bind_index_array(DrawListID p_list, RID p_index_array); |
| 1198 | virtual void draw_list_set_line_width(DrawListID p_list, float p_width); |
| 1199 | virtual void draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size); |
| 1200 | |
| 1201 | virtual void draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances = 1, uint32_t p_procedural_vertices = 0); |
| 1202 | |
| 1203 | virtual void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect); |
| 1204 | virtual void draw_list_disable_scissor(DrawListID p_list); |
| 1205 | |
| 1206 | virtual uint32_t draw_list_get_current_pass(); |
| 1207 | virtual DrawListID draw_list_switch_to_next_pass(); |
| 1208 | virtual Error draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids); |
| 1209 | |
| 1210 | virtual void draw_list_end(BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1211 | |
| 1212 | /***********************/ |
| 1213 | /**** COMPUTE LISTS ****/ |
| 1214 | /***********************/ |
| 1215 | |
| 1216 | virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false); |
| 1217 | virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); |
| 1218 | virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); |
| 1219 | virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size); |
| 1220 | virtual void compute_list_add_barrier(ComputeListID p_list); |
| 1221 | |
| 1222 | virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); |
| 1223 | virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); |
| 1224 | virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); |
| 1225 | virtual void compute_list_end(BitField<BarrierMask> p_post_barrier = BARRIER_MASK_ALL_BARRIERS); |
| 1226 | |
| 1227 | virtual void barrier(BitField<BarrierMask> p_from = BARRIER_MASK_ALL_BARRIERS, BitField<BarrierMask> p_to = BARRIER_MASK_ALL_BARRIERS); |
| 1228 | virtual void full_barrier(); |
| 1229 | |
| 1230 | /**************/ |
| 1231 | /**** FREE ****/ |
| 1232 | /**************/ |
| 1233 | |
| 1234 | virtual void free(RID p_id); |
| 1235 | |
| 1236 | /****************/ |
| 1237 | /**** Timing ****/ |
| 1238 | /****************/ |
| 1239 | |
| 1240 | virtual void capture_timestamp(const String &p_name); |
| 1241 | virtual uint32_t get_captured_timestamps_count() const; |
| 1242 | virtual uint64_t get_captured_timestamps_frame() const; |
| 1243 | virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const; |
| 1244 | virtual uint64_t get_captured_timestamp_cpu_time(uint32_t p_index) const; |
| 1245 | virtual String get_captured_timestamp_name(uint32_t p_index) const; |
| 1246 | |
| 1247 | /****************/ |
| 1248 | /**** Limits ****/ |
| 1249 | /****************/ |
| 1250 | |
| 1251 | virtual uint64_t limit_get(Limit p_limit) const; |
| 1252 | |
| 1253 | virtual void prepare_screen_for_drawing(); |
| 1254 | void initialize(VulkanContext *p_context, bool p_local_device = false); |
| 1255 | void finalize(); |
| 1256 | |
| 1257 | virtual void swap_buffers(); // For main device. |
| 1258 | |
| 1259 | virtual void submit(); // For local device. |
| 1260 | virtual void sync(); // For local device. |
| 1261 | |
| 1262 | virtual uint32_t get_frame_delay() const; |
| 1263 | |
| 1264 | virtual RenderingDevice *create_local_device(); |
| 1265 | |
| 1266 | virtual uint64_t get_memory_usage(MemoryType p_type) const; |
| 1267 | |
| 1268 | virtual void set_resource_name(RID p_id, const String p_name); |
| 1269 | |
| 1270 | virtual void draw_command_begin_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)); |
| 1271 | virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)); |
| 1272 | virtual void draw_command_end_label(); |
| 1273 | |
| 1274 | virtual String get_device_vendor_name() const; |
| 1275 | virtual String get_device_name() const; |
| 1276 | virtual RenderingDevice::DeviceType get_device_type() const; |
| 1277 | virtual String get_device_api_version() const; |
| 1278 | virtual String get_device_pipeline_cache_uuid() const; |
| 1279 | |
| 1280 | virtual uint64_t get_driver_resource(DriverResource p_resource, RID p_rid = RID(), uint64_t p_index = 0); |
| 1281 | |
| 1282 | virtual bool has_feature(const Features p_feature) const; |
| 1283 | |
| 1284 | RenderingDeviceVulkan(); |
| 1285 | ~RenderingDeviceVulkan(); |
| 1286 | }; |
| 1287 | |
| 1288 | #endif // RENDERING_DEVICE_VULKAN_H |
| 1289 | |