1 | /**************************************************************************/ |
2 | /* cluster_builder_rd.h */ |
3 | /**************************************************************************/ |
4 | /* This file is part of: */ |
5 | /* GODOT ENGINE */ |
6 | /* https://godotengine.org */ |
7 | /**************************************************************************/ |
8 | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ |
9 | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ |
10 | /* */ |
11 | /* Permission is hereby granted, free of charge, to any person obtaining */ |
12 | /* a copy of this software and associated documentation files (the */ |
13 | /* "Software"), to deal in the Software without restriction, including */ |
14 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
15 | /* distribute, sublicense, and/or sell copies of the Software, and to */ |
16 | /* permit persons to whom the Software is furnished to do so, subject to */ |
17 | /* the following conditions: */ |
18 | /* */ |
19 | /* The above copyright notice and this permission notice shall be */ |
20 | /* included in all copies or substantial portions of the Software. */ |
21 | /* */ |
22 | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ |
23 | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ |
24 | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ |
25 | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ |
26 | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ |
27 | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ |
28 | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
29 | /**************************************************************************/ |
30 | |
31 | #ifndef CLUSTER_BUILDER_RD_H |
32 | #define CLUSTER_BUILDER_RD_H |
33 | |
34 | #include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h" |
35 | #include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h" |
36 | #include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h" |
37 | #include "servers/rendering/renderer_rd/storage_rd/material_storage.h" |
38 | |
39 | class ClusterBuilderSharedDataRD { |
40 | friend class ClusterBuilderRD; |
41 | |
42 | RID sphere_vertex_buffer; |
43 | RID sphere_vertex_array; |
44 | RID sphere_index_buffer; |
45 | RID sphere_index_array; |
46 | float sphere_overfit = 0.0; // Because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area. |
47 | |
48 | RID cone_vertex_buffer; |
49 | RID cone_vertex_array; |
50 | RID cone_index_buffer; |
51 | RID cone_index_array; |
52 | float cone_overfit = 0.0; // Because an cone mesh is not a perfect cone, we need to enlarge it to cover the actual cone area. |
53 | |
54 | RID box_vertex_buffer; |
55 | RID box_vertex_array; |
56 | RID box_index_buffer; |
57 | RID box_index_array; |
58 | |
59 | enum Divisor { |
60 | DIVISOR_1, |
61 | DIVISOR_2, |
62 | DIVISOR_4, |
63 | }; |
64 | |
65 | struct ClusterRender { |
66 | struct PushConstant { |
67 | uint32_t base_index; |
68 | uint32_t pad0; |
69 | uint32_t pad1; |
70 | uint32_t pad2; |
71 | }; |
72 | |
73 | ClusterRenderShaderRD cluster_render_shader; |
74 | RID shader_version; |
75 | RID shader; |
76 | |
77 | enum PipelineVersion { |
78 | PIPELINE_NORMAL, |
79 | PIPELINE_MSAA, |
80 | PIPELINE_MAX |
81 | }; |
82 | |
83 | RID shader_pipelines[PIPELINE_MAX]; |
84 | } cluster_render; |
85 | |
86 | struct ClusterStore { |
87 | struct PushConstant { |
88 | uint32_t cluster_render_data_size; // how much data for a single cluster takes |
89 | uint32_t max_render_element_count_div_32; // divided by 32 |
90 | uint32_t cluster_screen_size[2]; |
91 | uint32_t render_element_count_div_32; // divided by 32 |
92 | uint32_t max_cluster_element_count_div_32; // divided by 32 |
93 | |
94 | uint32_t pad1; |
95 | uint32_t pad2; |
96 | }; |
97 | |
98 | ClusterStoreShaderRD cluster_store_shader; |
99 | RID shader_version; |
100 | RID shader; |
101 | RID shader_pipeline; |
102 | } cluster_store; |
103 | |
104 | struct ClusterDebug { |
105 | struct PushConstant { |
106 | uint32_t screen_size[2]; |
107 | uint32_t cluster_screen_size[2]; |
108 | |
109 | uint32_t cluster_shift; |
110 | uint32_t cluster_type; |
111 | float z_near; |
112 | float z_far; |
113 | |
114 | uint32_t orthogonal; |
115 | uint32_t max_cluster_element_count_div_32; |
116 | |
117 | uint32_t pad1; |
118 | uint32_t pad2; |
119 | }; |
120 | |
121 | ClusterDebugShaderRD cluster_debug_shader; |
122 | RID shader_version; |
123 | RID shader; |
124 | RID shader_pipeline; |
125 | } cluster_debug; |
126 | |
127 | public: |
128 | ClusterBuilderSharedDataRD(); |
129 | ~ClusterBuilderSharedDataRD(); |
130 | }; |
131 | |
132 | class ClusterBuilderRD { |
133 | public: |
134 | static constexpr float WIDE_SPOT_ANGLE_THRESHOLD_DEG = 60.0f; |
135 | |
136 | enum LightType { |
137 | LIGHT_TYPE_OMNI, |
138 | LIGHT_TYPE_SPOT |
139 | }; |
140 | |
141 | enum BoxType { |
142 | BOX_TYPE_REFLECTION_PROBE, |
143 | BOX_TYPE_DECAL, |
144 | }; |
145 | |
146 | enum ElementType { |
147 | ELEMENT_TYPE_OMNI_LIGHT, |
148 | ELEMENT_TYPE_SPOT_LIGHT, |
149 | ELEMENT_TYPE_DECAL, |
150 | ELEMENT_TYPE_REFLECTION_PROBE, |
151 | ELEMENT_TYPE_MAX, |
152 | }; |
153 | |
154 | private: |
155 | ClusterBuilderSharedDataRD *shared = nullptr; |
156 | |
157 | struct RenderElementData { |
158 | uint32_t type; // 0-4 |
159 | uint32_t touches_near; |
160 | uint32_t touches_far; |
161 | uint32_t original_index; |
162 | float transform_inv[12]; // Transposed transform for less space. |
163 | float scale[3]; |
164 | uint32_t has_wide_spot_angle; |
165 | }; // Keep aligned to 32 bytes. |
166 | |
167 | uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {}; |
168 | uint32_t max_elements_by_type = 0; |
169 | |
170 | RenderElementData *render_elements = nullptr; |
171 | uint32_t render_element_count = 0; |
172 | uint32_t render_element_max = 0; |
173 | |
174 | Transform3D view_xform; |
175 | Projection adjusted_projection; |
176 | Projection projection; |
177 | float z_far = 0; |
178 | float z_near = 0; |
179 | bool camera_orthogonal = false; |
180 | |
181 | enum Divisor { |
182 | DIVISOR_1, |
183 | DIVISOR_2, |
184 | DIVISOR_4, |
185 | }; |
186 | |
187 | uint32_t cluster_size = 32; |
188 | bool use_msaa = true; |
189 | Divisor divisor = DIVISOR_4; |
190 | |
191 | Size2i screen_size; |
192 | Size2i cluster_screen_size; |
193 | |
194 | RID framebuffer; |
195 | RID cluster_render_buffer; // Used for creating. |
196 | RID cluster_buffer; // Used for rendering. |
197 | RID element_buffer; // Used for storing, to hint element touches far plane or near plane. |
198 | uint32_t cluster_render_buffer_size = 0; |
199 | uint32_t cluster_buffer_size = 0; |
200 | |
201 | RID cluster_render_uniform_set; |
202 | RID cluster_store_uniform_set; |
203 | |
204 | // Persistent data. |
205 | |
206 | void _clear(); |
207 | |
208 | struct StateUniform { |
209 | float projection[16]; |
210 | float inv_z_far; |
211 | uint32_t screen_to_clusters_shift; // Shift to obtain coordinates in block indices. |
212 | uint32_t cluster_screen_width; |
213 | uint32_t cluster_data_size; // How much data is needed for a single cluster. |
214 | uint32_t cluster_depth_offset; |
215 | |
216 | uint32_t pad0; |
217 | uint32_t pad1; |
218 | uint32_t pad2; |
219 | }; |
220 | |
221 | RID state_uniform; |
222 | |
223 | RID debug_uniform_set; |
224 | |
225 | public: |
226 | void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer); |
227 | |
228 | void begin(const Transform3D &p_view_transform, const Projection &p_cam_projection, bool p_flip_y); |
229 | |
230 | _FORCE_INLINE_ void add_light(LightType p_type, const Transform3D &p_transform, float p_radius, float p_spot_aperture) { |
231 | if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) { |
232 | return; // Max number elements reached. |
233 | } |
234 | if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) { |
235 | return; // Max number elements reached. |
236 | } |
237 | |
238 | RenderElementData &e = render_elements[render_element_count]; |
239 | |
240 | Transform3D xform = view_xform * p_transform; |
241 | |
242 | float radius = xform.basis.get_uniform_scale(); |
243 | if (radius < 0.98 || radius > 1.02) { |
244 | xform.basis.orthonormalize(); |
245 | } |
246 | |
247 | radius *= p_radius; |
248 | |
249 | if (p_type == LIGHT_TYPE_OMNI) { |
250 | radius *= shared->sphere_overfit; // Overfit icosphere. |
251 | |
252 | float depth = -xform.origin.z; |
253 | if (camera_orthogonal) { |
254 | e.touches_near = (depth - radius) < z_near; |
255 | } else { |
256 | // Contains camera inside light. |
257 | float radius2 = radius * shared->sphere_overfit; // Overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex) |
258 | e.touches_near = xform.origin.length_squared() < radius2 * radius2; |
259 | } |
260 | |
261 | e.touches_far = (depth + radius) > z_far; |
262 | e.scale[0] = radius; |
263 | e.scale[1] = radius; |
264 | e.scale[2] = radius; |
265 | e.type = ELEMENT_TYPE_OMNI_LIGHT; |
266 | e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]; |
267 | |
268 | RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv); |
269 | |
270 | cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++; |
271 | |
272 | } else /*LIGHT_TYPE_SPOT */ { |
273 | radius *= shared->cone_overfit; // Overfit icosphere |
274 | |
275 | real_t len = Math::tan(Math::deg_to_rad(p_spot_aperture)) * radius; |
276 | // Approximate, probably better to use a cone support function. |
277 | float max_d = -1e20; |
278 | float min_d = 1e20; |
279 | #define CONE_MINMAX(m_x, m_y) \ |
280 | { \ |
281 | float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \ |
282 | min_d = MIN(d, min_d); \ |
283 | max_d = MAX(d, max_d); \ |
284 | } |
285 | |
286 | CONE_MINMAX(1, 1); |
287 | CONE_MINMAX(-1, 1); |
288 | CONE_MINMAX(-1, -1); |
289 | CONE_MINMAX(1, -1); |
290 | |
291 | if (camera_orthogonal) { |
292 | e.touches_near = min_d < z_near; |
293 | } else { |
294 | Plane base_plane(-xform.basis.get_column(Vector3::AXIS_Z), xform.origin); |
295 | float dist = base_plane.distance_to(Vector3()); |
296 | if (dist >= 0 && dist < radius) { |
297 | // Contains camera inside light, check angle. |
298 | float angle = Math::rad_to_deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_column(Vector3::AXIS_Z)))); |
299 | e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit |
300 | } else { |
301 | e.touches_near = false; |
302 | } |
303 | } |
304 | |
305 | e.touches_far = max_d > z_far; |
306 | |
307 | // If the spot angle is above the threshold, use a sphere instead of a cone for building the clusters |
308 | // since the cone gets too flat/large (spot angle close to 90 degrees) or |
309 | // can't even cover the affected area of the light (spot angle above 90 degrees). |
310 | if (p_spot_aperture > WIDE_SPOT_ANGLE_THRESHOLD_DEG) { |
311 | e.scale[0] = radius; |
312 | e.scale[1] = radius; |
313 | e.scale[2] = radius; |
314 | e.has_wide_spot_angle = true; |
315 | } else { |
316 | e.scale[0] = len * shared->cone_overfit; |
317 | e.scale[1] = len * shared->cone_overfit; |
318 | e.scale[2] = radius; |
319 | e.has_wide_spot_angle = false; |
320 | } |
321 | |
322 | e.type = ELEMENT_TYPE_SPOT_LIGHT; |
323 | e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; // Use omni light since they share index. |
324 | |
325 | RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv); |
326 | |
327 | cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++; |
328 | } |
329 | |
330 | render_element_count++; |
331 | } |
332 | |
333 | _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform3D &p_transform, const Vector3 &p_half_size) { |
334 | if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) { |
335 | return; // Max number elements reached. |
336 | } |
337 | if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) { |
338 | return; // Max number elements reached. |
339 | } |
340 | |
341 | RenderElementData &e = render_elements[render_element_count]; |
342 | Transform3D xform = view_xform * p_transform; |
343 | |
344 | // Extract scale and scale the matrix by it, makes things simpler. |
345 | Vector3 scale = p_half_size; |
346 | for (uint32_t i = 0; i < 3; i++) { |
347 | float s = xform.basis.rows[i].length(); |
348 | scale[i] *= s; |
349 | xform.basis.rows[i] /= s; |
350 | }; |
351 | |
352 | float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale)); |
353 | float depth = -xform.origin.z; |
354 | |
355 | if (camera_orthogonal) { |
356 | e.touches_near = depth - box_depth < z_near; |
357 | } else { |
358 | // Contains camera inside box. |
359 | Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs(); |
360 | e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z; |
361 | } |
362 | |
363 | e.touches_far = depth + box_depth > z_far; |
364 | |
365 | e.scale[0] = scale.x; |
366 | e.scale[1] = scale.y; |
367 | e.scale[2] = scale.z; |
368 | |
369 | e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE; |
370 | e.original_index = cluster_count_by_type[e.type]; |
371 | |
372 | RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv); |
373 | |
374 | cluster_count_by_type[e.type]++; |
375 | render_element_count++; |
376 | } |
377 | |
378 | void bake_cluster(); |
379 | void debug(ElementType p_element); |
380 | |
381 | RID get_cluster_buffer() const; |
382 | uint32_t get_cluster_size() const; |
383 | uint32_t get_max_cluster_elements() const; |
384 | |
385 | void set_shared(ClusterBuilderSharedDataRD *p_shared); |
386 | |
387 | ClusterBuilderRD(); |
388 | ~ClusterBuilderRD(); |
389 | }; |
390 | |
391 | #endif // CLUSTER_BUILDER_RD_H |
392 | |