1/**************************************************************************/
2/* cluster_builder_rd.h */
3/**************************************************************************/
4/* This file is part of: */
5/* GODOT ENGINE */
6/* https://godotengine.org */
7/**************************************************************************/
8/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10/* */
11/* Permission is hereby granted, free of charge, to any person obtaining */
12/* a copy of this software and associated documentation files (the */
13/* "Software"), to deal in the Software without restriction, including */
14/* without limitation the rights to use, copy, modify, merge, publish, */
15/* distribute, sublicense, and/or sell copies of the Software, and to */
16/* permit persons to whom the Software is furnished to do so, subject to */
17/* the following conditions: */
18/* */
19/* The above copyright notice and this permission notice shall be */
20/* included in all copies or substantial portions of the Software. */
21/* */
22/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29/**************************************************************************/
30
31#ifndef CLUSTER_BUILDER_RD_H
32#define CLUSTER_BUILDER_RD_H
33
34#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h"
35#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h"
36#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h"
37#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
38
39class ClusterBuilderSharedDataRD {
40 friend class ClusterBuilderRD;
41
42 RID sphere_vertex_buffer;
43 RID sphere_vertex_array;
44 RID sphere_index_buffer;
45 RID sphere_index_array;
46 float sphere_overfit = 0.0; // Because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area.
47
48 RID cone_vertex_buffer;
49 RID cone_vertex_array;
50 RID cone_index_buffer;
51 RID cone_index_array;
52 float cone_overfit = 0.0; // Because an cone mesh is not a perfect cone, we need to enlarge it to cover the actual cone area.
53
54 RID box_vertex_buffer;
55 RID box_vertex_array;
56 RID box_index_buffer;
57 RID box_index_array;
58
59 enum Divisor {
60 DIVISOR_1,
61 DIVISOR_2,
62 DIVISOR_4,
63 };
64
65 struct ClusterRender {
66 struct PushConstant {
67 uint32_t base_index;
68 uint32_t pad0;
69 uint32_t pad1;
70 uint32_t pad2;
71 };
72
73 ClusterRenderShaderRD cluster_render_shader;
74 RID shader_version;
75 RID shader;
76
77 enum PipelineVersion {
78 PIPELINE_NORMAL,
79 PIPELINE_MSAA,
80 PIPELINE_MAX
81 };
82
83 RID shader_pipelines[PIPELINE_MAX];
84 } cluster_render;
85
86 struct ClusterStore {
87 struct PushConstant {
88 uint32_t cluster_render_data_size; // how much data for a single cluster takes
89 uint32_t max_render_element_count_div_32; // divided by 32
90 uint32_t cluster_screen_size[2];
91 uint32_t render_element_count_div_32; // divided by 32
92 uint32_t max_cluster_element_count_div_32; // divided by 32
93
94 uint32_t pad1;
95 uint32_t pad2;
96 };
97
98 ClusterStoreShaderRD cluster_store_shader;
99 RID shader_version;
100 RID shader;
101 RID shader_pipeline;
102 } cluster_store;
103
104 struct ClusterDebug {
105 struct PushConstant {
106 uint32_t screen_size[2];
107 uint32_t cluster_screen_size[2];
108
109 uint32_t cluster_shift;
110 uint32_t cluster_type;
111 float z_near;
112 float z_far;
113
114 uint32_t orthogonal;
115 uint32_t max_cluster_element_count_div_32;
116
117 uint32_t pad1;
118 uint32_t pad2;
119 };
120
121 ClusterDebugShaderRD cluster_debug_shader;
122 RID shader_version;
123 RID shader;
124 RID shader_pipeline;
125 } cluster_debug;
126
127public:
128 ClusterBuilderSharedDataRD();
129 ~ClusterBuilderSharedDataRD();
130};
131
132class ClusterBuilderRD {
133public:
134 static constexpr float WIDE_SPOT_ANGLE_THRESHOLD_DEG = 60.0f;
135
136 enum LightType {
137 LIGHT_TYPE_OMNI,
138 LIGHT_TYPE_SPOT
139 };
140
141 enum BoxType {
142 BOX_TYPE_REFLECTION_PROBE,
143 BOX_TYPE_DECAL,
144 };
145
146 enum ElementType {
147 ELEMENT_TYPE_OMNI_LIGHT,
148 ELEMENT_TYPE_SPOT_LIGHT,
149 ELEMENT_TYPE_DECAL,
150 ELEMENT_TYPE_REFLECTION_PROBE,
151 ELEMENT_TYPE_MAX,
152 };
153
154private:
155 ClusterBuilderSharedDataRD *shared = nullptr;
156
157 struct RenderElementData {
158 uint32_t type; // 0-4
159 uint32_t touches_near;
160 uint32_t touches_far;
161 uint32_t original_index;
162 float transform_inv[12]; // Transposed transform for less space.
163 float scale[3];
164 uint32_t has_wide_spot_angle;
165 }; // Keep aligned to 32 bytes.
166
167 uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {};
168 uint32_t max_elements_by_type = 0;
169
170 RenderElementData *render_elements = nullptr;
171 uint32_t render_element_count = 0;
172 uint32_t render_element_max = 0;
173
174 Transform3D view_xform;
175 Projection adjusted_projection;
176 Projection projection;
177 float z_far = 0;
178 float z_near = 0;
179 bool camera_orthogonal = false;
180
181 enum Divisor {
182 DIVISOR_1,
183 DIVISOR_2,
184 DIVISOR_4,
185 };
186
187 uint32_t cluster_size = 32;
188 bool use_msaa = true;
189 Divisor divisor = DIVISOR_4;
190
191 Size2i screen_size;
192 Size2i cluster_screen_size;
193
194 RID framebuffer;
195 RID cluster_render_buffer; // Used for creating.
196 RID cluster_buffer; // Used for rendering.
197 RID element_buffer; // Used for storing, to hint element touches far plane or near plane.
198 uint32_t cluster_render_buffer_size = 0;
199 uint32_t cluster_buffer_size = 0;
200
201 RID cluster_render_uniform_set;
202 RID cluster_store_uniform_set;
203
204 // Persistent data.
205
206 void _clear();
207
208 struct StateUniform {
209 float projection[16];
210 float inv_z_far;
211 uint32_t screen_to_clusters_shift; // Shift to obtain coordinates in block indices.
212 uint32_t cluster_screen_width;
213 uint32_t cluster_data_size; // How much data is needed for a single cluster.
214 uint32_t cluster_depth_offset;
215
216 uint32_t pad0;
217 uint32_t pad1;
218 uint32_t pad2;
219 };
220
221 RID state_uniform;
222
223 RID debug_uniform_set;
224
225public:
226 void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer);
227
228 void begin(const Transform3D &p_view_transform, const Projection &p_cam_projection, bool p_flip_y);
229
230 _FORCE_INLINE_ void add_light(LightType p_type, const Transform3D &p_transform, float p_radius, float p_spot_aperture) {
231 if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) {
232 return; // Max number elements reached.
233 }
234 if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) {
235 return; // Max number elements reached.
236 }
237
238 RenderElementData &e = render_elements[render_element_count];
239
240 Transform3D xform = view_xform * p_transform;
241
242 float radius = xform.basis.get_uniform_scale();
243 if (radius < 0.98 || radius > 1.02) {
244 xform.basis.orthonormalize();
245 }
246
247 radius *= p_radius;
248
249 if (p_type == LIGHT_TYPE_OMNI) {
250 radius *= shared->sphere_overfit; // Overfit icosphere.
251
252 float depth = -xform.origin.z;
253 if (camera_orthogonal) {
254 e.touches_near = (depth - radius) < z_near;
255 } else {
256 // Contains camera inside light.
257 float radius2 = radius * shared->sphere_overfit; // Overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex)
258 e.touches_near = xform.origin.length_squared() < radius2 * radius2;
259 }
260
261 e.touches_far = (depth + radius) > z_far;
262 e.scale[0] = radius;
263 e.scale[1] = radius;
264 e.scale[2] = radius;
265 e.type = ELEMENT_TYPE_OMNI_LIGHT;
266 e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT];
267
268 RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);
269
270 cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++;
271
272 } else /*LIGHT_TYPE_SPOT */ {
273 radius *= shared->cone_overfit; // Overfit icosphere
274
275 real_t len = Math::tan(Math::deg_to_rad(p_spot_aperture)) * radius;
276 // Approximate, probably better to use a cone support function.
277 float max_d = -1e20;
278 float min_d = 1e20;
279#define CONE_MINMAX(m_x, m_y) \
280 { \
281 float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \
282 min_d = MIN(d, min_d); \
283 max_d = MAX(d, max_d); \
284 }
285
286 CONE_MINMAX(1, 1);
287 CONE_MINMAX(-1, 1);
288 CONE_MINMAX(-1, -1);
289 CONE_MINMAX(1, -1);
290
291 if (camera_orthogonal) {
292 e.touches_near = min_d < z_near;
293 } else {
294 Plane base_plane(-xform.basis.get_column(Vector3::AXIS_Z), xform.origin);
295 float dist = base_plane.distance_to(Vector3());
296 if (dist >= 0 && dist < radius) {
297 // Contains camera inside light, check angle.
298 float angle = Math::rad_to_deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_column(Vector3::AXIS_Z))));
299 e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit
300 } else {
301 e.touches_near = false;
302 }
303 }
304
305 e.touches_far = max_d > z_far;
306
307 // If the spot angle is above the threshold, use a sphere instead of a cone for building the clusters
308 // since the cone gets too flat/large (spot angle close to 90 degrees) or
309 // can't even cover the affected area of the light (spot angle above 90 degrees).
310 if (p_spot_aperture > WIDE_SPOT_ANGLE_THRESHOLD_DEG) {
311 e.scale[0] = radius;
312 e.scale[1] = radius;
313 e.scale[2] = radius;
314 e.has_wide_spot_angle = true;
315 } else {
316 e.scale[0] = len * shared->cone_overfit;
317 e.scale[1] = len * shared->cone_overfit;
318 e.scale[2] = radius;
319 e.has_wide_spot_angle = false;
320 }
321
322 e.type = ELEMENT_TYPE_SPOT_LIGHT;
323 e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; // Use omni light since they share index.
324
325 RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);
326
327 cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;
328 }
329
330 render_element_count++;
331 }
332
333 _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform3D &p_transform, const Vector3 &p_half_size) {
334 if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) {
335 return; // Max number elements reached.
336 }
337 if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) {
338 return; // Max number elements reached.
339 }
340
341 RenderElementData &e = render_elements[render_element_count];
342 Transform3D xform = view_xform * p_transform;
343
344 // Extract scale and scale the matrix by it, makes things simpler.
345 Vector3 scale = p_half_size;
346 for (uint32_t i = 0; i < 3; i++) {
347 float s = xform.basis.rows[i].length();
348 scale[i] *= s;
349 xform.basis.rows[i] /= s;
350 };
351
352 float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale));
353 float depth = -xform.origin.z;
354
355 if (camera_orthogonal) {
356 e.touches_near = depth - box_depth < z_near;
357 } else {
358 // Contains camera inside box.
359 Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs();
360 e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z;
361 }
362
363 e.touches_far = depth + box_depth > z_far;
364
365 e.scale[0] = scale.x;
366 e.scale[1] = scale.y;
367 e.scale[2] = scale.z;
368
369 e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE;
370 e.original_index = cluster_count_by_type[e.type];
371
372 RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);
373
374 cluster_count_by_type[e.type]++;
375 render_element_count++;
376 }
377
378 void bake_cluster();
379 void debug(ElementType p_element);
380
381 RID get_cluster_buffer() const;
382 uint32_t get_cluster_size() const;
383 uint32_t get_max_cluster_elements() const;
384
385 void set_shared(ClusterBuilderSharedDataRD *p_shared);
386
387 ClusterBuilderRD();
388 ~ClusterBuilderRD();
389};
390
391#endif // CLUSTER_BUILDER_RD_H
392