/**************************************************************************/
/*  raycast_occlusion_cull.cpp                                            */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/
|---|
| 30 |  | 
|---|
| 31 | #include "raycast_occlusion_cull.h" | 
|---|
| 32 |  | 
|---|
| 33 | #include "core/config/project_settings.h" | 
|---|
| 34 | #include "core/object/worker_thread_pool.h" | 
|---|
| 35 | #include "core/templates/local_vector.h" | 
|---|
| 36 |  | 
|---|
| 37 | #ifdef __SSE2__ | 
|---|
| 38 | #include <pmmintrin.h> | 
|---|
| 39 | #endif | 
|---|
| 40 |  | 
|---|
| 41 | RaycastOcclusionCull *RaycastOcclusionCull::raycast_singleton = nullptr; | 
|---|
| 42 |  | 
|---|
| 43 | void RaycastOcclusionCull::RaycastHZBuffer::clear() { | 
|---|
| 44 | HZBuffer::clear(); | 
|---|
| 45 |  | 
|---|
| 46 | if (camera_rays_unaligned_buffer) { | 
|---|
| 47 | memfree(camera_rays_unaligned_buffer); | 
|---|
| 48 | camera_rays_unaligned_buffer = nullptr; | 
|---|
| 49 | camera_rays = nullptr; | 
|---|
| 50 | } | 
|---|
| 51 | camera_ray_masks.clear(); | 
|---|
| 52 | camera_rays_tile_count = 0; | 
|---|
| 53 | tile_grid_size = Size2i(); | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) { | 
|---|
| 57 | if (p_size == Size2i()) { | 
|---|
| 58 | clear(); | 
|---|
| 59 | return; | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | if (!sizes.is_empty() && p_size == sizes[0]) { | 
|---|
| 63 | return; // Size didn't change | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | HZBuffer::resize(p_size); | 
|---|
| 67 |  | 
|---|
| 68 | tile_grid_size = Size2i(Math::ceil(p_size.x / (float)TILE_SIZE), Math::ceil(p_size.y / (float)TILE_SIZE)); | 
|---|
| 69 | camera_rays_tile_count = tile_grid_size.x * tile_grid_size.y; | 
|---|
| 70 |  | 
|---|
| 71 | if (camera_rays_unaligned_buffer) { | 
|---|
| 72 | memfree(camera_rays_unaligned_buffer); | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | const int alignment = 64; // Embree requires ray packets to be 64-aligned | 
|---|
| 76 | camera_rays_unaligned_buffer = (uint8_t *)memalloc(camera_rays_tile_count * sizeof(CameraRayTile) + alignment); | 
|---|
| 77 | camera_rays = (CameraRayTile *)(camera_rays_unaligned_buffer + alignment - (((uint64_t)camera_rays_unaligned_buffer) % alignment)); | 
|---|
| 78 |  | 
|---|
| 79 | camera_ray_masks.resize(camera_rays_tile_count * TILE_RAYS); | 
|---|
| 80 | memset(camera_ray_masks.ptr(), ~0, camera_rays_tile_count * TILE_RAYS * sizeof(uint32_t)); | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) { | 
|---|
| 84 | CameraRayThreadData td; | 
|---|
| 85 | td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count(); | 
|---|
| 86 |  | 
|---|
| 87 | td.z_near = p_cam_projection.get_z_near(); | 
|---|
| 88 | td.z_far = p_cam_projection.get_z_far() * 1.05f; | 
|---|
| 89 | td.camera_pos = p_cam_transform.origin; | 
|---|
| 90 | td.camera_dir = -p_cam_transform.basis.get_column(2); | 
|---|
| 91 | td.camera_orthogonal = p_cam_orthogonal; | 
|---|
| 92 |  | 
|---|
| 93 | Projection inv_camera_matrix = p_cam_projection.inverse(); | 
|---|
| 94 | Vector3 camera_corner_proj = Vector3(-1.0f, -1.0f, -1.0f); | 
|---|
| 95 | Vector3 camera_corner_view = inv_camera_matrix.xform(camera_corner_proj); | 
|---|
| 96 | td.pixel_corner = p_cam_transform.xform(camera_corner_view); | 
|---|
| 97 |  | 
|---|
| 98 | Vector3 top_corner_proj = Vector3(-1.0f, 1.0f, -1.0f); | 
|---|
| 99 | Vector3 top_corner_view = inv_camera_matrix.xform(top_corner_proj); | 
|---|
| 100 | Vector3 top_corner_world = p_cam_transform.xform(top_corner_view); | 
|---|
| 101 |  | 
|---|
| 102 | Vector3 left_corner_proj = Vector3(1.0f, -1.0f, -1.0f); | 
|---|
| 103 | Vector3 left_corner_view = inv_camera_matrix.xform(left_corner_proj); | 
|---|
| 104 | Vector3 left_corner_world = p_cam_transform.xform(left_corner_view); | 
|---|
| 105 |  | 
|---|
| 106 | td.pixel_u_interp = left_corner_world - td.pixel_corner; | 
|---|
| 107 | td.pixel_v_interp = top_corner_world - td.pixel_corner; | 
|---|
| 108 |  | 
|---|
| 109 | debug_tex_range = td.z_far; | 
|---|
| 110 |  | 
|---|
| 111 | WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RaycastHZBuffer::_camera_rays_threaded, &td, td.thread_count, -1, true, SNAME( "RaycastOcclusionCullUpdateCamera")); | 
|---|
| 112 | WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); | 
|---|
| 113 | } | 
|---|
| 114 |  | 
|---|
| 115 | void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thread, const CameraRayThreadData *p_data) { | 
|---|
| 116 | uint32_t total_tiles = camera_rays_tile_count; | 
|---|
| 117 | uint32_t total_threads = p_data->thread_count; | 
|---|
| 118 | uint32_t from = p_thread * total_tiles / total_threads; | 
|---|
| 119 | uint32_t to = (p_thread + 1 == total_threads) ? total_tiles : ((p_thread + 1) * total_tiles / total_threads); | 
|---|
| 120 | _generate_camera_rays(p_data, from, to); | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | void RaycastOcclusionCull::RaycastHZBuffer::_generate_camera_rays(const CameraRayThreadData *p_data, int p_from, int p_to) { | 
|---|
| 124 | const Size2i &buffer_size = sizes[0]; | 
|---|
| 125 |  | 
|---|
| 126 | for (int i = p_from; i < p_to; i++) { | 
|---|
| 127 | CameraRayTile &tile = camera_rays[i]; | 
|---|
| 128 | int tile_x = (i % tile_grid_size.x) * TILE_SIZE; | 
|---|
| 129 | int tile_y = (i / tile_grid_size.x) * TILE_SIZE; | 
|---|
| 130 |  | 
|---|
| 131 | for (int j = 0; j < TILE_RAYS; j++) { | 
|---|
| 132 | int x = tile_x + j % TILE_SIZE; | 
|---|
| 133 | int y = tile_y + j / TILE_SIZE; | 
|---|
| 134 |  | 
|---|
| 135 | float u = (float(x) + 0.5f) / buffer_size.x; | 
|---|
| 136 | float v = (float(y) + 0.5f) / buffer_size.y; | 
|---|
| 137 | Vector3 pixel_pos = p_data->pixel_corner + u * p_data->pixel_u_interp + v * p_data->pixel_v_interp; | 
|---|
| 138 |  | 
|---|
| 139 | tile.ray.tnear[j] = p_data->z_near; | 
|---|
| 140 |  | 
|---|
| 141 | Vector3 dir; | 
|---|
| 142 | if (p_data->camera_orthogonal) { | 
|---|
| 143 | dir = -p_data->camera_dir; | 
|---|
| 144 | tile.ray.org_x[j] = pixel_pos.x - dir.x * p_data->z_near; | 
|---|
| 145 | tile.ray.org_y[j] = pixel_pos.y - dir.y * p_data->z_near; | 
|---|
| 146 | tile.ray.org_z[j] = pixel_pos.z - dir.z * p_data->z_near; | 
|---|
| 147 | } else { | 
|---|
| 148 | dir = (pixel_pos - p_data->camera_pos).normalized(); | 
|---|
| 149 | tile.ray.org_x[j] = p_data->camera_pos.x; | 
|---|
| 150 | tile.ray.org_y[j] = p_data->camera_pos.y; | 
|---|
| 151 | tile.ray.org_z[j] = p_data->camera_pos.z; | 
|---|
| 152 | tile.ray.tnear[j] /= dir.dot(p_data->camera_dir); | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | tile.ray.dir_x[j] = dir.x; | 
|---|
| 156 | tile.ray.dir_y[j] = dir.y; | 
|---|
| 157 | tile.ray.dir_z[j] = dir.z; | 
|---|
| 158 |  | 
|---|
| 159 | tile.ray.tfar[j] = p_data->z_far; | 
|---|
| 160 | tile.ray.time[j] = 0.0f; | 
|---|
| 161 |  | 
|---|
| 162 | tile.ray.flags[j] = 0; | 
|---|
| 163 | tile.ray.mask[j] = ~0U; | 
|---|
| 164 | tile.hit.geomID[j] = RTC_INVALID_GEOMETRY_ID; | 
|---|
| 165 | } | 
|---|
| 166 | } | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | void RaycastOcclusionCull::RaycastHZBuffer::sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal) { | 
|---|
| 170 | ERR_FAIL_COND(is_empty()); | 
|---|
| 171 |  | 
|---|
| 172 | Size2i buffer_size = sizes[0]; | 
|---|
| 173 | for (int i = 0; i < tile_grid_size.y; i++) { | 
|---|
| 174 | for (int j = 0; j < tile_grid_size.x; j++) { | 
|---|
| 175 | for (int tile_i = 0; tile_i < TILE_SIZE; tile_i++) { | 
|---|
| 176 | for (int tile_j = 0; tile_j < TILE_SIZE; tile_j++) { | 
|---|
| 177 | int x = j * TILE_SIZE + tile_j; | 
|---|
| 178 | int y = i * TILE_SIZE + tile_i; | 
|---|
| 179 | if (x >= buffer_size.x || y >= buffer_size.y) { | 
|---|
| 180 | continue; | 
|---|
| 181 | } | 
|---|
| 182 | int k = tile_i * TILE_SIZE + tile_j; | 
|---|
| 183 | int tile_index = i * tile_grid_size.x + j; | 
|---|
| 184 | float d = camera_rays[tile_index].ray.tfar[k]; | 
|---|
| 185 |  | 
|---|
| 186 | if (!p_orthogonal) { | 
|---|
| 187 | const float &dir_x = camera_rays[tile_index].ray.dir_x[k]; | 
|---|
| 188 | const float &dir_y = camera_rays[tile_index].ray.dir_y[k]; | 
|---|
| 189 | const float &dir_z = camera_rays[tile_index].ray.dir_z[k]; | 
|---|
| 190 | float cos_theta = p_camera_dir.x * dir_x + p_camera_dir.y * dir_y + p_camera_dir.z * dir_z; | 
|---|
| 191 | d *= cos_theta; | 
|---|
| 192 | } | 
|---|
| 193 |  | 
|---|
| 194 | mips[0][y * buffer_size.x + x] = d; | 
|---|
| 195 | } | 
|---|
| 196 | } | 
|---|
| 197 | } | 
|---|
| 198 | } | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 | RaycastOcclusionCull::RaycastHZBuffer::~RaycastHZBuffer() { | 
|---|
| 202 | if (camera_rays_unaligned_buffer) { | 
|---|
| 203 | memfree(camera_rays_unaligned_buffer); | 
|---|
| 204 | } | 
|---|
| 205 | } | 
|---|
| 206 |  | 
|---|
| 207 | //////////////////////////////////////////////////////// | 
|---|
| 208 |  | 
|---|
| 209 | bool RaycastOcclusionCull::is_occluder(RID p_rid) { | 
|---|
| 210 | return occluder_owner.owns(p_rid); | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | RID RaycastOcclusionCull::occluder_allocate() { | 
|---|
| 214 | return occluder_owner.allocate_rid(); | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | void RaycastOcclusionCull::occluder_initialize(RID p_occluder) { | 
|---|
| 218 | Occluder *occluder = memnew(Occluder); | 
|---|
| 219 | occluder_owner.initialize_rid(p_occluder, occluder); | 
|---|
| 220 | } | 
|---|
| 221 |  | 
|---|
| 222 | void RaycastOcclusionCull::occluder_set_mesh(RID p_occluder, const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices) { | 
|---|
| 223 | Occluder *occluder = occluder_owner.get_or_null(p_occluder); | 
|---|
| 224 | ERR_FAIL_COND(!occluder); | 
|---|
| 225 |  | 
|---|
| 226 | occluder->vertices = p_vertices; | 
|---|
| 227 | occluder->indices = p_indices; | 
|---|
| 228 |  | 
|---|
| 229 | for (const InstanceID &E : occluder->users) { | 
|---|
| 230 | RID scenario_rid = E.scenario; | 
|---|
| 231 | RID instance_rid = E.instance; | 
|---|
| 232 | ERR_CONTINUE(!scenarios.has(scenario_rid)); | 
|---|
| 233 | Scenario &scenario = scenarios[scenario_rid]; | 
|---|
| 234 | ERR_CONTINUE(!scenario.instances.has(instance_rid)); | 
|---|
| 235 |  | 
|---|
| 236 | if (!scenario.dirty_instances.has(instance_rid)) { | 
|---|
| 237 | scenario.dirty_instances.insert(instance_rid); | 
|---|
| 238 | scenario.dirty_instances_array.push_back(instance_rid); | 
|---|
| 239 | } | 
|---|
| 240 | } | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | void RaycastOcclusionCull::free_occluder(RID p_occluder) { | 
|---|
| 244 | Occluder *occluder = occluder_owner.get_or_null(p_occluder); | 
|---|
| 245 | ERR_FAIL_COND(!occluder); | 
|---|
| 246 | memdelete(occluder); | 
|---|
| 247 | occluder_owner.free(p_occluder); | 
|---|
| 248 | } | 
|---|
| 249 |  | 
|---|
| 250 | //////////////////////////////////////////////////////// | 
|---|
| 251 |  | 
|---|
| 252 | void RaycastOcclusionCull::add_scenario(RID p_scenario) { | 
|---|
| 253 | if (scenarios.has(p_scenario)) { | 
|---|
| 254 | scenarios[p_scenario].removed = false; | 
|---|
| 255 | } else { | 
|---|
| 256 | scenarios[p_scenario] = Scenario(); | 
|---|
| 257 | } | 
|---|
| 258 | } | 
|---|
| 259 |  | 
|---|
| 260 | void RaycastOcclusionCull::remove_scenario(RID p_scenario) { | 
|---|
| 261 | ERR_FAIL_COND(!scenarios.has(p_scenario)); | 
|---|
| 262 | Scenario &scenario = scenarios[p_scenario]; | 
|---|
| 263 | scenario.removed = true; | 
|---|
| 264 | } | 
|---|
| 265 |  | 
|---|
| 266 | void RaycastOcclusionCull::scenario_set_instance(RID p_scenario, RID p_instance, RID p_occluder, const Transform3D &p_xform, bool p_enabled) { | 
|---|
| 267 | ERR_FAIL_COND(!scenarios.has(p_scenario)); | 
|---|
| 268 | Scenario &scenario = scenarios[p_scenario]; | 
|---|
| 269 |  | 
|---|
| 270 | if (!scenario.instances.has(p_instance)) { | 
|---|
| 271 | scenario.instances[p_instance] = OccluderInstance(); | 
|---|
| 272 | } | 
|---|
| 273 |  | 
|---|
| 274 | OccluderInstance &instance = scenario.instances[p_instance]; | 
|---|
| 275 |  | 
|---|
| 276 | bool changed = false; | 
|---|
| 277 |  | 
|---|
| 278 | if (instance.removed) { | 
|---|
| 279 | instance.removed = false; | 
|---|
| 280 | scenario.removed_instances.erase(p_instance); | 
|---|
| 281 | changed = true; // It was removed and re-added, we might have missed some changes | 
|---|
| 282 | } | 
|---|
| 283 |  | 
|---|
| 284 | if (instance.occluder != p_occluder) { | 
|---|
| 285 | Occluder *old_occluder = occluder_owner.get_or_null(instance.occluder); | 
|---|
| 286 | if (old_occluder) { | 
|---|
| 287 | old_occluder->users.erase(InstanceID(p_scenario, p_instance)); | 
|---|
| 288 | } | 
|---|
| 289 |  | 
|---|
| 290 | instance.occluder = p_occluder; | 
|---|
| 291 |  | 
|---|
| 292 | if (p_occluder.is_valid()) { | 
|---|
| 293 | Occluder *occluder = occluder_owner.get_or_null(p_occluder); | 
|---|
| 294 | ERR_FAIL_COND(!occluder); | 
|---|
| 295 | occluder->users.insert(InstanceID(p_scenario, p_instance)); | 
|---|
| 296 | } | 
|---|
| 297 | changed = true; | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | if (instance.xform != p_xform) { | 
|---|
| 301 | scenario.instances[p_instance].xform = p_xform; | 
|---|
| 302 | changed = true; | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|
| 305 | if (instance.enabled != p_enabled) { | 
|---|
| 306 | instance.enabled = p_enabled; | 
|---|
| 307 | scenario.dirty = true; // The scenario needs a scene re-build, but the instance doesn't need update | 
|---|
| 308 | } | 
|---|
| 309 |  | 
|---|
| 310 | if (changed && !scenario.dirty_instances.has(p_instance)) { | 
|---|
| 311 | scenario.dirty_instances.insert(p_instance); | 
|---|
| 312 | scenario.dirty_instances_array.push_back(p_instance); | 
|---|
| 313 | scenario.dirty = true; | 
|---|
| 314 | } | 
|---|
| 315 | } | 
|---|
| 316 |  | 
|---|
| 317 | void RaycastOcclusionCull::scenario_remove_instance(RID p_scenario, RID p_instance) { | 
|---|
| 318 | ERR_FAIL_COND(!scenarios.has(p_scenario)); | 
|---|
| 319 | Scenario &scenario = scenarios[p_scenario]; | 
|---|
| 320 |  | 
|---|
| 321 | if (scenario.instances.has(p_instance)) { | 
|---|
| 322 | OccluderInstance &instance = scenario.instances[p_instance]; | 
|---|
| 323 |  | 
|---|
| 324 | if (!instance.removed) { | 
|---|
| 325 | Occluder *occluder = occluder_owner.get_or_null(instance.occluder); | 
|---|
| 326 | if (occluder) { | 
|---|
| 327 | occluder->users.erase(InstanceID(p_scenario, p_instance)); | 
|---|
| 328 | } | 
|---|
| 329 |  | 
|---|
| 330 | scenario.removed_instances.push_back(p_instance); | 
|---|
| 331 | instance.removed = true; | 
|---|
| 332 | } | 
|---|
| 333 | } | 
|---|
| 334 | } | 
|---|
| 335 |  | 
|---|
| 336 | void RaycastOcclusionCull::Scenario::_update_dirty_instance_thread(int p_idx, RID *p_instances) { | 
|---|
| 337 | _update_dirty_instance(p_idx, p_instances); | 
|---|
| 338 | } | 
|---|
| 339 |  | 
|---|
| 340 | void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances) { | 
|---|
| 341 | OccluderInstance *occ_inst = instances.getptr(p_instances[p_idx]); | 
|---|
| 342 |  | 
|---|
| 343 | if (!occ_inst) { | 
|---|
| 344 | return; | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
| 347 | Occluder *occ = raycast_singleton->occluder_owner.get_or_null(occ_inst->occluder); | 
|---|
| 348 |  | 
|---|
| 349 | if (!occ) { | 
|---|
| 350 | return; | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | int vertices_size = occ->vertices.size(); | 
|---|
| 354 |  | 
|---|
| 355 | // Embree requires the last element to be readable by a 16-byte SSE load instruction, so we add padding to be safe. | 
|---|
| 356 | occ_inst->xformed_vertices.resize(vertices_size + 1); | 
|---|
| 357 |  | 
|---|
| 358 | const Vector3 *read_ptr = occ->vertices.ptr(); | 
|---|
| 359 | Vector3 *write_ptr = occ_inst->xformed_vertices.ptr(); | 
|---|
| 360 |  | 
|---|
| 361 | if (vertices_size > 1024) { | 
|---|
| 362 | TransformThreadData td; | 
|---|
| 363 | td.xform = occ_inst->xform; | 
|---|
| 364 | td.read = read_ptr; | 
|---|
| 365 | td.write = write_ptr; | 
|---|
| 366 | td.vertex_count = vertices_size; | 
|---|
| 367 | td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count(); | 
|---|
| 368 | WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME( "RaycastOcclusionCull")); | 
|---|
| 369 | WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); | 
|---|
| 370 |  | 
|---|
| 371 | } else { | 
|---|
| 372 | _transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size); | 
|---|
| 373 | } | 
|---|
| 374 |  | 
|---|
| 375 | occ_inst->indices.resize(occ->indices.size()); | 
|---|
| 376 | memcpy(occ_inst->indices.ptr(), occ->indices.ptr(), occ->indices.size() * sizeof(int32_t)); | 
|---|
| 377 | } | 
|---|
| 378 |  | 
|---|
| 379 | void RaycastOcclusionCull::Scenario::_transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data) { | 
|---|
| 380 | uint32_t vertex_total = p_data->vertex_count; | 
|---|
| 381 | uint32_t total_threads = p_data->thread_count; | 
|---|
| 382 | uint32_t from = p_thread * vertex_total / total_threads; | 
|---|
| 383 | uint32_t to = (p_thread + 1 == total_threads) ? vertex_total : ((p_thread + 1) * vertex_total / total_threads); | 
|---|
| 384 | _transform_vertices_range(p_data->read, p_data->write, p_data->xform, from, to); | 
|---|
| 385 | } | 
|---|
| 386 |  | 
|---|
| 387 | void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to) { | 
|---|
| 388 | for (int i = p_from; i < p_to; i++) { | 
|---|
| 389 | p_write[i] = p_xform.xform(p_read[i]); | 
|---|
| 390 | } | 
|---|
| 391 | } | 
|---|
| 392 |  | 
|---|
| 393 | void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) { | 
|---|
| 394 | Scenario *scenario = (Scenario *)p_ud; | 
|---|
| 395 | int commit_idx = 1 - (scenario->current_scene_idx); | 
|---|
| 396 | rtcCommitScene(scenario->ebr_scene[commit_idx]); | 
|---|
| 397 | scenario->commit_done = true; | 
|---|
| 398 | } | 
|---|
| 399 |  | 
|---|
| 400 | bool RaycastOcclusionCull::Scenario::update() { | 
|---|
| 401 | ERR_FAIL_COND_V(singleton == nullptr, false); | 
|---|
| 402 |  | 
|---|
| 403 | if (commit_thread == nullptr) { | 
|---|
| 404 | commit_thread = memnew(Thread); | 
|---|
| 405 | } | 
|---|
| 406 |  | 
|---|
| 407 | if (commit_thread->is_started()) { | 
|---|
| 408 | if (commit_done) { | 
|---|
| 409 | commit_thread->wait_to_finish(); | 
|---|
| 410 | current_scene_idx = 1 - current_scene_idx; | 
|---|
| 411 | } else { | 
|---|
| 412 | return false; | 
|---|
| 413 | } | 
|---|
| 414 | } | 
|---|
| 415 |  | 
|---|
| 416 | if (removed) { | 
|---|
| 417 | if (ebr_scene[0]) { | 
|---|
| 418 | rtcReleaseScene(ebr_scene[0]); | 
|---|
| 419 | } | 
|---|
| 420 | if (ebr_scene[1]) { | 
|---|
| 421 | rtcReleaseScene(ebr_scene[1]); | 
|---|
| 422 | } | 
|---|
| 423 | return true; | 
|---|
| 424 | } | 
|---|
| 425 |  | 
|---|
| 426 | if (!dirty && removed_instances.is_empty() && dirty_instances_array.is_empty()) { | 
|---|
| 427 | return false; | 
|---|
| 428 | } | 
|---|
| 429 |  | 
|---|
| 430 | for (const RID &scenario : removed_instances) { | 
|---|
| 431 | instances.erase(scenario); | 
|---|
| 432 | } | 
|---|
| 433 |  | 
|---|
| 434 | if (dirty_instances_array.size() / WorkerThreadPool::get_singleton()->get_thread_count() > 128) { | 
|---|
| 435 | // Lots of instances, use per-instance threading | 
|---|
| 436 | WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr(), dirty_instances_array.size(), -1, true, SNAME( "RaycastOcclusionCullUpdate")); | 
|---|
| 437 | WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); | 
|---|
| 438 |  | 
|---|
| 439 | } else { | 
|---|
| 440 | // Few instances, use threading on the vertex transforms | 
|---|
| 441 | for (unsigned int i = 0; i < dirty_instances_array.size(); i++) { | 
|---|
| 442 | _update_dirty_instance(i, dirty_instances_array.ptr()); | 
|---|
| 443 | } | 
|---|
| 444 | } | 
|---|
| 445 |  | 
|---|
| 446 | dirty_instances.clear(); | 
|---|
| 447 | dirty_instances_array.clear(); | 
|---|
| 448 | removed_instances.clear(); | 
|---|
| 449 |  | 
|---|
| 450 | if (raycast_singleton->ebr_device == nullptr) { | 
|---|
| 451 | raycast_singleton->_init_embree(); | 
|---|
| 452 | } | 
|---|
| 453 |  | 
|---|
| 454 | int next_scene_idx = 1 - current_scene_idx; | 
|---|
| 455 | RTCScene &next_scene = ebr_scene[next_scene_idx]; | 
|---|
| 456 |  | 
|---|
| 457 | if (next_scene) { | 
|---|
| 458 | rtcReleaseScene(next_scene); | 
|---|
| 459 | } | 
|---|
| 460 |  | 
|---|
| 461 | next_scene = rtcNewScene(raycast_singleton->ebr_device); | 
|---|
| 462 | rtcSetSceneBuildQuality(next_scene, RTCBuildQuality(raycast_singleton->build_quality)); | 
|---|
| 463 |  | 
|---|
| 464 | for (const KeyValue<RID, OccluderInstance> &E : instances) { | 
|---|
| 465 | const OccluderInstance *occ_inst = &E.value; | 
|---|
| 466 | const Occluder *occ = raycast_singleton->occluder_owner.get_or_null(occ_inst->occluder); | 
|---|
| 467 |  | 
|---|
| 468 | if (!occ || !occ_inst->enabled) { | 
|---|
| 469 | continue; | 
|---|
| 470 | } | 
|---|
| 471 |  | 
|---|
| 472 | RTCGeometry geom = rtcNewGeometry(raycast_singleton->ebr_device, RTC_GEOMETRY_TYPE_TRIANGLE); | 
|---|
| 473 | rtcSetSharedGeometryBuffer(geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, occ_inst->xformed_vertices.ptr(), 0, sizeof(Vector3), occ_inst->xformed_vertices.size()); | 
|---|
| 474 | rtcSetSharedGeometryBuffer(geom, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, occ_inst->indices.ptr(), 0, sizeof(uint32_t) * 3, occ_inst->indices.size() / 3); | 
|---|
| 475 | rtcCommitGeometry(geom); | 
|---|
| 476 | rtcAttachGeometry(next_scene, geom); | 
|---|
| 477 | rtcReleaseGeometry(geom); | 
|---|
| 478 | } | 
|---|
| 479 |  | 
|---|
| 480 | dirty = false; | 
|---|
| 481 | commit_done = false; | 
|---|
| 482 | commit_thread->start(&Scenario::_commit_scene, this); | 
|---|
| 483 | return false; | 
|---|
| 484 | } | 
|---|
| 485 |  | 
|---|
| 486 | void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThreadData *p_raycast_data) const { | 
|---|
| 487 | RTCIntersectContext ctx; | 
|---|
| 488 | rtcInitIntersectContext(&ctx); | 
|---|
| 489 | ctx.flags = RTC_INTERSECT_CONTEXT_FLAG_COHERENT; | 
|---|
| 490 |  | 
|---|
| 491 | rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]); | 
|---|
| 492 | } | 
|---|
| 493 |  | 
|---|
| 494 | void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const { | 
|---|
| 495 | ERR_FAIL_COND(singleton == nullptr); | 
|---|
| 496 | if (raycast_singleton->ebr_device == nullptr) { | 
|---|
| 497 | return; // Embree is initialized on demand when there is some scenario with occluders in it. | 
|---|
| 498 | } | 
|---|
| 499 |  | 
|---|
| 500 | if (ebr_scene[current_scene_idx] == nullptr) { | 
|---|
| 501 | return; | 
|---|
| 502 | } | 
|---|
| 503 |  | 
|---|
| 504 | RaycastThreadData td; | 
|---|
| 505 | td.rays = r_rays; | 
|---|
| 506 | td.masks = p_valid_masks; | 
|---|
| 507 |  | 
|---|
| 508 | WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_raycast, &td, p_tile_count, -1, true, SNAME( "RaycastOcclusionCullRaycast")); | 
|---|
| 509 | WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); | 
|---|
| 510 | } | 
|---|
| 511 |  | 
|---|
| 512 | //////////////////////////////////////////////////////// | 
|---|
| 513 |  | 
|---|
| 514 | void RaycastOcclusionCull::add_buffer(RID p_buffer) { | 
|---|
| 515 | ERR_FAIL_COND(buffers.has(p_buffer)); | 
|---|
| 516 | buffers[p_buffer] = RaycastHZBuffer(); | 
|---|
| 517 | } | 
|---|
| 518 |  | 
|---|
| 519 | void RaycastOcclusionCull::remove_buffer(RID p_buffer) { | 
|---|
| 520 | ERR_FAIL_COND(!buffers.has(p_buffer)); | 
|---|
| 521 | buffers.erase(p_buffer); | 
|---|
| 522 | } | 
|---|
| 523 |  | 
|---|
| 524 | void RaycastOcclusionCull::buffer_set_scenario(RID p_buffer, RID p_scenario) { | 
|---|
| 525 | ERR_FAIL_COND(!buffers.has(p_buffer)); | 
|---|
| 526 | ERR_FAIL_COND(p_scenario.is_valid() && !scenarios.has(p_scenario)); | 
|---|
| 527 | buffers[p_buffer].scenario_rid = p_scenario; | 
|---|
| 528 | } | 
|---|
| 529 |  | 
|---|
| 530 | void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size) { | 
|---|
| 531 | ERR_FAIL_COND(!buffers.has(p_buffer)); | 
|---|
| 532 | buffers[p_buffer].resize(p_size); | 
|---|
| 533 | } | 
|---|
| 534 |  | 
|---|
| 535 | void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) { | 
|---|
| 536 | if (!buffers.has(p_buffer)) { | 
|---|
| 537 | return; | 
|---|
| 538 | } | 
|---|
| 539 |  | 
|---|
| 540 | RaycastHZBuffer &buffer = buffers[p_buffer]; | 
|---|
| 541 |  | 
|---|
| 542 | if (buffer.is_empty() || !scenarios.has(buffer.scenario_rid)) { | 
|---|
| 543 | return; | 
|---|
| 544 | } | 
|---|
| 545 |  | 
|---|
| 546 | Scenario &scenario = scenarios[buffer.scenario_rid]; | 
|---|
| 547 |  | 
|---|
| 548 | bool removed = scenario.update(); | 
|---|
| 549 |  | 
|---|
| 550 | if (removed) { | 
|---|
| 551 | scenarios.erase(buffer.scenario_rid); | 
|---|
| 552 | return; | 
|---|
| 553 | } | 
|---|
| 554 |  | 
|---|
| 555 | buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal); | 
|---|
| 556 |  | 
|---|
| 557 | scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count); | 
|---|
| 558 | buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal); | 
|---|
| 559 | buffer.update_mips(); | 
|---|
| 560 | } | 
|---|
| 561 |  | 
|---|
| 562 | RaycastOcclusionCull::HZBuffer *RaycastOcclusionCull::buffer_get_ptr(RID p_buffer) { | 
|---|
| 563 | if (!buffers.has(p_buffer)) { | 
|---|
| 564 | return nullptr; | 
|---|
| 565 | } | 
|---|
| 566 | return &buffers[p_buffer]; | 
|---|
| 567 | } | 
|---|
| 568 |  | 
|---|
| 569 | RID RaycastOcclusionCull::buffer_get_debug_texture(RID p_buffer) { | 
|---|
| 570 | ERR_FAIL_COND_V(!buffers.has(p_buffer), RID()); | 
|---|
| 571 | return buffers[p_buffer].get_debug_texture(); | 
|---|
| 572 | } | 
|---|
| 573 |  | 
|---|
| 574 | //////////////////////////////////////////////////////// | 
|---|
| 575 |  | 
|---|
| 576 | void RaycastOcclusionCull::set_build_quality(RS::ViewportOcclusionCullingBuildQuality p_quality) { | 
|---|
| 577 | if (build_quality == p_quality) { | 
|---|
| 578 | return; | 
|---|
| 579 | } | 
|---|
| 580 |  | 
|---|
| 581 | build_quality = p_quality; | 
|---|
| 582 |  | 
|---|
| 583 | for (KeyValue<RID, Scenario> &K : scenarios) { | 
|---|
| 584 | K.value.dirty = true; | 
|---|
| 585 | } | 
|---|
| 586 | } | 
|---|
| 587 |  | 
|---|
| 588 | void RaycastOcclusionCull::_init_embree() { | 
|---|
| 589 | #ifdef __SSE2__ | 
|---|
| 590 | _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); | 
|---|
| 591 | _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); | 
|---|
| 592 | #endif | 
|---|
| 593 |  | 
|---|
| 594 | String settings = vformat( "threads=%d", MAX(1, OS::get_singleton()->get_processor_count() - 2)); | 
|---|
| 595 | ebr_device = rtcNewDevice(settings.utf8().ptr()); | 
|---|
| 596 | } | 
|---|
| 597 |  | 
|---|
| 598 | RaycastOcclusionCull::RaycastOcclusionCull() { | 
|---|
| 599 | raycast_singleton = this; | 
|---|
| 600 | int default_quality = GLOBAL_GET( "rendering/occlusion_culling/bvh_build_quality"); | 
|---|
| 601 | build_quality = RS::ViewportOcclusionCullingBuildQuality(default_quality); | 
|---|
| 602 | } | 
|---|
| 603 |  | 
|---|
| 604 | RaycastOcclusionCull::~RaycastOcclusionCull() { | 
|---|
| 605 | for (KeyValue<RID, Scenario> &K : scenarios) { | 
|---|
| 606 | Scenario &scenario = K.value; | 
|---|
| 607 | if (scenario.commit_thread) { | 
|---|
| 608 | if (scenario.commit_thread->is_started()) { | 
|---|
| 609 | scenario.commit_thread->wait_to_finish(); | 
|---|
| 610 | } | 
|---|
| 611 | memdelete(scenario.commit_thread); | 
|---|
| 612 | } | 
|---|
| 613 |  | 
|---|
| 614 | for (int i = 0; i < 2; i++) { | 
|---|
| 615 | if (scenario.ebr_scene[i]) { | 
|---|
| 616 | rtcReleaseScene(scenario.ebr_scene[i]); | 
|---|
| 617 | } | 
|---|
| 618 | } | 
|---|
| 619 | } | 
|---|
| 620 |  | 
|---|
| 621 | if (ebr_device != nullptr) { | 
|---|
| 622 | rtcReleaseDevice(ebr_device); | 
|---|
| 623 | } | 
|---|
| 624 |  | 
|---|
| 625 | raycast_singleton = nullptr; | 
|---|
| 626 | } | 
|---|
| 627 |  | 
|---|