bvh_intersector_hybrid.cpp source code [Godot/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#include "bvh_intersector_hybrid.h"
5	#include "bvh_traverser1.h"
6	#include "node_intersector1.h"
7	#include "node_intersector_packet.h"
8
9	#include "../geometry/intersector_iterators.h"
10	#include "../geometry/triangle_intersector.h"
11	#include "../geometry/trianglev_intersector.h"
12	#include "../geometry/trianglev_mb_intersector.h"
13	#include "../geometry/trianglei_intersector.h"
14	#include "../geometry/quadv_intersector.h"
15	#include "../geometry/quadi_intersector.h"
16	#include "../geometry/curveNv_intersector.h"
17	#include "../geometry/curveNi_intersector.h"
18	#include "../geometry/curveNi_mb_intersector.h"
19	#include "../geometry/linei_intersector.h"
20	#include "../geometry/subdivpatch1_intersector.h"
21	#include "../geometry/object_intersector.h"
22	#include "../geometry/instance_intersector.h"
23	#include "../geometry/subgrid_intersector.h"
24	#include "../geometry/subgrid_mb_intersector.h"
25	#include "../geometry/curve_intersector_virtual.h"
26
27	#define SWITCH_DURING_DOWN_TRAVERSAL 1
28	#define FORCE_SINGLE_MODE 0
29
30	#define ENABLE_FAST_COHERENT_CODEPATHS 1
31
32	namespace embree
33	{
34	namespace isa
35	{
36	template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
37	void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
38	const BVH* bvh,
39	NodeRef root,
40	size_t k,
41	Precalculations& pre,
42	RayHitK<K>& ray,
43	const TravRayK<K, robust>& tray,
44	IntersectContext* context)
45	{
46	/ stack state /
47	StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
48	StackItemT<NodeRef>* stackPtr = stack + `1`; // current stack pointer
49	StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
50	stack[`0`].ptr = root;
51	stack[`0`].dist = neg_inf;
52
53	/ load the ray into SIMD registers /
54	TravRay<N,robust> tray1;
55	tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
56
57	/ pop loop /
58	while (true) pop:
59	{
60	/ pop next node /
61	if (unlikely(stackPtr == stack)) break;
62	stackPtr--;
63	NodeRef cur = NodeRef(stackPtr->ptr);
64
65	/ if popped node is too far, pop next one /
66	if (unlikely((float**)&stackPtr->dist > ray.tfar[k]))
67	continue;
68
69	/ downtraversal loop /
70	while (true)
71	{
72	/ intersect node /
73	size_t mask; vfloat<N> tNear;
74	STAT3(normal.trav_nodes, `1`, `1`, `1`);
75	bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
76	if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-`1`,-`1`,-`1`); break; }
77
78	/ if no child is hit, pop next node /
79	if (unlikely(mask == `0`))
80	goto pop;
81
82	/ select next child and push other children /
83	BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
84	}
85
86	/ this is a leaf node /
87	assert(cur != BVH::emptyNode);
88	STAT3(normal.trav_leaves, `1`, `1`, `1`);
89	size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
90
91	size_t lazy_node = `0`;
92	PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
93
94	tray1.tfar = ray.tfar[k];
95
96	if (unlikely(lazy_node)) {
97	stackPtr->ptr = lazy_node;
98	stackPtr->dist = neg_inf;
99	stackPtr++;
100	}
101	}
102	}
103
104	template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
105	void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
106	Accel::Intersectors* __restrict__ This,
107	RayHitK<K>& __restrict__ ray,
108	IntersectContext* __restrict__ context)
109	{
110	BVH* __restrict__ bvh = (BVH*)This->ptr;
111
112	/ we may traverse an empty BVH in case all geometry was invalid /
113	if (bvh->root == BVH::emptyNode)
114	return;
115
116	#if ENABLE_FAST_COHERENT_CODEPATHS == 1
117	assert(context);
118	if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
119	{
120	intersectCoherent(valid_i, This, ray, context);
121	return;
122	}
123	#endif
124
125	/ filter out invalid rays /
126	vbool<K> valid = *valid_i == -`1`;
127	#if defined(EMBREE_IGNORE_INVALID_RAYS)
128	valid &= ray.valid();
129	#endif
130
131	/ return if there are no valid rays /
132	size_t valid_bits = movemask(valid);
133
134	#if defined(__AVX__)
135	STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], `1`, `1`, `1`);
136	#endif
137
138	if (unlikely(valid_bits == `0`)) return;
139
140	/ verify correct input /
141	assert(all(valid, ray.valid()));
142	assert(all(valid, ray.tnear() >= `0.0f`));
143	assert(!(types & BVH_MB) \|\| all(valid, (ray.time() >= `0.0f`) & (ray.time() <= `1.0f`)));
144	Precalculations pre(valid, ray);
145
146	/ load ray /
147	TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : `0`);
148	const vfloat<K> org_ray_tnear = max(ray.tnear(), `0.0f`);
149	const vfloat<K> org_ray_tfar = max(ray.tfar , `0.0f`);
150
151	if (single)
152	{
153	tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
154	tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
155
156	for (; valid_bits!=`0`; ) {
157	const size_t i = bscf(valid_bits);
158	intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
159	}
160	return;
161	}
162
163	/ determine switch threshold based on flags /
164	const size_t switchThreshold = (context->user && context->isCoherent()) ? `2` : switchThresholdIncoherent;
165
166	vint<K> octant = ray.octant();
167	octant = select(valid, octant, vint<K>(`0xffffffff`));
168
169	/ test whether we have ray with opposing direction signs in the packet /
170	bool split = false;
171	{
172	size_t bits = valid_bits;
173	vbool<K> vsplit( false );
174	do
175	{
176	const size_t valid_index = bsf(bits);
177	vbool<K> octant_valid = octant[valid_index] == octant;
178	bits &= ~(size_t)movemask(octant_valid);
179	vsplit \|= vint<K>(octant[valid_index]) == (octant^vint<K>(`0x7`));
180	} while (bits);
181	if (any(vsplit)) split = true;
182	}
183
184	do
185	{
186	const size_t valid_index = bsf(valid_bits);
187	const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
188	const vint<K> count_diff_octant = \
189	((diff_octant >> `2`) & `1`) +
190	((diff_octant >> `1`) & `1`) +
191	((diff_octant >> `0`) & `1`);
192
193	vbool<K> octant_valid = (count_diff_octant <= `1`) & (octant != vint<K>(`0xffffffff`));
194	if (!single \|\| !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
195
196
197	octant = select(octant_valid,vint<K>(`0xffffffff`),octant);
198	valid_bits &= ~(size_t)movemask(octant_valid);
199
200	tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
201	tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
202
203	/ allocate stack and push root node /
204	vfloat<K> stack_near[stackSizeChunk];
205	NodeRef stack_node[stackSizeChunk];
206	stack_node[`0`] = BVH::invalidNode;
207	stack_near[`0`] = inf;
208	stack_node[`1`] = bvh->root;
209	stack_near[`1`] = tray.tnear;
210	NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
211	NodeRef* __restrict__ sptr_node = stack_node + `2`;
212	vfloat<K>* __restrict__ sptr_near = stack_near + `2`;
213
214	while (`1`) pop:
215	{
216	/ pop next node from stack /
217	assert(sptr_node > stack_node);
218	sptr_node--;
219	sptr_near--;
220	NodeRef cur = *sptr_node;
221	if (unlikely(cur == BVH::invalidNode)) {
222	assert(sptr_node == stack_node);
223	break;
224	}
225
226	/ cull node if behind closest hit point /
227	vfloat<K> curDist = *sptr_near;
228	const vbool<K> active = curDist < tray.tfar;
229	if (unlikely(none(active)))
230	continue;
231
232	/ switch to single ray traversal /
233	#if (!defined(__WIN32__) \|\| defined(__X86_64__)) && ((defined(__aarch64__)) \|\| defined(__SSE4_2__))
234	#if FORCE_SINGLE_MODE == 0
235	if (single)
236	#endif
237	{
238	size_t bits = movemask(active);
239	#if FORCE_SINGLE_MODE == 0
240	if (unlikely(popcnt(bits) <= switchThreshold))
241	#endif
242	{
243	for (; bits!=`0`; ) {
244	const size_t i = bscf(bits);
245	intersect1(This, bvh, cur, i, pre, ray, tray, context);
246	}
247	tray.tfar = min(tray.tfar, ray.tfar);
248	continue;
249	}
250	}
251	#endif
252	while (likely(!cur.isLeaf()))
253	{
254	/ process nodes /
255	const vbool<K> valid_node = tray.tfar > curDist;
256	STAT3(normal.trav_nodes, `1`, popcnt(valid_node), K);
257	const NodeRef nodeRef = cur;
258	const BaseNode* __restrict__ const node = nodeRef.baseNode();
259
260	/ set cur to invalid /
261	cur = BVH::emptyNode;
262	curDist = pos_inf;
263
264	size_t num_child_hits = `0`;
265
266	for (unsigned i = `0`; i < N; i++)
267	{
268	const NodeRef child = node->children[i];
269	if (unlikely(child == BVH::emptyNode)) break;
270	vfloat<K> lnearP;
271	vbool<K> lhit = valid_node;
272	BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
273
274	/ if we hit the child we choose to continue with that child if it*
275	is closer than the current next child, or we push it onto the stack /*
276	if (likely(any(lhit)))
277	{
278	assert(sptr_node < stackEnd);
279	assert(child != BVH::emptyNode);
280	const vfloat<K> childDist = select(lhit, lnearP, inf);
281	/ push cur node onto stack and continue with hit child /
282	if (any(childDist < curDist))
283	{
284	if (likely(cur != BVH::emptyNode)) {
285	num_child_hits++;
286	*sptr_node = cur; sptr_node++;
287	*sptr_near = curDist; sptr_near++;
288	}
289	curDist = childDist;
290	cur = child;
291	}
292
293	/ push hit child onto stack /
294	else {
295	num_child_hits++;
296	*sptr_node = child; sptr_node++;
297	*sptr_near = childDist; sptr_near++;
298	}
299	}
300	}
301
302	#if defined(__AVX__)
303	//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
304	#endif
305
306	if (unlikely(cur == BVH::emptyNode))
307	goto pop;
308
309	/ improved distance sorting for 3 or more hits /
310	if (unlikely(num_child_hits >= `2`))
311	{
312	if (any(sptr_near[-`2`] < sptr_near[-`1`]))
313	{
314	std::swap(sptr_near[-`2`],sptr_near[-`1`]);
315	std::swap(sptr_node[-`2`],sptr_node[-`1`]);
316	}
317	if (unlikely(num_child_hits >= `3`))
318	{
319	if (any(sptr_near[-`3`] < sptr_near[-`1`]))
320	{
321	std::swap(sptr_near[-`3`],sptr_near[-`1`]);
322	std::swap(sptr_node[-`3`],sptr_node[-`1`]);
323	}
324	if (any(sptr_near[-`3`] < sptr_near[-`2`]))
325	{
326	std::swap(sptr_near[-`3`],sptr_near[-`2`]);
327	std::swap(sptr_node[-`3`],sptr_node[-`2`]);
328	}
329	}
330	}
331
332	#if SWITCH_DURING_DOWN_TRAVERSAL == 1
333	if (single)
334	{
335	// seems to be the best place for testing utilization
336	if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
337	{
338	*sptr_node++ = cur;
339	*sptr_near++ = curDist;
340	goto pop;
341	}
342	}
343	#endif
344	}
345
346	/ return if stack is empty /
347	if (unlikely(cur == BVH::invalidNode)) {
348	assert(sptr_node == stack_node);
349	break;
350	}
351
352	/ intersect leaf /
353	assert(cur != BVH::emptyNode);
354	const vbool<K> valid_leaf = tray.tfar > curDist;
355	STAT3(normal.trav_leaves, `1`, popcnt(valid_leaf), K);
356	if (unlikely(none(valid_leaf))) continue;
357	size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
358
359	size_t lazy_node = `0`;
360	PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
361	tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
362
363	if (unlikely(lazy_node)) {
364	*sptr_node = lazy_node; sptr_node++;
365	*sptr_near = neg_inf; sptr_near++;
366	}
367	}
368	} while(valid_bits);
369	}
370
371
372	template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
373	void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
374	Accel::Intersectors* __restrict__ This,
375	RayHitK<K>& __restrict__ ray,
376	IntersectContext* context)
377	{
378	BVH* __restrict__ bvh = (BVH*)This->ptr;
379
380	/ filter out invalid rays /
381	vbool<K> valid = *valid_i == -`1`;
382	#if defined(EMBREE_IGNORE_INVALID_RAYS)
383	valid &= ray.valid();
384	#endif
385
386	/ return if there are no valid rays /
387	size_t valid_bits = movemask(valid);
388	if (unlikely(valid_bits == `0`)) return;
389
390	/ verify correct input /
391	assert(all(valid, ray.valid()));
392	assert(all(valid, ray.tnear() >= `0.0f`));
393	assert(!(types & BVH_MB) \|\| all(valid, (ray.time() >= `0.0f`) & (ray.time() <= `1.0f`)));
394	Precalculations pre(valid, ray);
395
396	/ load ray /
397	TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : `0`);
398	const vfloat<K> org_ray_tnear = max(ray.tnear(), `0.0f`);
399	const vfloat<K> org_ray_tfar = max(ray.tfar , `0.0f`);
400
401	vint<K> octant = ray.octant();
402	octant = select(valid, octant, vint<K>(`0xffffffff`));
403
404	do
405	{
406	const size_t valid_index = bsf(valid_bits);
407	const vbool<K> octant_valid = octant[valid_index] == octant;
408	valid_bits &= ~(size_t)movemask(octant_valid);
409
410	tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
411	tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
412
413	Frustum<robust> frustum;
414	frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
415
416	StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
417	StackItemT<NodeRef>* stackPtr = stack + `1`; // current stack pointer
418	stack[`0`].ptr = bvh->root;
419	stack[`0`].dist = neg_inf;
420
421	while (`1`) pop:
422	{
423	/ pop next node from stack /
424	if (unlikely(stackPtr == stack)) break;
425
426	stackPtr--;
427	NodeRef cur = NodeRef(stackPtr->ptr);
428
429	/ cull node if behind closest hit point /
430	vfloat<K> curDist = (float**)&stackPtr->dist;
431	const vbool<K> active = curDist < tray.tfar;
432	if (unlikely(none(active))) continue;
433
434	while (likely(!cur.isLeaf()))
435	{
436	/ process nodes /
437	//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
438	const NodeRef nodeRef = cur;
439	const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
440
441	vfloat<N> fmin;
442	size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
443
444	if (unlikely(!m_frustum_node)) goto pop;
445	cur = BVH::emptyNode;
446	curDist = pos_inf;
447
448	#if defined(__AVX__)
449	//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
450	#endif
451	size_t num_child_hits = `0`;
452	do {
453	const size_t i = bscf(m_frustum_node);
454	vfloat<K> lnearP;
455	vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
456	STAT3(normal.trav_nodes, `1`, `1`, `1`);
457	BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
458
459	if (likely(any(lhit)))
460	{
461	const vfloat<K> childDist = fmin[i];
462	const NodeRef child = node->child(i);
463	BVHN<N>::prefetch(child);
464	if (any(childDist < curDist))
465	{
466	if (likely(cur != BVH::emptyNode)) {
467	num_child_hits++;
468	stackPtr->ptr = cur;
469	(float**)&stackPtr->dist = toScalar(curDist);
470	stackPtr++;
471	}
472	curDist = childDist;
473	cur = child;
474	}
475	/ push hit child onto stack /
476	else {
477	num_child_hits++;
478	stackPtr->ptr = child;
479	(float**)&stackPtr->dist = toScalar(childDist);
480	stackPtr++;
481	}
482	}
483	} while(m_frustum_node);
484
485	if (unlikely(cur == BVH::emptyNode)) goto pop;
486
487	/ improved distance sorting for 3 or more hits /
488	if (unlikely(num_child_hits >= `2`))
489	{
490	if (stackPtr[-`2`].dist < stackPtr[-`1`].dist)
491	std::swap(stackPtr[-`2`],stackPtr[-`1`]);
492	if (unlikely(num_child_hits >= `3`))
493	{
494	if (stackPtr[-`3`].dist < stackPtr[-`1`].dist)
495	std::swap(stackPtr[-`3`],stackPtr[-`1`]);
496	if (stackPtr[-`3`].dist < stackPtr[-`2`].dist)
497	std::swap(stackPtr[-`3`],stackPtr[-`2`]);
498	}
499	}
500	}
501
502	/ intersect leaf /
503	assert(cur != BVH::invalidNode);
504	assert(cur != BVH::emptyNode);
505	const vbool<K> valid_leaf = tray.tfar > curDist;
506	STAT3(normal.trav_leaves, `1`, popcnt(valid_leaf), K);
507	if (unlikely(none(valid_leaf))) continue;
508	size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
509
510	size_t lazy_node = `0`;
511	PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
512
513	/ reduce max distance interval on successful intersection /
514	if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
515	{
516	tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
517	frustum.template updateMaxDist<K>(tray.tfar);
518	}
519
520	if (unlikely(lazy_node)) {
521	stackPtr->ptr = lazy_node;
522	stackPtr->dist = neg_inf;
523	stackPtr++;
524	}
525	}
526
527	} while(valid_bits);
528	}
529
530	// ===================================================================================================================================================================
531	// ===================================================================================================================================================================
532	// ===================================================================================================================================================================
533
534	template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
535	bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
536	const BVH* bvh,
537	NodeRef root,
538	size_t k,
539	Precalculations& pre,
540	RayK<K>& ray,
541	const TravRayK<K, robust>& tray,
542	IntersectContext* context)
543	{
544	/ stack state /
545	NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
546	NodeRef* stackPtr = stack+`1`; // current stack pointer
547	NodeRef* stackEnd = stack+stackSizeSingle;
548	stack[`0`] = root;
549
550	/ load the ray into SIMD registers /
551	TravRay<N,robust> tray1;
552	tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
553
554	/ pop loop /
555	while (true) pop:
556	{
557	/ pop next node /
558	if (unlikely(stackPtr == stack)) break;
559	stackPtr--;
560	NodeRef cur = (NodeRef)*stackPtr;
561
562	/ downtraversal loop /
563	while (true)
564	{
565	/ intersect node /
566	size_t mask; vfloat<N> tNear;
567	STAT3(shadow.trav_nodes, `1`, `1`, `1`);
568	bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
569	if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-`1`,-`1`,-`1`); break; }
570
571	/ if no child is hit, pop next node /
572	if (unlikely(mask == `0`))
573	goto pop;
574
575	/ select next child and push other children /
576	BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
577	}
578
579	/ this is a leaf node /
580	assert(cur != BVH::emptyNode);
581	STAT3(shadow.trav_leaves, `1`, `1`, `1`);
582	size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
583
584	size_t lazy_node = `0`;
585	if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
586	ray.tfar[k] = neg_inf;
587	return true;
588	}
589
590	if (unlikely(lazy_node)) {
591	*stackPtr = lazy_node;
592	stackPtr++;
593	}
594	}
595	return false;
596	}
597
598	template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
599	void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
600	Accel::Intersectors* __restrict__ This,
601	RayK<K>& __restrict__ ray,
602	IntersectContext* context)
603	{
604	BVH* __restrict__ bvh = (BVH*)This->ptr;
605
606	/ we may traverse an empty BVH in case all geometry was invalid /
607	if (bvh->root == BVH::emptyNode)
608	return;
609
610	#if ENABLE_FAST_COHERENT_CODEPATHS == 1
611	assert(context);
612	if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
613	{
614	occludedCoherent(valid_i, This, ray, context);
615	return;
616	}
617	#endif
618
619	/ filter out already occluded and invalid rays /
620	vbool<K> valid = (*valid_i == -`1`) & (ray.tfar >= `0.0f`);
621	#if defined(EMBREE_IGNORE_INVALID_RAYS)
622	valid &= ray.valid();
623	#endif
624
625	/ return if there are no valid rays /
626	const size_t valid_bits = movemask(valid);
627	if (unlikely(valid_bits == `0`)) return;
628
629	/ verify correct input /
630	assert(all(valid, ray.valid()));
631	assert(all(valid, ray.tnear() >= `0.0f`));
632	assert(!(types & BVH_MB) \|\| all(valid, (ray.time() >= `0.0f`) & (ray.time() <= `1.0f`)));
633	Precalculations pre(valid, ray);
634
635	/ load ray /
636	TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : `0`);
637	const vfloat<K> org_ray_tnear = max(ray.tnear(), `0.0f`);
638	const vfloat<K> org_ray_tfar = max(ray.tfar , `0.0f`);
639
640	tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
641	tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
642
643	vbool<K> terminated = !valid;
644	const vfloat<K> inf = vfloat<K>(pos_inf);
645
646	/ determine switch threshold based on flags /
647	const size_t switchThreshold = (context->user && context->isCoherent()) ? `2` : switchThresholdIncoherent;
648
649	/ allocate stack and push root node /
650	vfloat<K> stack_near[stackSizeChunk];
651	NodeRef stack_node[stackSizeChunk];
652	stack_node[`0`] = BVH::invalidNode;
653	stack_near[`0`] = inf;
654	stack_node[`1`] = bvh->root;
655	stack_near[`1`] = tray.tnear;
656	NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
657	NodeRef* __restrict__ sptr_node = stack_node + `2`;
658	vfloat<K>* __restrict__ sptr_near = stack_near + `2`;
659
660	while (`1`) pop:
661	{
662	/ pop next node from stack /
663	assert(sptr_node > stack_node);
664	sptr_node--;
665	sptr_near--;
666	NodeRef cur = *sptr_node;
667	if (unlikely(cur == BVH::invalidNode)) {
668	assert(sptr_node == stack_node);
669	break;
670	}
671
672	/ cull node if behind closest hit point /
673	vfloat<K> curDist = *sptr_near;
674	const vbool<K> active = curDist < tray.tfar;
675	if (unlikely(none(active)))
676	continue;
677
678	/ switch to single ray traversal /
679	#if (!defined(__WIN32__) \|\| defined(__X86_64__)) && ((defined(__aarch64__)) \|\| defined(__SSE4_2__))
680	#if FORCE_SINGLE_MODE == 0
681	if (single)
682	#endif
683	{
684	size_t bits = movemask(active);
685	#if FORCE_SINGLE_MODE == 0
686	if (unlikely(popcnt(bits) <= switchThreshold))
687	#endif
688	{
689	for (; bits!=`0`; ) {
690	const size_t i = bscf(bits);
691	if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
692	set(terminated, i);
693	}
694	if (all(terminated)) break;
695	tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
696	continue;
697	}
698	}
699	#endif
700
701	while (likely(!cur.isLeaf()))
702	{
703	/ process nodes /
704	const vbool<K> valid_node = tray.tfar > curDist;
705	STAT3(shadow.trav_nodes, `1`, popcnt(valid_node), K);
706	const NodeRef nodeRef = cur;
707	const BaseNode* __restrict__ const node = nodeRef.baseNode();
708
709	/ set cur to invalid /
710	cur = BVH::emptyNode;
711	curDist = pos_inf;
712
713	for (unsigned i = `0`; i < N; i++)
714	{
715	const NodeRef child = node->children[i];
716	if (unlikely(child == BVH::emptyNode)) break;
717	vfloat<K> lnearP;
718	vbool<K> lhit = valid_node;
719	BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
720
721	/ if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child /
722	if (likely(any(lhit)))
723	{
724	assert(sptr_node < stackEnd);
725	assert(child != BVH::emptyNode);
726	const vfloat<K> childDist = select(lhit, lnearP, inf);
727
728	/ push 'cur' node onto stack and continue with hit child /
729	if (likely(cur != BVH::emptyNode)) {
730	*sptr_node = cur; sptr_node++;
731	*sptr_near = curDist; sptr_near++;
732	}
733	curDist = childDist;
734	cur = child;
735	}
736	}
737	if (unlikely(cur == BVH::emptyNode))
738	goto pop;
739
740	#if SWITCH_DURING_DOWN_TRAVERSAL == 1
741	if (single)
742	{
743	// seems to be the best place for testing utilization
744	if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
745	{
746	*sptr_node++ = cur;
747	*sptr_near++ = curDist;
748	goto pop;
749	}
750	}
751	#endif
752	}
753
754	/ return if stack is empty /
755	if (unlikely(cur == BVH::invalidNode)) {
756	assert(sptr_node == stack_node);
757	break;
758	}
759
760
761	/ intersect leaf /
762	assert(cur != BVH::emptyNode);
763	const vbool<K> valid_leaf = tray.tfar > curDist;
764	STAT3(shadow.trav_leaves, `1`, popcnt(valid_leaf), K);
765	if (unlikely(none(valid_leaf))) continue;
766	size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
767
768	size_t lazy_node = `0`;
769	terminated \|= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
770	if (all(terminated)) break;
771	tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
772
773	if (unlikely(lazy_node)) {
774	*sptr_node = lazy_node; sptr_node++;
775	*sptr_near = neg_inf; sptr_near++;
776	}
777	}
778
779	vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
780	}
781
782
783	template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
784	void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
785	Accel::Intersectors* __restrict__ This,
786	RayK<K>& __restrict__ ray,
787	IntersectContext* context)
788	{
789	BVH* __restrict__ bvh = (BVH*)This->ptr;
790
791	/ filter out invalid rays /
792	vbool<K> valid = *valid_i == -`1`;
793	#if defined(EMBREE_IGNORE_INVALID_RAYS)
794	valid &= ray.valid();
795	#endif
796
797	/ return if there are no valid rays /
798	size_t valid_bits = movemask(valid);
799	if (unlikely(valid_bits == `0`)) return;
800
801	/ verify correct input /
802	assert(all(valid, ray.valid()));
803	assert(all(valid, ray.tnear() >= `0.0f`));
804	assert(!(types & BVH_MB) \|\| all(valid, (ray.time() >= `0.0f`) & (ray.time() <= `1.0f`)));
805	Precalculations pre(valid,ray);
806
807	/ load ray /
808	TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : `0`);
809	const vfloat<K> org_ray_tnear = max(ray.tnear(), `0.0f`);
810	const vfloat<K> org_ray_tfar = max(ray.tfar , `0.0f`);
811
812	vbool<K> terminated = !valid;
813
814	vint<K> octant = ray.octant();
815	octant = select(valid, octant, vint<K>(`0xffffffff`));
816
817	do
818	{
819	const size_t valid_index = bsf(valid_bits);
820	vbool<K> octant_valid = octant[valid_index] == octant;
821	valid_bits &= ~(size_t)movemask(octant_valid);
822
823	tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
824	tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
825
826	Frustum<robust> frustum;
827	frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
828
829	StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
830	StackItemMaskT<NodeRef>* stackPtr = stack + `1`; // current stack pointer
831	stack[`0`].ptr = bvh->root;
832	stack[`0`].mask = movemask(octant_valid);
833
834	while (`1`) pop:
835	{
836	/ pop next node from stack /
837	if (unlikely(stackPtr == stack)) break;
838
839	stackPtr--;
840	NodeRef cur = NodeRef(stackPtr->ptr);
841
842	/ cull node of active rays have already been terminated /
843	size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
844
845	if (unlikely(m_active == `0`)) continue;
846
847	while (likely(!cur.isLeaf()))
848	{
849	/ process nodes /
850	//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
851	const NodeRef nodeRef = cur;
852	const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
853
854	vfloat<N> fmin;
855	size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
856
857	if (unlikely(!m_frustum_node)) goto pop;
858	cur = BVH::emptyNode;
859	m_active = `0`;
860
861	#if defined(__AVX__)
862	//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
863	#endif
864	size_t num_child_hits = `0`;
865	do {
866	const size_t i = bscf(m_frustum_node);
867	vfloat<K> lnearP;
868	vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
869	STAT3(normal.trav_nodes, `1`, `1`, `1`);
870	BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
871
872	if (likely(any(lhit)))
873	{
874	const NodeRef child = node->child(i);
875	assert(child != BVH::emptyNode);
876	BVHN<N>::prefetch(child);
877	if (likely(cur != BVH::emptyNode)) {
878	num_child_hits++;
879	stackPtr->ptr = cur;
880	stackPtr->mask = m_active;
881	stackPtr++;
882	}
883	cur = child;
884	m_active = movemask(lhit);
885	}
886	} while(m_frustum_node);
887
888	if (unlikely(cur == BVH::emptyNode)) goto pop;
889	}
890
891	/ intersect leaf /
892	assert(cur != BVH::invalidNode);
893	assert(cur != BVH::emptyNode);
894	#if defined(__AVX__)
895	STAT3(normal.trav_leaves, `1`, popcnt(m_active), K);
896	#endif
897	if (unlikely(!m_active)) continue;
898	size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
899
900	size_t lazy_node = `0`;
901	terminated \|= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
902	octant_valid &= !terminated;
903	if (unlikely(none(octant_valid))) break;
904	tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
905
906	if (unlikely(lazy_node)) {
907	stackPtr->ptr = lazy_node;
908	stackPtr->mask = movemask(octant_valid);
909	stackPtr++;
910	}
911	}
912	} while(valid_bits);
913
914	vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
915	}
916	}
917	}
918

Browse the source code of Godot/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp