1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#include "bvh_intersector_hybrid.h"
5#include "bvh_traverser1.h"
6#include "node_intersector1.h"
7#include "node_intersector_packet.h"
8
9#include "../geometry/intersector_iterators.h"
10#include "../geometry/triangle_intersector.h"
11#include "../geometry/trianglev_intersector.h"
12#include "../geometry/trianglev_mb_intersector.h"
13#include "../geometry/trianglei_intersector.h"
14#include "../geometry/quadv_intersector.h"
15#include "../geometry/quadi_intersector.h"
16#include "../geometry/curveNv_intersector.h"
17#include "../geometry/curveNi_intersector.h"
18#include "../geometry/curveNi_mb_intersector.h"
19#include "../geometry/linei_intersector.h"
20#include "../geometry/subdivpatch1_intersector.h"
21#include "../geometry/object_intersector.h"
22#include "../geometry/instance_intersector.h"
23#include "../geometry/subgrid_intersector.h"
24#include "../geometry/subgrid_mb_intersector.h"
25#include "../geometry/curve_intersector_virtual.h"
26
27#define SWITCH_DURING_DOWN_TRAVERSAL 1
28#define FORCE_SINGLE_MODE 0
29
30#define ENABLE_FAST_COHERENT_CODEPATHS 1
31
32namespace embree
33{
34 namespace isa
35 {
36 template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
37 void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
38 const BVH* bvh,
39 NodeRef root,
40 size_t k,
41 Precalculations& pre,
42 RayHitK<K>& ray,
43 const TravRayK<K, robust>& tray,
44 IntersectContext* context)
45 {
46 /* stack state */
47 StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
48 StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
49 StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
50 stack[0].ptr = root;
51 stack[0].dist = neg_inf;
52
53 /* load the ray into SIMD registers */
54 TravRay<N,robust> tray1;
55 tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
56
57 /* pop loop */
58 while (true) pop:
59 {
60 /* pop next node */
61 if (unlikely(stackPtr == stack)) break;
62 stackPtr--;
63 NodeRef cur = NodeRef(stackPtr->ptr);
64
65 /* if popped node is too far, pop next one */
66 if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
67 continue;
68
69 /* downtraversal loop */
70 while (true)
71 {
72 /* intersect node */
73 size_t mask; vfloat<N> tNear;
74 STAT3(normal.trav_nodes, 1, 1, 1);
75 bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
76 if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
77
78 /* if no child is hit, pop next node */
79 if (unlikely(mask == 0))
80 goto pop;
81
82 /* select next child and push other children */
83 BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
84 }
85
86 /* this is a leaf node */
87 assert(cur != BVH::emptyNode);
88 STAT3(normal.trav_leaves, 1, 1, 1);
89 size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
90
91 size_t lazy_node = 0;
92 PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
93
94 tray1.tfar = ray.tfar[k];
95
96 if (unlikely(lazy_node)) {
97 stackPtr->ptr = lazy_node;
98 stackPtr->dist = neg_inf;
99 stackPtr++;
100 }
101 }
102 }
103
104 template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
105 void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
106 Accel::Intersectors* __restrict__ This,
107 RayHitK<K>& __restrict__ ray,
108 IntersectContext* __restrict__ context)
109 {
110 BVH* __restrict__ bvh = (BVH*)This->ptr;
111
112 /* we may traverse an empty BVH in case all geometry was invalid */
113 if (bvh->root == BVH::emptyNode)
114 return;
115
116#if ENABLE_FAST_COHERENT_CODEPATHS == 1
117 assert(context);
118 if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
119 {
120 intersectCoherent(valid_i, This, ray, context);
121 return;
122 }
123#endif
124
125 /* filter out invalid rays */
126 vbool<K> valid = *valid_i == -1;
127#if defined(EMBREE_IGNORE_INVALID_RAYS)
128 valid &= ray.valid();
129#endif
130
131 /* return if there are no valid rays */
132 size_t valid_bits = movemask(valid);
133
134#if defined(__AVX__)
135 STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
136#endif
137
138 if (unlikely(valid_bits == 0)) return;
139
140 /* verify correct input */
141 assert(all(valid, ray.valid()));
142 assert(all(valid, ray.tnear() >= 0.0f));
143 assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
144 Precalculations pre(valid, ray);
145
146 /* load ray */
147 TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
148 const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
149 const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
150
151 if (single)
152 {
153 tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
154 tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
155
156 for (; valid_bits!=0; ) {
157 const size_t i = bscf(valid_bits);
158 intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
159 }
160 return;
161 }
162
163 /* determine switch threshold based on flags */
164 const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
165
166 vint<K> octant = ray.octant();
167 octant = select(valid, octant, vint<K>(0xffffffff));
168
169 /* test whether we have ray with opposing direction signs in the packet */
170 bool split = false;
171 {
172 size_t bits = valid_bits;
173 vbool<K> vsplit( false );
174 do
175 {
176 const size_t valid_index = bsf(bits);
177 vbool<K> octant_valid = octant[valid_index] == octant;
178 bits &= ~(size_t)movemask(octant_valid);
179 vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
180 } while (bits);
181 if (any(vsplit)) split = true;
182 }
183
184 do
185 {
186 const size_t valid_index = bsf(valid_bits);
187 const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
188 const vint<K> count_diff_octant = \
189 ((diff_octant >> 2) & 1) +
190 ((diff_octant >> 1) & 1) +
191 ((diff_octant >> 0) & 1);
192
193 vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
194 if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
195
196
197 octant = select(octant_valid,vint<K>(0xffffffff),octant);
198 valid_bits &= ~(size_t)movemask(octant_valid);
199
200 tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
201 tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
202
203 /* allocate stack and push root node */
204 vfloat<K> stack_near[stackSizeChunk];
205 NodeRef stack_node[stackSizeChunk];
206 stack_node[0] = BVH::invalidNode;
207 stack_near[0] = inf;
208 stack_node[1] = bvh->root;
209 stack_near[1] = tray.tnear;
210 NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
211 NodeRef* __restrict__ sptr_node = stack_node + 2;
212 vfloat<K>* __restrict__ sptr_near = stack_near + 2;
213
214 while (1) pop:
215 {
216 /* pop next node from stack */
217 assert(sptr_node > stack_node);
218 sptr_node--;
219 sptr_near--;
220 NodeRef cur = *sptr_node;
221 if (unlikely(cur == BVH::invalidNode)) {
222 assert(sptr_node == stack_node);
223 break;
224 }
225
226 /* cull node if behind closest hit point */
227 vfloat<K> curDist = *sptr_near;
228 const vbool<K> active = curDist < tray.tfar;
229 if (unlikely(none(active)))
230 continue;
231
232 /* switch to single ray traversal */
233#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
234#if FORCE_SINGLE_MODE == 0
235 if (single)
236#endif
237 {
238 size_t bits = movemask(active);
239#if FORCE_SINGLE_MODE == 0
240 if (unlikely(popcnt(bits) <= switchThreshold))
241#endif
242 {
243 for (; bits!=0; ) {
244 const size_t i = bscf(bits);
245 intersect1(This, bvh, cur, i, pre, ray, tray, context);
246 }
247 tray.tfar = min(tray.tfar, ray.tfar);
248 continue;
249 }
250 }
251#endif
252 while (likely(!cur.isLeaf()))
253 {
254 /* process nodes */
255 const vbool<K> valid_node = tray.tfar > curDist;
256 STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
257 const NodeRef nodeRef = cur;
258 const BaseNode* __restrict__ const node = nodeRef.baseNode();
259
260 /* set cur to invalid */
261 cur = BVH::emptyNode;
262 curDist = pos_inf;
263
264 size_t num_child_hits = 0;
265
266 for (unsigned i = 0; i < N; i++)
267 {
268 const NodeRef child = node->children[i];
269 if (unlikely(child == BVH::emptyNode)) break;
270 vfloat<K> lnearP;
271 vbool<K> lhit = valid_node;
272 BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
273
274 /* if we hit the child we choose to continue with that child if it
275 is closer than the current next child, or we push it onto the stack */
276 if (likely(any(lhit)))
277 {
278 assert(sptr_node < stackEnd);
279 assert(child != BVH::emptyNode);
280 const vfloat<K> childDist = select(lhit, lnearP, inf);
281 /* push cur node onto stack and continue with hit child */
282 if (any(childDist < curDist))
283 {
284 if (likely(cur != BVH::emptyNode)) {
285 num_child_hits++;
286 *sptr_node = cur; sptr_node++;
287 *sptr_near = curDist; sptr_near++;
288 }
289 curDist = childDist;
290 cur = child;
291 }
292
293 /* push hit child onto stack */
294 else {
295 num_child_hits++;
296 *sptr_node = child; sptr_node++;
297 *sptr_near = childDist; sptr_near++;
298 }
299 }
300 }
301
302#if defined(__AVX__)
303 //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
304#endif
305
306 if (unlikely(cur == BVH::emptyNode))
307 goto pop;
308
309 /* improved distance sorting for 3 or more hits */
310 if (unlikely(num_child_hits >= 2))
311 {
312 if (any(sptr_near[-2] < sptr_near[-1]))
313 {
314 std::swap(sptr_near[-2],sptr_near[-1]);
315 std::swap(sptr_node[-2],sptr_node[-1]);
316 }
317 if (unlikely(num_child_hits >= 3))
318 {
319 if (any(sptr_near[-3] < sptr_near[-1]))
320 {
321 std::swap(sptr_near[-3],sptr_near[-1]);
322 std::swap(sptr_node[-3],sptr_node[-1]);
323 }
324 if (any(sptr_near[-3] < sptr_near[-2]))
325 {
326 std::swap(sptr_near[-3],sptr_near[-2]);
327 std::swap(sptr_node[-3],sptr_node[-2]);
328 }
329 }
330 }
331
332#if SWITCH_DURING_DOWN_TRAVERSAL == 1
333 if (single)
334 {
335 // seems to be the best place for testing utilization
336 if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
337 {
338 *sptr_node++ = cur;
339 *sptr_near++ = curDist;
340 goto pop;
341 }
342 }
343#endif
344 }
345
346 /* return if stack is empty */
347 if (unlikely(cur == BVH::invalidNode)) {
348 assert(sptr_node == stack_node);
349 break;
350 }
351
352 /* intersect leaf */
353 assert(cur != BVH::emptyNode);
354 const vbool<K> valid_leaf = tray.tfar > curDist;
355 STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
356 if (unlikely(none(valid_leaf))) continue;
357 size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
358
359 size_t lazy_node = 0;
360 PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
361 tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
362
363 if (unlikely(lazy_node)) {
364 *sptr_node = lazy_node; sptr_node++;
365 *sptr_near = neg_inf; sptr_near++;
366 }
367 }
368 } while(valid_bits);
369 }
370
371
372 template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
373 void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
374 Accel::Intersectors* __restrict__ This,
375 RayHitK<K>& __restrict__ ray,
376 IntersectContext* context)
377 {
378 BVH* __restrict__ bvh = (BVH*)This->ptr;
379
380 /* filter out invalid rays */
381 vbool<K> valid = *valid_i == -1;
382#if defined(EMBREE_IGNORE_INVALID_RAYS)
383 valid &= ray.valid();
384#endif
385
386 /* return if there are no valid rays */
387 size_t valid_bits = movemask(valid);
388 if (unlikely(valid_bits == 0)) return;
389
390 /* verify correct input */
391 assert(all(valid, ray.valid()));
392 assert(all(valid, ray.tnear() >= 0.0f));
393 assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
394 Precalculations pre(valid, ray);
395
396 /* load ray */
397 TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
398 const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
399 const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
400
401 vint<K> octant = ray.octant();
402 octant = select(valid, octant, vint<K>(0xffffffff));
403
404 do
405 {
406 const size_t valid_index = bsf(valid_bits);
407 const vbool<K> octant_valid = octant[valid_index] == octant;
408 valid_bits &= ~(size_t)movemask(octant_valid);
409
410 tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
411 tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
412
413 Frustum<robust> frustum;
414 frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
415
416 StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
417 StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
418 stack[0].ptr = bvh->root;
419 stack[0].dist = neg_inf;
420
421 while (1) pop:
422 {
423 /* pop next node from stack */
424 if (unlikely(stackPtr == stack)) break;
425
426 stackPtr--;
427 NodeRef cur = NodeRef(stackPtr->ptr);
428
429 /* cull node if behind closest hit point */
430 vfloat<K> curDist = *(float*)&stackPtr->dist;
431 const vbool<K> active = curDist < tray.tfar;
432 if (unlikely(none(active))) continue;
433
434 while (likely(!cur.isLeaf()))
435 {
436 /* process nodes */
437 //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
438 const NodeRef nodeRef = cur;
439 const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
440
441 vfloat<N> fmin;
442 size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
443
444 if (unlikely(!m_frustum_node)) goto pop;
445 cur = BVH::emptyNode;
446 curDist = pos_inf;
447
448#if defined(__AVX__)
449 //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
450#endif
451 size_t num_child_hits = 0;
452 do {
453 const size_t i = bscf(m_frustum_node);
454 vfloat<K> lnearP;
455 vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
456 STAT3(normal.trav_nodes, 1, 1, 1);
457 BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
458
459 if (likely(any(lhit)))
460 {
461 const vfloat<K> childDist = fmin[i];
462 const NodeRef child = node->child(i);
463 BVHN<N>::prefetch(child);
464 if (any(childDist < curDist))
465 {
466 if (likely(cur != BVH::emptyNode)) {
467 num_child_hits++;
468 stackPtr->ptr = cur;
469 *(float*)&stackPtr->dist = toScalar(curDist);
470 stackPtr++;
471 }
472 curDist = childDist;
473 cur = child;
474 }
475 /* push hit child onto stack */
476 else {
477 num_child_hits++;
478 stackPtr->ptr = child;
479 *(float*)&stackPtr->dist = toScalar(childDist);
480 stackPtr++;
481 }
482 }
483 } while(m_frustum_node);
484
485 if (unlikely(cur == BVH::emptyNode)) goto pop;
486
487 /* improved distance sorting for 3 or more hits */
488 if (unlikely(num_child_hits >= 2))
489 {
490 if (stackPtr[-2].dist < stackPtr[-1].dist)
491 std::swap(stackPtr[-2],stackPtr[-1]);
492 if (unlikely(num_child_hits >= 3))
493 {
494 if (stackPtr[-3].dist < stackPtr[-1].dist)
495 std::swap(stackPtr[-3],stackPtr[-1]);
496 if (stackPtr[-3].dist < stackPtr[-2].dist)
497 std::swap(stackPtr[-3],stackPtr[-2]);
498 }
499 }
500 }
501
502 /* intersect leaf */
503 assert(cur != BVH::invalidNode);
504 assert(cur != BVH::emptyNode);
505 const vbool<K> valid_leaf = tray.tfar > curDist;
506 STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
507 if (unlikely(none(valid_leaf))) continue;
508 size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
509
510 size_t lazy_node = 0;
511 PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
512
513 /* reduce max distance interval on successful intersection */
514 if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
515 {
516 tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
517 frustum.template updateMaxDist<K>(tray.tfar);
518 }
519
520 if (unlikely(lazy_node)) {
521 stackPtr->ptr = lazy_node;
522 stackPtr->dist = neg_inf;
523 stackPtr++;
524 }
525 }
526
527 } while(valid_bits);
528 }
529
530 // ===================================================================================================================================================================
531 // ===================================================================================================================================================================
532 // ===================================================================================================================================================================
533
534 template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
535 bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
536 const BVH* bvh,
537 NodeRef root,
538 size_t k,
539 Precalculations& pre,
540 RayK<K>& ray,
541 const TravRayK<K, robust>& tray,
542 IntersectContext* context)
543 {
544 /* stack state */
545 NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
546 NodeRef* stackPtr = stack+1; // current stack pointer
547 NodeRef* stackEnd = stack+stackSizeSingle;
548 stack[0] = root;
549
550 /* load the ray into SIMD registers */
551 TravRay<N,robust> tray1;
552 tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
553
554 /* pop loop */
555 while (true) pop:
556 {
557 /* pop next node */
558 if (unlikely(stackPtr == stack)) break;
559 stackPtr--;
560 NodeRef cur = (NodeRef)*stackPtr;
561
562 /* downtraversal loop */
563 while (true)
564 {
565 /* intersect node */
566 size_t mask; vfloat<N> tNear;
567 STAT3(shadow.trav_nodes, 1, 1, 1);
568 bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
569 if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
570
571 /* if no child is hit, pop next node */
572 if (unlikely(mask == 0))
573 goto pop;
574
575 /* select next child and push other children */
576 BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
577 }
578
579 /* this is a leaf node */
580 assert(cur != BVH::emptyNode);
581 STAT3(shadow.trav_leaves, 1, 1, 1);
582 size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
583
584 size_t lazy_node = 0;
585 if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
586 ray.tfar[k] = neg_inf;
587 return true;
588 }
589
590 if (unlikely(lazy_node)) {
591 *stackPtr = lazy_node;
592 stackPtr++;
593 }
594 }
595 return false;
596 }
597
598 template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
599 void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
600 Accel::Intersectors* __restrict__ This,
601 RayK<K>& __restrict__ ray,
602 IntersectContext* context)
603 {
604 BVH* __restrict__ bvh = (BVH*)This->ptr;
605
606 /* we may traverse an empty BVH in case all geometry was invalid */
607 if (bvh->root == BVH::emptyNode)
608 return;
609
610#if ENABLE_FAST_COHERENT_CODEPATHS == 1
611 assert(context);
612 if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
613 {
614 occludedCoherent(valid_i, This, ray, context);
615 return;
616 }
617#endif
618
619 /* filter out already occluded and invalid rays */
620 vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
621#if defined(EMBREE_IGNORE_INVALID_RAYS)
622 valid &= ray.valid();
623#endif
624
625 /* return if there are no valid rays */
626 const size_t valid_bits = movemask(valid);
627 if (unlikely(valid_bits == 0)) return;
628
629 /* verify correct input */
630 assert(all(valid, ray.valid()));
631 assert(all(valid, ray.tnear() >= 0.0f));
632 assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
633 Precalculations pre(valid, ray);
634
635 /* load ray */
636 TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
637 const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
638 const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
639
640 tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
641 tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
642
643 vbool<K> terminated = !valid;
644 const vfloat<K> inf = vfloat<K>(pos_inf);
645
646 /* determine switch threshold based on flags */
647 const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
648
649 /* allocate stack and push root node */
650 vfloat<K> stack_near[stackSizeChunk];
651 NodeRef stack_node[stackSizeChunk];
652 stack_node[0] = BVH::invalidNode;
653 stack_near[0] = inf;
654 stack_node[1] = bvh->root;
655 stack_near[1] = tray.tnear;
656 NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
657 NodeRef* __restrict__ sptr_node = stack_node + 2;
658 vfloat<K>* __restrict__ sptr_near = stack_near + 2;
659
660 while (1) pop:
661 {
662 /* pop next node from stack */
663 assert(sptr_node > stack_node);
664 sptr_node--;
665 sptr_near--;
666 NodeRef cur = *sptr_node;
667 if (unlikely(cur == BVH::invalidNode)) {
668 assert(sptr_node == stack_node);
669 break;
670 }
671
672 /* cull node if behind closest hit point */
673 vfloat<K> curDist = *sptr_near;
674 const vbool<K> active = curDist < tray.tfar;
675 if (unlikely(none(active)))
676 continue;
677
678 /* switch to single ray traversal */
679#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
680#if FORCE_SINGLE_MODE == 0
681 if (single)
682#endif
683 {
684 size_t bits = movemask(active);
685#if FORCE_SINGLE_MODE == 0
686 if (unlikely(popcnt(bits) <= switchThreshold))
687#endif
688 {
689 for (; bits!=0; ) {
690 const size_t i = bscf(bits);
691 if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
692 set(terminated, i);
693 }
694 if (all(terminated)) break;
695 tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
696 continue;
697 }
698 }
699#endif
700
701 while (likely(!cur.isLeaf()))
702 {
703 /* process nodes */
704 const vbool<K> valid_node = tray.tfar > curDist;
705 STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
706 const NodeRef nodeRef = cur;
707 const BaseNode* __restrict__ const node = nodeRef.baseNode();
708
709 /* set cur to invalid */
710 cur = BVH::emptyNode;
711 curDist = pos_inf;
712
713 for (unsigned i = 0; i < N; i++)
714 {
715 const NodeRef child = node->children[i];
716 if (unlikely(child == BVH::emptyNode)) break;
717 vfloat<K> lnearP;
718 vbool<K> lhit = valid_node;
719 BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
720
721 /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
722 if (likely(any(lhit)))
723 {
724 assert(sptr_node < stackEnd);
725 assert(child != BVH::emptyNode);
726 const vfloat<K> childDist = select(lhit, lnearP, inf);
727
728 /* push 'cur' node onto stack and continue with hit child */
729 if (likely(cur != BVH::emptyNode)) {
730 *sptr_node = cur; sptr_node++;
731 *sptr_near = curDist; sptr_near++;
732 }
733 curDist = childDist;
734 cur = child;
735 }
736 }
737 if (unlikely(cur == BVH::emptyNode))
738 goto pop;
739
740#if SWITCH_DURING_DOWN_TRAVERSAL == 1
741 if (single)
742 {
743 // seems to be the best place for testing utilization
744 if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
745 {
746 *sptr_node++ = cur;
747 *sptr_near++ = curDist;
748 goto pop;
749 }
750 }
751#endif
752 }
753
754 /* return if stack is empty */
755 if (unlikely(cur == BVH::invalidNode)) {
756 assert(sptr_node == stack_node);
757 break;
758 }
759
760
761 /* intersect leaf */
762 assert(cur != BVH::emptyNode);
763 const vbool<K> valid_leaf = tray.tfar > curDist;
764 STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
765 if (unlikely(none(valid_leaf))) continue;
766 size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
767
768 size_t lazy_node = 0;
769 terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
770 if (all(terminated)) break;
771 tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
772
773 if (unlikely(lazy_node)) {
774 *sptr_node = lazy_node; sptr_node++;
775 *sptr_near = neg_inf; sptr_near++;
776 }
777 }
778
779 vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
780 }
781
782
783 template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
784 void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
785 Accel::Intersectors* __restrict__ This,
786 RayK<K>& __restrict__ ray,
787 IntersectContext* context)
788 {
789 BVH* __restrict__ bvh = (BVH*)This->ptr;
790
791 /* filter out invalid rays */
792 vbool<K> valid = *valid_i == -1;
793#if defined(EMBREE_IGNORE_INVALID_RAYS)
794 valid &= ray.valid();
795#endif
796
797 /* return if there are no valid rays */
798 size_t valid_bits = movemask(valid);
799 if (unlikely(valid_bits == 0)) return;
800
801 /* verify correct input */
802 assert(all(valid, ray.valid()));
803 assert(all(valid, ray.tnear() >= 0.0f));
804 assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
805 Precalculations pre(valid,ray);
806
807 /* load ray */
808 TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
809 const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
810 const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
811
812 vbool<K> terminated = !valid;
813
814 vint<K> octant = ray.octant();
815 octant = select(valid, octant, vint<K>(0xffffffff));
816
817 do
818 {
819 const size_t valid_index = bsf(valid_bits);
820 vbool<K> octant_valid = octant[valid_index] == octant;
821 valid_bits &= ~(size_t)movemask(octant_valid);
822
823 tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
824 tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
825
826 Frustum<robust> frustum;
827 frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
828
829 StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
830 StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
831 stack[0].ptr = bvh->root;
832 stack[0].mask = movemask(octant_valid);
833
834 while (1) pop:
835 {
836 /* pop next node from stack */
837 if (unlikely(stackPtr == stack)) break;
838
839 stackPtr--;
840 NodeRef cur = NodeRef(stackPtr->ptr);
841
842 /* cull node of active rays have already been terminated */
843 size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
844
845 if (unlikely(m_active == 0)) continue;
846
847 while (likely(!cur.isLeaf()))
848 {
849 /* process nodes */
850 //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
851 const NodeRef nodeRef = cur;
852 const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
853
854 vfloat<N> fmin;
855 size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
856
857 if (unlikely(!m_frustum_node)) goto pop;
858 cur = BVH::emptyNode;
859 m_active = 0;
860
861#if defined(__AVX__)
862 //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
863#endif
864 size_t num_child_hits = 0;
865 do {
866 const size_t i = bscf(m_frustum_node);
867 vfloat<K> lnearP;
868 vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
869 STAT3(normal.trav_nodes, 1, 1, 1);
870 BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
871
872 if (likely(any(lhit)))
873 {
874 const NodeRef child = node->child(i);
875 assert(child != BVH::emptyNode);
876 BVHN<N>::prefetch(child);
877 if (likely(cur != BVH::emptyNode)) {
878 num_child_hits++;
879 stackPtr->ptr = cur;
880 stackPtr->mask = m_active;
881 stackPtr++;
882 }
883 cur = child;
884 m_active = movemask(lhit);
885 }
886 } while(m_frustum_node);
887
888 if (unlikely(cur == BVH::emptyNode)) goto pop;
889 }
890
891 /* intersect leaf */
892 assert(cur != BVH::invalidNode);
893 assert(cur != BVH::emptyNode);
894#if defined(__AVX__)
895 STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
896#endif
897 if (unlikely(!m_active)) continue;
898 size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
899
900 size_t lazy_node = 0;
901 terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
902 octant_valid &= !terminated;
903 if (unlikely(none(octant_valid))) break;
904 tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
905
906 if (unlikely(lazy_node)) {
907 stackPtr->ptr = lazy_node;
908 stackPtr->mask = movemask(octant_valid);
909 stackPtr++;
910 }
911 }
912 } while(valid_bits);
913
914 vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
915 }
916 }
917}
918