1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#include "bvh_intersector_stream.h"
5
6#include "../geometry/intersector_iterators.h"
7#include "../geometry/triangle_intersector.h"
8#include "../geometry/trianglev_intersector.h"
9#include "../geometry/trianglev_mb_intersector.h"
10#include "../geometry/trianglei_intersector.h"
11#include "../geometry/quadv_intersector.h"
12#include "../geometry/quadi_intersector.h"
13#include "../geometry/linei_intersector.h"
14#include "../geometry/subdivpatch1_intersector.h"
15#include "../geometry/object_intersector.h"
16#include "../geometry/instance_intersector.h"
17
18#include "../common/scene.h"
19#include <bitset>
20
21namespace embree
22{
23 namespace isa
24 {
25 __aligned(64) static const int shiftTable[32] = {
26 (int)1 << 0, (int)1 << 1, (int)1 << 2, (int)1 << 3, (int)1 << 4, (int)1 << 5, (int)1 << 6, (int)1 << 7,
27 (int)1 << 8, (int)1 << 9, (int)1 << 10, (int)1 << 11, (int)1 << 12, (int)1 << 13, (int)1 << 14, (int)1 << 15,
28 (int)1 << 16, (int)1 << 17, (int)1 << 18, (int)1 << 19, (int)1 << 20, (int)1 << 21, (int)1 << 22, (int)1 << 23,
29 (int)1 << 24, (int)1 << 25, (int)1 << 26, (int)1 << 27, (int)1 << 28, (int)1 << 29, (int)1 << 30, (int)1 << 31
30 };
31
32 template<int N, int types, bool robust, typename PrimitiveIntersector>
33 __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersect(Accel::Intersectors* __restrict__ This,
34 RayHitN** inputPackets,
35 size_t numOctantRays,
36 IntersectContext* context)
37 {
38 /* we may traverse an empty BVH in case all geometry was invalid */
39 BVH* __restrict__ bvh = (BVH*) This->ptr;
40 if (bvh->root == BVH::emptyNode)
41 return;
42
43 // Only the coherent code path is implemented
44 assert(context->isCoherent());
45 intersectCoherent(This, (RayHitK<VSIZEL>**)inputPackets, numOctantRays, context);
46 }
47
48 template<int N, int types, bool robust, typename PrimitiveIntersector>
49 template<int K>
50 __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersectCoherent(Accel::Intersectors* __restrict__ This,
51 RayHitK<K>** inputPackets,
52 size_t numOctantRays,
53 IntersectContext* context)
54 {
55 assert(context->isCoherent());
56
57 BVH* __restrict__ bvh = (BVH*) This->ptr;
58 __aligned(64) StackItemMaskCoherent stack[stackSizeSingle]; // stack of nodes
59 assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
60
61 __aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
62 __aligned(64) Frustum<robust> frustum;
63
64 bool commonOctant = true;
65 const size_t m_active = initPacketsAndFrustum((RayK<K>**)inputPackets, numOctantRays, packets, frustum, commonOctant);
66 if (unlikely(m_active == 0)) return;
67
68 /* case of non-common origin */
69 if (unlikely(!commonOctant))
70 {
71 const size_t numPackets = (numOctantRays+K-1)/K;
72 for (size_t i = 0; i < numPackets; i++)
73 This->intersect(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
74 return;
75 }
76
77 stack[0].mask = m_active;
78 stack[0].parent = 0;
79 stack[0].child = bvh->root;
80
81 ///////////////////////////////////////////////////////////////////////////////////
82 ///////////////////////////////////////////////////////////////////////////////////
83 ///////////////////////////////////////////////////////////////////////////////////
84
85 StackItemMaskCoherent* stackPtr = stack + 1;
86
87 while (1) pop:
88 {
89 if (unlikely(stackPtr == stack)) break;
90
91 STAT3(normal.trav_stack_pop,1,1,1);
92 stackPtr--;
93 /*! pop next node */
94 NodeRef cur = NodeRef(stackPtr->child);
95 size_t m_trav_active = stackPtr->mask;
96 assert(m_trav_active);
97 NodeRef parent = stackPtr->parent;
98
99 while (1)
100 {
101 if (unlikely(cur.isLeaf())) break;
102 const AABBNode* __restrict__ const node = cur.getAABBNode();
103 parent = cur;
104
105 __aligned(64) size_t maskK[N];
106 for (size_t i = 0; i < N; i++)
107 maskK[i] = m_trav_active;
108 vfloat<N> dist;
109 const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
110 if (unlikely(m_node_hit == 0)) goto pop;
111
112 BVHNNodeTraverserStreamHitCoherent<N, types>::traverseClosestHit(cur, m_trav_active, vbool<N>((int)m_node_hit), dist, (size_t*)maskK, stackPtr);
113 assert(m_trav_active);
114 }
115
116 /* non-root and leaf => full culling test for all rays */
117 if (unlikely(parent != 0 && cur.isLeaf()))
118 {
119 const AABBNode* __restrict__ const node = parent.getAABBNode();
120 size_t boxID = 0xff;
121 for (size_t i = 0; i < N; i++)
122 if (node->child(i) == cur) { boxID = i; break; }
123 assert(boxID < N);
124 assert(cur == node->child(boxID));
125 m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
126 }
127
128 /*! this is a leaf node */
129 assert(cur != BVH::emptyNode);
130 STAT3(normal.trav_leaves, 1, 1, 1);
131 size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
132
133 size_t bits = m_trav_active;
134
135 /*! intersect stream of rays with all primitives */
136 size_t lazy_node = 0;
137#if defined(__SSE4_2__)
138 STAT_USER(1,(popcnt(bits)+K-1)/K*4);
139#endif
140 while(bits)
141 {
142 size_t i = bsf(bits) / K;
143 const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
144 assert(m_isec & bits);
145 bits &= ~m_isec;
146
147 TravRayKStream<K, robust>& p = packets[i];
148 vbool<K> m_valid = p.tnear <= p.tfar;
149 PrimitiveIntersectorK<K>::intersectK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
150 p.tfar = min(p.tfar, inputPackets[i]->tfar);
151 };
152
153 } // traversal + intersection
154 }
155
156 template<int N, int types, bool robust, typename PrimitiveIntersector>
157 __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occluded(Accel::Intersectors* __restrict__ This,
158 RayN** inputPackets,
159 size_t numOctantRays,
160 IntersectContext* context)
161 {
162 /* we may traverse an empty BVH in case all geometry was invalid */
163 BVH* __restrict__ bvh = (BVH*) This->ptr;
164 if (bvh->root == BVH::emptyNode)
165 return;
166
167 if (unlikely(context->isCoherent()))
168 occludedCoherent(This, (RayK<VSIZEL>**)inputPackets, numOctantRays, context);
169 else
170 occludedIncoherent(This, (RayK<VSIZEX>**)inputPackets, numOctantRays, context);
171 }
172
173 template<int N, int types, bool robust, typename PrimitiveIntersector>
174 template<int K>
175 __noinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedCoherent(Accel::Intersectors* __restrict__ This,
176 RayK<K>** inputPackets,
177 size_t numOctantRays,
178 IntersectContext* context)
179 {
180 assert(context->isCoherent());
181
182 BVH* __restrict__ bvh = (BVH*)This->ptr;
183 __aligned(64) StackItemMaskCoherent stack[stackSizeSingle]; // stack of nodes
184 assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
185
186 /* inactive rays should have been filtered out before */
187 __aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
188 __aligned(64) Frustum<robust> frustum;
189
190 bool commonOctant = true;
191 size_t m_active = initPacketsAndFrustum(inputPackets, numOctantRays, packets, frustum, commonOctant);
192
193 /* valid rays */
194 if (unlikely(m_active == 0)) return;
195
196 /* case of non-common origin */
197 if (unlikely(!commonOctant))
198 {
199 const size_t numPackets = (numOctantRays+K-1)/K;
200 for (size_t i = 0; i < numPackets; i++)
201 This->occluded(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
202 return;
203 }
204
205 stack[0].mask = m_active;
206 stack[0].parent = 0;
207 stack[0].child = bvh->root;
208
209 ///////////////////////////////////////////////////////////////////////////////////
210 ///////////////////////////////////////////////////////////////////////////////////
211 ///////////////////////////////////////////////////////////////////////////////////
212
213 StackItemMaskCoherent* stackPtr = stack + 1;
214
215 while (1) pop:
216 {
217 if (unlikely(stackPtr == stack)) break;
218
219 STAT3(normal.trav_stack_pop,1,1,1);
220 stackPtr--;
221 /*! pop next node */
222 NodeRef cur = NodeRef(stackPtr->child);
223 size_t m_trav_active = stackPtr->mask & m_active;
224 if (unlikely(!m_trav_active)) continue;
225 assert(m_trav_active);
226 NodeRef parent = stackPtr->parent;
227
228 while (1)
229 {
230 if (unlikely(cur.isLeaf())) break;
231 const AABBNode* __restrict__ const node = cur.getAABBNode();
232 parent = cur;
233
234 __aligned(64) size_t maskK[N];
235 for (size_t i = 0; i < N; i++)
236 maskK[i] = m_trav_active;
237
238 vfloat<N> dist;
239 const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
240 if (unlikely(m_node_hit == 0)) goto pop;
241
242 BVHNNodeTraverserStreamHitCoherent<N, types>::traverseAnyHit(cur, m_trav_active, vbool<N>((int)m_node_hit), (size_t*)maskK, stackPtr);
243 assert(m_trav_active);
244 }
245
246 /* non-root and leaf => full culling test for all rays */
247 if (unlikely(parent != 0 && cur.isLeaf()))
248 {
249 const AABBNode* __restrict__ const node = parent.getAABBNode();
250 size_t boxID = 0xff;
251 for (size_t i = 0; i < N; i++)
252 if (node->child(i) == cur) { boxID = i; break; }
253 assert(boxID < N);
254 assert(cur == node->child(boxID));
255 m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
256 }
257
258 /*! this is a leaf node */
259 assert(cur != BVH::emptyNode);
260 STAT3(normal.trav_leaves, 1, 1, 1);
261 size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
262
263 size_t bits = m_trav_active & m_active;
264 /*! intersect stream of rays with all primitives */
265 size_t lazy_node = 0;
266#if defined(__SSE4_2__)
267 STAT_USER(1,(popcnt(bits)+K-1)/K*4);
268#endif
269 while (bits)
270 {
271 size_t i = bsf(bits) / K;
272 const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
273 assert(m_isec & bits);
274 bits &= ~m_isec;
275 TravRayKStream<K, robust>& p = packets[i];
276 vbool<K> m_valid = p.tnear <= p.tfar;
277 vbool<K> m_hit = PrimitiveIntersectorK<K>::occludedK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
278 inputPackets[i]->tfar = select(m_hit & m_valid, vfloat<K>(neg_inf), inputPackets[i]->tfar);
279 m_active &= ~((size_t)movemask(m_hit) << (i*K));
280 }
281
282 } // traversal + intersection
283 }
284
285
286 template<int N, int types, bool robust, typename PrimitiveIntersector>
287 template<int K>
288 __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedIncoherent(Accel::Intersectors* __restrict__ This,
289 RayK<K>** inputPackets,
290 size_t numOctantRays,
291 IntersectContext* context)
292 {
293 assert(!context->isCoherent());
294 assert(types & BVH_FLAG_ALIGNED_NODE);
295
296 __aligned(64) TravRayKStream<K,robust> packet[MAX_INTERNAL_STREAM_SIZE/K];
297
298 assert(numOctantRays <= 32);
299 const size_t numPackets = (numOctantRays+K-1)/K;
300 size_t m_active = 0;
301 for (size_t i = 0; i < numPackets; i++)
302 {
303 const vfloat<K> tnear = inputPackets[i]->tnear();
304 const vfloat<K> tfar = inputPackets[i]->tfar;
305 vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f);
306 m_active |= (size_t)movemask(m_valid) << (K*i);
307 const Vec3vf<K>& org = inputPackets[i]->org;
308 const Vec3vf<K>& dir = inputPackets[i]->dir;
309 vfloat<K> packet_min_dist = max(tnear, 0.0f);
310 vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf);
311 new (&packet[i]) TravRayKStream<K,robust>(org, dir, packet_min_dist, packet_max_dist);
312 }
313
314 BVH* __restrict__ bvh = (BVH*)This->ptr;
315
316 StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
317 StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
318 stack[0].ptr = bvh->root;
319 stack[0].mask = m_active;
320
321 size_t terminated = ~m_active;
322
323 /* near/far offsets based on first ray */
324 const NearFarPrecalculations nf(Vec3fa(packet[0].rdir.x[0], packet[0].rdir.y[0], packet[0].rdir.z[0]), N);
325
326 while (1) pop:
327 {
328 if (unlikely(stackPtr == stack)) break;
329 STAT3(shadow.trav_stack_pop,1,1,1);
330 stackPtr--;
331 NodeRef cur = NodeRef(stackPtr->ptr);
332 size_t cur_mask = stackPtr->mask & (~terminated);
333 if (unlikely(cur_mask == 0)) continue;
334
335 while (true)
336 {
337 /*! stop if we found a leaf node */
338 if (unlikely(cur.isLeaf())) break;
339 const AABBNode* __restrict__ const node = cur.getAABBNode();
340
341 const vint<N> vmask = traverseIncoherentStream(cur_mask, packet, node, nf, shiftTable);
342
343 size_t mask = movemask(vmask != vint<N>(zero));
344 if (unlikely(mask == 0)) goto pop;
345
346 __aligned(64) unsigned int child_mask[N];
347 vint<N>::storeu(child_mask, vmask); // this explicit store here causes much better code generation
348
349 /*! one child is hit, continue with that child */
350 size_t r = bscf(mask);
351 assert(r < N);
352 cur = node->child(r);
353 BVHN<N>::prefetch(cur,types);
354 cur_mask = child_mask[r];
355
356 /* simple in order sequence */
357 assert(cur != BVH::emptyNode);
358 if (likely(mask == 0)) continue;
359 stackPtr->ptr = cur;
360 stackPtr->mask = cur_mask;
361 stackPtr++;
362
363 for (; ;)
364 {
365 r = bscf(mask);
366 assert(r < N);
367
368 cur = node->child(r);
369 BVHN<N>::prefetch(cur,types);
370 cur_mask = child_mask[r];
371 assert(cur != BVH::emptyNode);
372 if (likely(mask == 0)) break;
373 stackPtr->ptr = cur;
374 stackPtr->mask = cur_mask;
375 stackPtr++;
376 }
377 }
378
379 /*! this is a leaf node */
380 assert(cur != BVH::emptyNode);
381 STAT3(shadow.trav_leaves,1,1,1);
382 size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
383
384 size_t bits = cur_mask;
385 size_t lazy_node = 0;
386
387 for (; bits != 0;)
388 {
389 const size_t rayID = bscf(bits);
390
391 RayK<K> &ray = *inputPackets[rayID / K];
392 const size_t k = rayID % K;
393 if (PrimitiveIntersectorK<K>::occluded(This, ray, k, context, prim, num, lazy_node))
394 {
395 ray.tfar[k] = neg_inf;
396 terminated |= (size_t)1 << rayID;
397 }
398
399 /* lazy node */
400 if (unlikely(lazy_node))
401 {
402 stackPtr->ptr = lazy_node;
403 stackPtr->mask = cur_mask;
404 stackPtr++;
405 }
406 }
407
408 if (unlikely(terminated == (size_t)-1)) break;
409 }
410 }
411
412 ////////////////////////////////////////////////////////////////////////////////
413 /// ArrayIntersectorKStream Definitions
414 ////////////////////////////////////////////////////////////////////////////////
415
416 template<bool filter>
417 struct Triangle4IntersectorStreamMoeller {
418 template<int K> using Type = ArrayIntersectorKStream<K,TriangleMIntersectorKMoeller<4 COMMA K COMMA true>>;
419 };
420
421 template<bool filter>
422 struct Triangle4vIntersectorStreamPluecker {
423 template<int K> using Type = ArrayIntersectorKStream<K,TriangleMvIntersectorKPluecker<4 COMMA K COMMA true>>;
424 };
425
426 template<bool filter>
427 struct Triangle4iIntersectorStreamMoeller {
428 template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKMoeller<4 COMMA K COMMA true>>;
429 };
430
431 template<bool filter>
432 struct Triangle4iIntersectorStreamPluecker {
433 template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKPluecker<4 COMMA K COMMA true>>;
434 };
435
436 template<bool filter>
437 struct Quad4vIntersectorStreamMoeller {
438 template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKMoeller<4 COMMA K COMMA true>>;
439 };
440
441 template<bool filter>
442 struct Quad4iIntersectorStreamMoeller {
443 template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKMoeller<4 COMMA K COMMA true>>;
444 };
445
446 template<bool filter>
447 struct Quad4vIntersectorStreamPluecker {
448 template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKPluecker<4 COMMA K COMMA true>>;
449 };
450
451 template<bool filter>
452 struct Quad4iIntersectorStreamPluecker {
453 template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKPluecker<4 COMMA K COMMA true>>;
454 };
455
456 struct ObjectIntersectorStream {
457 template<int K> using Type = ArrayIntersectorKStream<K,ObjectIntersectorK<K COMMA false>>;
458 };
459
460 struct InstanceIntersectorStream {
461 template<int K> using Type = ArrayIntersectorKStream<K,InstanceIntersectorK<K>>;
462 };
463
464 // =====================================================================================================
465 // =====================================================================================================
466 // =====================================================================================================
467
468 template<int N>
469 void BVHNIntersectorStreamPacketFallback<N>::intersect(Accel::Intersectors* __restrict__ This,
470 RayHitN** inputRays,
471 size_t numTotalRays,
472 IntersectContext* context)
473 {
474 if (unlikely(context->isCoherent()))
475 intersectK(This, (RayHitK<VSIZEL>**)inputRays, numTotalRays, context);
476 else
477 intersectK(This, (RayHitK<VSIZEX>**)inputRays, numTotalRays, context);
478 }
479
480 template<int N>
481 void BVHNIntersectorStreamPacketFallback<N>::occluded(Accel::Intersectors* __restrict__ This,
482 RayN** inputRays,
483 size_t numTotalRays,
484 IntersectContext* context)
485 {
486 if (unlikely(context->isCoherent()))
487 occludedK(This, (RayK<VSIZEL>**)inputRays, numTotalRays, context);
488 else
489 occludedK(This, (RayK<VSIZEX>**)inputRays, numTotalRays, context);
490 }
491
492 template<int N>
493 template<int K>
494 __noinline void BVHNIntersectorStreamPacketFallback<N>::intersectK(Accel::Intersectors* __restrict__ This,
495 RayHitK<K>** inputRays,
496 size_t numTotalRays,
497 IntersectContext* context)
498 {
499 /* fallback to packets */
500 for (size_t i = 0; i < numTotalRays; i += K)
501 {
502 const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
503 vbool<K> valid = vi < vint<K>(int(numTotalRays));
504 RayHitK<K>& ray = *(inputRays[i / K]);
505 valid &= ray.tnear() <= ray.tfar;
506 This->intersect(valid, ray, context);
507 }
508 }
509
510 template<int N>
511 template<int K>
512 __noinline void BVHNIntersectorStreamPacketFallback<N>::occludedK(Accel::Intersectors* __restrict__ This,
513 RayK<K>** inputRays,
514 size_t numTotalRays,
515 IntersectContext* context)
516 {
517 /* fallback to packets */
518 for (size_t i = 0; i < numTotalRays; i += K)
519 {
520 const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
521 vbool<K> valid = vi < vint<K>(int(numTotalRays));
522 RayK<K>& ray = *(inputRays[i / K]);
523 valid &= ray.tnear() <= ray.tfar;
524 This->occluded(valid, ray, context);
525 }
526 }
527 }
528}
529