1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "node_intersector.h"
7
8namespace embree
9{
10 namespace isa
11 {
12 //////////////////////////////////////////////////////////////////////////////////////
13 // Ray packet structure used in hybrid traversal
14 //////////////////////////////////////////////////////////////////////////////////////
15
16 template<int K, bool robust>
17 struct TravRayK;
18
19 /* Fast variant */
20 template<int K>
21 struct TravRayK<K, false>
22 {
23 __forceinline TravRayK() {}
24
25 __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
26 {
27 init(ray_org, ray_dir, N);
28 }
29
30 __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
31 {
32 init(ray_org, ray_dir, N);
33 tnear = ray_tnear;
34 tfar = ray_tfar;
35 }
36
37 __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
38 {
39 org = ray_org;
40 dir = ray_dir;
41 rdir = rcp_safe(ray_dir);
42#if defined(__aarch64__)
43 neg_org_rdir = -(org * rdir);
44#elif defined(__AVX2__)
45 org_rdir = org * rdir;
46#endif
47
48 if (N)
49 {
50 const int size = sizeof(float)*N;
51 nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
52 nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
53 nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
54 }
55 }
56
57 Vec3vf<K> org;
58 Vec3vf<K> dir;
59 Vec3vf<K> rdir;
60#if defined(__aarch64__)
61 Vec3vf<K> neg_org_rdir;
62#elif defined(__AVX2__)
63 Vec3vf<K> org_rdir;
64#endif
65 Vec3vi<K> nearXYZ;
66 vfloat<K> tnear;
67 vfloat<K> tfar;
68 };
69
70 template<int K>
71 using TravRayKFast = TravRayK<K, false>;
72
73 /* Robust variant */
74 template<int K>
75 struct TravRayK<K, true>
76 {
77 __forceinline TravRayK() {}
78
79 __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
80 {
81 init(ray_org, ray_dir, N);
82 }
83
84 __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
85 {
86 init(ray_org, ray_dir, N);
87 tnear = ray_tnear;
88 tfar = ray_tfar;
89 }
90
91 __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
92 {
93 org = ray_org;
94 dir = ray_dir;
95 rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
96
97 if (N)
98 {
99 const int size = sizeof(float)*N;
100 nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
101 nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
102 nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
103 }
104 }
105
106 Vec3vf<K> org;
107 Vec3vf<K> dir;
108 Vec3vf<K> rdir;
109 Vec3vi<K> nearXYZ;
110 vfloat<K> tnear;
111 vfloat<K> tfar;
112 };
113
114 template<int K>
115 using TravRayKRobust = TravRayK<K, true>;
116
117 //////////////////////////////////////////////////////////////////////////////////////
118 // Fast AABBNode intersection
119 //////////////////////////////////////////////////////////////////////////////////////
120
121 template<int N, int K>
122 __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
123 const TravRayKFast<K>& ray, vfloat<K>& dist)
124
125 {
126#if defined(__aarch64__)
127 const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
128 const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
129 const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
130 const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
131 const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
132 const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
133#elif defined(__AVX2__)
134 const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
135 const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
136 const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
137 const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
138 const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
139 const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
140 #else
141 const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
142 const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
143 const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
144 const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
145 const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
146 const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
147 #endif
148
149 #if defined(__AVX512F__) // SKX
150 if (K == 16)
151 {
152 /* use mixed float/int min/max */
153 const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
154 const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
155 const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
156 dist = lnearP;
157 return lhit;
158 }
159 else
160 #endif
161 {
162 const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
163 const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
164 #if defined(__AVX512F__) // SKX
165 const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
166 #else
167 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
168 #endif
169 dist = lnearP;
170 return lhit;
171 }
172 }
173
174 //////////////////////////////////////////////////////////////////////////////////////
175 // Robust AABBNode intersection
176 //////////////////////////////////////////////////////////////////////////////////////
177
178 template<int N, int K>
179 __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
180 const TravRayKRobust<K>& ray, vfloat<K>& dist)
181 {
182 // FIXME: use per instruction rounding for AVX512
183 const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
184 const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
185 const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
186 const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
187 const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
188 const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
189 const float round_up = 1.0f+3.0f*float(ulp);
190 const float round_down = 1.0f-3.0f*float(ulp);
191 const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
192 const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
193 const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
194 dist = lnearP;
195 return lhit;
196 }
197
198 //////////////////////////////////////////////////////////////////////////////////////
199 // Fast AABBNodeMB intersection
200 //////////////////////////////////////////////////////////////////////////////////////
201
202 template<int N, int K>
203 __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
204 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
205 {
206 const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
207 const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
208 const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
209 const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
210 const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
211 const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
212
213#if defined(__aarch64__)
214 const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
215 const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
216 const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
217 const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
218 const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
219 const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
220#elif defined(__AVX2__)
221 const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
222 const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
223 const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
224 const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
225 const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
226 const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
227#else
228 const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
229 const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
230 const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
231 const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
232 const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
233 const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
234#endif
235
236#if defined(__AVX512F__) // SKX
237 if (K == 16)
238 {
239 /* use mixed float/int min/max */
240 const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
241 const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
242 const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
243 dist = lnearP;
244 return lhit;
245 }
246 else
247#endif
248 {
249 const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
250 const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
251#if defined(__AVX512F__) // SKX
252 const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
253#else
254 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
255#endif
256 dist = lnearP;
257 return lhit;
258 }
259 }
260
261 //////////////////////////////////////////////////////////////////////////////////////
262 // Robust AABBNodeMB intersection
263 //////////////////////////////////////////////////////////////////////////////////////
264
265 template<int N, int K>
266 __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
267 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
268 {
269 const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
270 const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
271 const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
272 const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
273 const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
274 const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
275
276 const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
277 const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
278 const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
279 const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
280 const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
281 const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
282
283 const float round_up = 1.0f+3.0f*float(ulp);
284 const float round_down = 1.0f-3.0f*float(ulp);
285
286#if defined(__AVX512F__) // SKX
287 if (K == 16)
288 {
289 const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
290 const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
291 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
292 dist = lnearP;
293 return lhit;
294 }
295 else
296#endif
297 {
298 const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
299 const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
300 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
301 dist = lnearP;
302 return lhit;
303 }
304 }
305
306 //////////////////////////////////////////////////////////////////////////////////////
307 // Fast AABBNodeMB4D intersection
308 //////////////////////////////////////////////////////////////////////////////////////
309
310 template<int N, int K>
311 __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
312 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
313 {
314 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
315
316 const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
317 const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
318 const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
319 const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
320 const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
321 const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
322
323#if defined(__aarch64__)
324 const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
325 const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
326 const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
327 const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
328 const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
329 const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
330#elif defined(__AVX2__)
331 const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
332 const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
333 const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
334 const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
335 const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
336 const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
337#else
338 const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
339 const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
340 const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
341 const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
342 const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
343 const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
344#endif
345
346 const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
347 const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
348 vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
349 if (unlikely(ref.isAABBNodeMB4D())) {
350 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
351 lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
352 }
353 dist = lnearP;
354 return lhit;
355 }
356
357 //////////////////////////////////////////////////////////////////////////////////////
358 // Robust AABBNodeMB4D intersection
359 //////////////////////////////////////////////////////////////////////////////////////
360
361 template<int N, int K>
362 __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
363 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
364 {
365 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
366
367 const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
368 const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
369 const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
370 const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
371 const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
372 const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
373
374 const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
375 const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
376 const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
377 const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
378 const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
379 const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
380
381 const float round_up = 1.0f+3.0f*float(ulp);
382 const float round_down = 1.0f-3.0f*float(ulp);
383 const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
384 const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
385 vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
386
387 if (unlikely(ref.isAABBNodeMB4D())) {
388 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
389 lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
390 }
391 dist = lnearP;
392 return lhit;
393 }
394
395 //////////////////////////////////////////////////////////////////////////////////////
396 // Fast OBBNode intersection
397 //////////////////////////////////////////////////////////////////////////////////////
398
399 template<int N, int K, bool robust>
400 __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
401 const TravRayK<K,robust>& ray, vfloat<K>& dist)
402 {
403 const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
404 Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
405 Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
406 Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
407
408 const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
409 const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
410 const Vec3vf<K> org = xfmPoint(naabb, ray.org);
411
412 const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
413 const vfloat<K> lclipMinY = org.y * nrdir.y;
414 const vfloat<K> lclipMinZ = org.z * nrdir.z;
415 const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
416 const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
417 const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
418
419 vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
420 vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
421 if (robust) {
422 lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
423 lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
424 }
425 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
426 dist = lnearP;
427 return lhit;
428 }
429
430 //////////////////////////////////////////////////////////////////////////////////////
431 // Fast OBBNodeMB intersection
432 //////////////////////////////////////////////////////////////////////////////////////
433
434 template<int N, int K, bool robust>
435 __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
436 const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
437 {
438 const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
439 Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
440 Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
441 Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
442
443 const Vec3vf<K> b0_lower = zero;
444 const Vec3vf<K> b0_upper = one;
445 const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
446 const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
447 const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
448 const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
449
450 const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
451 const Vec3vf<K> rdir = rcp_safe(dir);
452 const Vec3vf<K> org = xfmPoint(xfm, ray.org);
453
454 const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
455 const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
456 const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
457 const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
458 const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
459 const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
460
461 vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
462 vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
463 if (robust) {
464 lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
465 lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
466 }
467
468 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
469 dist = lnearP;
470 return lhit;
471 }
472
473
474
475 //////////////////////////////////////////////////////////////////////////////////////
476 // QuantizedBaseNode intersection
477 //////////////////////////////////////////////////////////////////////////////////////
478
479 template<int N, int K>
480 __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
481 const TravRayK<K,false>& ray, vfloat<K>& dist)
482
483 {
484 assert(movemask(node->validMask()) & ((size_t)1 << i));
485 const vfloat<N> lower_x = node->dequantizeLowerX();
486 const vfloat<N> upper_x = node->dequantizeUpperX();
487 const vfloat<N> lower_y = node->dequantizeLowerY();
488 const vfloat<N> upper_y = node->dequantizeUpperY();
489 const vfloat<N> lower_z = node->dequantizeLowerZ();
490 const vfloat<N> upper_z = node->dequantizeUpperZ();
491
492 #if defined(__aarch64__)
493 const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
494 const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
495 const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
496 const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
497 const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
498 const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
499 #elif defined(__AVX2__)
500 const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
501 const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
502 const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
503 const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
504 const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
505 const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
506 #else
507 const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
508 const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
509 const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
510 const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
511 const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
512 const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
513 #endif
514
515 #if defined(__AVX512F__) // SKX
516 if (K == 16)
517 {
518 /* use mixed float/int min/max */
519 const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
520 const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
521 const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
522 dist = lnearP;
523 return lhit;
524 }
525 else
526 #endif
527 {
528 const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
529 const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
530 #if defined(__AVX512F__) // SKX
531 const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
532 #else
533 const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
534 #endif
535 dist = lnearP;
536 return lhit;
537 }
538 }
539
540 template<int N, int K>
541 __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
542 const TravRayK<K,true>& ray, vfloat<K>& dist)
543
544 {
545 assert(movemask(node->validMask()) & ((size_t)1 << i));
546 const vfloat<N> lower_x = node->dequantizeLowerX();
547 const vfloat<N> upper_x = node->dequantizeUpperX();
548 const vfloat<N> lower_y = node->dequantizeLowerY();
549 const vfloat<N> upper_y = node->dequantizeUpperY();
550 const vfloat<N> lower_z = node->dequantizeLowerZ();
551 const vfloat<N> upper_z = node->dequantizeUpperZ();
552
553 const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
554 const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
555 const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
556 const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
557 const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
558 const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
559
560 const float round_up = 1.0f+3.0f*float(ulp);
561 const float round_down = 1.0f-3.0f*float(ulp);
562
563 const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
564 const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
565 const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
566 dist = lnearP;
567 return lhit;
568 }
569
570 template<int N, int K>
571 __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
572 const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
573
574 {
575 assert(movemask(node->validMask()) & ((size_t)1 << i));
576
577 const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
578 const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
579 const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
580 const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
581 const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
582 const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
583
584#if defined(__aarch64__)
585 const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
586 const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
587 const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
588 const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
589 const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
590 const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
591#elif defined(__AVX2__)
592 const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
593 const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
594 const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
595 const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
596 const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
597 const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
598#else
599 const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
600 const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
601 const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
602 const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
603 const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
604 const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
605 #endif
606 const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
607 const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
608 const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
609 dist = lnearP;
610 return lhit;
611 }
612
613
614 template<int N, int K>
615 __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
616 const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
617
618 {
619 assert(movemask(node->validMask()) & ((size_t)1 << i));
620
621 const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
622 const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
623 const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
624 const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
625 const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
626 const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
627
628 const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
629 const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
630 const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
631 const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
632 const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
633 const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
634
635 const float round_up = 1.0f+3.0f*float(ulp);
636 const float round_down = 1.0f-3.0f*float(ulp);
637
638 const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
639 const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
640 const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
641 dist = lnearP;
642 return lhit;
643 }
644
645
646 //////////////////////////////////////////////////////////////////////////////////////
647 // Node intersectors used in hybrid traversal
648 //////////////////////////////////////////////////////////////////////////////////////
649
650 /*! Intersects N nodes with K rays */
651 template<int N, int K, int types, bool robust>
652 struct BVHNNodeIntersectorK;
653
654 template<int N, int K>
655 struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
656 {
657 /* vmask is both an input and an output parameter! Its initial value should be the parent node
658 hit mask, which is used for correctly computing the current hit mask. The parent hit mask
659 is actually required only for motion blur node intersections (because different rays may
660 have different times), so for regular nodes vmask is simply overwritten. */
661 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
662 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
663 {
664 vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
665 return true;
666 }
667 };
668
669 template<int N, int K>
670 struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
671 {
672 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
673 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
674 {
675 vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
676 return true;
677 }
678 };
679
680 template<int N, int K>
681 struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
682 {
683 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
684 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
685 {
686 vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
687 return true;
688 }
689 };
690
691 template<int N, int K>
692 struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
693 {
694 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
695 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
696 {
697 vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
698 return true;
699 }
700 };
701
702 template<int N, int K>
703 struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
704 {
705 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
706 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
707 {
708 if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
709 else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
710 return true;
711 }
712 };
713
714 template<int N, int K>
715 struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
716 {
717 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
718 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
719 {
720 if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
721 else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
722 return true;
723 }
724 };
725
726 template<int N, int K>
727 struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
728 {
729 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
730 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
731 {
732 if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
733 else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
734 return true;
735 }
736 };
737
738 template<int N, int K>
739 struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
740 {
741 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
742 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
743 {
744 if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
745 else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
746 return true;
747 }
748 };
749
750 template<int N, int K>
751 struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
752 {
753 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
754 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
755 {
756 vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
757 return true;
758 }
759 };
760
761 template<int N, int K>
762 struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
763 {
764 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
765 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
766 {
767 vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
768 return true;
769 }
770 };
771
772 template<int N, int K>
773 struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
774 {
775 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
776 const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
777 {
778 if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
779 vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
780 } else /*if (unlikely(node.isOBBNodeMB()))*/ {
781 assert(node.isOBBNodeMB());
782 vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
783 }
784 return true;
785 }
786 };
787
788 template<int N, int K>
789 struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
790 {
791 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
792 const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
793 {
794 if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
795 vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
796 } else /*if (unlikely(node.isOBBNodeMB()))*/ {
797 assert(node.isOBBNodeMB());
798 vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
799 }
800 return true;
801 }
802 };
803
804
805 /*! Intersects N nodes with K rays */
806 template<int N, int K, bool robust>
807 struct BVHNQuantizedBaseNodeIntersectorK;
808
809 template<int N, int K>
810 struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
811 {
812 static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
813 const TravRayK<K,false>& ray, vfloat<K>& dist)
814 {
815 return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
816 }
817
818 static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
819 const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
820 {
821 return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
822 }
823
824 };
825
826 template<int N, int K>
827 struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
828 {
829 static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
830 const TravRayK<K,true>& ray, vfloat<K>& dist)
831 {
832 return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
833 }
834
835 static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
836 const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
837 {
838 return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
839 }
840 };
841
842
843 }
844}
845