1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "node_intersector.h"
7
8#if defined(__AVX2__)
9#define __FMA_X4__
10#endif
11
12#if defined(__aarch64__)
13#define __FMA_X4__
14#endif
15
16
17namespace embree
18{
19 namespace isa
20 {
21 //////////////////////////////////////////////////////////////////////////////////////
22 // Ray structure used in single-ray traversal
23 //////////////////////////////////////////////////////////////////////////////////////
24
25 template<int N, bool robust>
26 struct TravRayBase;
27
28 /* Base (without tnear and tfar) */
29 template<int N>
30 struct TravRayBase<N,false>
31 {
32 __forceinline TravRayBase() {}
33
34 __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
35 : org_xyz(ray_org), dir_xyz(ray_dir)
36 {
37 const Vec3fa ray_rdir = rcp_safe(ray_dir);
38 org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
39 dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
40 rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z);
41#if defined(__FMA_X4__)
42 const Vec3fa ray_org_rdir = ray_org*ray_rdir;
43#if !defined(__aarch64__)
44 org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
45#else
46 //for aarch64, we do not have msub equal instruction, so we negeate orig and use madd
47 //x86 will use msub
48 neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z);
49#endif
50#endif
51 nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
52 nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
53 nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
54 farX = nearX ^ sizeof(vfloat<N>);
55 farY = nearY ^ sizeof(vfloat<N>);
56 farZ = nearZ ^ sizeof(vfloat<N>);
57 }
58
59 template<int K>
60 __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
61 const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
62 size_t flip = sizeof(vfloat<N>))
63 {
64 org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
65 dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
66 rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
67#if defined(__FMA_X4__)
68#if !defined(__aarch64__)
69 org_rdir = org*rdir;
70#else
71 neg_org_rdir = -(org*rdir);
72#endif
73#endif
74 nearX = nearXYZ.x[k];
75 nearY = nearXYZ.y[k];
76 nearZ = nearXYZ.z[k];
77 farX = nearX ^ flip;
78 farY = nearY ^ flip;
79 farZ = nearZ ^ flip;
80 }
81
82 Vec3fa org_xyz, dir_xyz;
83 Vec3vf<N> org, dir, rdir;
84#if defined(__FMA_X4__)
85#if !defined(__aarch64__)
86 Vec3vf<N> org_rdir;
87#else
88 //aarch64 version are keeping negation of the org_rdir and use madd
89 //x86 uses msub
90 Vec3vf<N> neg_org_rdir;
91#endif
92#endif
93 size_t nearX, nearY, nearZ;
94 size_t farX, farY, farZ;
95 };
96
97 /* Base (without tnear and tfar) */
98 template<int N>
99 struct TravRayBase<N,true>
100 {
101 __forceinline TravRayBase() {}
102
103 __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
104 : org_xyz(ray_org), dir_xyz(ray_dir)
105 {
106 const float round_down = 1.0f-3.0f*float(ulp);
107 const float round_up = 1.0f+3.0f*float(ulp);
108 const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir);
109 const Vec3fa ray_rdir_near = round_down*ray_rdir;
110 const Vec3fa ray_rdir_far = round_up *ray_rdir;
111 org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
112 dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
113 rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z);
114 rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z);
115
116 nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
117 nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
118 nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
119 farX = nearX ^ sizeof(vfloat<N>);
120 farY = nearY ^ sizeof(vfloat<N>);
121 farZ = nearZ ^ sizeof(vfloat<N>);
122 }
123
124 template<int K>
125 __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
126 const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
127 size_t flip = sizeof(vfloat<N>))
128 {
129 const vfloat<N> round_down = 1.0f-3.0f*float(ulp);
130 const vfloat<N> round_up = 1.0f+3.0f*float(ulp);
131 org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
132 dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
133 rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
134 rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
135
136 nearX = nearXYZ.x[k];
137 nearY = nearXYZ.y[k];
138 nearZ = nearXYZ.z[k];
139 farX = nearX ^ flip;
140 farY = nearY ^ flip;
141 farZ = nearZ ^ flip;
142 }
143
144 Vec3fa org_xyz, dir_xyz;
145 Vec3vf<N> org, dir, rdir_near, rdir_far;
146 size_t nearX, nearY, nearZ;
147 size_t farX, farY, farZ;
148 };
149
150 /* Full (with tnear and tfar) */
151 template<int N, bool robust>
152 struct TravRay : TravRayBase<N,robust>
153 {
154 __forceinline TravRay() {}
155
156 __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar)
157 : TravRayBase<N,robust>(ray_org, ray_dir),
158 tnear(ray_tnear), tfar(ray_tfar) {}
159
160 template<int K>
161 __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
162 const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
163 float ray_tnear, float ray_tfar,
164 size_t flip = sizeof(vfloat<N>))
165 {
166 TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip);
167 tnear = ray_tnear; tfar = ray_tfar;
168 }
169
170 vfloat<N> tnear;
171 vfloat<N> tfar;
172 };
173
174 //////////////////////////////////////////////////////////////////////////////////////
175 // Point Query structure used in single-ray traversal
176 //////////////////////////////////////////////////////////////////////////////////////
177
178 template<int N>
179 struct TravPointQuery
180 {
181 __forceinline TravPointQuery() {}
182
183 __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad)
184 {
185 org = Vec3vf<N>(query_org.x, query_org.y, query_org.z);
186 rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z);
187 }
188
189 __forceinline vfloat<N> const& tfar() const {
190 return rad.x;
191 }
192
193 Vec3vf<N> org, rad;
194 };
195
196 //////////////////////////////////////////////////////////////////////////////////////
197 // point query
198 //////////////////////////////////////////////////////////////////////////////////////
199
200 template<int N>
201 __forceinline size_t pointQuerySphereDistAndMask(
202 const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
203 vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
204 {
205 const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
206 const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
207 const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
208 dist = vX * vX + vY * vY + vZ * vZ;
209 const vbool<N> vmask = dist <= query.tfar()*query.tfar();
210 const vbool<N> valid = minX <= maxX;
211 return movemask(vmask) & movemask(valid);
212 }
213
214 template<int N>
215 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
216 {
217 const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
218 const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
219 const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
220 const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
221 const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
222 const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
223 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
224 }
225
226 template<int N>
227 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
228 {
229 const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
230 const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
231 const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
232 const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
233 const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
234 const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
235 const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
236 const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
237 const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
238 const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
239 const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
240 const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
241 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
242 }
243
244 template<int N>
245 __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
246 {
247 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
248 size_t mask = pointQueryNodeSphere(node, query, time, dist);
249
250 if (unlikely(ref.isAABBNodeMB4D())) {
251 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
252 const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
253 mask &= movemask(vmask);
254 }
255
256 return mask;
257 }
258
259 template<int N>
260 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
261 {
262 const vfloat<N> start_x(node->start.x);
263 const vfloat<N> scale_x(node->scale.x);
264 const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
265 const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
266 const vfloat<N> start_y(node->start.y);
267 const vfloat<N> scale_y(node->scale.y);
268 const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
269 const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
270 const vfloat<N> start_z(node->start.z);
271 const vfloat<N> scale_z(node->scale.z);
272 const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
273 const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
274 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
275 }
276
277 template<int N>
278 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
279 {
280 const vfloat<N> minX = node->dequantizeLowerX(time);
281 const vfloat<N> maxX = node->dequantizeUpperX(time);
282 const vfloat<N> minY = node->dequantizeLowerY(time);
283 const vfloat<N> maxY = node->dequantizeUpperY(time);
284 const vfloat<N> minZ = node->dequantizeLowerZ(time);
285 const vfloat<N> maxZ = node->dequantizeUpperZ(time);
286 return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
287 }
288
289 template<int N>
290 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
291 {
292 // TODO: point query - implement
293 const vbool<N> vmask = vbool<N>(true);
294 const size_t mask = movemask(vmask) & ((1<<N)-1);
295 dist = vfloat<N>(0.0f);
296 return mask;
297 }
298
299 template<int N>
300 __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
301 {
302 // TODO: point query - implement
303 const vbool<N> vmask = vbool<N>(true);
304 const size_t mask = movemask(vmask) & ((1<<N)-1);
305 dist = vfloat<N>(0.0f);
306 return mask;
307 }
308
309 template<int N>
310 __forceinline size_t pointQueryAABBDistAndMask(
311 const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
312 vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
313 {
314 const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
315 const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
316 const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
317 dist = vX * vX + vY * vY + vZ * vZ;
318 const vbool<N> valid = minX <= maxX;
319 const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) |
320 (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) |
321 (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z));
322 return movemask(vmask) & movemask(valid);
323 }
324
325 template<int N>
326 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
327 {
328 const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
329 const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
330 const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
331 const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
332 const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
333 const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
334 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
335 }
336
337 template<int N>
338 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
339 {
340 const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
341 const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
342 const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
343 const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
344 const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
345 const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
346 const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
347 const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
348 const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
349 const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
350 const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
351 const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
352 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
353 }
354
355 template<int N>
356 __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
357 {
358 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
359 size_t mask = pointQueryNodeAABB(node, query, time, dist);
360
361 if (unlikely(ref.isAABBNodeMB4D())) {
362 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
363 const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
364 mask &= movemask(vmask);
365 }
366
367 return mask;
368 }
369
370 template<int N>
371 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
372 {
373 const size_t mvalid = movemask(node->validMask());
374 const vfloat<N> start_x(node->start.x);
375 const vfloat<N> scale_x(node->scale.x);
376 const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
377 const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
378 const vfloat<N> start_y(node->start.y);
379 const vfloat<N> scale_y(node->scale.y);
380 const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
381 const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
382 const vfloat<N> start_z(node->start.z);
383 const vfloat<N> scale_z(node->scale.z);
384 const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
385 const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
386 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
387 }
388
389 template<int N>
390 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
391 {
392 const size_t mvalid = movemask(node->validMask());
393 const vfloat<N> minX = node->dequantizeLowerX(time);
394 const vfloat<N> maxX = node->dequantizeUpperX(time);
395 const vfloat<N> minY = node->dequantizeLowerY(time);
396 const vfloat<N> maxY = node->dequantizeUpperY(time);
397 const vfloat<N> minZ = node->dequantizeLowerZ(time);
398 const vfloat<N> maxZ = node->dequantizeUpperZ(time);
399 return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
400 }
401
402 template<int N>
403 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
404 {
405 // TODO: point query - implement
406 const vbool<N> vmask = vbool<N>(true);
407 const size_t mask = movemask(vmask) & ((1<<N)-1);
408 dist = vfloat<N>(0.0f);
409 return mask;
410 }
411
412 template<int N>
413 __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
414 {
415 // TODO: point query - implement
416 const vbool<N> vmask = vbool<N>(true);
417 const size_t mask = movemask(vmask) & ((1<<N)-1);
418 dist = vfloat<N>(0.0f);
419 return mask;
420 }
421
422 //////////////////////////////////////////////////////////////////////////////////////
423 // Fast AABBNode intersection
424 //////////////////////////////////////////////////////////////////////////////////////
425
426 template<int N, bool robust>
427 __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
428
429 template<>
430 __forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist)
431 {
432#if defined(__FMA_X4__)
433#if defined(__aarch64__)
434 const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
435 const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
436 const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
437 const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
438 const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
439 const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
440#else
441 const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
442 const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
443 const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
444 const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
445 const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
446 const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
447#endif
448#else
449 const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
450 const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
451 const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
452 const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
453 const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
454 const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
455#endif
456
457#if defined(__aarch64__)
458 const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear);
459 const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar);
460 const vbool4 vmask = asInt(tNear) <= asInt(tFar);
461 const size_t mask = movemask(vmask);
462#elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
463 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
464 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
465 const vbool4 vmask = asInt(tNear) > asInt(tFar);
466 const size_t mask = movemask(vmask) ^ ((1<<4)-1);
467#elif defined(__AVX512F__) // SKX
468 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
469 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
470 const vbool4 vmask = asInt(tNear) <= asInt(tFar);
471 const size_t mask = movemask(vmask);
472#else
473 const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
474 const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
475 const vbool4 vmask = tNear <= tFar;
476 const size_t mask = movemask(vmask);
477#endif
478 dist = tNear;
479 return mask;
480 }
481
482#if defined(__AVX__)
483
484 template<>
485 __forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist)
486 {
487#if defined(__AVX2__)
488#if defined(__aarch64__)
489 const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
490 const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
491 const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
492 const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
493 const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
494 const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
495#else
496 const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
497 const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
498 const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
499 const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
500 const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
501 const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
502#endif
503
504#else
505 const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
506 const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
507 const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
508 const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
509 const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
510 const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
511#endif
512
513#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
514 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
515 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
516 const vbool8 vmask = asInt(tNear) > asInt(tFar);
517 const size_t mask = movemask(vmask) ^ ((1<<8)-1);
518#elif defined(__AVX512F__) // SKX
519 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
520 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
521 const vbool8 vmask = asInt(tNear) <= asInt(tFar);
522 const size_t mask = movemask(vmask);
523#else
524 const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
525 const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
526 const vbool8 vmask = tNear <= tFar;
527 const size_t mask = movemask(vmask);
528#endif
529 dist = tNear;
530 return mask;
531 }
532
533#endif
534
535 //////////////////////////////////////////////////////////////////////////////////////
536 // Robust AABBNode intersection
537 //////////////////////////////////////////////////////////////////////////////////////
538
539 template<int N>
540 __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
541 {
542 const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
543 const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
544 const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
545 const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x;
546 const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
547 const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
548 const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
549 const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
550 const vbool<N> vmask = tNear <= tFar;
551 const size_t mask = movemask(vmask);
552 dist = tNear;
553 return mask;
554 }
555
556 //////////////////////////////////////////////////////////////////////////////////////
557 // Fast AABBNodeMB intersection
558 //////////////////////////////////////////////////////////////////////////////////////
559
560 template<int N>
561 __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
562 {
563 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
564 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
565 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
566 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
567 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
568 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
569#if defined(__FMA_X4__)
570#if defined(__aarch64__)
571 const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x);
572 const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
573 const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
574 const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
575 const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
576 const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
577#else
578 const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
579 const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
580 const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
581 const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
582 const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
583 const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
584#endif
585#else
586 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
587 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
588 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
589 const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
590 const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
591 const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
592#endif
593#if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW
594 const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
595 const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
596 const vbool<N> vmask = asInt(tNear) > asInt(tFar);
597 const size_t mask = movemask(vmask) ^ ((1<<N)-1);
598#elif defined(__AVX512F__) // SKX
599 const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
600 const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
601 const vbool<N> vmask = asInt(tNear) <= asInt(tFar);
602 const size_t mask = movemask(vmask);
603#else
604 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
605 const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
606 const vbool<N> vmask = tNear <= tFar;
607 const size_t mask = movemask(vmask);
608#endif
609 dist = tNear;
610 return mask;
611 }
612
613 //////////////////////////////////////////////////////////////////////////////////////
614 // Robust AABBNodeMB intersection
615 //////////////////////////////////////////////////////////////////////////////////////
616
617 template<int N>
618 __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
619 {
620 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
621 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
622 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
623 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
624 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
625 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
626 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
627 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
628 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
629 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
630 const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
631 const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
632 const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
633 const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
634 const size_t mask = movemask(tNear <= tFar);
635 dist = tNear;
636 return mask;
637 }
638
639 //////////////////////////////////////////////////////////////////////////////////////
640 // Fast AABBNodeMB4D intersection
641 //////////////////////////////////////////////////////////////////////////////////////
642
643 template<int N>
644 __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
645 {
646 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
647
648 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
649 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
650 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
651 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
652 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
653 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
654#if defined (__FMA_X4__)
655#if defined(__aarch64__)
656 const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x);
657 const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
658 const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
659 const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
660 const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
661 const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
662#else
663 const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
664 const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
665 const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
666 const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
667 const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
668 const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
669#endif
670#else
671 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
672 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
673 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
674 const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
675 const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
676 const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
677#endif
678#if defined(__FMA_X4__) && !defined(__AVX512F__)
679 const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear));
680 const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar ));
681#else
682 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
683 const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
684#endif
685 vbool<N> vmask = tNear <= tFar;
686 if (unlikely(ref.isAABBNodeMB4D())) {
687 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
688 vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
689 }
690 const size_t mask = movemask(vmask);
691 dist = tNear;
692 return mask;
693 }
694
695 //////////////////////////////////////////////////////////////////////////////////////
696 // Robust AABBNodeMB4D intersection
697 //////////////////////////////////////////////////////////////////////////////////////
698
699 template<int N>
700 __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
701 {
702 const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
703
704 const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
705 const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
706 const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
707 const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
708 const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
709 const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
710 const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
711 const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
712 const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
713 const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
714 const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
715 const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
716 const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
717 const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
718 vbool<N> vmask = tNear <= tFar;
719 if (unlikely(ref.isAABBNodeMB4D())) {
720 const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
721 vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
722 }
723 const size_t mask = movemask(vmask);
724 dist = tNear;
725 return mask;
726 }
727
728 //////////////////////////////////////////////////////////////////////////////////////
729 // Fast QuantizedBaseNode intersection
730 //////////////////////////////////////////////////////////////////////////////////////
731
732 template<int N, bool robust>
733 __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
734
735 template<>
736 __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist)
737 {
738 const size_t mvalid = movemask(node->validMask());
739 const vfloat4 start_x(node->start.x);
740 const vfloat4 scale_x(node->scale.x);
741 const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x);
742 const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
743 const vfloat4 start_y(node->start.y);
744 const vfloat4 scale_y(node->scale.y);
745 const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
746 const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
747 const vfloat4 start_z(node->start.z);
748 const vfloat4 scale_z(node->scale.z);
749 const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
750 const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
751
752#if defined(__FMA_X4__)
753#if defined(__aarch64__)
754 const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
755 const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
756 const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
757 const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
758 const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
759 const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
760#else
761 const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
762 const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
763 const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
764 const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
765 const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
766 const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
767#endif
768#else
769 const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
770 const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
771 const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
772 const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
773 const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
774 const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
775#endif
776
777#if defined(__aarch64__) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
778 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
779 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
780 const vbool4 vmask = asInt(tNear) > asInt(tFar);
781 const size_t mask = movemask(vmask) ^ ((1<<4)-1);
782#elif defined(__AVX512F__) // SKX
783 const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
784 const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
785 const vbool4 vmask = asInt(tNear) <= asInt(tFar);
786 const size_t mask = movemask(vmask);
787#else
788 const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
789 const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
790 const vbool4 vmask = tNear <= tFar;
791 const size_t mask = movemask(vmask);
792#endif
793 dist = tNear;
794 return mask & mvalid;
795 }
796
797 template<>
798 __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist)
799 {
800 const size_t mvalid = movemask(node->validMask());
801 const vfloat4 start_x(node->start.x);
802 const vfloat4 scale_x(node->scale.x);
803 const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x);
804 const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
805 const vfloat4 start_y(node->start.y);
806 const vfloat4 scale_y(node->scale.y);
807 const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
808 const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
809 const vfloat4 start_z(node->start.z);
810 const vfloat4 scale_z(node->scale.z);
811 const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
812 const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
813
814 const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
815 const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
816 const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
817 const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
818 const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
819 const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
820
821 const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
822 const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
823 const vbool4 vmask = tNear <= tFar;
824 const size_t mask = movemask(vmask);
825 dist = tNear;
826 return mask & mvalid;
827 }
828
829
830#if defined(__AVX__)
831
832 template<>
833 __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist)
834 {
835 const size_t mvalid = movemask(node->validMask());
836 const vfloat8 start_x(node->start.x);
837 const vfloat8 scale_x(node->scale.x);
838 const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
839 const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
840 const vfloat8 start_y(node->start.y);
841 const vfloat8 scale_y(node->scale.y);
842 const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
843 const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
844 const vfloat8 start_z(node->start.z);
845 const vfloat8 scale_z(node->scale.z);
846 const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
847 const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
848
849#if defined(__AVX2__)
850#if defined(__aarch64__)
851 const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
852 const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
853 const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
854 const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
855 const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
856 const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
857#else
858 const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
859 const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
860 const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
861 const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
862 const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
863 const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
864#endif
865#else
866 const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
867 const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
868 const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
869 const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
870 const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
871 const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
872#endif
873
874#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
875 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
876 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
877 const vbool8 vmask = asInt(tNear) > asInt(tFar);
878 const size_t mask = movemask(vmask) ^ ((1<<8)-1);
879#elif defined(__AVX512F__) // SKX
880 const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
881 const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
882 const vbool8 vmask = asInt(tNear) <= asInt(tFar);
883 const size_t mask = movemask(vmask);
884#else
885 const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
886 const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
887 const vbool8 vmask = tNear <= tFar;
888 const size_t mask = movemask(vmask);
889#endif
890 dist = tNear;
891 return mask & mvalid;
892 }
893
894 template<>
895 __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist)
896 {
897 const size_t mvalid = movemask(node->validMask());
898 const vfloat8 start_x(node->start.x);
899 const vfloat8 scale_x(node->scale.x);
900 const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
901 const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
902 const vfloat8 start_y(node->start.y);
903 const vfloat8 scale_y(node->scale.y);
904 const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
905 const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
906 const vfloat8 start_z(node->start.z);
907 const vfloat8 scale_z(node->scale.z);
908 const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
909 const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
910
911 const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
912 const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
913 const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
914 const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
915 const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
916 const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
917
918 const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
919 const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
920 const vbool8 vmask = tNear <= tFar;
921 const size_t mask = movemask(vmask);
922
923 dist = tNear;
924 return mask & mvalid;
925 }
926
927
928#endif
929
930 template<int N>
931 __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
932 {
933 const vboolf<N> mvalid = node->validMask();
934 const vfloat<N> lower_x = node->dequantizeLowerX(time);
935 const vfloat<N> upper_x = node->dequantizeUpperX(time);
936 const vfloat<N> lower_y = node->dequantizeLowerY(time);
937 const vfloat<N> upper_y = node->dequantizeUpperY(time);
938 const vfloat<N> lower_z = node->dequantizeLowerZ(time);
939 const vfloat<N> upper_z = node->dequantizeUpperZ(time);
940#if defined(__FMA_X4__)
941#if defined(__aarch64__)
942 const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
943 const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
944 const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
945 const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
946 const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
947 const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
948#else
949 const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
950 const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
951 const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
952 const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
953 const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
954 const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
955#endif
956#else
957 const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x;
958 const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y;
959 const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
960 const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x;
961 const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y;
962 const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
963#endif
964
965 const vfloat<N> tminX = mini(tNearX,tFarX);
966 const vfloat<N> tmaxX = maxi(tNearX,tFarX);
967 const vfloat<N> tminY = mini(tNearY,tFarY);
968 const vfloat<N> tmaxY = maxi(tNearY,tFarY);
969 const vfloat<N> tminZ = mini(tNearZ,tFarZ);
970 const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
971 const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
972 const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
973#if defined(__AVX512F__) // SKX
974 const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
975#else
976 const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
977#endif
978 const size_t mask = movemask(vmask);
979 dist = tNear;
980 return mask;
981 }
982
983 template<int N>
984 __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
985 {
986 const vboolf<N> mvalid = node->validMask();
987 const vfloat<N> lower_x = node->dequantizeLowerX(time);
988 const vfloat<N> upper_x = node->dequantizeUpperX(time);
989 const vfloat<N> lower_y = node->dequantizeLowerY(time);
990 const vfloat<N> upper_y = node->dequantizeUpperY(time);
991 const vfloat<N> lower_z = node->dequantizeLowerZ(time);
992 const vfloat<N> upper_z = node->dequantizeUpperZ(time);
993 const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
994 const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
995 const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
996 const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
997 const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
998 const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
999
1000 const vfloat<N> tminX = mini(tNearX,tFarX);
1001 const vfloat<N> tmaxX = maxi(tNearX,tFarX);
1002 const vfloat<N> tminY = mini(tNearY,tFarY);
1003 const vfloat<N> tmaxY = maxi(tNearY,tFarY);
1004 const vfloat<N> tminZ = mini(tNearZ,tFarZ);
1005 const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
1006 const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
1007 const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
1008#if defined(__AVX512F__) // SKX
1009 const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
1010#else
1011 const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
1012#endif
1013 const size_t mask = movemask(vmask);
1014 dist = tNear;
1015 return mask;
1016 }
1017
1018 //////////////////////////////////////////////////////////////////////////////////////
1019 // Fast OBBNode intersection
1020 //////////////////////////////////////////////////////////////////////////////////////
1021
1022 template<int N, bool robust>
1023 __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist)
1024 {
1025 const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir);
1026 //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir;
1027 const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir);
1028 const Vec3vf<N> org = xfmPoint(node->naabb,ray.org);
1029 const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir;
1030 const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir;
1031
1032 const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
1033 const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
1034 const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
1035 const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
1036 const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
1037 const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
1038 vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
1039 vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
1040 if (robust) {
1041 tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
1042 tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
1043 }
1044 const vbool<N> vmask = tNear <= tFar;
1045 dist = tNear;
1046 return movemask(vmask);
1047 }
1048
1049 //////////////////////////////////////////////////////////////////////////////////////
1050 // Fast OBBNodeMB intersection
1051 //////////////////////////////////////////////////////////////////////////////////////
1052
1053 template<int N, bool robust>
1054 __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist)
1055 {
1056 const AffineSpace3vf<N> xfm = node->space0;
1057 const Vec3vf<N> b0_lower = zero;
1058 const Vec3vf<N> b0_upper = one;
1059 const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time));
1060 const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time));
1061
1062 const BBox3vf<N> bounds(lower,upper);
1063 const Vec3vf<N> dir = xfmVector(xfm,ray.dir);
1064 const Vec3vf<N> rdir = rcp_safe(dir);
1065 const Vec3vf<N> org = xfmPoint(xfm,ray.org);
1066
1067 const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir;
1068 const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir;
1069
1070 const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
1071 const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
1072 const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
1073 const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
1074 const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
1075 const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
1076 vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
1077 vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
1078 if (robust) {
1079 tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
1080 tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
1081 }
1082 const vbool<N> vmask = tNear <= tFar;
1083 dist = tNear;
1084 return movemask(vmask);
1085 }
1086
1087 //////////////////////////////////////////////////////////////////////////////////////
1088 // Node intersectors used in point query raversal
1089 //////////////////////////////////////////////////////////////////////////////////////
1090
1091 /*! Computes traversal information for N nodes with 1 point query */
1092 template<int N, int types>
1093 struct BVHNNodePointQuerySphere1;
1094
1095 template<int N>
1096 struct BVHNNodePointQuerySphere1<N, BVH_AN1>
1097 {
1098 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1099 {
1100 if (unlikely(node.isLeaf())) return false;
1101 mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
1102 return true;
1103 }
1104 };
1105
1106 template<int N>
1107 struct BVHNNodePointQuerySphere1<N, BVH_AN2>
1108 {
1109 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1110 {
1111 if (unlikely(node.isLeaf())) return false;
1112 mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
1113 return true;
1114 }
1115 };
1116
1117 template<int N>
1118 struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D>
1119 {
1120 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1121 {
1122 if (unlikely(node.isLeaf())) return false;
1123 mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist);
1124 return true;
1125 }
1126 };
1127
1128 template<int N>
1129 struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1>
1130 {
1131 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1132 {
1133 if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
1134 else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist);
1135 else return false;
1136 return true;
1137 }
1138 };
1139
1140 template<int N>
1141 struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2>
1142 {
1143 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1144 {
1145 if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
1146 else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
1147 else return false;
1148 return true;
1149 }
1150 };
1151
1152 template<int N>
1153 struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2>
1154 {
1155 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1156 {
1157 if (unlikely(node.isLeaf())) return false;
1158 if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
1159 else mask = pointQueryNodeSphereMB4D(node, query, time, dist);
1160 return true;
1161 }
1162 };
1163
1164 template<int N>
1165 struct BVHNNodePointQuerySphere1<N, BVH_QN1>
1166 {
1167 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1168 {
1169 if (unlikely(node.isLeaf())) return false;
1170 mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
1171 return true;
1172 }
1173 };
1174
1175 template<int N>
1176 struct BVHNQuantizedBaseNodePointQuerySphere1
1177 {
1178 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
1179 {
1180 return pointQueryNodeSphere(node,query,dist);
1181 }
1182
1183 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
1184 {
1185 return pointQueryNodeSphere(node,query,time,dist);
1186 }
1187 };
1188
1189 /*! Computes traversal information for N nodes with 1 point query */
1190 template<int N, int types>
1191 struct BVHNNodePointQueryAABB1;
1192
1193 template<int N>
1194 struct BVHNNodePointQueryAABB1<N, BVH_AN1>
1195 {
1196 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1197 {
1198 if (unlikely(node.isLeaf())) return false;
1199 mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
1200 return true;
1201 }
1202 };
1203
1204 template<int N>
1205 struct BVHNNodePointQueryAABB1<N, BVH_AN2>
1206 {
1207 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1208 {
1209 if (unlikely(node.isLeaf())) return false;
1210 mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
1211 return true;
1212 }
1213 };
1214
1215 template<int N>
1216 struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D>
1217 {
1218 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1219 {
1220 if (unlikely(node.isLeaf())) return false;
1221 mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist);
1222 return true;
1223 }
1224 };
1225
1226 template<int N>
1227 struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1>
1228 {
1229 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1230 {
1231 if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
1232 else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist);
1233 else return false;
1234 return true;
1235 }
1236 };
1237
1238 template<int N>
1239 struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2>
1240 {
1241 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1242 {
1243 if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
1244 else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
1245 else return false;
1246 return true;
1247 }
1248 };
1249
1250 template<int N>
1251 struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2>
1252 {
1253 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1254 {
1255 if (unlikely(node.isLeaf())) return false;
1256 if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
1257 else mask = pointQueryNodeAABBMB4D(node, query, time, dist);
1258 return true;
1259 }
1260 };
1261
1262 template<int N>
1263 struct BVHNNodePointQueryAABB1<N, BVH_QN1>
1264 {
1265 static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1266 {
1267 if (unlikely(node.isLeaf())) return false;
1268 mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
1269 return true;
1270 }
1271 };
1272
1273 template<int N>
1274 struct BVHNQuantizedBaseNodePointQueryAABB1
1275 {
1276 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
1277 {
1278 return pointQueryNodeAABB(node,query,dist);
1279 }
1280
1281 static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
1282 {
1283 return pointQueryNodeAABB(node,query,time,dist);
1284 }
1285 };
1286
1287
1288 //////////////////////////////////////////////////////////////////////////////////////
1289 // Node intersectors used in ray traversal
1290 //////////////////////////////////////////////////////////////////////////////////////
1291
1292 /*! Intersects N nodes with 1 ray */
1293 template<int N, int types, bool robust>
1294 struct BVHNNodeIntersector1;
1295
1296 template<int N>
1297 struct BVHNNodeIntersector1<N, BVH_AN1, false>
1298 {
1299 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1300 {
1301 if (unlikely(node.isLeaf())) return false;
1302 mask = intersectNode(node.getAABBNode(), ray, dist);
1303 return true;
1304 }
1305 };
1306
1307 template<int N>
1308 struct BVHNNodeIntersector1<N, BVH_AN1, true>
1309 {
1310 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1311 {
1312 if (unlikely(node.isLeaf())) return false;
1313 mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
1314 return true;
1315 }
1316 };
1317
1318 template<int N>
1319 struct BVHNNodeIntersector1<N, BVH_AN2, false>
1320 {
1321 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1322 {
1323 if (unlikely(node.isLeaf())) return false;
1324 mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
1325 return true;
1326 }
1327 };
1328
1329 template<int N>
1330 struct BVHNNodeIntersector1<N, BVH_AN2, true>
1331 {
1332 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1333 {
1334 if (unlikely(node.isLeaf())) return false;
1335 mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
1336 return true;
1337 }
1338 };
1339
1340 template<int N>
1341 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false>
1342 {
1343 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1344 {
1345 if (unlikely(node.isLeaf())) return false;
1346 mask = intersectNodeMB4D<N>(node, ray, time, dist);
1347 return true;
1348 }
1349 };
1350
1351 template<int N>
1352 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true>
1353 {
1354 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1355 {
1356 if (unlikely(node.isLeaf())) return false;
1357 mask = intersectNodeMB4DRobust<N>(node, ray, time, dist);
1358 return true;
1359 }
1360 };
1361
1362 template<int N>
1363 struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false>
1364 {
1365 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1366 {
1367 if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist);
1368 else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
1369 else return false;
1370 return true;
1371 }
1372 };
1373
1374 template<int N>
1375 struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true>
1376 {
1377 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1378 {
1379 if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
1380 else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
1381 else return false;
1382 return true;
1383 }
1384 };
1385
1386 template<int N>
1387 struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false>
1388 {
1389 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1390 {
1391 if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
1392 else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1393 else return false;
1394 return true;
1395 }
1396 };
1397
1398 template<int N>
1399 struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true>
1400 {
1401 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1402 {
1403 if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
1404 else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1405 else return false;
1406 return true;
1407 }
1408 };
1409
1410 template<int N>
1411 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false>
1412 {
1413 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1414 {
1415 if (unlikely(node.isLeaf())) return false;
1416 if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1417 else mask = intersectNodeMB4D(node, ray, time, dist);
1418 return true;
1419 }
1420 };
1421
1422 template<int N>
1423 struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true>
1424 {
1425 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1426 {
1427 if (unlikely(node.isLeaf())) return false;
1428 if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1429 else mask = intersectNodeMB4DRobust(node, ray, time, dist);
1430 return true;
1431 }
1432 };
1433
1434 template<int N>
1435 struct BVHNNodeIntersector1<N, BVH_QN1, false>
1436 {
1437 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1438 {
1439 if (unlikely(node.isLeaf())) return false;
1440 mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
1441 return true;
1442 }
1443 };
1444
1445 template<int N>
1446 struct BVHNNodeIntersector1<N, BVH_QN1, true>
1447 {
1448 static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1449 {
1450 if (unlikely(node.isLeaf())) return false;
1451 mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
1452 return true;
1453 }
1454 };
1455
1456 /*! Intersects N nodes with K rays */
1457 template<int N, bool robust>
1458 struct BVHNQuantizedBaseNodeIntersector1;
1459
1460 template<int N>
1461 struct BVHNQuantizedBaseNodeIntersector1<N, false>
1462 {
1463 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist)
1464 {
1465 return intersectNode(node,ray,dist);
1466 }
1467
1468 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
1469 {
1470 return intersectNode(node,ray,time,dist);
1471 }
1472
1473 };
1474
1475 template<int N>
1476 struct BVHNQuantizedBaseNodeIntersector1<N, true>
1477 {
1478 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
1479 {
1480 return intersectNode(node,ray,dist);
1481 }
1482
1483 static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
1484 {
1485 return intersectNode(node,ray,time,dist);
1486 }
1487
1488 };
1489
1490
1491 }
1492}
1493