1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "node_intersector.h" |
7 | |
8 | #if defined(__AVX2__) |
9 | #define __FMA_X4__ |
10 | #endif |
11 | |
12 | #if defined(__aarch64__) |
13 | #define __FMA_X4__ |
14 | #endif |
15 | |
16 | |
17 | namespace embree |
18 | { |
19 | namespace isa |
20 | { |
21 | ////////////////////////////////////////////////////////////////////////////////////// |
22 | // Ray structure used in single-ray traversal |
23 | ////////////////////////////////////////////////////////////////////////////////////// |
24 | |
25 | template<int N, bool robust> |
26 | struct TravRayBase; |
27 | |
28 | /* Base (without tnear and tfar) */ |
29 | template<int N> |
30 | struct TravRayBase<N,false> |
31 | { |
32 | __forceinline TravRayBase() {} |
33 | |
34 | __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir) |
35 | : org_xyz(ray_org), dir_xyz(ray_dir) |
36 | { |
37 | const Vec3fa ray_rdir = rcp_safe(ray_dir); |
38 | org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); |
39 | dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); |
40 | rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z); |
41 | #if defined(__FMA_X4__) |
42 | const Vec3fa ray_org_rdir = ray_org*ray_rdir; |
43 | #if !defined(__aarch64__) |
44 | org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); |
45 | #else |
46 | //for aarch64, we do not have msub equal instruction, so we negeate orig and use madd |
47 | //x86 will use msub |
48 | neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z); |
49 | #endif |
50 | #endif |
51 | nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); |
52 | nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); |
53 | nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); |
54 | farX = nearX ^ sizeof(vfloat<N>); |
55 | farY = nearY ^ sizeof(vfloat<N>); |
56 | farZ = nearZ ^ sizeof(vfloat<N>); |
57 | } |
58 | |
59 | template<int K> |
60 | __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, |
61 | const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, |
62 | size_t flip = sizeof(vfloat<N>)) |
63 | { |
64 | org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); |
65 | dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); |
66 | rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); |
67 | #if defined(__FMA_X4__) |
68 | #if !defined(__aarch64__) |
69 | org_rdir = org*rdir; |
70 | #else |
71 | neg_org_rdir = -(org*rdir); |
72 | #endif |
73 | #endif |
74 | nearX = nearXYZ.x[k]; |
75 | nearY = nearXYZ.y[k]; |
76 | nearZ = nearXYZ.z[k]; |
77 | farX = nearX ^ flip; |
78 | farY = nearY ^ flip; |
79 | farZ = nearZ ^ flip; |
80 | } |
81 | |
82 | Vec3fa org_xyz, dir_xyz; |
83 | Vec3vf<N> org, dir, rdir; |
84 | #if defined(__FMA_X4__) |
85 | #if !defined(__aarch64__) |
86 | Vec3vf<N> org_rdir; |
87 | #else |
88 | //aarch64 version are keeping negation of the org_rdir and use madd |
89 | //x86 uses msub |
90 | Vec3vf<N> neg_org_rdir; |
91 | #endif |
92 | #endif |
93 | size_t nearX, nearY, nearZ; |
94 | size_t farX, farY, farZ; |
95 | }; |
96 | |
97 | /* Base (without tnear and tfar) */ |
98 | template<int N> |
99 | struct TravRayBase<N,true> |
100 | { |
101 | __forceinline TravRayBase() {} |
102 | |
103 | __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir) |
104 | : org_xyz(ray_org), dir_xyz(ray_dir) |
105 | { |
106 | const float round_down = 1.0f-3.0f*float(ulp); |
107 | const float round_up = 1.0f+3.0f*float(ulp); |
108 | const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir); |
109 | const Vec3fa ray_rdir_near = round_down*ray_rdir; |
110 | const Vec3fa ray_rdir_far = round_up *ray_rdir; |
111 | org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); |
112 | dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); |
113 | rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z); |
114 | rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z); |
115 | |
116 | nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); |
117 | nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); |
118 | nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); |
119 | farX = nearX ^ sizeof(vfloat<N>); |
120 | farY = nearY ^ sizeof(vfloat<N>); |
121 | farZ = nearZ ^ sizeof(vfloat<N>); |
122 | } |
123 | |
124 | template<int K> |
125 | __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, |
126 | const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, |
127 | size_t flip = sizeof(vfloat<N>)) |
128 | { |
129 | const vfloat<N> round_down = 1.0f-3.0f*float(ulp); |
130 | const vfloat<N> round_up = 1.0f+3.0f*float(ulp); |
131 | org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); |
132 | dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); |
133 | rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); |
134 | rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); |
135 | |
136 | nearX = nearXYZ.x[k]; |
137 | nearY = nearXYZ.y[k]; |
138 | nearZ = nearXYZ.z[k]; |
139 | farX = nearX ^ flip; |
140 | farY = nearY ^ flip; |
141 | farZ = nearZ ^ flip; |
142 | } |
143 | |
144 | Vec3fa org_xyz, dir_xyz; |
145 | Vec3vf<N> org, dir, rdir_near, rdir_far; |
146 | size_t nearX, nearY, nearZ; |
147 | size_t farX, farY, farZ; |
148 | }; |
149 | |
150 | /* Full (with tnear and tfar) */ |
151 | template<int N, bool robust> |
152 | struct TravRay : TravRayBase<N,robust> |
153 | { |
154 | __forceinline TravRay() {} |
155 | |
156 | __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar) |
157 | : TravRayBase<N,robust>(ray_org, ray_dir), |
158 | tnear(ray_tnear), tfar(ray_tfar) {} |
159 | |
160 | template<int K> |
161 | __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, |
162 | const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, |
163 | float ray_tnear, float ray_tfar, |
164 | size_t flip = sizeof(vfloat<N>)) |
165 | { |
166 | TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip); |
167 | tnear = ray_tnear; tfar = ray_tfar; |
168 | } |
169 | |
170 | vfloat<N> tnear; |
171 | vfloat<N> tfar; |
172 | }; |
173 | |
174 | ////////////////////////////////////////////////////////////////////////////////////// |
175 | // Point Query structure used in single-ray traversal |
176 | ////////////////////////////////////////////////////////////////////////////////////// |
177 | |
178 | template<int N> |
179 | struct TravPointQuery |
180 | { |
181 | __forceinline TravPointQuery() {} |
182 | |
183 | __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad) |
184 | { |
185 | org = Vec3vf<N>(query_org.x, query_org.y, query_org.z); |
186 | rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z); |
187 | } |
188 | |
189 | __forceinline vfloat<N> const& tfar() const { |
190 | return rad.x; |
191 | } |
192 | |
193 | Vec3vf<N> org, rad; |
194 | }; |
195 | |
196 | ////////////////////////////////////////////////////////////////////////////////////// |
197 | // point query |
198 | ////////////////////////////////////////////////////////////////////////////////////// |
199 | |
200 | template<int N> |
201 | __forceinline size_t pointQuerySphereDistAndMask( |
202 | const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX, |
203 | vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ) |
204 | { |
205 | const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x; |
206 | const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y; |
207 | const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z; |
208 | dist = vX * vX + vY * vY + vZ * vZ; |
209 | const vbool<N> vmask = dist <= query.tfar()*query.tfar(); |
210 | const vbool<N> valid = minX <= maxX; |
211 | return movemask(vmask) & movemask(valid); |
212 | } |
213 | |
214 | template<int N> |
215 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
216 | { |
217 | const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x)); |
218 | const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y)); |
219 | const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z)); |
220 | const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x)); |
221 | const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y)); |
222 | const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z)); |
223 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
224 | } |
225 | |
226 | template<int N> |
227 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
228 | { |
229 | const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x); |
230 | const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y); |
231 | const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z); |
232 | const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x); |
233 | const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y); |
234 | const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z); |
235 | const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0])); |
236 | const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0])); |
237 | const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0])); |
238 | const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0])); |
239 | const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0])); |
240 | const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0])); |
241 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
242 | } |
243 | |
244 | template<int N> |
245 | __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
246 | { |
247 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
248 | size_t mask = pointQueryNodeSphere(node, query, time, dist); |
249 | |
250 | if (unlikely(ref.isAABBNodeMB4D())) { |
251 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
252 | const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t); |
253 | mask &= movemask(vmask); |
254 | } |
255 | |
256 | return mask; |
257 | } |
258 | |
259 | template<int N> |
260 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
261 | { |
262 | const vfloat<N> start_x(node->start.x); |
263 | const vfloat<N> scale_x(node->scale.x); |
264 | const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
265 | const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
266 | const vfloat<N> start_y(node->start.y); |
267 | const vfloat<N> scale_y(node->scale.y); |
268 | const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
269 | const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
270 | const vfloat<N> start_z(node->start.z); |
271 | const vfloat<N> scale_z(node->scale.z); |
272 | const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
273 | const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
274 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask()); |
275 | } |
276 | |
277 | template<int N> |
278 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
279 | { |
280 | const vfloat<N> minX = node->dequantizeLowerX(time); |
281 | const vfloat<N> maxX = node->dequantizeUpperX(time); |
282 | const vfloat<N> minY = node->dequantizeLowerY(time); |
283 | const vfloat<N> maxY = node->dequantizeUpperY(time); |
284 | const vfloat<N> minZ = node->dequantizeLowerZ(time); |
285 | const vfloat<N> maxZ = node->dequantizeUpperZ(time); |
286 | return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask()); |
287 | } |
288 | |
289 | template<int N> |
290 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
291 | { |
292 | // TODO: point query - implement |
293 | const vbool<N> vmask = vbool<N>(true); |
294 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
295 | dist = vfloat<N>(0.0f); |
296 | return mask; |
297 | } |
298 | |
299 | template<int N> |
300 | __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
301 | { |
302 | // TODO: point query - implement |
303 | const vbool<N> vmask = vbool<N>(true); |
304 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
305 | dist = vfloat<N>(0.0f); |
306 | return mask; |
307 | } |
308 | |
309 | template<int N> |
310 | __forceinline size_t pointQueryAABBDistAndMask( |
311 | const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX, |
312 | vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ) |
313 | { |
314 | const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x; |
315 | const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y; |
316 | const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z; |
317 | dist = vX * vX + vY * vY + vZ * vZ; |
318 | const vbool<N> valid = minX <= maxX; |
319 | const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) | |
320 | (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) | |
321 | (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z)); |
322 | return movemask(vmask) & movemask(valid); |
323 | } |
324 | |
325 | template<int N> |
326 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
327 | { |
328 | const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x)); |
329 | const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y)); |
330 | const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z)); |
331 | const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x)); |
332 | const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y)); |
333 | const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z)); |
334 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
335 | } |
336 | |
337 | template<int N> |
338 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
339 | { |
340 | const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x); |
341 | const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y); |
342 | const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z); |
343 | const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x); |
344 | const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y); |
345 | const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z); |
346 | const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0])); |
347 | const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0])); |
348 | const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0])); |
349 | const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0])); |
350 | const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0])); |
351 | const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0])); |
352 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ); |
353 | } |
354 | |
355 | template<int N> |
356 | __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
357 | { |
358 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
359 | size_t mask = pointQueryNodeAABB(node, query, time, dist); |
360 | |
361 | if (unlikely(ref.isAABBNodeMB4D())) { |
362 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
363 | const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t); |
364 | mask &= movemask(vmask); |
365 | } |
366 | |
367 | return mask; |
368 | } |
369 | |
370 | template<int N> |
371 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
372 | { |
373 | const size_t mvalid = movemask(node->validMask()); |
374 | const vfloat<N> start_x(node->start.x); |
375 | const vfloat<N> scale_x(node->scale.x); |
376 | const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
377 | const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x); |
378 | const vfloat<N> start_y(node->start.y); |
379 | const vfloat<N> scale_y(node->scale.y); |
380 | const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
381 | const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y); |
382 | const vfloat<N> start_z(node->start.z); |
383 | const vfloat<N> scale_z(node->scale.z); |
384 | const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
385 | const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z); |
386 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid; |
387 | } |
388 | |
389 | template<int N> |
390 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
391 | { |
392 | const size_t mvalid = movemask(node->validMask()); |
393 | const vfloat<N> minX = node->dequantizeLowerX(time); |
394 | const vfloat<N> maxX = node->dequantizeUpperX(time); |
395 | const vfloat<N> minY = node->dequantizeLowerY(time); |
396 | const vfloat<N> maxY = node->dequantizeUpperY(time); |
397 | const vfloat<N> minZ = node->dequantizeLowerZ(time); |
398 | const vfloat<N> maxZ = node->dequantizeUpperZ(time); |
399 | return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid; |
400 | } |
401 | |
402 | template<int N> |
403 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
404 | { |
405 | // TODO: point query - implement |
406 | const vbool<N> vmask = vbool<N>(true); |
407 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
408 | dist = vfloat<N>(0.0f); |
409 | return mask; |
410 | } |
411 | |
412 | template<int N> |
413 | __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
414 | { |
415 | // TODO: point query - implement |
416 | const vbool<N> vmask = vbool<N>(true); |
417 | const size_t mask = movemask(vmask) & ((1<<N)-1); |
418 | dist = vfloat<N>(0.0f); |
419 | return mask; |
420 | } |
421 | |
422 | ////////////////////////////////////////////////////////////////////////////////////// |
423 | // Fast AABBNode intersection |
424 | ////////////////////////////////////////////////////////////////////////////////////// |
425 | |
426 | template<int N, bool robust> |
427 | __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist); |
428 | |
429 | template<> |
430 | __forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist) |
431 | { |
432 | #if defined(__FMA_X4__) |
433 | #if defined(__aarch64__) |
434 | const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x); |
435 | const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y); |
436 | const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z); |
437 | const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x); |
438 | const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y); |
439 | const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z); |
440 | #else |
441 | const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); |
442 | const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); |
443 | const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); |
444 | const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); |
445 | const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); |
446 | const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); |
447 | #endif |
448 | #else |
449 | const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; |
450 | const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; |
451 | const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z; |
452 | const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x; |
453 | const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y; |
454 | const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z; |
455 | #endif |
456 | |
457 | #if defined(__aarch64__) |
458 | const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear); |
459 | const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar); |
460 | const vbool4 vmask = asInt(tNear) <= asInt(tFar); |
461 | const size_t mask = movemask(vmask); |
462 | #elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW |
463 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
464 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
465 | const vbool4 vmask = asInt(tNear) > asInt(tFar); |
466 | const size_t mask = movemask(vmask) ^ ((1<<4)-1); |
467 | #elif defined(__AVX512F__) // SKX |
468 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
469 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
470 | const vbool4 vmask = asInt(tNear) <= asInt(tFar); |
471 | const size_t mask = movemask(vmask); |
472 | #else |
473 | const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
474 | const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
475 | const vbool4 vmask = tNear <= tFar; |
476 | const size_t mask = movemask(vmask); |
477 | #endif |
478 | dist = tNear; |
479 | return mask; |
480 | } |
481 | |
482 | #if defined(__AVX__) |
483 | |
484 | template<> |
485 | __forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist) |
486 | { |
487 | #if defined(__AVX2__) |
488 | #if defined(__aarch64__) |
489 | const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x); |
490 | const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y); |
491 | const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z); |
492 | const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x); |
493 | const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y); |
494 | const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z); |
495 | #else |
496 | const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); |
497 | const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); |
498 | const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); |
499 | const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); |
500 | const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); |
501 | const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); |
502 | #endif |
503 | |
504 | #else |
505 | const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; |
506 | const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; |
507 | const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z; |
508 | const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x; |
509 | const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y; |
510 | const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z; |
511 | #endif |
512 | |
513 | #if defined(__AVX2__) && !defined(__AVX512F__) // HSW |
514 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
515 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
516 | const vbool8 vmask = asInt(tNear) > asInt(tFar); |
517 | const size_t mask = movemask(vmask) ^ ((1<<8)-1); |
518 | #elif defined(__AVX512F__) // SKX |
519 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
520 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
521 | const vbool8 vmask = asInt(tNear) <= asInt(tFar); |
522 | const size_t mask = movemask(vmask); |
523 | #else |
524 | const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
525 | const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
526 | const vbool8 vmask = tNear <= tFar; |
527 | const size_t mask = movemask(vmask); |
528 | #endif |
529 | dist = tNear; |
530 | return mask; |
531 | } |
532 | |
533 | #endif |
534 | |
535 | ////////////////////////////////////////////////////////////////////////////////////// |
536 | // Robust AABBNode intersection |
537 | ////////////////////////////////////////////////////////////////////////////////////// |
538 | |
539 | template<int N> |
540 | __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist) |
541 | { |
542 | const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; |
543 | const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; |
544 | const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; |
545 | const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; |
546 | const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; |
547 | const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; |
548 | const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
549 | const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
550 | const vbool<N> vmask = tNear <= tFar; |
551 | const size_t mask = movemask(vmask); |
552 | dist = tNear; |
553 | return mask; |
554 | } |
555 | |
556 | ////////////////////////////////////////////////////////////////////////////////////// |
557 | // Fast AABBNodeMB intersection |
558 | ////////////////////////////////////////////////////////////////////////////////////// |
559 | |
560 | template<int N> |
561 | __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
562 | { |
563 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
564 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
565 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
566 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
567 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
568 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
569 | #if defined(__FMA_X4__) |
570 | #if defined(__aarch64__) |
571 | const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); |
572 | const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y); |
573 | const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z); |
574 | const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x); |
575 | const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y); |
576 | const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z); |
577 | #else |
578 | const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); |
579 | const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); |
580 | const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); |
581 | const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); |
582 | const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); |
583 | const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); |
584 | #endif |
585 | #else |
586 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; |
587 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; |
588 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z; |
589 | const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x; |
590 | const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; |
591 | const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; |
592 | #endif |
593 | #if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW |
594 | const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
595 | const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
596 | const vbool<N> vmask = asInt(tNear) > asInt(tFar); |
597 | const size_t mask = movemask(vmask) ^ ((1<<N)-1); |
598 | #elif defined(__AVX512F__) // SKX |
599 | const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
600 | const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
601 | const vbool<N> vmask = asInt(tNear) <= asInt(tFar); |
602 | const size_t mask = movemask(vmask); |
603 | #else |
604 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
605 | const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
606 | const vbool<N> vmask = tNear <= tFar; |
607 | const size_t mask = movemask(vmask); |
608 | #endif |
609 | dist = tNear; |
610 | return mask; |
611 | } |
612 | |
613 | ////////////////////////////////////////////////////////////////////////////////////// |
614 | // Robust AABBNodeMB intersection |
615 | ////////////////////////////////////////////////////////////////////////////////////// |
616 | |
617 | template<int N> |
618 | __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
619 | { |
620 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
621 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
622 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
623 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; |
624 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y; |
625 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z; |
626 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
627 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
628 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
629 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
630 | const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; |
631 | const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y; |
632 | const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z; |
633 | const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ); |
634 | const size_t mask = movemask(tNear <= tFar); |
635 | dist = tNear; |
636 | return mask; |
637 | } |
638 | |
639 | ////////////////////////////////////////////////////////////////////////////////////// |
640 | // Fast AABBNodeMB4D intersection |
641 | ////////////////////////////////////////////////////////////////////////////////////// |
642 | |
643 | template<int N> |
644 | __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
645 | { |
646 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
647 | |
648 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
649 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
650 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
651 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
652 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
653 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
654 | #if defined (__FMA_X4__) |
655 | #if defined(__aarch64__) |
656 | const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); |
657 | const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y); |
658 | const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z); |
659 | const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x); |
660 | const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y); |
661 | const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z); |
662 | #else |
663 | const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); |
664 | const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); |
665 | const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); |
666 | const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); |
667 | const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); |
668 | const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); |
669 | #endif |
670 | #else |
671 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; |
672 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; |
673 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z; |
674 | const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x; |
675 | const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; |
676 | const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; |
677 | #endif |
678 | #if defined(__FMA_X4__) && !defined(__AVX512F__) |
679 | const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear)); |
680 | const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar )); |
681 | #else |
682 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
683 | const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
684 | #endif |
685 | vbool<N> vmask = tNear <= tFar; |
686 | if (unlikely(ref.isAABBNodeMB4D())) { |
687 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
688 | vmask &= (node1->lower_t <= time) & (time < node1->upper_t); |
689 | } |
690 | const size_t mask = movemask(vmask); |
691 | dist = tNear; |
692 | return mask; |
693 | } |
694 | |
695 | ////////////////////////////////////////////////////////////////////////////////////// |
696 | // Robust AABBNodeMB4D intersection |
697 | ////////////////////////////////////////////////////////////////////////////////////// |
698 | |
699 | template<int N> |
700 | __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
701 | { |
702 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
703 | |
704 | const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); |
705 | const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); |
706 | const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ); |
707 | const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x; |
708 | const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y; |
709 | const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z; |
710 | const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ); |
711 | const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); |
712 | const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); |
713 | const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); |
714 | const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x; |
715 | const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y; |
716 | const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z; |
717 | const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ); |
718 | vbool<N> vmask = tNear <= tFar; |
719 | if (unlikely(ref.isAABBNodeMB4D())) { |
720 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
721 | vmask &= (node1->lower_t <= time) & (time < node1->upper_t); |
722 | } |
723 | const size_t mask = movemask(vmask); |
724 | dist = tNear; |
725 | return mask; |
726 | } |
727 | |
728 | ////////////////////////////////////////////////////////////////////////////////////// |
729 | // Fast QuantizedBaseNode intersection |
730 | ////////////////////////////////////////////////////////////////////////////////////// |
731 | |
732 | template<int N, bool robust> |
733 | __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist); |
734 | |
735 | template<> |
736 | __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist) |
737 | { |
738 | const size_t mvalid = movemask(node->validMask()); |
739 | const vfloat4 start_x(node->start.x); |
740 | const vfloat4 scale_x(node->scale.x); |
741 | const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); |
742 | const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x); |
743 | const vfloat4 start_y(node->start.y); |
744 | const vfloat4 scale_y(node->scale.y); |
745 | const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y); |
746 | const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y); |
747 | const vfloat4 start_z(node->start.z); |
748 | const vfloat4 scale_z(node->scale.z); |
749 | const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z); |
750 | const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z); |
751 | |
752 | #if defined(__FMA_X4__) |
753 | #if defined(__aarch64__) |
754 | const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); |
755 | const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); |
756 | const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); |
757 | const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); |
758 | const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); |
759 | const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); |
760 | #else |
761 | const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
762 | const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
763 | const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
764 | const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
765 | const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
766 | const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
767 | #endif |
768 | #else |
769 | const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x; |
770 | const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y; |
771 | const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z; |
772 | const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x; |
773 | const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y; |
774 | const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; |
775 | #endif |
776 | |
777 | #if defined(__aarch64__) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW |
778 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
779 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
780 | const vbool4 vmask = asInt(tNear) > asInt(tFar); |
781 | const size_t mask = movemask(vmask) ^ ((1<<4)-1); |
782 | #elif defined(__AVX512F__) // SKX |
783 | const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
784 | const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
785 | const vbool4 vmask = asInt(tNear) <= asInt(tFar); |
786 | const size_t mask = movemask(vmask); |
787 | #else |
788 | const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
789 | const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
790 | const vbool4 vmask = tNear <= tFar; |
791 | const size_t mask = movemask(vmask); |
792 | #endif |
793 | dist = tNear; |
794 | return mask & mvalid; |
795 | } |
796 | |
797 | template<> |
798 | __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist) |
799 | { |
800 | const size_t mvalid = movemask(node->validMask()); |
801 | const vfloat4 start_x(node->start.x); |
802 | const vfloat4 scale_x(node->scale.x); |
803 | const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x); |
804 | const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x); |
805 | const vfloat4 start_y(node->start.y); |
806 | const vfloat4 scale_y(node->scale.y); |
807 | const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y); |
808 | const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y); |
809 | const vfloat4 start_z(node->start.z); |
810 | const vfloat4 scale_z(node->scale.z); |
811 | const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z); |
812 | const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z); |
813 | |
814 | const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; |
815 | const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; |
816 | const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; |
817 | const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; |
818 | const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; |
819 | const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; |
820 | |
821 | const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
822 | const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
823 | const vbool4 vmask = tNear <= tFar; |
824 | const size_t mask = movemask(vmask); |
825 | dist = tNear; |
826 | return mask & mvalid; |
827 | } |
828 | |
829 | |
830 | #if defined(__AVX__) |
831 | |
832 | template<> |
833 | __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist) |
834 | { |
835 | const size_t mvalid = movemask(node->validMask()); |
836 | const vfloat8 start_x(node->start.x); |
837 | const vfloat8 scale_x(node->scale.x); |
838 | const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); |
839 | const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x); |
840 | const vfloat8 start_y(node->start.y); |
841 | const vfloat8 scale_y(node->scale.y); |
842 | const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y); |
843 | const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y); |
844 | const vfloat8 start_z(node->start.z); |
845 | const vfloat8 scale_z(node->scale.z); |
846 | const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); |
847 | const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); |
848 | |
849 | #if defined(__AVX2__) |
850 | #if defined(__aarch64__) |
851 | const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); |
852 | const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); |
853 | const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); |
854 | const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); |
855 | const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); |
856 | const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); |
857 | #else |
858 | const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
859 | const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
860 | const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
861 | const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
862 | const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
863 | const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
864 | #endif |
865 | #else |
866 | const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x; |
867 | const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y; |
868 | const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z; |
869 | const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x; |
870 | const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y; |
871 | const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; |
872 | #endif |
873 | |
874 | #if defined(__AVX2__) && !defined(__AVX512F__) // HSW |
875 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
876 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
877 | const vbool8 vmask = asInt(tNear) > asInt(tFar); |
878 | const size_t mask = movemask(vmask) ^ ((1<<8)-1); |
879 | #elif defined(__AVX512F__) // SKX |
880 | const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); |
881 | const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); |
882 | const vbool8 vmask = asInt(tNear) <= asInt(tFar); |
883 | const size_t mask = movemask(vmask); |
884 | #else |
885 | const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
886 | const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
887 | const vbool8 vmask = tNear <= tFar; |
888 | const size_t mask = movemask(vmask); |
889 | #endif |
890 | dist = tNear; |
891 | return mask & mvalid; |
892 | } |
893 | |
894 | template<> |
895 | __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist) |
896 | { |
897 | const size_t mvalid = movemask(node->validMask()); |
898 | const vfloat8 start_x(node->start.x); |
899 | const vfloat8 scale_x(node->scale.x); |
900 | const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x); |
901 | const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x); |
902 | const vfloat8 start_y(node->start.y); |
903 | const vfloat8 scale_y(node->scale.y); |
904 | const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y); |
905 | const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y); |
906 | const vfloat8 start_z(node->start.z); |
907 | const vfloat8 scale_z(node->scale.z); |
908 | const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); |
909 | const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); |
910 | |
911 | const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; |
912 | const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; |
913 | const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; |
914 | const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; |
915 | const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; |
916 | const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; |
917 | |
918 | const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); |
919 | const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); |
920 | const vbool8 vmask = tNear <= tFar; |
921 | const size_t mask = movemask(vmask); |
922 | |
923 | dist = tNear; |
924 | return mask & mvalid; |
925 | } |
926 | |
927 | |
928 | #endif |
929 | |
930 | template<int N> |
931 | __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
932 | { |
933 | const vboolf<N> mvalid = node->validMask(); |
934 | const vfloat<N> lower_x = node->dequantizeLowerX(time); |
935 | const vfloat<N> upper_x = node->dequantizeUpperX(time); |
936 | const vfloat<N> lower_y = node->dequantizeLowerY(time); |
937 | const vfloat<N> upper_y = node->dequantizeUpperY(time); |
938 | const vfloat<N> lower_z = node->dequantizeLowerZ(time); |
939 | const vfloat<N> upper_z = node->dequantizeUpperZ(time); |
940 | #if defined(__FMA_X4__) |
941 | #if defined(__aarch64__) |
942 | const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); |
943 | const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); |
944 | const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); |
945 | const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); |
946 | const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); |
947 | const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); |
948 | #else |
949 | const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
950 | const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
951 | const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
952 | const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
953 | const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
954 | const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
955 | #endif |
956 | #else |
957 | const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x; |
958 | const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y; |
959 | const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z; |
960 | const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x; |
961 | const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y; |
962 | const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z; |
963 | #endif |
964 | |
965 | const vfloat<N> tminX = mini(tNearX,tFarX); |
966 | const vfloat<N> tmaxX = maxi(tNearX,tFarX); |
967 | const vfloat<N> tminY = mini(tNearY,tFarY); |
968 | const vfloat<N> tmaxY = maxi(tNearY,tFarY); |
969 | const vfloat<N> tminZ = mini(tNearZ,tFarZ); |
970 | const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); |
971 | const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); |
972 | const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); |
973 | #if defined(__AVX512F__) // SKX |
974 | const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); |
975 | #else |
976 | const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; |
977 | #endif |
978 | const size_t mask = movemask(vmask); |
979 | dist = tNear; |
980 | return mask; |
981 | } |
982 | |
983 | template<int N> |
984 | __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
985 | { |
986 | const vboolf<N> mvalid = node->validMask(); |
987 | const vfloat<N> lower_x = node->dequantizeLowerX(time); |
988 | const vfloat<N> upper_x = node->dequantizeUpperX(time); |
989 | const vfloat<N> lower_y = node->dequantizeLowerY(time); |
990 | const vfloat<N> upper_y = node->dequantizeUpperY(time); |
991 | const vfloat<N> lower_z = node->dequantizeLowerZ(time); |
992 | const vfloat<N> upper_z = node->dequantizeUpperZ(time); |
993 | const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; |
994 | const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; |
995 | const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; |
996 | const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; |
997 | const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; |
998 | const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; |
999 | |
1000 | const vfloat<N> tminX = mini(tNearX,tFarX); |
1001 | const vfloat<N> tmaxX = maxi(tNearX,tFarX); |
1002 | const vfloat<N> tminY = mini(tNearY,tFarY); |
1003 | const vfloat<N> tmaxY = maxi(tNearY,tFarY); |
1004 | const vfloat<N> tminZ = mini(tNearZ,tFarZ); |
1005 | const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); |
1006 | const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); |
1007 | const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); |
1008 | #if defined(__AVX512F__) // SKX |
1009 | const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); |
1010 | #else |
1011 | const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; |
1012 | #endif |
1013 | const size_t mask = movemask(vmask); |
1014 | dist = tNear; |
1015 | return mask; |
1016 | } |
1017 | |
1018 | ////////////////////////////////////////////////////////////////////////////////////// |
1019 | // Fast OBBNode intersection |
1020 | ////////////////////////////////////////////////////////////////////////////////////// |
1021 | |
1022 | template<int N, bool robust> |
1023 | __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist) |
1024 | { |
1025 | const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir); |
1026 | //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir; |
1027 | const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir); |
1028 | const Vec3vf<N> org = xfmPoint(node->naabb,ray.org); |
1029 | const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir; |
1030 | const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir; |
1031 | |
1032 | const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x); |
1033 | const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y); |
1034 | const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z); |
1035 | const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x); |
1036 | const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y); |
1037 | const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z); |
1038 | vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ); |
1039 | vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
1040 | if (robust) { |
1041 | tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp)); |
1042 | tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp)); |
1043 | } |
1044 | const vbool<N> vmask = tNear <= tFar; |
1045 | dist = tNear; |
1046 | return movemask(vmask); |
1047 | } |
1048 | |
1049 | ////////////////////////////////////////////////////////////////////////////////////// |
1050 | // Fast OBBNodeMB intersection |
1051 | ////////////////////////////////////////////////////////////////////////////////////// |
1052 | |
1053 | template<int N, bool robust> |
1054 | __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist) |
1055 | { |
1056 | const AffineSpace3vf<N> xfm = node->space0; |
1057 | const Vec3vf<N> b0_lower = zero; |
1058 | const Vec3vf<N> b0_upper = one; |
1059 | const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time)); |
1060 | const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time)); |
1061 | |
1062 | const BBox3vf<N> bounds(lower,upper); |
1063 | const Vec3vf<N> dir = xfmVector(xfm,ray.dir); |
1064 | const Vec3vf<N> rdir = rcp_safe(dir); |
1065 | const Vec3vf<N> org = xfmPoint(xfm,ray.org); |
1066 | |
1067 | const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir; |
1068 | const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir; |
1069 | |
1070 | const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x); |
1071 | const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y); |
1072 | const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z); |
1073 | const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x); |
1074 | const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y); |
1075 | const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z); |
1076 | vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ); |
1077 | vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ ); |
1078 | if (robust) { |
1079 | tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp)); |
1080 | tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp)); |
1081 | } |
1082 | const vbool<N> vmask = tNear <= tFar; |
1083 | dist = tNear; |
1084 | return movemask(vmask); |
1085 | } |
1086 | |
1087 | ////////////////////////////////////////////////////////////////////////////////////// |
1088 | // Node intersectors used in point query raversal |
1089 | ////////////////////////////////////////////////////////////////////////////////////// |
1090 | |
1091 | /*! Computes traversal information for N nodes with 1 point query */ |
1092 | template<int N, int types> |
1093 | struct BVHNNodePointQuerySphere1; |
1094 | |
1095 | template<int N> |
1096 | struct BVHNNodePointQuerySphere1<N, BVH_AN1> |
1097 | { |
1098 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1099 | { |
1100 | if (unlikely(node.isLeaf())) return false; |
1101 | mask = pointQueryNodeSphere(node.getAABBNode(), query, dist); |
1102 | return true; |
1103 | } |
1104 | }; |
1105 | |
1106 | template<int N> |
1107 | struct BVHNNodePointQuerySphere1<N, BVH_AN2> |
1108 | { |
1109 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1110 | { |
1111 | if (unlikely(node.isLeaf())) return false; |
1112 | mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist); |
1113 | return true; |
1114 | } |
1115 | }; |
1116 | |
1117 | template<int N> |
1118 | struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D> |
1119 | { |
1120 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1121 | { |
1122 | if (unlikely(node.isLeaf())) return false; |
1123 | mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist); |
1124 | return true; |
1125 | } |
1126 | }; |
1127 | |
1128 | template<int N> |
1129 | struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1> |
1130 | { |
1131 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1132 | { |
1133 | if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist); |
1134 | else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist); |
1135 | else return false; |
1136 | return true; |
1137 | } |
1138 | }; |
1139 | |
1140 | template<int N> |
1141 | struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2> |
1142 | { |
1143 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1144 | { |
1145 | if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist); |
1146 | else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist); |
1147 | else return false; |
1148 | return true; |
1149 | } |
1150 | }; |
1151 | |
1152 | template<int N> |
1153 | struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2> |
1154 | { |
1155 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1156 | { |
1157 | if (unlikely(node.isLeaf())) return false; |
1158 | if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist); |
1159 | else mask = pointQueryNodeSphereMB4D(node, query, time, dist); |
1160 | return true; |
1161 | } |
1162 | }; |
1163 | |
1164 | template<int N> |
1165 | struct BVHNNodePointQuerySphere1<N, BVH_QN1> |
1166 | { |
1167 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1168 | { |
1169 | if (unlikely(node.isLeaf())) return false; |
1170 | mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist); |
1171 | return true; |
1172 | } |
1173 | }; |
1174 | |
1175 | template<int N> |
1176 | struct BVHNQuantizedBaseNodePointQuerySphere1 |
1177 | { |
1178 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
1179 | { |
1180 | return pointQueryNodeSphere(node,query,dist); |
1181 | } |
1182 | |
1183 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
1184 | { |
1185 | return pointQueryNodeSphere(node,query,time,dist); |
1186 | } |
1187 | }; |
1188 | |
1189 | /*! Computes traversal information for N nodes with 1 point query */ |
1190 | template<int N, int types> |
1191 | struct BVHNNodePointQueryAABB1; |
1192 | |
1193 | template<int N> |
1194 | struct BVHNNodePointQueryAABB1<N, BVH_AN1> |
1195 | { |
1196 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1197 | { |
1198 | if (unlikely(node.isLeaf())) return false; |
1199 | mask = pointQueryNodeAABB(node.getAABBNode(), query, dist); |
1200 | return true; |
1201 | } |
1202 | }; |
1203 | |
1204 | template<int N> |
1205 | struct BVHNNodePointQueryAABB1<N, BVH_AN2> |
1206 | { |
1207 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1208 | { |
1209 | if (unlikely(node.isLeaf())) return false; |
1210 | mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist); |
1211 | return true; |
1212 | } |
1213 | }; |
1214 | |
1215 | template<int N> |
1216 | struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D> |
1217 | { |
1218 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1219 | { |
1220 | if (unlikely(node.isLeaf())) return false; |
1221 | mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist); |
1222 | return true; |
1223 | } |
1224 | }; |
1225 | |
1226 | template<int N> |
1227 | struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1> |
1228 | { |
1229 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1230 | { |
1231 | if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist); |
1232 | else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist); |
1233 | else return false; |
1234 | return true; |
1235 | } |
1236 | }; |
1237 | |
1238 | template<int N> |
1239 | struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2> |
1240 | { |
1241 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1242 | { |
1243 | if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist); |
1244 | else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist); |
1245 | else return false; |
1246 | return true; |
1247 | } |
1248 | }; |
1249 | |
1250 | template<int N> |
1251 | struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2> |
1252 | { |
1253 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1254 | { |
1255 | if (unlikely(node.isLeaf())) return false; |
1256 | if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist); |
1257 | else mask = pointQueryNodeAABBMB4D(node, query, time, dist); |
1258 | return true; |
1259 | } |
1260 | }; |
1261 | |
1262 | template<int N> |
1263 | struct BVHNNodePointQueryAABB1<N, BVH_QN1> |
1264 | { |
1265 | static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask) |
1266 | { |
1267 | if (unlikely(node.isLeaf())) return false; |
1268 | mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist); |
1269 | return true; |
1270 | } |
1271 | }; |
1272 | |
1273 | template<int N> |
1274 | struct BVHNQuantizedBaseNodePointQueryAABB1 |
1275 | { |
1276 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist) |
1277 | { |
1278 | return pointQueryNodeAABB(node,query,dist); |
1279 | } |
1280 | |
1281 | static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist) |
1282 | { |
1283 | return pointQueryNodeAABB(node,query,time,dist); |
1284 | } |
1285 | }; |
1286 | |
1287 | |
1288 | ////////////////////////////////////////////////////////////////////////////////////// |
1289 | // Node intersectors used in ray traversal |
1290 | ////////////////////////////////////////////////////////////////////////////////////// |
1291 | |
1292 | /*! Intersects N nodes with 1 ray */ |
1293 | template<int N, int types, bool robust> |
1294 | struct BVHNNodeIntersector1; |
1295 | |
1296 | template<int N> |
1297 | struct BVHNNodeIntersector1<N, BVH_AN1, false> |
1298 | { |
1299 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1300 | { |
1301 | if (unlikely(node.isLeaf())) return false; |
1302 | mask = intersectNode(node.getAABBNode(), ray, dist); |
1303 | return true; |
1304 | } |
1305 | }; |
1306 | |
1307 | template<int N> |
1308 | struct BVHNNodeIntersector1<N, BVH_AN1, true> |
1309 | { |
1310 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1311 | { |
1312 | if (unlikely(node.isLeaf())) return false; |
1313 | mask = intersectNodeRobust(node.getAABBNode(), ray, dist); |
1314 | return true; |
1315 | } |
1316 | }; |
1317 | |
1318 | template<int N> |
1319 | struct BVHNNodeIntersector1<N, BVH_AN2, false> |
1320 | { |
1321 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1322 | { |
1323 | if (unlikely(node.isLeaf())) return false; |
1324 | mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); |
1325 | return true; |
1326 | } |
1327 | }; |
1328 | |
1329 | template<int N> |
1330 | struct BVHNNodeIntersector1<N, BVH_AN2, true> |
1331 | { |
1332 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1333 | { |
1334 | if (unlikely(node.isLeaf())) return false; |
1335 | mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); |
1336 | return true; |
1337 | } |
1338 | }; |
1339 | |
1340 | template<int N> |
1341 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false> |
1342 | { |
1343 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1344 | { |
1345 | if (unlikely(node.isLeaf())) return false; |
1346 | mask = intersectNodeMB4D<N>(node, ray, time, dist); |
1347 | return true; |
1348 | } |
1349 | }; |
1350 | |
1351 | template<int N> |
1352 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true> |
1353 | { |
1354 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1355 | { |
1356 | if (unlikely(node.isLeaf())) return false; |
1357 | mask = intersectNodeMB4DRobust<N>(node, ray, time, dist); |
1358 | return true; |
1359 | } |
1360 | }; |
1361 | |
1362 | template<int N> |
1363 | struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false> |
1364 | { |
1365 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1366 | { |
1367 | if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist); |
1368 | else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); |
1369 | else return false; |
1370 | return true; |
1371 | } |
1372 | }; |
1373 | |
1374 | template<int N> |
1375 | struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true> |
1376 | { |
1377 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1378 | { |
1379 | if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist); |
1380 | else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); |
1381 | else return false; |
1382 | return true; |
1383 | } |
1384 | }; |
1385 | |
1386 | template<int N> |
1387 | struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false> |
1388 | { |
1389 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1390 | { |
1391 | if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); |
1392 | else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1393 | else return false; |
1394 | return true; |
1395 | } |
1396 | }; |
1397 | |
1398 | template<int N> |
1399 | struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true> |
1400 | { |
1401 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1402 | { |
1403 | if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); |
1404 | else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1405 | else return false; |
1406 | return true; |
1407 | } |
1408 | }; |
1409 | |
1410 | template<int N> |
1411 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false> |
1412 | { |
1413 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1414 | { |
1415 | if (unlikely(node.isLeaf())) return false; |
1416 | if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1417 | else mask = intersectNodeMB4D(node, ray, time, dist); |
1418 | return true; |
1419 | } |
1420 | }; |
1421 | |
1422 | template<int N> |
1423 | struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true> |
1424 | { |
1425 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1426 | { |
1427 | if (unlikely(node.isLeaf())) return false; |
1428 | if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); |
1429 | else mask = intersectNodeMB4DRobust(node, ray, time, dist); |
1430 | return true; |
1431 | } |
1432 | }; |
1433 | |
1434 | template<int N> |
1435 | struct BVHNNodeIntersector1<N, BVH_QN1, false> |
1436 | { |
1437 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) |
1438 | { |
1439 | if (unlikely(node.isLeaf())) return false; |
1440 | mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); |
1441 | return true; |
1442 | } |
1443 | }; |
1444 | |
1445 | template<int N> |
1446 | struct BVHNNodeIntersector1<N, BVH_QN1, true> |
1447 | { |
1448 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) |
1449 | { |
1450 | if (unlikely(node.isLeaf())) return false; |
1451 | mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); |
1452 | return true; |
1453 | } |
1454 | }; |
1455 | |
1456 | /*! Intersects N nodes with K rays */ |
1457 | template<int N, bool robust> |
1458 | struct BVHNQuantizedBaseNodeIntersector1; |
1459 | |
1460 | template<int N> |
1461 | struct BVHNQuantizedBaseNodeIntersector1<N, false> |
1462 | { |
1463 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist) |
1464 | { |
1465 | return intersectNode(node,ray,dist); |
1466 | } |
1467 | |
1468 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) |
1469 | { |
1470 | return intersectNode(node,ray,time,dist); |
1471 | } |
1472 | |
1473 | }; |
1474 | |
1475 | template<int N> |
1476 | struct BVHNQuantizedBaseNodeIntersector1<N, true> |
1477 | { |
1478 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist) |
1479 | { |
1480 | return intersectNode(node,ray,dist); |
1481 | } |
1482 | |
1483 | static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) |
1484 | { |
1485 | return intersectNode(node,ray,time,dist); |
1486 | } |
1487 | |
1488 | }; |
1489 | |
1490 | |
1491 | } |
1492 | } |
1493 | |