1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "node_intersector.h" |
7 | |
8 | namespace embree |
9 | { |
10 | namespace isa |
11 | { |
12 | ////////////////////////////////////////////////////////////////////////////////////// |
13 | // Ray packet structure used in hybrid traversal |
14 | ////////////////////////////////////////////////////////////////////////////////////// |
15 | |
16 | template<int K, bool robust> |
17 | struct TravRayK; |
18 | |
19 | /* Fast variant */ |
20 | template<int K> |
21 | struct TravRayK<K, false> |
22 | { |
23 | __forceinline TravRayK() {} |
24 | |
25 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
26 | { |
27 | init(ray_org, ray_dir, N); |
28 | } |
29 | |
30 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) |
31 | { |
32 | init(ray_org, ray_dir, N); |
33 | tnear = ray_tnear; |
34 | tfar = ray_tfar; |
35 | } |
36 | |
37 | __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
38 | { |
39 | org = ray_org; |
40 | dir = ray_dir; |
41 | rdir = rcp_safe(ray_dir); |
42 | #if defined(__aarch64__) |
43 | neg_org_rdir = -(org * rdir); |
44 | #elif defined(__AVX2__) |
45 | org_rdir = org * rdir; |
46 | #endif |
47 | |
48 | if (N) |
49 | { |
50 | const int size = sizeof(float)*N; |
51 | nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); |
52 | nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); |
53 | nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); |
54 | } |
55 | } |
56 | |
57 | Vec3vf<K> org; |
58 | Vec3vf<K> dir; |
59 | Vec3vf<K> rdir; |
60 | #if defined(__aarch64__) |
61 | Vec3vf<K> neg_org_rdir; |
62 | #elif defined(__AVX2__) |
63 | Vec3vf<K> org_rdir; |
64 | #endif |
65 | Vec3vi<K> nearXYZ; |
66 | vfloat<K> tnear; |
67 | vfloat<K> tfar; |
68 | }; |
69 | |
70 | template<int K> |
71 | using TravRayKFast = TravRayK<K, false>; |
72 | |
73 | /* Robust variant */ |
74 | template<int K> |
75 | struct TravRayK<K, true> |
76 | { |
77 | __forceinline TravRayK() {} |
78 | |
79 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
80 | { |
81 | init(ray_org, ray_dir, N); |
82 | } |
83 | |
84 | __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) |
85 | { |
86 | init(ray_org, ray_dir, N); |
87 | tnear = ray_tnear; |
88 | tfar = ray_tfar; |
89 | } |
90 | |
91 | __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N) |
92 | { |
93 | org = ray_org; |
94 | dir = ray_dir; |
95 | rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir)); |
96 | |
97 | if (N) |
98 | { |
99 | const int size = sizeof(float)*N; |
100 | nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size)); |
101 | nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size)); |
102 | nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size)); |
103 | } |
104 | } |
105 | |
106 | Vec3vf<K> org; |
107 | Vec3vf<K> dir; |
108 | Vec3vf<K> rdir; |
109 | Vec3vi<K> nearXYZ; |
110 | vfloat<K> tnear; |
111 | vfloat<K> tfar; |
112 | }; |
113 | |
114 | template<int K> |
115 | using TravRayKRobust = TravRayK<K, true>; |
116 | |
117 | ////////////////////////////////////////////////////////////////////////////////////// |
118 | // Fast AABBNode intersection |
119 | ////////////////////////////////////////////////////////////////////////////////////// |
120 | |
121 | template<int N, int K> |
122 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i, |
123 | const TravRayKFast<K>& ray, vfloat<K>& dist) |
124 | |
125 | { |
126 | #if defined(__aarch64__) |
127 | const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); |
128 | const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); |
129 | const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); |
130 | const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); |
131 | const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); |
132 | const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); |
133 | #elif defined(__AVX2__) |
134 | const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x); |
135 | const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y); |
136 | const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z); |
137 | const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x); |
138 | const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y); |
139 | const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z); |
140 | #else |
141 | const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; |
142 | const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; |
143 | const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; |
144 | const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; |
145 | const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; |
146 | const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; |
147 | #endif |
148 | |
149 | #if defined(__AVX512F__) // SKX |
150 | if (K == 16) |
151 | { |
152 | /* use mixed float/int min/max */ |
153 | const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
154 | const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
155 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
156 | dist = lnearP; |
157 | return lhit; |
158 | } |
159 | else |
160 | #endif |
161 | { |
162 | const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
163 | const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
164 | #if defined(__AVX512F__) // SKX |
165 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
166 | #else |
167 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
168 | #endif |
169 | dist = lnearP; |
170 | return lhit; |
171 | } |
172 | } |
173 | |
174 | ////////////////////////////////////////////////////////////////////////////////////// |
175 | // Robust AABBNode intersection |
176 | ////////////////////////////////////////////////////////////////////////////////////// |
177 | |
178 | template<int N, int K> |
179 | __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i, |
180 | const TravRayKRobust<K>& ray, vfloat<K>& dist) |
181 | { |
182 | // FIXME: use per instruction rounding for AVX512 |
183 | const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x; |
184 | const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y; |
185 | const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z; |
186 | const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x; |
187 | const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y; |
188 | const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; |
189 | const float round_up = 1.0f+3.0f*float(ulp); |
190 | const float round_down = 1.0f-3.0f*float(ulp); |
191 | const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); |
192 | const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); |
193 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
194 | dist = lnearP; |
195 | return lhit; |
196 | } |
197 | |
198 | ////////////////////////////////////////////////////////////////////////////////////// |
199 | // Fast AABBNodeMB intersection |
200 | ////////////////////////////////////////////////////////////////////////////////////// |
201 | |
202 | template<int N, int K> |
203 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i, |
204 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
205 | { |
206 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
207 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
208 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
209 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
210 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
211 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
212 | |
213 | #if defined(__aarch64__) |
214 | const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); |
215 | const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); |
216 | const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); |
217 | const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); |
218 | const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); |
219 | const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); |
220 | #elif defined(__AVX2__) |
221 | const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); |
222 | const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); |
223 | const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); |
224 | const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); |
225 | const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); |
226 | const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); |
227 | #else |
228 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
229 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
230 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
231 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
232 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
233 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
234 | #endif |
235 | |
236 | #if defined(__AVX512F__) // SKX |
237 | if (K == 16) |
238 | { |
239 | /* use mixed float/int min/max */ |
240 | const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
241 | const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
242 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
243 | dist = lnearP; |
244 | return lhit; |
245 | } |
246 | else |
247 | #endif |
248 | { |
249 | const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
250 | const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
251 | #if defined(__AVX512F__) // SKX |
252 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
253 | #else |
254 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
255 | #endif |
256 | dist = lnearP; |
257 | return lhit; |
258 | } |
259 | } |
260 | |
261 | ////////////////////////////////////////////////////////////////////////////////////// |
262 | // Robust AABBNodeMB intersection |
263 | ////////////////////////////////////////////////////////////////////////////////////// |
264 | |
265 | template<int N, int K> |
266 | __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i, |
267 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
268 | { |
269 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
270 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
271 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
272 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
273 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
274 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
275 | |
276 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
277 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
278 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
279 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
280 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
281 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
282 | |
283 | const float round_up = 1.0f+3.0f*float(ulp); |
284 | const float round_down = 1.0f-3.0f*float(ulp); |
285 | |
286 | #if defined(__AVX512F__) // SKX |
287 | if (K == 16) |
288 | { |
289 | const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
290 | const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
291 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
292 | dist = lnearP; |
293 | return lhit; |
294 | } |
295 | else |
296 | #endif |
297 | { |
298 | const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
299 | const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
300 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
301 | dist = lnearP; |
302 | return lhit; |
303 | } |
304 | } |
305 | |
306 | ////////////////////////////////////////////////////////////////////////////////////// |
307 | // Fast AABBNodeMB4D intersection |
308 | ////////////////////////////////////////////////////////////////////////////////////// |
309 | |
310 | template<int N, int K> |
311 | __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i, |
312 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
313 | { |
314 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
315 | |
316 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
317 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
318 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
319 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
320 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
321 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
322 | |
323 | #if defined(__aarch64__) |
324 | const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); |
325 | const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); |
326 | const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); |
327 | const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); |
328 | const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); |
329 | const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); |
330 | #elif defined(__AVX2__) |
331 | const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); |
332 | const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); |
333 | const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); |
334 | const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x); |
335 | const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y); |
336 | const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z); |
337 | #else |
338 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
339 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
340 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
341 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
342 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
343 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
344 | #endif |
345 | |
346 | const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); |
347 | const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); |
348 | vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
349 | if (unlikely(ref.isAABBNodeMB4D())) { |
350 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
351 | lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); |
352 | } |
353 | dist = lnearP; |
354 | return lhit; |
355 | } |
356 | |
357 | ////////////////////////////////////////////////////////////////////////////////////// |
358 | // Robust AABBNodeMB4D intersection |
359 | ////////////////////////////////////////////////////////////////////////////////////// |
360 | |
361 | template<int N, int K> |
362 | __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i, |
363 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist) |
364 | { |
365 | const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); |
366 | |
367 | const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i])); |
368 | const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i])); |
369 | const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i])); |
370 | const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i])); |
371 | const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); |
372 | const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); |
373 | |
374 | const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x; |
375 | const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y; |
376 | const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z; |
377 | const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x; |
378 | const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y; |
379 | const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; |
380 | |
381 | const float round_up = 1.0f+3.0f*float(ulp); |
382 | const float round_down = 1.0f-3.0f*float(ulp); |
383 | const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); |
384 | const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); |
385 | vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
386 | |
387 | if (unlikely(ref.isAABBNodeMB4D())) { |
388 | const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node; |
389 | lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i])); |
390 | } |
391 | dist = lnearP; |
392 | return lhit; |
393 | } |
394 | |
395 | ////////////////////////////////////////////////////////////////////////////////////// |
396 | // Fast OBBNode intersection |
397 | ////////////////////////////////////////////////////////////////////////////////////// |
398 | |
399 | template<int N, int K, bool robust> |
400 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i, |
401 | const TravRayK<K,robust>& ray, vfloat<K>& dist) |
402 | { |
403 | const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]), |
404 | Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]), |
405 | Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]), |
406 | Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i])); |
407 | |
408 | const Vec3vf<K> dir = xfmVector(naabb, ray.dir); |
409 | const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1? |
410 | const Vec3vf<K> org = xfmPoint(naabb, ray.org); |
411 | |
412 | const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir; |
413 | const vfloat<K> lclipMinY = org.y * nrdir.y; |
414 | const vfloat<K> lclipMinZ = org.z * nrdir.z; |
415 | const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir; |
416 | const vfloat<K> lclipMaxY = lclipMinY - nrdir.y; |
417 | const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z; |
418 | |
419 | vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
420 | vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
421 | if (robust) { |
422 | lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); |
423 | lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); |
424 | } |
425 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
426 | dist = lnearP; |
427 | return lhit; |
428 | } |
429 | |
430 | ////////////////////////////////////////////////////////////////////////////////////// |
431 | // Fast OBBNodeMB intersection |
432 | ////////////////////////////////////////////////////////////////////////////////////// |
433 | |
434 | template<int N, int K, bool robust> |
435 | __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i, |
436 | const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist) |
437 | { |
438 | const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]), |
439 | Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]), |
440 | Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]), |
441 | Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i])); |
442 | |
443 | const Vec3vf<K> b0_lower = zero; |
444 | const Vec3vf<K> b0_upper = one; |
445 | const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]); |
446 | const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]); |
447 | const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time); |
448 | const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time); |
449 | |
450 | const Vec3vf<K> dir = xfmVector(xfm, ray.dir); |
451 | const Vec3vf<K> rdir = rcp_safe(dir); |
452 | const Vec3vf<K> org = xfmPoint(xfm, ray.org); |
453 | |
454 | const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x; |
455 | const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y; |
456 | const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z; |
457 | const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x; |
458 | const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y; |
459 | const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z; |
460 | |
461 | vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
462 | vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
463 | if (robust) { |
464 | lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp)); |
465 | lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp)); |
466 | } |
467 | |
468 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
469 | dist = lnearP; |
470 | return lhit; |
471 | } |
472 | |
473 | |
474 | |
475 | ////////////////////////////////////////////////////////////////////////////////////// |
476 | // QuantizedBaseNode intersection |
477 | ////////////////////////////////////////////////////////////////////////////////////// |
478 | |
479 | template<int N, int K> |
480 | __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, |
481 | const TravRayK<K,false>& ray, vfloat<K>& dist) |
482 | |
483 | { |
484 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
485 | const vfloat<N> lower_x = node->dequantizeLowerX(); |
486 | const vfloat<N> upper_x = node->dequantizeUpperX(); |
487 | const vfloat<N> lower_y = node->dequantizeLowerY(); |
488 | const vfloat<N> upper_y = node->dequantizeUpperY(); |
489 | const vfloat<N> lower_z = node->dequantizeLowerZ(); |
490 | const vfloat<N> upper_z = node->dequantizeUpperZ(); |
491 | |
492 | #if defined(__aarch64__) |
493 | const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); |
494 | const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); |
495 | const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); |
496 | const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); |
497 | const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); |
498 | const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); |
499 | #elif defined(__AVX2__) |
500 | const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x); |
501 | const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y); |
502 | const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z); |
503 | const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x); |
504 | const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y); |
505 | const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z); |
506 | #else |
507 | const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; |
508 | const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; |
509 | const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; |
510 | const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; |
511 | const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; |
512 | const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; |
513 | #endif |
514 | |
515 | #if defined(__AVX512F__) // SKX |
516 | if (K == 16) |
517 | { |
518 | /* use mixed float/int min/max */ |
519 | const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
520 | const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
521 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
522 | dist = lnearP; |
523 | return lhit; |
524 | } |
525 | else |
526 | #endif |
527 | { |
528 | const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); |
529 | const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); |
530 | #if defined(__AVX512F__) // SKX |
531 | const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); |
532 | #else |
533 | const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); |
534 | #endif |
535 | dist = lnearP; |
536 | return lhit; |
537 | } |
538 | } |
539 | |
540 | template<int N, int K> |
541 | __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i, |
542 | const TravRayK<K,true>& ray, vfloat<K>& dist) |
543 | |
544 | { |
545 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
546 | const vfloat<N> lower_x = node->dequantizeLowerX(); |
547 | const vfloat<N> upper_x = node->dequantizeUpperX(); |
548 | const vfloat<N> lower_y = node->dequantizeLowerY(); |
549 | const vfloat<N> upper_y = node->dequantizeUpperY(); |
550 | const vfloat<N> lower_z = node->dequantizeLowerZ(); |
551 | const vfloat<N> upper_z = node->dequantizeUpperZ(); |
552 | |
553 | const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x; |
554 | const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y; |
555 | const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z; |
556 | const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x; |
557 | const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y; |
558 | const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; |
559 | |
560 | const float round_up = 1.0f+3.0f*float(ulp); |
561 | const float round_down = 1.0f-3.0f*float(ulp); |
562 | |
563 | const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
564 | const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
565 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
566 | dist = lnearP; |
567 | return lhit; |
568 | } |
569 | |
570 | template<int N, int K> |
571 | __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
572 | const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) |
573 | |
574 | { |
575 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
576 | |
577 | const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); |
578 | const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); |
579 | const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); |
580 | const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); |
581 | const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); |
582 | const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); |
583 | |
584 | #if defined(__aarch64__) |
585 | const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); |
586 | const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); |
587 | const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); |
588 | const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); |
589 | const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); |
590 | const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); |
591 | #elif defined(__AVX2__) |
592 | const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); |
593 | const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); |
594 | const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); |
595 | const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); |
596 | const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); |
597 | const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); |
598 | #else |
599 | const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; |
600 | const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; |
601 | const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; |
602 | const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; |
603 | const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; |
604 | const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; |
605 | #endif |
606 | const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
607 | const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
608 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
609 | dist = lnearP; |
610 | return lhit; |
611 | } |
612 | |
613 | |
614 | template<int N, int K> |
615 | __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
616 | const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) |
617 | |
618 | { |
619 | assert(movemask(node->validMask()) & ((size_t)1 << i)); |
620 | |
621 | const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); |
622 | const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); |
623 | const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); |
624 | const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); |
625 | const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); |
626 | const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); |
627 | |
628 | const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; |
629 | const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; |
630 | const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z; |
631 | const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x; |
632 | const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y; |
633 | const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z; |
634 | |
635 | const float round_up = 1.0f+3.0f*float(ulp); |
636 | const float round_down = 1.0f-3.0f*float(ulp); |
637 | |
638 | const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); |
639 | const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ)); |
640 | const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar); |
641 | dist = lnearP; |
642 | return lhit; |
643 | } |
644 | |
645 | |
646 | ////////////////////////////////////////////////////////////////////////////////////// |
647 | // Node intersectors used in hybrid traversal |
648 | ////////////////////////////////////////////////////////////////////////////////////// |
649 | |
650 | /*! Intersects N nodes with K rays */ |
651 | template<int N, int K, int types, bool robust> |
652 | struct BVHNNodeIntersectorK; |
653 | |
654 | template<int N, int K> |
655 | struct BVHNNodeIntersectorK<N, K, BVH_AN1, false> |
656 | { |
657 | /* vmask is both an input and an output parameter! Its initial value should be the parent node |
658 | hit mask, which is used for correctly computing the current hit mask. The parent hit mask |
659 | is actually required only for motion blur node intersections (because different rays may |
660 | have different times), so for regular nodes vmask is simply overwritten. */ |
661 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
662 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
663 | { |
664 | vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); |
665 | return true; |
666 | } |
667 | }; |
668 | |
669 | template<int N, int K> |
670 | struct BVHNNodeIntersectorK<N, K, BVH_AN1, true> |
671 | { |
672 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
673 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
674 | { |
675 | vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); |
676 | return true; |
677 | } |
678 | }; |
679 | |
680 | template<int N, int K> |
681 | struct BVHNNodeIntersectorK<N, K, BVH_AN2, false> |
682 | { |
683 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
684 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
685 | { |
686 | vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
687 | return true; |
688 | } |
689 | }; |
690 | |
691 | template<int N, int K> |
692 | struct BVHNNodeIntersectorK<N, K, BVH_AN2, true> |
693 | { |
694 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
695 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
696 | { |
697 | vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
698 | return true; |
699 | } |
700 | }; |
701 | |
702 | template<int N, int K> |
703 | struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false> |
704 | { |
705 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
706 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
707 | { |
708 | if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist); |
709 | else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); |
710 | return true; |
711 | } |
712 | }; |
713 | |
714 | template<int N, int K> |
715 | struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true> |
716 | { |
717 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
718 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
719 | { |
720 | if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist); |
721 | else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist); |
722 | return true; |
723 | } |
724 | }; |
725 | |
726 | template<int N, int K> |
727 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false> |
728 | { |
729 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
730 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
731 | { |
732 | if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
733 | else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
734 | return true; |
735 | } |
736 | }; |
737 | |
738 | template<int N, int K> |
739 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true> |
740 | { |
741 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
742 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
743 | { |
744 | if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist); |
745 | else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
746 | return true; |
747 | } |
748 | }; |
749 | |
750 | template<int N, int K> |
751 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false> |
752 | { |
753 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
754 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
755 | { |
756 | vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); |
757 | return true; |
758 | } |
759 | }; |
760 | |
761 | template<int N, int K> |
762 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true> |
763 | { |
764 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
765 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
766 | { |
767 | vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); |
768 | return true; |
769 | } |
770 | }; |
771 | |
772 | template<int N, int K> |
773 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false> |
774 | { |
775 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
776 | const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
777 | { |
778 | if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { |
779 | vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist); |
780 | } else /*if (unlikely(node.isOBBNodeMB()))*/ { |
781 | assert(node.isOBBNodeMB()); |
782 | vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
783 | } |
784 | return true; |
785 | } |
786 | }; |
787 | |
788 | template<int N, int K> |
789 | struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true> |
790 | { |
791 | static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i, |
792 | const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask) |
793 | { |
794 | if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) { |
795 | vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist); |
796 | } else /*if (unlikely(node.isOBBNodeMB()))*/ { |
797 | assert(node.isOBBNodeMB()); |
798 | vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist); |
799 | } |
800 | return true; |
801 | } |
802 | }; |
803 | |
804 | |
805 | /*! Intersects N nodes with K rays */ |
806 | template<int N, int K, bool robust> |
807 | struct BVHNQuantizedBaseNodeIntersectorK; |
808 | |
809 | template<int N, int K> |
810 | struct BVHNQuantizedBaseNodeIntersectorK<N, K, false> |
811 | { |
812 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, |
813 | const TravRayK<K,false>& ray, vfloat<K>& dist) |
814 | { |
815 | return intersectQuantizedNodeK<N,K>(node,i,ray,dist); |
816 | } |
817 | |
818 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
819 | const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist) |
820 | { |
821 | return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); |
822 | } |
823 | |
824 | }; |
825 | |
826 | template<int N, int K> |
827 | struct BVHNQuantizedBaseNodeIntersectorK<N, K, true> |
828 | { |
829 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i, |
830 | const TravRayK<K,true>& ray, vfloat<K>& dist) |
831 | { |
832 | return intersectQuantizedNodeK<N,K>(node,i,ray,dist); |
833 | } |
834 | |
835 | static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i, |
836 | const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist) |
837 | { |
838 | return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist); |
839 | } |
840 | }; |
841 | |
842 | |
843 | } |
844 | } |
845 | |