1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "triangle.h"
7#include "intersector_epilog.h"
8
9/*! This intersector implements a modified version of the Woop's ray-triangle intersection test */
10
11namespace embree
12{
13 namespace isa
14 {
15 template<int M>
16 struct WoopHitM
17 {
18 __forceinline WoopHitM() {}
19
20 __forceinline WoopHitM(const vbool<M>& valid,
21 const vfloat<M>& U,
22 const vfloat<M>& V,
23 const vfloat<M>& T,
24 const vfloat<M>& inv_det,
25 const Vec3vf<M>& Ng)
26 : U(U), V(V), T(T), inv_det(inv_det), valid(valid), vNg(Ng) {}
27
28 __forceinline void finalize()
29 {
30 vt = T;
31 vu = U*inv_det;
32 vv = V*inv_det;
33 }
34
35 __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
36 __forceinline float t (const size_t i) const { return vt[i]; }
37 __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
38
39 private:
40 const vfloat<M> U;
41 const vfloat<M> V;
42 const vfloat<M> T;
43 const vfloat<M> inv_det;
44
45 public:
46 const vbool<M> valid;
47 vfloat<M> vu;
48 vfloat<M> vv;
49 vfloat<M> vt;
50 Vec3vf<M> vNg;
51 };
52
53 template<int M>
54 struct WoopPrecalculations1
55 {
56 unsigned int kx,ky,kz;
57 Vec3vf<M> org;
58 Vec3fa S;
59 __forceinline WoopPrecalculations1() {}
60
61 __forceinline WoopPrecalculations1(const Ray& ray, const void* ptr)
62 {
63 kz = maxDim(abs(ray.dir));
64 kx = (kz+1) % 3;
65 ky = (kx+1) % 3;
66 const float inv_dir_kz = rcp(ray.dir[kz]);
67 if (ray.dir[kz]) std::swap(kx,ky);
68 S.x = ray.dir[kx] * inv_dir_kz;
69 S.y = ray.dir[ky] * inv_dir_kz;
70 S.z = inv_dir_kz;
71 org = Vec3vf<M>(ray.org[kx],ray.org[ky],ray.org[kz]);
72 }
73 };
74
75
76 template<int M>
77 struct WoopIntersector1
78 {
79
80 typedef WoopPrecalculations1<M> Precalculations;
81
82 __forceinline WoopIntersector1() {}
83
84 __forceinline WoopIntersector1(const Ray& ray, const void* ptr) {}
85
86 static __forceinline bool intersect(const vbool<M>& valid0,
87 Ray& ray,
88 const Precalculations& pre,
89 const Vec3vf<M>& tri_v0,
90 const Vec3vf<M>& tri_v1,
91 const Vec3vf<M>& tri_v2,
92 WoopHitM<M>& hit)
93 {
94 vbool<M> valid = valid0;
95
96 /* vertices relative to ray origin */
97 const Vec3vf<M> org = Vec3vf<M>(pre.org.x,pre.org.y,pre.org.z);
98 const Vec3vf<M> A = Vec3vf<M>(tri_v0[pre.kx],tri_v0[pre.ky],tri_v0[pre.kz]) - org;
99 const Vec3vf<M> B = Vec3vf<M>(tri_v1[pre.kx],tri_v1[pre.ky],tri_v1[pre.kz]) - org;
100 const Vec3vf<M> C = Vec3vf<M>(tri_v2[pre.kx],tri_v2[pre.ky],tri_v2[pre.kz]) - org;
101
102 /* shear and scale vertices */
103 const vfloat<M> Ax = nmadd(A.z,pre.S.x,A.x);
104 const vfloat<M> Ay = nmadd(A.z,pre.S.y,A.y);
105 const vfloat<M> Bx = nmadd(B.z,pre.S.x,B.x);
106 const vfloat<M> By = nmadd(B.z,pre.S.y,B.y);
107 const vfloat<M> Cx = nmadd(C.z,pre.S.x,C.x);
108 const vfloat<M> Cy = nmadd(C.z,pre.S.y,C.y);
109
110 /* scaled barycentric */
111 const vfloat<M> U0 = Cx*By;
112 const vfloat<M> U1 = Cy*Bx;
113 const vfloat<M> V0 = Ax*Cy;
114 const vfloat<M> V1 = Ay*Cx;
115 const vfloat<M> W0 = Bx*Ay;
116 const vfloat<M> W1 = By*Ax;
117#if !defined(__AVX512F__)
118 valid &= (U0 >= U1) & (V0 >= V1) & (W0 >= W1) |
119 (U0 <= U1) & (V0 <= V1) & (W0 <= W1);
120#else
121 valid &= ge(ge(U0 >= U1,V0,V1),W0,W1) | le(le(U0 <= U1,V0,V1),W0,W1);
122#endif
123
124 if (likely(none(valid))) return false;
125 const vfloat<M> U = U0-U1;
126 const vfloat<M> V = V0-V1;
127 const vfloat<M> W = W0-W1;
128
129 const vfloat<M> det = U+V+W;
130
131 valid &= det != 0.0f;
132 const vfloat<M> inv_det = rcp(det);
133
134 const vfloat<M> Az = pre.S.z * A.z;
135 const vfloat<M> Bz = pre.S.z * B.z;
136 const vfloat<M> Cz = pre.S.z * C.z;
137 const vfloat<M> T = madd(U,Az,madd(V,Bz,W*Cz));
138 const vfloat<M> t = T * inv_det;
139 /* perform depth test */
140 valid &= (vfloat<M>(ray.tnear()) < t) & (t <= vfloat<M>(ray.tfar));
141 if (likely(none(valid))) return false;
142
143 const Vec3vf<M> tri_Ng = cross(tri_v2-tri_v0,tri_v0-tri_v1);
144
145 /* update hit information */
146 new (&hit) WoopHitM<M>(valid,U,V,t,inv_det,tri_Ng);
147 return true;
148 }
149
150 static __forceinline bool intersect(Ray& ray,
151 const Precalculations& pre,
152 const Vec3vf<M>& v0,
153 const Vec3vf<M>& v1,
154 const Vec3vf<M>& v2,
155 WoopHitM<M>& hit)
156 {
157 vbool<M> valid = true;
158 return intersect(valid,ray,pre,v0,v1,v2,hit);
159 }
160
161
162 template<typename Epilog>
163 static __forceinline bool intersect(Ray& ray,
164 const Precalculations& pre,
165 const Vec3vf<M>& v0,
166 const Vec3vf<M>& v1,
167 const Vec3vf<M>& v2,
168 const Epilog& epilog)
169 {
170 WoopHitM<M> hit;
171 if (likely(intersect(ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
172 return false;
173 }
174
175 template<typename Epilog>
176 static __forceinline bool intersect(const vbool<M>& valid,
177 Ray& ray,
178 const Precalculations& pre,
179 const Vec3vf<M>& v0,
180 const Vec3vf<M>& v1,
181 const Vec3vf<M>& v2,
182 const Epilog& epilog)
183 {
184 WoopHitM<M> hit;
185 if (likely(intersect(valid,ray,pre,v0,v1,v2,hit))) return epilog(hit.valid,hit);
186 return false;
187 }
188 };
189
190#if 0
191 template<int K>
192 struct WoopHitK
193 {
194 __forceinline WoopHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng)
195 : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {}
196
197 __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
198 {
199 const vfloat<K> rcpAbsDen = rcp(absDen);
200 const vfloat<K> t = T * rcpAbsDen;
201 const vfloat<K> u = U * rcpAbsDen;
202 const vfloat<K> v = V * rcpAbsDen;
203 return std::make_tuple(u,v,t,Ng);
204 }
205
206 private:
207 const vfloat<K> U;
208 const vfloat<K> V;
209 const vfloat<K> T;
210 const vfloat<K> absDen;
211 const Vec3vf<K> Ng;
212 };
213
214 template<int M, int K>
215 struct WoopIntersectorK
216 {
217 __forceinline WoopIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {}
218
219 /*! Intersects K rays with one of M triangles. */
220 template<typename Epilog>
221 __forceinline vbool<K> intersectK(const vbool<K>& valid0,
222 //RayK<K>& ray,
223 const Vec3vf<K>& ray_org,
224 const Vec3vf<K>& ray_dir,
225 const vfloat<K>& ray_tnear,
226 const vfloat<K>& ray_tfar,
227 const Vec3vf<K>& tri_v0,
228 const Vec3vf<K>& tri_e1,
229 const Vec3vf<K>& tri_e2,
230 const Vec3vf<K>& tri_Ng,
231 const Epilog& epilog) const
232 {
233 /* calculate denominator */
234 vbool<K> valid = valid0;
235 const Vec3vf<K> C = tri_v0 - ray_org;
236 const Vec3vf<K> R = cross(C,ray_dir);
237 const vfloat<K> den = dot(tri_Ng,ray_dir);
238 const vfloat<K> absDen = abs(den);
239 const vfloat<K> sgnDen = signmsk(den);
240
241 /* test against edge p2 p0 */
242 const vfloat<K> U = dot(tri_e2,R) ^ sgnDen;
243 valid &= U >= 0.0f;
244 if (likely(none(valid))) return false;
245
246 /* test against edge p0 p1 */
247 const vfloat<K> V = dot(tri_e1,R) ^ sgnDen;
248 valid &= V >= 0.0f;
249 if (likely(none(valid))) return false;
250
251 /* test against edge p1 p2 */
252 const vfloat<K> W = absDen-U-V;
253 valid &= W >= 0.0f;
254 if (likely(none(valid))) return false;
255
256 /* perform depth test */
257 const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
258 valid &= (absDen*ray_tnear < T) & (T <= absDen*ray_tfar);
259 if (unlikely(none(valid))) return false;
260
261 /* perform backface culling */
262#if defined(EMBREE_BACKFACE_CULLING)
263 valid &= den < vfloat<K>(zero);
264 if (unlikely(none(valid))) return false;
265#else
266 valid &= den != vfloat<K>(zero);
267 if (unlikely(none(valid))) return false;
268#endif
269
270 /* calculate hit information */
271 WoopHitK<K> hit(U,V,T,absDen,tri_Ng);
272 return epilog(valid,hit);
273 }
274
275 /*! Intersects K rays with one of M triangles. */
276 template<typename Epilog>
277 __forceinline vbool<K> intersectK(const vbool<K>& valid0,
278 RayK<K>& ray,
279 const Vec3vf<K>& tri_v0,
280 const Vec3vf<K>& tri_v1,
281 const Vec3vf<K>& tri_v2,
282 const Epilog& epilog) const
283 {
284 const Vec3vf<K> e1 = tri_v0-tri_v1;
285 const Vec3vf<K> e2 = tri_v2-tri_v0;
286 const Vec3vf<K> Ng = cross(e2,e1);
287 return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog);
288 }
289
290 /*! Intersects K rays with one of M triangles. */
291 template<typename Epilog>
292 __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0,
293 RayK<K>& ray,
294 const Vec3vf<K>& tri_v0,
295 const Vec3vf<K>& tri_e1,
296 const Vec3vf<K>& tri_e2,
297 const Epilog& epilog) const
298 {
299 const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1);
300 return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog);
301 }
302
303 /*! Intersect k'th ray from ray packet of size K with M triangles. */
304 __forceinline bool intersectEdge(RayK<K>& ray,
305 size_t k,
306 const Vec3vf<M>& tri_v0,
307 const Vec3vf<M>& tri_e1,
308 const Vec3vf<M>& tri_e2,
309 WoopHitM<M>& hit) const
310 {
311 /* calculate denominator */
312 typedef Vec3vf<M> Vec3vfM;
313 const Vec3vf<M> tri_Ng = cross(tri_e2,tri_e1);
314
315 const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
316 const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
317 const Vec3vfM C = Vec3vfM(tri_v0) - O;
318 const Vec3vfM R = cross(C,D);
319 const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
320 const vfloat<M> absDen = abs(den);
321 const vfloat<M> sgnDen = signmsk(den);
322
323 /* perform edge tests */
324 const vfloat<M> U = dot(Vec3vf<M>(tri_e2),R) ^ sgnDen;
325 const vfloat<M> V = dot(Vec3vf<M>(tri_e1),R) ^ sgnDen;
326
327 /* perform backface culling */
328#if defined(EMBREE_BACKFACE_CULLING)
329 vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
330#else
331 vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
332#endif
333 if (likely(none(valid))) return false;
334
335 /* perform depth test */
336 const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen;
337 valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k]));
338 if (likely(none(valid))) return false;
339
340 /* calculate hit information */
341 new (&hit) WoopHitM<M>(valid,U,V,T,absDen,tri_Ng);
342 return true;
343 }
344
345 __forceinline bool intersectEdge(RayK<K>& ray,
346 size_t k,
347 const BBox<vfloat<M>>& time_range,
348 const Vec3vf<M>& tri_v0,
349 const Vec3vf<M>& tri_e1,
350 const Vec3vf<M>& tri_e2,
351 WoopHitM<M>& hit) const
352 {
353 if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit)))
354 {
355 hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]);
356 hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper;
357 return any(hit.valid);
358 }
359 return false;
360 }
361
362 template<typename Epilog>
363 __forceinline bool intersectEdge(RayK<K>& ray,
364 size_t k,
365 const Vec3vf<M>& tri_v0,
366 const Vec3vf<M>& tri_e1,
367 const Vec3vf<M>& tri_e2,
368 const Epilog& epilog) const
369 {
370 WoopHitM<M> hit;
371 if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
372 return false;
373 }
374
375 template<typename Epilog>
376 __forceinline bool intersectEdge(RayK<K>& ray,
377 size_t k,
378 const BBox<vfloat<M>>& time_range,
379 const Vec3vf<M>& tri_v0,
380 const Vec3vf<M>& tri_e1,
381 const Vec3vf<M>& tri_e2,
382 const Epilog& epilog) const
383 {
384 WoopHitM<M> hit;
385 if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit);
386 return false;
387 }
388
389 template<typename Epilog>
390 __forceinline bool intersect(RayK<K>& ray,
391 size_t k,
392 const Vec3vf<M>& v0,
393 const Vec3vf<M>& v1,
394 const Vec3vf<M>& v2,
395 const Epilog& epilog) const
396 {
397 const Vec3vf<M> e1 = v0-v1;
398 const Vec3vf<M> e2 = v2-v0;
399 return intersectEdge(ray,k,v0,e1,e2,epilog);
400 }
401
402 template<typename Epilog>
403 __forceinline bool intersect(RayK<K>& ray,
404 size_t k,
405 const BBox<vfloat<M>>& time_range,
406 const Vec3vf<M>& v0,
407 const Vec3vf<M>& v1,
408 const Vec3vf<M>& v2,
409 const Epilog& epilog) const
410 {
411 const Vec3vf<M> e1 = v0-v1;
412 const Vec3vf<M> e2 = v2-v0;
413 return intersectEdge(ray,k,time_range,v0,e1,e2,epilog);
414 }
415 };
416#endif
417 }
418}
419