1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "quad_intersector_moeller.h"
7
8/*! Modified Pluecker ray/triangle intersector. The test first shifts
9 * the ray origin into the origin of the coordinate system and then
10 * uses Pluecker coordinates for the intersection. Due to the shift,
11 * the Pluecker coordinate calculation simplifies and the tests get
12 * numerically stable. The edge equations are watertight along the
13 * edge for neighboring triangles. */
14
15namespace embree
16{
17 namespace isa
18 {
19 template<int M>
20 struct QuadHitPlueckerM
21 {
22 __forceinline QuadHitPlueckerM() {}
23
24 __forceinline QuadHitPlueckerM(const vbool<M>& valid,
25 const vfloat<M>& U,
26 const vfloat<M>& V,
27 const vfloat<M>& UVW,
28 const vfloat<M>& t,
29 const Vec3vf<M>& Ng,
30 const vbool<M>& flags)
31 : U(U), V(V), UVW(UVW), tri_Ng(Ng), valid(valid), vt(t), flags(flags) {}
32
33 __forceinline void finalize()
34 {
35 const vbool<M> invalid = abs(UVW) < min_rcp_input;
36 const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW));
37 const vfloat<M> u = min(U * rcpUVW,1.0f);
38 const vfloat<M> v = min(V * rcpUVW,1.0f);
39 const vfloat<M> u1 = vfloat<M>(1.0f) - u;
40 const vfloat<M> v1 = vfloat<M>(1.0f) - v;
41#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
42 vu = select(flags,u1,u);
43 vv = select(flags,v1,v);
44 vNg = Vec3vf<M>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
45#else
46 const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
47 vv = select(flags,u1,v);
48 vu = select(flags,v1,u);
49 vNg = Vec3vf<M>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
50#endif
51 }
52
53 __forceinline Vec2f uv(const size_t i)
54 {
55 const float u = vu[i];
56 const float v = vv[i];
57 return Vec2f(u,v);
58 }
59
60 __forceinline float t(const size_t i) { return vt[i]; }
61 __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
62
63 private:
64 vfloat<M> U;
65 vfloat<M> V;
66 vfloat<M> UVW;
67 Vec3vf<M> tri_Ng;
68
69 public:
70 vbool<M> valid;
71 vfloat<M> vu;
72 vfloat<M> vv;
73 vfloat<M> vt;
74 Vec3vf<M> vNg;
75
76 public:
77 const vbool<M> flags;
78 };
79
80 template<int K>
81 struct QuadHitPlueckerK
82 {
83 __forceinline QuadHitPlueckerK(const vfloat<K>& U,
84 const vfloat<K>& V,
85 const vfloat<K>& UVW,
86 const vfloat<K>& t,
87 const Vec3vf<K>& Ng,
88 const vbool<K>& flags)
89 : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng) {}
90
91 __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const
92 {
93 const vbool<K> invalid = abs(UVW) < min_rcp_input;
94 const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW));
95 const vfloat<K> u0 = min(U * rcpUVW,1.0f);
96 const vfloat<K> v0 = min(V * rcpUVW,1.0f);
97 const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
98 const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
99 const vfloat<K> u = select(flags,u1,u0);
100 const vfloat<K> v = select(flags,v1,v0);
101 const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
102 return std::make_tuple(u,v,t,Ng);
103 }
104
105 private:
106 const vfloat<K> U;
107 const vfloat<K> V;
108 const vfloat<K> UVW;
109 const vfloat<K> t;
110 const vbool<K> flags;
111 const Vec3vf<K> tri_Ng;
112 };
113
114 struct PlueckerIntersectorTriangle1
115 {
116 template<int M, typename Epilog>
117 static __forceinline bool intersect(Ray& ray,
118 const Vec3vf<M>& tri_v0,
119 const Vec3vf<M>& tri_v1,
120 const Vec3vf<M>& tri_v2,
121 const vbool<M>& flags,
122 const Epilog& epilog)
123 {
124 /* calculate vertices relative to ray origin */
125 const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org);
126 const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir);
127 const Vec3vf<M> v0 = tri_v0-O;
128 const Vec3vf<M> v1 = tri_v1-O;
129 const Vec3vf<M> v2 = tri_v2-O;
130
131 /* calculate triangle edges */
132 const Vec3vf<M> e0 = v2-v0;
133 const Vec3vf<M> e1 = v0-v1;
134 const Vec3vf<M> e2 = v1-v2;
135
136 /* perform edge tests */
137 const vfloat<M> U = dot(cross(e0,v2+v0),D);
138 const vfloat<M> V = dot(cross(e1,v0+v1),D);
139 const vfloat<M> W = dot(cross(e2,v1+v2),D);
140 const vfloat<M> UVW = U+V+W;
141 const vfloat<M> eps = float(ulp)*abs(UVW);
142#if defined(EMBREE_BACKFACE_CULLING)
143 vbool<M> valid = max(U,V,W) <= eps;
144#else
145 vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
146#endif
147 if (unlikely(none(valid))) return false;
148
149 /* calculate geometry normal and denominator */
150 const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
151 const vfloat<M> den = twice(dot(Ng,D));
152
153 /* perform depth test */
154 const vfloat<M> T = twice(dot(v0,Ng));
155 const vfloat<M> t = rcp(den)*T;
156 valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar);
157 valid &= den != vfloat<M>(zero);
158 if (unlikely(none(valid))) return false;
159
160 /* update hit information */
161 QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
162 return epilog(valid,hit);
163 }
164 };
165
166 /*! Intersects M quads with 1 ray */
167 template<int M, bool filter>
168 struct QuadMIntersector1Pluecker
169 {
170 __forceinline QuadMIntersector1Pluecker() {}
171
172 __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
173
174 __forceinline void intersect(RayHit& ray, IntersectContext* context,
175 const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
176 const vuint<M>& geomID, const vuint<M>& primID) const
177 {
178 Intersect1EpilogM<M,filter> epilog(ray,context,geomID,primID);
179 PlueckerIntersectorTriangle1::intersect<M>(ray,v0,v1,v3,vbool<M>(false),epilog);
180 PlueckerIntersectorTriangle1::intersect<M>(ray,v2,v3,v1,vbool<M>(true),epilog);
181 }
182
183 __forceinline bool occluded(Ray& ray, IntersectContext* context,
184 const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
185 const vuint<M>& geomID, const vuint<M>& primID) const
186 {
187 Occluded1EpilogM<M,filter> epilog(ray,context,geomID,primID);
188 if (PlueckerIntersectorTriangle1::intersect<M>(ray,v0,v1,v3,vbool<M>(false),epilog)) return true;
189 if (PlueckerIntersectorTriangle1::intersect<M>(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true;
190 return false;
191 }
192 };
193
194#if defined(__AVX__)
195
196 /*! Intersects 4 quads with 1 ray using AVX */
197 template<bool filter>
198 struct QuadMIntersector1Pluecker<4,filter>
199 {
200 __forceinline QuadMIntersector1Pluecker() {}
201
202 __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {}
203
204 template<typename Epilog>
205 __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
206 {
207 const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
208#if !defined(EMBREE_BACKFACE_CULLING)
209 const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
210 const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
211#else
212 const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
213 const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
214#endif
215 const vbool8 flags(0,0,0,0,1,1,1,1);
216 return PlueckerIntersectorTriangle1::intersect<8>(ray,vtx0,vtx1,vtx2,flags,epilog);
217 }
218
219 __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
220 const vuint4& geomID, const vuint4& primID) const
221 {
222 return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
223 }
224
225 __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
226 const vuint4& geomID, const vuint4& primID) const
227 {
228 return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID)));
229 }
230 };
231
232#endif
233
234
235 /* ----------------------------- */
236 /* -- ray packet intersectors -- */
237 /* ----------------------------- */
238
239 struct PlueckerIntersector1KTriangleM
240 {
241 /*! Intersect k'th ray from ray packet of size K with M triangles. */
242 template<int M, int K, typename Epilog>
243 static __forceinline bool intersect1(RayK<K>& ray,
244 size_t k,
245 const Vec3vf<M>& tri_v0,
246 const Vec3vf<M>& tri_v1,
247 const Vec3vf<M>& tri_v2,
248 const vbool<M>& flags,
249 const Epilog& epilog)
250 {
251 /* calculate vertices relative to ray origin */
252 const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k);
253 const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k);
254 const Vec3vf<M> v0 = tri_v0-O;
255 const Vec3vf<M> v1 = tri_v1-O;
256 const Vec3vf<M> v2 = tri_v2-O;
257
258 /* calculate triangle edges */
259 const Vec3vf<M> e0 = v2-v0;
260 const Vec3vf<M> e1 = v0-v1;
261 const Vec3vf<M> e2 = v1-v2;
262
263 /* perform edge tests */
264 const vfloat<M> U = dot(cross(e0,v2+v0),D);
265 const vfloat<M> V = dot(cross(e1,v0+v1),D);
266 const vfloat<M> W = dot(cross(e2,v1+v2),D);
267
268 const vfloat<M> UVW = U+V+W;
269 const vfloat<M> eps = float(ulp)*abs(UVW);
270#if defined(EMBREE_BACKFACE_CULLING)
271 vbool<M> valid = max(U,V,W) <= eps;
272#else
273 vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
274#endif
275 if (unlikely(none(valid))) return false;
276
277 /* calculate geometry normal and denominator */
278 const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2);
279 const vfloat<M> den = twice(dot(Ng,D));
280
281 /* perform depth test */
282 const vfloat<M> T = twice(dot(v0,Ng));
283 const vfloat<M> t = rcp(den)*T;
284 valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]);
285 if (unlikely(none(valid))) return false;
286
287 /* avoid division by 0 */
288 valid &= den != vfloat<M>(zero);
289 if (unlikely(none(valid))) return false;
290
291 /* update hit information */
292 QuadHitPlueckerM<M> hit(valid,U,V,UVW,t,Ng,flags);
293 return epilog(valid,hit);
294 }
295 };
296
297 template<int M, int K, bool filter>
298 struct QuadMIntersectorKPlueckerBase
299 {
300 __forceinline QuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {}
301
302 /*! Intersects K rays with one of M triangles. */
303 template<typename Epilog>
304 __forceinline vbool<K> intersectK(const vbool<K>& valid0,
305 RayK<K>& ray,
306 const Vec3vf<K>& tri_v0,
307 const Vec3vf<K>& tri_v1,
308 const Vec3vf<K>& tri_v2,
309 const vbool<K>& flags,
310 const Epilog& epilog) const
311 {
312 /* calculate vertices relative to ray origin */
313 vbool<K> valid = valid0;
314 const Vec3vf<K> O = ray.org;
315 const Vec3vf<K> D = ray.dir;
316 const Vec3vf<K> v0 = tri_v0-O;
317 const Vec3vf<K> v1 = tri_v1-O;
318 const Vec3vf<K> v2 = tri_v2-O;
319
320 /* calculate triangle edges */
321 const Vec3vf<K> e0 = v2-v0;
322 const Vec3vf<K> e1 = v0-v1;
323 const Vec3vf<K> e2 = v1-v2;
324
325 /* perform edge tests */
326 const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D);
327 const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D);
328 const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D);
329 const vfloat<K> UVW = U+V+W;
330 const vfloat<K> eps = float(ulp)*abs(UVW);
331#if defined(EMBREE_BACKFACE_CULLING)
332 valid &= max(U,V,W) <= eps;
333#else
334 valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps);
335#endif
336 if (unlikely(none(valid))) return false;
337
338 /* calculate geometry normal and denominator */
339 const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2);
340 const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D));
341
342 /* perform depth test */
343 const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng)));
344 const vfloat<K> t = rcp(den)*T;
345 valid &= ray.tnear() <= t & t <= ray.tfar;
346 valid &= den != vfloat<K>(zero);
347 if (unlikely(none(valid))) return false;
348
349 /* calculate hit information */
350 QuadHitPlueckerK<K> hit(U,V,UVW,t,Ng,flags);
351 return epilog(valid,hit);
352 }
353
354 /*! Intersects K rays with one of M quads. */
355 template<typename Epilog>
356 __forceinline bool intersectK(const vbool<K>& valid0,
357 RayK<K>& ray,
358 const Vec3vf<K>& v0,
359 const Vec3vf<K>& v1,
360 const Vec3vf<K>& v2,
361 const Vec3vf<K>& v3,
362 const Epilog& epilog) const
363 {
364 intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
365 if (none(valid0)) return true;
366 intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
367 return none(valid0);
368 }
369 };
370
371 template<int M, int K, bool filter>
372 struct QuadMIntersectorKPluecker : public QuadMIntersectorKPlueckerBase<M,K,filter>
373 {
374 __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
375 : QuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {}
376
377 __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
378 const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
379 const vuint<M>& geomID, const vuint<M>& primID) const
380 {
381 Intersect1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID);
382 PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog);
383 PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog);
384 }
385
386 __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
387 const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3,
388 const vuint<M>& geomID, const vuint<M>& primID) const
389 {
390 Occluded1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID);
391 if (PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true;
392 if (PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true;
393 return false;
394 }
395 };
396
397#if defined(__AVX__)
398
399 /*! Intersects 4 quads with 1 ray using AVX */
400 template<int K, bool filter>
401 struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter>
402 {
403 __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray)
404 : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {}
405
406 template<typename Epilog>
407 __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
408 {
409 const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
410 const vbool8 flags(0,0,0,0,1,1,1,1);
411#if !defined(EMBREE_BACKFACE_CULLING)
412 const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
413 const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
414#else
415 const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
416 const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
417#endif
418 return PlueckerIntersector1KTriangleM::intersect1<8,K>(ray,k,vtx0,vtx1,vtx2,flags,epilog);
419 }
420
421 __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context,
422 const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
423 const vuint4& geomID, const vuint4& primID) const
424 {
425 return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
426 }
427
428 __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
429 const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
430 const vuint4& geomID, const vuint4& primID) const
431 {
432 return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID)));
433 }
434 };
435
436#endif
437 }
438}
439