1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "../common/ray.h"
7#include "curve_intersector_precalculations.h"
8#include "curve_intersector_sweep.h"
9#include "../subdiv/linear_bezier_patch.h"
10
11#define DBG(x)
12
13namespace embree
14{
15 namespace isa
16 {
17 template<typename Ray, typename Epilog>
18 struct TensorLinearCubicBezierSurfaceIntersector
19 {
20 const LinearSpace3fa& ray_space;
21 Ray& ray;
22 TensorLinearCubicBezierSurface3fa curve3d;
23 TensorLinearCubicBezierSurface2fa curve2d;
24 float eps;
25 const Epilog& epilog;
26 bool isHit;
27
28 __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog)
29 : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false)
30 {
31 const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org);
32 curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R));
33 const BBox2fa b2 = curve2d.bounds();
34 eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper)));
35 }
36
37 __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1)
38 {
39 if (p1 == p0) {
40 if (p0 == 0.0f) return Interval1f(u0,u1);
41 else return Interval1f(empty);
42 }
43 const float t = -p0/(p1-p0);
44 const float tt = lerp(u0,u1,t);
45 return Interval1f(tt);
46 }
47
48 __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u)
49 {
50 if (sign(p0.lower) != sign(p0.upper)) u.extend(u0);
51 if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower));
52 if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper));
53 if (sign(p1.lower) != sign(p1.upper)) u.extend(u1);
54 }
55
56 __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve)
57 {
58 Interval1f u = empty;
59 solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u);
60 solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u);
61 solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u);
62 solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u);
63 solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u);
64 solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u);
65 return intersect(u,Interval1f(0.0f,1.0f));
66 }
67
68 __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve)
69 {
70 Interval1f v = empty;
71 solve_linear(0.0f,1.0f,curve.v0,curve.v1,v);
72 return intersect(v,Interval1f(0.0f,1.0f));
73 }
74
75 __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2)
76 {
77 BBox2fa bounds = curve2.bounds();
78 if (bounds.upper.x < 0.0f) return;
79 if (bounds.upper.y < 0.0f) return;
80 if (bounds.lower.x > 0.0f) return;
81 if (bounds.lower.y > 0.0f) return;
82
83 if (max(cu.size(),cv.size()) < 1E-4f)
84 {
85 const float u = cu.center();
86 const float v = cv.center();
87 TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
88 const float t = curve_z.eval(u,v);
89 if (ray.tnear() <= t && t <= ray.tfar) {
90 const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
91 BezierCurveHit hit(t,u,v,Ng);
92 isHit |= epilog(hit);
93 }
94 return;
95 }
96
97 const Vec2fa dv = curve2.axis_v();
98 const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
99 LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
100 if (!curve0v.hasRoot()) return;
101
102 const Interval1f v = bezier_clipping(curve0v);
103 if (isEmpty(v)) return;
104 TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v);
105 cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
106
107 const Vec2fa du = curve2.axis_u();
108 const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du);
109 CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v();
110 int roots = curve0u.maxRoots();
111 if (roots == 0) return;
112
113 if (roots == 1)
114 {
115 const Interval1f u = bezier_clipping(curve0u);
116 if (isEmpty(u)) return;
117 TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u);
118 cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper));
119 solve_bezier_clipping(cu,cv,curve2b);
120 return;
121 }
122
123 TensorLinearCubicBezierSurface2fa curve2l, curve2r;
124 curve2a.split_u(curve2l,curve2r);
125 solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l);
126 solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r);
127 }
128
129 __forceinline bool solve_bezier_clipping()
130 {
131 solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d);
132 return isHit;
133 }
134
135 __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv)
136 {
137 Vec2fa uv(cu.center(),cv.center());
138 const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y);
139 const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y);
140 const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
141 solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J);
142 }
143
144 __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J)
145 {
146 Vec2fa uv = uv_in;
147
148 for (size_t i=0; i<200; i++)
149 {
150 const Vec2fa f = curve2d.eval(uv.x,uv.y);
151 const Vec2fa duv = rcp_J*f;
152 uv -= duv;
153
154 if (max(abs(f.x),abs(f.y)) < eps)
155 {
156 const float u = uv.x;
157 const float v = uv.y;
158 if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs
159 if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs
160 const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
161 const float t = curve_z.eval(u,v);
162 if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs
163 const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
164 BezierCurveHit hit(t,u,v,Ng);
165 isHit |= epilog(hit);
166 return;
167 }
168 }
169 }
170
171 __forceinline bool clip_v(BBox1f& cu, BBox1f& cv)
172 {
173 const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower);
174 const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv);
175 LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
176 if (!curve0v.hasRoot()) return false;
177 Interval1f v = bezier_clipping(curve0v);
178 if (isEmpty(v)) return false;
179 v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
180 cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
181 return true;
182 }
183
184 __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv)
185 {
186 /* perform bezier clipping in v-direction to get tight v-bounds */
187 TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv);
188 const Vec2fa dv = curve2.axis_v();
189 const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
190 LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
191 if (unlikely(!curve0v.hasRoot())) return true;
192 Interval1f v = bezier_clipping(curve0v);
193 if (unlikely(isEmpty(v))) return true;
194 v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
195 curve2 = curve2.clip_v(v);
196 cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
197
198 /* perform one newton raphson iteration */
199 Vec2fa c(cu.center(),cv.center());
200 Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv);
201 const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
202 const Vec2fa c1 = c - rcp_J*f;
203
204 /* calculate bounds of derivatives */
205 const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds();
206 const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds();
207
208 /* calculate krawczyk test */
209 LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f),
210 Interval1f(0.0f), Interval1f(1.0f));
211
212 LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x),
213 Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y));
214
215 const LinearSpace2<Vec2f> rcp_J2(rcp_J);
216 const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2);
217
218 const Vec2<Interval1f> x(cu,cv);
219 const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c)));
220
221 /* test if there is no solution */
222 const Vec2<Interval1f> KK = intersect(K,x);
223 if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true;
224
225 /* exit if convergence cannot get proven, but terminate if we are very small */
226 if (unlikely(!subset(K,x) && !very_small)) return false;
227
228 /* solve using newton raphson iteration of convergence is guaranteed */
229 solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J);
230 return true;
231 }
232
233 __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv)
234 {
235 if (!clip_v(cu,cv)) return;
236 return solve_newton_raphson(cu,cv);
237 }
238
239 __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv)
240 {
241 unsigned int sptr = 0;
242 const unsigned int stack_size = 4;
243 unsigned int mask_stack[stack_size];
244 BBox1f cu_stack[stack_size];
245 BBox1f cv_stack[stack_size];
246 goto entry;
247
248 /* terminate if stack is empty */
249 while (sptr)
250 {
251 /* pop from stack */
252 {
253 sptr--;
254 size_t mask = mask_stack[sptr];
255 cu = cu_stack[sptr];
256 cv = cv_stack[sptr];
257 const size_t i = bscf(mask);
258 mask_stack[sptr] = mask;
259 if (mask) sptr++; // there are still items on the stack
260
261 /* process next element recurse into each hit curve segment */
262 const float u0 = float(i+0)*(1.0f/(VSIZEX-1));
263 const float u1 = float(i+1)*(1.0f/(VSIZEX-1));
264 const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1));
265 cu = cui;
266 }
267
268#if 0
269 solve_newton_raphson_no_recursion(cu,cv);
270 continue;
271
272#else
273 /* we assume convergence for small u ranges and verify using krawczyk */
274 if (cu.size() < 1.0f/6.0f) {
275 const bool very_small = cu.size() < 0.001f || sptr >= stack_size;
276 if (solve_krawczyk(very_small,cu,cv)) {
277 continue;
278 }
279 }
280#endif
281
282 entry:
283
284 /* split the curve into VSIZEX-1 segments in u-direction */
285 vboolx valid = true;
286 TensorLinearCubicBezierSurface<Vec2vfx> subcurves = curve2d.clip_v(cv).vsplit_u(valid,cu);
287
288 /* slabs test in u-direction */
289 Vec2vfx ndv = cross(subcurves.axis_v());
290 BBox<vfloatx> boundsv = subcurves.vxfm(ndv).bounds();
291 valid &= boundsv.lower <= eps;
292 valid &= boundsv.upper >= -eps;
293 if (none(valid)) continue;
294
295 /* slabs test in v-direction */
296 Vec2vfx ndu = cross(subcurves.axis_u());
297 BBox<vfloatx> boundsu = subcurves.vxfm(ndu).bounds();
298 valid &= boundsu.lower <= eps;
299 valid &= boundsu.upper >= -eps;
300 if (none(valid)) continue;
301
302 /* push valid segments to stack */
303 assert(sptr < stack_size);
304 mask_stack [sptr] = movemask(valid);
305 cu_stack [sptr] = cu;
306 cv_stack [sptr] = cv;
307 sptr++;
308 }
309 }
310
311 __forceinline bool solve_newton_raphson_main()
312 {
313 BBox1f vu(0.0f,1.0f);
314 BBox1f vv(0.0f,1.0f);
315 solve_newton_raphson_recursion(vu,vv);
316 return isHit;
317 }
318 };
319
320
321 template<template<typename Ty> class SourceCurve>
322 struct OrientedCurve1Intersector1
323 {
324 //template<typename Ty> using Curve = SourceCurve<Ty>;
325 typedef SourceCurve<Vec3ff> SourceCurve3ff;
326 typedef SourceCurve<Vec3fa> SourceCurve3fa;
327
328 __forceinline OrientedCurve1Intersector1() {}
329
330 __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {}
331
332 template<typename Epilog>
333 __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
334 IntersectContext* context,
335 const CurveGeometry* geom, const unsigned int primID,
336 const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
337 const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
338 const Epilog& epilog) const
339 {
340 STAT3(normal.trav_prims,1,1,1);
341
342 SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
343 SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
344 ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
345 TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
346 //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
347 return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
348 }
349
350 template<typename Epilog>
351 __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
352 IntersectContext* context,
353 const CurveGeometry* geom, const unsigned int primID,
354 const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const
355 {
356 STAT3(normal.trav_prims,1,1,1);
357 //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
358 return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
359 }
360 };
361
362 template<template<typename Ty> class SourceCurve, int K>
363 struct OrientedCurve1IntersectorK
364 {
365 //template<typename Ty> using Curve = SourceCurve<Ty>;
366 typedef SourceCurve<Vec3ff> SourceCurve3ff;
367 typedef SourceCurve<Vec3fa> SourceCurve3fa;
368
369 struct Ray1
370 {
371 __forceinline Ray1(RayK<K>& ray, size_t k)
372 : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
373
374 Vec3fa org;
375 Vec3fa dir;
376 float _tnear;
377 float& tfar;
378
379 __forceinline float& tnear() { return _tnear; }
380 //__forceinline float& tfar() { return _tfar; }
381 __forceinline const float& tnear() const { return _tnear; }
382 //__forceinline const float& tfar() const { return _tfar; }
383 };
384
385 template<typename Epilog>
386 __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
387 IntersectContext* context,
388 const CurveGeometry* geom, const unsigned int primID,
389 const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
390 const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
391 const Epilog& epilog)
392 {
393 STAT3(normal.trav_prims,1,1,1);
394 Ray1 ray(vray,k);
395 SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
396 SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
397 ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
398 TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
399 //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
400 return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
401 }
402
403 template<typename Epilog>
404 __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
405 IntersectContext* context,
406 const CurveGeometry* geom, const unsigned int primID,
407 const TensorLinearCubicBezierSurface3fa& curve,
408 const Epilog& epilog)
409 {
410 STAT3(normal.trav_prims,1,1,1);
411 Ray1 ray(vray,k);
412 //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
413 return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
414 }
415 };
416 }
417}
418