| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../common/ray.h" |
| 7 | #include "cylinder.h" |
| 8 | #include "plane.h" |
| 9 | #include "line_intersector.h" |
| 10 | #include "curve_intersector_precalculations.h" |
| 11 | |
| 12 | namespace embree |
| 13 | { |
| 14 | namespace isa |
| 15 | { |
| 16 | static const size_t numJacobianIterations = 5; |
| 17 | #if defined(__AVX__) |
| 18 | static const size_t numBezierSubdivisions = 2; |
| 19 | #else |
| 20 | static const size_t numBezierSubdivisions = 3; |
| 21 | #endif |
| 22 | |
| 23 | struct BezierCurveHit |
| 24 | { |
| 25 | __forceinline BezierCurveHit() {} |
| 26 | |
| 27 | __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng) |
| 28 | : t(t), u(u), v(0.0f), Ng(Ng) {} |
| 29 | |
| 30 | __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng) |
| 31 | : t(t), u(u), v(v), Ng(Ng) {} |
| 32 | |
| 33 | __forceinline void finalize() {} |
| 34 | |
| 35 | public: |
| 36 | float t; |
| 37 | float u; |
| 38 | float v; |
| 39 | Vec3fa Ng; |
| 40 | }; |
| 41 | |
| 42 | template<typename NativeCurve3ff, typename Ray, typename Epilog> |
| 43 | __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i, |
| 44 | const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1, |
| 45 | const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du, |
| 46 | const Epilog& epilog) |
| 47 | { |
| 48 | if (tp.lower[i]+dt > ray.tfar) return false; |
| 49 | Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]); |
| 50 | if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]); |
| 51 | if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]); |
| 52 | BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o); |
| 53 | return epilog(hit); |
| 54 | } |
| 55 | |
| 56 | template<typename NativeCurve3ff, typename Ray, typename Epilog> |
| 57 | __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog) |
| 58 | { |
| 59 | const Vec3fa org = zero; |
| 60 | const Vec3fa dir = ray.dir; |
| 61 | const float length_ray_dir = length(dir); |
| 62 | |
| 63 | /* error of curve evaluations is proportional to largest coordinate */ |
| 64 | const BBox3ff box = curve.bounds(); |
| 65 | const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper))); |
| 66 | |
| 67 | for (size_t i=0; i<numJacobianIterations; i++) |
| 68 | { |
| 69 | const Vec3fa Q = madd(Vec3fa(t),dir,org); |
| 70 | //const Vec3fa dQdu = zero; |
| 71 | const Vec3fa dQdt = dir; |
| 72 | const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here |
| 73 | |
| 74 | Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu); |
| 75 | //const Vec3fa dPdt = zero; |
| 76 | |
| 77 | const Vec3fa R = Q-P; |
| 78 | const float len_R = length(R); //reduce_max(abs(R)); |
| 79 | const float R_err = max(Q_err,P_err); |
| 80 | const Vec3fa dRdu = /*dQdu*/-dPdu; |
| 81 | const Vec3fa dRdt = dQdt;//-dPdt; |
| 82 | |
| 83 | const Vec3fa T = normalize(dPdu); |
| 84 | const Vec3fa dTdu = dnormalize(dPdu,ddPdu); |
| 85 | //const Vec3fa dTdt = zero; |
| 86 | const float cos_err = P_err/length(dPdu); |
| 87 | |
| 88 | /* Error estimate for dot(R,T): |
| 89 | |
| 90 | dot(R,T) = cos(R,T) |R| |T| |
| 91 | = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err) |
| 92 | = cos(R,T)*|R|*|T| |
| 93 | +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err) |
| 94 | +- cos_error*(|R| + |T|) |
| 95 | +- lower order terms |
| 96 | with cos(R,T) being in [0,1] and |T| = 1 we get: |
| 97 | dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1) |
| 98 | */ |
| 99 | |
| 100 | const float f = dot(R,T); |
| 101 | const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R); |
| 102 | const float dfdu = dot(dRdu,T) + dot(R,dTdu); |
| 103 | const float dfdt = dot(dRdt,T);// + dot(R,dTdt); |
| 104 | |
| 105 | const float K = dot(R,R)-sqr(f); |
| 106 | const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu); |
| 107 | const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt); |
| 108 | const float rsqrt_K = rsqrt(K); |
| 109 | |
| 110 | const float g = sqrt(K)-P.w; |
| 111 | const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w; |
| 112 | const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w; |
| 113 | const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w; |
| 114 | |
| 115 | const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt); |
| 116 | const Vec2f dut = rcp(J)*Vec2f(f,g); |
| 117 | const Vec2f ut = Vec2f(u,t) - dut; |
| 118 | u = ut.x; t = ut.y; |
| 119 | |
| 120 | if (abs(f) < f_err && abs(g) < g_err) |
| 121 | { |
| 122 | t+=dt; |
| 123 | if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs |
| 124 | if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs |
| 125 | const Vec3fa R = normalize(Q-P); |
| 126 | const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu); |
| 127 | const Vec3fa V = cross(dPdu,R); |
| 128 | BezierCurveHit hit(t,u,cross(V,U)); |
| 129 | return epilog(hit); |
| 130 | } |
| 131 | } |
| 132 | return false; |
| 133 | } |
| 134 | |
| 135 | template<typename NativeCurve3ff, typename Ray, typename Epilog> |
| 136 | bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, |
| 137 | float u0, float u1, unsigned int depth, const Epilog& epilog) |
| 138 | { |
| 139 | #if defined(__AVX__) |
| 140 | enum { VSIZEX_ = 8 }; |
| 141 | typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues |
| 142 | typedef vint8 vintx; |
| 143 | typedef vfloat8 vfloatx; |
| 144 | #else |
| 145 | enum { VSIZEX_ = 4 }; |
| 146 | typedef vbool4 vboolx; |
| 147 | typedef vint4 vintx; |
| 148 | typedef vfloat4 vfloatx; |
| 149 | #endif |
| 150 | typedef Vec3<vfloatx> Vec3vfx; |
| 151 | typedef Vec4<vfloatx> Vec4vfx; |
| 152 | |
| 153 | unsigned int maxDepth = numBezierSubdivisions; |
| 154 | bool found = false; |
| 155 | const Vec3fa org = zero; |
| 156 | const Vec3fa dir = ray.dir; |
| 157 | |
| 158 | unsigned int sptr = 0; |
| 159 | const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below |
| 160 | struct StackEntry { |
| 161 | vboolx valid; |
| 162 | vfloatx tlower; |
| 163 | float u0; |
| 164 | float u1; |
| 165 | unsigned int depth; |
| 166 | }; |
| 167 | StackEntry stack[stack_size]; |
| 168 | goto entry; |
| 169 | |
| 170 | /* terminate if stack is empty */ |
| 171 | while (sptr) |
| 172 | { |
| 173 | /* pop from stack */ |
| 174 | { |
| 175 | sptr--; |
| 176 | vboolx valid = stack[sptr].valid; |
| 177 | const vfloatx tlower = stack[sptr].tlower; |
| 178 | valid &= tlower+dt <= ray.tfar; |
| 179 | if (none(valid)) continue; |
| 180 | u0 = stack[sptr].u0; |
| 181 | u1 = stack[sptr].u1; |
| 182 | depth = stack[sptr].depth; |
| 183 | const size_t i = select_min(valid,tlower); clear(valid,i); |
| 184 | stack[sptr].valid = valid; |
| 185 | if (any(valid)) sptr++; // there are still items on the stack |
| 186 | |
| 187 | /* process next segment */ |
| 188 | const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); |
| 189 | u0 = vu0[i+0]; |
| 190 | u1 = vu0[i+1]; |
| 191 | } |
| 192 | entry: |
| 193 | |
| 194 | /* subdivide curve */ |
| 195 | const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1))); |
| 196 | const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); |
| 197 | Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); |
| 198 | const Vec4vfx P3 = shift_right_1(P0); |
| 199 | const Vec4vfx dP3du = shift_right_1(dP0du); |
| 200 | const Vec4vfx P1 = P0 + dP0du; |
| 201 | const Vec4vfx P2 = P3 - dP3du; |
| 202 | |
| 203 | /* calculate bounding cylinders */ |
| 204 | const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0)); |
| 205 | const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0)); |
| 206 | const vfloatx maxr12 = sqrt(max(rr1,rr2)); |
| 207 | const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp); |
| 208 | const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp); |
| 209 | vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12; |
| 210 | vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12; |
| 211 | r_outer = one_plus_ulp*r_outer; |
| 212 | r_inner = max(0.0f,one_minus_ulp*r_inner); |
| 213 | const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer); |
| 214 | const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner); |
| 215 | vboolx valid = true; clear(valid,vfloatx::size-1); |
| 216 | |
| 217 | /* intersect with outer cylinder */ |
| 218 | BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1; |
| 219 | valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1); |
| 220 | if (none(valid)) continue; |
| 221 | |
| 222 | /* intersect with cap-planes */ |
| 223 | BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt); |
| 224 | tp = embree::intersect(tp,tc_outer); |
| 225 | BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir); |
| 226 | tp = embree::intersect(tp,h0); |
| 227 | BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir); |
| 228 | tp = embree::intersect(tp,h1); |
| 229 | valid &= tp.lower <= tp.upper; |
| 230 | if (none(valid)) continue; |
| 231 | |
| 232 | /* clamp and correct u parameter */ |
| 233 | u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f)); |
| 234 | u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f)); |
| 235 | u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size))); |
| 236 | u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size))); |
| 237 | |
| 238 | /* intersect with inner cylinder */ |
| 239 | BBox<vfloatx> tc_inner; |
| 240 | vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero; |
| 241 | const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1); |
| 242 | |
| 243 | /* at the unstable area we subdivide deeper */ |
| 244 | const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f); |
| 245 | const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f); |
| 246 | |
| 247 | /* subtract the inner interval from the current hit interval */ |
| 248 | BBox<vfloatx> tp0, tp1; |
| 249 | subtract(tp,tc_inner,tp0,tp1); |
| 250 | vboolx valid0 = valid & (tp0.lower <= tp0.upper); |
| 251 | vboolx valid1 = valid & (tp1.lower <= tp1.upper); |
| 252 | if (none(valid0 | valid1)) continue; |
| 253 | |
| 254 | /* iterate over all first hits front to back */ |
| 255 | const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth)); |
| 256 | vboolx recursion_valid0 = valid0 & (depth < termDepth0); |
| 257 | valid0 &= depth >= termDepth0; |
| 258 | |
| 259 | while (any(valid0)) |
| 260 | { |
| 261 | const size_t i = select_min(valid0,tp0.lower); clear(valid0,i); |
| 262 | found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog); |
| 263 | //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog); |
| 264 | valid0 &= tp0.lower+dt <= ray.tfar; |
| 265 | } |
| 266 | valid1 &= tp1.lower+dt <= ray.tfar; |
| 267 | |
| 268 | /* iterate over all second hits front to back */ |
| 269 | const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth)); |
| 270 | vboolx recursion_valid1 = valid1 & (depth < termDepth1); |
| 271 | valid1 &= depth >= termDepth1; |
| 272 | while (any(valid1)) |
| 273 | { |
| 274 | const size_t i = select_min(valid1,tp1.lower); clear(valid1,i); |
| 275 | found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog); |
| 276 | //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog); |
| 277 | valid1 &= tp1.lower+dt <= ray.tfar; |
| 278 | } |
| 279 | |
| 280 | /* push valid segments to stack */ |
| 281 | recursion_valid0 &= tp0.lower+dt <= ray.tfar; |
| 282 | recursion_valid1 &= tp1.lower+dt <= ray.tfar; |
| 283 | const vboolx recursion_valid = recursion_valid0 | recursion_valid1; |
| 284 | if (any(recursion_valid)) |
| 285 | { |
| 286 | assert(sptr < stack_size); |
| 287 | stack[sptr].valid = recursion_valid; |
| 288 | stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower); |
| 289 | stack[sptr].u0 = u0; |
| 290 | stack[sptr].u1 = u1; |
| 291 | stack[sptr].depth = depth+1; |
| 292 | sptr++; |
| 293 | } |
| 294 | } |
| 295 | return found; |
| 296 | } |
| 297 | |
| 298 | template<template<typename Ty> class NativeCurve> |
| 299 | struct SweepCurve1Intersector1 |
| 300 | { |
| 301 | typedef NativeCurve<Vec3ff> NativeCurve3ff; |
| 302 | |
| 303 | template<typename Epilog> |
| 304 | __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, |
| 305 | IntersectContext* context, |
| 306 | const CurveGeometry* geom, const unsigned int primID, |
| 307 | const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, |
| 308 | const Epilog& epilog) |
| 309 | { |
| 310 | STAT3(normal.trav_prims,1,1,1); |
| 311 | |
| 312 | /* move ray closer to make intersection stable */ |
| 313 | NativeCurve3ff curve0(v0,v1,v2,v3); |
| 314 | curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); |
| 315 | const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); |
| 316 | const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); |
| 317 | const NativeCurve3ff curve1 = curve0-ref; |
| 318 | return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); |
| 319 | } |
| 320 | }; |
| 321 | |
| 322 | template<template<typename Ty> class NativeCurve, int K> |
| 323 | struct SweepCurve1IntersectorK |
| 324 | { |
| 325 | typedef NativeCurve<Vec3ff> NativeCurve3ff; |
| 326 | |
| 327 | struct Ray1 |
| 328 | { |
| 329 | __forceinline Ray1(RayK<K>& ray, size_t k) |
| 330 | : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} |
| 331 | |
| 332 | Vec3fa org; |
| 333 | Vec3fa dir; |
| 334 | float _tnear; |
| 335 | float& tfar; |
| 336 | |
| 337 | __forceinline float& tnear() { return _tnear; } |
| 338 | //__forceinline float& tfar() { return _tfar; } |
| 339 | __forceinline const float& tnear() const { return _tnear; } |
| 340 | //__forceinline const float& tfar() const { return _tfar; } |
| 341 | |
| 342 | }; |
| 343 | |
| 344 | template<typename Epilog> |
| 345 | __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, |
| 346 | IntersectContext* context, |
| 347 | const CurveGeometry* geom, const unsigned int primID, |
| 348 | const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, |
| 349 | const Epilog& epilog) |
| 350 | { |
| 351 | STAT3(normal.trav_prims,1,1,1); |
| 352 | Ray1 ray(vray,k); |
| 353 | |
| 354 | /* move ray closer to make intersection stable */ |
| 355 | NativeCurve3ff curve0(v0,v1,v2,v3); |
| 356 | curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); |
| 357 | const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); |
| 358 | const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); |
| 359 | const NativeCurve3ff curve1 = curve0-ref; |
| 360 | return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); |
| 361 | } |
| 362 | }; |
| 363 | } |
| 364 | } |
| 365 | |