| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../common/default.h" |
| 7 | //#include "../common/scene_curves.h" |
| 8 | #include "../common/context.h" |
| 9 | |
| 10 | namespace embree |
| 11 | { |
| 12 | class BezierBasis |
| 13 | { |
| 14 | public: |
| 15 | |
| 16 | template<typename T> |
| 17 | static __forceinline Vec4<T> eval(const T& u) |
| 18 | { |
| 19 | const T t1 = u; |
| 20 | const T t0 = 1.0f-t1; |
| 21 | const T B0 = t0 * t0 * t0; |
| 22 | const T B1 = 3.0f * t1 * (t0 * t0); |
| 23 | const T B2 = 3.0f * (t1 * t1) * t0; |
| 24 | const T B3 = t1 * t1 * t1; |
| 25 | return Vec4<T>(B0,B1,B2,B3); |
| 26 | } |
| 27 | |
| 28 | template<typename T> |
| 29 | static __forceinline Vec4<T> derivative(const T& u) |
| 30 | { |
| 31 | const T t1 = u; |
| 32 | const T t0 = 1.0f-t1; |
| 33 | const T B0 = -(t0*t0); |
| 34 | const T B1 = madd(-2.0f,t0*t1,t0*t0); |
| 35 | const T B2 = msub(+2.0f,t0*t1,t1*t1); |
| 36 | const T B3 = +(t1*t1); |
| 37 | return T(3.0f)*Vec4<T>(B0,B1,B2,B3); |
| 38 | } |
| 39 | |
| 40 | template<typename T> |
| 41 | static __forceinline Vec4<T> derivative2(const T& u) |
| 42 | { |
| 43 | const T t1 = u; |
| 44 | const T t0 = 1.0f-t1; |
| 45 | const T B0 = t0; |
| 46 | const T B1 = madd(-2.0f,t0,t1); |
| 47 | const T B2 = madd(-2.0f,t1,t0); |
| 48 | const T B3 = t1; |
| 49 | return T(6.0f)*Vec4<T>(B0,B1,B2,B3); |
| 50 | } |
| 51 | }; |
| 52 | |
/*! Tables of precomputed Bezier basis weights. The tables are indexed
 *  as [size][ofs] by CubicBezierCurve::eval0/eval1/derivative0/derivative1;
 *  how they are filled is defined by the out-of-line constructor. */
struct PrecomputedBezierBasis
{
  /*! largest supported 'size' index; every table is (N+1) x (N+1) */
  enum { N = 16 };

  /*! leaves the tables untouched */
  PrecomputedBezierBasis() {}

  /*! defined outside this header */
  PrecomputedBezierBasis(int shift);

  /* weights for bezier evaluation, one table per control point */
  float c0[N+1][N+1];
  float c1[N+1][N+1];
  float c2[N+1][N+1];
  float c3[N+1][N+1];

  /* weights for bezier derivative evaluation, one table per control point */
  float d0[N+1][N+1];
  float d1[N+1][N+1];
  float d2[N+1][N+1];
  float d3[N+1][N+1];
};

/* global tables, defined in another translation unit */
extern PrecomputedBezierBasis bezier_basis0;
extern PrecomputedBezierBasis bezier_basis1;
| 76 | |
| 77 | |
| 78 | template<typename V> |
| 79 | struct LinearBezierCurve |
| 80 | { |
| 81 | V v0,v1; |
| 82 | |
| 83 | __forceinline LinearBezierCurve () {} |
| 84 | |
| 85 | __forceinline LinearBezierCurve (const LinearBezierCurve& other) |
| 86 | : v0(other.v0), v1(other.v1) {} |
| 87 | |
| 88 | __forceinline LinearBezierCurve& operator= (const LinearBezierCurve& other) { |
| 89 | v0 = other.v0; v1 = other.v1; return *this; |
| 90 | } |
| 91 | |
| 92 | __forceinline LinearBezierCurve (const V& v0, const V& v1) |
| 93 | : v0(v0), v1(v1) {} |
| 94 | |
| 95 | __forceinline V begin() const { return v0; } |
| 96 | __forceinline V end () const { return v1; } |
| 97 | |
| 98 | bool hasRoot() const; |
| 99 | |
| 100 | friend embree_ostream operator<<(embree_ostream cout, const LinearBezierCurve& a) { |
| 101 | return cout << "LinearBezierCurve (" << a.v0 << ", " << a.v1 << ")" ; |
| 102 | } |
| 103 | }; |
| 104 | |
| 105 | template<> __forceinline bool LinearBezierCurve<Interval1f>::hasRoot() const { |
| 106 | return numRoots(v0,v1); |
| 107 | } |
| 108 | |
| 109 | template<typename V> |
| 110 | struct QuadraticBezierCurve |
| 111 | { |
| 112 | V v0,v1,v2; |
| 113 | |
| 114 | __forceinline QuadraticBezierCurve () {} |
| 115 | |
| 116 | __forceinline QuadraticBezierCurve (const QuadraticBezierCurve& other) |
| 117 | : v0(other.v0), v1(other.v1), v2(other.v2) {} |
| 118 | |
| 119 | __forceinline QuadraticBezierCurve& operator= (const QuadraticBezierCurve& other) { |
| 120 | v0 = other.v0; v1 = other.v1; v2 = other.v2; return *this; |
| 121 | } |
| 122 | |
| 123 | __forceinline QuadraticBezierCurve (const V& v0, const V& v1, const V& v2) |
| 124 | : v0(v0), v1(v1), v2(v2) {} |
| 125 | |
| 126 | __forceinline V begin() const { return v0; } |
| 127 | __forceinline V end () const { return v2; } |
| 128 | |
| 129 | __forceinline V interval() const { |
| 130 | return merge(v0,v1,v2); |
| 131 | } |
| 132 | |
| 133 | __forceinline BBox<V> bounds() const { |
| 134 | return merge(BBox<V>(v0),BBox<V>(v1),BBox<V>(v2)); |
| 135 | } |
| 136 | |
| 137 | friend embree_ostream operator<<(embree_ostream cout, const QuadraticBezierCurve& a) { |
| 138 | return cout << "QuadraticBezierCurve ( (" << a.u.lower << ", " << a.u.upper << "), " << a.v0 << ", " << a.v1 << ", " << a.v2 << ")" ; |
| 139 | } |
| 140 | }; |
| 141 | |
| 142 | |
| 143 | typedef QuadraticBezierCurve<float> QuadraticBezierCurve1f; |
| 144 | typedef QuadraticBezierCurve<Vec2fa> QuadraticBezierCurve2fa; |
| 145 | typedef QuadraticBezierCurve<Vec3fa> QuadraticBezierCurve3fa; |
| 146 | |
| 147 | template<typename Vertex> |
| 148 | struct CubicBezierCurve |
| 149 | { |
| 150 | Vertex v0,v1,v2,v3; |
| 151 | |
| 152 | __forceinline CubicBezierCurve() {} |
| 153 | |
| 154 | template<typename T1> |
| 155 | __forceinline CubicBezierCurve (const CubicBezierCurve<T1>& other) |
| 156 | : v0(other.v0), v1(other.v1), v2(other.v2), v3(other.v3) {} |
| 157 | |
| 158 | __forceinline CubicBezierCurve& operator= (const CubicBezierCurve& other) { |
| 159 | v0 = other.v0; v1 = other.v1; v2 = other.v2; v3 = other.v3; return *this; |
| 160 | } |
| 161 | |
| 162 | __forceinline CubicBezierCurve(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3) |
| 163 | : v0(v0), v1(v1), v2(v2), v3(v3) {} |
| 164 | |
| 165 | __forceinline Vertex begin() const { |
| 166 | return v0; |
| 167 | } |
| 168 | |
| 169 | __forceinline Vertex end() const { |
| 170 | return v3; |
| 171 | } |
| 172 | |
| 173 | __forceinline Vertex center() const { |
| 174 | return 0.25f*(v0+v1+v2+v3); |
| 175 | } |
| 176 | |
| 177 | __forceinline Vertex begin_direction() const { |
| 178 | return v1-v0; |
| 179 | } |
| 180 | |
| 181 | __forceinline Vertex end_direction() const { |
| 182 | return v3-v2; |
| 183 | } |
| 184 | |
| 185 | __forceinline CubicBezierCurve<float> xfm(const Vertex& dx) const { |
| 186 | return CubicBezierCurve<float>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx)); |
| 187 | } |
| 188 | |
| 189 | __forceinline CubicBezierCurve<vfloatx> vxfm(const Vertex& dx) const { |
| 190 | return CubicBezierCurve<vfloatx>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx)); |
| 191 | } |
| 192 | |
| 193 | __forceinline CubicBezierCurve<float> xfm(const Vertex& dx, const Vertex& p) const { |
| 194 | return CubicBezierCurve<float>(dot(v0-p,dx),dot(v1-p,dx),dot(v2-p,dx),dot(v3-p,dx)); |
| 195 | } |
| 196 | |
| 197 | __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space) const |
| 198 | { |
| 199 | const Vec3fa q0 = xfmVector(space,v0); |
| 200 | const Vec3fa q1 = xfmVector(space,v1); |
| 201 | const Vec3fa q2 = xfmVector(space,v2); |
| 202 | const Vec3fa q3 = xfmVector(space,v3); |
| 203 | return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3); |
| 204 | } |
| 205 | |
| 206 | __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const |
| 207 | { |
| 208 | const Vec3fa q0 = xfmVector(space,v0-p); |
| 209 | const Vec3fa q1 = xfmVector(space,v1-p); |
| 210 | const Vec3fa q2 = xfmVector(space,v2-p); |
| 211 | const Vec3fa q3 = xfmVector(space,v3-p); |
| 212 | return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3); |
| 213 | } |
| 214 | |
| 215 | __forceinline CubicBezierCurve<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const |
| 216 | { |
| 217 | const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w); |
| 218 | const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w); |
| 219 | const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w); |
| 220 | const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w); |
| 221 | return CubicBezierCurve<Vec3ff>(q0,q1,q2,q3); |
| 222 | } |
| 223 | |
| 224 | __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const |
| 225 | { |
| 226 | const Vec3fa q0 = xfmVector(space,s*(v0-p)); |
| 227 | const Vec3fa q1 = xfmVector(space,s*(v1-p)); |
| 228 | const Vec3fa q2 = xfmVector(space,s*(v2-p)); |
| 229 | const Vec3fa q3 = xfmVector(space,s*(v3-p)); |
| 230 | return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3); |
| 231 | } |
| 232 | |
| 233 | __forceinline int maxRoots() const; |
| 234 | |
| 235 | __forceinline BBox<Vertex> bounds() const { |
| 236 | return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3)); |
| 237 | } |
| 238 | |
| 239 | __forceinline friend CubicBezierCurve operator +( const CubicBezierCurve& a, const CubicBezierCurve& b ) { |
| 240 | return CubicBezierCurve(a.v0+b.v0,a.v1+b.v1,a.v2+b.v2,a.v3+b.v3); |
| 241 | } |
| 242 | |
| 243 | __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const CubicBezierCurve& b ) { |
| 244 | return CubicBezierCurve(a.v0-b.v0,a.v1-b.v1,a.v2-b.v2,a.v3-b.v3); |
| 245 | } |
| 246 | |
| 247 | __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const Vertex& b ) { |
| 248 | return CubicBezierCurve(a.v0-b,a.v1-b,a.v2-b,a.v3-b); |
| 249 | } |
| 250 | |
| 251 | __forceinline friend CubicBezierCurve operator *( const Vertex& a, const CubicBezierCurve& b ) { |
| 252 | return CubicBezierCurve(a*b.v0,a*b.v1,a*b.v2,a*b.v3); |
| 253 | } |
| 254 | |
| 255 | __forceinline friend CubicBezierCurve cmadd( const Vertex& a, const CubicBezierCurve& b, const CubicBezierCurve& c) { |
| 256 | return CubicBezierCurve(madd(a,b.v0,c.v0),madd(a,b.v1,c.v1),madd(a,b.v2,c.v2),madd(a,b.v3,c.v3)); |
| 257 | } |
| 258 | |
| 259 | __forceinline friend CubicBezierCurve clerp ( const CubicBezierCurve& a, const CubicBezierCurve& b, const Vertex& t ) { |
| 260 | return cmadd((Vertex(1.0f)-t),a,t*b); |
| 261 | } |
| 262 | |
| 263 | __forceinline friend CubicBezierCurve merge ( const CubicBezierCurve& a, const CubicBezierCurve& b ) { |
| 264 | return CubicBezierCurve(merge(a.v0,b.v0),merge(a.v1,b.v1),merge(a.v2,b.v2),merge(a.v3,b.v3)); |
| 265 | } |
| 266 | |
| 267 | __forceinline void split(CubicBezierCurve& left, CubicBezierCurve& right, const float t = 0.5f) const |
| 268 | { |
| 269 | const Vertex p00 = v0; |
| 270 | const Vertex p01 = v1; |
| 271 | const Vertex p02 = v2; |
| 272 | const Vertex p03 = v3; |
| 273 | |
| 274 | const Vertex p10 = lerp(p00,p01,t); |
| 275 | const Vertex p11 = lerp(p01,p02,t); |
| 276 | const Vertex p12 = lerp(p02,p03,t); |
| 277 | const Vertex p20 = lerp(p10,p11,t); |
| 278 | const Vertex p21 = lerp(p11,p12,t); |
| 279 | const Vertex p30 = lerp(p20,p21,t); |
| 280 | |
| 281 | new (&left ) CubicBezierCurve(p00,p10,p20,p30); |
| 282 | new (&right) CubicBezierCurve(p30,p21,p12,p03); |
| 283 | } |
| 284 | |
| 285 | __forceinline CubicBezierCurve<Vec2vfx> split() const |
| 286 | { |
| 287 | const float u0 = 0.0f, u1 = 1.0f; |
| 288 | const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1))); |
| 289 | const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1))); |
| 290 | Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale); |
| 291 | const Vec2vfx P3 = shift_right_1(P0); |
| 292 | const Vec2vfx dP3du = shift_right_1(dP0du); |
| 293 | const Vec2vfx P1 = P0 + dP0du; |
| 294 | const Vec2vfx P2 = P3 - dP3du; |
| 295 | return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3); |
| 296 | } |
| 297 | |
| 298 | __forceinline CubicBezierCurve<Vec2vfx> split(const BBox1f& u) const |
| 299 | { |
| 300 | const float u0 = u.lower, u1 = u.upper; |
| 301 | const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1))); |
| 302 | const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1))); |
| 303 | Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale); |
| 304 | const Vec2vfx P3 = shift_right_1(P0); |
| 305 | const Vec2vfx dP3du = shift_right_1(dP0du); |
| 306 | const Vec2vfx P1 = P0 + dP0du; |
| 307 | const Vec2vfx P2 = P3 - dP3du; |
| 308 | return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3); |
| 309 | } |
| 310 | |
| 311 | __forceinline void eval(float t, Vertex& p, Vertex& dp) const |
| 312 | { |
| 313 | const Vertex p00 = v0; |
| 314 | const Vertex p01 = v1; |
| 315 | const Vertex p02 = v2; |
| 316 | const Vertex p03 = v3; |
| 317 | |
| 318 | const Vertex p10 = lerp(p00,p01,t); |
| 319 | const Vertex p11 = lerp(p01,p02,t); |
| 320 | const Vertex p12 = lerp(p02,p03,t); |
| 321 | const Vertex p20 = lerp(p10,p11,t); |
| 322 | const Vertex p21 = lerp(p11,p12,t); |
| 323 | const Vertex p30 = lerp(p20,p21,t); |
| 324 | |
| 325 | p = p30; |
| 326 | dp = Vertex(3.0f)*(p21-p20); |
| 327 | } |
| 328 | |
| 329 | #if 0 |
| 330 | __forceinline Vertex eval(float t) const |
| 331 | { |
| 332 | const Vertex p00 = v0; |
| 333 | const Vertex p01 = v1; |
| 334 | const Vertex p02 = v2; |
| 335 | const Vertex p03 = v3; |
| 336 | |
| 337 | const Vertex p10 = lerp(p00,p01,t); |
| 338 | const Vertex p11 = lerp(p01,p02,t); |
| 339 | const Vertex p12 = lerp(p02,p03,t); |
| 340 | const Vertex p20 = lerp(p10,p11,t); |
| 341 | const Vertex p21 = lerp(p11,p12,t); |
| 342 | const Vertex p30 = lerp(p20,p21,t); |
| 343 | |
| 344 | return p30; |
| 345 | } |
| 346 | #else |
| 347 | __forceinline Vertex eval(const float t) const |
| 348 | { |
| 349 | const Vec4<float> b = BezierBasis::eval(t); |
| 350 | return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3))); |
| 351 | } |
| 352 | #endif |
| 353 | |
| 354 | __forceinline Vertex eval_dt(float t) const |
| 355 | { |
| 356 | const Vertex p00 = v1-v0; |
| 357 | const Vertex p01 = v2-v1; |
| 358 | const Vertex p02 = v3-v2; |
| 359 | const Vertex p10 = lerp(p00,p01,t); |
| 360 | const Vertex p11 = lerp(p01,p02,t); |
| 361 | const Vertex p20 = lerp(p10,p11,t); |
| 362 | return Vertex(3.0f)*p20; |
| 363 | } |
| 364 | |
| 365 | __forceinline Vertex eval_du(const float t) const |
| 366 | { |
| 367 | const Vec4<float> b = BezierBasis::derivative(t); |
| 368 | return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3))); |
| 369 | } |
| 370 | |
| 371 | __forceinline Vertex eval_dudu(const float t) const |
| 372 | { |
| 373 | const Vec4<float> b = BezierBasis::derivative2(t); |
| 374 | return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3))); |
| 375 | } |
| 376 | |
| 377 | __forceinline void evalN(const vfloatx& t, Vec2vfx& p, Vec2vfx& dp) const |
| 378 | { |
| 379 | const Vec2vfx p00 = v0; |
| 380 | const Vec2vfx p01 = v1; |
| 381 | const Vec2vfx p02 = v2; |
| 382 | const Vec2vfx p03 = v3; |
| 383 | |
| 384 | const Vec2vfx p10 = lerp(p00,p01,t); |
| 385 | const Vec2vfx p11 = lerp(p01,p02,t); |
| 386 | const Vec2vfx p12 = lerp(p02,p03,t); |
| 387 | |
| 388 | const Vec2vfx p20 = lerp(p10,p11,t); |
| 389 | const Vec2vfx p21 = lerp(p11,p12,t); |
| 390 | |
| 391 | const Vec2vfx p30 = lerp(p20,p21,t); |
| 392 | |
| 393 | p = p30; |
| 394 | dp = vfloatx(3.0f)*(p21-p20); |
| 395 | } |
| 396 | |
| 397 | __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const |
| 398 | { |
| 399 | const Vertex p00 = v0; |
| 400 | const Vertex p01 = v1; |
| 401 | const Vertex p02 = v2; |
| 402 | const Vertex p03 = v3; |
| 403 | const Vertex p10 = lerp(p00,p01,t); |
| 404 | const Vertex p11 = lerp(p01,p02,t); |
| 405 | const Vertex p12 = lerp(p02,p03,t); |
| 406 | const Vertex p20 = lerp(p10,p11,t); |
| 407 | const Vertex p21 = lerp(p11,p12,t); |
| 408 | const Vertex p30 = lerp(p20,p21,t); |
| 409 | p = p30; |
| 410 | dp = 3.0f*(p21-p20); |
| 411 | ddp = eval_dudu(t); |
| 412 | } |
| 413 | |
| 414 | __forceinline CubicBezierCurve clip(const Interval1f& u1) const |
| 415 | { |
| 416 | Vertex f0,df0; eval(u1.lower,f0,df0); |
| 417 | Vertex f1,df1; eval(u1.upper,f1,df1); |
| 418 | float s = u1.upper-u1.lower; |
| 419 | return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1); |
| 420 | } |
| 421 | |
| 422 | __forceinline QuadraticBezierCurve<Vertex> derivative() const |
| 423 | { |
| 424 | const Vertex q0 = 3.0f*(v1-v0); |
| 425 | const Vertex q1 = 3.0f*(v2-v1); |
| 426 | const Vertex q2 = 3.0f*(v3-v2); |
| 427 | return QuadraticBezierCurve<Vertex>(q0,q1,q2); |
| 428 | } |
| 429 | |
| 430 | __forceinline BBox<Vertex> derivative_bounds(const Interval1f& u1) const |
| 431 | { |
| 432 | Vertex f0,df0; eval(u1.lower,f0,df0); |
| 433 | Vertex f3,df3; eval(u1.upper,f3,df3); |
| 434 | const float s = u1.upper-u1.lower; |
| 435 | const Vertex f1 = f0+s*(1.0f/3.0f)*df0; |
| 436 | const Vertex f2 = f3-s*(1.0f/3.0f)*df3; |
| 437 | const Vertex q0 = s*df0; |
| 438 | const Vertex q1 = 3.0f*(f2-f1); |
| 439 | const Vertex q2 = s*df3; |
| 440 | return merge(BBox<Vertex>(q0),BBox<Vertex>(q1),BBox<Vertex>(q2)); |
| 441 | } |
| 442 | |
| 443 | template<int M> |
| 444 | __forceinline Vec4vf<M> veval(const vfloat<M>& t) const |
| 445 | { |
| 446 | const Vec4vf<M> b = BezierBasis::eval(t); |
| 447 | return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3)))); |
| 448 | } |
| 449 | |
| 450 | template<int M> |
| 451 | __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const |
| 452 | { |
| 453 | const Vec4vf<M> b = BezierBasis::derivative(t); |
| 454 | return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3)))); |
| 455 | } |
| 456 | |
| 457 | template<int M> |
| 458 | __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const |
| 459 | { |
| 460 | const Vec4vf<M> b = BezierBasis::derivative2(t); |
| 461 | return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3)))); |
| 462 | } |
| 463 | |
| 464 | template<int M> |
| 465 | __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const |
| 466 | { |
| 467 | const Vec4vf<M> p00 = v0; |
| 468 | const Vec4vf<M> p01 = v1; |
| 469 | const Vec4vf<M> p02 = v2; |
| 470 | const Vec4vf<M> p03 = v3; |
| 471 | |
| 472 | const Vec4vf<M> p10 = lerp(p00,p01,t); |
| 473 | const Vec4vf<M> p11 = lerp(p01,p02,t); |
| 474 | const Vec4vf<M> p12 = lerp(p02,p03,t); |
| 475 | const Vec4vf<M> p20 = lerp(p10,p11,t); |
| 476 | const Vec4vf<M> p21 = lerp(p11,p12,t); |
| 477 | const Vec4vf<M> p30 = lerp(p20,p21,t); |
| 478 | |
| 479 | p = p30; |
| 480 | dp = vfloat<M>(3.0f)*(p21-p20); |
| 481 | } |
| 482 | |
| 483 | template<int M, typename Vec = Vec4vf<M>> |
| 484 | __forceinline Vec eval0(const int ofs, const int size) const |
| 485 | { |
| 486 | assert(size <= PrecomputedBezierBasis::N); |
| 487 | assert(ofs <= size); |
| 488 | return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec(v0), |
| 489 | madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec(v1), |
| 490 | madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec(v2), |
| 491 | vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec(v3)))); |
| 492 | } |
| 493 | |
| 494 | template<int M, typename Vec = Vec4vf<M>> |
| 495 | __forceinline Vec eval1(const int ofs, const int size) const |
| 496 | { |
| 497 | assert(size <= PrecomputedBezierBasis::N); |
| 498 | assert(ofs <= size); |
| 499 | return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec(v0), |
| 500 | madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec(v1), |
| 501 | madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec(v2), |
| 502 | vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec(v3)))); |
| 503 | } |
| 504 | |
| 505 | template<int M, typename Vec = Vec4vf<M>> |
| 506 | __forceinline Vec derivative0(const int ofs, const int size) const |
| 507 | { |
| 508 | assert(size <= PrecomputedBezierBasis::N); |
| 509 | assert(ofs <= size); |
| 510 | return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec(v0), |
| 511 | madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec(v1), |
| 512 | madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec(v2), |
| 513 | vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec(v3)))); |
| 514 | } |
| 515 | |
| 516 | template<int M, typename Vec = Vec4vf<M>> |
| 517 | __forceinline Vec derivative1(const int ofs, const int size) const |
| 518 | { |
| 519 | assert(size <= PrecomputedBezierBasis::N); |
| 520 | assert(ofs <= size); |
| 521 | return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec(v0), |
| 522 | madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec(v1), |
| 523 | madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec(v2), |
| 524 | vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec(v3)))); |
| 525 | } |
| 526 | |
| 527 | /* calculates bounds of bezier curve geometry */ |
| 528 | __forceinline BBox3fa accurateBounds() const |
| 529 | { |
| 530 | const int N = 7; |
| 531 | const float scale = 1.0f/(3.0f*(N-1)); |
| 532 | Vec3vfx pl(pos_inf), pu(neg_inf); |
| 533 | for (int i=0; i<=N; i+=VSIZEX) |
| 534 | { |
| 535 | vintx vi = vintx(i)+vintx(step); |
| 536 | vboolx valid = vi <= vintx(N); |
| 537 | const Vec3vfx p = eval0<VSIZEX,Vec3vf<VSIZEX>>(i,N); |
| 538 | const Vec3vfx dp = derivative0<VSIZEX,Vec3vf<VSIZEX>>(i,N); |
| 539 | const Vec3vfx pm = p-Vec3vfx(scale)*select(vi!=vintx(0),dp,Vec3vfx(zero)); |
| 540 | const Vec3vfx pp = p+Vec3vfx(scale)*select(vi!=vintx(N),dp,Vec3vfx(zero)); |
| 541 | pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min |
| 542 | pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min |
| 543 | } |
| 544 | const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z)); |
| 545 | const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z)); |
| 546 | return BBox3fa(lower,upper); |
| 547 | } |
| 548 | |
| 549 | /* calculates bounds of bezier curve geometry */ |
| 550 | __forceinline BBox3fa accurateRoundBounds() const |
| 551 | { |
| 552 | const int N = 7; |
| 553 | const float scale = 1.0f/(3.0f*(N-1)); |
| 554 | Vec4vfx pl(pos_inf), pu(neg_inf); |
| 555 | for (int i=0; i<=N; i+=VSIZEX) |
| 556 | { |
| 557 | vintx vi = vintx(i)+vintx(step); |
| 558 | vboolx valid = vi <= vintx(N); |
| 559 | const Vec4vfx p = eval0<VSIZEX>(i,N); |
| 560 | const Vec4vfx dp = derivative0<VSIZEX>(i,N); |
| 561 | const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero)); |
| 562 | const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero)); |
| 563 | pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min |
| 564 | pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min |
| 565 | } |
| 566 | const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z)); |
| 567 | const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z)); |
| 568 | const float r_min = reduce_min(pl.w); |
| 569 | const float r_max = reduce_max(pu.w); |
| 570 | const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max))); |
| 571 | return enlarge(BBox3fa(lower,upper),upper_r); |
| 572 | } |
| 573 | |
| 574 | /* calculates bounds when tessellated into N line segments */ |
| 575 | __forceinline BBox3fa accurateFlatBounds(int N) const |
| 576 | { |
| 577 | if (likely(N == 4)) |
| 578 | { |
| 579 | const Vec4vf4 pi = eval0<4>(0,4); |
| 580 | const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z)); |
| 581 | const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z)); |
| 582 | const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w))); |
| 583 | return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w)))); |
| 584 | } |
| 585 | else |
| 586 | { |
| 587 | Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f); |
| 588 | for (int i=0; i<N; i+=VSIZEX) |
| 589 | { |
| 590 | vboolx valid = vintx(i)+vintx(step) < vintx(N); |
| 591 | const Vec4vfx pi = eval0<VSIZEX>(i,N); |
| 592 | |
| 593 | pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min |
| 594 | pl.y = select(valid,min(pl.y,pi.y),pl.y); |
| 595 | pl.z = select(valid,min(pl.z,pi.z),pl.z); |
| 596 | |
| 597 | pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min |
| 598 | pu.y = select(valid,max(pu.y,pi.y),pu.y); |
| 599 | pu.z = select(valid,max(pu.z,pi.z),pu.z); |
| 600 | |
| 601 | ru = select(valid,max(ru,abs(pi.w)),ru); |
| 602 | } |
| 603 | const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z)); |
| 604 | const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z)); |
| 605 | const Vec3fa upper_r(reduce_max(ru)); |
| 606 | return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w)))); |
| 607 | } |
| 608 | } |
| 609 | |
| 610 | friend __forceinline embree_ostream operator<<(embree_ostream cout, const CubicBezierCurve& curve) { |
| 611 | return cout << "CubicBezierCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }" ; |
| 612 | } |
| 613 | }; |
| 614 | |
#if defined(__AVX__)
/*! vfloat4 specialization of clip(): u1.lower and u1.upper are packed
 *  into one vfloat8 so that both interval ends are evaluated by a single
 *  de Casteljau pass; extract4<0>/<1> then split the result into the
 *  values used for the start and the end of the clipped curve. */
template<>
__forceinline CubicBezierCurve<vfloat4> CubicBezierCurve<vfloat4>::clip(const Interval1f& u1) const
{
  const vfloat8 c0 = vfloat8(v0);
  const vfloat8 c1 = vfloat8(v1);
  const vfloat8 c2 = vfloat8(v2);
  const vfloat8 c3 = vfloat8(v3);

  const vfloat8 t(vfloat4(u1.lower),vfloat4(u1.upper));
  const vfloat8 a0 = lerp(c0,c1,t);
  const vfloat8 a1 = lerp(c1,c2,t);
  const vfloat8 a2 = lerp(c2,c3,t);
  const vfloat8 b0 = lerp(a0,a1,t);
  const vfloat8 b1 = lerp(a1,a2,t);

  const vfloat8 pos   = lerp(b0,b1,t);
  const vfloat8 deriv = vfloat8(3.0f)*(b1-b0);

  const vfloat4 startPos   = extract4<0>(pos);
  const vfloat4 endPos     = extract4<1>(pos);
  const vfloat4 startDeriv = extract4<0>(deriv);
  const vfloat4 endDeriv   = extract4<1>(deriv);

  const float len = u1.upper-u1.lower;
  const float third = len*(1.0f/3.0f);
  return CubicBezierCurve(startPos,
                          startPos+third*startDeriv,
                          endPos-third*endDeriv,
                          endPos);
}
#endif
| 641 | |
| 642 | template<typename Vertex> using BezierCurveT = CubicBezierCurve<Vertex>; |
| 643 | |
| 644 | typedef CubicBezierCurve<float> CubicBezierCurve1f; |
| 645 | typedef CubicBezierCurve<Vec2fa> CubicBezierCurve2fa; |
| 646 | typedef CubicBezierCurve<Vec3fa> CubicBezierCurve3fa; |
| 647 | typedef CubicBezierCurve<Vec3fa> BezierCurve3fa; |
| 648 | |
| 649 | template<> __forceinline int CubicBezierCurve<float>::maxRoots() const |
| 650 | { |
| 651 | float eps = 1E-4f; |
| 652 | bool neg0 = v0 <= 0.0f; bool zero0 = fabs(v0) < eps; |
| 653 | bool neg1 = v1 <= 0.0f; bool zero1 = fabs(v1) < eps; |
| 654 | bool neg2 = v2 <= 0.0f; bool zero2 = fabs(v2) < eps; |
| 655 | bool neg3 = v3 <= 0.0f; bool zero3 = fabs(v3) < eps; |
| 656 | return (neg0 != neg1 || zero0) + (neg1 != neg2 || zero1) + (neg2 != neg3 || zero2 || zero3); |
| 657 | } |
| 658 | |
| 659 | template<> __forceinline int CubicBezierCurve<Interval1f>::maxRoots() const { |
| 660 | return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3); |
| 661 | } |
| 662 | |
| 663 | template<typename CurveGeometry> |
| 664 | __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve) |
| 665 | { |
| 666 | return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0), |
| 667 | enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1), |
| 668 | enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2), |
| 669 | enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3)); |
| 670 | } |
| 671 | } |
| 672 | |