| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "patch.h" |
| 7 | #include "catmullclark_patch.h" |
| 8 | #include "bspline_patch.h" |
| 9 | #include "gregory_patch.h" |
| 10 | #include "tessellation.h" |
| 11 | |
| 12 | namespace embree |
| 13 | { |
| 14 | namespace isa |
| 15 | { |
| 16 | struct FeatureAdaptiveEvalGrid |
| 17 | { |
| 18 | typedef CatmullClark1Ring3fa CatmullClarkRing; |
| 19 | typedef CatmullClarkPatch3fa CatmullClarkPatch; |
| 20 | typedef BilinearPatch3fa BilinearPatch; |
| 21 | typedef BSplinePatch3fa BSplinePatch; |
| 22 | typedef BezierPatch3fa BezierPatch; |
| 23 | typedef GregoryPatch3fa GregoryPatch; |
| 24 | |
| 25 | private: |
| 26 | const unsigned x0,x1; |
| 27 | const unsigned y0,y1; |
| 28 | const unsigned swidth,sheight; |
| 29 | const float rcp_swidth, rcp_sheight; |
| 30 | float* const Px; |
| 31 | float* const Py; |
| 32 | float* const Pz; |
| 33 | float* const U; |
| 34 | float* const V; |
| 35 | float* const Nx; |
| 36 | float* const Ny; |
| 37 | float* const Nz; |
| 38 | const unsigned dwidth; |
| 39 | //const unsigned dheight; |
| 40 | unsigned count; |
| 41 | |
| 42 | |
| 43 | public: |
| 44 | FeatureAdaptiveEvalGrid (const GeneralCatmullClarkPatch3fa& patch, unsigned subPatch, |
| 45 | const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, |
| 46 | float* Px, float* Py, float* Pz, float* U, float* V, |
| 47 | float* Nx, float* Ny, float* Nz, |
| 48 | const unsigned dwidth, const unsigned dheight) |
| 49 | : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)), |
| 50 | Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0) |
| 51 | { |
| 52 | assert(swidth < (2<<20) && sheight < (2<<20)); |
| 53 | const BBox2f srange(Vec2f(0.0f,0.0f),Vec2f(float(swidth-1),float(sheight-1))); |
| 54 | const BBox2f erange(Vec2f((float)x0,(float)y0),Vec2f((float)x1,(float)y1)); |
| 55 | |
| 56 | /* convert into standard quad patch if possible */ |
| 57 | if (likely(patch.isQuadPatch())) |
| 58 | { |
| 59 | CatmullClarkPatch3fa qpatch; patch.init(qpatch); |
| 60 | eval(qpatch, srange, erange, 0); |
| 61 | assert(count == (x1-x0+1)*(y1-y0+1)); |
| 62 | return; |
| 63 | } |
| 64 | |
| 65 | /* subdivide patch */ |
| 66 | unsigned N; |
| 67 | array_t<CatmullClarkPatch3fa,GeneralCatmullClarkPatch3fa::SIZE> patches; |
| 68 | patch.subdivide(patches,N); |
| 69 | |
| 70 | if (N == 4) |
| 71 | { |
| 72 | const Vec2f c = srange.center(); |
| 73 | const BBox2f srange0(srange.lower,c); |
| 74 | const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y)); |
| 75 | const BBox2f srange2(c,srange.upper); |
| 76 | const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y)); |
| 77 | |
| 78 | #if PATCH_USE_GREGORY == 2 |
| 79 | BezierCurve3fa borders[GeneralCatmullClarkPatch3fa::SIZE]; patch.getLimitBorder(borders); |
| 80 | BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r); |
| 81 | BezierCurve3fa border1l,border1r; borders[1].subdivide(border1l,border1r); |
| 82 | BezierCurve3fa border2l,border2r; borders[2].subdivide(border2l,border2r); |
| 83 | BezierCurve3fa border3l,border3r; borders[3].subdivide(border3l,border3r); |
| 84 | GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches); |
| 85 | eval(patches[0],srange0,intersect(srange0,erange),1,&border0l,nullptr,nullptr,&border3r); |
| 86 | eval(patches[1],srange1,intersect(srange1,erange),1,&border0r,&border1l,nullptr,nullptr); |
| 87 | eval(patches[2],srange2,intersect(srange2,erange),1,nullptr,&border1r,&border2l,nullptr); |
| 88 | eval(patches[3],srange3,intersect(srange3,erange),1,nullptr,nullptr,&border2r,&border3l); |
| 89 | #else |
| 90 | GeneralCatmullClarkPatch3fa::fix_quad_ring_order(patches); |
| 91 | eval(patches[0],srange0,intersect(srange0,erange),1); |
| 92 | eval(patches[1],srange1,intersect(srange1,erange),1); |
| 93 | eval(patches[2],srange2,intersect(srange2,erange),1); |
| 94 | eval(patches[3],srange3,intersect(srange3,erange),1); |
| 95 | #endif |
| 96 | } |
| 97 | else |
| 98 | { |
| 99 | assert(subPatch < N); |
| 100 | |
| 101 | #if PATCH_USE_GREGORY == 2 |
| 102 | BezierCurve3fa borders[2]; patch.getLimitBorder(borders,subPatch); |
| 103 | BezierCurve3fa border0l,border0r; borders[0].subdivide(border0l,border0r); |
| 104 | BezierCurve3fa border2l,border2r; borders[1].subdivide(border2l,border2r); |
| 105 | eval(patches[subPatch], srange, erange, 1, &border0l, nullptr, nullptr, &border2r); |
| 106 | #else |
| 107 | eval(patches[subPatch], srange, erange, 1); |
| 108 | #endif |
| 109 | |
| 110 | } |
| 111 | assert(count == (x1-x0+1)*(y1-y0+1)); |
| 112 | } |
| 113 | |
| 114 | FeatureAdaptiveEvalGrid (const CatmullClarkPatch3fa& patch, |
| 115 | const BBox2f& srange, const BBox2f& erange, const unsigned depth, |
| 116 | const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, |
| 117 | float* Px, float* Py, float* Pz, float* U, float* V, |
| 118 | float* Nx, float* Ny, float* Nz, |
| 119 | const unsigned dwidth, const unsigned dheight) |
| 120 | : x0(x0), x1(x1), y0(y0), y1(y1), swidth(swidth), sheight(sheight), rcp_swidth(1.0f/(swidth-1.0f)), rcp_sheight(1.0f/(sheight-1.0f)), |
| 121 | Px(Px), Py(Py), Pz(Pz), U(U), V(V), Nx(Nx), Ny(Ny), Nz(Nz), dwidth(dwidth), /*dheight(dheight),*/ count(0) |
| 122 | { |
| 123 | eval(patch,srange,erange,depth); |
| 124 | } |
| 125 | |
| 126 | template<typename Patch> |
| 127 | void evalLocalGrid(const Patch& patch, const BBox2f& srange, const int lx0, const int lx1, const int ly0, const int ly1) |
| 128 | { |
| 129 | const float scale_x = rcp(srange.upper.x-srange.lower.x); |
| 130 | const float scale_y = rcp(srange.upper.y-srange.lower.y); |
| 131 | count += (lx1-lx0)*(ly1-ly0); |
| 132 | |
| 133 | #if 0 |
| 134 | for (unsigned iy=ly0; iy<ly1; iy++) { |
| 135 | for (unsigned ix=lx0; ix<lx1; ix++) { |
| 136 | const float lu = select(ix == swidth -1, float(1.0f), (float(ix)-srange.lower.x)*scale_x); |
| 137 | const float lv = select(iy == sheight-1, float(1.0f), (float(iy)-srange.lower.y)*scale_y); |
| 138 | const Vec3fa p = patch.eval(lu,lv); |
| 139 | const float u = float(ix)*rcp_swidth; |
| 140 | const float v = float(iy)*rcp_sheight; |
| 141 | const int ofs = (iy-y0)*dwidth+(ix-x0); |
| 142 | Px[ofs] = p.x; |
| 143 | Py[ofs] = p.y; |
| 144 | Pz[ofs] = p.z; |
| 145 | U[ofs] = u; |
| 146 | V[ofs] = v; |
| 147 | } |
| 148 | } |
| 149 | #else |
| 150 | foreach2(lx0,lx1,ly0,ly1,[&](const vboolx& valid, const vintx& ix, const vintx& iy) { |
| 151 | const vfloatx lu = select(ix == swidth -1, vfloatx(1.0f), (vfloatx(ix)-srange.lower.x)*scale_x); |
| 152 | const vfloatx lv = select(iy == sheight-1, vfloatx(1.0f), (vfloatx(iy)-srange.lower.y)*scale_y); |
| 153 | const Vec3vfx p = patch.eval(lu,lv); |
| 154 | Vec3vfx n = zero; |
| 155 | if (unlikely(Nx != nullptr)) n = normalize_safe(patch.normal(lu,lv)); |
| 156 | const vfloatx u = vfloatx(ix)*rcp_swidth; |
| 157 | const vfloatx v = vfloatx(iy)*rcp_sheight; |
| 158 | const vintx ofs = (iy-y0)*dwidth+(ix-x0); |
| 159 | if (likely(all(valid)) && all(iy==iy[0])) { |
| 160 | const unsigned ofs2 = ofs[0]; |
| 161 | vfloatx::storeu(Px+ofs2,p.x); |
| 162 | vfloatx::storeu(Py+ofs2,p.y); |
| 163 | vfloatx::storeu(Pz+ofs2,p.z); |
| 164 | vfloatx::storeu(U+ofs2,u); |
| 165 | vfloatx::storeu(V+ofs2,v); |
| 166 | if (unlikely(Nx != nullptr)) { |
| 167 | vfloatx::storeu(Nx+ofs2,n.x); |
| 168 | vfloatx::storeu(Ny+ofs2,n.y); |
| 169 | vfloatx::storeu(Nz+ofs2,n.z); |
| 170 | } |
| 171 | } else { |
| 172 | foreach_unique_index(valid,iy,[&](const vboolx& valid, const int iy0, const int j) { |
| 173 | const unsigned ofs2 = ofs[j]-j; |
| 174 | vfloatx::storeu(valid,Px+ofs2,p.x); |
| 175 | vfloatx::storeu(valid,Py+ofs2,p.y); |
| 176 | vfloatx::storeu(valid,Pz+ofs2,p.z); |
| 177 | vfloatx::storeu(valid,U+ofs2,u); |
| 178 | vfloatx::storeu(valid,V+ofs2,v); |
| 179 | if (unlikely(Nx != nullptr)) { |
| 180 | vfloatx::storeu(valid,Nx+ofs2,n.x); |
| 181 | vfloatx::storeu(valid,Ny+ofs2,n.y); |
| 182 | vfloatx::storeu(valid,Nz+ofs2,n.z); |
| 183 | } |
| 184 | }); |
| 185 | } |
| 186 | }); |
| 187 | #endif |
| 188 | } |
| 189 | |
| 190 | __forceinline bool final(const CatmullClarkPatch3fa& patch, const CatmullClarkRing::Type type, unsigned depth) |
| 191 | { |
| 192 | const unsigned max_eval_depth = (type & CatmullClarkRing::TYPE_CREASES) ? PATCH_MAX_EVAL_DEPTH_CREASE : PATCH_MAX_EVAL_DEPTH_IRREGULAR; |
| 193 | //#if PATCH_MIN_RESOLUTION |
| 194 | // return patch.isFinalResolution(PATCH_MIN_RESOLUTION) || depth>=max_eval_depth; |
| 195 | //#else |
| 196 | return depth>=max_eval_depth; |
| 197 | //#endif |
| 198 | } |
| 199 | |
| 200 | void eval(const CatmullClarkPatch3fa& patch, const BBox2f& srange, const BBox2f& erange, const unsigned depth, |
| 201 | const BezierCurve3fa* border0 = nullptr, const BezierCurve3fa* border1 = nullptr, const BezierCurve3fa* border2 = nullptr, const BezierCurve3fa* border3 = nullptr) |
| 202 | { |
| 203 | if (erange.empty()) |
| 204 | return; |
| 205 | |
| 206 | int lx0 = (int) ceilf(erange.lower.x); |
| 207 | int lx1 = (int) ceilf(erange.upper.x) + (erange.upper.x == x1 && (srange.lower.x < erange.upper.x || erange.upper.x == 0)); |
| 208 | int ly0 = (int) ceilf(erange.lower.y); |
| 209 | int ly1 = (int) ceilf(erange.upper.y) + (erange.upper.y == y1 && (srange.lower.y < erange.upper.y || erange.upper.y == 0)); |
| 210 | if (lx0 >= lx1 || ly0 >= ly1) return; |
| 211 | |
| 212 | CatmullClarkPatch::Type ty = patch.type(); |
| 213 | |
| 214 | if (unlikely(final(patch,ty,depth))) |
| 215 | { |
| 216 | if (ty & CatmullClarkRing::TYPE_REGULAR) { |
| 217 | RegularPatch rpatch(patch,border0,border1,border2,border3); |
| 218 | evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1); |
| 219 | return; |
| 220 | } else { |
| 221 | IrregularFillPatch ipatch(patch,border0,border1,border2,border3); |
| 222 | evalLocalGrid(ipatch,srange,lx0,lx1,ly0,ly1); |
| 223 | return; |
| 224 | } |
| 225 | } |
| 226 | else if (ty & CatmullClarkRing::TYPE_REGULAR_CREASES) { |
| 227 | assert(depth > 0); |
| 228 | RegularPatch rpatch(patch,border0,border1,border2,border3); |
| 229 | evalLocalGrid(rpatch,srange,lx0,lx1,ly0,ly1); |
| 230 | return; |
| 231 | } |
| 232 | #if PATCH_USE_GREGORY == 2 |
| 233 | else if (ty & CatmullClarkRing::TYPE_GREGORY_CREASES) { |
| 234 | assert(depth > 0); |
| 235 | GregoryPatch gpatch(patch,border0,border1,border2,border3); |
| 236 | evalLocalGrid(gpatch,srange,lx0,lx1,ly0,ly1); |
| 237 | } |
| 238 | #endif |
| 239 | else |
| 240 | { |
| 241 | array_t<CatmullClarkPatch3fa,4> patches; |
| 242 | patch.subdivide(patches); |
| 243 | |
| 244 | const Vec2f c = srange.center(); |
| 245 | const BBox2f srange0(srange.lower,c); |
| 246 | const BBox2f srange1(Vec2f(c.x,srange.lower.y),Vec2f(srange.upper.x,c.y)); |
| 247 | const BBox2f srange2(c,srange.upper); |
| 248 | const BBox2f srange3(Vec2f(srange.lower.x,c.y),Vec2f(c.x,srange.upper.y)); |
| 249 | |
| 250 | eval(patches[0],srange0,intersect(srange0,erange),depth+1); |
| 251 | eval(patches[1],srange1,intersect(srange1,erange),depth+1); |
| 252 | eval(patches[2],srange2,intersect(srange2,erange),depth+1); |
| 253 | eval(patches[3],srange3,intersect(srange3,erange),depth+1); |
| 254 | } |
| 255 | } |
| 256 | }; |
| 257 | |
| 258 | template<typename Eval, typename Patch> |
| 259 | bool stitch_col(const Patch& patch, int subPatch, |
| 260 | const bool right, const unsigned y0, const unsigned y1, const int fine_y, const int coarse_y, |
| 261 | float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dx0, const unsigned dwidth, const unsigned dheight) |
| 262 | { |
| 263 | assert(coarse_y <= fine_y); |
| 264 | if (likely(fine_y == coarse_y)) |
| 265 | return false; |
| 266 | |
| 267 | const unsigned y0s = stitch(y0,fine_y,coarse_y); |
| 268 | const unsigned y1s = stitch(y1,fine_y,coarse_y); |
| 269 | const unsigned M = y1s-y0s+1 + VSIZEX; |
| 270 | |
| 271 | dynamic_large_stack_array(float,px,M,64*sizeof(float)); |
| 272 | dynamic_large_stack_array(float,py,M,64*sizeof(float)); |
| 273 | dynamic_large_stack_array(float,pz,M,64*sizeof(float)); |
| 274 | dynamic_large_stack_array(float,u,M,64*sizeof(float)); |
| 275 | dynamic_large_stack_array(float,v,M,64*sizeof(float)); |
| 276 | dynamic_large_stack_array(float,nx,M,64*sizeof(float)); |
| 277 | dynamic_large_stack_array(float,ny,M,64*sizeof(float)); |
| 278 | dynamic_large_stack_array(float,nz,M,64*sizeof(float)); |
| 279 | const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz)); |
| 280 | Eval(patch,subPatch, right,right, y0s,y1s, 2,coarse_y+1, px,py,pz,u,v, |
| 281 | has_Nxyz ? (float*)nx : nullptr,has_Nxyz ? (float*)ny : nullptr ,has_Nxyz ? (float*)nz : nullptr, 1,4097); |
| 282 | |
| 283 | for (unsigned y=y0; y<=y1; y++) |
| 284 | { |
| 285 | const unsigned ys = stitch(y,fine_y,coarse_y)-y0s; |
| 286 | Px[(y-y0)*dwidth+dx0] = px[ys]; |
| 287 | Py[(y-y0)*dwidth+dx0] = py[ys]; |
| 288 | Pz[(y-y0)*dwidth+dx0] = pz[ys]; |
| 289 | U [(y-y0)*dwidth+dx0] = u[ys]; |
| 290 | V [(y-y0)*dwidth+dx0] = v[ys]; |
| 291 | if (unlikely(has_Nxyz)) { |
| 292 | Nx[(y-y0)*dwidth+dx0] = nx[ys]; |
| 293 | Ny[(y-y0)*dwidth+dx0] = ny[ys]; |
| 294 | Nz[(y-y0)*dwidth+dx0] = nz[ys]; |
| 295 | } |
| 296 | } |
| 297 | return true; |
| 298 | } |
| 299 | |
| 300 | template<typename Eval, typename Patch> |
| 301 | bool stitch_row(const Patch& patch, int subPatch, |
| 302 | const bool bottom, const unsigned x0, const unsigned x1, const int fine_x, const int coarse_x, |
| 303 | float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dy0, const unsigned dwidth, const unsigned dheight) |
| 304 | { |
| 305 | assert(coarse_x <= fine_x); |
| 306 | if (likely(fine_x == coarse_x)) |
| 307 | return false; |
| 308 | |
| 309 | const unsigned x0s = stitch(x0,fine_x,coarse_x); |
| 310 | const unsigned x1s = stitch(x1,fine_x,coarse_x); |
| 311 | const unsigned M = x1s-x0s+1 + VSIZEX; |
| 312 | |
| 313 | dynamic_large_stack_array(float,px,M,32*sizeof(float)); |
| 314 | dynamic_large_stack_array(float,py,M,32*sizeof(float)); |
| 315 | dynamic_large_stack_array(float,pz,M,32*sizeof(float)); |
| 316 | dynamic_large_stack_array(float,u,M,32*sizeof(float)); |
| 317 | dynamic_large_stack_array(float,v,M,32*sizeof(float)); |
| 318 | dynamic_large_stack_array(float,nx,M,32*sizeof(float)); |
| 319 | dynamic_large_stack_array(float,ny,M,32*sizeof(float)); |
| 320 | dynamic_large_stack_array(float,nz,M,32*sizeof(float)); |
| 321 | const bool has_Nxyz = Nx; assert(!Nx || (Ny && Nz)); |
| 322 | Eval(patch,subPatch, x0s,x1s, bottom,bottom, coarse_x+1,2, px,py,pz,u,v, |
| 323 | has_Nxyz ? (float*)nx :nullptr, has_Nxyz ? (float*)ny : nullptr , has_Nxyz ? (float*)nz : nullptr, 4097,1); |
| 324 | |
| 325 | for (unsigned x=x0; x<=x1; x++) |
| 326 | { |
| 327 | const unsigned xs = stitch(x,fine_x,coarse_x)-x0s; |
| 328 | Px[dy0*dwidth+x-x0] = px[xs]; |
| 329 | Py[dy0*dwidth+x-x0] = py[xs]; |
| 330 | Pz[dy0*dwidth+x-x0] = pz[xs]; |
| 331 | U [dy0*dwidth+x-x0] = u[xs]; |
| 332 | V [dy0*dwidth+x-x0] = v[xs]; |
| 333 | if (unlikely(has_Nxyz)) { |
| 334 | Nx[dy0*dwidth+x-x0] = nx[xs]; |
| 335 | Ny[dy0*dwidth+x-x0] = ny[xs]; |
| 336 | Nz[dy0*dwidth+x-x0] = nz[xs]; |
| 337 | } |
| 338 | } |
| 339 | return true; |
| 340 | } |
| 341 | |
| 342 | template<typename Eval, typename Patch> |
| 343 | void feature_adaptive_eval_grid (const Patch& patch, unsigned subPatch, const float levels[4], |
| 344 | const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, |
| 345 | float* Px, float* Py, float* Pz, float* U, float* V, float* Nx, float* Ny, float* Nz, const unsigned dwidth, const unsigned dheight) |
| 346 | { |
| 347 | bool sl = false, sr = false, st = false, sb = false; |
| 348 | if (levels) { |
| 349 | sl = x0 == 0 && stitch_col<Eval,Patch>(patch,subPatch,0,y0,y1,sheight-1,int(levels[3]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight); |
| 350 | sr = x1 == swidth-1 && stitch_col<Eval,Patch>(patch,subPatch,1,y0,y1,sheight-1,int(levels[1]), Px,Py,Pz,U,V,Nx,Ny,Nz, x1-x0,dwidth,dheight); |
| 351 | st = y0 == 0 && stitch_row<Eval,Patch>(patch,subPatch,0,x0,x1,swidth-1,int(levels[0]), Px,Py,Pz,U,V,Nx,Ny,Nz, 0 ,dwidth,dheight); |
| 352 | sb = y1 == sheight-1 && stitch_row<Eval,Patch>(patch,subPatch,1,x0,x1,swidth-1,int(levels[2]), Px,Py,Pz,U,V,Nx,Ny,Nz, y1-y0,dwidth,dheight); |
| 353 | } |
| 354 | const unsigned ofs = st*dwidth+sl; |
| 355 | Eval(patch,subPatch,x0+sl,x1-sr,y0+st,y1-sb, swidth,sheight, Px+ofs,Py+ofs,Pz+ofs,U+ofs,V+ofs,Nx?Nx+ofs:nullptr,Ny?Ny+ofs:nullptr,Nz?Nz+ofs:nullptr, dwidth,dheight); |
| 356 | } |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | |