| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "catmullclark_patch.h" |
| 7 | #include "bezier_patch.h" |
| 8 | #include "bezier_curve.h" |
| 9 | #include "catmullclark_coefficients.h" |
| 10 | |
| 11 | namespace embree |
| 12 | { |
| 13 | template<typename Vertex, typename Vertex_t = Vertex> |
| 14 | class __aligned(64) GregoryPatchT |
| 15 | { |
| 16 | typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch; |
| 17 | typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch; |
| 18 | typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring; |
| 19 | typedef BezierCurveT<Vertex> BezierCurve; |
| 20 | |
| 21 | public: |
| 22 | Vertex v[4][4]; |
| 23 | Vertex f[2][2]; |
| 24 | |
| 25 | __forceinline GregoryPatchT() {} |
| 26 | |
| 27 | __forceinline GregoryPatchT(const CatmullClarkPatch& patch) { |
| 28 | init(patch); |
| 29 | } |
| 30 | |
| 31 | __forceinline GregoryPatchT(const CatmullClarkPatch& patch, |
| 32 | const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) |
| 33 | { |
| 34 | init_crackfix(patch,border0,border1,border2,border3); |
| 35 | } |
| 36 | |
| 37 | __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) { |
| 38 | init(CatmullClarkPatch(edge,vertices,stride)); |
| 39 | } |
| 40 | |
| 41 | __forceinline Vertex& p0() { return v[0][0]; } |
| 42 | __forceinline Vertex& p1() { return v[0][3]; } |
| 43 | __forceinline Vertex& p2() { return v[3][3]; } |
| 44 | __forceinline Vertex& p3() { return v[3][0]; } |
| 45 | |
| 46 | __forceinline Vertex& e0_p() { return v[0][1]; } |
| 47 | __forceinline Vertex& e0_m() { return v[1][0]; } |
| 48 | __forceinline Vertex& e1_p() { return v[1][3]; } |
| 49 | __forceinline Vertex& e1_m() { return v[0][2]; } |
| 50 | __forceinline Vertex& e2_p() { return v[3][2]; } |
| 51 | __forceinline Vertex& e2_m() { return v[2][3]; } |
| 52 | __forceinline Vertex& e3_p() { return v[2][0]; } |
| 53 | __forceinline Vertex& e3_m() { return v[3][1]; } |
| 54 | |
| 55 | __forceinline Vertex& f0_p() { return v[1][1]; } |
| 56 | __forceinline Vertex& f1_p() { return v[1][2]; } |
| 57 | __forceinline Vertex& f2_p() { return v[2][2]; } |
| 58 | __forceinline Vertex& f3_p() { return v[2][1]; } |
| 59 | __forceinline Vertex& f0_m() { return f[0][0]; } |
| 60 | __forceinline Vertex& f1_m() { return f[0][1]; } |
| 61 | __forceinline Vertex& f2_m() { return f[1][1]; } |
| 62 | __forceinline Vertex& f3_m() { return f[1][0]; } |
| 63 | |
| 64 | __forceinline const Vertex& p0() const { return v[0][0]; } |
| 65 | __forceinline const Vertex& p1() const { return v[0][3]; } |
| 66 | __forceinline const Vertex& p2() const { return v[3][3]; } |
| 67 | __forceinline const Vertex& p3() const { return v[3][0]; } |
| 68 | |
| 69 | __forceinline const Vertex& e0_p() const { return v[0][1]; } |
| 70 | __forceinline const Vertex& e0_m() const { return v[1][0]; } |
| 71 | __forceinline const Vertex& e1_p() const { return v[1][3]; } |
| 72 | __forceinline const Vertex& e1_m() const { return v[0][2]; } |
| 73 | __forceinline const Vertex& e2_p() const { return v[3][2]; } |
| 74 | __forceinline const Vertex& e2_m() const { return v[2][3]; } |
| 75 | __forceinline const Vertex& e3_p() const { return v[2][0]; } |
| 76 | __forceinline const Vertex& e3_m() const { return v[3][1]; } |
| 77 | |
| 78 | __forceinline const Vertex& f0_p() const { return v[1][1]; } |
| 79 | __forceinline const Vertex& f1_p() const { return v[1][2]; } |
| 80 | __forceinline const Vertex& f2_p() const { return v[2][2]; } |
| 81 | __forceinline const Vertex& f3_p() const { return v[2][1]; } |
| 82 | __forceinline const Vertex& f0_m() const { return f[0][0]; } |
| 83 | __forceinline const Vertex& f1_m() const { return f[0][1]; } |
| 84 | __forceinline const Vertex& f2_m() const { return f[1][1]; } |
| 85 | __forceinline const Vertex& f3_m() const { return f[1][0]; } |
| 86 | |
| 87 | __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) { |
| 88 | return irreg_patch.ring[index].getLimitVertex(); |
| 89 | } |
| 90 | |
| 91 | __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) { |
| 92 | return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx); |
| 93 | } |
| 94 | |
| 95 | __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) { |
| 96 | return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx); |
| 97 | } |
| 98 | |
| 99 | __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) |
| 100 | { |
| 101 | CatmullClark1Ring3fa r0,r1,r2; |
| 102 | irreg_patch.ring[index].subdivide(r0); |
| 103 | r0.subdivide(r1); |
| 104 | r1.subdivide(r2); |
| 105 | return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx); |
| 106 | } |
| 107 | |
| 108 | __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) |
| 109 | { |
| 110 | CatmullClark1Ring3fa r0,r1,r2; |
| 111 | irreg_patch.ring[index].subdivide(r0); |
| 112 | r0.subdivide(r1); |
| 113 | r1.subdivide(r2); |
| 114 | return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx); |
| 115 | } |
| 116 | |
| 117 | void initFaceVertex(const CatmullClarkPatch& irreg_patch, |
| 118 | const size_t index, |
| 119 | const Vertex& p_vtx, |
| 120 | const Vertex& e0_p_vtx, |
| 121 | const Vertex& e1_m_vtx, |
| 122 | const unsigned int face_valence_p1, |
| 123 | const Vertex& e0_m_vtx, |
| 124 | const Vertex& e3_p_vtx, |
| 125 | const unsigned int face_valence_p3, |
| 126 | Vertex& f_p_vtx, |
| 127 | Vertex& f_m_vtx) |
| 128 | { |
| 129 | const unsigned int face_valence = irreg_patch.ring[index].face_valence; |
| 130 | const unsigned int edge_valence = irreg_patch.ring[index].edge_valence; |
| 131 | const unsigned int border_index = irreg_patch.ring[index].border_index; |
| 132 | |
| 133 | const Vertex& vtx = irreg_patch.ring[index].vtx; |
| 134 | const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(0); |
| 135 | const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(0); |
| 136 | const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(1); |
| 137 | |
| 138 | Vertex c_i, e_i_p_1; |
| 139 | const bool hasHardEdge0 = |
| 140 | std::isinf(irreg_patch.ring[index].vertex_crease_weight) && |
| 141 | std::isinf(irreg_patch.ring[index].crease_weight[0]); |
| 142 | |
| 143 | if (unlikely((border_index == edge_valence-2) || hasHardEdge0)) |
| 144 | { |
| 145 | /* mirror quad center and edge mid-point */ |
| 146 | c_i = madd(2.0f, e_i - c_i_m_1, c_i_m_1); |
| 147 | e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1); |
| 148 | } |
| 149 | else |
| 150 | { |
| 151 | c_i = irreg_patch.ring[index].getQuadCenter( face_valence-1 ); |
| 152 | e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 ); |
| 153 | } |
| 154 | |
| 155 | Vertex c_i_m_2, e_i_m_2; |
| 156 | const bool hasHardEdge1 = |
| 157 | std::isinf(irreg_patch.ring[index].vertex_crease_weight) && |
| 158 | std::isinf(irreg_patch.ring[index].crease_weight[1]); |
| 159 | |
| 160 | if (unlikely(border_index == 2 || hasHardEdge1)) |
| 161 | { |
| 162 | /* mirror quad center and edge mid-point */ |
| 163 | c_i_m_2 = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1); |
| 164 | e_i_m_2 = madd(2.0f, vtx - e_i, + e_i); |
| 165 | } |
| 166 | else |
| 167 | { |
| 168 | c_i_m_2 = irreg_patch.ring[index].getQuadCenter( 1 ); |
| 169 | e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( 2 ); |
| 170 | } |
| 171 | |
| 172 | const float d = 3.0f; |
| 173 | //const float c = cosf(2.0f*M_PI/(float)face_valence); |
| 174 | //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1); |
| 175 | //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3); |
| 176 | |
| 177 | const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence); |
| 178 | const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1); |
| 179 | const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3); |
| 180 | |
| 181 | const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i); |
| 182 | const Vertex r_e_m = 1.0f/3.0f * (e_i - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2); |
| 183 | |
| 184 | f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); |
| 185 | f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); |
| 186 | } |
| 187 | |
| 188 | __noinline void init(const CatmullClarkPatch& patch) |
| 189 | { |
| 190 | assert( patch.ring[0].hasValidPositions() ); |
| 191 | assert( patch.ring[1].hasValidPositions() ); |
| 192 | assert( patch.ring[2].hasValidPositions() ); |
| 193 | assert( patch.ring[3].hasValidPositions() ); |
| 194 | |
| 195 | p0() = initCornerVertex(patch,0); |
| 196 | p1() = initCornerVertex(patch,1); |
| 197 | p2() = initCornerVertex(patch,2); |
| 198 | p3() = initCornerVertex(patch,3); |
| 199 | |
| 200 | e0_p() = initPositiveEdgeVertex(patch,0, p0()); |
| 201 | e1_p() = initPositiveEdgeVertex(patch,1, p1()); |
| 202 | e2_p() = initPositiveEdgeVertex(patch,2, p2()); |
| 203 | e3_p() = initPositiveEdgeVertex(patch,3, p3()); |
| 204 | |
| 205 | e0_m() = initNegativeEdgeVertex(patch,0, p0()); |
| 206 | e1_m() = initNegativeEdgeVertex(patch,1, p1()); |
| 207 | e2_m() = initNegativeEdgeVertex(patch,2, p2()); |
| 208 | e3_m() = initNegativeEdgeVertex(patch,3, p3()); |
| 209 | |
| 210 | const unsigned int face_valence_p0 = patch.ring[0].face_valence; |
| 211 | const unsigned int face_valence_p1 = patch.ring[1].face_valence; |
| 212 | const unsigned int face_valence_p2 = patch.ring[2].face_valence; |
| 213 | const unsigned int face_valence_p3 = patch.ring[3].face_valence; |
| 214 | |
| 215 | initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() ); |
| 216 | initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() ); |
| 217 | initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() ); |
| 218 | initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() ); |
| 219 | |
| 220 | } |
| 221 | |
| 222 | __noinline void init_crackfix(const CatmullClarkPatch& patch, |
| 223 | const BezierCurve* border0, |
| 224 | const BezierCurve* border1, |
| 225 | const BezierCurve* border2, |
| 226 | const BezierCurve* border3) |
| 227 | { |
| 228 | assert( patch.ring[0].hasValidPositions() ); |
| 229 | assert( patch.ring[1].hasValidPositions() ); |
| 230 | assert( patch.ring[2].hasValidPositions() ); |
| 231 | assert( patch.ring[3].hasValidPositions() ); |
| 232 | |
| 233 | p0() = initCornerVertex(patch,0); |
| 234 | p1() = initCornerVertex(patch,1); |
| 235 | p2() = initCornerVertex(patch,2); |
| 236 | p3() = initCornerVertex(patch,3); |
| 237 | |
| 238 | e0_p() = initPositiveEdgeVertex(patch,0, p0()); |
| 239 | e1_p() = initPositiveEdgeVertex(patch,1, p1()); |
| 240 | e2_p() = initPositiveEdgeVertex(patch,2, p2()); |
| 241 | e3_p() = initPositiveEdgeVertex(patch,3, p3()); |
| 242 | |
| 243 | e0_m() = initNegativeEdgeVertex(patch,0, p0()); |
| 244 | e1_m() = initNegativeEdgeVertex(patch,1, p1()); |
| 245 | e2_m() = initNegativeEdgeVertex(patch,2, p2()); |
| 246 | e3_m() = initNegativeEdgeVertex(patch,3, p3()); |
| 247 | |
| 248 | if (unlikely(border0 != nullptr)) |
| 249 | { |
| 250 | p0() = border0->v0; |
| 251 | e0_p() = border0->v1; |
| 252 | e1_m() = border0->v2; |
| 253 | p1() = border0->v3; |
| 254 | } |
| 255 | |
| 256 | if (unlikely(border1 != nullptr)) |
| 257 | { |
| 258 | p1() = border1->v0; |
| 259 | e1_p() = border1->v1; |
| 260 | e2_m() = border1->v2; |
| 261 | p2() = border1->v3; |
| 262 | } |
| 263 | |
| 264 | if (unlikely(border2 != nullptr)) |
| 265 | { |
| 266 | p2() = border2->v0; |
| 267 | e2_p() = border2->v1; |
| 268 | e3_m() = border2->v2; |
| 269 | p3() = border2->v3; |
| 270 | } |
| 271 | |
| 272 | if (unlikely(border3 != nullptr)) |
| 273 | { |
| 274 | p3() = border3->v0; |
| 275 | e3_p() = border3->v1; |
| 276 | e0_m() = border3->v2; |
| 277 | p0() = border3->v3; |
| 278 | } |
| 279 | |
| 280 | const unsigned int face_valence_p0 = patch.ring[0].face_valence; |
| 281 | const unsigned int face_valence_p1 = patch.ring[1].face_valence; |
| 282 | const unsigned int face_valence_p2 = patch.ring[2].face_valence; |
| 283 | const unsigned int face_valence_p3 = patch.ring[3].face_valence; |
| 284 | |
| 285 | initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() ); |
| 286 | initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() ); |
| 287 | initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() ); |
| 288 | initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() ); |
| 289 | } |
| 290 | |
| 291 | |
| 292 | void computeGregoryPatchFacePoints(const unsigned int face_valence, |
| 293 | const Vertex& r_e_p, |
| 294 | const Vertex& r_e_m, |
| 295 | const Vertex& p_vtx, |
| 296 | const Vertex& e0_p_vtx, |
| 297 | const Vertex& e1_m_vtx, |
| 298 | const unsigned int face_valence_p1, |
| 299 | const Vertex& e0_m_vtx, |
| 300 | const Vertex& e3_p_vtx, |
| 301 | const unsigned int face_valence_p3, |
| 302 | Vertex& f_p_vtx, |
| 303 | Vertex& f_m_vtx, |
| 304 | const float d = 3.0f) |
| 305 | { |
| 306 | //const float c = cosf(2.0*M_PI/(float)face_valence); |
| 307 | //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1); |
| 308 | //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3); |
| 309 | |
| 310 | const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence); |
| 311 | const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1); |
| 312 | const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3); |
| 313 | |
| 314 | |
| 315 | f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); |
| 316 | f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); |
| 317 | f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); |
| 318 | f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); |
| 319 | } |
| 320 | |
| 321 | __noinline void init(const GeneralCatmullClarkPatch& patch) |
| 322 | { |
| 323 | assert(patch.size() == 4); |
| 324 | #if 0 |
| 325 | CatmullClarkPatch qpatch; patch.init(qpatch); |
| 326 | init(qpatch); |
| 327 | #else |
| 328 | const float face_valence_p0 = patch.ring[0].face_valence; |
| 329 | const float face_valence_p1 = patch.ring[1].face_valence; |
| 330 | const float face_valence_p2 = patch.ring[2].face_valence; |
| 331 | const float face_valence_p3 = patch.ring[3].face_valence; |
| 332 | |
| 333 | Vertex p0_r_p, p0_r_m; |
| 334 | patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m ); |
| 335 | |
| 336 | Vertex p1_r_p, p1_r_m; |
| 337 | patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m ); |
| 338 | |
| 339 | Vertex p2_r_p, p2_r_m; |
| 340 | patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m ); |
| 341 | |
| 342 | Vertex p3_r_p, p3_r_m; |
| 343 | patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m ); |
| 344 | |
| 345 | computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() ); |
| 346 | computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() ); |
| 347 | computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() ); |
| 348 | computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() ); |
| 349 | |
| 350 | #endif |
| 351 | } |
| 352 | |
| 353 | |
| 354 | __forceinline void convert_to_bezier() |
| 355 | { |
| 356 | f0_p() = (f0_p() + f0_m()) * 0.5f; |
| 357 | f1_p() = (f1_p() + f1_m()) * 0.5f; |
| 358 | f2_p() = (f2_p() + f2_m()) * 0.5f; |
| 359 | f3_p() = (f3_p() + f3_m()) * 0.5f; |
| 360 | f0_m() = Vertex( zero ); |
| 361 | f1_m() = Vertex( zero ); |
| 362 | f2_m() = Vertex( zero ); |
| 363 | f3_m() = Vertex( zero ); |
| 364 | } |
| 365 | |
| 366 | static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv, |
| 367 | Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21) |
| 368 | { |
| 369 | if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f)) |
| 370 | { |
| 371 | matrix_11 = matrix[1][1]; |
| 372 | matrix_12 = matrix[1][2]; |
| 373 | matrix_22 = matrix[2][2]; |
| 374 | matrix_21 = matrix[2][1]; |
| 375 | } |
| 376 | else |
| 377 | { |
| 378 | const Vertex_t f0_p = matrix[1][1]; |
| 379 | const Vertex_t f1_p = matrix[1][2]; |
| 380 | const Vertex_t f2_p = matrix[2][2]; |
| 381 | const Vertex_t f3_p = matrix[2][1]; |
| 382 | |
| 383 | const Vertex_t f0_m = f_m[0][0]; |
| 384 | const Vertex_t f1_m = f_m[0][1]; |
| 385 | const Vertex_t f2_m = f_m[1][1]; |
| 386 | const Vertex_t f3_m = f_m[1][0]; |
| 387 | |
| 388 | matrix_11 = ( uu * f0_p + vv * f0_m)*rcp(uu+vv); |
| 389 | matrix_12 = ((1.0f-uu) * f1_m + vv * f1_p)*rcp(1.0f-uu+vv); |
| 390 | matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(2.0f-uu-vv); |
| 391 | matrix_21 = ( uu * f3_m + (1.0f-vv) * f3_p)*rcp(1.0f+uu-vv); |
| 392 | } |
| 393 | } |
| 394 | |
| 395 | template<typename vfloat> |
| 396 | static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2], |
| 397 | size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) |
| 398 | { |
| 399 | const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); |
| 400 | |
| 401 | const vfloat f0_p = v[1][1][i]; |
| 402 | const vfloat f1_p = v[1][2][i]; |
| 403 | const vfloat f2_p = v[2][2][i]; |
| 404 | const vfloat f3_p = v[2][1][i]; |
| 405 | |
| 406 | const vfloat f0_m = f[0][0][i]; |
| 407 | const vfloat f1_m = f[0][1][i]; |
| 408 | const vfloat f2_m = f[1][1][i]; |
| 409 | const vfloat f3_m = f[1][0][i]; |
| 410 | |
| 411 | const vfloat one_minus_uu = vfloat(1.0f) - uu; |
| 412 | const vfloat one_minus_vv = vfloat(1.0f) - vv; |
| 413 | |
| 414 | const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); |
| 415 | const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); |
| 416 | const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); |
| 417 | const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); |
| 418 | |
| 419 | matrix_11 = select(m_border,f0_p,f0_i); |
| 420 | matrix_12 = select(m_border,f1_p,f1_i); |
| 421 | matrix_22 = select(m_border,f2_p,f2_i); |
| 422 | matrix_21 = select(m_border,f3_p,f3_i); |
| 423 | } |
| 424 | |
| 425 | static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv) |
| 426 | { |
| 427 | Vertex_t v_11, v_12, v_22, v_21; |
| 428 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
| 429 | |
| 430 | const Vec4<float> Bu = BezierBasis::eval(uu); |
| 431 | const Vec4<float> Bv = BezierBasis::eval(vv); |
| 432 | |
| 433 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
| 434 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
| 435 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
| 436 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
| 437 | } |
| 438 | |
| 439 | static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
| 440 | { |
| 441 | Vertex_t v_11, v_12, v_22, v_21; |
| 442 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
| 443 | |
| 444 | const Vec4<float> Bu = BezierBasis::derivative(uu); |
| 445 | const Vec4<float> Bv = BezierBasis::eval(vv); |
| 446 | |
| 447 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
| 448 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
| 449 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
| 450 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
| 451 | } |
| 452 | |
| 453 | static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
| 454 | { |
| 455 | Vertex_t v_11, v_12, v_22, v_21; |
| 456 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
| 457 | |
| 458 | const Vec4<float> Bu = BezierBasis::eval(uu); |
| 459 | const Vec4<float> Bv = BezierBasis::derivative(vv); |
| 460 | |
| 461 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
| 462 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
| 463 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
| 464 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
| 465 | } |
| 466 | |
| 467 | static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
| 468 | { |
| 469 | Vertex_t v_11, v_12, v_22, v_21; |
| 470 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
| 471 | |
| 472 | const Vec4<float> Bu = BezierBasis::derivative2(uu); |
| 473 | const Vec4<float> Bv = BezierBasis::eval(vv); |
| 474 | |
| 475 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
| 476 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
| 477 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
| 478 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
| 479 | } |
| 480 | |
| 481 | static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
| 482 | { |
| 483 | Vertex_t v_11, v_12, v_22, v_21; |
| 484 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
| 485 | |
| 486 | const Vec4<float> Bu = BezierBasis::eval(uu); |
| 487 | const Vec4<float> Bv = BezierBasis::derivative2(vv); |
| 488 | |
| 489 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
| 490 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
| 491 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
| 492 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
| 493 | } |
| 494 | |
| 495 | static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative |
| 496 | { |
| 497 | Vertex_t v_11, v_12, v_22, v_21; |
| 498 | computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); |
| 499 | |
| 500 | const Vec4<float> Bu = BezierBasis::derivative(uu); |
| 501 | const Vec4<float> Bv = BezierBasis::derivative(vv); |
| 502 | |
| 503 | return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), |
| 504 | madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), |
| 505 | madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), |
| 506 | Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); |
| 507 | } |
| 508 | |
| 509 | __forceinline Vertex eval(const float uu, const float vv) const { |
| 510 | return eval(v,f,uu,vv); |
| 511 | } |
| 512 | |
| 513 | __forceinline Vertex eval_du( const float uu, const float vv) const { |
| 514 | return eval_du(v,f,uu,vv); |
| 515 | } |
| 516 | |
| 517 | __forceinline Vertex eval_dv( const float uu, const float vv) const { |
| 518 | return eval_dv(v,f,uu,vv); |
| 519 | } |
| 520 | |
| 521 | __forceinline Vertex eval_dudu( const float uu, const float vv) const { |
| 522 | return eval_dudu(v,f,uu,vv); |
| 523 | } |
| 524 | |
| 525 | __forceinline Vertex eval_dvdv( const float uu, const float vv) const { |
| 526 | return eval_dvdv(v,f,uu,vv); |
| 527 | } |
| 528 | |
| 529 | __forceinline Vertex eval_dudv( const float uu, const float vv) const { |
| 530 | return eval_dudv(v,f,uu,vv); |
| 531 | } |
| 532 | |
| 533 | static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv) // FIXME: why not using basis functions |
| 534 | { |
| 535 | /* interpolate inner vertices */ |
| 536 | Vertex_t matrix_11, matrix_12, matrix_22, matrix_21; |
| 537 | computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21); |
| 538 | |
| 539 | /* tangentU */ |
| 540 | const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]); |
| 541 | const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11 , (Vertex_t)matrix_21 , (Vertex_t)matrix[3][1]); |
| 542 | const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12 , (Vertex_t)matrix_22 , (Vertex_t)matrix[3][2]); |
| 543 | const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]); |
| 544 | |
| 545 | const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3); |
| 546 | |
| 547 | /* tangentV */ |
| 548 | const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]); |
| 549 | const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11 , (Vertex_t)matrix_12 , (Vertex_t)matrix[1][3]); |
| 550 | const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21 , (Vertex_t)matrix_22 , (Vertex_t)matrix[2][3]); |
| 551 | const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]); |
| 552 | |
| 553 | const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3); |
| 554 | |
| 555 | /* normal = tangentU x tangentV */ |
| 556 | const Vertex_t n = cross(tangentU,tangentV); |
| 557 | |
| 558 | return n; |
| 559 | } |
| 560 | |
| 561 | __forceinline Vertex normal( const float uu, const float vv) const { |
| 562 | return normal(v,f,uu,vv); |
| 563 | } |
| 564 | |
| 565 | __forceinline void eval(const float u, const float v, |
| 566 | Vertex* P, Vertex* dPdu, Vertex* dPdv, |
| 567 | Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv, |
| 568 | const float dscale = 1.0f) const |
| 569 | { |
| 570 | if (P) { |
| 571 | *P = eval(u,v); |
| 572 | } |
| 573 | if (dPdu) { |
| 574 | assert(dPdu); *dPdu = eval_du(u,v)*dscale; |
| 575 | assert(dPdv); *dPdv = eval_dv(u,v)*dscale; |
| 576 | } |
| 577 | if (ddPdudu) { |
| 578 | assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale); |
| 579 | assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale); |
| 580 | assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale); |
| 581 | } |
| 582 | } |
| 583 | |
| 584 | template<class vfloat> |
| 585 | static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2], |
| 586 | const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n, |
| 587 | vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) |
| 588 | { |
| 589 | const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i])))); |
| 590 | const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i])))); |
| 591 | const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i])))); |
| 592 | const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i])))); |
| 593 | return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x))); |
| 594 | } |
| 595 | |
| 596 | template<typename vbool, typename vfloat> |
| 597 | static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2], |
| 598 | const vbool& valid, const vfloat& uu, const vfloat& vv, |
| 599 | float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, |
| 600 | const float dscale, const size_t dstride, const size_t N) |
| 601 | { |
| 602 | if (P) { |
| 603 | const Vec4<vfloat> u_n = BezierBasis::eval(uu); |
| 604 | const Vec4<vfloat> v_n = BezierBasis::eval(vv); |
| 605 | for (size_t i=0; i<N; i++) { |
| 606 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
| 607 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
| 608 | vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)); |
| 609 | } |
| 610 | } |
| 611 | if (dPdu) |
| 612 | { |
| 613 | { |
| 614 | assert(dPdu); |
| 615 | const Vec4<vfloat> u_n = BezierBasis::derivative(uu); |
| 616 | const Vec4<vfloat> v_n = BezierBasis::eval(vv); |
| 617 | for (size_t i=0; i<N; i++) { |
| 618 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
| 619 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
| 620 | vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale); |
| 621 | } |
| 622 | } |
| 623 | { |
| 624 | assert(dPdv); |
| 625 | const Vec4<vfloat> u_n = BezierBasis::eval(uu); |
| 626 | const Vec4<vfloat> v_n = BezierBasis::derivative(vv); |
| 627 | for (size_t i=0; i<N; i++) { |
| 628 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
| 629 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
| 630 | vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale); |
| 631 | } |
| 632 | } |
| 633 | } |
| 634 | if (ddPdudu) |
| 635 | { |
| 636 | { |
| 637 | assert(ddPdudu); |
| 638 | const Vec4<vfloat> u_n = BezierBasis::derivative2(uu); |
| 639 | const Vec4<vfloat> v_n = BezierBasis::eval(vv); |
| 640 | for (size_t i=0; i<N; i++) { |
| 641 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
| 642 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
| 643 | vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale)); |
| 644 | } |
| 645 | } |
| 646 | { |
| 647 | assert(ddPdvdv); |
| 648 | const Vec4<vfloat> u_n = BezierBasis::eval(uu); |
| 649 | const Vec4<vfloat> v_n = BezierBasis::derivative2(vv); |
| 650 | for (size_t i=0; i<N; i++) { |
| 651 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
| 652 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
| 653 | vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale)); |
| 654 | } |
| 655 | } |
| 656 | { |
| 657 | assert(ddPdudv); |
| 658 | const Vec4<vfloat> u_n = BezierBasis::derivative(uu); |
| 659 | const Vec4<vfloat> v_n = BezierBasis::derivative(vv); |
| 660 | for (size_t i=0; i<N; i++) { |
| 661 | vfloat matrix_11, matrix_12, matrix_22, matrix_21; |
| 662 | computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times |
| 663 | vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale)); |
| 664 | } |
| 665 | } |
| 666 | } |
| 667 | } |
| 668 | |
| 669 | template<typename vbool, typename vfloat> |
| 670 | __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, |
| 671 | float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, |
| 672 | const float dscale, const size_t dstride, const size_t N) const { |
| 673 | eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N); |
| 674 | } |
| 675 | |
| 676 | template<class T> |
| 677 | static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) |
| 678 | { |
| 679 | typedef typename T::Bool M; |
| 680 | const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); |
| 681 | |
| 682 | const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z); |
| 683 | const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z); |
| 684 | const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z); |
| 685 | const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z); |
| 686 | |
| 687 | const Vec3<T> f0_m = f[0][0]; |
| 688 | const Vec3<T> f1_m = f[0][1]; |
| 689 | const Vec3<T> f2_m = f[1][1]; |
| 690 | const Vec3<T> f3_m = f[1][0]; |
| 691 | |
| 692 | const T one_minus_uu = T(1.0f) - uu; |
| 693 | const T one_minus_vv = T(1.0f) - vv; |
| 694 | |
| 695 | const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); |
| 696 | const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); |
| 697 | const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); |
| 698 | const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); |
| 699 | |
| 700 | const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) ); |
| 701 | const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) ); |
| 702 | const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) ); |
| 703 | const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) ); |
| 704 | |
| 705 | const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu; |
| 706 | const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv; |
| 707 | const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu); |
| 708 | const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv); |
| 709 | const T B2_u = 3.0f * (uu * one_minus_uu * uu); |
| 710 | const T B2_v = 3.0f * (vv * one_minus_vv * vv); |
| 711 | const T B3_u = uu * uu * uu; |
| 712 | const T B3_v = vv * vv * vv; |
| 713 | |
| 714 | const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))), |
| 715 | madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[1][3].x))), |
| 716 | madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[2][3].x))), |
| 717 | B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x)))))); |
| 718 | |
| 719 | const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))), |
| 720 | madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[1][3].y))), |
| 721 | madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[2][3].y))), |
| 722 | B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y)))))); |
| 723 | |
| 724 | const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))), |
| 725 | madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[1][3].z))), |
| 726 | madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[2][3].z))), |
| 727 | B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z)))))); |
| 728 | |
| 729 | return Vec3<T>(x,y,z); |
| 730 | } |
| 731 | |
| 732 | template<class T> |
| 733 | __forceinline Vec3<T> eval(const T& uu, const T& vv) const |
| 734 | { |
| 735 | Vec3<T> ff[2][2]; |
| 736 | ff[0][0] = Vec3<T>(f[0][0]); |
| 737 | ff[0][1] = Vec3<T>(f[0][1]); |
| 738 | ff[1][1] = Vec3<T>(f[1][1]); |
| 739 | ff[1][0] = Vec3<T>(f[1][0]); |
| 740 | return eval_t(v,ff,uu,vv); |
| 741 | } |
| 742 | |
| 743 | template<class T> |
| 744 | static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) |
| 745 | { |
| 746 | typedef typename T::Bool M; |
| 747 | |
| 748 | const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z); |
| 749 | const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z); |
| 750 | const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z); |
| 751 | const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z); |
| 752 | |
| 753 | const Vec3<T> f0_m = f[0][0]; |
| 754 | const Vec3<T> f1_m = f[0][1]; |
| 755 | const Vec3<T> f2_m = f[1][1]; |
| 756 | const Vec3<T> f3_m = f[1][0]; |
| 757 | |
| 758 | const T one_minus_uu = T(1.0f) - uu; |
| 759 | const T one_minus_vv = T(1.0f) - vv; |
| 760 | |
| 761 | const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); |
| 762 | const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); |
| 763 | const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); |
| 764 | const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); |
| 765 | |
| 766 | #if 1 |
| 767 | const M m_corner0 = (uu == 0.0f) & (vv == 0.0f); |
| 768 | const M m_corner1 = (uu == 1.0f) & (vv == 0.0f); |
| 769 | const M m_corner2 = (uu == 1.0f) & (vv == 1.0f); |
| 770 | const M m_corner3 = (uu == 0.0f) & (vv == 1.0f); |
| 771 | const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) ); |
| 772 | const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) ); |
| 773 | const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) ); |
| 774 | const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) ); |
| 775 | #else |
| 776 | const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); |
| 777 | const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) ); |
| 778 | const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) ); |
| 779 | const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) ); |
| 780 | const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) ); |
| 781 | #endif |
| 782 | |
| 783 | const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z); |
| 784 | const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z); |
| 785 | const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z); |
| 786 | const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z); |
| 787 | |
| 788 | const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z); |
| 789 | const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z); |
| 790 | const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z); |
| 791 | |
| 792 | const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z); |
| 793 | const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z); |
| 794 | const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z); |
| 795 | |
| 796 | const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z); |
| 797 | const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z); |
| 798 | |
| 799 | /* tangentU */ |
| 800 | const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30); |
| 801 | const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31); |
| 802 | const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32); |
| 803 | const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33); |
| 804 | |
| 805 | const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3); |
| 806 | |
| 807 | /* tangentV */ |
| 808 | const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03); |
| 809 | const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13); |
| 810 | const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23); |
| 811 | const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33); |
| 812 | |
| 813 | const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3); |
| 814 | |
| 815 | /* normal = tangentU x tangentV */ |
| 816 | const Vec3<T> n = cross(tangentU,tangentV); |
| 817 | return n; |
| 818 | } |
| 819 | |
| 820 | template<class T> |
| 821 | __forceinline Vec3<T> normal(const T& uu, const T& vv) const |
| 822 | { |
| 823 | Vec3<T> ff[2][2]; |
| 824 | ff[0][0] = Vec3<T>(f[0][0]); |
| 825 | ff[0][1] = Vec3<T>(f[0][1]); |
| 826 | ff[1][1] = Vec3<T>(f[1][1]); |
| 827 | ff[1][0] = Vec3<T>(f[1][0]); |
| 828 | return normal_t(v,ff,uu,vv); |
| 829 | } |
| 830 | |
| 831 | __forceinline BBox<Vertex> bounds() const |
| 832 | { |
| 833 | const Vertex *const cv = &v[0][0]; |
| 834 | BBox<Vertex> bounds (cv[0]); |
| 835 | for (size_t i=1; i<16; i++) |
| 836 | bounds.extend( cv[i] ); |
| 837 | bounds.extend(f[0][0]); |
| 838 | bounds.extend(f[1][0]); |
| 839 | bounds.extend(f[1][1]); |
| 840 | bounds.extend(f[1][1]); |
| 841 | return bounds; |
| 842 | } |
| 843 | |
| 844 | friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p) |
| 845 | { |
| 846 | for (size_t y=0; y<4; y++) |
| 847 | for (size_t x=0; x<4; x++) |
| 848 | o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl; |
| 849 | |
| 850 | for (size_t y=0; y<2; y++) |
| 851 | for (size_t x=0; x<2; x++) |
| 852 | o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl; |
| 853 | return o; |
| 854 | } |
| 855 | }; |
| 856 | |
| 857 | typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa; |
| 858 | |
| 859 | template<typename Vertex, typename Vertex_t> |
| 860 | __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride) |
| 861 | { |
| 862 | CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride); |
| 863 | GregoryPatchT<Vertex,Vertex_t> gpatch(patch); |
| 864 | gpatch.convert_to_bezier(); |
| 865 | for (size_t y=0; y<4; y++) |
| 866 | for (size_t x=0; x<4; x++) |
| 867 | matrix[y][x] = (Vertex_t)gpatch.v[y][x]; |
| 868 | } |
| 869 | |
| 870 | template<typename Vertex, typename Vertex_t> |
| 871 | __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch) |
| 872 | { |
| 873 | GregoryPatchT<Vertex,Vertex_t> gpatch(patch); |
| 874 | gpatch.convert_to_bezier(); |
| 875 | for (size_t y=0; y<4; y++) |
| 876 | for (size_t x=0; x<4; x++) |
| 877 | matrix[y][x] = (Vertex_t)gpatch.v[y][x]; |
| 878 | } |
| 879 | |
| 880 | template<typename Vertex, typename Vertex_t> |
| 881 | __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch, |
| 882 | const BezierCurveT<Vertex>* border0, |
| 883 | const BezierCurveT<Vertex>* border1, |
| 884 | const BezierCurveT<Vertex>* border2, |
| 885 | const BezierCurveT<Vertex>* border3) |
| 886 | { |
| 887 | GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3); |
| 888 | gpatch.convert_to_bezier(); |
| 889 | for (size_t y=0; y<4; y++) |
| 890 | for (size_t x=0; x<4; x++) |
| 891 | matrix[y][x] = (Vertex_t)gpatch.v[y][x]; |
| 892 | } |
| 893 | } |
| 894 | |