1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "gregory_patch.h"
7
8namespace embree
9{
10 class __aligned(64) DenseGregoryPatch3fa
11 {
12 typedef Vec3fa Vec3fa_4x4[4][4];
13 public:
14
15 __forceinline DenseGregoryPatch3fa (const GregoryPatch3fa& patch)
16 {
17 for (size_t y=0; y<4; y++)
18 for (size_t x=0; x<4; x++)
19 matrix[y][x] = Vec3ff(patch.v[y][x], 0.0f);
20
21 matrix[0][0].w = patch.f[0][0].x;
22 matrix[0][1].w = patch.f[0][0].y;
23 matrix[0][2].w = patch.f[0][0].z;
24 matrix[0][3].w = 0.0f;
25
26 matrix[1][0].w = patch.f[0][1].x;
27 matrix[1][1].w = patch.f[0][1].y;
28 matrix[1][2].w = patch.f[0][1].z;
29 matrix[1][3].w = 0.0f;
30
31 matrix[2][0].w = patch.f[1][1].x;
32 matrix[2][1].w = patch.f[1][1].y;
33 matrix[2][2].w = patch.f[1][1].z;
34 matrix[2][3].w = 0.0f;
35
36 matrix[3][0].w = patch.f[1][0].x;
37 matrix[3][1].w = patch.f[1][0].y;
38 matrix[3][2].w = patch.f[1][0].z;
39 matrix[3][3].w = 0.0f;
40 }
41
42 __forceinline void extract_f_m(Vec3fa f_m[2][2]) const
43 {
44 f_m[0][0] = Vec3fa( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
45 f_m[0][1] = Vec3fa( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
46 f_m[1][1] = Vec3fa( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
47 f_m[1][0] = Vec3fa( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
48 }
49
50 __forceinline Vec3fa eval(const float uu, const float vv) const
51 {
52 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
53 return GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
54 }
55
56 __forceinline Vec3fa normal(const float uu, const float vv) const
57 {
58 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
59 return GregoryPatch3fa::normal(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
60 }
61
62 template<class T>
63 __forceinline Vec3<T> eval(const T &uu, const T &vv) const
64 {
65 Vec3<T> f_m[2][2];
66 f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
67 f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
68 f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
69 f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
70 return GregoryPatch3fa::eval_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
71 }
72
73 template<class T>
74 __forceinline Vec3<T> normal(const T &uu, const T &vv) const
75 {
76 Vec3<T> f_m[2][2];
77 f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w );
78 f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w );
79 f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w );
80 f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w );
81 return GregoryPatch3fa::normal_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv);
82 }
83
84 __forceinline void eval(const float u, const float v,
85 Vec3fa* P, Vec3fa* dPdu, Vec3fa* dPdv, Vec3fa* ddPdudu, Vec3fa* ddPdvdv, Vec3fa* ddPdudv,
86 const float dscale = 1.0f) const
87 {
88 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
89 if (P) {
90 *P = GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,u,v);
91 }
92 if (dPdu) {
93 assert(dPdu); *dPdu = GregoryPatch3fa::eval_du(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
94 assert(dPdv); *dPdv = GregoryPatch3fa::eval_dv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale;
95 }
96 if (ddPdudu) {
97 assert(ddPdudu); *ddPdudu = GregoryPatch3fa::eval_dudu(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
98 assert(ddPdvdv); *ddPdvdv = GregoryPatch3fa::eval_dvdv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
99 assert(ddPdudv); *ddPdudv = GregoryPatch3fa::eval_dudv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale);
100 }
101 }
102
103 template<typename vbool, typename vfloat>
104 __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, float* P, float* dPdu, float* dPdv, const float dscale, const size_t dstride, const size_t N) const
105 {
106 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m);
107 GregoryPatch3fa::eval(matrix,f_m,valid,uu,vv,P,dPdu,dPdv,dscale,dstride,N);
108 }
109
110 private:
111 Vec3ff matrix[4][4]; // f_p/m points are stored in 4th component
112 };
113}
114