gregory_patch.h source code [Godot/thirdparty/embree/kernels/subdiv/gregory_patch.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "catmullclark_patch.h"
7	#include "bezier_patch.h"
8	#include "bezier_curve.h"
9	#include "catmullclark_coefficients.h"
10
11	namespace embree
12	{
13	template<typename Vertex, typename Vertex_t = Vertex>
14	class __aligned(`64`) GregoryPatchT
15	{
16	typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
17	typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
18	typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
19	typedef BezierCurveT<Vertex> BezierCurve;
20
21	public:
22	Vertex v[`4`][`4`];
23	Vertex f[`2`][`2`];
24
25	__forceinline GregoryPatchT() {}
26
27	__forceinline GregoryPatchT(const CatmullClarkPatch& patch) {
28	init(patch);
29	}
30
31	__forceinline GregoryPatchT(const CatmullClarkPatch& patch,
32	const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
33	{
34	init_crackfix(patch,border0,border1,border2,border3);
35	}
36
37	__forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
38	init(CatmullClarkPatch(edge,vertices,stride));
39	}
40
41	__forceinline Vertex& p0() { return v[`0`][`0`]; }
42	__forceinline Vertex& p1() { return v[`0`][`3`]; }
43	__forceinline Vertex& p2() { return v[`3`][`3`]; }
44	__forceinline Vertex& p3() { return v[`3`][`0`]; }
45
46	__forceinline Vertex& e0_p() { return v[`0`][`1`]; }
47	__forceinline Vertex& e0_m() { return v[`1`][`0`]; }
48	__forceinline Vertex& e1_p() { return v[`1`][`3`]; }
49	__forceinline Vertex& e1_m() { return v[`0`][`2`]; }
50	__forceinline Vertex& e2_p() { return v[`3`][`2`]; }
51	__forceinline Vertex& e2_m() { return v[`2`][`3`]; }
52	__forceinline Vertex& e3_p() { return v[`2`][`0`]; }
53	__forceinline Vertex& e3_m() { return v[`3`][`1`]; }
54
55	__forceinline Vertex& f0_p() { return v[`1`][`1`]; }
56	__forceinline Vertex& f1_p() { return v[`1`][`2`]; }
57	__forceinline Vertex& f2_p() { return v[`2`][`2`]; }
58	__forceinline Vertex& f3_p() { return v[`2`][`1`]; }
59	__forceinline Vertex& f0_m() { return f[`0`][`0`]; }
60	__forceinline Vertex& f1_m() { return f[`0`][`1`]; }
61	__forceinline Vertex& f2_m() { return f[`1`][`1`]; }
62	__forceinline Vertex& f3_m() { return f[`1`][`0`]; }
63
64	__forceinline const Vertex& p0() const { return v[`0`][`0`]; }
65	__forceinline const Vertex& p1() const { return v[`0`][`3`]; }
66	__forceinline const Vertex& p2() const { return v[`3`][`3`]; }
67	__forceinline const Vertex& p3() const { return v[`3`][`0`]; }
68
69	__forceinline const Vertex& e0_p() const { return v[`0`][`1`]; }
70	__forceinline const Vertex& e0_m() const { return v[`1`][`0`]; }
71	__forceinline const Vertex& e1_p() const { return v[`1`][`3`]; }
72	__forceinline const Vertex& e1_m() const { return v[`0`][`2`]; }
73	__forceinline const Vertex& e2_p() const { return v[`3`][`2`]; }
74	__forceinline const Vertex& e2_m() const { return v[`2`][`3`]; }
75	__forceinline const Vertex& e3_p() const { return v[`2`][`0`]; }
76	__forceinline const Vertex& e3_m() const { return v[`3`][`1`]; }
77
78	__forceinline const Vertex& f0_p() const { return v[`1`][`1`]; }
79	__forceinline const Vertex& f1_p() const { return v[`1`][`2`]; }
80	__forceinline const Vertex& f2_p() const { return v[`2`][`2`]; }
81	__forceinline const Vertex& f3_p() const { return v[`2`][`1`]; }
82	__forceinline const Vertex& f0_m() const { return f[`0`][`0`]; }
83	__forceinline const Vertex& f1_m() const { return f[`0`][`1`]; }
84	__forceinline const Vertex& f2_m() const { return f[`1`][`1`]; }
85	__forceinline const Vertex& f3_m() const { return f[`1`][`0`]; }
86
87	__forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) {
88	return irreg_patch.ring[index].getLimitVertex();
89	}
90
91	__forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
92	return madd(`1.0f`/`3.0f`,irreg_patch.ring[index].getLimitTangent(),p_vtx);
93	}
94
95	__forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
96	return madd(`1.0f`/`3.0f`,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx);
97	}
98
99	__forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
100	{
101	CatmullClark1Ring3fa r0,r1,r2;
102	irreg_patch.ring[index].subdivide(r0);
103	r0.subdivide(r1);
104	r1.subdivide(r2);
105	return madd(`8.0f`/`3.0f`,r2.getLimitTangent(),p_vtx);
106	}
107
108	__forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
109	{
110	CatmullClark1Ring3fa r0,r1,r2;
111	irreg_patch.ring[index].subdivide(r0);
112	r0.subdivide(r1);
113	r1.subdivide(r2);
114	return madd(`8.0f`/`3.0f`,r2.getSecondLimitTangent(),p_vtx);
115	}
116
117	void initFaceVertex(const CatmullClarkPatch& irreg_patch,
118	const size_t index,
119	const Vertex& p_vtx,
120	const Vertex& e0_p_vtx,
121	const Vertex& e1_m_vtx,
122	const unsigned int face_valence_p1,
123	const Vertex& e0_m_vtx,
124	const Vertex& e3_p_vtx,
125	const unsigned int face_valence_p3,
126	Vertex& f_p_vtx,
127	Vertex& f_m_vtx)
128	{
129	const unsigned int face_valence = irreg_patch.ring[index].face_valence;
130	const unsigned int edge_valence = irreg_patch.ring[index].edge_valence;
131	const unsigned int border_index = irreg_patch.ring[index].border_index;
132
133	const Vertex& vtx = irreg_patch.ring[index].vtx;
134	const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(`0`);
135	const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(`0`);
136	const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(`1`);
137
138	Vertex c_i, e_i_p_1;
139	const bool hasHardEdge0 =
140	std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
141	std::isinf(irreg_patch.ring[index].crease_weight[`0`]);
142
143	if (unlikely((border_index == edge_valence-`2`) \|\| hasHardEdge0))
144	{
145	/ mirror quad center and edge mid-point /
146	c_i = madd(`2.0f`, e_i - c_i_m_1, c_i_m_1);
147	e_i_p_1 = madd(`2.0f`, vtx - e_i_m_1, e_i_m_1);
148	}
149	else
150	{
151	c_i = irreg_patch.ring[index].getQuadCenter( face_valence-`1` );
152	e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-`1` );
153	}
154
155	Vertex c_i_m_2, e_i_m_2;
156	const bool hasHardEdge1 =
157	std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
158	std::isinf(irreg_patch.ring[index].crease_weight[`1`]);
159
160	if (unlikely(border_index == `2` \|\| hasHardEdge1))
161	{
162	/ mirror quad center and edge mid-point /
163	c_i_m_2 = madd(`2.0f`, e_i_m_1 - c_i_m_1, c_i_m_1);
164	e_i_m_2 = madd(`2.0f`, vtx - e_i, + e_i);
165	}
166	else
167	{
168	c_i_m_2 = irreg_patch.ring[index].getQuadCenter( `1` );
169	e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( `2` );
170	}
171
172	const float d = `3.0f`;
173	//const float c = cosf(2.0fM_PI/(float)face_valence);*
174	//const float c_e_p = cosf(2.0fM_PI/(float)face_valence_p1);*
175	//const float c_e_m = cosf(2.0fM_PI/(float)face_valence_p3);*
176
177	const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
178	const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
179	const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
180
181	const Vertex r_e_p = `1.0f`/`3.0f` * (e_i_m_1 - e_i_p_1) + `2.0f`/`3.0f` * (c_i_m_1 - c_i);
182	const Vertex r_e_m = `1.0f`/`3.0f` * (e_i - e_i_m_2) + `2.0f`/`3.0f` * (c_i_m_1 - c_i_m_2);
183
184	f_p_vtx = `1.0f` / d * (c_e_p * p_vtx + (d - `2.0f`c - c_e_p) e0_p_vtx + `2.0f`c e1_m_vtx + r_e_p);
185	f_m_vtx = `1.0f` / d * (c_e_m * p_vtx + (d - `2.0f`c - c_e_m) e0_m_vtx + `2.0f`c e3_p_vtx + r_e_m);
186	}
187
188	__noinline void init(const CatmullClarkPatch& patch)
189	{
190	assert( patch.ring[`0`].hasValidPositions() );
191	assert( patch.ring[`1`].hasValidPositions() );
192	assert( patch.ring[`2`].hasValidPositions() );
193	assert( patch.ring[`3`].hasValidPositions() );
194
195	p0() = initCornerVertex(patch,`0`);
196	p1() = initCornerVertex(patch,`1`);
197	p2() = initCornerVertex(patch,`2`);
198	p3() = initCornerVertex(patch,`3`);
199
200	e0_p() = initPositiveEdgeVertex(patch,`0`, p0());
201	e1_p() = initPositiveEdgeVertex(patch,`1`, p1());
202	e2_p() = initPositiveEdgeVertex(patch,`2`, p2());
203	e3_p() = initPositiveEdgeVertex(patch,`3`, p3());
204
205	e0_m() = initNegativeEdgeVertex(patch,`0`, p0());
206	e1_m() = initNegativeEdgeVertex(patch,`1`, p1());
207	e2_m() = initNegativeEdgeVertex(patch,`2`, p2());
208	e3_m() = initNegativeEdgeVertex(patch,`3`, p3());
209
210	const unsigned int face_valence_p0 = patch.ring[`0`].face_valence;
211	const unsigned int face_valence_p1 = patch.ring[`1`].face_valence;
212	const unsigned int face_valence_p2 = patch.ring[`2`].face_valence;
213	const unsigned int face_valence_p3 = patch.ring[`3`].face_valence;
214
215	initFaceVertex(patch,`0`,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
216	initFaceVertex(patch,`1`,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
217	initFaceVertex(patch,`2`,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
218	initFaceVertex(patch,`3`,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
219
220	}
221
222	__noinline void init_crackfix(const CatmullClarkPatch& patch,
223	const BezierCurve* border0,
224	const BezierCurve* border1,
225	const BezierCurve* border2,
226	const BezierCurve* border3)
227	{
228	assert( patch.ring[`0`].hasValidPositions() );
229	assert( patch.ring[`1`].hasValidPositions() );
230	assert( patch.ring[`2`].hasValidPositions() );
231	assert( patch.ring[`3`].hasValidPositions() );
232
233	p0() = initCornerVertex(patch,`0`);
234	p1() = initCornerVertex(patch,`1`);
235	p2() = initCornerVertex(patch,`2`);
236	p3() = initCornerVertex(patch,`3`);
237
238	e0_p() = initPositiveEdgeVertex(patch,`0`, p0());
239	e1_p() = initPositiveEdgeVertex(patch,`1`, p1());
240	e2_p() = initPositiveEdgeVertex(patch,`2`, p2());
241	e3_p() = initPositiveEdgeVertex(patch,`3`, p3());
242
243	e0_m() = initNegativeEdgeVertex(patch,`0`, p0());
244	e1_m() = initNegativeEdgeVertex(patch,`1`, p1());
245	e2_m() = initNegativeEdgeVertex(patch,`2`, p2());
246	e3_m() = initNegativeEdgeVertex(patch,`3`, p3());
247
248	if (unlikely(border0 != nullptr))
249	{
250	p0() = border0->v0;
251	e0_p() = border0->v1;
252	e1_m() = border0->v2;
253	p1() = border0->v3;
254	}
255
256	if (unlikely(border1 != nullptr))
257	{
258	p1() = border1->v0;
259	e1_p() = border1->v1;
260	e2_m() = border1->v2;
261	p2() = border1->v3;
262	}
263
264	if (unlikely(border2 != nullptr))
265	{
266	p2() = border2->v0;
267	e2_p() = border2->v1;
268	e3_m() = border2->v2;
269	p3() = border2->v3;
270	}
271
272	if (unlikely(border3 != nullptr))
273	{
274	p3() = border3->v0;
275	e3_p() = border3->v1;
276	e0_m() = border3->v2;
277	p0() = border3->v3;
278	}
279
280	const unsigned int face_valence_p0 = patch.ring[`0`].face_valence;
281	const unsigned int face_valence_p1 = patch.ring[`1`].face_valence;
282	const unsigned int face_valence_p2 = patch.ring[`2`].face_valence;
283	const unsigned int face_valence_p3 = patch.ring[`3`].face_valence;
284
285	initFaceVertex(patch,`0`,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
286	initFaceVertex(patch,`1`,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
287	initFaceVertex(patch,`2`,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
288	initFaceVertex(patch,`3`,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
289	}
290
291
292	void computeGregoryPatchFacePoints(const unsigned int face_valence,
293	const Vertex& r_e_p,
294	const Vertex& r_e_m,
295	const Vertex& p_vtx,
296	const Vertex& e0_p_vtx,
297	const Vertex& e1_m_vtx,
298	const unsigned int face_valence_p1,
299	const Vertex& e0_m_vtx,
300	const Vertex& e3_p_vtx,
301	const unsigned int face_valence_p3,
302	Vertex& f_p_vtx,
303	Vertex& f_m_vtx,
304	const float d = `3.0f`)
305	{
306	//const float c = cosf(2.0M_PI/(float)face_valence);*
307	//const float c_e_p = cosf(2.0M_PI/(float)face_valence_p1);*
308	//const float c_e_m = cosf(2.0M_PI/(float)face_valence_p3);*
309
310	const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
311	const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
312	const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
313
314
315	f_p_vtx = `1.0f` / d * (c_e_p * p_vtx + (d - `2.0f`c - c_e_p) e0_p_vtx + `2.0f`c e1_m_vtx + r_e_p);
316	f_m_vtx = `1.0f` / d * (c_e_m * p_vtx + (d - `2.0f`c - c_e_m) e0_m_vtx + `2.0f`c e3_p_vtx + r_e_m);
317	f_p_vtx = `1.0f` / d * (c_e_p * p_vtx + (d - `2.0f`c - c_e_p) e0_p_vtx + `2.0f`c e1_m_vtx + r_e_p);
318	f_m_vtx = `1.0f` / d * (c_e_m * p_vtx + (d - `2.0f`c - c_e_m) e0_m_vtx + `2.0f`c e3_p_vtx + r_e_m);
319	}
320
321	__noinline void init(const GeneralCatmullClarkPatch& patch)
322	{
323	assert(patch.size() == `4`);
324	#if 0
325	CatmullClarkPatch qpatch; patch.init(qpatch);
326	init(qpatch);
327	#else
328	const float face_valence_p0 = patch.ring[`0`].face_valence;
329	const float face_valence_p1 = patch.ring[`1`].face_valence;
330	const float face_valence_p2 = patch.ring[`2`].face_valence;
331	const float face_valence_p3 = patch.ring[`3`].face_valence;
332
333	Vertex p0_r_p, p0_r_m;
334	patch.ring[`0`].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m );
335
336	Vertex p1_r_p, p1_r_m;
337	patch.ring[`1`].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m );
338
339	Vertex p2_r_p, p2_r_m;
340	patch.ring[`2`].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m );
341
342	Vertex p3_r_p, p3_r_m;
343	patch.ring[`3`].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m );
344
345	computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() );
346	computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() );
347	computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() );
348	computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() );
349
350	#endif
351	}
352
353
354	__forceinline void convert_to_bezier()
355	{
356	f0_p() = (f0_p() + f0_m()) * `0.5f`;
357	f1_p() = (f1_p() + f1_m()) * `0.5f`;
358	f2_p() = (f2_p() + f2_m()) * `0.5f`;
359	f3_p() = (f3_p() + f3_m()) * `0.5f`;
360	f0_m() = Vertex( zero );
361	f1_m() = Vertex( zero );
362	f2_m() = Vertex( zero );
363	f3_m() = Vertex( zero );
364	}
365
366	static __forceinline void computeInnerVertices(const Vertex matrix[`4`][`4`], const Vertex f_m[`2`][`2`], const float uu, const float vv,
367	Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21)
368	{
369	if (unlikely(uu == `0.0f` \|\| uu == `1.0f` \|\| vv == `0.0f` \|\| vv == `1.0f`))
370	{
371	matrix_11 = matrix[`1`][`1`];
372	matrix_12 = matrix[`1`][`2`];
373	matrix_22 = matrix[`2`][`2`];
374	matrix_21 = matrix[`2`][`1`];
375	}
376	else
377	{
378	const Vertex_t f0_p = matrix[`1`][`1`];
379	const Vertex_t f1_p = matrix[`1`][`2`];
380	const Vertex_t f2_p = matrix[`2`][`2`];
381	const Vertex_t f3_p = matrix[`2`][`1`];
382
383	const Vertex_t f0_m = f_m[`0`][`0`];
384	const Vertex_t f1_m = f_m[`0`][`1`];
385	const Vertex_t f2_m = f_m[`1`][`1`];
386	const Vertex_t f3_m = f_m[`1`][`0`];
387
388	matrix_11 = ( uu * f0_p + vv * f0_m)*rcp(uu+vv);
389	matrix_12 = ((`1.0f`-uu) * f1_m + vv * f1_p)*rcp(`1.0f`-uu+vv);
390	matrix_22 = ((`1.0f`-uu) * f2_p + (`1.0f`-vv) * f2_m)*rcp(`2.0f`-uu-vv);
391	matrix_21 = ( uu * f3_m + (`1.0f`-vv) * f3_p)*rcp(`1.0f`+uu-vv);
392	}
393	}
394
395	template<typename vfloat>
396	static __forceinline void computeInnerVertices(const Vertex v[`4`][`4`], const Vertex f[`2`][`2`],
397	size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
398	{
399	const auto m_border = (uu == `0.0f`) \| (uu == `1.0f`) \| (vv == `0.0f`) \| (vv == `1.0f`);
400
401	const vfloat f0_p = v[`1`][`1`][i];
402	const vfloat f1_p = v[`1`][`2`][i];
403	const vfloat f2_p = v[`2`][`2`][i];
404	const vfloat f3_p = v[`2`][`1`][i];
405
406	const vfloat f0_m = f[`0`][`0`][i];
407	const vfloat f1_m = f[`0`][`1`][i];
408	const vfloat f2_m = f[`1`][`1`][i];
409	const vfloat f3_m = f[`1`][`0`][i];
410
411	const vfloat one_minus_uu = vfloat(`1.0f`) - uu;
412	const vfloat one_minus_vv = vfloat(`1.0f`) - vv;
413
414	const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
415	const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
416	const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
417	const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
418
419	matrix_11 = select(m_border,f0_p,f0_i);
420	matrix_12 = select(m_border,f1_p,f1_i);
421	matrix_22 = select(m_border,f2_p,f2_i);
422	matrix_21 = select(m_border,f3_p,f3_i);
423	}
424
425	static __forceinline Vertex eval(const Vertex matrix[`4`][`4`], const Vertex f[`2`][`2`], const float& uu, const float& vv)
426	{
427	Vertex_t v_11, v_12, v_22, v_21;
428	computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
429
430	const Vec4<float> Bu = BezierBasis::eval(uu);
431	const Vec4<float> Bv = BezierBasis::eval(vv);
432
433	return madd(Bv.x,madd(Bu.x,matrix[`0`][`0`],madd(Bu.y,matrix[`0`][`1`],madd(Bu.z,matrix[`0`][`2`],Bu.w * matrix[`0`][`3`]))),
434	madd(Bv.y,madd(Bu.x,matrix[`1`][`0`],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[`1`][`3`]))),
435	madd(Bv.z,madd(Bu.x,matrix[`2`][`0`],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[`2`][`3`]))),
436	Bv.wmadd(Bu.x,matrix[`3`][`0`],madd(Bu.y,matrix[`3`][`1`],madd(Bu.z,matrix[`3`][`2`],Bu.w matrix[`3`][`3`]))))));
437	}
438
439	static __forceinline Vertex eval_du(const Vertex matrix[`4`][`4`], const Vertex f[`2`][`2`], const float uu, const float vv) // approximative derivative
440	{
441	Vertex_t v_11, v_12, v_22, v_21;
442	computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
443
444	const Vec4<float> Bu = BezierBasis::derivative(uu);
445	const Vec4<float> Bv = BezierBasis::eval(vv);
446
447	return madd(Bv.x,madd(Bu.x,matrix[`0`][`0`],madd(Bu.y,matrix[`0`][`1`],madd(Bu.z,matrix[`0`][`2`],Bu.w * matrix[`0`][`3`]))),
448	madd(Bv.y,madd(Bu.x,matrix[`1`][`0`],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[`1`][`3`]))),
449	madd(Bv.z,madd(Bu.x,matrix[`2`][`0`],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[`2`][`3`]))),
450	Bv.wmadd(Bu.x,matrix[`3`][`0`],madd(Bu.y,matrix[`3`][`1`],madd(Bu.z,matrix[`3`][`2`],Bu.w matrix[`3`][`3`]))))));
451	}
452
453	static __forceinline Vertex eval_dv(const Vertex matrix[`4`][`4`], const Vertex f[`2`][`2`], const float uu, const float vv) // approximative derivative
454	{
455	Vertex_t v_11, v_12, v_22, v_21;
456	computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
457
458	const Vec4<float> Bu = BezierBasis::eval(uu);
459	const Vec4<float> Bv = BezierBasis::derivative(vv);
460
461	return madd(Bv.x,madd(Bu.x,matrix[`0`][`0`],madd(Bu.y,matrix[`0`][`1`],madd(Bu.z,matrix[`0`][`2`],Bu.w * matrix[`0`][`3`]))),
462	madd(Bv.y,madd(Bu.x,matrix[`1`][`0`],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[`1`][`3`]))),
463	madd(Bv.z,madd(Bu.x,matrix[`2`][`0`],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[`2`][`3`]))),
464	Bv.wmadd(Bu.x,matrix[`3`][`0`],madd(Bu.y,matrix[`3`][`1`],madd(Bu.z,matrix[`3`][`2`],Bu.w matrix[`3`][`3`]))))));
465	}
466
467	static __forceinline Vertex eval_dudu(const Vertex matrix[`4`][`4`], const Vertex f[`2`][`2`], const float uu, const float vv) // approximative derivative
468	{
469	Vertex_t v_11, v_12, v_22, v_21;
470	computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
471
472	const Vec4<float> Bu = BezierBasis::derivative2(uu);
473	const Vec4<float> Bv = BezierBasis::eval(vv);
474
475	return madd(Bv.x,madd(Bu.x,matrix[`0`][`0`],madd(Bu.y,matrix[`0`][`1`],madd(Bu.z,matrix[`0`][`2`],Bu.w * matrix[`0`][`3`]))),
476	madd(Bv.y,madd(Bu.x,matrix[`1`][`0`],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[`1`][`3`]))),
477	madd(Bv.z,madd(Bu.x,matrix[`2`][`0`],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[`2`][`3`]))),
478	Bv.wmadd(Bu.x,matrix[`3`][`0`],madd(Bu.y,matrix[`3`][`1`],madd(Bu.z,matrix[`3`][`2`],Bu.w matrix[`3`][`3`]))))));
479	}
480
481	static __forceinline Vertex eval_dvdv(const Vertex matrix[`4`][`4`], const Vertex f[`2`][`2`], const float uu, const float vv) // approximative derivative
482	{
483	Vertex_t v_11, v_12, v_22, v_21;
484	computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
485
486	const Vec4<float> Bu = BezierBasis::eval(uu);
487	const Vec4<float> Bv = BezierBasis::derivative2(vv);
488
489	return madd(Bv.x,madd(Bu.x,matrix[`0`][`0`],madd(Bu.y,matrix[`0`][`1`],madd(Bu.z,matrix[`0`][`2`],Bu.w * matrix[`0`][`3`]))),
490	madd(Bv.y,madd(Bu.x,matrix[`1`][`0`],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[`1`][`3`]))),
491	madd(Bv.z,madd(Bu.x,matrix[`2`][`0`],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[`2`][`3`]))),
492	Bv.wmadd(Bu.x,matrix[`3`][`0`],madd(Bu.y,matrix[`3`][`1`],madd(Bu.z,matrix[`3`][`2`],Bu.w matrix[`3`][`3`]))))));
493	}
494
495	static __forceinline Vertex eval_dudv(const Vertex matrix[`4`][`4`], const Vertex f[`2`][`2`], const float uu, const float vv) // approximative derivative
496	{
497	Vertex_t v_11, v_12, v_22, v_21;
498	computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
499
500	const Vec4<float> Bu = BezierBasis::derivative(uu);
501	const Vec4<float> Bv = BezierBasis::derivative(vv);
502
503	return madd(Bv.x,madd(Bu.x,matrix[`0`][`0`],madd(Bu.y,matrix[`0`][`1`],madd(Bu.z,matrix[`0`][`2`],Bu.w * matrix[`0`][`3`]))),
504	madd(Bv.y,madd(Bu.x,matrix[`1`][`0`],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[`1`][`3`]))),
505	madd(Bv.z,madd(Bu.x,matrix[`2`][`0`],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[`2`][`3`]))),
506	Bv.wmadd(Bu.x,matrix[`3`][`0`],madd(Bu.y,matrix[`3`][`1`],madd(Bu.z,matrix[`3`][`2`],Bu.w matrix[`3`][`3`]))))));
507	}
508
509	__forceinline Vertex eval(const float uu, const float vv) const {
510	return eval(v,f,uu,vv);
511	}
512
513	__forceinline Vertex eval_du( const float uu, const float vv) const {
514	return eval_du(v,f,uu,vv);
515	}
516
517	__forceinline Vertex eval_dv( const float uu, const float vv) const {
518	return eval_dv(v,f,uu,vv);
519	}
520
521	__forceinline Vertex eval_dudu( const float uu, const float vv) const {
522	return eval_dudu(v,f,uu,vv);
523	}
524
525	__forceinline Vertex eval_dvdv( const float uu, const float vv) const {
526	return eval_dvdv(v,f,uu,vv);
527	}
528
529	__forceinline Vertex eval_dudv( const float uu, const float vv) const {
530	return eval_dudv(v,f,uu,vv);
531	}
532
533	static __forceinline Vertex normal(const Vertex matrix[`4`][`4`], const Vertex f_m[`2`][`2`], const float uu, const float vv) // FIXME: why not using basis functions
534	{
535	/ interpolate inner vertices /
536	Vertex_t matrix_11, matrix_12, matrix_22, matrix_21;
537	computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21);
538
539	/ tangentU /
540	const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[`0`][`0`], (Vertex_t)matrix[`1`][`0`], (Vertex_t)matrix[`2`][`0`], (Vertex_t)matrix[`3`][`0`]);
541	const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[`0`][`1`], (Vertex_t)matrix_11 , (Vertex_t)matrix_21 , (Vertex_t)matrix[`3`][`1`]);
542	const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[`0`][`2`], (Vertex_t)matrix_12 , (Vertex_t)matrix_22 , (Vertex_t)matrix[`3`][`2`]);
543	const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[`0`][`3`], (Vertex_t)matrix[`1`][`3`], (Vertex_t)matrix[`2`][`3`], (Vertex_t)matrix[`3`][`3`]);
544
545	const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
546
547	/ tangentV /
548	const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[`0`][`0`], (Vertex_t)matrix[`0`][`1`], (Vertex_t)matrix[`0`][`2`], (Vertex_t)matrix[`0`][`3`]);
549	const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[`1`][`0`], (Vertex_t)matrix_11 , (Vertex_t)matrix_12 , (Vertex_t)matrix[`1`][`3`]);
550	const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[`2`][`0`], (Vertex_t)matrix_21 , (Vertex_t)matrix_22 , (Vertex_t)matrix[`2`][`3`]);
551	const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[`3`][`0`], (Vertex_t)matrix[`3`][`1`], (Vertex_t)matrix[`3`][`2`], (Vertex_t)matrix[`3`][`3`]);
552
553	const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
554
555	/ normal = tangentU x tangentV /
556	const Vertex_t n = cross(tangentU,tangentV);
557
558	return n;
559	}
560
561	__forceinline Vertex normal( const float uu, const float vv) const {
562	return normal(v,f,uu,vv);
563	}
564
565	__forceinline void eval(const float u, const float v,
566	Vertex* P, Vertex* dPdu, Vertex* dPdv,
567	Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
568	const float dscale = `1.0f`) const
569	{
570	if (P) {
571	*P = eval(u,v);
572	}
573	if (dPdu) {
574	assert(dPdu); dPdu = eval_du(u,v)dscale;
575	assert(dPdv); dPdv = eval_dv(u,v)dscale;
576	}
577	if (ddPdudu) {
578	assert(ddPdudu); ddPdudu = eval_dudu(u,v)sqr(dscale);
579	assert(ddPdvdv); ddPdvdv = eval_dvdv(u,v)sqr(dscale);
580	assert(ddPdudv); ddPdudv = eval_dudv(u,v)sqr(dscale);
581	}
582	}
583
584	template<class vfloat>
585	static __forceinline vfloat eval(const Vertex v[`4`][`4`], const Vertex f[`2`][`2`],
586	const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n,
587	vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
588	{
589	const vfloat curve0_x = madd(v_n[`0`],vfloat(v[`0`][`0`][i]),madd(v_n[`1`],vfloat(v[`1`][`0`][i]),madd(v_n[`2`],vfloat(v[`2`][`0`][i]),v_n[`3`] * vfloat(v[`3`][`0`][i]))));
590	const vfloat curve1_x = madd(v_n[`0`],vfloat(v[`0`][`1`][i]),madd(v_n[`1`],vfloat(matrix_11 ),madd(v_n[`2`],vfloat(matrix_21 ),v_n[`3`] * vfloat(v[`3`][`1`][i]))));
591	const vfloat curve2_x = madd(v_n[`0`],vfloat(v[`0`][`2`][i]),madd(v_n[`1`],vfloat(matrix_12 ),madd(v_n[`2`],vfloat(matrix_22 ),v_n[`3`] * vfloat(v[`3`][`2`][i]))));
592	const vfloat curve3_x = madd(v_n[`0`],vfloat(v[`0`][`3`][i]),madd(v_n[`1`],vfloat(v[`1`][`3`][i]),madd(v_n[`2`],vfloat(v[`2`][`3`][i]),v_n[`3`] * vfloat(v[`3`][`3`][i]))));
593	return madd(u_n[`0`],curve0_x,madd(u_n[`1`],curve1_x,madd(u_n[`2`],curve2_x,u_n[`3`] * curve3_x)));
594	}
595
596	template<typename vbool, typename vfloat>
597	static __forceinline void eval(const Vertex v[`4`][`4`], const Vertex f[`2`][`2`],
598	const vbool& valid, const vfloat& uu, const vfloat& vv,
599	float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
600	const float dscale, const size_t dstride, const size_t N)
601	{
602	if (P) {
603	const Vec4<vfloat> u_n = BezierBasis::eval(uu);
604	const Vec4<vfloat> v_n = BezierBasis::eval(vv);
605	for (size_t i=`0`; i<N; i++) {
606	vfloat matrix_11, matrix_12, matrix_22, matrix_21;
607	computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
608	vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21));
609	}
610	}
611	if (dPdu)
612	{
613	{
614	assert(dPdu);
615	const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
616	const Vec4<vfloat> v_n = BezierBasis::eval(vv);
617	for (size_t i=`0`; i<N; i++) {
618	vfloat matrix_11, matrix_12, matrix_22, matrix_21;
619	computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
620	vfloat::store(valid,dPdu+idstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)dscale);
621	}
622	}
623	{
624	assert(dPdv);
625	const Vec4<vfloat> u_n = BezierBasis::eval(uu);
626	const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
627	for (size_t i=`0`; i<N; i++) {
628	vfloat matrix_11, matrix_12, matrix_22, matrix_21;
629	computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
630	vfloat::store(valid,dPdv+idstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)dscale);
631	}
632	}
633	}
634	if (ddPdudu)
635	{
636	{
637	assert(ddPdudu);
638	const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
639	const Vec4<vfloat> v_n = BezierBasis::eval(vv);
640	for (size_t i=`0`; i<N; i++) {
641	vfloat matrix_11, matrix_12, matrix_22, matrix_21;
642	computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
643	vfloat::store(valid,ddPdudu+idstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)sqr(dscale));
644	}
645	}
646	{
647	assert(ddPdvdv);
648	const Vec4<vfloat> u_n = BezierBasis::eval(uu);
649	const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
650	for (size_t i=`0`; i<N; i++) {
651	vfloat matrix_11, matrix_12, matrix_22, matrix_21;
652	computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
653	vfloat::store(valid,ddPdvdv+idstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)sqr(dscale));
654	}
655	}
656	{
657	assert(ddPdudv);
658	const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
659	const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
660	for (size_t i=`0`; i<N; i++) {
661	vfloat matrix_11, matrix_12, matrix_22, matrix_21;
662	computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
663	vfloat::store(valid,ddPdudv+idstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)sqr(dscale));
664	}
665	}
666	}
667	}
668
669	template<typename vbool, typename vfloat>
670	__forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
671	float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
672	const float dscale, const size_t dstride, const size_t N) const {
673	eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
674	}
675
676	template<class T>
677	static __forceinline Vec3<T> eval_t(const Vertex matrix[`4`][`4`], const Vec3<T> f[`2`][`2`], const T& uu, const T& vv)
678	{
679	typedef typename T::Bool M;
680	const M m_border = (uu == `0.0f`) \| (uu == `1.0f`) \| (vv == `0.0f`) \| (vv == `1.0f`);
681
682	const Vec3<T> f0_p = Vec3<T>(matrix[`1`][`1`].x,matrix[`1`][`1`].y,matrix[`1`][`1`].z);
683	const Vec3<T> f1_p = Vec3<T>(matrix[`1`][`2`].x,matrix[`1`][`2`].y,matrix[`1`][`2`].z);
684	const Vec3<T> f2_p = Vec3<T>(matrix[`2`][`2`].x,matrix[`2`][`2`].y,matrix[`2`][`2`].z);
685	const Vec3<T> f3_p = Vec3<T>(matrix[`2`][`1`].x,matrix[`2`][`1`].y,matrix[`2`][`1`].z);
686
687	const Vec3<T> f0_m = f[`0`][`0`];
688	const Vec3<T> f1_m = f[`0`][`1`];
689	const Vec3<T> f2_m = f[`1`][`1`];
690	const Vec3<T> f3_m = f[`1`][`0`];
691
692	const T one_minus_uu = T(`1.0f`) - uu;
693	const T one_minus_vv = T(`1.0f`) - vv;
694
695	const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
696	const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
697	const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
698	const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
699
700	const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
701	const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
702	const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
703	const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
704
705	const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
706	const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
707	const T B1_u = `3.0f` * (one_minus_uu * uu * one_minus_uu);
708	const T B1_v = `3.0f` * (one_minus_vv * vv * one_minus_vv);
709	const T B2_u = `3.0f` * (uu * one_minus_uu * uu);
710	const T B2_v = `3.0f` * (vv * one_minus_vv * vv);
711	const T B3_u = uu * uu * uu;
712	const T B3_v = vv * vv * vv;
713
714	const T x = madd(B0_v,madd(B0_u,matrix[`0`][`0`].x,madd(B1_u,matrix[`0`][`1`].x,madd(B2_u,matrix[`0`][`2`].x,B3_u * matrix[`0`][`3`].x))),
715	madd(B1_v,madd(B0_u,matrix[`1`][`0`].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[`1`][`3`].x))),
716	madd(B2_v,madd(B0_u,matrix[`2`][`0`].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[`2`][`3`].x))),
717	B3_vmadd(B0_u,matrix[`3`][`0`].x,madd(B1_u,matrix[`3`][`1`].x,madd(B2_u,matrix[`3`][`2`].x,B3_u matrix[`3`][`3`].x))))));
718
719	const T y = madd(B0_v,madd(B0_u,matrix[`0`][`0`].y,madd(B1_u,matrix[`0`][`1`].y,madd(B2_u,matrix[`0`][`2`].y,B3_u * matrix[`0`][`3`].y))),
720	madd(B1_v,madd(B0_u,matrix[`1`][`0`].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[`1`][`3`].y))),
721	madd(B2_v,madd(B0_u,matrix[`2`][`0`].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[`2`][`3`].y))),
722	B3_vmadd(B0_u,matrix[`3`][`0`].y,madd(B1_u,matrix[`3`][`1`].y,madd(B2_u,matrix[`3`][`2`].y,B3_u matrix[`3`][`3`].y))))));
723
724	const T z = madd(B0_v,madd(B0_u,matrix[`0`][`0`].z,madd(B1_u,matrix[`0`][`1`].z,madd(B2_u,matrix[`0`][`2`].z,B3_u * matrix[`0`][`3`].z))),
725	madd(B1_v,madd(B0_u,matrix[`1`][`0`].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[`1`][`3`].z))),
726	madd(B2_v,madd(B0_u,matrix[`2`][`0`].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[`2`][`3`].z))),
727	B3_vmadd(B0_u,matrix[`3`][`0`].z,madd(B1_u,matrix[`3`][`1`].z,madd(B2_u,matrix[`3`][`2`].z,B3_u matrix[`3`][`3`].z))))));
728
729	return Vec3<T>(x,y,z);
730	}
731
732	template<class T>
733	__forceinline Vec3<T> eval(const T& uu, const T& vv) const
734	{
735	Vec3<T> ff[`2`][`2`];
736	ff[`0`][`0`] = Vec3<T>(f[`0`][`0`]);
737	ff[`0`][`1`] = Vec3<T>(f[`0`][`1`]);
738	ff[`1`][`1`] = Vec3<T>(f[`1`][`1`]);
739	ff[`1`][`0`] = Vec3<T>(f[`1`][`0`]);
740	return eval_t(v,ff,uu,vv);
741	}
742
743	template<class T>
744	static __forceinline Vec3<T> normal_t(const Vertex matrix[`4`][`4`], const Vec3<T> f[`2`][`2`], const T& uu, const T& vv)
745	{
746	typedef typename T::Bool M;
747
748	const Vec3<T> f0_p = Vec3<T>(matrix[`1`][`1`].x,matrix[`1`][`1`].y,matrix[`1`][`1`].z);
749	const Vec3<T> f1_p = Vec3<T>(matrix[`1`][`2`].x,matrix[`1`][`2`].y,matrix[`1`][`2`].z);
750	const Vec3<T> f2_p = Vec3<T>(matrix[`2`][`2`].x,matrix[`2`][`2`].y,matrix[`2`][`2`].z);
751	const Vec3<T> f3_p = Vec3<T>(matrix[`2`][`1`].x,matrix[`2`][`1`].y,matrix[`2`][`1`].z);
752
753	const Vec3<T> f0_m = f[`0`][`0`];
754	const Vec3<T> f1_m = f[`0`][`1`];
755	const Vec3<T> f2_m = f[`1`][`1`];
756	const Vec3<T> f3_m = f[`1`][`0`];
757
758	const T one_minus_uu = T(`1.0f`) - uu;
759	const T one_minus_vv = T(`1.0f`) - vv;
760
761	const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv);
762	const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv);
763	const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
764	const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
765
766	#if 1
767	const M m_corner0 = (uu == `0.0f`) & (vv == `0.0f`);
768	const M m_corner1 = (uu == `1.0f`) & (vv == `0.0f`);
769	const M m_corner2 = (uu == `1.0f`) & (vv == `1.0f`);
770	const M m_corner3 = (uu == `0.0f`) & (vv == `1.0f`);
771	const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) );
772	const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) );
773	const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) );
774	const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) );
775	#else
776	const M m_border = (uu == `0.0f`) \| (uu == `1.0f`) \| (vv == `0.0f`) \| (vv == `1.0f`);
777	const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
778	const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
779	const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
780	const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
781	#endif
782
783	const Vec3<T> matrix_00 = Vec3<T>(matrix[`0`][`0`].x,matrix[`0`][`0`].y,matrix[`0`][`0`].z);
784	const Vec3<T> matrix_10 = Vec3<T>(matrix[`1`][`0`].x,matrix[`1`][`0`].y,matrix[`1`][`0`].z);
785	const Vec3<T> matrix_20 = Vec3<T>(matrix[`2`][`0`].x,matrix[`2`][`0`].y,matrix[`2`][`0`].z);
786	const Vec3<T> matrix_30 = Vec3<T>(matrix[`3`][`0`].x,matrix[`3`][`0`].y,matrix[`3`][`0`].z);
787
788	const Vec3<T> matrix_01 = Vec3<T>(matrix[`0`][`1`].x,matrix[`0`][`1`].y,matrix[`0`][`1`].z);
789	const Vec3<T> matrix_02 = Vec3<T>(matrix[`0`][`2`].x,matrix[`0`][`2`].y,matrix[`0`][`2`].z);
790	const Vec3<T> matrix_03 = Vec3<T>(matrix[`0`][`3`].x,matrix[`0`][`3`].y,matrix[`0`][`3`].z);
791
792	const Vec3<T> matrix_31 = Vec3<T>(matrix[`3`][`1`].x,matrix[`3`][`1`].y,matrix[`3`][`1`].z);
793	const Vec3<T> matrix_32 = Vec3<T>(matrix[`3`][`2`].x,matrix[`3`][`2`].y,matrix[`3`][`2`].z);
794	const Vec3<T> matrix_33 = Vec3<T>(matrix[`3`][`3`].x,matrix[`3`][`3`].y,matrix[`3`][`3`].z);
795
796	const Vec3<T> matrix_13 = Vec3<T>(matrix[`1`][`3`].x,matrix[`1`][`3`].y,matrix[`1`][`3`].z);
797	const Vec3<T> matrix_23 = Vec3<T>(matrix[`2`][`3`].x,matrix[`2`][`3`].y,matrix[`2`][`3`].z);
798
799	/ tangentU /
800	const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
801	const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
802	const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
803	const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
804
805	const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
806
807	/ tangentV /
808	const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
809	const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
810	const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
811	const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
812
813	const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
814
815	/ normal = tangentU x tangentV /
816	const Vec3<T> n = cross(tangentU,tangentV);
817	return n;
818	}
819
820	template<class T>
821	__forceinline Vec3<T> normal(const T& uu, const T& vv) const
822	{
823	Vec3<T> ff[`2`][`2`];
824	ff[`0`][`0`] = Vec3<T>(f[`0`][`0`]);
825	ff[`0`][`1`] = Vec3<T>(f[`0`][`1`]);
826	ff[`1`][`1`] = Vec3<T>(f[`1`][`1`]);
827	ff[`1`][`0`] = Vec3<T>(f[`1`][`0`]);
828	return normal_t(v,ff,uu,vv);
829	}
830
831	__forceinline BBox<Vertex> bounds() const
832	{
833	const Vertex *const cv = &v[`0`][`0`];
834	BBox<Vertex> bounds (cv[`0`]);
835	for (size_t i=`1`; i<`16`; i++)
836	bounds.extend( cv[i] );
837	bounds.extend(f[`0`][`0`]);
838	bounds.extend(f[`1`][`0`]);
839	bounds.extend(f[`1`][`1`]);
840	bounds.extend(f[`1`][`1`]);
841	return bounds;
842	}
843
844	friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p)
845	{
846	for (size_t y=`0`; y<`4`; y++)
847	for (size_t x=`0`; x<`4`; x++)
848	o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
849
850	for (size_t y=`0`; y<`2`; y++)
851	for (size_t x=`0`; x<`2`; x++)
852	o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl;
853	return o;
854	}
855	};
856
857	typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa;
858
859	template<typename Vertex, typename Vertex_t>
860	__forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride)
861	{
862	CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride);
863	GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
864	gpatch.convert_to_bezier();
865	for (size_t y=`0`; y<`4`; y++)
866	for (size_t x=`0`; x<`4`; x++)
867	matrix[y][x] = (Vertex_t)gpatch.v[y][x];
868	}
869
870	template<typename Vertex, typename Vertex_t>
871	__forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch)
872	{
873	GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
874	gpatch.convert_to_bezier();
875	for (size_t y=`0`; y<`4`; y++)
876	for (size_t x=`0`; x<`4`; x++)
877	matrix[y][x] = (Vertex_t)gpatch.v[y][x];
878	}
879
880	template<typename Vertex, typename Vertex_t>
881	__forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
882	const BezierCurveT<Vertex>* border0,
883	const BezierCurveT<Vertex>* border1,
884	const BezierCurveT<Vertex>* border2,
885	const BezierCurveT<Vertex>* border3)
886	{
887	GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3);
888	gpatch.convert_to_bezier();
889	for (size_t y=`0`; y<`4`; y++)
890	for (size_t x=`0`; x<`4`; x++)
891	matrix[y][x] = (Vertex_t)gpatch.v[y][x];
892	}
893	}
894

// Source: Godot/thirdparty/embree/kernels/subdiv/gregory_patch.h