curve_intersector_sweep.h source code [Godot/thirdparty/embree/kernels/geometry/curve_intersector_sweep.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "../common/ray.h"
7	#include "cylinder.h"
8	#include "plane.h"
9	#include "line_intersector.h"
10	#include "curve_intersector_precalculations.h"
11
12	namespace embree
13	{
14	namespace isa
15	{
16	static const size_t numJacobianIterations = `5`;
17	#if defined(__AVX__)
18	static const size_t numBezierSubdivisions = `2`;
19	#else
20	static const size_t numBezierSubdivisions = `3`;
21	#endif
22
23	struct BezierCurveHit
24	{
25	__forceinline BezierCurveHit() {}
26
27	__forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
28	: t(t), u(u), v(`0.0f`), Ng (Ng) {}
29
30	__forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
31	: t(t), u(u), v(v), Ng (Ng) {}
32
33	__forceinline void finalize() {}
34
35	public:
36	float t;
37	float u;
38	float v;
39	Vec3fa Ng;
40	};
41
42	template<typename NativeCurve3ff, typename Ray, typename Epilog>
43	__forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
44	const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
45	const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
46	const Epilog& epilog)
47	{
48	if (tp.lower [i]+dt > ray.tfar) return false;
49	Vec3fa Ng_o = Vec3fa (Ng.x [i],Ng.y [i],Ng.z [i]);
50	if (h0.lower [i] == tp.lower [i]) Ng_o = -Vec3fa (dP0du.x [i],dP0du.y [i],dP0du.z [i]);
51	if (h1.lower [i] == tp.lower [i]) Ng_o = +Vec3fa (dP3du.x [i],dP3du.y [i],dP3du.z [i]);
52	BezierCurveHit hit(tp.lower [i]+dt,u [i],Ng_o);
53	return epilog(hit);
54	}
55
56	template<typename NativeCurve3ff, typename Ray, typename Epilog>
57	__forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
58	{
59	const Vec3fa org = zero;
60	const Vec3fa dir = ray.dir;
61	const float length_ray_dir = length(dir);
62
63	/ error of curve evaluations is proportional to largest coordinate /
64	const BBox3ff box = curve.bounds();
65	const float P_err = `16.0f`*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
66
67	for (size_t i=`0`; i<numJacobianIterations; i++)
68	{
69	const Vec3fa Q = madd(Vec3fa (t),dir,org);
70	//const Vec3fa dQdu = zero;
71	const Vec3fa dQdt = dir;
72	const float Q_err = `16.0f`*float(ulp)length_ray_dirt; // works as org=zero here
73
74	Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
75	//const Vec3fa dPdt = zero;
76
77	const Vec3fa R = Q -P;
78	const float len_R = length(R); //reduce_max(abs(R));
79	const float R_err = max(Q_err,P_err);
80	const Vec3fa dRdu = /dQdu/-dPdu;
81	const Vec3fa dRdt = dQdt;//-dPdt;
82
83	const Vec3fa T = normalize(dPdu);
84	const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
85	//const Vec3fa dTdt = zero;
86	const float cos_err = P_err/length(dPdu);
87
88	/ Error estimate for dot(R,T):*
89
90	dot(R,T) = cos(R,T) \|R\| \|T\|
91	= (cos(R,T) +- cos_error) (\|R\| +- \|R\|_err) * (\|T\| +- \|T\|_err)*
92	= cos(R,T)\|R\|\|T\|
93	+- cos(R,T)(\|R\|\|T\|_err + \|T\|\|R\|_err)*
94	+- cos_error(\|R\| + \|T\|)*
95	+- lower order terms
96	with cos(R,T) being in [0,1] and \|T\| = 1 we get:
97	dot(R,T)_err = \|R\|\|T\|_err + \|R\|_err = cos_error(\|R\|+1)
98	*/
99
100	const float f = dot(R,T);
101	const float f_err = len_RP_err + R_err + cos_err(`1.0f`+len_R);
102	const float dfdu = dot(dRdu,T) + dot(R,dTdu);
103	const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
104
105	const float K = dot(R,R)-sqr(f);
106	const float dKdu = /2.0f/(dot(R,dRdu)-f*dfdu);
107	const float dKdt = /2.0f/(dot(R,dRdt)-f*dfdt);
108	const float rsqrt_K = rsqrt(K);
109
110	const float g = sqrt(K)-P.w;
111	const float g_err = R_err + f_err + `16.0f`*float(ulp)*box.upper.w;
112	const float dgdu = /0.5f/dKdu*rsqrt_K-dPdu.w;
113	const float dgdt = /0.5f/dKdtrsqrt_K;//-dPdt.w;*
114
115	const LinearSpace2f J = LinearSpace2f (dfdu,dfdt,dgdu,dgdt);
116	const Vec2f dut = rcp(J)*Vec2f (f,g);
117	const Vec2f ut = Vec2f (u,t) - dut;
118	u = ut.x; t = ut.y;
119
120	if (abs(f) < f_err && abs(g) < g_err)
121	{
122	t+=dt;
123	if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
124	if (!(u >= `0.0f` && u <= `1.0f`)) return false; // rejects NaNs
125	const Vec3fa R = normalize(Q -P);
126	const Vec3fa U = madd(Vec3fa (dPdu.w),R,dPdu);
127	const Vec3fa V = cross(dPdu,R);
128	BezierCurveHit hit(t,u,cross(V,U));
129	return epilog(hit);
130	}
131	}
132	return false;
133	}
134
135	template<typename NativeCurve3ff, typename Ray, typename Epilog>
136	bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
137	float u0, float u1, unsigned int depth, const Epilog& epilog)
138	{
139	#if defined(__AVX__)
140	enum { VSIZEX_ = `8` };
141	typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
142	typedef vint8 vintx;
143	typedef vfloat8 vfloatx;
144	#else
145	enum { VSIZEX_ = `4` };
146	typedef vbool4 vboolx;
147	typedef vint4 vintx;
148	typedef vfloat4 vfloatx;
149	#endif
150	typedef Vec3<vfloatx> Vec3vfx;
151	typedef Vec4<vfloatx> Vec4vfx;
152
153	unsigned int maxDepth = numBezierSubdivisions;
154	bool found = false;
155	const Vec3fa org = zero;
156	const Vec3fa dir = ray.dir;
157
158	unsigned int sptr = `0`;
159	const unsigned int stack_size = numBezierSubdivisions+`1`; // +1 because of unstable workaround below
160	struct StackEntry {
161	vboolx valid;
162	vfloatx tlower;
163	float u0;
164	float u1;
165	unsigned int depth;
166	};
167	StackEntry stack[stack_size];
168	goto entry;
169
170	/ terminate if stack is empty /
171	while (sptr)
172	{
173	/ pop from stack /
174	{
175	sptr--;
176	vboolx valid = stack[sptr].valid;
177	const vfloatx tlower = stack[sptr].tlower;
178	valid &= tlower +dt <= ray.tfar;
179	if (none(valid)) continue;
180	u0 = stack[sptr].u0;
181	u1 = stack[sptr].u1;
182	depth = stack[sptr].depth;
183	const size_t i = select_min(valid,tlower); clear(valid,i);
184	stack[sptr].valid = valid;
185	if (any(valid)) sptr++; // there are still items on the stack
186
187	/ process next segment /
188	const vfloatx vu0 = lerp(u0,u1,vfloatx (step)*(`1.0f`/(vfloatx::size-`1`)));
189	u0 = vu0 [i+`0`];
190	u1 = vu0 [i+`1`];
191	}
192	entry:
193
194	/ subdivide curve /
195	const float dscale = (u1-u0)(`1.0f`/(`3.0f`(vfloatx::size-`1`)));
196	const vfloatx vu0 = lerp(u0,u1,vfloatx (step)*(`1.0f`/(vfloatx::size-`1`)));
197	Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx (dscale);
198	const Vec4vfx P3 = shift_right_1(P0);
199	const Vec4vfx dP3du = shift_right_1(dP0du);
200	const Vec4vfx P1 = P0 + dP0du;
201	const Vec4vfx P2 = P3 - dP3du;
202
203	/ calculate bounding cylinders /
204	const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3 -P0));
205	const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3 -P0));
206	const vfloatx maxr12 = sqrt(max(rr1,rr2));
207	const vfloatx one_plus_ulp = `1.0f`+`2.0f`*float(ulp);
208	const vfloatx one_minus_ulp = `1.0f`-`2.0f`*float(ulp);
209	vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
210	vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
211	r_outer = one_plus_ulp *r_outer;
212	r_inner = max(`0.0f`,one_minus_ulp *r_inner);
213	const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
214	const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
215	vboolx valid = true; clear(valid,vfloatx::size-`1`);
216
217	/ intersect with outer cylinder /
218	BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
219	valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
220	if (none(valid)) continue;
221
222	/ intersect with cap-planes /
223	BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
224	tp = embree::intersect(tp,tc_outer);
225	BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
226	tp = embree::intersect(tp,h0);
227	BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
228	tp = embree::intersect(tp,h1);
229	valid &= tp.lower <= tp.upper;
230	if (none(valid)) continue;
231
232	/ clamp and correct u parameter /
233	u_outer0 = clamp(u_outer0,vfloatx (`0.0f`),vfloatx (`1.0f`));
234	u_outer1 = clamp(u_outer1,vfloatx (`0.0f`),vfloatx (`1.0f`));
235	u_outer0 = lerp(u0,u1,(vfloatx (step)+u_outer0)(`1.0f`/float*(vfloatx::size)));
236	u_outer1 = lerp(u0,u1,(vfloatx (step)+u_outer1)(`1.0f`/float*(vfloatx::size)));
237
238	/ intersect with inner cylinder /
239	BBox<vfloatx> tc_inner;
240	vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
241	const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
242
243	/ at the unstable area we subdivide deeper /
244	const vboolx unstable0 = (!valid_inner) \| (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < `0.3f`);
245	const vboolx unstable1 = (!valid_inner) \| (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < `0.3f`);
246
247	/ subtract the inner interval from the current hit interval /
248	BBox<vfloatx> tp0, tp1;
249	subtract(tp,tc_inner,tp0,tp1);
250	vboolx valid0 = valid & (tp0.lower <= tp0.upper);
251	vboolx valid1 = valid & (tp1.lower <= tp1.upper);
252	if (none(valid0 \| valid1)) continue;
253
254	/ iterate over all first hits front to back /
255	const vintx termDepth0 = select(unstable0,vintx (maxDepth+`1`),vintx (maxDepth));
256	vboolx recursion_valid0 = valid0 & (depth < termDepth0);
257	valid0 &= depth >= termDepth0;
258
259	while (any(valid0))
260	{
261	const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
262	found = found \| intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0 [i],tp0.lower [i],epilog);
263	//found = found \| intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
264	valid0 &= tp0.lower +dt <= ray.tfar;
265	}
266	valid1 &= tp1.lower +dt <= ray.tfar;
267
268	/ iterate over all second hits front to back /
269	const vintx termDepth1 = select(unstable1,vintx (maxDepth+`1`),vintx (maxDepth));
270	vboolx recursion_valid1 = valid1 & (depth < termDepth1);
271	valid1 &= depth >= termDepth1;
272	while (any(valid1))
273	{
274	const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
275	found = found \| intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1 [i],tp1.upper [i],epilog);
276	//found = found \| intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
277	valid1 &= tp1.lower +dt <= ray.tfar;
278	}
279
280	/ push valid segments to stack /
281	recursion_valid0 &= tp0.lower +dt <= ray.tfar;
282	recursion_valid1 &= tp1.lower +dt <= ray.tfar;
283	const vboolx recursion_valid = recursion_valid0 \| recursion_valid1;
284	if (any(recursion_valid))
285	{
286	assert(sptr < stack_size);
287	stack[sptr].valid = recursion_valid;
288	stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);
289	stack[sptr].u0 = u0;
290	stack[sptr].u1 = u1;
291	stack[sptr].depth = depth+`1`;
292	sptr++;
293	}
294	}
295	return found;
296	}
297
298	template<template<typename Ty> class NativeCurve>
299	struct SweepCurve1Intersector1
300	{
301	typedef NativeCurve<Vec3ff> NativeCurve3ff;
302
303	template<typename Epilog>
304	__noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
305	IntersectContext* context,
306	const CurveGeometry* geom, const unsigned int primID,
307	const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
308	const Epilog& epilog)
309	{
310	STAT3(normal.trav_prims,`1`,`1`,`1`);
311
312	/ move ray closer to make intersection stable /
313	NativeCurve3ff curve0(v0,v1,v2,v3);
314	curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
315	const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
316	const Vec3ff ref(madd(Vec3fa (dt),ray.dir,ray.org),`0.0f`);
317	const NativeCurve3ff curve1 = curve0-ref;
318	return intersect_bezier_recursive_jacobian(ray,dt,curve1,`0.0f`,`1.0f`,`1`,epilog);
319	}
320	};
321
322	template<template<typename Ty> class NativeCurve, int K>
323	struct SweepCurve1IntersectorK
324	{
325	typedef NativeCurve<Vec3ff> NativeCurve3ff;
326
327	struct Ray1
328	{
329	__forceinline Ray1(RayK<K>& ray, size_t k)
330	: org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
331
332	Vec3fa org;
333	Vec3fa dir;
334	float _tnear;
335	float& tfar;
336
337	__forceinline float& tnear() { return _tnear; }
338	//__forceinline float& tfar() { return _tfar; }
339	__forceinline const float& tnear() const { return _tnear; }
340	//__forceinline const float& tfar() const { return _tfar; }
341
342	};
343
344	template<typename Epilog>
345	__forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
346	IntersectContext* context,
347	const CurveGeometry* geom, const unsigned int primID,
348	const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
349	const Epilog& epilog)
350	{
351	STAT3(normal.trav_prims,`1`,`1`,`1`);
352	Ray1 ray(vray,k);
353
354	/ move ray closer to make intersection stable /
355	NativeCurve3ff curve0(v0,v1,v2,v3);
356	curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
357	const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
358	const Vec3ff ref(madd(Vec3fa (dt),ray.dir,ray.org),`0.0f`);
359	const NativeCurve3ff curve1 = curve0-ref;
360	return intersect_bezier_recursive_jacobian(ray,dt,curve1,`0.0f`,`1.0f`,`1`,epilog);
361	}
362	};
363	}
364	}
365

Browse the source code of Godot/thirdparty/embree/kernels/geometry/curve_intersector_sweep.h