node_intersector_frustum.h source code [Godot/thirdparty/embree/kernels/bvh/node_intersector_frustum.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "node_intersector.h"
7
8	namespace embree
9	{
10	namespace isa
11	{
12	//////////////////////////////////////////////////////////////////////////////////////
13	// Frustum structure used in hybrid and stream traversal
14	//////////////////////////////////////////////////////////////////////////////////////
15
16	/*
17	Optimized frustum test. We calculate t=(p-org)/dir in ray/box
18	intersection. We assume the rays are split by octant, thus
19	dir intervals are either positive or negative in each
20	dimension.
21
22	Case 1: dir.min >= 0 && dir.max >= 0:
23	t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
24	t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
25
26	Case 2: dir.min < 0 && dir.max < 0:
27	t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
28	t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
29	*/
30
31	template<bool robust>
32	struct Frustum;
33
34	/ Fast variant /
35	template<>
36	struct Frustum<false>
37	{
38	__forceinline Frustum() {}
39
40	template<int K>
41	__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
42	{
43	const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
44	reduce_min(select(valid, org.y, pos_inf)),
45	reduce_min(select(valid, org.z, pos_inf)));
46
47	const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
48	reduce_max(select(valid, org.y, neg_inf)),
49	reduce_max(select(valid, org.z, neg_inf)));
50
51	const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
52	reduce_min(select(valid, rdir.y, pos_inf)),
53	reduce_min(select(valid, rdir.z, pos_inf)));
54
55	const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
56	reduce_max(select(valid, rdir.y, neg_inf)),
57	reduce_max(select(valid, rdir.z, neg_inf)));
58
59	const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
60	const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
61
62	init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
63	}
64
65	__forceinline void init(const Vec3fa& reduced_min_org,
66	const Vec3fa& reduced_max_org,
67	const Vec3fa& reduced_min_rdir,
68	const Vec3fa& reduced_max_rdir,
69	float reduced_min_dist,
70	float reduced_max_dist,
71	int N)
72	{
73	const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa (zero));
74
75	min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
76	max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
77
78	#if defined (__aarch64__)
79	neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
80	neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
81	#else
82	min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
83	max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
84	#endif
85	min_dist = reduced_min_dist;
86	max_dist = reduced_max_dist;
87
88	nf = NearFarPrecalculations (min_rdir, N);
89	}
90
91	template<int K>
92	__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
93	{
94	max_dist = reduce_max(ray_tfar);
95	}
96
97	NearFarPrecalculations nf;
98
99	Vec3fa min_rdir;
100	Vec3fa max_rdir;
101
102	#if defined (__aarch64__)
103	Vec3fa neg_min_org_rdir;
104	Vec3fa neg_max_org_rdir;
105	#else
106	Vec3fa min_org_rdir;
107	Vec3fa max_org_rdir;
108	#endif
109	float min_dist;
110	float max_dist;
111	};
112
113	typedef Frustum<false> FrustumFast;
114
115	/ Robust variant /
116	template<>
117	struct Frustum<true>
118	{
119	__forceinline Frustum() {}
120
121	template<int K>
122	__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
123	{
124	const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
125	reduce_min(select(valid, org.y, pos_inf)),
126	reduce_min(select(valid, org.z, pos_inf)));
127
128	const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
129	reduce_max(select(valid, org.y, neg_inf)),
130	reduce_max(select(valid, org.z, neg_inf)));
131
132	const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
133	reduce_min(select(valid, rdir.y, pos_inf)),
134	reduce_min(select(valid, rdir.z, pos_inf)));
135
136	const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
137	reduce_max(select(valid, rdir.y, neg_inf)),
138	reduce_max(select(valid, rdir.z, neg_inf)));
139
140	const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
141	const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
142
143	init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
144	}
145
146	__forceinline void init(const Vec3fa& reduced_min_org,
147	const Vec3fa& reduced_max_org,
148	const Vec3fa& reduced_min_rdir,
149	const Vec3fa& reduced_max_rdir,
150	float reduced_min_dist,
151	float reduced_max_dist,
152	int N)
153	{
154	const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa (zero));
155	min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
156	max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
157
158	min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
159	max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
160
161	min_dist = reduced_min_dist;
162	max_dist = reduced_max_dist;
163
164	nf = NearFarPrecalculations (min_rdir, N);
165	}
166
167	template<int K>
168	__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
169	{
170	max_dist = reduce_max(ray_tfar);
171	}
172
173	NearFarPrecalculations nf;
174
175	Vec3fa min_rdir;
176	Vec3fa max_rdir;
177
178	Vec3fa min_org;
179	Vec3fa max_org;
180
181	float min_dist;
182	float max_dist;
183	};
184
185	typedef Frustum<true> FrustumRobust;
186
187	//////////////////////////////////////////////////////////////////////////////////////
188	// Fast AABBNode intersection
189	//////////////////////////////////////////////////////////////////////////////////////
190
191	template<int N>
192	__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
193	const FrustumFast& frustum, vfloat<N>& dist)
194	{
195	const vfloat<N> bminX = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.nearX);
196	const vfloat<N> bminY = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.nearY);
197	const vfloat<N> bminZ = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.nearZ);
198	const vfloat<N> bmaxX = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.farX);
199	const vfloat<N> bmaxY = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.farY);
200	const vfloat<N> bmaxZ = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.farZ);
201
202	#if defined (__aarch64__)
203	const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
204	const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
205	const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
206	const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
207	const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
208	const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
209	#else
210	const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
211	const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
212	const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
213	const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
214	const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
215	const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
216	#endif
217	const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
218	dist = fmin;
219	const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
220	const vbool<N> vmask_node_hit = fmin <= fmax;
221	size_t m_node = movemask(vmask_node_hit) & (((size_t)`1` << N)-`1`);
222	return m_node;
223	}
224
225	//////////////////////////////////////////////////////////////////////////////////////
226	// Robust AABBNode intersection
227	//////////////////////////////////////////////////////////////////////////////////////
228
229	template<int N>
230	__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
231	const FrustumRobust& frustum, vfloat<N>& dist)
232	{
233	const vfloat<N> bminX = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.nearX);
234	const vfloat<N> bminY = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.nearY);
235	const vfloat<N> bminZ = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.nearZ);
236	const vfloat<N> bmaxX = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.farX);
237	const vfloat<N> bmaxY = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.farY);
238	const vfloat<N> bmaxZ = (const* vfloat<N>)((const* char*)&node->lower_x + frustum.nf.farZ);
239
240	const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
241	const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
242	const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
243	const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
244	const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
245	const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);
246
247	const float round_down = `1.0f`-`2.0f`*float(ulp); // FIXME: use per instruction rounding for AVX512
248	const float round_up = `1.0f`+`2.0f`*float(ulp);
249	const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
250	dist = fmin;
251	const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
252	const vbool<N> vmask_node_hit = (round_downfmin <= round_upfmax);
253	size_t m_node = movemask(vmask_node_hit) & (((size_t)`1` << N)-`1`);
254	return m_node;
255	}
256	}
257	}
258

Browse the source code of Godot/thirdparty/embree/kernels/bvh/node_intersector_frustum.h