node_intersector_packet_stream.h source code [Godot/thirdparty/embree/kernels/bvh/node_intersector_packet_stream.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "node_intersector.h"
7
8	namespace embree
9	{
10	namespace isa
11	{
12	//////////////////////////////////////////////////////////////////////////////////////
13	// Ray packet structure used in stream traversal
14	//////////////////////////////////////////////////////////////////////////////////////
15
/*! Per-packet ray data used by the stream traversal kernels.
 *
 *  Only the primary template is declared here. The layout actually used is
 *  provided by the <K,false> (fast) and <K,true> (robust) specializations.
 *
 *  \tparam K       number of rays stored in one packet
 *  \tparam robust  selects the robust (true) or fast (false) specialization
 */
template<int K, bool robust>
struct TravRayKStream;
18
19	/ Fast variant /
20	template<int K>
21	struct TravRayKStream<K, false>
22	{
23	__forceinline TravRayKStream() {}
24
25	__forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
26	{
27	init(ray_org, ray_dir);
28	tnear = ray_tnear;
29	tfar = ray_tfar;
30	}
31
32	__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
33	{
34	rdir = rcp_safe(ray_dir);
35	#if defined(__aarch64__)
36	neg_org_rdir = -(ray_org * rdir);
37	#else
38	org_rdir = ray_org * rdir;
39	#endif
40	}
41
42	Vec3vf<K> rdir;
43	#if defined(__aarch64__)
44	Vec3vf<K> neg_org_rdir;
45	#else
46	Vec3vf<K> org_rdir;
47	#endif
48	vfloat<K> tnear;
49	vfloat<K> tfar;
50	};
51
52	template<int K>
53	using TravRayKStreamFast = TravRayKStream<K, false>;
54
55	/ Robust variant /
56	template<int K>
57	struct TravRayKStream<K, true>
58	{
59	__forceinline TravRayKStream() {}
60
61	__forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
62	{
63	init(ray_org, ray_dir);
64	tnear = ray_tnear;
65	tfar = ray_tfar;
66	}
67
68	__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
69	{
70	rdir = vfloat<K>(`1.0f`)/(zero_fix(ray_dir));
71	org = ray_org;
72	}
73
74	Vec3vf<K> rdir;
75	Vec3vf<K> org;
76	vfloat<K> tnear;
77	vfloat<K> tfar;
78	};
79
80	template<int K>
81	using TravRayKStreamRobust = TravRayKStream<K, true>;
82
83	//////////////////////////////////////////////////////////////////////////////////////
84	// Fast AABBNode intersection
85	//////////////////////////////////////////////////////////////////////////////////////
86
87	template<int N, int K>
88	__forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
89	const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf)
90	{
91	const vfloat<N> bminX = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.nearX));
92	const vfloat<N> bminY = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.nearY));
93	const vfloat<N> bminZ = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.nearZ));
94	const vfloat<N> bmaxX = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.farX));
95	const vfloat<N> bmaxY = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.farY));
96	const vfloat<N> bmaxZ = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.farZ));
97
98	#if defined (__aarch64__)
99	const vfloat<N> rminX = madd(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k]));
100	const vfloat<N> rminY = madd(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k]));
101	const vfloat<N> rminZ = madd(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k]));
102	const vfloat<N> rmaxX = madd(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k]));
103	const vfloat<N> rmaxY = madd(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k]));
104	const vfloat<N> rmaxZ = madd(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k]));
105	#else
106	const vfloat<N> rminX = msub(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
107	const vfloat<N> rminY = msub(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
108	const vfloat<N> rminZ = msub(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
109	const vfloat<N> rmaxX = msub(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
110	const vfloat<N> rmaxY = msub(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
111	const vfloat<N> rmaxZ = msub(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
112	#endif
113	const vfloat<N> rmin = maxi(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
114	const vfloat<N> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
115
116	const vbool<N> vmask_first_hit = rmin <= rmax;
117
118	return movemask(vmask_first_hit) & (((size_t)`1` << N)-`1`);
119	}
120
121	template<int N, int K>
122	__forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
123	const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf)
124	{
125	char* ptr = (char)&node->lower_x + isizeof(float);
126	const vfloat<K> bminX = (const* float*)(ptr + nf.nearX);
127	const vfloat<K> bminY = (const* float*)(ptr + nf.nearY);
128	const vfloat<K> bminZ = (const* float*)(ptr + nf.nearZ);
129	const vfloat<K> bmaxX = (const* float*)(ptr + nf.farX);
130	const vfloat<K> bmaxY = (const* float*)(ptr + nf.farY);
131	const vfloat<K> bmaxZ = (const* float*)(ptr + nf.farZ);
132
133	#if defined (__aarch64__)
134	const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x);
135	const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y);
136	const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z);
137	const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x);
138	const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y);
139	const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z);
140	#else
141	const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x);
142	const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y);
143	const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z);
144	const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x);
145	const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y);
146	const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z);
147	#endif
148
149	const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear);
150	const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar);
151
152	const vbool<K> vmask_first_hit = rmin <= rmax;
153
154	return movemask(vmask_first_hit);
155	}
156
157	//////////////////////////////////////////////////////////////////////////////////////
158	// Robust AABBNode intersection
159	//////////////////////////////////////////////////////////////////////////////////////
160
161	template<int N, int K>
162	__forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
163	const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf)
164	{
165	const vfloat<N> bminX = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.nearX));
166	const vfloat<N> bminY = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.nearY));
167	const vfloat<N> bminZ = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.nearZ));
168	const vfloat<N> bmaxX = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.farX));
169	const vfloat<N> bmaxY = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.farY));
170	const vfloat<N> bmaxZ = vfloat<N>((const* vfloat<N>)((const* char*)&node->lower_x + nf.farZ));
171
172	const vfloat<N> rminX = (bminX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
173	const vfloat<N> rminY = (bminY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
174	const vfloat<N> rminZ = (bminZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
175	const vfloat<N> rmaxX = (bmaxX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
176	const vfloat<N> rmaxY = (bmaxY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
177	const vfloat<N> rmaxZ = (bmaxZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
178	const float round_up = `1.0f`+`3.0f`*float(ulp); // FIXME: use per instruction rounding for AVX512
179	const vfloat<N> rmin = max(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
180	const vfloat<N> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
181
182	const vbool<N> vmask_first_hit = rmin <= rmax;
183
184	return movemask(vmask_first_hit) & (((size_t)`1` << N)-`1`);
185	}
186
187	template<int N, int K>
188	__forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
189	const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf)
190	{
191	char ptr = (char)&node->lower_x + isizeof(float*);
192	const vfloat<K> bminX = (const* float*)(ptr + nf.nearX);
193	const vfloat<K> bminY = (const* float*)(ptr + nf.nearY);
194	const vfloat<K> bminZ = (const* float*)(ptr + nf.nearZ);
195	const vfloat<K> bmaxX = (const* float*)(ptr + nf.farX);
196	const vfloat<K> bmaxY = (const* float*)(ptr + nf.farY);
197	const vfloat<K> bmaxZ = (const* float*)(ptr + nf.farZ);
198
199	const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x;
200	const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y;
201	const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z;
202	const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x;
203	const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y;
204	const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z;
205
206	const float round_up = `1.0f`+`3.0f`*float(ulp);
207	const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear));
208	const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar));
209
210	const vbool<K> vmask_first_hit = rmin <= rmax;
211
212	return movemask(vmask_first_hit);
213	}
214	}
215	}
216

Browse the source code of Godot/thirdparty/embree/kernels/bvh/node_intersector_packet_stream.h