1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "node_intersector.h" |
7 | |
8 | namespace embree |
9 | { |
10 | namespace isa |
11 | { |
12 | ////////////////////////////////////////////////////////////////////////////////////// |
13 | // Frustum structure used in hybrid and stream traversal |
14 | ////////////////////////////////////////////////////////////////////////////////////// |
15 | |
16 | /* |
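       Optimized frustum test. We calculate t=(p-org)/dir in ray/box
       intersection. We assume the rays are split by octant, thus
       dir intervals are either positive or negative in each
       dimension. With rdir = 1/dir, the smallest reciprocal always
       belongs to the largest direction of the interval, i.e.
       rdir_min = 1/dir_max and rdir_max = 1/dir_min in both cases.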
21 | |
22 | Case 1: dir.min >= 0 && dir.max >= 0: |
23 | t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min |
24 | t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max |
25 | |
26 | Case 2: dir.min < 0 && dir.max < 0: |
27 | t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max |
28 | t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min |
29 | */ |
30 | |
/* Primary template, declared only: the explicit specializations for
   'false' and 'true' provide the actual definitions. */
template<bool isRobust> struct Frustum;
33 | |
34 | /* Fast variant */ |
35 | template<> |
36 | struct Frustum<false> |
37 | { |
38 | __forceinline Frustum() {} |
39 | |
40 | template<int K> |
41 | __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) |
42 | { |
43 | const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), |
44 | reduce_min(select(valid, org.y, pos_inf)), |
45 | reduce_min(select(valid, org.z, pos_inf))); |
46 | |
47 | const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)), |
48 | reduce_max(select(valid, org.y, neg_inf)), |
49 | reduce_max(select(valid, org.z, neg_inf))); |
50 | |
51 | const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)), |
52 | reduce_min(select(valid, rdir.y, pos_inf)), |
53 | reduce_min(select(valid, rdir.z, pos_inf))); |
54 | |
55 | const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)), |
56 | reduce_max(select(valid, rdir.y, neg_inf)), |
57 | reduce_max(select(valid, rdir.z, neg_inf))); |
58 | |
59 | const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf))); |
60 | const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf))); |
61 | |
62 | init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N); |
63 | } |
64 | |
65 | __forceinline void init(const Vec3fa& reduced_min_org, |
66 | const Vec3fa& reduced_max_org, |
67 | const Vec3fa& reduced_min_rdir, |
68 | const Vec3fa& reduced_max_rdir, |
69 | float reduced_min_dist, |
70 | float reduced_max_dist, |
71 | int N) |
72 | { |
73 | const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero)); |
74 | |
75 | min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); |
76 | max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); |
77 | |
78 | #if defined (__aarch64__) |
79 | neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org)); |
80 | neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org)); |
81 | #else |
82 | min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org); |
83 | max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org); |
84 | #endif |
85 | min_dist = reduced_min_dist; |
86 | max_dist = reduced_max_dist; |
87 | |
88 | nf = NearFarPrecalculations(min_rdir, N); |
89 | } |
90 | |
91 | template<int K> |
92 | __forceinline void updateMaxDist(const vfloat<K>& ray_tfar) |
93 | { |
94 | max_dist = reduce_max(ray_tfar); |
95 | } |
96 | |
97 | NearFarPrecalculations nf; |
98 | |
99 | Vec3fa min_rdir; |
100 | Vec3fa max_rdir; |
101 | |
102 | #if defined (__aarch64__) |
103 | Vec3fa neg_min_org_rdir; |
104 | Vec3fa neg_max_org_rdir; |
105 | #else |
106 | Vec3fa min_org_rdir; |
107 | Vec3fa max_org_rdir; |
108 | #endif |
109 | float min_dist; |
110 | float max_dist; |
111 | }; |
112 | |
113 | typedef Frustum<false> FrustumFast; |
114 | |
115 | /* Robust variant */ |
116 | template<> |
117 | struct Frustum<true> |
118 | { |
119 | __forceinline Frustum() {} |
120 | |
121 | template<int K> |
122 | __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) |
123 | { |
124 | const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), |
125 | reduce_min(select(valid, org.y, pos_inf)), |
126 | reduce_min(select(valid, org.z, pos_inf))); |
127 | |
128 | const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)), |
129 | reduce_max(select(valid, org.y, neg_inf)), |
130 | reduce_max(select(valid, org.z, neg_inf))); |
131 | |
132 | const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)), |
133 | reduce_min(select(valid, rdir.y, pos_inf)), |
134 | reduce_min(select(valid, rdir.z, pos_inf))); |
135 | |
136 | const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)), |
137 | reduce_max(select(valid, rdir.y, neg_inf)), |
138 | reduce_max(select(valid, rdir.z, neg_inf))); |
139 | |
140 | const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf))); |
141 | const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf))); |
142 | |
143 | init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N); |
144 | } |
145 | |
146 | __forceinline void init(const Vec3fa& reduced_min_org, |
147 | const Vec3fa& reduced_max_org, |
148 | const Vec3fa& reduced_min_rdir, |
149 | const Vec3fa& reduced_max_rdir, |
150 | float reduced_min_dist, |
151 | float reduced_max_dist, |
152 | int N) |
153 | { |
154 | const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero)); |
155 | min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); |
156 | max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); |
157 | |
158 | min_org = select(pos_rdir, reduced_max_org, reduced_min_org); |
159 | max_org = select(pos_rdir, reduced_min_org, reduced_max_org); |
160 | |
161 | min_dist = reduced_min_dist; |
162 | max_dist = reduced_max_dist; |
163 | |
164 | nf = NearFarPrecalculations(min_rdir, N); |
165 | } |
166 | |
167 | template<int K> |
168 | __forceinline void updateMaxDist(const vfloat<K>& ray_tfar) |
169 | { |
170 | max_dist = reduce_max(ray_tfar); |
171 | } |
172 | |
173 | NearFarPrecalculations nf; |
174 | |
175 | Vec3fa min_rdir; |
176 | Vec3fa max_rdir; |
177 | |
178 | Vec3fa min_org; |
179 | Vec3fa max_org; |
180 | |
181 | float min_dist; |
182 | float max_dist; |
183 | }; |
184 | |
185 | typedef Frustum<true> FrustumRobust; |
186 | |
187 | ////////////////////////////////////////////////////////////////////////////////////// |
188 | // Fast AABBNode intersection |
189 | ////////////////////////////////////////////////////////////////////////////////////// |
190 | |
191 | template<int N> |
192 | __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node, |
193 | const FrustumFast& frustum, vfloat<N>& dist) |
194 | { |
195 | const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); |
196 | const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); |
197 | const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); |
198 | const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); |
199 | const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); |
200 | const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); |
201 | |
202 | #if defined (__aarch64__) |
203 | const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x)); |
204 | const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y)); |
205 | const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z)); |
206 | const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x)); |
207 | const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y)); |
208 | const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z)); |
209 | #else |
210 | const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x)); |
211 | const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y)); |
212 | const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z)); |
213 | const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x)); |
214 | const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y)); |
215 | const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z)); |
216 | #endif |
217 | const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist)); |
218 | dist = fmin; |
219 | const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist)); |
220 | const vbool<N> vmask_node_hit = fmin <= fmax; |
221 | size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); |
222 | return m_node; |
223 | } |
224 | |
225 | ////////////////////////////////////////////////////////////////////////////////////// |
226 | // Robust AABBNode intersection |
227 | ////////////////////////////////////////////////////////////////////////////////////// |
228 | |
229 | template<int N> |
230 | __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node, |
231 | const FrustumRobust& frustum, vfloat<N>& dist) |
232 | { |
233 | const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); |
234 | const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); |
235 | const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); |
236 | const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); |
237 | const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); |
238 | const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); |
239 | |
240 | const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x); |
241 | const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y); |
242 | const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z); |
243 | const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x); |
244 | const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y); |
245 | const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z); |
246 | |
247 | const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 |
248 | const float round_up = 1.0f+2.0f*float(ulp); |
249 | const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist)); |
250 | dist = fmin; |
251 | const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist)); |
252 | const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax); |
253 | size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); |
254 | return m_node; |
255 | } |
256 | } |
257 | } |
258 | |