1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "node_intersector.h"
7
8namespace embree
9{
10 namespace isa
11 {
12 //////////////////////////////////////////////////////////////////////////////////////
13 // Frustum structure used in hybrid and stream traversal
14 //////////////////////////////////////////////////////////////////////////////////////
15
16 /*
17 Optimized frustum test. We calculate t=(p-org)/dir in ray/box
18 intersection. We assume the rays are split by octant, thus
19 dir intervals are either positive or negative in each
20 dimension.
21
22 Case 1: dir.min >= 0 && dir.max >= 0:
23 t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
24 t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
25
26 Case 2: dir.min < 0 && dir.max < 0:
27 t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
28 t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
29 */
30
/* Frustum over a group of rays; specialised below for robust == false
   (fast variant) and robust == true (robust variant). */
template<bool robust> struct Frustum;
33
34 /* Fast variant */
35 template<>
36 struct Frustum<false>
37 {
38 __forceinline Frustum() {}
39
40 template<int K>
41 __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
42 {
43 const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
44 reduce_min(select(valid, org.y, pos_inf)),
45 reduce_min(select(valid, org.z, pos_inf)));
46
47 const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
48 reduce_max(select(valid, org.y, neg_inf)),
49 reduce_max(select(valid, org.z, neg_inf)));
50
51 const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
52 reduce_min(select(valid, rdir.y, pos_inf)),
53 reduce_min(select(valid, rdir.z, pos_inf)));
54
55 const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
56 reduce_max(select(valid, rdir.y, neg_inf)),
57 reduce_max(select(valid, rdir.z, neg_inf)));
58
59 const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
60 const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
61
62 init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
63 }
64
65 __forceinline void init(const Vec3fa& reduced_min_org,
66 const Vec3fa& reduced_max_org,
67 const Vec3fa& reduced_min_rdir,
68 const Vec3fa& reduced_max_rdir,
69 float reduced_min_dist,
70 float reduced_max_dist,
71 int N)
72 {
73 const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
74
75 min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
76 max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
77
78#if defined (__aarch64__)
79 neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
80 neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
81#else
82 min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
83 max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
84#endif
85 min_dist = reduced_min_dist;
86 max_dist = reduced_max_dist;
87
88 nf = NearFarPrecalculations(min_rdir, N);
89 }
90
91 template<int K>
92 __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
93 {
94 max_dist = reduce_max(ray_tfar);
95 }
96
97 NearFarPrecalculations nf;
98
99 Vec3fa min_rdir;
100 Vec3fa max_rdir;
101
102#if defined (__aarch64__)
103 Vec3fa neg_min_org_rdir;
104 Vec3fa neg_max_org_rdir;
105#else
106 Vec3fa min_org_rdir;
107 Vec3fa max_org_rdir;
108#endif
109 float min_dist;
110 float max_dist;
111 };
112
113 typedef Frustum<false> FrustumFast;
114
115 /* Robust variant */
116 template<>
117 struct Frustum<true>
118 {
119 __forceinline Frustum() {}
120
121 template<int K>
122 __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
123 {
124 const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
125 reduce_min(select(valid, org.y, pos_inf)),
126 reduce_min(select(valid, org.z, pos_inf)));
127
128 const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
129 reduce_max(select(valid, org.y, neg_inf)),
130 reduce_max(select(valid, org.z, neg_inf)));
131
132 const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
133 reduce_min(select(valid, rdir.y, pos_inf)),
134 reduce_min(select(valid, rdir.z, pos_inf)));
135
136 const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
137 reduce_max(select(valid, rdir.y, neg_inf)),
138 reduce_max(select(valid, rdir.z, neg_inf)));
139
140 const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
141 const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
142
143 init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
144 }
145
146 __forceinline void init(const Vec3fa& reduced_min_org,
147 const Vec3fa& reduced_max_org,
148 const Vec3fa& reduced_min_rdir,
149 const Vec3fa& reduced_max_rdir,
150 float reduced_min_dist,
151 float reduced_max_dist,
152 int N)
153 {
154 const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
155 min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
156 max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
157
158 min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
159 max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
160
161 min_dist = reduced_min_dist;
162 max_dist = reduced_max_dist;
163
164 nf = NearFarPrecalculations(min_rdir, N);
165 }
166
167 template<int K>
168 __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
169 {
170 max_dist = reduce_max(ray_tfar);
171 }
172
173 NearFarPrecalculations nf;
174
175 Vec3fa min_rdir;
176 Vec3fa max_rdir;
177
178 Vec3fa min_org;
179 Vec3fa max_org;
180
181 float min_dist;
182 float max_dist;
183 };
184
185 typedef Frustum<true> FrustumRobust;
186
187 //////////////////////////////////////////////////////////////////////////////////////
188 // Fast AABBNode intersection
189 //////////////////////////////////////////////////////////////////////////////////////
190
191 template<int N>
192 __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
193 const FrustumFast& frustum, vfloat<N>& dist)
194 {
195 const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
196 const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
197 const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
198 const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
199 const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
200 const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
201
202#if defined (__aarch64__)
203 const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
204 const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
205 const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
206 const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
207 const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
208 const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
209#else
210 const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
211 const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
212 const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
213 const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
214 const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
215 const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
216#endif
217 const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
218 dist = fmin;
219 const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
220 const vbool<N> vmask_node_hit = fmin <= fmax;
221 size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
222 return m_node;
223 }
224
225 //////////////////////////////////////////////////////////////////////////////////////
226 // Robust AABBNode intersection
227 //////////////////////////////////////////////////////////////////////////////////////
228
229 template<int N>
230 __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
231 const FrustumRobust& frustum, vfloat<N>& dist)
232 {
233 const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
234 const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
235 const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
236 const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
237 const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
238 const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
239
240 const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
241 const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
242 const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
243 const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
244 const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
245 const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);
246
247 const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
248 const float round_up = 1.0f+2.0f*float(ulp);
249 const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
250 dist = fmin;
251 const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
252 const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);
253 size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
254 return m_node;
255 }
256 }
257}
258