1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "node_intersector.h" |
7 | |
8 | namespace embree |
9 | { |
10 | namespace isa |
11 | { |
12 | ////////////////////////////////////////////////////////////////////////////////////// |
13 | // Ray packet structure used in stream traversal |
14 | ////////////////////////////////////////////////////////////////////////////////////// |
15 | |
/*! Ray packet data used by stream traversal. Only declared here; the
 *  definitions are the partial specializations on the 'robust' flag
 *  (false and true) that follow. */
template<int K, bool robust> struct TravRayKStream;
18 | |
19 | /* Fast variant */ |
20 | template<int K> |
21 | struct TravRayKStream<K, false> |
22 | { |
23 | __forceinline TravRayKStream() {} |
24 | |
25 | __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar) |
26 | { |
27 | init(ray_org, ray_dir); |
28 | tnear = ray_tnear; |
29 | tfar = ray_tfar; |
30 | } |
31 | |
32 | __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir) |
33 | { |
34 | rdir = rcp_safe(ray_dir); |
35 | #if defined(__aarch64__) |
36 | neg_org_rdir = -(ray_org * rdir); |
37 | #else |
38 | org_rdir = ray_org * rdir; |
39 | #endif |
40 | } |
41 | |
42 | Vec3vf<K> rdir; |
43 | #if defined(__aarch64__) |
44 | Vec3vf<K> neg_org_rdir; |
45 | #else |
46 | Vec3vf<K> org_rdir; |
47 | #endif |
48 | vfloat<K> tnear; |
49 | vfloat<K> tfar; |
50 | }; |
51 | |
52 | template<int K> |
53 | using TravRayKStreamFast = TravRayKStream<K, false>; |
54 | |
55 | /* Robust variant */ |
56 | template<int K> |
57 | struct TravRayKStream<K, true> |
58 | { |
59 | __forceinline TravRayKStream() {} |
60 | |
61 | __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar) |
62 | { |
63 | init(ray_org, ray_dir); |
64 | tnear = ray_tnear; |
65 | tfar = ray_tfar; |
66 | } |
67 | |
68 | __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir) |
69 | { |
70 | rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir)); |
71 | org = ray_org; |
72 | } |
73 | |
74 | Vec3vf<K> rdir; |
75 | Vec3vf<K> org; |
76 | vfloat<K> tnear; |
77 | vfloat<K> tfar; |
78 | }; |
79 | |
80 | template<int K> |
81 | using TravRayKStreamRobust = TravRayKStream<K, true>; |
82 | |
83 | ////////////////////////////////////////////////////////////////////////////////////// |
84 | // Fast AABBNode intersection |
85 | ////////////////////////////////////////////////////////////////////////////////////// |
86 | |
87 | template<int N, int K> |
88 | __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node, |
89 | const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf) |
90 | { |
91 | const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); |
92 | const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); |
93 | const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); |
94 | const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); |
95 | const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); |
96 | const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); |
97 | |
98 | #if defined (__aarch64__) |
99 | const vfloat<N> rminX = madd(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k])); |
100 | const vfloat<N> rminY = madd(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k])); |
101 | const vfloat<N> rminZ = madd(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k])); |
102 | const vfloat<N> rmaxX = madd(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k])); |
103 | const vfloat<N> rmaxY = madd(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k])); |
104 | const vfloat<N> rmaxZ = madd(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k])); |
105 | #else |
106 | const vfloat<N> rminX = msub(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k])); |
107 | const vfloat<N> rminY = msub(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k])); |
108 | const vfloat<N> rminZ = msub(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k])); |
109 | const vfloat<N> rmaxX = msub(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k])); |
110 | const vfloat<N> rmaxY = msub(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k])); |
111 | const vfloat<N> rmaxZ = msub(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k])); |
112 | #endif |
113 | const vfloat<N> rmin = maxi(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k])); |
114 | const vfloat<N> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k])); |
115 | |
116 | const vbool<N> vmask_first_hit = rmin <= rmax; |
117 | |
118 | return movemask(vmask_first_hit) & (((size_t)1 << N)-1); |
119 | } |
120 | |
121 | template<int N, int K> |
122 | __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i, |
123 | const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf) |
124 | { |
125 | char* ptr = (char*)&node->lower_x + i*sizeof(float); |
126 | const vfloat<K> bminX = *(const float*)(ptr + nf.nearX); |
127 | const vfloat<K> bminY = *(const float*)(ptr + nf.nearY); |
128 | const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ); |
129 | const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX); |
130 | const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY); |
131 | const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ); |
132 | |
133 | #if defined (__aarch64__) |
134 | const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x); |
135 | const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y); |
136 | const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z); |
137 | const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x); |
138 | const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y); |
139 | const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z); |
140 | #else |
141 | const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x); |
142 | const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y); |
143 | const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z); |
144 | const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x); |
145 | const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y); |
146 | const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z); |
147 | #endif |
148 | |
149 | const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear); |
150 | const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar); |
151 | |
152 | const vbool<K> vmask_first_hit = rmin <= rmax; |
153 | |
154 | return movemask(vmask_first_hit); |
155 | } |
156 | |
157 | ////////////////////////////////////////////////////////////////////////////////////// |
158 | // Robust AABBNode intersection |
159 | ////////////////////////////////////////////////////////////////////////////////////// |
160 | |
161 | template<int N, int K> |
162 | __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node, |
163 | const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf) |
164 | { |
165 | const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); |
166 | const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); |
167 | const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); |
168 | const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); |
169 | const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); |
170 | const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); |
171 | |
172 | const vfloat<N> rminX = (bminX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]); |
173 | const vfloat<N> rminY = (bminY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]); |
174 | const vfloat<N> rminZ = (bminZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]); |
175 | const vfloat<N> rmaxX = (bmaxX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]); |
176 | const vfloat<N> rmaxY = (bmaxY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]); |
177 | const vfloat<N> rmaxZ = (bmaxZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]); |
178 | const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 |
179 | const vfloat<N> rmin = max(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k])); |
180 | const vfloat<N> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k])); |
181 | |
182 | const vbool<N> vmask_first_hit = rmin <= rmax; |
183 | |
184 | return movemask(vmask_first_hit) & (((size_t)1 << N)-1); |
185 | } |
186 | |
187 | template<int N, int K> |
188 | __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i, |
189 | const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf) |
190 | { |
191 | char *ptr = (char*)&node->lower_x + i*sizeof(float); |
192 | const vfloat<K> bminX = *(const float*)(ptr + nf.nearX); |
193 | const vfloat<K> bminY = *(const float*)(ptr + nf.nearY); |
194 | const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ); |
195 | const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX); |
196 | const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY); |
197 | const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ); |
198 | |
199 | const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x; |
200 | const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y; |
201 | const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z; |
202 | const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x; |
203 | const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y; |
204 | const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z; |
205 | |
206 | const float round_up = 1.0f+3.0f*float(ulp); |
207 | const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear)); |
208 | const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar)); |
209 | |
210 | const vbool<K> vmask_first_hit = rmin <= rmax; |
211 | |
212 | return movemask(vmask_first_hit); |
213 | } |
214 | } |
215 | } |
216 | |