1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "node_intersector.h"
7
8namespace embree
9{
10 namespace isa
11 {
12 //////////////////////////////////////////////////////////////////////////////////////
13 // Ray packet structure used in stream traversal
14 //////////////////////////////////////////////////////////////////////////////////////
15
/*! Primary template: K is the packet width, `robust` selects between the
 *  fast (false) and robust (true) specializations defined below. */
template<int K, bool robust> struct TravRayKStream;
18
19 /* Fast variant */
20 template<int K>
21 struct TravRayKStream<K, false>
22 {
23 __forceinline TravRayKStream() {}
24
25 __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
26 {
27 init(ray_org, ray_dir);
28 tnear = ray_tnear;
29 tfar = ray_tfar;
30 }
31
32 __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
33 {
34 rdir = rcp_safe(ray_dir);
35#if defined(__aarch64__)
36 neg_org_rdir = -(ray_org * rdir);
37#else
38 org_rdir = ray_org * rdir;
39#endif
40 }
41
42 Vec3vf<K> rdir;
43#if defined(__aarch64__)
44 Vec3vf<K> neg_org_rdir;
45#else
46 Vec3vf<K> org_rdir;
47#endif
48 vfloat<K> tnear;
49 vfloat<K> tfar;
50 };
51
52 template<int K>
53 using TravRayKStreamFast = TravRayKStream<K, false>;
54
55 /* Robust variant */
56 template<int K>
57 struct TravRayKStream<K, true>
58 {
59 __forceinline TravRayKStream() {}
60
61 __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
62 {
63 init(ray_org, ray_dir);
64 tnear = ray_tnear;
65 tfar = ray_tfar;
66 }
67
68 __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
69 {
70 rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
71 org = ray_org;
72 }
73
74 Vec3vf<K> rdir;
75 Vec3vf<K> org;
76 vfloat<K> tnear;
77 vfloat<K> tfar;
78 };
79
80 template<int K>
81 using TravRayKStreamRobust = TravRayKStream<K, true>;
82
83 //////////////////////////////////////////////////////////////////////////////////////
84 // Fast AABBNode intersection
85 //////////////////////////////////////////////////////////////////////////////////////
86
87 template<int N, int K>
88 __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
89 const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf)
90 {
91 const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
92 const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
93 const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
94 const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
95 const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
96 const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
97
98#if defined (__aarch64__)
99 const vfloat<N> rminX = madd(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k]));
100 const vfloat<N> rminY = madd(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k]));
101 const vfloat<N> rminZ = madd(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k]));
102 const vfloat<N> rmaxX = madd(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.neg_org_rdir.x[k]));
103 const vfloat<N> rmaxY = madd(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.neg_org_rdir.y[k]));
104 const vfloat<N> rmaxZ = madd(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.neg_org_rdir.z[k]));
105#else
106 const vfloat<N> rminX = msub(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
107 const vfloat<N> rminY = msub(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
108 const vfloat<N> rminZ = msub(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
109 const vfloat<N> rmaxX = msub(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
110 const vfloat<N> rmaxY = msub(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
111 const vfloat<N> rmaxZ = msub(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
112#endif
113 const vfloat<N> rmin = maxi(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
114 const vfloat<N> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
115
116 const vbool<N> vmask_first_hit = rmin <= rmax;
117
118 return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
119 }
120
121 template<int N, int K>
122 __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
123 const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf)
124 {
125 char* ptr = (char*)&node->lower_x + i*sizeof(float);
126 const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
127 const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
128 const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
129 const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
130 const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
131 const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
132
133#if defined (__aarch64__)
134 const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x);
135 const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y);
136 const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z);
137 const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x);
138 const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y);
139 const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z);
140#else
141 const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x);
142 const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y);
143 const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z);
144 const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x);
145 const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y);
146 const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z);
147#endif
148
149 const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear);
150 const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar);
151
152 const vbool<K> vmask_first_hit = rmin <= rmax;
153
154 return movemask(vmask_first_hit);
155 }
156
157 //////////////////////////////////////////////////////////////////////////////////////
158 // Robust AABBNode intersection
159 //////////////////////////////////////////////////////////////////////////////////////
160
161 template<int N, int K>
162 __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
163 const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf)
164 {
165 const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
166 const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
167 const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
168 const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
169 const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
170 const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
171
172 const vfloat<N> rminX = (bminX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
173 const vfloat<N> rminY = (bminY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
174 const vfloat<N> rminZ = (bminZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
175 const vfloat<N> rmaxX = (bmaxX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
176 const vfloat<N> rmaxY = (bmaxY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
177 const vfloat<N> rmaxZ = (bmaxZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
178 const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
179 const vfloat<N> rmin = max(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
180 const vfloat<N> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
181
182 const vbool<N> vmask_first_hit = rmin <= rmax;
183
184 return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
185 }
186
187 template<int N, int K>
188 __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
189 const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf)
190 {
191 char *ptr = (char*)&node->lower_x + i*sizeof(float);
192 const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
193 const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
194 const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
195 const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
196 const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
197 const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
198
199 const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x;
200 const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y;
201 const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z;
202 const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x;
203 const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y;
204 const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z;
205
206 const float round_up = 1.0f+3.0f*float(ulp);
207 const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear));
208 const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar));
209
210 const vbool<K> vmask_first_hit = rmin <= rmax;
211
212 return movemask(vmask_first_hit);
213 }
214 }
215}
216