1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "../common/scene.h"
7#include "priminfo.h"
8
9namespace embree
10{
11 static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
12
13 namespace isa
14 {
15
16 /*! mapping into bins */
17 template<size_t BINS>
18 struct SpatialBinMapping
19 {
20 public:
21 __forceinline SpatialBinMapping() {}
22
23 /*! calculates the mapping */
24 __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
25 {
26 const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
27 const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
28 const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
29 const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
30 scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
31 ofs = (vfloat4) pinfo.geomBounds.lower;
32 inv_scale = 1.0f / scale;
33 }
34
35 /*! slower but safe binning */
36 __forceinline vint4 bin(const Vec3fa& p) const
37 {
38 const vint4 i = floori((vfloat4(p)-ofs)*scale);
39 return clamp(i,vint4(0),vint4(BINS-1));
40 }
41
42 __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
43 {
44#if defined(__AVX__)
45 const vfloat8 ofs8(ofs);
46 const vfloat8 scale8(scale);
47 const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
48 const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
49 return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
50#else
51 const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
52 const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
53 const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
54 const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
55 return std::pair<vint4,vint4>(c_lower,c_upper);
56#endif
57 }
58
59
60 /*! calculates left spatial position of bin */
61 __forceinline float pos(const size_t bin, const size_t dim) const {
62 return madd(float(bin),inv_scale[dim],ofs[dim]);
63 }
64
65 /*! calculates left spatial position of bin */
66 template<size_t N>
67 __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
68 return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
69 }
70
71 /*! returns true if the mapping is invalid in some dimension */
72 __forceinline bool invalid(const size_t dim) const {
73 return scale[dim] == 0.0f;
74 }
75
76 public:
77 vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
78 };
79
80 /*! stores all information required to perform some split */
81 template<size_t BINS>
82 struct SpatialBinSplit
83 {
84 /*! construct an invalid split by default */
85 __forceinline SpatialBinSplit()
86 : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
87
88 /*! constructs specified split */
89 __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
90 : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
91
92 /*! constructs specified split */
93 __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
94 : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
95
96 /*! tests if this split is valid */
97 __forceinline bool valid() const { return dim != -1; }
98
99 /*! calculates surface area heuristic for performing the split */
100 __forceinline float splitSAH() const { return sah; }
101
102 /*! stream output */
103 friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
104 return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
105 }
106
107 public:
108 float sah; //!< SAH cost of the split
109 int dim; //!< split dimension
110 int pos; //!< split position
111 int left; //!< number of elements on the left side
112 int right; //!< number of elements on the right side
113 float factor; //!< factor splitting the extended range
114 SpatialBinMapping<BINS> mapping; //!< mapping into bins
115 };
116
117 /*! stores all binning information */
118 template<size_t BINS, typename PrimRef>
119 struct __aligned(64) SpatialBinInfo
120 {
121 SpatialBinInfo() {
122 }
123
124 __forceinline SpatialBinInfo(EmptyTy) {
125 clear();
126 }
127
128 /*! clears the bin info */
129 __forceinline void clear()
130 {
131 for (size_t i=0; i<BINS; i++) {
132 bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
133 numBegin[i] = numEnd[i] = 0;
134 }
135 }
136
137 /*! adds binning data */
138 __forceinline void add(const size_t dim,
139 const size_t beginID,
140 const size_t endID,
141 const size_t binID,
142 const BBox3fa &b,
143 const size_t n = 1)
144 {
145 assert(beginID < BINS);
146 assert(endID < BINS);
147 assert(binID < BINS);
148
149 numBegin[beginID][dim]+=(unsigned int)n;
150 numEnd [endID][dim]+=(unsigned int)n;
151 bounds [binID][dim].extend(b);
152 }
153
154 /*! extends binning bounds */
155 __forceinline void extend(const size_t dim,
156 const size_t binID,
157 const BBox3fa &b)
158 {
159 assert(binID < BINS);
160 bounds [binID][dim].extend(b);
161 }
162
163 /*! bins an array of primitives */
164 template<typename PrimitiveSplitterFactory>
165 __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
166 {
167 for (size_t i=begin; i<end; i++)
168 {
169 const PrimRef& prim = source[i];
170 unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
171
172 if (unlikely(splits <= 1))
173 {
174 const vint4 bin = mapping.bin(center(prim.bounds()));
175 for (size_t dim=0; dim<3; dim++)
176 {
177 assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
178 add(dim,bin[dim],bin[dim],bin[dim],prim.bounds());
179 }
180 }
181 else
182 {
183 const vint4 bin0 = mapping.bin(prim.bounds().lower);
184 const vint4 bin1 = mapping.bin(prim.bounds().upper);
185
186 for (size_t dim=0; dim<3; dim++)
187 {
188 if (unlikely(mapping.invalid(dim)))
189 continue;
190
191 size_t bin;
192 size_t l = bin0[dim];
193 size_t r = bin1[dim];
194
195 // same bin optimization
196 if (likely(l == r))
197 {
198 add(dim,l,l,l,prim.bounds());
199 continue;
200 }
201 size_t bin_start = bin0[dim];
202 size_t bin_end = bin1[dim];
203 BBox3fa rest = prim.bounds();
204
205 /* assure that split position always overlaps the primitive bounds */
206 while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++;
207 while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--;
208
209 const auto splitter = splitterFactory(prim);
210 for (bin=bin_start; bin<bin_end; bin++)
211 {
212 const float pos = mapping.pos(bin+1,dim);
213 BBox3fa left,right;
214 splitter(rest,dim,pos,left,right);
215
216 if (unlikely(left.empty())) l++;
217 extend(dim,bin,left);
218 rest = right;
219 }
220 if (unlikely(rest.empty())) r--;
221 add(dim,l,r,bin,rest);
222 }
223 }
224 }
225 }
226
227
228 /*! bins an array of primitives */
229 __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
230 {
231 for (size_t i=begin; i<end; i++)
232 {
233 const PrimRef &prim = source[i];
234 const vint4 bin0 = mapping.bin(prim.bounds().lower);
235 const vint4 bin1 = mapping.bin(prim.bounds().upper);
236
237 for (size_t dim=0; dim<3; dim++)
238 {
239 if (unlikely(mapping.invalid(dim)))
240 continue;
241
242 const size_t l = bin0[dim];
243 const size_t r = bin1[dim];
244
245 const unsigned int n = prim.primID();
246
247 // same bin optimization
248 if (likely(l == r))
249 {
250 add(dim,l,l,l,prim.bounds(),n);
251 continue;
252 }
253 const size_t bin_start = bin0[dim];
254 const size_t bin_end = bin1[dim];
255 for (size_t bin=bin_start; bin<bin_end; bin++)
256 add(dim,l,r,bin,prim.bounds(),n);
257 }
258 }
259 }
260
261 /*! merges in other binning information */
262 void merge (const SpatialBinInfo& other)
263 {
264 for (size_t i=0; i<BINS; i++)
265 {
266 numBegin[i] += other.numBegin[i];
267 numEnd [i] += other.numEnd [i];
268 bounds[i][0].extend(other.bounds[i][0]);
269 bounds[i][1].extend(other.bounds[i][1]);
270 bounds[i][2].extend(other.bounds[i][2]);
271 }
272 }
273
274 /*! merges in other binning information */
275 static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
276 {
277 SpatialBinInfo c(empty);
278 for (size_t i=0; i<BINS; i++)
279 {
280 c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
281 c.numEnd [i] += a.numEnd [i]+b.numEnd [i];
282 c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
283 c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
284 c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
285 }
286 return c;
287 }
288
289 /*! finds the best split by scanning binning information */
290 SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
291 {
292 /* sweep from right to left and compute parallel prefix of merged bounds */
293 vfloat4 rAreas[BINS];
294 vuint4 rCounts[BINS];
295 vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
296 for (size_t i=BINS-1; i>0; i--)
297 {
298 count += numEnd[i];
299 rCounts[i] = count;
300 bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
301 by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
302 bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
303 rAreas[i][3] = 0.0f;
304 }
305
306 /* sweep from left to right and compute SAH */
307 vuint4 blocks_add = (1 << blocks_shift)-1;
308 vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
309 count = 0; bx = empty; by = empty; bz = empty;
310 for (size_t i=1; i<BINS; i++, ii+=1)
311 {
312 count += numBegin[i-1];
313 bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
314 by.extend(bounds[i-1][1]); float Ay = halfArea(by);
315 bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
316 const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
317 const vfloat4 rArea = rAreas[i];
318 const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
319 const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
320 const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
321 // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
322 const vbool4 mask = sah < vbestSAH;
323 vbestPos = select(mask,ii ,vbestPos);
324 vbestSAH = select(mask,sah,vbestSAH);
325 vbestlCount = select(mask,count,vbestlCount);
326 vbestrCount = select(mask,rCounts[i],vbestrCount);
327 }
328
329 /* find best dimension */
330 float bestSAH = inf;
331 int bestDim = -1;
332 int bestPos = 0;
333 unsigned int bestlCount = 0;
334 unsigned int bestrCount = 0;
335 for (int dim=0; dim<3; dim++)
336 {
337 /* ignore zero sized dimensions */
338 if (unlikely(mapping.invalid(dim)))
339 continue;
340
341 /* test if this is a better dimension */
342 if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
343 bestDim = dim;
344 bestPos = vbestPos[dim];
345 bestSAH = vbestSAH[dim];
346 bestlCount = vbestlCount[dim];
347 bestrCount = vbestrCount[dim];
348 }
349 }
350 assert(bestSAH >= 0.0f);
351
352 /* return invalid split if no split found */
353 if (bestDim == -1)
354 return SpatialBinSplit<BINS>(inf,-1,0,mapping);
355
356 /* return best found split */
357 return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
358 }
359
360 private:
361 BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
362 vuint4 numBegin[BINS]; //!< number of primitives starting in bin
363 vuint4 numEnd[BINS]; //!< number of primitives ending in bin
364 };
365 }
366}
367
368