1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "priminfo.h"
7#include "../../common/algorithms/parallel_reduce.h"
8#include "../../common/algorithms/parallel_partition.h"
9
10namespace embree
11{
12 namespace isa
13 {
14 /*! mapping into bins */
15 template<size_t BINS>
16 struct BinMapping
17 {
18 public:
19 __forceinline BinMapping() {}
20
21 /*! calculates the mapping */
22 __forceinline BinMapping(size_t N, const BBox3fa& centBounds)
23 {
24 num = min(BINS,size_t(4.0f + 0.05f*N));
25 assert(num >= 1);
26 const vfloat4 eps = 1E-34f;
27 const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
28 scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
29 ofs = (vfloat4) centBounds.lower;
30 }
31
32 /*! calculates the mapping */
33 __forceinline BinMapping(const BBox3fa& centBounds)
34 {
35 num = BINS;
36 const vfloat4 eps = 1E-34f;
37 const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
38 scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
39 ofs = (vfloat4) centBounds.lower;
40 }
41
42 /*! calculates the mapping */
43 template<typename PrimInfo>
44 __forceinline BinMapping(const PrimInfo& pinfo)
45 {
46 const vfloat4 eps = 1E-34f;
47 num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));
48 const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
49 scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
50 ofs = (vfloat4) pinfo.centBounds.lower;
51 }
52
53 /*! returns number of bins */
54 __forceinline size_t size() const { return num; }
55
56 /*! slower but safe binning */
57 __forceinline Vec3ia bin(const Vec3fa& p) const
58 {
59 const vint4 i = floori((vfloat4(p)-ofs)*scale);
60 assert(i[0] >= 0 && (size_t)i[0] < num);
61 assert(i[1] >= 0 && (size_t)i[1] < num);
62 assert(i[2] >= 0 && (size_t)i[2] < num);
63
64 // we clamp to handle corner cases that could calculate out of bounds bin
65 return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
66 }
67
68 /*! faster but unsafe binning */
69 __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
70 return Vec3ia(floori((vfloat4(p)-ofs)*scale));
71 }
72
73 /*! faster but unsafe binning */
74 template<typename PrimRef>
75 __forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
76 return bin_unsafe(p.binCenter());
77 }
78
79 /*! faster but unsafe binning */
80 template<typename PrimRef, typename BinBoundsAndCenter>
81 __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
82 return bin_unsafe(binBoundsAndCenter.binCenter(p));
83 }
84
85 template<typename PrimRef>
86 __forceinline bool bin_unsafe(const PrimRef& ref,
87 const vint4& vSplitPos,
88 const vbool4& splitDimMask) const // FIXME: rename to isLeft
89 {
90 return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
91 }
92 /*! calculates left spatial position of bin */
93 __forceinline float pos(const size_t bin, const size_t dim) const {
94 return madd(float(bin),1.0f / scale[dim],ofs[dim]);
95 }
96
97 /*! returns true if the mapping is invalid in some dimension */
98 __forceinline bool invalid(const size_t dim) const {
99 return scale[dim] == 0.0f;
100 }
101
102 /*! stream output */
103 friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
104 return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
105 }
106
107 public:
108 size_t num;
109 vfloat4 ofs,scale; //!< linear function that maps to bin ID
110 };
111
112 /*! stores all information to perform some split */
113 template<size_t BINS>
114 struct BinSplit
115 {
116 enum
117 {
118 SPLIT_OBJECT = 0,
119 SPLIT_FALLBACK = 1,
120 SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
121 SPLIT_TEMPORAL = 2,
122 SPLIT_GEOMID = 3,
123 };
124
125 /*! construct an invalid split by default */
126 __forceinline BinSplit()
127 : sah(inf), dim(-1), pos(0), data(0) {}
128
129 __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
130 : sah(sah), dim(dim), fpos(fpos), data(data) {}
131
132 /*! constructs specified split */
133 __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
134 : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}
135
136 /*! tests if this split is valid */
137 __forceinline bool valid() const { return dim != -1; }
138
139 /*! calculates surface area heuristic for performing the split */
140 __forceinline float splitSAH() const { return sah; }
141
142 /*! stream output */
143 friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
144 return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
145 }
146
147 public:
148 float sah; //!< SAH cost of the split
149 int dim; //!< split dimension
150 union { int pos; float fpos; }; //!< bin index for splitting
151 unsigned int data; //!< extra optional split data
152 BinMapping<BINS> mapping; //!< mapping into bins
153 };
154
155 /*! stores extended information about the split */
156 template<typename BBox>
157 struct SplitInfoT
158 {
159
160 __forceinline SplitInfoT () {}
161
162 __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
163 : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}
164
165 public:
166 size_t leftCount,rightCount;
167 BBox leftBounds,rightBounds;
168 };
169
170 typedef SplitInfoT<BBox3fa> SplitInfo;
171 typedef SplitInfoT<LBBox3fa> SplitInfo2;
172
173 /*! stores all binning information */
174 template<size_t BINS, typename PrimRef, typename BBox>
175 struct __aligned(64) BinInfoT
176 {
177 typedef BinSplit<BINS> Split;
178 typedef vbool4 vbool;
179 typedef vint4 vint;
180 typedef vfloat4 vfloat;
181
182 __forceinline BinInfoT() {
183 }
184
185 __forceinline BinInfoT(EmptyTy) {
186 clear();
187 }
188
189 /*! bin access function */
190 __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }
191 __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }
192
193 __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }
194 __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }
195
196 __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }
197 __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }
198
199 /*! clears the bin info */
200 __forceinline void clear()
201 {
202 for (size_t i=0; i<BINS; i++) {
203 bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
204 counts(i) = vuint4(zero);
205 }
206 }
207
208 /*! bins an array of primitives */
209 __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
210 {
211 if (unlikely(N == 0)) return;
212 size_t i;
213 for (i=0; i<N-1; i+=2)
214 {
215 /*! map even and odd primitive to bin */
216 BBox prim0; Vec3fa center0;
217 prims[i+0].binBoundsAndCenter(prim0,center0);
218 const vint4 bin0 = (vint4)mapping.bin(center0);
219
220 BBox prim1; Vec3fa center1;
221 prims[i+1].binBoundsAndCenter(prim1,center1);
222 const vint4 bin1 = (vint4)mapping.bin(center1);
223
224 /*! increase bounds for bins for even primitive */
225 const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);
226 const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);
227 const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);
228 const unsigned int s0 = (unsigned int)prims[i+0].size();
229 counts(b00,0)+=s0;
230 counts(b01,1)+=s0;
231 counts(b02,2)+=s0;
232
233 /*! increase bounds of bins for odd primitive */
234 const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);
235 const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);
236 const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);
237 const unsigned int s1 = (unsigned int)prims[i+1].size();
238 counts(b10,0)+=s1;
239 counts(b11,1)+=s1;
240 counts(b12,2)+=s1;
241 }
242 /*! for uneven number of primitives */
243 if (i < N)
244 {
245 /*! map primitive to bin */
246 BBox prim0; Vec3fa center0;
247 prims[i].binBoundsAndCenter(prim0,center0);
248 const vint4 bin0 = (vint4)mapping.bin(center0);
249
250 /*! increase bounds of bins */
251 const unsigned int s0 = (unsigned int)prims[i].size();
252 const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
253 const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
254 const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
255 }
256 }
257
258 /*! bins an array of primitives */
259 template<typename BinBoundsAndCenter>
260 __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
261 {
262 if (N == 0) return;
263
264 size_t i;
265 for (i=0; i<N-1; i+=2)
266 {
267 /*! map even and odd primitive to bin */
268 BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
269 const vint4 bin0 = (vint4)mapping.bin(center0);
270 BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
271 const vint4 bin1 = (vint4)mapping.bin(center1);
272
273 /*! increase bounds for bins for even primitive */
274 const unsigned int s0 = prims[i+0].size();
275 const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
276 const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
277 const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
278
279 /*! increase bounds of bins for odd primitive */
280 const unsigned int s1 = prims[i+1].size();
281 const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
282 const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
283 const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
284 }
285
286 /*! for uneven number of primitives */
287 if (i < N)
288 {
289 /*! map primitive to bin */
290 BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
291 const vint4 bin0 = (vint4)mapping.bin(center0);
292
293 /*! increase bounds of bins */
294 const unsigned int s0 = prims[i+0].size();
295 const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
296 const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
297 const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
298 }
299 }
300
301 __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
302 bin(prims+begin,end-begin,mapping);
303 }
304
305 template<typename BinBoundsAndCenter>
306 __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
307 bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
308 }
309
310 /*! merges in other binning information */
311 __forceinline void merge (const BinInfoT& other, size_t numBins)
312 {
313
314 for (size_t i=0; i<numBins; i++)
315 {
316 counts(i) += other.counts(i);
317 bounds(i,0).extend(other.bounds(i,0));
318 bounds(i,1).extend(other.bounds(i,1));
319 bounds(i,2).extend(other.bounds(i,2));
320 }
321 }
322
323 /*! reduces binning information */
324 static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
325 {
326 BinInfoT c;
327 for (size_t i=0; i<numBins; i++)
328 {
329 c.counts(i) = a.counts(i)+b.counts(i);
330 c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
331 c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
332 c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
333 }
334 return c;
335 }
336
337 /*! finds the best split by scanning binning information */
338 __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
339 {
340 /* sweep from right to left and compute parallel prefix of merged bounds */
341 vfloat4 rAreas[BINS];
342 vuint4 rCounts[BINS];
343 vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
344 for (size_t i=mapping.size()-1; i>0; i--)
345 {
346 count += counts(i);
347 rCounts[i] = count;
348 bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
349 by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
350 bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
351 rAreas[i][3] = 0.0f;
352 }
353 /* sweep from left to right and compute SAH */
354 vuint4 blocks_add = (1 << blocks_shift)-1;
355 vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
356 count = 0; bx = empty; by = empty; bz = empty;
357 for (size_t i=1; i<mapping.size(); i++, ii+=1)
358 {
359 count += counts(i-1);
360 bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
361 by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
362 bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
363 const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
364 const vfloat4 rArea = rAreas[i];
365 const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions.
366 const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
367 const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
368 //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
369
370 vbestPos = select(sah < vbestSAH,ii ,vbestPos);
371 vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
372 }
373
374 /* find best dimension */
375 float bestSAH = inf;
376 int bestDim = -1;
377 int bestPos = 0;
378 for (int dim=0; dim<3; dim++)
379 {
380 /* ignore zero sized dimensions */
381 if (unlikely(mapping.invalid(dim)))
382 continue;
383
384 /* test if this is a better dimension */
385 if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
386 bestDim = dim;
387 bestPos = vbestPos[dim];
388 bestSAH = vbestSAH[dim];
389 }
390 }
391 return Split(bestSAH,bestDim,bestPos,mapping);
392 }
393
394 /*! calculates extended split information */
395 __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
396 {
397 if (split.dim == -1) {
398 new (&info) SplitInfoT<BBox>(0,empty,0,empty);
399 return;
400 }
401
402 size_t leftCount = 0;
403 BBox leftBounds = empty;
404 for (size_t i=0; i<(size_t)split.pos; i++) {
405 leftCount += counts(i,split.dim);
406 leftBounds.extend(bounds(i,split.dim));
407 }
408 size_t rightCount = 0;
409 BBox rightBounds = empty;
410 for (size_t i=split.pos; i<mapping.size(); i++) {
411 rightCount += counts(i,split.dim);
412 rightBounds.extend(bounds(i,split.dim));
413 }
414 new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);
415 }
416
417 /*! gets the number of primitives left of the split */
418 __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
419 {
420 if (unlikely(split.dim == -1)) return -1;
421
422 size_t leftCount = 0;
423 for (size_t i = 0; i < (size_t)split.pos; i++) {
424 leftCount += counts(i, split.dim);
425 }
426 return leftCount;
427 }
428
429 /*! gets the number of primitives right of the split */
430 __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
431 {
432 if (unlikely(split.dim == -1)) return -1;
433
434 size_t rightCount = 0;
435 for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
436 rightCount += counts(i, split.dim);
437 }
438 return rightCount;
439 }
440
441 private:
442 BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
443 vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
444 };
445 }
446
447 template<typename BinInfoT, typename BinMapping, typename PrimRef>
448 __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
449 {
450 if (likely(end-begin < parallelThreshold)) {
451 binner.bin(prims,begin,end,mapping);
452 } else {
453 binner = parallel_reduce(begin,end,blockSize,binner,
454 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
455 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
456 }
457 }
458
459 template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
460 __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
461 {
462 if (likely(end-begin < parallelThreshold)) {
463 binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
464 } else {
465 binner = parallel_reduce(begin,end,blockSize,binner,
466 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
467 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
468 }
469 }
470
471 template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
472 __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
473 {
474 if (!parallel) {
475 binner.bin(prims,begin,end,mapping);
476 } else {
477 binner = parallel_reduce(begin,end,blockSize,binner,
478 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
479 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
480 }
481 }
482
483 template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
484 __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
485 {
486 if (!parallel) {
487 binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
488 } else {
489 binner = parallel_reduce(begin,end,blockSize,binner,
490 [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
491 [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
492 }
493 }
494}
495