1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "priminfo.h" |
7 | #include "../../common/algorithms/parallel_reduce.h" |
8 | #include "../../common/algorithms/parallel_partition.h" |
9 | |
10 | namespace embree |
11 | { |
12 | namespace isa |
13 | { |
14 | /*! mapping into bins */ |
15 | template<size_t BINS> |
16 | struct BinMapping |
17 | { |
18 | public: |
19 | __forceinline BinMapping() {} |
20 | |
21 | /*! calculates the mapping */ |
22 | __forceinline BinMapping(size_t N, const BBox3fa& centBounds) |
23 | { |
24 | num = min(BINS,size_t(4.0f + 0.05f*N)); |
25 | assert(num >= 1); |
26 | const vfloat4 eps = 1E-34f; |
27 | const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); |
28 | scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); |
29 | ofs = (vfloat4) centBounds.lower; |
30 | } |
31 | |
32 | /*! calculates the mapping */ |
33 | __forceinline BinMapping(const BBox3fa& centBounds) |
34 | { |
35 | num = BINS; |
36 | const vfloat4 eps = 1E-34f; |
37 | const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); |
38 | scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); |
39 | ofs = (vfloat4) centBounds.lower; |
40 | } |
41 | |
42 | /*! calculates the mapping */ |
43 | template<typename PrimInfo> |
44 | __forceinline BinMapping(const PrimInfo& pinfo) |
45 | { |
46 | const vfloat4 eps = 1E-34f; |
47 | num = min(BINS,size_t(4.0f + 0.05f*pinfo.size())); |
48 | const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size()); |
49 | scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); |
50 | ofs = (vfloat4) pinfo.centBounds.lower; |
51 | } |
52 | |
53 | /*! returns number of bins */ |
54 | __forceinline size_t size() const { return num; } |
55 | |
56 | /*! slower but safe binning */ |
57 | __forceinline Vec3ia bin(const Vec3fa& p) const |
58 | { |
59 | const vint4 i = floori((vfloat4(p)-ofs)*scale); |
60 | assert(i[0] >= 0 && (size_t)i[0] < num); |
61 | assert(i[1] >= 0 && (size_t)i[1] < num); |
62 | assert(i[2] >= 0 && (size_t)i[2] < num); |
63 | |
64 | // we clamp to handle corner cases that could calculate out of bounds bin |
65 | return Vec3ia(clamp(i,vint4(0),vint4(num-1))); |
66 | } |
67 | |
68 | /*! faster but unsafe binning */ |
69 | __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const { |
70 | return Vec3ia(floori((vfloat4(p)-ofs)*scale)); |
71 | } |
72 | |
73 | /*! faster but unsafe binning */ |
74 | template<typename PrimRef> |
75 | __forceinline Vec3ia bin_unsafe(const PrimRef& p) const { |
76 | return bin_unsafe(p.binCenter()); |
77 | } |
78 | |
79 | /*! faster but unsafe binning */ |
80 | template<typename PrimRef, typename BinBoundsAndCenter> |
81 | __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const { |
82 | return bin_unsafe(binBoundsAndCenter.binCenter(p)); |
83 | } |
84 | |
85 | template<typename PrimRef> |
86 | __forceinline bool bin_unsafe(const PrimRef& ref, |
87 | const vint4& vSplitPos, |
88 | const vbool4& splitDimMask) const // FIXME: rename to isLeft |
89 | { |
90 | return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask); |
91 | } |
92 | /*! calculates left spatial position of bin */ |
93 | __forceinline float pos(const size_t bin, const size_t dim) const { |
94 | return madd(float(bin),1.0f / scale[dim],ofs[dim]); |
95 | } |
96 | |
97 | /*! returns true if the mapping is invalid in some dimension */ |
98 | __forceinline bool invalid(const size_t dim) const { |
99 | return scale[dim] == 0.0f; |
100 | } |
101 | |
102 | /*! stream output */ |
103 | friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) { |
104 | return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}" ; |
105 | } |
106 | |
107 | public: |
108 | size_t num; |
109 | vfloat4 ofs,scale; //!< linear function that maps to bin ID |
110 | }; |
111 | |
112 | /*! stores all information to perform some split */ |
113 | template<size_t BINS> |
114 | struct BinSplit |
115 | { |
116 | enum |
117 | { |
118 | SPLIT_OBJECT = 0, |
119 | SPLIT_FALLBACK = 1, |
120 | SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already |
121 | SPLIT_TEMPORAL = 2, |
122 | SPLIT_GEOMID = 3, |
123 | }; |
124 | |
125 | /*! construct an invalid split by default */ |
126 | __forceinline BinSplit() |
127 | : sah(inf), dim(-1), pos(0), data(0) {} |
128 | |
129 | __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0) |
130 | : sah(sah), dim(dim), fpos(fpos), data(data) {} |
131 | |
132 | /*! constructs specified split */ |
133 | __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping) |
134 | : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {} |
135 | |
136 | /*! tests if this split is valid */ |
137 | __forceinline bool valid() const { return dim != -1; } |
138 | |
139 | /*! calculates surface area heuristic for performing the split */ |
140 | __forceinline float splitSAH() const { return sah; } |
141 | |
142 | /*! stream output */ |
143 | friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) { |
144 | return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}" ; |
145 | } |
146 | |
147 | public: |
148 | float sah; //!< SAH cost of the split |
149 | int dim; //!< split dimension |
150 | union { int pos; float fpos; }; //!< bin index for splitting |
151 | unsigned int data; //!< extra optional split data |
152 | BinMapping<BINS> mapping; //!< mapping into bins |
153 | }; |
154 | |
155 | /*! stores extended information about the split */ |
156 | template<typename BBox> |
157 | struct SplitInfoT |
158 | { |
159 | |
160 | __forceinline SplitInfoT () {} |
161 | |
162 | __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds) |
163 | : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {} |
164 | |
165 | public: |
166 | size_t leftCount,rightCount; |
167 | BBox leftBounds,rightBounds; |
168 | }; |
169 | |
170 | typedef SplitInfoT<BBox3fa> SplitInfo; |
171 | typedef SplitInfoT<LBBox3fa> SplitInfo2; |
172 | |
173 | /*! stores all binning information */ |
174 | template<size_t BINS, typename PrimRef, typename BBox> |
175 | struct __aligned(64) BinInfoT |
176 | { |
177 | typedef BinSplit<BINS> Split; |
178 | typedef vbool4 vbool; |
179 | typedef vint4 vint; |
180 | typedef vfloat4 vfloat; |
181 | |
182 | __forceinline BinInfoT() { |
183 | } |
184 | |
185 | __forceinline BinInfoT(EmptyTy) { |
186 | clear(); |
187 | } |
188 | |
189 | /*! bin access function */ |
190 | __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; } |
191 | __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; } |
192 | |
193 | __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; } |
194 | __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; } |
195 | |
196 | __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; } |
197 | __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; } |
198 | |
199 | /*! clears the bin info */ |
200 | __forceinline void clear() |
201 | { |
202 | for (size_t i=0; i<BINS; i++) { |
203 | bounds(i,0) = bounds(i,1) = bounds(i,2) = empty; |
204 | counts(i) = vuint4(zero); |
205 | } |
206 | } |
207 | |
208 | /*! bins an array of primitives */ |
209 | __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping) |
210 | { |
211 | if (unlikely(N == 0)) return; |
212 | size_t i; |
213 | for (i=0; i<N-1; i+=2) |
214 | { |
215 | /*! map even and odd primitive to bin */ |
216 | BBox prim0; Vec3fa center0; |
217 | prims[i+0].binBoundsAndCenter(prim0,center0); |
218 | const vint4 bin0 = (vint4)mapping.bin(center0); |
219 | |
220 | BBox prim1; Vec3fa center1; |
221 | prims[i+1].binBoundsAndCenter(prim1,center1); |
222 | const vint4 bin1 = (vint4)mapping.bin(center1); |
223 | |
224 | /*! increase bounds for bins for even primitive */ |
225 | const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0); |
226 | const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0); |
227 | const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0); |
228 | const unsigned int s0 = (unsigned int)prims[i+0].size(); |
229 | counts(b00,0)+=s0; |
230 | counts(b01,1)+=s0; |
231 | counts(b02,2)+=s0; |
232 | |
233 | /*! increase bounds of bins for odd primitive */ |
234 | const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1); |
235 | const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1); |
236 | const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1); |
237 | const unsigned int s1 = (unsigned int)prims[i+1].size(); |
238 | counts(b10,0)+=s1; |
239 | counts(b11,1)+=s1; |
240 | counts(b12,2)+=s1; |
241 | } |
242 | /*! for uneven number of primitives */ |
243 | if (i < N) |
244 | { |
245 | /*! map primitive to bin */ |
246 | BBox prim0; Vec3fa center0; |
247 | prims[i].binBoundsAndCenter(prim0,center0); |
248 | const vint4 bin0 = (vint4)mapping.bin(center0); |
249 | |
250 | /*! increase bounds of bins */ |
251 | const unsigned int s0 = (unsigned int)prims[i].size(); |
252 | const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
253 | const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
254 | const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
255 | } |
256 | } |
257 | |
258 | /*! bins an array of primitives */ |
259 | template<typename BinBoundsAndCenter> |
260 | __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
261 | { |
262 | if (N == 0) return; |
263 | |
264 | size_t i; |
265 | for (i=0; i<N-1; i+=2) |
266 | { |
267 | /*! map even and odd primitive to bin */ |
268 | BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); |
269 | const vint4 bin0 = (vint4)mapping.bin(center0); |
270 | BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1); |
271 | const vint4 bin1 = (vint4)mapping.bin(center1); |
272 | |
273 | /*! increase bounds for bins for even primitive */ |
274 | const unsigned int s0 = prims[i+0].size(); |
275 | const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
276 | const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
277 | const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
278 | |
279 | /*! increase bounds of bins for odd primitive */ |
280 | const unsigned int s1 = prims[i+1].size(); |
281 | const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1); |
282 | const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1); |
283 | const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1); |
284 | } |
285 | |
286 | /*! for uneven number of primitives */ |
287 | if (i < N) |
288 | { |
289 | /*! map primitive to bin */ |
290 | BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); |
291 | const vint4 bin0 = (vint4)mapping.bin(center0); |
292 | |
293 | /*! increase bounds of bins */ |
294 | const unsigned int s0 = prims[i+0].size(); |
295 | const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
296 | const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
297 | const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
298 | } |
299 | } |
300 | |
301 | __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) { |
302 | bin(prims+begin,end-begin,mapping); |
303 | } |
304 | |
305 | template<typename BinBoundsAndCenter> |
306 | __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { |
307 | bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter); |
308 | } |
309 | |
310 | /*! merges in other binning information */ |
311 | __forceinline void merge (const BinInfoT& other, size_t numBins) |
312 | { |
313 | |
314 | for (size_t i=0; i<numBins; i++) |
315 | { |
316 | counts(i) += other.counts(i); |
317 | bounds(i,0).extend(other.bounds(i,0)); |
318 | bounds(i,1).extend(other.bounds(i,1)); |
319 | bounds(i,2).extend(other.bounds(i,2)); |
320 | } |
321 | } |
322 | |
323 | /*! reduces binning information */ |
324 | static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS) |
325 | { |
326 | BinInfoT c; |
327 | for (size_t i=0; i<numBins; i++) |
328 | { |
329 | c.counts(i) = a.counts(i)+b.counts(i); |
330 | c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0)); |
331 | c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1)); |
332 | c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2)); |
333 | } |
334 | return c; |
335 | } |
336 | |
337 | /*! finds the best split by scanning binning information */ |
338 | __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const |
339 | { |
340 | /* sweep from right to left and compute parallel prefix of merged bounds */ |
341 | vfloat4 rAreas[BINS]; |
342 | vuint4 rCounts[BINS]; |
343 | vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; |
344 | for (size_t i=mapping.size()-1; i>0; i--) |
345 | { |
346 | count += counts(i); |
347 | rCounts[i] = count; |
348 | bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); |
349 | by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); |
350 | bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); |
351 | rAreas[i][3] = 0.0f; |
352 | } |
353 | /* sweep from left to right and compute SAH */ |
354 | vuint4 blocks_add = (1 << blocks_shift)-1; |
355 | vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; |
356 | count = 0; bx = empty; by = empty; bz = empty; |
357 | for (size_t i=1; i<mapping.size(); i++, ii+=1) |
358 | { |
359 | count += counts(i-1); |
360 | bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx); |
361 | by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by); |
362 | bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz); |
363 | const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); |
364 | const vfloat4 rArea = rAreas[i]; |
365 | const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions. |
366 | const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); |
367 | const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); |
368 | //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); |
369 | |
370 | vbestPos = select(sah < vbestSAH,ii ,vbestPos); |
371 | vbestSAH = select(sah < vbestSAH,sah,vbestSAH); |
372 | } |
373 | |
374 | /* find best dimension */ |
375 | float bestSAH = inf; |
376 | int bestDim = -1; |
377 | int bestPos = 0; |
378 | for (int dim=0; dim<3; dim++) |
379 | { |
380 | /* ignore zero sized dimensions */ |
381 | if (unlikely(mapping.invalid(dim))) |
382 | continue; |
383 | |
384 | /* test if this is a better dimension */ |
385 | if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { |
386 | bestDim = dim; |
387 | bestPos = vbestPos[dim]; |
388 | bestSAH = vbestSAH[dim]; |
389 | } |
390 | } |
391 | return Split(bestSAH,bestDim,bestPos,mapping); |
392 | } |
393 | |
394 | /*! calculates extended split information */ |
395 | __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const |
396 | { |
397 | if (split.dim == -1) { |
398 | new (&info) SplitInfoT<BBox>(0,empty,0,empty); |
399 | return; |
400 | } |
401 | |
402 | size_t leftCount = 0; |
403 | BBox leftBounds = empty; |
404 | for (size_t i=0; i<(size_t)split.pos; i++) { |
405 | leftCount += counts(i,split.dim); |
406 | leftBounds.extend(bounds(i,split.dim)); |
407 | } |
408 | size_t rightCount = 0; |
409 | BBox rightBounds = empty; |
410 | for (size_t i=split.pos; i<mapping.size(); i++) { |
411 | rightCount += counts(i,split.dim); |
412 | rightBounds.extend(bounds(i,split.dim)); |
413 | } |
414 | new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds); |
415 | } |
416 | |
417 | /*! gets the number of primitives left of the split */ |
418 | __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const |
419 | { |
420 | if (unlikely(split.dim == -1)) return -1; |
421 | |
422 | size_t leftCount = 0; |
423 | for (size_t i = 0; i < (size_t)split.pos; i++) { |
424 | leftCount += counts(i, split.dim); |
425 | } |
426 | return leftCount; |
427 | } |
428 | |
429 | /*! gets the number of primitives right of the split */ |
430 | __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const |
431 | { |
432 | if (unlikely(split.dim == -1)) return -1; |
433 | |
434 | size_t rightCount = 0; |
435 | for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { |
436 | rightCount += counts(i, split.dim); |
437 | } |
438 | return rightCount; |
439 | } |
440 | |
441 | private: |
442 | BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension |
443 | vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins |
444 | }; |
445 | } |
446 | |
447 | template<typename BinInfoT, typename BinMapping, typename PrimRef> |
448 | __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping) |
449 | { |
450 | if (likely(end-begin < parallelThreshold)) { |
451 | binner.bin(prims,begin,end,mapping); |
452 | } else { |
453 | binner = parallel_reduce(begin,end,blockSize,binner, |
454 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, |
455 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
456 | } |
457 | } |
458 | |
459 | template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> |
460 | __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
461 | { |
462 | if (likely(end-begin < parallelThreshold)) { |
463 | binner.bin(prims,begin,end,mapping,binBoundsAndCenter); |
464 | } else { |
465 | binner = parallel_reduce(begin,end,blockSize,binner, |
466 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, |
467 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
468 | } |
469 | } |
470 | |
471 | template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef> |
472 | __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping) |
473 | { |
474 | if (!parallel) { |
475 | binner.bin(prims,begin,end,mapping); |
476 | } else { |
477 | binner = parallel_reduce(begin,end,blockSize,binner, |
478 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, |
479 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
480 | } |
481 | } |
482 | |
483 | template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> |
484 | __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
485 | { |
486 | if (!parallel) { |
487 | binner.bin(prims,begin,end,mapping,binBoundsAndCenter); |
488 | } else { |
489 | binner = parallel_reduce(begin,end,blockSize,binner, |
490 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, |
491 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
492 | } |
493 | } |
494 | } |
495 | |