| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "priminfo.h" |
| 7 | #include "../../common/algorithms/parallel_reduce.h" |
| 8 | #include "../../common/algorithms/parallel_partition.h" |
| 9 | |
| 10 | namespace embree |
| 11 | { |
| 12 | namespace isa |
| 13 | { |
| 14 | /*! mapping into bins */ |
| 15 | template<size_t BINS> |
| 16 | struct BinMapping |
| 17 | { |
| 18 | public: |
| 19 | __forceinline BinMapping() {} |
| 20 | |
| 21 | /*! calculates the mapping */ |
| 22 | __forceinline BinMapping(size_t N, const BBox3fa& centBounds) |
| 23 | { |
| 24 | num = min(BINS,size_t(4.0f + 0.05f*N)); |
| 25 | assert(num >= 1); |
| 26 | const vfloat4 eps = 1E-34f; |
| 27 | const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); |
| 28 | scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); |
| 29 | ofs = (vfloat4) centBounds.lower; |
| 30 | } |
| 31 | |
| 32 | /*! calculates the mapping */ |
| 33 | __forceinline BinMapping(const BBox3fa& centBounds) |
| 34 | { |
| 35 | num = BINS; |
| 36 | const vfloat4 eps = 1E-34f; |
| 37 | const vfloat4 diag = max(eps, (vfloat4) centBounds.size()); |
| 38 | scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); |
| 39 | ofs = (vfloat4) centBounds.lower; |
| 40 | } |
| 41 | |
| 42 | /*! calculates the mapping */ |
| 43 | template<typename PrimInfo> |
| 44 | __forceinline BinMapping(const PrimInfo& pinfo) |
| 45 | { |
| 46 | const vfloat4 eps = 1E-34f; |
| 47 | num = min(BINS,size_t(4.0f + 0.05f*pinfo.size())); |
| 48 | const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size()); |
| 49 | scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); |
| 50 | ofs = (vfloat4) pinfo.centBounds.lower; |
| 51 | } |
| 52 | |
| 53 | /*! returns number of bins */ |
| 54 | __forceinline size_t size() const { return num; } |
| 55 | |
| 56 | /*! slower but safe binning */ |
| 57 | __forceinline Vec3ia bin(const Vec3fa& p) const |
| 58 | { |
| 59 | const vint4 i = floori((vfloat4(p)-ofs)*scale); |
| 60 | assert(i[0] >= 0 && (size_t)i[0] < num); |
| 61 | assert(i[1] >= 0 && (size_t)i[1] < num); |
| 62 | assert(i[2] >= 0 && (size_t)i[2] < num); |
| 63 | |
| 64 | // we clamp to handle corner cases that could calculate out of bounds bin |
| 65 | return Vec3ia(clamp(i,vint4(0),vint4(num-1))); |
| 66 | } |
| 67 | |
| 68 | /*! faster but unsafe binning */ |
| 69 | __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const { |
| 70 | return Vec3ia(floori((vfloat4(p)-ofs)*scale)); |
| 71 | } |
| 72 | |
| 73 | /*! faster but unsafe binning */ |
| 74 | template<typename PrimRef> |
| 75 | __forceinline Vec3ia bin_unsafe(const PrimRef& p) const { |
| 76 | return bin_unsafe(p.binCenter()); |
| 77 | } |
| 78 | |
| 79 | /*! faster but unsafe binning */ |
| 80 | template<typename PrimRef, typename BinBoundsAndCenter> |
| 81 | __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const { |
| 82 | return bin_unsafe(binBoundsAndCenter.binCenter(p)); |
| 83 | } |
| 84 | |
| 85 | template<typename PrimRef> |
| 86 | __forceinline bool bin_unsafe(const PrimRef& ref, |
| 87 | const vint4& vSplitPos, |
| 88 | const vbool4& splitDimMask) const // FIXME: rename to isLeft |
| 89 | { |
| 90 | return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask); |
| 91 | } |
| 92 | /*! calculates left spatial position of bin */ |
| 93 | __forceinline float pos(const size_t bin, const size_t dim) const { |
| 94 | return madd(float(bin),1.0f / scale[dim],ofs[dim]); |
| 95 | } |
| 96 | |
| 97 | /*! returns true if the mapping is invalid in some dimension */ |
| 98 | __forceinline bool invalid(const size_t dim) const { |
| 99 | return scale[dim] == 0.0f; |
| 100 | } |
| 101 | |
| 102 | /*! stream output */ |
| 103 | friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) { |
| 104 | return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}" ; |
| 105 | } |
| 106 | |
| 107 | public: |
| 108 | size_t num; |
| 109 | vfloat4 ofs,scale; //!< linear function that maps to bin ID |
| 110 | }; |
| 111 | |
| 112 | /*! stores all information to perform some split */ |
| 113 | template<size_t BINS> |
| 114 | struct BinSplit |
| 115 | { |
| 116 | enum |
| 117 | { |
| 118 | SPLIT_OBJECT = 0, |
| 119 | SPLIT_FALLBACK = 1, |
| 120 | SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already |
| 121 | SPLIT_TEMPORAL = 2, |
| 122 | SPLIT_GEOMID = 3, |
| 123 | }; |
| 124 | |
| 125 | /*! construct an invalid split by default */ |
| 126 | __forceinline BinSplit() |
| 127 | : sah(inf), dim(-1), pos(0), data(0) {} |
| 128 | |
| 129 | __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0) |
| 130 | : sah(sah), dim(dim), fpos(fpos), data(data) {} |
| 131 | |
| 132 | /*! constructs specified split */ |
| 133 | __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping) |
| 134 | : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {} |
| 135 | |
| 136 | /*! tests if this split is valid */ |
| 137 | __forceinline bool valid() const { return dim != -1; } |
| 138 | |
| 139 | /*! calculates surface area heuristic for performing the split */ |
| 140 | __forceinline float splitSAH() const { return sah; } |
| 141 | |
| 142 | /*! stream output */ |
| 143 | friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) { |
| 144 | return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}" ; |
| 145 | } |
| 146 | |
| 147 | public: |
| 148 | float sah; //!< SAH cost of the split |
| 149 | int dim; //!< split dimension |
| 150 | union { int pos; float fpos; }; //!< bin index for splitting |
| 151 | unsigned int data; //!< extra optional split data |
| 152 | BinMapping<BINS> mapping; //!< mapping into bins |
| 153 | }; |
| 154 | |
| 155 | /*! stores extended information about the split */ |
| 156 | template<typename BBox> |
| 157 | struct SplitInfoT |
| 158 | { |
| 159 | |
| 160 | __forceinline SplitInfoT () {} |
| 161 | |
| 162 | __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds) |
| 163 | : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {} |
| 164 | |
| 165 | public: |
| 166 | size_t leftCount,rightCount; |
| 167 | BBox leftBounds,rightBounds; |
| 168 | }; |
| 169 | |
| 170 | typedef SplitInfoT<BBox3fa> SplitInfo; |
| 171 | typedef SplitInfoT<LBBox3fa> SplitInfo2; |
| 172 | |
| 173 | /*! stores all binning information */ |
| 174 | template<size_t BINS, typename PrimRef, typename BBox> |
| 175 | struct __aligned(64) BinInfoT |
| 176 | { |
| 177 | typedef BinSplit<BINS> Split; |
| 178 | typedef vbool4 vbool; |
| 179 | typedef vint4 vint; |
| 180 | typedef vfloat4 vfloat; |
| 181 | |
| 182 | __forceinline BinInfoT() { |
| 183 | } |
| 184 | |
| 185 | __forceinline BinInfoT(EmptyTy) { |
| 186 | clear(); |
| 187 | } |
| 188 | |
| 189 | /*! bin access function */ |
| 190 | __forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; } |
| 191 | __forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; } |
| 192 | |
| 193 | __forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; } |
| 194 | __forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; } |
| 195 | |
| 196 | __forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; } |
| 197 | __forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; } |
| 198 | |
| 199 | /*! clears the bin info */ |
| 200 | __forceinline void clear() |
| 201 | { |
| 202 | for (size_t i=0; i<BINS; i++) { |
| 203 | bounds(i,0) = bounds(i,1) = bounds(i,2) = empty; |
| 204 | counts(i) = vuint4(zero); |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | /*! bins an array of primitives */ |
| 209 | __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping) |
| 210 | { |
| 211 | if (unlikely(N == 0)) return; |
| 212 | size_t i; |
| 213 | for (i=0; i<N-1; i+=2) |
| 214 | { |
| 215 | /*! map even and odd primitive to bin */ |
| 216 | BBox prim0; Vec3fa center0; |
| 217 | prims[i+0].binBoundsAndCenter(prim0,center0); |
| 218 | const vint4 bin0 = (vint4)mapping.bin(center0); |
| 219 | |
| 220 | BBox prim1; Vec3fa center1; |
| 221 | prims[i+1].binBoundsAndCenter(prim1,center1); |
| 222 | const vint4 bin1 = (vint4)mapping.bin(center1); |
| 223 | |
| 224 | /*! increase bounds for bins for even primitive */ |
| 225 | const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0); |
| 226 | const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0); |
| 227 | const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0); |
| 228 | const unsigned int s0 = (unsigned int)prims[i+0].size(); |
| 229 | counts(b00,0)+=s0; |
| 230 | counts(b01,1)+=s0; |
| 231 | counts(b02,2)+=s0; |
| 232 | |
| 233 | /*! increase bounds of bins for odd primitive */ |
| 234 | const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1); |
| 235 | const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1); |
| 236 | const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1); |
| 237 | const unsigned int s1 = (unsigned int)prims[i+1].size(); |
| 238 | counts(b10,0)+=s1; |
| 239 | counts(b11,1)+=s1; |
| 240 | counts(b12,2)+=s1; |
| 241 | } |
| 242 | /*! for uneven number of primitives */ |
| 243 | if (i < N) |
| 244 | { |
| 245 | /*! map primitive to bin */ |
| 246 | BBox prim0; Vec3fa center0; |
| 247 | prims[i].binBoundsAndCenter(prim0,center0); |
| 248 | const vint4 bin0 = (vint4)mapping.bin(center0); |
| 249 | |
| 250 | /*! increase bounds of bins */ |
| 251 | const unsigned int s0 = (unsigned int)prims[i].size(); |
| 252 | const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
| 253 | const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
| 254 | const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | /*! bins an array of primitives */ |
| 259 | template<typename BinBoundsAndCenter> |
| 260 | __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
| 261 | { |
| 262 | if (N == 0) return; |
| 263 | |
| 264 | size_t i; |
| 265 | for (i=0; i<N-1; i+=2) |
| 266 | { |
| 267 | /*! map even and odd primitive to bin */ |
| 268 | BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); |
| 269 | const vint4 bin0 = (vint4)mapping.bin(center0); |
| 270 | BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1); |
| 271 | const vint4 bin1 = (vint4)mapping.bin(center1); |
| 272 | |
| 273 | /*! increase bounds for bins for even primitive */ |
| 274 | const unsigned int s0 = prims[i+0].size(); |
| 275 | const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
| 276 | const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
| 277 | const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
| 278 | |
| 279 | /*! increase bounds of bins for odd primitive */ |
| 280 | const unsigned int s1 = prims[i+1].size(); |
| 281 | const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1); |
| 282 | const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1); |
| 283 | const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1); |
| 284 | } |
| 285 | |
| 286 | /*! for uneven number of primitives */ |
| 287 | if (i < N) |
| 288 | { |
| 289 | /*! map primitive to bin */ |
| 290 | BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0); |
| 291 | const vint4 bin0 = (vint4)mapping.bin(center0); |
| 292 | |
| 293 | /*! increase bounds of bins */ |
| 294 | const unsigned int s0 = prims[i+0].size(); |
| 295 | const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0); |
| 296 | const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0); |
| 297 | const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0); |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) { |
| 302 | bin(prims+begin,end-begin,mapping); |
| 303 | } |
| 304 | |
| 305 | template<typename BinBoundsAndCenter> |
| 306 | __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) { |
| 307 | bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter); |
| 308 | } |
| 309 | |
| 310 | /*! merges in other binning information */ |
| 311 | __forceinline void merge (const BinInfoT& other, size_t numBins) |
| 312 | { |
| 313 | |
| 314 | for (size_t i=0; i<numBins; i++) |
| 315 | { |
| 316 | counts(i) += other.counts(i); |
| 317 | bounds(i,0).extend(other.bounds(i,0)); |
| 318 | bounds(i,1).extend(other.bounds(i,1)); |
| 319 | bounds(i,2).extend(other.bounds(i,2)); |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | /*! reduces binning information */ |
| 324 | static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS) |
| 325 | { |
| 326 | BinInfoT c; |
| 327 | for (size_t i=0; i<numBins; i++) |
| 328 | { |
| 329 | c.counts(i) = a.counts(i)+b.counts(i); |
| 330 | c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0)); |
| 331 | c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1)); |
| 332 | c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2)); |
| 333 | } |
| 334 | return c; |
| 335 | } |
| 336 | |
| 337 | /*! finds the best split by scanning binning information */ |
| 338 | __forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const |
| 339 | { |
| 340 | /* sweep from right to left and compute parallel prefix of merged bounds */ |
| 341 | vfloat4 rAreas[BINS]; |
| 342 | vuint4 rCounts[BINS]; |
| 343 | vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty; |
| 344 | for (size_t i=mapping.size()-1; i>0; i--) |
| 345 | { |
| 346 | count += counts(i); |
| 347 | rCounts[i] = count; |
| 348 | bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx); |
| 349 | by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by); |
| 350 | bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz); |
| 351 | rAreas[i][3] = 0.0f; |
| 352 | } |
| 353 | /* sweep from left to right and compute SAH */ |
| 354 | vuint4 blocks_add = (1 << blocks_shift)-1; |
| 355 | vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; |
| 356 | count = 0; bx = empty; by = empty; bz = empty; |
| 357 | for (size_t i=1; i<mapping.size(); i++, ii+=1) |
| 358 | { |
| 359 | count += counts(i-1); |
| 360 | bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx); |
| 361 | by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by); |
| 362 | bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz); |
| 363 | const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); |
| 364 | const vfloat4 rArea = rAreas[i]; |
| 365 | const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions. |
| 366 | const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); |
| 367 | const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); |
| 368 | //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); |
| 369 | |
| 370 | vbestPos = select(sah < vbestSAH,ii ,vbestPos); |
| 371 | vbestSAH = select(sah < vbestSAH,sah,vbestSAH); |
| 372 | } |
| 373 | |
| 374 | /* find best dimension */ |
| 375 | float bestSAH = inf; |
| 376 | int bestDim = -1; |
| 377 | int bestPos = 0; |
| 378 | for (int dim=0; dim<3; dim++) |
| 379 | { |
| 380 | /* ignore zero sized dimensions */ |
| 381 | if (unlikely(mapping.invalid(dim))) |
| 382 | continue; |
| 383 | |
| 384 | /* test if this is a better dimension */ |
| 385 | if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { |
| 386 | bestDim = dim; |
| 387 | bestPos = vbestPos[dim]; |
| 388 | bestSAH = vbestSAH[dim]; |
| 389 | } |
| 390 | } |
| 391 | return Split(bestSAH,bestDim,bestPos,mapping); |
| 392 | } |
| 393 | |
| 394 | /*! calculates extended split information */ |
| 395 | __forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const |
| 396 | { |
| 397 | if (split.dim == -1) { |
| 398 | new (&info) SplitInfoT<BBox>(0,empty,0,empty); |
| 399 | return; |
| 400 | } |
| 401 | |
| 402 | size_t leftCount = 0; |
| 403 | BBox leftBounds = empty; |
| 404 | for (size_t i=0; i<(size_t)split.pos; i++) { |
| 405 | leftCount += counts(i,split.dim); |
| 406 | leftBounds.extend(bounds(i,split.dim)); |
| 407 | } |
| 408 | size_t rightCount = 0; |
| 409 | BBox rightBounds = empty; |
| 410 | for (size_t i=split.pos; i<mapping.size(); i++) { |
| 411 | rightCount += counts(i,split.dim); |
| 412 | rightBounds.extend(bounds(i,split.dim)); |
| 413 | } |
| 414 | new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds); |
| 415 | } |
| 416 | |
| 417 | /*! gets the number of primitives left of the split */ |
| 418 | __forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const |
| 419 | { |
| 420 | if (unlikely(split.dim == -1)) return -1; |
| 421 | |
| 422 | size_t leftCount = 0; |
| 423 | for (size_t i = 0; i < (size_t)split.pos; i++) { |
| 424 | leftCount += counts(i, split.dim); |
| 425 | } |
| 426 | return leftCount; |
| 427 | } |
| 428 | |
| 429 | /*! gets the number of primitives right of the split */ |
| 430 | __forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const |
| 431 | { |
| 432 | if (unlikely(split.dim == -1)) return -1; |
| 433 | |
| 434 | size_t rightCount = 0; |
| 435 | for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { |
| 436 | rightCount += counts(i, split.dim); |
| 437 | } |
| 438 | return rightCount; |
| 439 | } |
| 440 | |
| 441 | private: |
| 442 | BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension |
| 443 | vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins |
| 444 | }; |
| 445 | } |
| 446 | |
| 447 | template<typename BinInfoT, typename BinMapping, typename PrimRef> |
| 448 | __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping) |
| 449 | { |
| 450 | if (likely(end-begin < parallelThreshold)) { |
| 451 | binner.bin(prims,begin,end,mapping); |
| 452 | } else { |
| 453 | binner = parallel_reduce(begin,end,blockSize,binner, |
| 454 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, |
| 455 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> |
| 460 | __forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
| 461 | { |
| 462 | if (likely(end-begin < parallelThreshold)) { |
| 463 | binner.bin(prims,begin,end,mapping,binBoundsAndCenter); |
| 464 | } else { |
| 465 | binner = parallel_reduce(begin,end,blockSize,binner, |
| 466 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, |
| 467 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
| 468 | } |
| 469 | } |
| 470 | |
| 471 | template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef> |
| 472 | __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping) |
| 473 | { |
| 474 | if (!parallel) { |
| 475 | binner.bin(prims,begin,end,mapping); |
| 476 | } else { |
| 477 | binner = parallel_reduce(begin,end,blockSize,binner, |
| 478 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; }, |
| 479 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
| 480 | } |
| 481 | } |
| 482 | |
| 483 | template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef> |
| 484 | __forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter) |
| 485 | { |
| 486 | if (!parallel) { |
| 487 | binner.bin(prims,begin,end,mapping,binBoundsAndCenter); |
| 488 | } else { |
| 489 | binner = parallel_reduce(begin,end,blockSize,binner, |
| 490 | [&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; }, |
| 491 | [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; }); |
| 492 | } |
| 493 | } |
| 494 | } |
| 495 | |