| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../common/scene.h" |
| 7 | #include "priminfo.h" |
| 8 | |
| 9 | namespace embree |
| 10 | { |
| 11 | static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5; |
| 12 | |
| 13 | namespace isa |
| 14 | { |
| 15 | |
| 16 | /*! mapping into bins */ |
| 17 | template<size_t BINS> |
| 18 | struct SpatialBinMapping |
| 19 | { |
| 20 | public: |
| 21 | __forceinline SpatialBinMapping() {} |
| 22 | |
| 23 | /*! calculates the mapping */ |
| 24 | __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo) |
| 25 | { |
| 26 | const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower; |
| 27 | const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper; |
| 28 | const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper)); |
| 29 | const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size()); |
| 30 | scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag); |
| 31 | ofs = (vfloat4) pinfo.geomBounds.lower; |
| 32 | inv_scale = 1.0f / scale; |
| 33 | } |
| 34 | |
| 35 | /*! slower but safe binning */ |
| 36 | __forceinline vint4 bin(const Vec3fa& p) const |
| 37 | { |
| 38 | const vint4 i = floori((vfloat4(p)-ofs)*scale); |
| 39 | return clamp(i,vint4(0),vint4(BINS-1)); |
| 40 | } |
| 41 | |
| 42 | __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const |
| 43 | { |
| 44 | #if defined(__AVX__) |
| 45 | const vfloat8 ofs8(ofs); |
| 46 | const vfloat8 scale8(scale); |
| 47 | const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8); |
| 48 | const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1)); |
| 49 | return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu)); |
| 50 | #else |
| 51 | const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale); |
| 52 | const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale); |
| 53 | const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1)); |
| 54 | const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1)); |
| 55 | return std::pair<vint4,vint4>(c_lower,c_upper); |
| 56 | #endif |
| 57 | } |
| 58 | |
| 59 | |
| 60 | /*! calculates left spatial position of bin */ |
| 61 | __forceinline float pos(const size_t bin, const size_t dim) const { |
| 62 | return madd(float(bin),inv_scale[dim],ofs[dim]); |
| 63 | } |
| 64 | |
| 65 | /*! calculates left spatial position of bin */ |
| 66 | template<size_t N> |
| 67 | __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const { |
| 68 | return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim])); |
| 69 | } |
| 70 | |
| 71 | /*! returns true if the mapping is invalid in some dimension */ |
| 72 | __forceinline bool invalid(const size_t dim) const { |
| 73 | return scale[dim] == 0.0f; |
| 74 | } |
| 75 | |
| 76 | public: |
| 77 | vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID |
| 78 | }; |
| 79 | |
| 80 | /*! stores all information required to perform some split */ |
| 81 | template<size_t BINS> |
| 82 | struct SpatialBinSplit |
| 83 | { |
| 84 | /*! construct an invalid split by default */ |
| 85 | __forceinline SpatialBinSplit() |
| 86 | : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {} |
| 87 | |
| 88 | /*! constructs specified split */ |
| 89 | __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping) |
| 90 | : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {} |
| 91 | |
| 92 | /*! constructs specified split */ |
| 93 | __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping) |
| 94 | : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {} |
| 95 | |
| 96 | /*! tests if this split is valid */ |
| 97 | __forceinline bool valid() const { return dim != -1; } |
| 98 | |
| 99 | /*! calculates surface area heuristic for performing the split */ |
| 100 | __forceinline float splitSAH() const { return sah; } |
| 101 | |
| 102 | /*! stream output */ |
| 103 | friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) { |
| 104 | return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}" ; |
| 105 | } |
| 106 | |
| 107 | public: |
| 108 | float sah; //!< SAH cost of the split |
| 109 | int dim; //!< split dimension |
| 110 | int pos; //!< split position |
| 111 | int left; //!< number of elements on the left side |
| 112 | int right; //!< number of elements on the right side |
| 113 | float factor; //!< factor splitting the extended range |
| 114 | SpatialBinMapping<BINS> mapping; //!< mapping into bins |
| 115 | }; |
| 116 | |
| 117 | /*! stores all binning information */ |
| 118 | template<size_t BINS, typename PrimRef> |
| 119 | struct __aligned(64) SpatialBinInfo |
| 120 | { |
| 121 | SpatialBinInfo() { |
| 122 | } |
| 123 | |
| 124 | __forceinline SpatialBinInfo(EmptyTy) { |
| 125 | clear(); |
| 126 | } |
| 127 | |
| 128 | /*! clears the bin info */ |
| 129 | __forceinline void clear() |
| 130 | { |
| 131 | for (size_t i=0; i<BINS; i++) { |
| 132 | bounds[i][0] = bounds[i][1] = bounds[i][2] = empty; |
| 133 | numBegin[i] = numEnd[i] = 0; |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | /*! adds binning data */ |
| 138 | __forceinline void add(const size_t dim, |
| 139 | const size_t beginID, |
| 140 | const size_t endID, |
| 141 | const size_t binID, |
| 142 | const BBox3fa &b, |
| 143 | const size_t n = 1) |
| 144 | { |
| 145 | assert(beginID < BINS); |
| 146 | assert(endID < BINS); |
| 147 | assert(binID < BINS); |
| 148 | |
| 149 | numBegin[beginID][dim]+=(unsigned int)n; |
| 150 | numEnd [endID][dim]+=(unsigned int)n; |
| 151 | bounds [binID][dim].extend(b); |
| 152 | } |
| 153 | |
| 154 | /*! extends binning bounds */ |
| 155 | __forceinline void extend(const size_t dim, |
| 156 | const size_t binID, |
| 157 | const BBox3fa &b) |
| 158 | { |
| 159 | assert(binID < BINS); |
| 160 | bounds [binID][dim].extend(b); |
| 161 | } |
| 162 | |
| 163 | /*! bins an array of primitives */ |
| 164 | template<typename PrimitiveSplitterFactory> |
| 165 | __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) |
| 166 | { |
| 167 | for (size_t i=begin; i<end; i++) |
| 168 | { |
| 169 | const PrimRef& prim = source[i]; |
| 170 | unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); |
| 171 | |
| 172 | if (unlikely(splits <= 1)) |
| 173 | { |
| 174 | const vint4 bin = mapping.bin(center(prim.bounds())); |
| 175 | for (size_t dim=0; dim<3; dim++) |
| 176 | { |
| 177 | assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS); |
| 178 | add(dim,bin[dim],bin[dim],bin[dim],prim.bounds()); |
| 179 | } |
| 180 | } |
| 181 | else |
| 182 | { |
| 183 | const vint4 bin0 = mapping.bin(prim.bounds().lower); |
| 184 | const vint4 bin1 = mapping.bin(prim.bounds().upper); |
| 185 | |
| 186 | for (size_t dim=0; dim<3; dim++) |
| 187 | { |
| 188 | if (unlikely(mapping.invalid(dim))) |
| 189 | continue; |
| 190 | |
| 191 | size_t bin; |
| 192 | size_t l = bin0[dim]; |
| 193 | size_t r = bin1[dim]; |
| 194 | |
| 195 | // same bin optimization |
| 196 | if (likely(l == r)) |
| 197 | { |
| 198 | add(dim,l,l,l,prim.bounds()); |
| 199 | continue; |
| 200 | } |
| 201 | size_t bin_start = bin0[dim]; |
| 202 | size_t bin_end = bin1[dim]; |
| 203 | BBox3fa rest = prim.bounds(); |
| 204 | |
| 205 | /* assure that split position always overlaps the primitive bounds */ |
| 206 | while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++; |
| 207 | while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--; |
| 208 | |
| 209 | const auto splitter = splitterFactory(prim); |
| 210 | for (bin=bin_start; bin<bin_end; bin++) |
| 211 | { |
| 212 | const float pos = mapping.pos(bin+1,dim); |
| 213 | BBox3fa left,right; |
| 214 | splitter(rest,dim,pos,left,right); |
| 215 | |
| 216 | if (unlikely(left.empty())) l++; |
| 217 | extend(dim,bin,left); |
| 218 | rest = right; |
| 219 | } |
| 220 | if (unlikely(rest.empty())) r--; |
| 221 | add(dim,l,r,bin,rest); |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | |
| 228 | /*! bins an array of primitives */ |
| 229 | __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) |
| 230 | { |
| 231 | for (size_t i=begin; i<end; i++) |
| 232 | { |
| 233 | const PrimRef &prim = source[i]; |
| 234 | const vint4 bin0 = mapping.bin(prim.bounds().lower); |
| 235 | const vint4 bin1 = mapping.bin(prim.bounds().upper); |
| 236 | |
| 237 | for (size_t dim=0; dim<3; dim++) |
| 238 | { |
| 239 | if (unlikely(mapping.invalid(dim))) |
| 240 | continue; |
| 241 | |
| 242 | const size_t l = bin0[dim]; |
| 243 | const size_t r = bin1[dim]; |
| 244 | |
| 245 | const unsigned int n = prim.primID(); |
| 246 | |
| 247 | // same bin optimization |
| 248 | if (likely(l == r)) |
| 249 | { |
| 250 | add(dim,l,l,l,prim.bounds(),n); |
| 251 | continue; |
| 252 | } |
| 253 | const size_t bin_start = bin0[dim]; |
| 254 | const size_t bin_end = bin1[dim]; |
| 255 | for (size_t bin=bin_start; bin<bin_end; bin++) |
| 256 | add(dim,l,r,bin,prim.bounds(),n); |
| 257 | } |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | /*! merges in other binning information */ |
| 262 | void merge (const SpatialBinInfo& other) |
| 263 | { |
| 264 | for (size_t i=0; i<BINS; i++) |
| 265 | { |
| 266 | numBegin[i] += other.numBegin[i]; |
| 267 | numEnd [i] += other.numEnd [i]; |
| 268 | bounds[i][0].extend(other.bounds[i][0]); |
| 269 | bounds[i][1].extend(other.bounds[i][1]); |
| 270 | bounds[i][2].extend(other.bounds[i][2]); |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | /*! merges in other binning information */ |
| 275 | static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b) |
| 276 | { |
| 277 | SpatialBinInfo c(empty); |
| 278 | for (size_t i=0; i<BINS; i++) |
| 279 | { |
| 280 | c.numBegin[i] += a.numBegin[i]+b.numBegin[i]; |
| 281 | c.numEnd [i] += a.numEnd [i]+b.numEnd [i]; |
| 282 | c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]); |
| 283 | c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]); |
| 284 | c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]); |
| 285 | } |
| 286 | return c; |
| 287 | } |
| 288 | |
| 289 | /*! finds the best split by scanning binning information */ |
| 290 | SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const |
| 291 | { |
| 292 | /* sweep from right to left and compute parallel prefix of merged bounds */ |
| 293 | vfloat4 rAreas[BINS]; |
| 294 | vuint4 rCounts[BINS]; |
| 295 | vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty; |
| 296 | for (size_t i=BINS-1; i>0; i--) |
| 297 | { |
| 298 | count += numEnd[i]; |
| 299 | rCounts[i] = count; |
| 300 | bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx); |
| 301 | by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by); |
| 302 | bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz); |
| 303 | rAreas[i][3] = 0.0f; |
| 304 | } |
| 305 | |
| 306 | /* sweep from left to right and compute SAH */ |
| 307 | vuint4 blocks_add = (1 << blocks_shift)-1; |
| 308 | vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0; |
| 309 | count = 0; bx = empty; by = empty; bz = empty; |
| 310 | for (size_t i=1; i<BINS; i++, ii+=1) |
| 311 | { |
| 312 | count += numBegin[i-1]; |
| 313 | bx.extend(bounds[i-1][0]); float Ax = halfArea(bx); |
| 314 | by.extend(bounds[i-1][1]); float Ay = halfArea(by); |
| 315 | bz.extend(bounds[i-1][2]); float Az = halfArea(bz); |
| 316 | const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); |
| 317 | const vfloat4 rArea = rAreas[i]; |
| 318 | const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); |
| 319 | const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); |
| 320 | const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); |
| 321 | // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); |
| 322 | const vbool4 mask = sah < vbestSAH; |
| 323 | vbestPos = select(mask,ii ,vbestPos); |
| 324 | vbestSAH = select(mask,sah,vbestSAH); |
| 325 | vbestlCount = select(mask,count,vbestlCount); |
| 326 | vbestrCount = select(mask,rCounts[i],vbestrCount); |
| 327 | } |
| 328 | |
| 329 | /* find best dimension */ |
| 330 | float bestSAH = inf; |
| 331 | int bestDim = -1; |
| 332 | int bestPos = 0; |
| 333 | unsigned int bestlCount = 0; |
| 334 | unsigned int bestrCount = 0; |
| 335 | for (int dim=0; dim<3; dim++) |
| 336 | { |
| 337 | /* ignore zero sized dimensions */ |
| 338 | if (unlikely(mapping.invalid(dim))) |
| 339 | continue; |
| 340 | |
| 341 | /* test if this is a better dimension */ |
| 342 | if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { |
| 343 | bestDim = dim; |
| 344 | bestPos = vbestPos[dim]; |
| 345 | bestSAH = vbestSAH[dim]; |
| 346 | bestlCount = vbestlCount[dim]; |
| 347 | bestrCount = vbestrCount[dim]; |
| 348 | } |
| 349 | } |
| 350 | assert(bestSAH >= 0.0f); |
| 351 | |
| 352 | /* return invalid split if no split found */ |
| 353 | if (bestDim == -1) |
| 354 | return SpatialBinSplit<BINS>(inf,-1,0,mapping); |
| 355 | |
| 356 | /* return best found split */ |
| 357 | return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping); |
| 358 | } |
| 359 | |
| 360 | private: |
| 361 | BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension |
| 362 | vuint4 numBegin[BINS]; //!< number of primitives starting in bin |
| 363 | vuint4 numEnd[BINS]; //!< number of primitives ending in bin |
| 364 | }; |
| 365 | } |
| 366 | } |
| 367 | |
| 368 | |