1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../common/scene.h" |
7 | #include "priminfo.h" |
8 | |
9 | namespace embree |
10 | { |
11 | static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5; |
12 | |
13 | namespace isa |
14 | { |
15 | |
16 | /*! mapping into bins */ |
17 | template<size_t BINS> |
18 | struct SpatialBinMapping |
19 | { |
20 | public: |
21 | __forceinline SpatialBinMapping() {} |
22 | |
23 | /*! calculates the mapping */ |
24 | __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo) |
25 | { |
26 | const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower; |
27 | const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper; |
28 | const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper)); |
29 | const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size()); |
30 | scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag); |
31 | ofs = (vfloat4) pinfo.geomBounds.lower; |
32 | inv_scale = 1.0f / scale; |
33 | } |
34 | |
35 | /*! slower but safe binning */ |
36 | __forceinline vint4 bin(const Vec3fa& p) const |
37 | { |
38 | const vint4 i = floori((vfloat4(p)-ofs)*scale); |
39 | return clamp(i,vint4(0),vint4(BINS-1)); |
40 | } |
41 | |
42 | __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const |
43 | { |
44 | #if defined(__AVX__) |
45 | const vfloat8 ofs8(ofs); |
46 | const vfloat8 scale8(scale); |
47 | const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8); |
48 | const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1)); |
49 | return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu)); |
50 | #else |
51 | const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale); |
52 | const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale); |
53 | const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1)); |
54 | const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1)); |
55 | return std::pair<vint4,vint4>(c_lower,c_upper); |
56 | #endif |
57 | } |
58 | |
59 | |
60 | /*! calculates left spatial position of bin */ |
61 | __forceinline float pos(const size_t bin, const size_t dim) const { |
62 | return madd(float(bin),inv_scale[dim],ofs[dim]); |
63 | } |
64 | |
65 | /*! calculates left spatial position of bin */ |
66 | template<size_t N> |
67 | __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const { |
68 | return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim])); |
69 | } |
70 | |
71 | /*! returns true if the mapping is invalid in some dimension */ |
72 | __forceinline bool invalid(const size_t dim) const { |
73 | return scale[dim] == 0.0f; |
74 | } |
75 | |
76 | public: |
77 | vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID |
78 | }; |
79 | |
80 | /*! stores all information required to perform some split */ |
81 | template<size_t BINS> |
82 | struct SpatialBinSplit |
83 | { |
84 | /*! construct an invalid split by default */ |
85 | __forceinline SpatialBinSplit() |
86 | : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {} |
87 | |
88 | /*! constructs specified split */ |
89 | __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping) |
90 | : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {} |
91 | |
92 | /*! constructs specified split */ |
93 | __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping) |
94 | : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {} |
95 | |
96 | /*! tests if this split is valid */ |
97 | __forceinline bool valid() const { return dim != -1; } |
98 | |
99 | /*! calculates surface area heuristic for performing the split */ |
100 | __forceinline float splitSAH() const { return sah; } |
101 | |
102 | /*! stream output */ |
103 | friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) { |
104 | return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}" ; |
105 | } |
106 | |
107 | public: |
108 | float sah; //!< SAH cost of the split |
109 | int dim; //!< split dimension |
110 | int pos; //!< split position |
111 | int left; //!< number of elements on the left side |
112 | int right; //!< number of elements on the right side |
113 | float factor; //!< factor splitting the extended range |
114 | SpatialBinMapping<BINS> mapping; //!< mapping into bins |
115 | }; |
116 | |
117 | /*! stores all binning information */ |
118 | template<size_t BINS, typename PrimRef> |
119 | struct __aligned(64) SpatialBinInfo |
120 | { |
121 | SpatialBinInfo() { |
122 | } |
123 | |
124 | __forceinline SpatialBinInfo(EmptyTy) { |
125 | clear(); |
126 | } |
127 | |
128 | /*! clears the bin info */ |
129 | __forceinline void clear() |
130 | { |
131 | for (size_t i=0; i<BINS; i++) { |
132 | bounds[i][0] = bounds[i][1] = bounds[i][2] = empty; |
133 | numBegin[i] = numEnd[i] = 0; |
134 | } |
135 | } |
136 | |
137 | /*! adds binning data */ |
138 | __forceinline void add(const size_t dim, |
139 | const size_t beginID, |
140 | const size_t endID, |
141 | const size_t binID, |
142 | const BBox3fa &b, |
143 | const size_t n = 1) |
144 | { |
145 | assert(beginID < BINS); |
146 | assert(endID < BINS); |
147 | assert(binID < BINS); |
148 | |
149 | numBegin[beginID][dim]+=(unsigned int)n; |
150 | numEnd [endID][dim]+=(unsigned int)n; |
151 | bounds [binID][dim].extend(b); |
152 | } |
153 | |
154 | /*! extends binning bounds */ |
155 | __forceinline void extend(const size_t dim, |
156 | const size_t binID, |
157 | const BBox3fa &b) |
158 | { |
159 | assert(binID < BINS); |
160 | bounds [binID][dim].extend(b); |
161 | } |
162 | |
163 | /*! bins an array of primitives */ |
164 | template<typename PrimitiveSplitterFactory> |
165 | __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) |
166 | { |
167 | for (size_t i=begin; i<end; i++) |
168 | { |
169 | const PrimRef& prim = source[i]; |
170 | unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); |
171 | |
172 | if (unlikely(splits <= 1)) |
173 | { |
174 | const vint4 bin = mapping.bin(center(prim.bounds())); |
175 | for (size_t dim=0; dim<3; dim++) |
176 | { |
177 | assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS); |
178 | add(dim,bin[dim],bin[dim],bin[dim],prim.bounds()); |
179 | } |
180 | } |
181 | else |
182 | { |
183 | const vint4 bin0 = mapping.bin(prim.bounds().lower); |
184 | const vint4 bin1 = mapping.bin(prim.bounds().upper); |
185 | |
186 | for (size_t dim=0; dim<3; dim++) |
187 | { |
188 | if (unlikely(mapping.invalid(dim))) |
189 | continue; |
190 | |
191 | size_t bin; |
192 | size_t l = bin0[dim]; |
193 | size_t r = bin1[dim]; |
194 | |
195 | // same bin optimization |
196 | if (likely(l == r)) |
197 | { |
198 | add(dim,l,l,l,prim.bounds()); |
199 | continue; |
200 | } |
201 | size_t bin_start = bin0[dim]; |
202 | size_t bin_end = bin1[dim]; |
203 | BBox3fa rest = prim.bounds(); |
204 | |
205 | /* assure that split position always overlaps the primitive bounds */ |
206 | while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++; |
207 | while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--; |
208 | |
209 | const auto splitter = splitterFactory(prim); |
210 | for (bin=bin_start; bin<bin_end; bin++) |
211 | { |
212 | const float pos = mapping.pos(bin+1,dim); |
213 | BBox3fa left,right; |
214 | splitter(rest,dim,pos,left,right); |
215 | |
216 | if (unlikely(left.empty())) l++; |
217 | extend(dim,bin,left); |
218 | rest = right; |
219 | } |
220 | if (unlikely(rest.empty())) r--; |
221 | add(dim,l,r,bin,rest); |
222 | } |
223 | } |
224 | } |
225 | } |
226 | |
227 | |
228 | /*! bins an array of primitives */ |
229 | __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) |
230 | { |
231 | for (size_t i=begin; i<end; i++) |
232 | { |
233 | const PrimRef &prim = source[i]; |
234 | const vint4 bin0 = mapping.bin(prim.bounds().lower); |
235 | const vint4 bin1 = mapping.bin(prim.bounds().upper); |
236 | |
237 | for (size_t dim=0; dim<3; dim++) |
238 | { |
239 | if (unlikely(mapping.invalid(dim))) |
240 | continue; |
241 | |
242 | const size_t l = bin0[dim]; |
243 | const size_t r = bin1[dim]; |
244 | |
245 | const unsigned int n = prim.primID(); |
246 | |
247 | // same bin optimization |
248 | if (likely(l == r)) |
249 | { |
250 | add(dim,l,l,l,prim.bounds(),n); |
251 | continue; |
252 | } |
253 | const size_t bin_start = bin0[dim]; |
254 | const size_t bin_end = bin1[dim]; |
255 | for (size_t bin=bin_start; bin<bin_end; bin++) |
256 | add(dim,l,r,bin,prim.bounds(),n); |
257 | } |
258 | } |
259 | } |
260 | |
261 | /*! merges in other binning information */ |
262 | void merge (const SpatialBinInfo& other) |
263 | { |
264 | for (size_t i=0; i<BINS; i++) |
265 | { |
266 | numBegin[i] += other.numBegin[i]; |
267 | numEnd [i] += other.numEnd [i]; |
268 | bounds[i][0].extend(other.bounds[i][0]); |
269 | bounds[i][1].extend(other.bounds[i][1]); |
270 | bounds[i][2].extend(other.bounds[i][2]); |
271 | } |
272 | } |
273 | |
274 | /*! merges in other binning information */ |
275 | static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b) |
276 | { |
277 | SpatialBinInfo c(empty); |
278 | for (size_t i=0; i<BINS; i++) |
279 | { |
280 | c.numBegin[i] += a.numBegin[i]+b.numBegin[i]; |
281 | c.numEnd [i] += a.numEnd [i]+b.numEnd [i]; |
282 | c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]); |
283 | c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]); |
284 | c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]); |
285 | } |
286 | return c; |
287 | } |
288 | |
289 | /*! finds the best split by scanning binning information */ |
290 | SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const |
291 | { |
292 | /* sweep from right to left and compute parallel prefix of merged bounds */ |
293 | vfloat4 rAreas[BINS]; |
294 | vuint4 rCounts[BINS]; |
295 | vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty; |
296 | for (size_t i=BINS-1; i>0; i--) |
297 | { |
298 | count += numEnd[i]; |
299 | rCounts[i] = count; |
300 | bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx); |
301 | by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by); |
302 | bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz); |
303 | rAreas[i][3] = 0.0f; |
304 | } |
305 | |
306 | /* sweep from left to right and compute SAH */ |
307 | vuint4 blocks_add = (1 << blocks_shift)-1; |
308 | vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0; |
309 | count = 0; bx = empty; by = empty; bz = empty; |
310 | for (size_t i=1; i<BINS; i++, ii+=1) |
311 | { |
312 | count += numBegin[i-1]; |
313 | bx.extend(bounds[i-1][0]); float Ax = halfArea(bx); |
314 | by.extend(bounds[i-1][1]); float Ay = halfArea(by); |
315 | bz.extend(bounds[i-1][2]); float Az = halfArea(bz); |
316 | const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); |
317 | const vfloat4 rArea = rAreas[i]; |
318 | const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); |
319 | const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift); |
320 | const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount)); |
321 | // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount))); |
322 | const vbool4 mask = sah < vbestSAH; |
323 | vbestPos = select(mask,ii ,vbestPos); |
324 | vbestSAH = select(mask,sah,vbestSAH); |
325 | vbestlCount = select(mask,count,vbestlCount); |
326 | vbestrCount = select(mask,rCounts[i],vbestrCount); |
327 | } |
328 | |
329 | /* find best dimension */ |
330 | float bestSAH = inf; |
331 | int bestDim = -1; |
332 | int bestPos = 0; |
333 | unsigned int bestlCount = 0; |
334 | unsigned int bestrCount = 0; |
335 | for (int dim=0; dim<3; dim++) |
336 | { |
337 | /* ignore zero sized dimensions */ |
338 | if (unlikely(mapping.invalid(dim))) |
339 | continue; |
340 | |
341 | /* test if this is a better dimension */ |
342 | if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) { |
343 | bestDim = dim; |
344 | bestPos = vbestPos[dim]; |
345 | bestSAH = vbestSAH[dim]; |
346 | bestlCount = vbestlCount[dim]; |
347 | bestrCount = vbestrCount[dim]; |
348 | } |
349 | } |
350 | assert(bestSAH >= 0.0f); |
351 | |
352 | /* return invalid split if no split found */ |
353 | if (bestDim == -1) |
354 | return SpatialBinSplit<BINS>(inf,-1,0,mapping); |
355 | |
356 | /* return best found split */ |
357 | return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping); |
358 | } |
359 | |
360 | private: |
361 | BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension |
362 | vuint4 numBegin[BINS]; //!< number of primitives starting in bin |
363 | vuint4 numEnd[BINS]; //!< number of primitives ending in bin |
364 | }; |
365 | } |
366 | } |
367 | |
368 | |