1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4// TODO:
5// - adjust parallel build thresholds
6// - openNodesBasedOnExtend should consider max extended size
7
8#pragma once
9
10#include "heuristic_binning.h"
11#include "heuristic_spatial.h"
12
13/* stop opening of all bref.geomIDs are the same */
14#define EQUAL_GEOMID_STOP_CRITERIA 1
15
16/* 10% spatial extend threshold */
17#define MAX_EXTEND_THRESHOLD 0.1f
18
19/* maximum is 8 children */
20#define MAX_OPENED_CHILD_NODES 8
21
22/* open until all build refs are below threshold size in one step */
23#define USE_LOOP_OPENING 0
24
25namespace embree
26{
27 namespace isa
28 {
29 /*! Performs standard object binning */
30 template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
31 struct HeuristicArrayOpenMergeSAH
32 {
33 typedef BinSplit<OBJECT_BINS> Split;
34 typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
35
36 static const size_t PARALLEL_THRESHOLD = 1024;
37 static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
38 static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
39
40 static const size_t MOVE_STEP_SIZE = 64;
41 static const size_t CREATE_SPLITS_STEP_SIZE = 128;
42
43 __forceinline HeuristicArrayOpenMergeSAH ()
44 : prims0(nullptr) {}
45
46 /*! remember prim array */
47 __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
48 : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
49 {
50 assert(max_open_size <= MAX_OPENED_CHILD_NODES);
51 }
52
53 struct OpenHeuristic
54 {
55 __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
56 {
57 const Vec3fa diag = pinfo.geomBounds.size();
58 dim = maxDim(diag);
59 assert(diag[dim] > 0.0f);
60 inv_max_extend = 1.0f / diag[dim];
61 }
62
63 __forceinline bool operator () ( PrimRef& prim ) const {
64 return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
65 }
66
67 private:
68 size_t dim;
69 float inv_max_extend;
70 };
71
72 /*! compute extended ranges */
73 __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
74 {
75 assert(set.ext_range_size() > 0);
76 const float left_factor = (float)lweight / (lweight + rweight);
77 const size_t ext_range_size = set.ext_range_size();
78 const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
79 const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
80 lset.set_ext_range(lset.end() + left_ext_range_size);
81 rset.set_ext_range(rset.end() + right_ext_range_size);
82 }
83
84 /*! move ranges */
85 __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
86 {
87 const size_t left_ext_range_size = lset.ext_range_size();
88 const size_t right_size = rset.size();
89
90 /* has the left child an extended range? */
91 if (left_ext_range_size > 0)
92 {
93 /* left extended range smaller than right range ? */
94 if (left_ext_range_size < right_size)
95 {
96 /* only move a small part of the beginning of the right range to the end */
97 parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
98 for (size_t i=r.begin(); i<r.end(); i++)
99 prims0[i+right_size] = prims0[i];
100 });
101 }
102 else
103 {
104 /* no overlap, move entire right range to new location, can be made fully parallel */
105 parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
106 for (size_t i=r.begin(); i<r.end(); i++)
107 prims0[i+left_ext_range_size] = prims0[i];
108 });
109 }
110 /* update right range */
111 assert(rset.ext_end() + left_ext_range_size == set.ext_end());
112 rset.move_right(left_ext_range_size);
113 }
114 }
115
116 /* estimates the extra space required when opening, and checks if all primitives are from same geometry */
117 __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
118 {
119 const OpenHeuristic heuristic(set);
120 const unsigned int geomID = prims0[set.begin()].geomID();
121
122 auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
123 bool commonGeomID = true;
124 size_t opens = 0;
125 for (size_t i=r.begin(); i<r.end(); i++) {
126 commonGeomID &= prims0[i].geomID() == geomID;
127 if (heuristic(prims0[i]))
128 opens += prims0[i].node.getN()-1; // coarse approximation
129 }
130 return std::pair<size_t,bool>(opens,commonGeomID);
131 };
132 auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
133 return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
134 };
135 return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
136 }
137
138 // FIXME: should consider maximum available extended size
139 __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
140 {
141 const OpenHeuristic heuristic(set);
142 const size_t ext_range_start = set.end();
143
144 if (false && set.size() < PARALLEL_THRESHOLD)
145 {
146 size_t extra_elements = 0;
147 for (size_t i=set.begin(); i<set.end(); i++)
148 {
149 if (heuristic(prims0[i]))
150 {
151 PrimRef tmp[MAX_OPENED_CHILD_NODES];
152 const size_t n = nodeOpenerFunc(prims0[i],tmp);
153 assert(extra_elements + n-1 <= set.ext_range_size());
154 for (size_t j=0; j<n; j++)
155 set.extend_center2(tmp[j]);
156
157 prims0[i] = tmp[0];
158 for (size_t j=1; j<n; j++)
159 prims0[ext_range_start+extra_elements+j-1] = tmp[j];
160 extra_elements += n-1;
161 }
162 }
163 set._end += extra_elements;
164 }
165 else
166 {
167 std::atomic<size_t> ext_elements;
168 ext_elements.store(0);
169 PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
170 PrimInfo info(empty);
171 for (size_t i=r.begin(); i<r.end(); i++)
172 if (heuristic(prims0[i]))
173 {
174 PrimRef tmp[MAX_OPENED_CHILD_NODES];
175 const size_t n = nodeOpenerFunc(prims0[i],tmp);
176 const size_t ID = ext_elements.fetch_add(n-1);
177 assert(ID + n-1 <= set.ext_range_size());
178
179 for (size_t j=0; j<n; j++)
180 info.extend_center2(tmp[j]);
181
182 prims0[i] = tmp[0];
183 for (size_t j=1; j<n; j++)
184 prims0[ext_range_start+ID+j-1] = tmp[j];
185 }
186 return info;
187 }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
188 set.centBounds.extend(info.centBounds);
189 assert(ext_elements.load() <= set.ext_range_size());
190 set._end += ext_elements.load();
191 }
192 }
193
194 __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
195 {
196 const OpenHeuristic heuristic(set);
197 size_t next_iteration_extra_elements = est_new_elements;
198
199 while (next_iteration_extra_elements <= set.ext_range_size())
200 {
201 next_iteration_extra_elements = 0;
202 size_t extra_elements = 0;
203 const size_t ext_range_start = set.end();
204
205 for (size_t i=set.begin(); i<set.end(); i++)
206 {
207 if (heuristic(prims0[i]))
208 {
209 PrimRef tmp[MAX_OPENED_CHILD_NODES];
210 const size_t n = nodeOpenerFunc(prims0[i],tmp);
211 assert(extra_elements + n-1 <= set.ext_range_size());
212 for (size_t j=0;j<n;j++)
213 set.extend_center2(tmp[j]);
214
215 prims0[i] = tmp[0];
216 for (size_t j=1;j<n;j++)
217 prims0[ext_range_start+extra_elements+j-1] = tmp[j];
218 extra_elements += n-1;
219
220 for (size_t j=0; j<n; j++)
221 if (heuristic(tmp[j]))
222 next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
223
224 }
225 }
226 assert( extra_elements <= set.ext_range_size());
227 set._end += extra_elements;
228
229 for (size_t i=set.begin();i<set.end();i++)
230 assert(prims0[i].numPrimitives() > 0);
231
232 if (unlikely(next_iteration_extra_elements == 0)) break;
233 }
234 }
235
236 __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
237 {
238 /* single element */
239 if (set.size() <= 1)
240 return Split();
241
242 /* disable opening if there is no overlap */
243 const size_t D = 4;
244 if (unlikely(set.has_ext_range() && set.size() <= D))
245 {
246 bool disjoint = true;
247 for (size_t j=set.begin(); j<set.end()-1; j++) {
248 for (size_t i=set.begin()+1; i<set.end(); i++) {
249 if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
250 disjoint = false; break;
251 }
252 }
253 }
254 if (disjoint) set.set_ext_range(set.end()); /* disables opening */
255 }
256
257 std::pair<size_t,bool> p(0,false);
258
259 /* disable opening when all primitives are from same geometry */
260 if (unlikely(set.has_ext_range()))
261 {
262 p = getProperties(set);
263#if EQUAL_GEOMID_STOP_CRITERIA == 1
264 if (p.second) set.set_ext_range(set.end()); /* disable opening */
265#endif
266 }
267
268 /* open nodes when we have sufficient space available */
269 if (unlikely(set.has_ext_range()))
270 {
271#if USE_LOOP_OPENING == 1
272 openNodesBasedOnExtendLoop(set,p.first);
273#else
274 if (p.first <= set.ext_range_size())
275 openNodesBasedOnExtend(set);
276#endif
277
278 /* disable opening when insufficient space for opening a node available */
279 if (set.ext_range_size() < max_open_size-1)
280 set.set_ext_range(set.end()); /* disable opening */
281 }
282
283 /* find best split */
284 return object_find(set,logBlockSize);
285 }
286
287
288 /*! finds the best object split */
289 __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
290 {
291 if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
292 else return parallel_object_find (set,logBlockSize);
293 }
294
295 /*! finds the best object split */
296 __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
297 {
298 Binner binner(empty);
299 const BinMapping<OBJECT_BINS> mapping(set.centBounds);
300 binner.bin(prims0,set.begin(),set.end(),mapping);
301 return binner.best(mapping,logBlockSize);
302 }
303
304 /*! finds the best split */
305 __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
306 {
307 Binner binner(empty);
308 const BinMapping<OBJECT_BINS> mapping(set.centBounds);
309 const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
310 auto body = [&] (const range<size_t>& r) -> Binner {
311 Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
312 };
313 auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
314 Binner r = b0; r.merge(b1,_mapping.size()); return r;
315 };
316 binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
317 return binner.best(mapping,logBlockSize);
318 }
319
320 /*! array partitioning */
321 __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
322 {
323 PrimInfoExtRange set = set_i;
324
325 /* valid split */
326 if (unlikely(!split.valid())) {
327 deterministic_order(set);
328 splitFallback(set,lset,rset);
329 return;
330 }
331
332 std::pair<size_t,size_t> ext_weights(0,0);
333
334 /* object split */
335 if (likely(set.size() < PARALLEL_THRESHOLD))
336 ext_weights = sequential_object_split(split,set,lset,rset);
337 else
338 ext_weights = parallel_object_split(split,set,lset,rset);
339
340 /* if we have an extended range, set extended child ranges and move right split range */
341 if (unlikely(set.has_ext_range()))
342 {
343 setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
344 moveExtentedRange(set,lset,rset);
345 }
346 }
347
348 /*! array partitioning */
349 std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
350 {
351 const size_t begin = set.begin();
352 const size_t end = set.end();
353 PrimInfo local_left(empty);
354 PrimInfo local_right(empty);
355 const unsigned int splitPos = split.pos;
356 const unsigned int splitDim = split.dim;
357 const unsigned int splitDimMask = (unsigned int)1 << splitDim;
358
359 const vint4 vSplitPos(splitPos);
360 const vbool4 vSplitMask( (int)splitDimMask );
361
362 size_t center = serial_partitioning(prims0,
363 begin,end,local_left,local_right,
364 [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
365 [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });
366
367 new (&lset) PrimInfoExtRange(begin,center,center,local_left);
368 new (&rset) PrimInfoExtRange(center,end,end,local_right);
369 assert(area(lset.geomBounds) >= 0.0f);
370 assert(area(rset.geomBounds) >= 0.0f);
371 return std::pair<size_t,size_t>(local_left.size(),local_right.size());
372 }
373
374 /*! array partitioning */
375 __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
376 {
377 const size_t begin = set.begin();
378 const size_t end = set.end();
379 PrimInfo left(empty);
380 PrimInfo right(empty);
381 const unsigned int splitPos = split.pos;
382 const unsigned int splitDim = split.dim;
383 const unsigned int splitDimMask = (unsigned int)1 << splitDim;
384
385 const vint4 vSplitPos(splitPos);
386 const vbool4 vSplitMask( (int)splitDimMask );
387 auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
388
389 const size_t center = parallel_partitioning(
390 prims0,begin,end,EmptyTy(),left,right,isLeft,
391 [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
392 [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
393 PARALLEL_PARTITION_BLOCK_SIZE);
394
395 new (&lset) PrimInfoExtRange(begin,center,center,left);
396 new (&rset) PrimInfoExtRange(center,end,end,right);
397 assert(area(lset.geomBounds) >= 0.0f);
398 assert(area(rset.geomBounds) >= 0.0f);
399
400 return std::pair<size_t,size_t>(left.size(),right.size());
401 }
402
403 void deterministic_order(const extended_range<size_t>& set)
404 {
405 /* required as parallel partition destroys original primitive order */
406 std::sort(&prims0[set.begin()],&prims0[set.end()]);
407 }
408
409 __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
410 {
411 const size_t begin = set.begin();
412 const size_t end = set.end();
413 const size_t center = (begin + end)/2;
414
415 PrimInfo left(empty);
416 for (size_t i=begin; i<center; i++)
417 left.add_center2(prims0[i]);
418
419 const size_t lweight = left.end;
420
421 PrimInfo right(empty);
422 for (size_t i=center; i<end; i++)
423 right.add_center2(prims0[i]);
424
425 const size_t rweight = right.end;
426 new (&lset) PrimInfoExtRange(begin,center,center,left);
427 new (&rset) PrimInfoExtRange(center,end,end,right);
428
429 /* if we have an extended range */
430 if (set.has_ext_range())
431 {
432 setExtentedRanges(set,lset,rset,lweight,rweight);
433 moveExtentedRange(set,lset,rset);
434 }
435 }
436
437 private:
438 PrimRef* const prims0;
439 const NodeOpenerFunc& nodeOpenerFunc;
440 size_t max_open_size;
441 };
442 }
443}
444