1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#define MBLUR_NUM_TEMPORAL_BINS 2
7#define MBLUR_NUM_OBJECT_BINS 32
8
9#include "../bvh/bvh.h"
10#include "../common/primref_mb.h"
11#include "heuristic_binning_array_aligned.h"
12#include "heuristic_timesplit_array.h"
13
14namespace embree
15{
16 namespace isa
17 {
18 template<typename T>
19 struct SharedVector
20 {
21 __forceinline SharedVector() {}
22
23 __forceinline SharedVector(T* ptr, size_t refCount = 1)
24 : prims(ptr), refCount(refCount) {}
25
26 __forceinline void incRef() {
27 refCount++;
28 }
29
30 __forceinline void decRef()
31 {
32 if (--refCount == 0)
33 delete prims;
34 }
35
36 T* prims;
37 size_t refCount;
38 };
39
    /*! Maintains the set of child build records of a node while it is being
     *  split. Temporal splits may allocate a fresh primitive vector, so each
     *  child record can reference a different vector; the vectors are shared
     *  through reference-counted SharedVector handles that are
     *  placement-allocated from the fixed sharedPrimVecs pool. Each split of
     *  one child into two consumes at most two new pool slots, hence the pool
     *  size of 2*MAX_BRANCHING_FACTOR is sufficient. */
    template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
    struct LocalChildListT
    {
      typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;

      /*! initializes the list with the root record of this subtree */
      __forceinline LocalChildListT (const BuildRecord& record)
        : numChildren(1), numSharedPrimVecs(1)
      {
        /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
        children[0] = record;
        primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
      }

      /*! drops this list's reference to every child's primitive vector;
       *  vectors still referenced by an ancestor survive */
      __forceinline ~LocalChildListT()
      {
        for (size_t i = 0; i < numChildren; i++)
          primvecs[i]->decRef();
      }

      /*! access to the i'th child record */
      __forceinline BuildRecord& operator[] ( const size_t i ) {
        return children[i];
      }

      /*! number of child records currently in the list */
      __forceinline size_t size() const {
        return numChildren;
      }

      /*! Replaces child bestChild by lrecord and appends rrecord, updating the
       *  shared-vector reference counts. new_vector (non-null only for temporal
       *  splits) is the freshly allocated vector the new records point into;
       *  ownership of it is transferred to the SharedPrimRefVector created
       *  below, which is why it is release()d rather than allowed to delete. */
      __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
      {
        SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
        /* left child: either keeps using the split child's vector (add a
           reference) or points into the newly allocated one */
        if (lrecord.prims.prims == bsharedPrimVec->prims) {
          primvecs[bestChild] = bsharedPrimVec; // no-op assignment, kept for symmetry with the right child below
          bsharedPrimVec->incRef();
        }
        else {
          primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
        }

        /* right child: same choice, stored in the next free child slot */
        if (rrecord.prims.prims == bsharedPrimVec->prims) {
          primvecs[numChildren] = bsharedPrimVec;
          bsharedPrimVec->incRef();
        }
        else {
          primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
        }
        /* the split child no longer exists as such: drop its reference only
           after both children registered theirs, so the vector cannot get
           freed while still needed */
        bsharedPrimVec->decRef();
        new_vector.release();

        children[bestChild] = lrecord;
        children[numChildren] = rrecord;
        numChildren++;
      }

    public:
      array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;         //!< the child build records
      array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs; //!< per-child handle to the vector its prims live in
      size_t numChildren;                                          //!< number of valid entries in children/primvecs

      array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs; //!< pool the handles are placement-allocated from
      size_t numSharedPrimVecs;                                    //!< number of pool slots used so far
    };
101
102 template<typename Mesh>
103 struct RecalculatePrimRef
104 {
105 Scene* scene;
106
107 __forceinline RecalculatePrimRef (Scene* scene)
108 : scene(scene) {}
109
110 __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
111 {
112 const unsigned geomID = prim.geomID();
113 const unsigned primID = prim.primID();
114 const Mesh* mesh = scene->get<Mesh>(geomID);
115 const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
116 const range<int> tbounds = mesh->timeSegmentRange(time_range);
117 return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
118 }
119
120 // __noinline is workaround for ICC16 bug under MacOSX
121 __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
122 {
123 const unsigned geomID = prim.geomID();
124 const unsigned primID = prim.primID();
125 const Mesh* mesh = scene->get<Mesh>(geomID);
126 const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
127 const range<int> tbounds = mesh->timeSegmentRange(time_range);
128 return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
129 }
130
131 __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
132 return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
133 }
134
135 // __noinline is workaround for ICC16 bug under MacOSX
136 __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
137 return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
138 }
139 };
140
141 struct VirtualRecalculatePrimRef
142 {
143 Scene* scene;
144
145 __forceinline VirtualRecalculatePrimRef (Scene* scene)
146 : scene(scene) {}
147
148 __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
149 {
150 const unsigned geomID = prim.geomID();
151 const unsigned primID = prim.primID();
152 const Geometry* mesh = scene->get(geomID);
153 const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
154 const range<int> tbounds = mesh->timeSegmentRange(time_range);
155 return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
156 }
157
158 __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
159 {
160 const unsigned geomID = prim.geomID();
161 const unsigned primID = prim.primID();
162 const Geometry* mesh = scene->get(geomID);
163 const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
164 const range<int> tbounds = mesh->timeSegmentRange(time_range);
165 return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
166 }
167
168 __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
169 return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
170 }
171
172 __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
173 return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
174 }
175 };
176
177 struct BVHBuilderMSMBlur
178 {
179 /*! settings for msmblur builder */
180 struct Settings
181 {
182 /*! default settings */
183 Settings ()
184 : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
185 travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
186 singleThreadThreshold(1024) {}
187
188
189 Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
190 : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
191 travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
192 {
193 minLeafSize = min(minLeafSize,maxLeafSize);
194 }
195
196 public:
197 size_t branchingFactor; //!< branching factor of BVH to build
198 size_t maxDepth; //!< maximum depth of BVH to build
199 size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
200 size_t minLeafSize; //!< minimum size of a leaf
201 size_t maxLeafSize; //!< maximum size of a leaf
202 float travCost; //!< estimated cost of one traversal step
203 float intCost; //!< estimated cost of one primitive intersection
204 bool singleLeafTimeSegment; //!< split time to single time range
205 size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
206 };
207
208 struct BuildRecord
209 {
210 public:
211 __forceinline BuildRecord () {}
212
213 __forceinline BuildRecord (size_t depth)
214 : depth(depth) {}
215
216 __forceinline BuildRecord (const SetMB& prims, size_t depth)
217 : depth(depth), prims(prims) {}
218
219 __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
220 return a.prims.size() < b.prims.size();
221 }
222
223 __forceinline size_t size() const {
224 return prims.size();
225 }
226
227 public:
228 size_t depth; //!< Depth of the root of this subtree.
229 SetMB prims; //!< The list of primitives.
230 };
231
232 struct BuildRecordSplit : public BuildRecord
233 {
234 __forceinline BuildRecordSplit () {}
235
236 __forceinline BuildRecordSplit (size_t depth)
237 : BuildRecord(depth) {}
238
239 __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
240 : BuildRecord(record), split(split) {}
241
242 BinSplit<MBLUR_NUM_OBJECT_BINS> split;
243 };
244
      /*! Generic multi-segment motion-blur BVH builder. Recursively splits
       *  the primitive set using object splits, temporal splits, and fallback
       *  splits, and creates nodes/leaves through the user-supplied functors. */
      template<
        typename NodeRef,
        typename RecalculatePrimRef,
        typename Allocator,
        typename CreateAllocFunc,
        typename CreateNodeFunc,
        typename SetNodeFunc,
        typename CreateLeafFunc,
        typename ProgressMonitor>

        class BuilderT
        {
          ALIGNED_CLASS_(16);
          static const size_t MAX_BRANCHING_FACTOR = 16;  //!< maximum supported BVH branching factor
          static const size_t MIN_LARGE_LEAF_LEVELS = 8;  //!< create balanced tree if we are that many levels before the maximum tree depth

          typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;                      //!< node reference plus bounds and time range
          typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;                          //!< split description (object/temporal/fallback/geomID)
          typedef mvector<PrimRefMB>* PrimRefVector;                              //!< raw pointer to a primitive vector
          typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;           //!< ref-counted primitive vector handle
          typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;           //!< child list used by recurse()
          typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit; //!< child list used by createLargeLeaf()

        public:

          /*! stores all functors and settings; rejects branching factors that
           *  exceed the statically sized child arrays */
          BuilderT (MemoryMonitorInterface* device,
                    const RecalculatePrimRef recalculatePrimRef,
                    const CreateAllocFunc createAlloc,
                    const CreateNodeFunc createNode,
                    const SetNodeFunc setNode,
                    const CreateLeafFunc createLeaf,
                    const ProgressMonitor progressMonitor,
                    const Settings& settings)
            : cfg(settings),
              heuristicObjectSplit(),
              heuristicTemporalSplit(device, recalculatePrimRef),
              recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
              progressMonitor(progressMonitor)
          {
            if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
              throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
          }

          /*! finds the best split */
          const Split find(const SetMB& set)
          {
            /* first try standard object split */
            const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
            const float object_split_sah = object_split.splitSAH();

            /* test temporal splits only when object split was bad */
            const float leaf_sah = set.leafSAH(cfg.logBlockSize);
            if (object_split_sah < 0.50f*leaf_sah)
              return object_split;

            /* do temporal splits only if the time range is big enough */
            if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
            {
              const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
              const float temporal_split_sah = temporal_split.splitSAH();

              /* take temporal split if it improved SAH */
              if (temporal_split_sah < object_split_sah)
                return temporal_split;
            }

            return object_split;
          }

          /*! array partitioning; returns a non-null vector only for temporal
           *  splits, which allocate fresh primitive storage for one side */
          __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
          {
            /* perform object split */
            if (likely(split.data == Split::SPLIT_OBJECT)) {
              heuristicObjectSplit.split(split,set,lset,rset);
            }
            /* perform temporal split */
            else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
              return heuristicTemporalSplit.split(split,set,lset,rset);
            }
            /* perform fallback split */
            else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
              set.deterministic_order();
              splitFallback(set,lset,rset);
            }
            /* split by geometry */
            else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
              set.deterministic_order();
              splitByGeometry(set,lset,rset);
            }
            else
              assert(false);

            return std::unique_ptr<mvector<PrimRefMB>>();
          }

          /*! finds the best fallback split */
          __noinline Split findFallback(const SetMB& set)
          {
            /* split if primitives are not from same geometry */
            if (!sameGeometry(set))
              return Split(0.0f,Split::SPLIT_GEOMID);

            /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
            if (cfg.singleLeafTimeSegment)
            {
              /* test if one primitive has more than one time segment in time range, if so split time */
              for (size_t i=set.begin(); i<set.end(); i++)
              {
                const PrimRefMB& prim = (*set.prims)[i];
                const range<int> itime_range = prim.timeSegmentRange(set.time_range);
                const int localTimeSegments = itime_range.size();
                assert(localTimeSegments > 0);
                if (localTimeSegments > 1) {
                  /* split at the middle time step of the offending primitive */
                  const int icenter = (itime_range.begin() + itime_range.end())/2;
                  const float splitTime = prim.timeStep(icenter);
                  return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
                }
              }
            }

            /* otherwise return fallback split */
            return Split(0.0f,Split::SPLIT_FALLBACK);
          }

          /*! performs fallback split: a median split of the primitive range */
          void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
          {
            mvector<PrimRefMB>& prims = *set.prims;

            const size_t begin = set.begin();
            const size_t end = set.end();
            const size_t center = (begin + end + 1) / 2; // round up: left half gets the extra element

            PrimInfoMB linfo = empty;
            for (size_t i=begin; i<center; i++)
              linfo.add_primref(prims[i]);

            PrimInfoMB rinfo = empty;
            for (size_t i=center; i<end; i++)
              rinfo.add_primref(prims[i]);

            /* placement-new as both output sets live in caller-provided storage */
            new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
            new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end  ),set.time_range);
          }

          /*! checks if all primitives are from the same geometry */
          __forceinline bool sameGeometry(const SetMB& set)
          {
            if (set.size() == 0) return true;
            mvector<PrimRefMB>& prims = *set.prims;
            const size_t begin = set.begin();
            const size_t end   = set.end();
            unsigned int firstGeomID = prims[begin].geomID();
            for (size_t i=begin+1; i<end; i++) {
              if (prims[i].geomID() != firstGeomID){
                return false;
              }
            }
            return true;
          }

          /* split by geometry ID: partitions the first geometry's primitives to the left */
          void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
          {
            assert(set.size() > 1);

            mvector<PrimRefMB>& prims = *set.prims;
            const size_t begin = set.begin();
            const size_t end   = set.end();

            PrimInfoMB left(empty);
            PrimInfoMB right(empty);
            unsigned int geomID = prims[begin].geomID();
            size_t center = serial_partitioning(prims.data(),begin,end,left,right,
                                                [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
                                                [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });

            new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
            new (&rset) SetMB(right,set.prims,range<size_t>(center,end  ),set.time_range);
          }

          /*! creates a subtree for a record that must become a (possibly
           *  multi-level) leaf, using only fallback splits */
          const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
          {
            /* this should never occur but is a fatal error */
            if (in.depth > cfg.maxDepth)
              throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

            /* replace already found split by fallback split */
            const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));

            /* special case when directly creating leaf without any splits that could shrink time_range */
            bool force_split = false;
            if (current.depth == 1 && current.size() > 0)
            {
              BBox1f c = empty;
              BBox1f p = current.prims.time_range;
              for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
                mvector<PrimRefMB>& prims = *current.prims.prims;
                c.extend(prims[i].time_range);
              }

              /* force a split if the union of primitive time ranges is smaller than the set's time range */
              force_split = c.lower > p.lower || c.upper < p.upper;
            }

            /* create leaf for few primitives */
            if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
              return createLeaf(current,alloc);

            /* fill all children by always splitting the largest one */
            bool hasTimeSplits = false;
            NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
            LocalChildListSplit children(current);

            do {
              /* find best child with largest bounding box area */
              size_t bestChild = -1; // size_t(-1) marks "none found" (see comparison below)
              size_t bestSize = 0;
              for (size_t i=0; i<children.size(); i++)
              {
                /* ignore leaves as they cannot get split */
                if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
                  continue;

                /* the forced split only applies to the first candidate pass */
                force_split = false;

                /* remember child with largest size */
                if (children[i].size() > bestSize) {
                  bestSize = children[i].size();
                  bestChild = i;
                }
              }
              if (bestChild == -1) break;

              /* perform best found split */
              BuildRecordSplit& brecord = children[bestChild];
              BuildRecordSplit lrecord(current.depth+1);
              BuildRecordSplit rrecord(current.depth+1);
              std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
              hasTimeSplits |= new_vector != nullptr;

              /* find new splits */
              lrecord.split = findFallback(lrecord.prims);
              rrecord.split = findFallback(rrecord.prims);
              children.split(bestChild,lrecord,rrecord,std::move(new_vector));

            } while (children.size() < cfg.branchingFactor);

            /* detect time_ranges that have shrunken */
            for (size_t i=0; i<children.size(); i++) {
              const BBox1f c = children[i].prims.time_range;
              const BBox1f p = in.prims.time_range;
              hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
            }

            /* create node */
            auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);

            /* recurse into each child and perform reduction */
            LBBox3fa gbounds = empty;
            for (size_t i=0; i<children.size(); i++) {
              values[i] = createLargeLeaf(children[i],alloc);
              gbounds.extend(values[i].lbounds);
            }

            setNode(current,children.children.data(),node,values,children.numChildren);

            /* calculate geometry bounds of this node */
            if (hasTimeSplits)
              /* after time splits the merged child bounds are not valid for the
                 full time range, so recompute bounds from the primitives */
              return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
            else
              return NodeRecordMB4D(node,gbounds,current.prims.time_range);
          }

          /*! recursively builds the subtree for the given record and returns
           *  its node reference with bounds; toplevel records above the
           *  single-thread threshold recurse in parallel */
          const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
          {
            /* get thread local allocator */
            if (!alloc)
              alloc = createAlloc();

            /* call memory monitor function to signal progress */
            if (toplevel && current.size() <= cfg.singleThreadThreshold)
              progressMonitor(current.size());

            /*! find best split */
            const Split csplit = find(current.prims);

            /*! compute leaf and split cost */
            const float leafSAH  = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
            const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
            assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

            /*! create a leaf node when threshold reached or SAH tells us to stop */
            if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
              current.prims.deterministic_order();
              return createLargeLeaf(current,alloc);
            }

            /*! perform initial split */
            SetMB lprims,rprims;
            std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
            bool hasTimeSplits = new_vector != nullptr;
            NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
            LocalChildList children(current);
            {
              BuildRecord lrecord(lprims,current.depth+1);
              BuildRecord rrecord(rprims,current.depth+1);
              children.split(0,lrecord,rrecord,std::move(new_vector));
            }

            /*! split until node is full or SAH tells us to stop */
            while (children.size() < cfg.branchingFactor)
            {
              /*! find best child to split (largest expected half area) */
              float bestArea = neg_inf;
              ssize_t bestChild = -1;
              for (size_t i=0; i<children.size(); i++)
              {
                if (children[i].size() <= cfg.minLeafSize) continue;
                if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
                  bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
                }
              }
              if (bestChild == -1) break;

              /* perform split */
              BuildRecord& brecord = children[bestChild];
              BuildRecord lrecord(current.depth+1);
              BuildRecord rrecord(current.depth+1);
              Split csplit = find(brecord.prims);
              std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
              hasTimeSplits |= new_vector != nullptr;
              children.split(bestChild,lrecord,rrecord,std::move(new_vector));
            }

            /* detect time_ranges that have shrunken */
            for (size_t i=0; i<children.size(); i++) {
              const BBox1f c = children[i].prims.time_range;
              const BBox1f p = current.prims.time_range;
              hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
            }

            /* sort buildrecords for simpler shadow ray traversal */
            //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (needs to be verified) !!

            /*! create an inner node */
            auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
            LBBox3fa gbounds = empty;

            /* spawn tasks */
            if (unlikely(current.size() > cfg.singleThreadThreshold))
            {
              /*! parallel_for is faster than spawning sub-tasks */
              parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  values[i] = recurse(children[i],nullptr,true);
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });

              /*! merge bounding boxes */
              for (size_t i=0; i<children.size(); i++)
                gbounds.extend(values[i].lbounds);
            }
            /* recurse into each child */
            else
            {
              //for (size_t i=0; i<children.size(); i++)
              for (ssize_t i=children.size()-1; i>=0; i--) {
                values[i] = recurse(children[i],alloc,false);
                gbounds.extend(values[i].lbounds);
              }
            }

            setNode(current,children.children.data(),node,values,children.numChildren);

            /* calculate geometry bounds of this node */
            if (unlikely(hasTimeSplits))
              /* after time splits the merged child bounds are not valid for the
                 full time range, so recompute bounds from the primitives */
              return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
            else
              return NodeRecordMB4D(node,gbounds,current.prims.time_range);
          }

          /*! builder entry function */
          __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
          {
            const SetMB set(pinfo,&prims);
            auto ret = recurse(BuildRecord(set,1),nullptr,true);
            _mm_mfence(); // to allow non-temporal stores during build
            return ret;
          }

        private:
          Settings cfg;                                                                         //!< build settings
          HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;        //!< object split heuristic
          HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit; //!< temporal split heuristic
          const RecalculatePrimRef recalculatePrimRef;  //!< recomputes primrefs for reduced time ranges
          const CreateAllocFunc createAlloc;            //!< creates thread-local allocators
          const CreateNodeFunc createNode;              //!< allocates an inner node
          const SetNodeFunc setNode;                    //!< links children into an inner node
          const CreateLeafFunc createLeaf;              //!< creates a leaf node
          const ProgressMonitor progressMonitor;        //!< progress callback
        };
648
649 template<typename NodeRef,
650 typename RecalculatePrimRef,
651 typename CreateAllocFunc,
652 typename CreateNodeFunc,
653 typename SetNodeFunc,
654 typename CreateLeafFunc,
655 typename ProgressMonitorFunc>
656
657 static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
658 const PrimInfoMB& pinfo,
659 MemoryMonitorInterface* device,
660 const RecalculatePrimRef recalculatePrimRef,
661 const CreateAllocFunc createAlloc,
662 const CreateNodeFunc createNode,
663 const SetNodeFunc setNode,
664 const CreateLeafFunc createLeaf,
665 const ProgressMonitorFunc progressMonitor,
666 const Settings& settings)
667 {
668 typedef BuilderT<
669 NodeRef,
670 RecalculatePrimRef,
671 decltype(createAlloc()),
672 CreateAllocFunc,
673 CreateNodeFunc,
674 SetNodeFunc,
675 CreateLeafFunc,
676 ProgressMonitorFunc> Builder;
677
678 Builder builder(device,
679 recalculatePrimRef,
680 createAlloc,
681 createNode,
682 setNode,
683 createLeaf,
684 progressMonitor,
685 settings);
686
687
688 return builder(prims,pinfo);
689 }
690 };
691 }
692}
693