1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "heuristic_binning.h"
7
8namespace embree
9{
10 namespace isa
11 {
12 struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
13 {
14 __forceinline PrimInfoRange () {
15 }
16
17 __forceinline PrimInfoRange(const PrimInfo& pinfo)
18 : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
19
20 __forceinline PrimInfoRange(EmptyTy)
21 : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
22
23 __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
24 : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
25
26 __forceinline float leafSAH() const {
27 return expectedApproxHalfArea(geomBounds)*float(size());
28 }
29
30 __forceinline float leafSAH(size_t block_shift) const {
31 return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
32 }
33 };
34
35 /*! Performs standard object binning */
36 template<typename PrimRef, size_t BINS>
37 struct HeuristicArrayBinningSAH
38 {
39 typedef BinSplit<BINS> Split;
40 typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
41 typedef range<size_t> Set;
42
43 static const size_t PARALLEL_THRESHOLD = 3 * 1024;
44 static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
45 static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
46
47 __forceinline HeuristicArrayBinningSAH ()
48 : prims(nullptr) {}
49
50 /*! remember prim array */
51 __forceinline HeuristicArrayBinningSAH (PrimRef* prims)
52 : prims(prims) {}
53
54 /*! finds the best split */
55 __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
56 {
57 if (likely(pinfo.size() < PARALLEL_THRESHOLD))
58 return find_template<false>(pinfo,logBlockSize);
59 else
60 return find_template<true>(pinfo,logBlockSize);
61 }
62
63 template<bool parallel>
64 __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
65 {
66 Binner binner(empty);
67 const BinMapping<BINS> mapping(pinfo);
68 bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
69 return binner.best(mapping,logBlockSize);
70 }
71
72 /*! array partitioning */
73 __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
74 {
75 if (likely(pinfo.size() < PARALLEL_THRESHOLD))
76 split_template<false>(split,pinfo,linfo,rinfo);
77 else
78 split_template<true>(split,pinfo,linfo,rinfo);
79 }
80
81 template<bool parallel>
82 __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
83 {
84 if (!split.valid()) {
85 deterministic_order(set);
86 return splitFallback(set,lset,rset);
87 }
88
89 const size_t begin = set.begin();
90 const size_t end = set.end();
91 CentGeomBBox3fa local_left(empty);
92 CentGeomBBox3fa local_right(empty);
93 const unsigned int splitPos = split.pos;
94 const unsigned int splitDim = split.dim;
95 const unsigned int splitDimMask = (unsigned int)1 << splitDim;
96
97 const typename Binner::vint vSplitPos(splitPos);
98 const typename Binner::vbool vSplitMask(splitDimMask);
99 auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
100
101 size_t center = 0;
102 if (!parallel)
103 center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
104 [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
105 else
106 center = parallel_partitioning(
107 prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
108 [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
109 [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
110 PARALLEL_PARTITION_BLOCK_SIZE);
111
112 new (&lset) PrimInfoRange(begin,center,local_left);
113 new (&rset) PrimInfoRange(center,end,local_right);
114 assert(area(lset.geomBounds) >= 0.0f);
115 assert(area(rset.geomBounds) >= 0.0f);
116 }
117
118 void deterministic_order(const PrimInfoRange& pinfo)
119 {
120 /* required as parallel partition destroys original primitive order */
121 std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
122 }
123
124 void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
125 {
126 const size_t begin = pinfo.begin();
127 const size_t end = pinfo.end();
128 const size_t center = (begin + end)/2;
129
130 CentGeomBBox3fa left(empty);
131 for (size_t i=begin; i<center; i++)
132 left.extend_center2(prims[i]);
133 new (&linfo) PrimInfoRange(begin,center,left);
134
135 CentGeomBBox3fa right(empty);
136 for (size_t i=center; i<end; i++)
137 right.extend_center2(prims[i]);
138 new (&rinfo) PrimInfoRange(center,end,right);
139 }
140
141 void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
142 {
143 assert(range.size() > 1);
144 CentGeomBBox3fa left(empty);
145 CentGeomBBox3fa right(empty);
146 unsigned int geomID = prims[range.begin()].geomID();
147 size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,
148 [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },
149 [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });
150
151 new (&linfo) PrimInfoRange(range.begin(),center,left);
152 new (&rinfo) PrimInfoRange(center,range.end(),right);
153 }
154
155 private:
156 PrimRef* const prims;
157 };
158
159 /*! Performs standard object binning */
160 template<typename PrimRefMB, size_t BINS>
161 struct HeuristicArrayBinningMB
162 {
163 typedef BinSplit<BINS> Split;
164 typedef typename PrimRefMB::BBox BBox;
165 typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
166 static const size_t PARALLEL_THRESHOLD = 3 * 1024;
167 static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
168 static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
169
170 /*! finds the best split */
171 const Split find(const SetMB& set, const size_t logBlockSize)
172 {
173 ObjectBinner binner(empty);
174 const BinMapping<BINS> mapping(set.size(),set.centBounds);
175 bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
176 Split osplit = binner.best(mapping,logBlockSize);
177 osplit.sah *= set.time_range.size();
178 if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
179 return osplit;
180 }
181
182 /*! array partitioning */
183 __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
184 {
185 const size_t begin = set.begin();
186 const size_t end = set.end();
187 PrimInfoMB left = empty;
188 PrimInfoMB right = empty;
189 const vint4 vSplitPos(split.pos);
190 const vbool4 vSplitMask(1 << split.dim);
191 auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
192 auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
193 auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
194 size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
195 new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
196 new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
197 }
198 };
199 }
200}
201