1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../common/default.h" |
7 | #include "../common/primref.h" |
8 | #include "../common/primref_mb.h" |
9 | |
10 | namespace embree |
11 | { |
12 | // FIXME: maybe there's a better place for this util fct |
13 | __forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2) |
14 | { |
15 | const Vec3fa e0 = v1-v0; |
16 | const Vec3fa e1 = v2-v0; |
17 | const Vec3fa d = cross(e0,e1); |
18 | return fabs(d.x) + fabs(d.y) + fabs(d.z); |
19 | } |
20 | |
21 | //namespace isa |
22 | //{ |
23 | template<typename BBox> |
24 | class CentGeom |
25 | { |
26 | public: |
27 | __forceinline CentGeom () {} |
28 | |
29 | __forceinline CentGeom (EmptyTy) |
30 | : geomBounds(empty), centBounds(empty) {} |
31 | |
32 | __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds) |
33 | : geomBounds(geomBounds), centBounds(centBounds) {} |
34 | |
35 | template<typename PrimRef> |
36 | __forceinline void extend_primref(const PrimRef& prim) |
37 | { |
38 | BBox bounds; Vec3fa center; |
39 | prim.binBoundsAndCenter(bounds,center); |
40 | geomBounds.extend(bounds); |
41 | centBounds.extend(center); |
42 | } |
43 | |
44 | template<typename PrimRef> |
45 | __forceinline void extend_center2(const PrimRef& prim) |
46 | { |
47 | BBox3fa bounds = prim.bounds(); |
48 | geomBounds.extend(bounds); |
49 | centBounds.extend(bounds.center2()); |
50 | } |
51 | |
52 | __forceinline void extend(const BBox& geomBounds_) { |
53 | geomBounds.extend(geomBounds_); |
54 | centBounds.extend(center2(geomBounds_)); |
55 | } |
56 | |
57 | __forceinline void merge(const CentGeom& other) |
58 | { |
59 | geomBounds.extend(other.geomBounds); |
60 | centBounds.extend(other.centBounds); |
61 | } |
62 | |
63 | static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) { |
64 | CentGeom r = a; r.merge(b); return r; |
65 | } |
66 | |
67 | public: |
68 | BBox geomBounds; //!< geometry bounds of primitives |
69 | BBox3fa centBounds; //!< centroid bounds of primitives |
70 | }; |
71 | |
72 | typedef CentGeom<BBox3fa> CentGeomBBox3fa; |
73 | |
74 | /*! stores bounding information for a set of primitives */ |
75 | template<typename BBox> |
76 | class PrimInfoT : public CentGeom<BBox> |
77 | { |
78 | public: |
79 | using CentGeom<BBox>::geomBounds; |
80 | using CentGeom<BBox>::centBounds; |
81 | |
82 | __forceinline PrimInfoT () {} |
83 | |
84 | __forceinline PrimInfoT (EmptyTy) |
85 | : CentGeom<BBox>(empty), begin(0), end(0) {} |
86 | |
87 | __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) |
88 | : CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {} |
89 | |
90 | template<typename PrimRef> |
91 | __forceinline void add_primref(const PrimRef& prim) |
92 | { |
93 | CentGeom<BBox>::extend_primref(prim); |
94 | end++; |
95 | } |
96 | |
97 | template<typename PrimRef> |
98 | __forceinline void add_center2(const PrimRef& prim) { |
99 | CentGeom<BBox>::extend_center2(prim); |
100 | end++; |
101 | } |
102 | |
103 | template<typename PrimRef> |
104 | __forceinline void add_center2(const PrimRef& prim, const size_t i) { |
105 | CentGeom<BBox>::extend_center2(prim); |
106 | end+=i; |
107 | } |
108 | |
109 | /*__forceinline void add(const BBox& geomBounds_) { |
110 | CentGeom<BBox>::extend(geomBounds_); |
111 | end++; |
112 | } |
113 | |
114 | __forceinline void add(const BBox& geomBounds_, const size_t i) { |
115 | CentGeom<BBox>::extend(geomBounds_); |
116 | end+=i; |
117 | }*/ |
118 | |
119 | __forceinline void merge(const PrimInfoT& other) |
120 | { |
121 | CentGeom<BBox>::merge(other); |
122 | begin += other.begin; |
123 | end += other.end; |
124 | } |
125 | |
126 | static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) { |
127 | PrimInfoT r = a; r.merge(b); return r; |
128 | } |
129 | |
130 | /*! returns the number of primitives */ |
131 | __forceinline size_t size() const { |
132 | return end-begin; |
133 | } |
134 | |
135 | __forceinline float halfArea() { |
136 | return expectedApproxHalfArea(geomBounds); |
137 | } |
138 | |
139 | __forceinline float leafSAH() const { |
140 | return expectedApproxHalfArea(geomBounds)*float(size()); |
141 | //return halfArea(geomBounds)*blocks(num); |
142 | } |
143 | |
144 | __forceinline float leafSAH(size_t block_shift) const { |
145 | return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); |
146 | //return halfArea(geomBounds)*float((num+3) >> 2); |
147 | //return halfArea(geomBounds)*blocks(num); |
148 | } |
149 | |
150 | /*! stream output */ |
151 | friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) { |
152 | return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}" ; |
153 | } |
154 | |
155 | public: |
156 | size_t begin,end; //!< number of primitives |
157 | }; |
158 | |
159 | typedef PrimInfoT<BBox3fa> PrimInfo; |
160 | //typedef PrimInfoT<LBBox3fa> PrimInfoMB; |
161 | |
162 | /*! stores bounding information for a set of primitives */ |
163 | template<typename BBox> |
164 | class PrimInfoMBT : public CentGeom<BBox> |
165 | { |
166 | public: |
167 | using CentGeom<BBox>::geomBounds; |
168 | using CentGeom<BBox>::centBounds; |
169 | |
170 | __forceinline PrimInfoMBT () { |
171 | } |
172 | |
173 | __forceinline PrimInfoMBT (EmptyTy) |
174 | : CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {} |
175 | |
176 | __forceinline PrimInfoMBT (size_t begin, size_t end) |
177 | : CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {} |
178 | |
179 | template<typename PrimRef> |
180 | __forceinline void add_primref(const PrimRef& prim) |
181 | { |
182 | CentGeom<BBox>::extend_primref(prim); |
183 | time_range.extend(prim.time_range); |
184 | object_range._end++; |
185 | num_time_segments += prim.size(); |
186 | if (max_num_time_segments < prim.totalTimeSegments()) { |
187 | max_num_time_segments = prim.totalTimeSegments(); |
188 | max_time_range = prim.time_range; |
189 | } |
190 | } |
191 | |
192 | __forceinline void merge(const PrimInfoMBT& other) |
193 | { |
194 | CentGeom<BBox>::merge(other); |
195 | time_range.extend(other.time_range); |
196 | object_range._begin += other.object_range.begin(); |
197 | object_range._end += other.object_range.end(); |
198 | num_time_segments += other.num_time_segments; |
199 | if (max_num_time_segments < other.max_num_time_segments) { |
200 | max_num_time_segments = other.max_num_time_segments; |
201 | max_time_range = other.max_time_range; |
202 | } |
203 | } |
204 | |
205 | static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) { |
206 | PrimInfoMBT r = a; r.merge(b); return r; |
207 | } |
208 | |
209 | __forceinline size_t begin() const { |
210 | return object_range.begin(); |
211 | } |
212 | |
213 | __forceinline size_t end() const { |
214 | return object_range.end(); |
215 | } |
216 | |
217 | /*! returns the number of primitives */ |
218 | __forceinline size_t size() const { |
219 | return object_range.size(); |
220 | } |
221 | |
222 | __forceinline float halfArea() const { |
223 | return time_range.size()*expectedApproxHalfArea(geomBounds); |
224 | } |
225 | |
226 | __forceinline float leafSAH() const { |
227 | return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments); |
228 | } |
229 | |
230 | __forceinline float leafSAH(size_t block_shift) const { |
231 | return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift); |
232 | } |
233 | |
234 | __forceinline float align_time(float ct) const |
235 | { |
236 | //return roundf(ct * float(numTimeSegments)) / float(numTimeSegments); |
237 | float t0 = (ct-max_time_range.lower)/max_time_range.size(); |
238 | float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments); |
239 | return t1*max_time_range.size()+max_time_range.lower; |
240 | } |
241 | |
242 | /*! stream output */ |
243 | friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo) |
244 | { |
245 | return cout << "PrimInfo { " << |
246 | "object_range = " << pinfo.object_range << |
247 | ", time_range = " << pinfo.time_range << |
248 | ", time_segments = " << pinfo.num_time_segments << |
249 | ", geomBounds = " << pinfo.geomBounds << |
250 | ", centBounds = " << pinfo.centBounds << |
251 | "}" ; |
252 | } |
253 | |
254 | public: |
255 | range<size_t> object_range; //!< primitive range |
256 | size_t num_time_segments; //!< total number of time segments of all added primrefs |
257 | size_t max_num_time_segments; //!< maximum number of time segments of a primitive |
258 | BBox1f max_time_range; //!< time range of primitive with max_num_time_segments |
259 | BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB |
260 | }; |
261 | |
262 | typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB; |
263 | |
264 | struct SetMB : public PrimInfoMB |
265 | { |
266 | static const size_t PARALLEL_THRESHOLD = 3 * 1024; |
267 | static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; |
268 | static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; |
269 | |
270 | typedef mvector<PrimRefMB>* PrimRefVector; |
271 | |
272 | __forceinline SetMB() {} |
273 | |
274 | __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims) |
275 | : PrimInfoMB(pinfo_i), prims(prims) {} |
276 | |
277 | __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in) |
278 | : PrimInfoMB(pinfo_i), prims(prims) |
279 | { |
280 | object_range = object_range_in; |
281 | time_range = intersect(time_range,time_range_in); |
282 | } |
283 | |
284 | __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in) |
285 | : PrimInfoMB(pinfo_i), prims(prims) |
286 | { |
287 | time_range = intersect(time_range,time_range_in); |
288 | } |
289 | |
290 | void deterministic_order() const |
291 | { |
292 | /* required as parallel partition destroys original primitive order */ |
293 | PrimRefMB* prim = prims->data(); |
294 | std::sort(&prim[object_range.begin()],&prim[object_range.end()]); |
295 | } |
296 | |
297 | template<typename RecalculatePrimRef> |
298 | __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const |
299 | { |
300 | auto reduce = [&](const range<size_t>& r) -> LBBox3fa |
301 | { |
302 | LBBox3fa cbounds(empty); |
303 | for (size_t j = r.begin(); j < r.end(); j++) |
304 | { |
305 | PrimRefMB& ref = (*prims)[j]; |
306 | const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range); |
307 | cbounds.extend(bn); |
308 | }; |
309 | return cbounds; |
310 | }; |
311 | |
312 | return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty), |
313 | reduce, |
314 | [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); }); |
315 | } |
316 | |
317 | template<typename RecalculatePrimRef> |
318 | __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const |
319 | { |
320 | auto reduce = [&](const range<size_t>& r) -> LBBox3fa |
321 | { |
322 | LBBox3fa cbounds(empty); |
323 | for (size_t j = r.begin(); j < r.end(); j++) |
324 | { |
325 | PrimRefMB& ref = (*prims)[j]; |
326 | const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space); |
327 | cbounds.extend(bn); |
328 | }; |
329 | return cbounds; |
330 | }; |
331 | |
332 | return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty), |
333 | reduce, |
334 | [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); }); |
335 | } |
336 | |
337 | template<typename RecalculatePrimRef> |
338 | const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const |
339 | { |
340 | auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB |
341 | { |
342 | PrimInfoMB pinfo(empty); |
343 | for (size_t j=r.begin(); j<r.end(); j++) |
344 | { |
345 | PrimRefMB& ref = (*prims)[j]; |
346 | PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space); |
347 | pinfo.add_primref(ref1); |
348 | }; |
349 | return pinfo; |
350 | }; |
351 | |
352 | const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, |
353 | PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2); |
354 | |
355 | return SetMB(pinfo,prims,object_range,time_range); |
356 | } |
357 | |
358 | public: |
359 | PrimRefVector prims; |
360 | }; |
361 | //} |
362 | } |
363 | |