1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #define RTC_EXPORT_API |
5 | |
6 | #include "default.h" |
7 | #include "device.h" |
8 | #include "scene.h" |
9 | #include "context.h" |
10 | #include "alloc.h" |
11 | |
12 | #include "../builders/bvh_builder_sah.h" |
13 | #include "../builders/bvh_builder_morton.h" |
14 | |
15 | namespace embree |
16 | { |
17 | namespace isa // FIXME: support more ISAs for builders |
18 | { |
19 | struct BVH : public RefCount |
20 | { |
21 | BVH (Device* device) |
22 | : device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0) |
23 | { |
24 | device->refInc(); |
25 | } |
26 | |
27 | ~BVH() { |
28 | device->refDec(); |
29 | } |
30 | |
31 | public: |
32 | Device* device; |
33 | FastAllocator allocator; |
34 | mvector<BVHBuilderMorton::BuildPrim> morton_src; |
35 | mvector<BVHBuilderMorton::BuildPrim> morton_tmp; |
36 | }; |
37 | |
38 | void* rtcBuildBVHMorton(const RTCBuildArguments* arguments) |
39 | { |
40 | BVH* bvh = (BVH*) arguments->bvh; |
41 | RTCBuildPrimitive* prims_i = arguments->primitives; |
42 | size_t primitiveCount = arguments->primitiveCount; |
43 | RTCCreateNodeFunction createNode = arguments->createNode; |
44 | RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren; |
45 | RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds; |
46 | RTCCreateLeafFunction createLeaf = arguments->createLeaf; |
47 | RTCProgressMonitorFunction buildProgress = arguments->buildProgress; |
48 | void* userPtr = arguments->userPtr; |
49 | |
50 | std::atomic<size_t> progress(0); |
51 | |
52 | /* initialize temporary arrays for morton builder */ |
53 | PrimRef* prims = (PrimRef*) prims_i; |
54 | mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src; |
55 | mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp; |
56 | morton_src.resize(primitiveCount); |
57 | morton_tmp.resize(primitiveCount); |
58 | |
59 | /* compute centroid bounds */ |
60 | const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa { |
61 | |
62 | BBox3fa bounds(empty); |
63 | for (size_t i=r.begin(); i<r.end(); i++) |
64 | bounds.extend(prims[i].bounds().center2()); |
65 | return bounds; |
66 | }, BBox3fa::merge); |
67 | |
68 | /* compute morton codes */ |
69 | BVHBuilderMorton::MortonCodeMapping mapping(centBounds); |
70 | parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) { |
71 | BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]); |
72 | for (size_t i=r.begin(); i<r.end(); i++) { |
73 | generator(prims[i].bounds(),(unsigned) i); |
74 | } |
75 | }); |
76 | |
77 | /* start morton build */ |
78 | std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>( |
79 | |
80 | /* thread local allocator for fast allocations */ |
81 | [&] () -> FastAllocator::CachedAllocator { |
82 | return bvh->allocator.getCachedAllocator(); |
83 | }, |
84 | |
85 | /* lambda function that allocates BVH nodes */ |
86 | [&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* { |
87 | return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr); |
88 | }, |
89 | |
90 | /* lambda function that sets bounds */ |
91 | [&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa> |
92 | { |
93 | BBox3fa bounds = empty; |
94 | void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR]; |
95 | const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR]; |
96 | for (size_t i=0; i<N; i++) { |
97 | bounds.extend(children[i].second); |
98 | childptrs[i] = children[i].first; |
99 | cbounds[i] = (const RTCBounds*)&children[i].second; |
100 | } |
101 | setNodeBounds(node,cbounds,(unsigned int)N,userPtr); |
102 | setNodeChildren(node,childptrs, (unsigned int)N,userPtr); |
103 | return std::make_pair(node,bounds); |
104 | }, |
105 | |
106 | /* lambda function that creates BVH leaves */ |
107 | [&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa> |
108 | { |
109 | RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF]; |
110 | BBox3fa bounds = empty; |
111 | for (size_t i=0;i<current.size();i++) |
112 | { |
113 | const size_t id = morton_src[current.begin()+i].index; |
114 | bounds.extend(prims[id].bounds()); |
115 | localBuildPrims[i] = prims_i[id]; |
116 | } |
117 | void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr); |
118 | return std::make_pair(node,bounds); |
119 | }, |
120 | |
121 | /* lambda that calculates the bounds for some primitive */ |
122 | [&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa { |
123 | return prims[morton.index].bounds(); |
124 | }, |
125 | |
126 | /* progress monitor function */ |
127 | [&] (size_t dn) { |
128 | if (!buildProgress) return true; |
129 | const size_t n = progress.fetch_add(dn)+dn; |
130 | const double f = std::min(1.0,double(n)/double(primitiveCount)); |
131 | return buildProgress(userPtr,f); |
132 | }, |
133 | |
134 | morton_src.data(),morton_tmp.data(),primitiveCount, |
135 | *arguments); |
136 | |
137 | bvh->allocator.cleanup(); |
138 | return root.first; |
139 | } |
140 | |
141 | void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments) |
142 | { |
143 | BVH* bvh = (BVH*) arguments->bvh; |
144 | RTCBuildPrimitive* prims = arguments->primitives; |
145 | size_t primitiveCount = arguments->primitiveCount; |
146 | RTCCreateNodeFunction createNode = arguments->createNode; |
147 | RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren; |
148 | RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds; |
149 | RTCCreateLeafFunction createLeaf = arguments->createLeaf; |
150 | RTCProgressMonitorFunction buildProgress = arguments->buildProgress; |
151 | void* userPtr = arguments->userPtr; |
152 | |
153 | std::atomic<size_t> progress(0); |
154 | |
155 | /* calculate priminfo */ |
156 | auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa |
157 | { |
158 | CentGeomBBox3fa bounds(empty); |
159 | for (size_t j=r.begin(); j<r.end(); j++) |
160 | bounds.extend((BBox3fa&)prims[j]); |
161 | return bounds; |
162 | }; |
163 | const CentGeomBBox3fa bounds = |
164 | parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2); |
165 | |
166 | const PrimInfo pinfo(0,primitiveCount,bounds); |
167 | |
168 | /* build BVH */ |
169 | void* root = BVHBuilderBinnedSAH::build<void*>( |
170 | |
171 | /* thread local allocator for fast allocations */ |
172 | [&] () -> FastAllocator::CachedAllocator { |
173 | return bvh->allocator.getCachedAllocator(); |
174 | }, |
175 | |
176 | /* lambda function that creates BVH nodes */ |
177 | [&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void* |
178 | { |
179 | void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr); |
180 | const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR]; |
181 | for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds; |
182 | setNodeBounds(node,cbounds, (unsigned int)N,userPtr); |
183 | return node; |
184 | }, |
185 | |
186 | /* lambda function that updates BVH nodes */ |
187 | [&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* { |
188 | setNodeChildren(node,children, (unsigned int)N,userPtr); |
189 | return node; |
190 | }, |
191 | |
192 | /* lambda function that creates BVH leaves */ |
193 | [&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* { |
194 | return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr); |
195 | }, |
196 | |
197 | /* progress monitor function */ |
198 | [&] (size_t dn) { |
199 | if (!buildProgress) return true; |
200 | const size_t n = progress.fetch_add(dn)+dn; |
201 | const double f = std::min(1.0,double(n)/double(primitiveCount)); |
202 | return buildProgress(userPtr,f); |
203 | }, |
204 | |
205 | (PrimRef*)prims,pinfo,*arguments); |
206 | |
207 | bvh->allocator.cleanup(); |
208 | return root; |
209 | } |
210 | |
211 | static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b) { |
212 | CentGeomBBox3fa centBounds = CentGeomBBox3fa::merge2(a.first,b.first); |
213 | unsigned int maxGeomID = max(a.second,b.second); |
214 | return std::pair<CentGeomBBox3fa,unsigned int>(centBounds,maxGeomID); |
215 | } |
216 | |
217 | void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments) |
218 | { |
219 | BVH* bvh = (BVH*) arguments->bvh; |
220 | RTCBuildPrimitive* prims = arguments->primitives; |
221 | size_t primitiveCount = arguments->primitiveCount; |
222 | RTCCreateNodeFunction createNode = arguments->createNode; |
223 | RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren; |
224 | RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds; |
225 | RTCCreateLeafFunction createLeaf = arguments->createLeaf; |
226 | RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive; |
227 | RTCProgressMonitorFunction buildProgress = arguments->buildProgress; |
228 | void* userPtr = arguments->userPtr; |
229 | |
230 | std::atomic<size_t> progress(0); |
231 | |
232 | /* calculate priminfo */ |
233 | |
234 | auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int> |
235 | { |
236 | CentGeomBBox3fa bounds(empty); |
237 | unsigned maxGeomID = 0; |
238 | for (size_t j=r.begin(); j<r.end(); j++) |
239 | { |
240 | bounds.extend((BBox3fa&)prims[j]); |
241 | maxGeomID = max(maxGeomID,prims[j].geomID); |
242 | } |
243 | return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID); |
244 | }; |
245 | |
246 | |
247 | const std::pair<CentGeomBBox3fa,unsigned int> pair = |
248 | parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair); |
249 | |
250 | CentGeomBBox3fa bounds = pair.first; |
251 | const unsigned int maxGeomID = pair.second; |
252 | |
253 | if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)))) |
254 | { |
255 | /* fallback code for max geomID larger than threshold */ |
256 | return rtcBuildBVHBinnedSAH(arguments); |
257 | } |
258 | |
259 | const PrimInfo pinfo(0,primitiveCount,bounds); |
260 | |
261 | /* function that splits a build primitive */ |
262 | struct Splitter |
263 | { |
264 | Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr) |
265 | : splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {} |
266 | |
267 | __forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const |
268 | { |
269 | prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK; |
270 | splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr); |
271 | left_o.geomIDref() = geomID; left_o.primIDref() = primID; |
272 | right_o.geomIDref() = geomID; right_o.primIDref() = primID; |
273 | } |
274 | |
275 | __forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const |
276 | { |
277 | PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID); |
278 | splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr); |
279 | } |
280 | |
281 | RTCSplitPrimitiveFunction splitPrimitive; |
282 | unsigned geomID; |
283 | unsigned primID; |
284 | void* userPtr; |
285 | }; |
286 | |
287 | /* build BVH */ |
288 | void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>( |
289 | |
290 | /* thread local allocator for fast allocations */ |
291 | [&] () -> FastAllocator::CachedAllocator { |
292 | return bvh->allocator.getCachedAllocator(); |
293 | }, |
294 | |
295 | /* lambda function that creates BVH nodes */ |
296 | [&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void* |
297 | { |
298 | void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr); |
299 | const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR]; |
300 | for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds; |
301 | setNodeBounds(node,cbounds, (unsigned int)N,userPtr); |
302 | return node; |
303 | }, |
304 | |
305 | /* lambda function that updates BVH nodes */ |
306 | [&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* { |
307 | setNodeChildren(node,children, (unsigned int)N,userPtr); |
308 | return node; |
309 | }, |
310 | |
311 | /* lambda function that creates BVH leaves */ |
312 | [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* { |
313 | return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr); |
314 | }, |
315 | |
316 | /* returns the splitter */ |
317 | [&] ( const PrimRef& prim ) -> Splitter { |
318 | return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr); |
319 | }, |
320 | |
321 | /* progress monitor function */ |
322 | [&] (size_t dn) { |
323 | if (!buildProgress) return true; |
324 | const size_t n = progress.fetch_add(dn)+dn; |
325 | const double f = std::min(1.0,double(n)/double(primitiveCount)); |
326 | return buildProgress(userPtr,f); |
327 | }, |
328 | |
329 | (PrimRef*)prims, |
330 | arguments->primitiveArrayCapacity, |
331 | pinfo,*arguments); |
332 | |
333 | bvh->allocator.cleanup(); |
334 | return root; |
335 | } |
336 | } |
337 | } |
338 | |
339 | using namespace embree; |
340 | using namespace embree::isa; |
341 | |
342 | RTC_NAMESPACE_BEGIN |
343 | |
344 | RTC_API RTCBVH rtcNewBVH(RTCDevice device) |
345 | { |
346 | RTC_CATCH_BEGIN; |
347 | RTC_TRACE(rtcNewAllocator); |
348 | RTC_VERIFY_HANDLE(device); |
349 | BVH* bvh = new BVH((Device*)device); |
350 | return (RTCBVH) bvh->refInc(); |
351 | RTC_CATCH_END((Device*)device); |
352 | return nullptr; |
353 | } |
354 | |
355 | RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments) |
356 | { |
357 | BVH* bvh = (BVH*) arguments->bvh; |
358 | RTC_CATCH_BEGIN; |
359 | RTC_TRACE(rtcBuildBVH); |
360 | RTC_VERIFY_HANDLE(bvh); |
361 | RTC_VERIFY_HANDLE(arguments); |
362 | RTC_VERIFY_HANDLE(arguments->createNode); |
363 | RTC_VERIFY_HANDLE(arguments->setNodeChildren); |
364 | RTC_VERIFY_HANDLE(arguments->setNodeBounds); |
365 | RTC_VERIFY_HANDLE(arguments->createLeaf); |
366 | |
367 | if (arguments->primitiveArrayCapacity < arguments->primitiveCount) |
368 | throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount" ) |
369 | |
370 | /* initialize the allocator */ |
371 | bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa)); |
372 | bvh->allocator.reset(); |
373 | |
374 | /* switch between different builders based on quality level */ |
375 | if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW) |
376 | return rtcBuildBVHMorton(arguments); |
377 | else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM) |
378 | return rtcBuildBVHBinnedSAH(arguments); |
379 | else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) { |
380 | if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount) |
381 | return rtcBuildBVHBinnedSAH(arguments); |
382 | else |
383 | return rtcBuildBVHSpatialSAH(arguments); |
384 | } |
385 | else |
386 | throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality" ); |
387 | |
388 | /* if we are in dynamic mode, then do not clear temporary data */ |
389 | if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC)) |
390 | { |
391 | bvh->morton_src.clear(); |
392 | bvh->morton_tmp.clear(); |
393 | } |
394 | |
395 | RTC_CATCH_END(bvh->device); |
396 | return nullptr; |
397 | } |
398 | |
399 | RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align) |
400 | { |
401 | FastAllocator::CachedAllocator* alloc = (FastAllocator::CachedAllocator*) localAllocator; |
402 | RTC_CATCH_BEGIN; |
403 | RTC_TRACE(rtcThreadLocalAlloc); |
404 | return alloc->malloc0(bytes,align); |
405 | RTC_CATCH_END(alloc->alloc->getDevice()); |
406 | return nullptr; |
407 | } |
408 | |
409 | RTC_API void rtcMakeStaticBVH(RTCBVH hbvh) |
410 | { |
411 | BVH* bvh = (BVH*) hbvh; |
412 | RTC_CATCH_BEGIN; |
413 | RTC_TRACE(rtcStaticBVH); |
414 | RTC_VERIFY_HANDLE(hbvh); |
415 | bvh->morton_src.clear(); |
416 | bvh->morton_tmp.clear(); |
417 | RTC_CATCH_END(bvh->device); |
418 | } |
419 | |
420 | RTC_API void rtcRetainBVH(RTCBVH hbvh) |
421 | { |
422 | BVH* bvh = (BVH*) hbvh; |
423 | Device* device = bvh ? bvh->device : nullptr; |
424 | RTC_CATCH_BEGIN; |
425 | RTC_TRACE(rtcRetainBVH); |
426 | RTC_VERIFY_HANDLE(hbvh); |
427 | bvh->refInc(); |
428 | RTC_CATCH_END(device); |
429 | } |
430 | |
431 | RTC_API void rtcReleaseBVH(RTCBVH hbvh) |
432 | { |
433 | BVH* bvh = (BVH*) hbvh; |
434 | Device* device = bvh ? bvh->device : nullptr; |
435 | RTC_CATCH_BEGIN; |
436 | RTC_TRACE(rtcReleaseBVH); |
437 | RTC_VERIFY_HANDLE(hbvh); |
438 | bvh->refDec(); |
439 | RTC_CATCH_END(device); |
440 | } |
441 | |
442 | RTC_NAMESPACE_END |
443 | |