1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "../builders/primrefgen.h"
7#include "../builders/heuristic_spatial.h"
8#include "../builders/splitter.h"
9
10#include "../../common/algorithms/parallel_for_for.h"
11#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
12
13#define DBG_PRESPLIT(x)
14#define CHECK_PRESPLIT(x)
15
16#define GRID_SIZE 1024
17#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
18#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
19#define PRIORITY_CUTOFF_THRESHOLD 1.0f
20#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
21
22namespace embree
23{
24 namespace isa
25 {
26
27 struct PresplitItem
28 {
29 union {
30 float priority;
31 unsigned int data;
32 };
33 unsigned int index;
34
35 __forceinline operator unsigned() const
36 {
37 return reinterpret_cast<const unsigned&>(priority);
38 }
39 __forceinline bool operator < (const PresplitItem& item) const
40 {
41 return (priority < item.priority);
42 }
43
44 template<typename Mesh>
45 __forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc)
46 {
47 const unsigned int geomID = ref.geomID();
48 const unsigned int primID = ref.primID();
49 const float area_aabb = area(ref.bounds());
50 const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
51 const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
52 assert(area_prim <= area_aabb);
53 //const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
54 const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
55 assert(priority >= 0.0f && priority < FLT_LARGE);
56 return priority;
57 }
58
59
60 };
61
62 inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
63 return cout << "index " << item.index << " priority " << item.priority;
64 };
65
66 template<typename SplitterFactory>
67 void splitPrimitive(SplitterFactory &Splitter,
68 const PrimRef &prim,
69 const unsigned int geomID,
70 const unsigned int primID,
71 const unsigned int split_level,
72 const Vec3fa &grid_base,
73 const float grid_scale,
74 const float grid_extend,
75 PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
76 unsigned int& numSubPrims)
77 {
78 assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
79 if (split_level == 0)
80 {
81 assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
82 subPrims[numSubPrims++] = prim;
83 }
84 else
85 {
86 const Vec3fa lower = prim.lower;
87 const Vec3fa upper = prim.upper;
88 const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
89 const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
90 Vec3ia ilower(floor(glower));
91 Vec3ia iupper(floor(gupper));
92
93 /* this ignores dimensions that are empty */
94 iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)));
95
96 /* compute a morton code for the lower and upper grid coordinates. */
97 const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
98 const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
99
100 /* if all bits are equal then we cannot split */
101 if(unlikely(lower_code == upper_code))
102 {
103 assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
104 subPrims[numSubPrims++] = prim;
105 return;
106 }
107
108 /* compute octree level and dimension to perform the split in */
109 const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
110 const unsigned int level = diff / 3;
111 const unsigned int dim = diff % 3;
112
113 /* now we compute the grid position of the split */
114 const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
115
116 /* compute world space position of split */
117 const float inv_grid_size = 1.0f / GRID_SIZE;
118 const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend;
119
120 assert(prim.lower[dim] <= fsplit &&
121 prim.upper[dim] >= fsplit);
122
123 /* split primitive */
124 const auto splitter = Splitter(prim);
125 BBox3fa left,right;
126 splitter(prim.bounds(),dim,fsplit,left,right);
127 assert(!left.empty());
128 assert(!right.empty());
129
130
131 splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
132 splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
133 }
134 }
135
136
137 template<typename Mesh, typename SplitterFactory>
138 PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
139 {
140 ParallelPrefixSumState<PrimInfo> pstate;
141
142 /* first try */
143 progressMonitor(0);
144 PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
145 return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
146 }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
147
148 /* if we need to filter out geometry, run again */
149 if (pinfo.size() != numPrimRefs)
150 {
151 progressMonitor(0);
152 pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
153 return geometry->createPrimRefArray(prims,r,base.size(),geomID);
154 }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
155 }
156 return pinfo;
157 }
158
159 __forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref)
160 {
161 const Vec3fa lower = ref.lower;
162 const Vec3fa upper = ref.upper;
163 const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
164 const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
165 Vec3ia ilower(floor(glower));
166 Vec3ia iupper(floor(gupper));
167
168 /* this ignores dimensions that are empty */
169 iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
170
171 /* compute a morton code for the lower and upper grid coordinates. */
172 const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
173 const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
174 return Vec2i(lower_code,upper_code);
175 }
176
177 template<typename Mesh, typename SplitterFactory>
178 PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
179 {
180 static const size_t MIN_STEP_SIZE = 128;
181
182 ParallelForForPrefixSumState<PrimInfo> pstate;
183 Scene::Iterator2 iter(scene,types,mblur);
184
185 /* first try */
186 progressMonitor(0);
187 pstate.init(iter,size_t(1024));
188 PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
189 return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
190 }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
191
192 /* if we need to filter out geometry, run again */
193 if (pinfo.size() != numPrimRefs)
194 {
195 progressMonitor(0);
196 pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
197 return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
198 }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
199 }
200
201 /* use correct number of primitives */
202 size_t numPrimitives = pinfo.size();
203 const size_t alloc_numPrimitives = prims.size();
204 const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives;
205
206 /* set up primitive splitter */
207 SplitterFactory Splitter(scene);
208
209
210 DBG_PRESPLIT(
211 const size_t org_numPrimitives = pinfo.size();
212 PRINT(numPrimitives);
213 PRINT(alloc_numPrimitives);
214 PRINT(numSplitPrimitivesBudget);
215 );
216
217 /* allocate double buffer presplit items */
218 const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives;
219 PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
220 PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
221
222 /* compute grid */
223 const Vec3fa grid_base = pinfo.geomBounds.lower;
224 const Vec3fa grid_diag = pinfo.geomBounds.size();
225 const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z));
226 const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend;
227
228 /* init presplit items and get total sum */
229 const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
230 float sum = 0.0f;
231 for (size_t i=r.begin(); i<r.end(); i++)
232 {
233 presplitItem[i].index = (unsigned int)i;
234 const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]);
235 /* if all bits are equal then we cannot split */
236 presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f;
237 /* FIXME: sum undeterministic */
238 sum += presplitItem[i].priority;
239 }
240 return sum;
241 },[](const float& a, const float& b) -> float { return a+b; });
242
243 /* compute number of splits per primitive */
244 const float inv_psum = 1.0f / psum;
245 parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
246 for (size_t i=r.begin(); i<r.end(); i++)
247 {
248 if (presplitItem[i].priority > 0.0f)
249 {
250 const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum;
251 if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims
252 {
253 presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
254 //presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
255 assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
256 }
257 else
258 presplitItem[i].priority = 0.0f;
259 }
260 }
261 });
262
263 auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; };
264 size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024);
265
266 /* anything to split ? */
267 if (center < numPrimitives)
268 {
269 size_t numPrimitivesToSplit = numPrimitives - center;
270 assert(presplitItem[center].priority >= 1.0f);
271
272 /* sort presplit items in ascending order */
273 radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024);
274
275 CHECK_PRESPLIT(
276 parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
277 for (size_t i=r.begin(); i<r.end(); i++)
278 assert(presplitItem[i-1].priority <= presplitItem[i].priority);
279 });
280 );
281
282 unsigned int* primOffset0 = (unsigned int*)tmp_presplitItem;
283 unsigned int* primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
284
285 /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
286 const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
287 size_t sum = 0;
288 for (size_t i=t.begin(); i<t.end(); i++)
289 {
290 PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
291 assert(presplitItem[i].priority >= 1.0f);
292 const unsigned int primrefID = presplitItem[i].index;
293 const float prio = presplitItem[i].priority;
294 const unsigned int geomID = prims[primrefID].geomID();
295 const unsigned int primID = prims[primrefID].primID();
296 const unsigned int split_levels = (unsigned int)prio;
297 unsigned int numSubPrims = 0;
298 splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
299 assert(numSubPrims);
300 numSubPrims--; // can reuse slot
301 sum+=numSubPrims;
302 presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels;
303 primOffset0[i-center] = numSubPrims;
304 }
305 return sum;
306 },[](const size_t& a, const size_t& b) -> size_t { return a+b; });
307
308 /* if we are over budget, need to shrink the range */
309 if (totalNumSubPrims > numSplitPrimitivesBudget)
310 {
311 size_t new_center = numPrimitives-1;
312 size_t sum = 0;
313 for (;new_center>=center;new_center--)
314 {
315 const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG;
316 if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
317 sum += numSubPrims;
318 }
319 new_center++;
320
321 primOffset0 += new_center - center;
322 numPrimitivesToSplit -= new_center - center;
323 center = new_center;
324 assert(numPrimitivesToSplit == (numPrimitives - center));
325 }
326
327 /* parallel prefix sum to compute offsets for storing sub-primitives */
328 const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
329 assert(numPrimitives+offset <= alloc_numPrimitives);
330
331 /* iterate over range, and split primitives into sub primitives and append them to prims array */
332 parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
333 for (size_t j=rn.begin(); j<rn.end(); j++)
334 {
335 PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
336 const unsigned int primrefID = presplitItem[j].index;
337 const unsigned int geomID = prims[primrefID].geomID();
338 const unsigned int primID = prims[primrefID].primID();
339 const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1);
340
341 assert(split_levels);
342 assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
343 unsigned int numSubPrims = 0;
344 splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
345 const size_t newID = numPrimitives + primOffset1[j-center];
346 assert(newID+numSubPrims-1 <= alloc_numPrimitives);
347 prims[primrefID] = subPrims[0];
348 for (size_t i=1;i<numSubPrims;i++)
349 prims[newID+i-1] = subPrims[i];
350 }
351 });
352
353 numPrimitives += offset;
354 DBG_PRESPLIT(
355 PRINT(pinfo.size());
356 PRINT(numPrimitives);
357 PRINT((float)numPrimitives/org_numPrimitives));
358 }
359
360 /* recompute centroid bounding boxes */
361 pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
362 PrimInfo p(empty);
363 for (size_t j=r.begin(); j<r.end(); j++)
364 p.add_center2(prims[j]);
365 return p;
366 }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
367
368 assert(pinfo.size() == numPrimitives);
369
370 /* free double buffer presplit items */
371 alignedFree(tmp_presplitItem);
372 alignedFree(presplitItem);
373 return pinfo;
374 }
375 }
376}
377