| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../common/default.h" |
| 7 | #include "../common/primref.h" |
| 8 | #include "../common/primref_mb.h" |
| 9 | |
| 10 | namespace embree |
| 11 | { |
| 12 | // FIXME: maybe there's a better place for this util fct |
| 13 | __forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2) |
| 14 | { |
| 15 | const Vec3fa e0 = v1-v0; |
| 16 | const Vec3fa e1 = v2-v0; |
| 17 | const Vec3fa d = cross(e0,e1); |
| 18 | return fabs(d.x) + fabs(d.y) + fabs(d.z); |
| 19 | } |
| 20 | |
| 21 | //namespace isa |
| 22 | //{ |
| 23 | template<typename BBox> |
| 24 | class CentGeom |
| 25 | { |
| 26 | public: |
| 27 | __forceinline CentGeom () {} |
| 28 | |
| 29 | __forceinline CentGeom (EmptyTy) |
| 30 | : geomBounds(empty), centBounds(empty) {} |
| 31 | |
| 32 | __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds) |
| 33 | : geomBounds(geomBounds), centBounds(centBounds) {} |
| 34 | |
| 35 | template<typename PrimRef> |
| 36 | __forceinline void extend_primref(const PrimRef& prim) |
| 37 | { |
| 38 | BBox bounds; Vec3fa center; |
| 39 | prim.binBoundsAndCenter(bounds,center); |
| 40 | geomBounds.extend(bounds); |
| 41 | centBounds.extend(center); |
| 42 | } |
| 43 | |
| 44 | template<typename PrimRef> |
| 45 | __forceinline void extend_center2(const PrimRef& prim) |
| 46 | { |
| 47 | BBox3fa bounds = prim.bounds(); |
| 48 | geomBounds.extend(bounds); |
| 49 | centBounds.extend(bounds.center2()); |
| 50 | } |
| 51 | |
| 52 | __forceinline void extend(const BBox& geomBounds_) { |
| 53 | geomBounds.extend(geomBounds_); |
| 54 | centBounds.extend(center2(geomBounds_)); |
| 55 | } |
| 56 | |
| 57 | __forceinline void merge(const CentGeom& other) |
| 58 | { |
| 59 | geomBounds.extend(other.geomBounds); |
| 60 | centBounds.extend(other.centBounds); |
| 61 | } |
| 62 | |
| 63 | static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) { |
| 64 | CentGeom r = a; r.merge(b); return r; |
| 65 | } |
| 66 | |
| 67 | public: |
| 68 | BBox geomBounds; //!< geometry bounds of primitives |
| 69 | BBox3fa centBounds; //!< centroid bounds of primitives |
| 70 | }; |
| 71 | |
| 72 | typedef CentGeom<BBox3fa> CentGeomBBox3fa; |
| 73 | |
| 74 | /*! stores bounding information for a set of primitives */ |
| 75 | template<typename BBox> |
| 76 | class PrimInfoT : public CentGeom<BBox> |
| 77 | { |
| 78 | public: |
| 79 | using CentGeom<BBox>::geomBounds; |
| 80 | using CentGeom<BBox>::centBounds; |
| 81 | |
| 82 | __forceinline PrimInfoT () {} |
| 83 | |
| 84 | __forceinline PrimInfoT (EmptyTy) |
| 85 | : CentGeom<BBox>(empty), begin(0), end(0) {} |
| 86 | |
| 87 | __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds) |
| 88 | : CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {} |
| 89 | |
| 90 | template<typename PrimRef> |
| 91 | __forceinline void add_primref(const PrimRef& prim) |
| 92 | { |
| 93 | CentGeom<BBox>::extend_primref(prim); |
| 94 | end++; |
| 95 | } |
| 96 | |
| 97 | template<typename PrimRef> |
| 98 | __forceinline void add_center2(const PrimRef& prim) { |
| 99 | CentGeom<BBox>::extend_center2(prim); |
| 100 | end++; |
| 101 | } |
| 102 | |
| 103 | template<typename PrimRef> |
| 104 | __forceinline void add_center2(const PrimRef& prim, const size_t i) { |
| 105 | CentGeom<BBox>::extend_center2(prim); |
| 106 | end+=i; |
| 107 | } |
| 108 | |
| 109 | /*__forceinline void add(const BBox& geomBounds_) { |
| 110 | CentGeom<BBox>::extend(geomBounds_); |
| 111 | end++; |
| 112 | } |
| 113 | |
| 114 | __forceinline void add(const BBox& geomBounds_, const size_t i) { |
| 115 | CentGeom<BBox>::extend(geomBounds_); |
| 116 | end+=i; |
| 117 | }*/ |
| 118 | |
| 119 | __forceinline void merge(const PrimInfoT& other) |
| 120 | { |
| 121 | CentGeom<BBox>::merge(other); |
| 122 | begin += other.begin; |
| 123 | end += other.end; |
| 124 | } |
| 125 | |
| 126 | static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) { |
| 127 | PrimInfoT r = a; r.merge(b); return r; |
| 128 | } |
| 129 | |
| 130 | /*! returns the number of primitives */ |
| 131 | __forceinline size_t size() const { |
| 132 | return end-begin; |
| 133 | } |
| 134 | |
| 135 | __forceinline float halfArea() { |
| 136 | return expectedApproxHalfArea(geomBounds); |
| 137 | } |
| 138 | |
| 139 | __forceinline float leafSAH() const { |
| 140 | return expectedApproxHalfArea(geomBounds)*float(size()); |
| 141 | //return halfArea(geomBounds)*blocks(num); |
| 142 | } |
| 143 | |
| 144 | __forceinline float leafSAH(size_t block_shift) const { |
| 145 | return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift); |
| 146 | //return halfArea(geomBounds)*float((num+3) >> 2); |
| 147 | //return halfArea(geomBounds)*blocks(num); |
| 148 | } |
| 149 | |
| 150 | /*! stream output */ |
| 151 | friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) { |
| 152 | return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}" ; |
| 153 | } |
| 154 | |
| 155 | public: |
| 156 | size_t begin,end; //!< number of primitives |
| 157 | }; |
| 158 | |
| 159 | typedef PrimInfoT<BBox3fa> PrimInfo; |
| 160 | //typedef PrimInfoT<LBBox3fa> PrimInfoMB; |
| 161 | |
| 162 | /*! stores bounding information for a set of primitives */ |
| 163 | template<typename BBox> |
| 164 | class PrimInfoMBT : public CentGeom<BBox> |
| 165 | { |
| 166 | public: |
| 167 | using CentGeom<BBox>::geomBounds; |
| 168 | using CentGeom<BBox>::centBounds; |
| 169 | |
| 170 | __forceinline PrimInfoMBT () { |
| 171 | } |
| 172 | |
| 173 | __forceinline PrimInfoMBT (EmptyTy) |
| 174 | : CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {} |
| 175 | |
| 176 | __forceinline PrimInfoMBT (size_t begin, size_t end) |
| 177 | : CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {} |
| 178 | |
| 179 | template<typename PrimRef> |
| 180 | __forceinline void add_primref(const PrimRef& prim) |
| 181 | { |
| 182 | CentGeom<BBox>::extend_primref(prim); |
| 183 | time_range.extend(prim.time_range); |
| 184 | object_range._end++; |
| 185 | num_time_segments += prim.size(); |
| 186 | if (max_num_time_segments < prim.totalTimeSegments()) { |
| 187 | max_num_time_segments = prim.totalTimeSegments(); |
| 188 | max_time_range = prim.time_range; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | __forceinline void merge(const PrimInfoMBT& other) |
| 193 | { |
| 194 | CentGeom<BBox>::merge(other); |
| 195 | time_range.extend(other.time_range); |
| 196 | object_range._begin += other.object_range.begin(); |
| 197 | object_range._end += other.object_range.end(); |
| 198 | num_time_segments += other.num_time_segments; |
| 199 | if (max_num_time_segments < other.max_num_time_segments) { |
| 200 | max_num_time_segments = other.max_num_time_segments; |
| 201 | max_time_range = other.max_time_range; |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) { |
| 206 | PrimInfoMBT r = a; r.merge(b); return r; |
| 207 | } |
| 208 | |
| 209 | __forceinline size_t begin() const { |
| 210 | return object_range.begin(); |
| 211 | } |
| 212 | |
| 213 | __forceinline size_t end() const { |
| 214 | return object_range.end(); |
| 215 | } |
| 216 | |
| 217 | /*! returns the number of primitives */ |
| 218 | __forceinline size_t size() const { |
| 219 | return object_range.size(); |
| 220 | } |
| 221 | |
| 222 | __forceinline float halfArea() const { |
| 223 | return time_range.size()*expectedApproxHalfArea(geomBounds); |
| 224 | } |
| 225 | |
| 226 | __forceinline float leafSAH() const { |
| 227 | return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments); |
| 228 | } |
| 229 | |
| 230 | __forceinline float leafSAH(size_t block_shift) const { |
| 231 | return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift); |
| 232 | } |
| 233 | |
| 234 | __forceinline float align_time(float ct) const |
| 235 | { |
| 236 | //return roundf(ct * float(numTimeSegments)) / float(numTimeSegments); |
| 237 | float t0 = (ct-max_time_range.lower)/max_time_range.size(); |
| 238 | float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments); |
| 239 | return t1*max_time_range.size()+max_time_range.lower; |
| 240 | } |
| 241 | |
| 242 | /*! stream output */ |
| 243 | friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo) |
| 244 | { |
| 245 | return cout << "PrimInfo { " << |
| 246 | "object_range = " << pinfo.object_range << |
| 247 | ", time_range = " << pinfo.time_range << |
| 248 | ", time_segments = " << pinfo.num_time_segments << |
| 249 | ", geomBounds = " << pinfo.geomBounds << |
| 250 | ", centBounds = " << pinfo.centBounds << |
| 251 | "}" ; |
| 252 | } |
| 253 | |
| 254 | public: |
| 255 | range<size_t> object_range; //!< primitive range |
| 256 | size_t num_time_segments; //!< total number of time segments of all added primrefs |
| 257 | size_t max_num_time_segments; //!< maximum number of time segments of a primitive |
| 258 | BBox1f max_time_range; //!< time range of primitive with max_num_time_segments |
| 259 | BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB |
| 260 | }; |
| 261 | |
| 262 | typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB; |
| 263 | |
| 264 | struct SetMB : public PrimInfoMB |
| 265 | { |
| 266 | static const size_t PARALLEL_THRESHOLD = 3 * 1024; |
| 267 | static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; |
| 268 | static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; |
| 269 | |
| 270 | typedef mvector<PrimRefMB>* PrimRefVector; |
| 271 | |
| 272 | __forceinline SetMB() {} |
| 273 | |
| 274 | __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims) |
| 275 | : PrimInfoMB(pinfo_i), prims(prims) {} |
| 276 | |
| 277 | __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in) |
| 278 | : PrimInfoMB(pinfo_i), prims(prims) |
| 279 | { |
| 280 | object_range = object_range_in; |
| 281 | time_range = intersect(time_range,time_range_in); |
| 282 | } |
| 283 | |
| 284 | __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in) |
| 285 | : PrimInfoMB(pinfo_i), prims(prims) |
| 286 | { |
| 287 | time_range = intersect(time_range,time_range_in); |
| 288 | } |
| 289 | |
| 290 | void deterministic_order() const |
| 291 | { |
| 292 | /* required as parallel partition destroys original primitive order */ |
| 293 | PrimRefMB* prim = prims->data(); |
| 294 | std::sort(&prim[object_range.begin()],&prim[object_range.end()]); |
| 295 | } |
| 296 | |
| 297 | template<typename RecalculatePrimRef> |
| 298 | __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const |
| 299 | { |
| 300 | auto reduce = [&](const range<size_t>& r) -> LBBox3fa |
| 301 | { |
| 302 | LBBox3fa cbounds(empty); |
| 303 | for (size_t j = r.begin(); j < r.end(); j++) |
| 304 | { |
| 305 | PrimRefMB& ref = (*prims)[j]; |
| 306 | const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range); |
| 307 | cbounds.extend(bn); |
| 308 | }; |
| 309 | return cbounds; |
| 310 | }; |
| 311 | |
| 312 | return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty), |
| 313 | reduce, |
| 314 | [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); }); |
| 315 | } |
| 316 | |
| 317 | template<typename RecalculatePrimRef> |
| 318 | __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const |
| 319 | { |
| 320 | auto reduce = [&](const range<size_t>& r) -> LBBox3fa |
| 321 | { |
| 322 | LBBox3fa cbounds(empty); |
| 323 | for (size_t j = r.begin(); j < r.end(); j++) |
| 324 | { |
| 325 | PrimRefMB& ref = (*prims)[j]; |
| 326 | const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space); |
| 327 | cbounds.extend(bn); |
| 328 | }; |
| 329 | return cbounds; |
| 330 | }; |
| 331 | |
| 332 | return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty), |
| 333 | reduce, |
| 334 | [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); }); |
| 335 | } |
| 336 | |
| 337 | template<typename RecalculatePrimRef> |
| 338 | const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const |
| 339 | { |
| 340 | auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB |
| 341 | { |
| 342 | PrimInfoMB pinfo(empty); |
| 343 | for (size_t j=r.begin(); j<r.end(); j++) |
| 344 | { |
| 345 | PrimRefMB& ref = (*prims)[j]; |
| 346 | PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space); |
| 347 | pinfo.add_primref(ref1); |
| 348 | }; |
| 349 | return pinfo; |
| 350 | }; |
| 351 | |
| 352 | const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, |
| 353 | PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2); |
| 354 | |
| 355 | return SetMB(pinfo,prims,object_range,time_range); |
| 356 | } |
| 357 | |
| 358 | public: |
| 359 | PrimRefVector prims; |
| 360 | }; |
| 361 | //} |
| 362 | } |
| 363 | |