| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../common/ray.h" |
| 7 | #include "../common/scene_subdiv_mesh.h" |
| 8 | #include "../bvh/bvh.h" |
| 9 | #include "../subdiv/tessellation.h" |
| 10 | #include "../subdiv/tessellation_cache.h" |
| 11 | #include "subdivpatch1.h" |
| 12 | |
| 13 | namespace embree |
| 14 | { |
| 15 | namespace isa |
| 16 | { |
| 17 | class GridSOA |
| 18 | { |
| 19 | public: |
| 20 | |
| 21 | /*! GridSOA constructor */ |
| 22 | GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps, |
| 23 | const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight, |
| 24 | const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr); |
| 25 | |
| 26 | /*! Subgrid creation */ |
| 27 | template<typename Allocator> |
| 28 | static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps, |
| 29 | unsigned x0, unsigned x1, unsigned y0, unsigned y1, |
| 30 | const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr) |
| 31 | { |
| 32 | const unsigned width = x1-x0+1; |
| 33 | const unsigned height = y1-y0+1; |
| 34 | const GridRange range(0,width-1,0,height-1); |
| 35 | size_t bvhBytes = 0; |
| 36 | if (time_steps == 1) |
| 37 | bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0); |
| 38 | else { |
| 39 | bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0); |
| 40 | bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D)); |
| 41 | } |
| 42 | const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float); |
| 43 | size_t rootBytes = time_steps*sizeof(BVH4::NodeRef); |
| 44 | #if !defined(__64BIT__) |
| 45 | rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding. |
| 46 | #endif |
| 47 | void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes); |
| 48 | assert(data); |
| 49 | return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o); |
| 50 | } |
| 51 | |
| 52 | /*! Grid creation */ |
| 53 | template<typename Allocator> |
| 54 | static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps, |
| 55 | const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr) |
| 56 | { |
| 57 | return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o); |
| 58 | } |
| 59 | |
| 60 | /*! returns reference to root */ |
| 61 | __forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } |
| 62 | __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } |
| 63 | |
| 64 | /*! returns pointer to BVH array */ |
| 65 | __forceinline char* bvhData() { return &data[0]; } |
| 66 | __forceinline const char* bvhData() const { return &data[0]; } |
| 67 | |
| 68 | /*! returns pointer to Grid array */ |
| 69 | __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; } |
| 70 | __forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; } |
| 71 | |
| 72 | __forceinline void* encodeLeaf(size_t u, size_t v) { |
| 73 | return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf |
| 74 | } |
| 75 | __forceinline float* decodeLeaf(size_t t, const void* ptr) { |
| 76 | return gridData(t) + (((size_t) (ptr) >> 4) - 1); |
| 77 | } |
| 78 | |
| 79 | /*! returns the size of the BVH over the grid in bytes */ |
| 80 | static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes); |
| 81 | |
| 82 | /*! returns the size of the temporal BVH over the time range BVHs */ |
| 83 | static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes); |
| 84 | |
| 85 | /*! calculates bounding box of grid range */ |
| 86 | __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const |
| 87 | { |
| 88 | const float* const grid_array = gridData(time); |
| 89 | const float* const grid_x_array = grid_array + 0 * dim_offset; |
| 90 | const float* const grid_y_array = grid_array + 1 * dim_offset; |
| 91 | const float* const grid_z_array = grid_array + 2 * dim_offset; |
| 92 | |
| 93 | /* compute the bounds just for the range! */ |
| 94 | BBox3fa bounds( empty ); |
| 95 | for (unsigned v = range.v_start; v<=range.v_end; v++) |
| 96 | { |
| 97 | for (unsigned u = range.u_start; u<=range.u_end; u++) |
| 98 | { |
| 99 | const float x = grid_x_array[ v * width + u]; |
| 100 | const float y = grid_y_array[ v * width + u]; |
| 101 | const float z = grid_z_array[ v * width + u]; |
| 102 | bounds.extend( Vec3fa(x,y,z) ); |
| 103 | } |
| 104 | } |
| 105 | assert(is_finite(bounds)); |
| 106 | return bounds; |
| 107 | } |
| 108 | |
| 109 | /*! Evaluates grid over patch and builds BVH4 tree over the grid. */ |
| 110 | std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o); |
| 111 | |
| 112 | /*! Create BVH4 tree over grid. */ |
| 113 | std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator); |
| 114 | |
| 115 | /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */ |
| 116 | std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o); |
| 117 | |
| 118 | /*! Create MBlur BVH4 tree over grid. */ |
| 119 | std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator); |
| 120 | |
| 121 | /*! Create MSMBlur BVH4 tree over grid. */ |
| 122 | std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o); |
| 123 | |
| 124 | template<typename Loader> |
| 125 | struct MapUV |
| 126 | { |
| 127 | typedef typename Loader::vfloat vfloat; |
| 128 | const float* const grid_uv; |
| 129 | size_t line_offset; |
| 130 | size_t lines; |
| 131 | |
| 132 | __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines) |
| 133 | : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {} |
| 134 | |
| 135 | __forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const { |
| 136 | const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines); |
| 137 | const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]); |
| 138 | const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]); |
| 139 | const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]); |
| 140 | const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0; |
| 141 | u = uv[0];v = uv[1]; |
| 142 | } |
| 143 | }; |
| 144 | |
| 145 | struct Gather2x3 |
| 146 | { |
| 147 | enum { M = 4 }; |
| 148 | typedef vbool4 vbool; |
| 149 | typedef vint4 vint; |
| 150 | typedef vfloat4 vfloat; |
| 151 | |
| 152 | static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines) |
| 153 | { |
| 154 | vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset); |
| 155 | vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid |
| 156 | if (unlikely(line_offset == 2)) |
| 157 | { |
| 158 | r0 = shuffle<0,1,1,1>(r0); |
| 159 | r1 = shuffle<0,1,1,1>(r1); |
| 160 | } |
| 161 | return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11 |
| 162 | shuffle<1,1,2,2>(r0), // r01, r01, r02, r02 |
| 163 | shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12 |
| 164 | } |
| 165 | |
| 166 | static __forceinline void gather(const float* const grid_x, |
| 167 | const float* const grid_y, |
| 168 | const float* const grid_z, |
| 169 | const size_t line_offset, |
| 170 | const size_t lines, |
| 171 | Vec3vf4& v0_o, |
| 172 | Vec3vf4& v1_o, |
| 173 | Vec3vf4& v2_o) |
| 174 | { |
| 175 | const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines); |
| 176 | const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines); |
| 177 | const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines); |
| 178 | v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); |
| 179 | v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); |
| 180 | v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); |
| 181 | } |
| 182 | }; |
| 183 | |
| 184 | #if defined (__AVX__) |
| 185 | struct Gather3x3 |
| 186 | { |
| 187 | enum { M = 8 }; |
| 188 | typedef vbool8 vbool; |
| 189 | typedef vint8 vint; |
| 190 | typedef vfloat8 vfloat; |
| 191 | |
| 192 | static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines) |
| 193 | { |
| 194 | vfloat4 ra = vfloat4::loadu(grid + 0*line_offset); |
| 195 | vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid |
| 196 | vfloat4 rc; |
| 197 | if (likely(lines > 2)) |
| 198 | rc = vfloat4::loadu(grid + 2*line_offset); |
| 199 | else |
| 200 | rc = rb; |
| 201 | |
| 202 | if (unlikely(line_offset == 2)) |
| 203 | { |
| 204 | ra = shuffle<0,1,1,1>(ra); |
| 205 | rb = shuffle<0,1,1,1>(rb); |
| 206 | rc = shuffle<0,1,1,1>(rc); |
| 207 | } |
| 208 | |
| 209 | const vfloat8 r0 = vfloat8(ra,rb); |
| 210 | const vfloat8 r1 = vfloat8(rb,rc); |
| 211 | return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21 |
| 212 | shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12 |
| 213 | shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22 |
| 214 | } |
| 215 | |
| 216 | static __forceinline void gather(const float* const grid_x, |
| 217 | const float* const grid_y, |
| 218 | const float* const grid_z, |
| 219 | const size_t line_offset, |
| 220 | const size_t lines, |
| 221 | Vec3vf8& v0_o, |
| 222 | Vec3vf8& v1_o, |
| 223 | Vec3vf8& v2_o) |
| 224 | { |
| 225 | const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines); |
| 226 | const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines); |
| 227 | const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines); |
| 228 | v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); |
| 229 | v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]); |
| 230 | v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]); |
| 231 | } |
| 232 | }; |
| 233 | #endif |
| 234 | |
| 235 | template<typename vfloat> |
| 236 | static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv) |
| 237 | { |
| 238 | typedef typename vfloat::Int vint; |
| 239 | const vint iu = asInt(uv) & 0xffff; |
| 240 | const vint iv = srl(asInt(uv),16); |
| 241 | const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000); |
| 242 | const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000); |
| 243 | return Vec2<vfloat>(u,v); |
| 244 | } |
| 245 | |
| 246 | __forceinline unsigned int geomID() const { |
| 247 | return _geomID; |
| 248 | } |
| 249 | |
| 250 | __forceinline unsigned int primID() const { |
| 251 | return _primID; |
| 252 | } |
| 253 | |
| 254 | public: |
| 255 | BVH4::NodeRef troot; |
| 256 | #if !defined(__64BIT__) |
| 257 | unsigned align1; |
| 258 | #endif |
| 259 | unsigned time_steps; |
| 260 | unsigned width; |
| 261 | |
| 262 | unsigned height; |
| 263 | unsigned dim_offset; |
| 264 | unsigned _geomID; |
| 265 | unsigned _primID; |
| 266 | |
| 267 | unsigned align2; |
| 268 | unsigned gridOffset; |
| 269 | unsigned gridBytes; |
| 270 | unsigned rootOffset; |
| 271 | |
| 272 | char data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots |
| 273 | }; |
| 274 | } |
| 275 | } |
| 276 | |