| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "geometry.h" |
| 7 | #include "buffer.h" |
| 8 | |
| 9 | namespace embree |
| 10 | { |
| 11 | /*! Grid Mesh */ |
| 12 | struct GridMesh : public Geometry |
| 13 | { |
/*! type of this geometry: the Geometry::GTypeMask value that identifies grid meshes */
static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;
| 16 | |
| 17 | /*! grid */ |
| 18 | struct Grid |
| 19 | { |
| 20 | unsigned int startVtxID; |
| 21 | unsigned int lineVtxOffset; |
| 22 | unsigned short resX,resY; |
| 23 | |
| 24 | /* border flags due to 3x3 vertex pattern */ |
| 25 | __forceinline unsigned int get3x3FlagsX(const unsigned int x) const |
| 26 | { |
| 27 | return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0; |
| 28 | } |
| 29 | |
| 30 | /* border flags due to 3x3 vertex pattern */ |
| 31 | __forceinline unsigned int get3x3FlagsY(const unsigned int y) const |
| 32 | { |
| 33 | return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0; |
| 34 | } |
| 35 | |
| 36 | /*! outputs grid structure */ |
| 37 | __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) { |
| 38 | return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }" ; |
| 39 | } |
| 40 | }; |
| 41 | |
| 42 | public: |
| 43 | |
/*! grid mesh construction */
GridMesh (Device* device);

/* geometry interface; these members are only declared in this header */
public:
void setMask(unsigned mask);
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
/* NOTE(review): presumably forwards to interpolate_impl<N> below - confirm in the implementation file */
void interpolate(const RTCInterpolateArguments* const args);
| 58 | |
| 59 | template<int N> |
| 60 | void interpolate_impl(const RTCInterpolateArguments* const args) |
| 61 | { |
| 62 | unsigned int primID = args->primID; |
| 63 | float U = args->u; |
| 64 | float V = args->v; |
| 65 | |
| 66 | /* clamp input u,v to [0;1] range */ |
| 67 | U = max(min(U,1.0f),0.0f); |
| 68 | V = max(min(V,1.0f),0.0f); |
| 69 | |
| 70 | RTCBufferType bufferType = args->bufferType; |
| 71 | unsigned int bufferSlot = args->bufferSlot; |
| 72 | float* P = args->P; |
| 73 | float* dPdu = args->dPdu; |
| 74 | float* dPdv = args->dPdv; |
| 75 | float* ddPdudu = args->ddPdudu; |
| 76 | float* ddPdvdv = args->ddPdvdv; |
| 77 | float* ddPdudv = args->ddPdudv; |
| 78 | unsigned int valueCount = args->valueCount; |
| 79 | |
| 80 | /* calculate base pointer and stride */ |
| 81 | assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || |
| 82 | (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); |
| 83 | const char* src = nullptr; |
| 84 | size_t stride = 0; |
| 85 | if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { |
| 86 | src = vertexAttribs[bufferSlot].getPtr(); |
| 87 | stride = vertexAttribs[bufferSlot].getStride(); |
| 88 | } else { |
| 89 | src = vertices[bufferSlot].getPtr(); |
| 90 | stride = vertices[bufferSlot].getStride(); |
| 91 | } |
| 92 | |
| 93 | const Grid& grid = grids[primID]; |
| 94 | const int grid_width = grid.resX-1; |
| 95 | const int grid_height = grid.resY-1; |
| 96 | const float rcp_grid_width = rcp(float(grid_width)); |
| 97 | const float rcp_grid_height = rcp(float(grid_height)); |
| 98 | const int iu = min((int)floor(U*grid_width ),grid_width); |
| 99 | const int iv = min((int)floor(V*grid_height),grid_height); |
| 100 | const float u = U*grid_width-float(iu); |
| 101 | const float v = V*grid_height-float(iv); |
| 102 | |
| 103 | for (unsigned int i=0; i<valueCount; i+=N) |
| 104 | { |
| 105 | const size_t ofs = i*sizeof(float); |
| 106 | const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu; |
| 107 | const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu; |
| 108 | |
| 109 | const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); |
| 110 | const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]); |
| 111 | const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]); |
| 112 | const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]); |
| 113 | const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]); |
| 114 | const vbool<N> left = u+v <= 1.0f; |
| 115 | const vfloat<N> Q0 = select(left,p0,p2); |
| 116 | const vfloat<N> Q1 = select(left,p1,p3); |
| 117 | const vfloat<N> Q2 = select(left,p3,p1); |
| 118 | const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u); |
| 119 | const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v); |
| 120 | const vfloat<N> W = 1.0f-U-V; |
| 121 | |
| 122 | if (P) { |
| 123 | mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); |
| 124 | } |
| 125 | if (dPdu) { |
| 126 | assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width); |
| 127 | assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height); |
| 128 | } |
| 129 | if (ddPdudu) { |
| 130 | assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero)); |
| 131 | assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero)); |
| 132 | assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero)); |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | |
/* only declared in this header; NOTE(review): presumably adds this mesh's primitive counts to 'counts' - confirm in the implementation file */
void addElementsToCount (GeometryCounts & counts) const;
| 138 | |
| 139 | __forceinline unsigned int getNumSubGrids(const size_t gridID) |
| 140 | { |
| 141 | const Grid &g = grid(gridID); |
| 142 | return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1)); |
| 143 | } |
| 144 | |
| 145 | /*! get fast access to first vertex buffer */ |
| 146 | __forceinline float * getCompactVertexArray () const { |
| 147 | return (float*) vertices0.getPtr(); |
| 148 | } |
| 149 | |
| 150 | public: |
| 151 | |
| 152 | /*! returns number of vertices */ |
| 153 | __forceinline size_t numVertices() const { |
| 154 | return vertices[0].size(); |
| 155 | } |
| 156 | |
| 157 | /*! returns i'th grid*/ |
| 158 | __forceinline const Grid& grid(size_t i) const { |
| 159 | return grids[i]; |
| 160 | } |
| 161 | |
| 162 | /*! returns i'th vertex of the first time step */ |
| 163 | __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load |
| 164 | return vertices0[i]; |
| 165 | } |
| 166 | |
| 167 | /*! returns i'th vertex of the first time step */ |
| 168 | __forceinline const char* vertexPtr(size_t i) const { |
| 169 | return vertices0.getPtr(i); |
| 170 | } |
| 171 | |
| 172 | /*! returns i'th vertex of itime'th timestep */ |
| 173 | __forceinline const Vec3fa vertex(size_t i, size_t itime) const { |
| 174 | return vertices[itime][i]; |
| 175 | } |
| 176 | |
| 177 | /*! returns i'th vertex of itime'th timestep */ |
| 178 | __forceinline const char* vertexPtr(size_t i, size_t itime) const { |
| 179 | return vertices[itime].getPtr(i); |
| 180 | } |
| 181 | |
| 182 | /*! returns i'th vertex of the first timestep */ |
| 183 | __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const { |
| 184 | assert(x < (size_t)g.resX); |
| 185 | assert(y < (size_t)g.resY); |
| 186 | return g.startVtxID + x + y * g.lineVtxOffset; |
| 187 | } |
| 188 | |
| 189 | /*! returns i'th vertex of the first timestep */ |
| 190 | __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const { |
| 191 | const size_t index = grid_vertex_index(g,x,y); |
| 192 | return vertex(index); |
| 193 | } |
| 194 | |
| 195 | /*! returns i'th vertex of the itime'th timestep */ |
| 196 | __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const { |
| 197 | const size_t index = grid_vertex_index(g,x,y); |
| 198 | return vertex(index,itime); |
| 199 | } |
| 200 | |
| 201 | /*! calculates the build bounds of the i'th primitive, if it's valid */ |
| 202 | __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const |
| 203 | { |
| 204 | BBox3fa b(empty); |
| 205 | for (size_t t=0; t<numTimeSteps; t++) |
| 206 | { |
| 207 | for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++) |
| 208 | for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++) |
| 209 | { |
| 210 | const Vec3fa v = grid_vertex(g,x,y,t); |
| 211 | if (unlikely(!isvalid(v))) return false; |
| 212 | b.extend(v); |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | bbox = b; |
| 217 | return true; |
| 218 | } |
| 219 | |
| 220 | /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ |
| 221 | __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const |
| 222 | { |
| 223 | assert(itime < numTimeSteps); |
| 224 | BBox3fa b0(empty); |
| 225 | for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++) |
| 226 | for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++) |
| 227 | { |
| 228 | const Vec3fa v = grid_vertex(g,x,y,itime); |
| 229 | if (unlikely(!isvalid(v))) return false; |
| 230 | b0.extend(v); |
| 231 | } |
| 232 | |
| 233 | /* use bounds of first time step in builder */ |
| 234 | bbox = b0; |
| 235 | return true; |
| 236 | } |
| 237 | |
| 238 | __forceinline bool valid(size_t gridID, size_t itime=0) const { |
| 239 | return valid(gridID, make_range(itime, itime)); |
| 240 | } |
| 241 | |
| 242 | /*! check if the i'th primitive is valid between the specified time range */ |
| 243 | __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const |
| 244 | { |
| 245 | if (unlikely(gridID >= grids.size())) return false; |
| 246 | const Grid &g = grid(gridID); |
| 247 | if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false; |
| 248 | if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false; |
| 249 | |
| 250 | for (size_t y=0;y<g.resY;y++) |
| 251 | for (size_t x=0;x<g.resX;x++) |
| 252 | for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) |
| 253 | if (!isvalid(grid_vertex(g,x,y,itime))) return false; |
| 254 | return true; |
| 255 | } |
| 256 | |
| 257 | |
| 258 | __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const |
| 259 | { |
| 260 | BBox3fa box(empty); |
| 261 | buildBounds(g,sx,sy,itime,box); |
| 262 | return box; |
| 263 | } |
| 264 | |
| 265 | __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const { |
| 266 | BBox3fa bounds0, bounds1; |
| 267 | buildBounds(g,sx,sy,itime+0,bounds0); |
| 268 | buildBounds(g,sx,sy,itime+1,bounds1); |
| 269 | return LBBox3fa(bounds0,bounds1); |
| 270 | } |
| 271 | |
| 272 | /*! calculates the linear bounds of the i'th primitive for the specified time range */ |
| 273 | __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const { |
| 274 | return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments); |
| 275 | } |
| 276 | |
| 277 | public: |
BufferView<Grid> grids; //!< array of grids
BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
vector<RawBufferView> vertexAttribs; //!< vertex attributes
| 282 | }; |
| 283 | |
| 284 | namespace isa |
| 285 | { |
| 286 | struct GridMeshISA : public GridMesh |
| 287 | { |
| 288 | GridMeshISA (Device* device) |
| 289 | : GridMesh(device) {} |
| 290 | }; |
| 291 | } |
| 292 | |
/* NOTE(review): presumably declares the factory createGridMesh(Device*) returning GridMesh* - confirm against the DECLARE_ISA_FUNCTION macro definition */
DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
| 294 | } |
| 295 | |