1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "geometry.h" |
7 | #include "buffer.h" |
8 | |
9 | namespace embree |
10 | { |
11 | /*! Grid Mesh */ |
12 | struct GridMesh : public Geometry |
13 | { |
14 | /*! type of this geometry */ |
15 | static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH; |
16 | |
17 | /*! grid */ |
18 | struct Grid |
19 | { |
20 | unsigned int startVtxID; |
21 | unsigned int lineVtxOffset; |
22 | unsigned short resX,resY; |
23 | |
24 | /* border flags due to 3x3 vertex pattern */ |
25 | __forceinline unsigned int get3x3FlagsX(const unsigned int x) const |
26 | { |
27 | return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0; |
28 | } |
29 | |
30 | /* border flags due to 3x3 vertex pattern */ |
31 | __forceinline unsigned int get3x3FlagsY(const unsigned int y) const |
32 | { |
33 | return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0; |
34 | } |
35 | |
36 | /*! outputs grid structure */ |
37 | __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) { |
38 | return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }" ; |
39 | } |
40 | }; |
41 | |
42 | public: |
43 | |
44 | /*! grid mesh construction */ |
45 | GridMesh (Device* device); |
46 | |
47 | /* geometry interface */ |
48 | public: |
49 | void setMask(unsigned mask); |
50 | void setNumTimeSteps (unsigned int numTimeSteps); |
51 | void setVertexAttributeCount (unsigned int N); |
52 | void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); |
53 | void* getBuffer(RTCBufferType type, unsigned int slot); |
54 | void updateBuffer(RTCBufferType type, unsigned int slot); |
55 | void commit(); |
56 | bool verify(); |
57 | void interpolate(const RTCInterpolateArguments* const args); |
58 | |
59 | template<int N> |
60 | void interpolate_impl(const RTCInterpolateArguments* const args) |
61 | { |
62 | unsigned int primID = args->primID; |
63 | float U = args->u; |
64 | float V = args->v; |
65 | |
66 | /* clamp input u,v to [0;1] range */ |
67 | U = max(min(U,1.0f),0.0f); |
68 | V = max(min(V,1.0f),0.0f); |
69 | |
70 | RTCBufferType bufferType = args->bufferType; |
71 | unsigned int bufferSlot = args->bufferSlot; |
72 | float* P = args->P; |
73 | float* dPdu = args->dPdu; |
74 | float* dPdv = args->dPdv; |
75 | float* ddPdudu = args->ddPdudu; |
76 | float* ddPdvdv = args->ddPdvdv; |
77 | float* ddPdudv = args->ddPdudv; |
78 | unsigned int valueCount = args->valueCount; |
79 | |
80 | /* calculate base pointer and stride */ |
81 | assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || |
82 | (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); |
83 | const char* src = nullptr; |
84 | size_t stride = 0; |
85 | if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { |
86 | src = vertexAttribs[bufferSlot].getPtr(); |
87 | stride = vertexAttribs[bufferSlot].getStride(); |
88 | } else { |
89 | src = vertices[bufferSlot].getPtr(); |
90 | stride = vertices[bufferSlot].getStride(); |
91 | } |
92 | |
93 | const Grid& grid = grids[primID]; |
94 | const int grid_width = grid.resX-1; |
95 | const int grid_height = grid.resY-1; |
96 | const float rcp_grid_width = rcp(float(grid_width)); |
97 | const float rcp_grid_height = rcp(float(grid_height)); |
98 | const int iu = min((int)floor(U*grid_width ),grid_width); |
99 | const int iv = min((int)floor(V*grid_height),grid_height); |
100 | const float u = U*grid_width-float(iu); |
101 | const float v = V*grid_height-float(iv); |
102 | |
103 | for (unsigned int i=0; i<valueCount; i+=N) |
104 | { |
105 | const size_t ofs = i*sizeof(float); |
106 | const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu; |
107 | const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu; |
108 | |
109 | const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); |
110 | const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]); |
111 | const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]); |
112 | const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]); |
113 | const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]); |
114 | const vbool<N> left = u+v <= 1.0f; |
115 | const vfloat<N> Q0 = select(left,p0,p2); |
116 | const vfloat<N> Q1 = select(left,p1,p3); |
117 | const vfloat<N> Q2 = select(left,p3,p1); |
118 | const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u); |
119 | const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v); |
120 | const vfloat<N> W = 1.0f-U-V; |
121 | |
122 | if (P) { |
123 | mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); |
124 | } |
125 | if (dPdu) { |
126 | assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width); |
127 | assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height); |
128 | } |
129 | if (ddPdudu) { |
130 | assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero)); |
131 | assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero)); |
132 | assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero)); |
133 | } |
134 | } |
135 | } |
136 | |
137 | void addElementsToCount (GeometryCounts & counts) const; |
138 | |
139 | __forceinline unsigned int getNumSubGrids(const size_t gridID) |
140 | { |
141 | const Grid &g = grid(gridID); |
142 | return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1)); |
143 | } |
144 | |
145 | /*! get fast access to first vertex buffer */ |
146 | __forceinline float * getCompactVertexArray () const { |
147 | return (float*) vertices0.getPtr(); |
148 | } |
149 | |
150 | public: |
151 | |
152 | /*! returns number of vertices */ |
153 | __forceinline size_t numVertices() const { |
154 | return vertices[0].size(); |
155 | } |
156 | |
157 | /*! returns i'th grid*/ |
158 | __forceinline const Grid& grid(size_t i) const { |
159 | return grids[i]; |
160 | } |
161 | |
162 | /*! returns i'th vertex of the first time step */ |
163 | __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load |
164 | return vertices0[i]; |
165 | } |
166 | |
167 | /*! returns i'th vertex of the first time step */ |
168 | __forceinline const char* vertexPtr(size_t i) const { |
169 | return vertices0.getPtr(i); |
170 | } |
171 | |
172 | /*! returns i'th vertex of itime'th timestep */ |
173 | __forceinline const Vec3fa vertex(size_t i, size_t itime) const { |
174 | return vertices[itime][i]; |
175 | } |
176 | |
177 | /*! returns i'th vertex of itime'th timestep */ |
178 | __forceinline const char* vertexPtr(size_t i, size_t itime) const { |
179 | return vertices[itime].getPtr(i); |
180 | } |
181 | |
182 | /*! returns i'th vertex of the first timestep */ |
183 | __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const { |
184 | assert(x < (size_t)g.resX); |
185 | assert(y < (size_t)g.resY); |
186 | return g.startVtxID + x + y * g.lineVtxOffset; |
187 | } |
188 | |
189 | /*! returns i'th vertex of the first timestep */ |
190 | __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const { |
191 | const size_t index = grid_vertex_index(g,x,y); |
192 | return vertex(index); |
193 | } |
194 | |
195 | /*! returns i'th vertex of the itime'th timestep */ |
196 | __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const { |
197 | const size_t index = grid_vertex_index(g,x,y); |
198 | return vertex(index,itime); |
199 | } |
200 | |
201 | /*! calculates the build bounds of the i'th primitive, if it's valid */ |
202 | __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const |
203 | { |
204 | BBox3fa b(empty); |
205 | for (size_t t=0; t<numTimeSteps; t++) |
206 | { |
207 | for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++) |
208 | for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++) |
209 | { |
210 | const Vec3fa v = grid_vertex(g,x,y,t); |
211 | if (unlikely(!isvalid(v))) return false; |
212 | b.extend(v); |
213 | } |
214 | } |
215 | |
216 | bbox = b; |
217 | return true; |
218 | } |
219 | |
220 | /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */ |
221 | __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const |
222 | { |
223 | assert(itime < numTimeSteps); |
224 | BBox3fa b0(empty); |
225 | for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++) |
226 | for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++) |
227 | { |
228 | const Vec3fa v = grid_vertex(g,x,y,itime); |
229 | if (unlikely(!isvalid(v))) return false; |
230 | b0.extend(v); |
231 | } |
232 | |
233 | /* use bounds of first time step in builder */ |
234 | bbox = b0; |
235 | return true; |
236 | } |
237 | |
238 | __forceinline bool valid(size_t gridID, size_t itime=0) const { |
239 | return valid(gridID, make_range(itime, itime)); |
240 | } |
241 | |
242 | /*! check if the i'th primitive is valid between the specified time range */ |
243 | __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const |
244 | { |
245 | if (unlikely(gridID >= grids.size())) return false; |
246 | const Grid &g = grid(gridID); |
247 | if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false; |
248 | if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false; |
249 | |
250 | for (size_t y=0;y<g.resY;y++) |
251 | for (size_t x=0;x<g.resX;x++) |
252 | for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) |
253 | if (!isvalid(grid_vertex(g,x,y,itime))) return false; |
254 | return true; |
255 | } |
256 | |
257 | |
258 | __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const |
259 | { |
260 | BBox3fa box(empty); |
261 | buildBounds(g,sx,sy,itime,box); |
262 | return box; |
263 | } |
264 | |
265 | __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const { |
266 | BBox3fa bounds0, bounds1; |
267 | buildBounds(g,sx,sy,itime+0,bounds0); |
268 | buildBounds(g,sx,sy,itime+1,bounds1); |
269 | return LBBox3fa(bounds0,bounds1); |
270 | } |
271 | |
272 | /*! calculates the linear bounds of the i'th primitive for the specified time range */ |
273 | __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const { |
274 | return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments); |
275 | } |
276 | |
public:
BufferView<Grid> grids; //!< array of grids, indexed by grid (primitive) ID
BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer (presumably the same data as vertices[0] -- confirm where it is assigned)
vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
vector<RawBufferView> vertexAttribs; //!< vertex attribute buffers, indexed by attribute slot
282 | }; |
283 | |
284 | namespace isa |
285 | { |
286 | struct GridMeshISA : public GridMesh |
287 | { |
288 | GridMeshISA (Device* device) |
289 | : GridMesh(device) {} |
290 | }; |
291 | } |
292 | |
293 | DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*); |
294 | } |
295 | |