1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "primitive.h" |
7 | #include "../common/scene.h" |
8 | |
9 | namespace embree |
10 | { |
11 | /* Stores M quads from an indexed face set */ |
12 | template <int M> |
13 | struct QuadMi |
14 | { |
15 | /* Virtual interface to query information about the quad type */ |
16 | struct Type : public PrimitiveType |
17 | { |
18 | const char* name() const; |
19 | size_t sizeActive(const char* This) const; |
20 | size_t sizeTotal(const char* This) const; |
21 | size_t getBytes(const char* This) const; |
22 | }; |
23 | static Type type; |
24 | |
25 | public: |
26 | |
27 | /* primitive supports multiple time segments */ |
28 | static const bool singleTimeSegment = false; |
29 | |
30 | /* Returns maximum number of stored quads */ |
31 | static __forceinline size_t max_size() { return M; } |
32 | |
33 | /* Returns required number of primitive blocks for N primitives */ |
34 | static __forceinline size_t blocks(size_t N) { return (N+max_size()-1)/max_size(); } |
35 | |
36 | public: |
37 | |
38 | /* Default constructor */ |
39 | __forceinline QuadMi() { } |
40 | |
41 | /* Construction from vertices and IDs */ |
42 | __forceinline QuadMi(const vuint<M>& v0, |
43 | const vuint<M>& v1, |
44 | const vuint<M>& v2, |
45 | const vuint<M>& v3, |
46 | const vuint<M>& geomIDs, |
47 | const vuint<M>& primIDs) |
48 | #if defined(EMBREE_COMPACT_POLYS) |
49 | : geomIDs(geomIDs), primIDs(primIDs) {} |
50 | #else |
51 | : v0_(v0),v1_(v1), v2_(v2), v3_(v3), geomIDs(geomIDs), primIDs(primIDs) {} |
52 | #endif |
53 | |
54 | /* Returns a mask that tells which quads are valid */ |
55 | __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } |
56 | |
57 | /* Returns if the specified quad is valid */ |
58 | __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } |
59 | |
60 | /* Returns the number of stored quads */ |
61 | __forceinline size_t size() const { return bsf(~movemask(valid())); } |
62 | |
63 | /* Returns the geometry IDs */ |
64 | __forceinline vuint<M>& geomID() { return geomIDs; } |
65 | __forceinline const vuint<M>& geomID() const { return geomIDs; } |
66 | __forceinline unsigned int geomID(const size_t i) const { assert(i<M); assert(geomIDs[i] != -1); return geomIDs[i]; } |
67 | |
68 | /* Returns the primitive IDs */ |
69 | __forceinline vuint<M>& primID() { return primIDs; } |
70 | __forceinline const vuint<M>& primID() const { return primIDs; } |
71 | __forceinline unsigned int primID(const size_t i) const { assert(i<M); return primIDs[i]; } |
72 | |
73 | /* Calculate the bounds of the quads */ |
74 | __forceinline const BBox3fa bounds(const Scene *const scene, const size_t itime=0) const |
75 | { |
76 | BBox3fa bounds = empty; |
77 | for (size_t i=0; i<M && valid(i); i++) { |
78 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); |
79 | bounds.extend(mesh->bounds(primID(i),itime)); |
80 | } |
81 | return bounds; |
82 | } |
83 | |
84 | /* Calculate the linear bounds of the primitive */ |
85 | __forceinline LBBox3fa linearBounds(const Scene* const scene, const size_t itime) { |
86 | return LBBox3fa(bounds(scene,itime+0),bounds(scene,itime+1)); |
87 | } |
88 | |
89 | __forceinline LBBox3fa linearBounds(const Scene *const scene, size_t itime, size_t numTimeSteps) |
90 | { |
91 | LBBox3fa allBounds = empty; |
92 | for (size_t i=0; i<M && valid(i); i++) |
93 | { |
94 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); |
95 | allBounds.extend(mesh->linearBounds(primID(i), itime, numTimeSteps)); |
96 | } |
97 | return allBounds; |
98 | } |
99 | |
100 | __forceinline LBBox3fa linearBounds(const Scene *const scene, const BBox1f time_range) |
101 | { |
102 | LBBox3fa allBounds = empty; |
103 | for (size_t i=0; i<M && valid(i); i++) |
104 | { |
105 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(i)); |
106 | allBounds.extend(mesh->linearBounds(primID(i), time_range)); |
107 | } |
108 | return allBounds; |
109 | } |
110 | |
111 | /* Fill quad from quad list */ |
112 | template<typename PrimRefT> |
113 | __forceinline void fill(const PrimRefT* prims, size_t& begin, size_t end, Scene* scene) |
114 | { |
115 | vuint<M> geomID = -1, primID = -1; |
116 | const PrimRefT* prim = &prims[begin]; |
117 | vuint<M> v0 = zero, v1 = zero, v2 = zero, v3 = zero; |
118 | |
119 | for (size_t i=0; i<M; i++) |
120 | { |
121 | if (begin<end) { |
122 | geomID[i] = prim->geomID(); |
123 | primID[i] = prim->primID(); |
124 | #if !defined(EMBREE_COMPACT_POLYS) |
125 | const QuadMesh* mesh = scene->get<QuadMesh>(prim->geomID()); |
126 | const QuadMesh::Quad& q = mesh->quad(prim->primID()); |
127 | unsigned int_stride = mesh->vertices0.getStride()/4; |
128 | v0[i] = q.v[0] * int_stride; |
129 | v1[i] = q.v[1] * int_stride; |
130 | v2[i] = q.v[2] * int_stride; |
131 | v3[i] = q.v[3] * int_stride; |
132 | #endif |
133 | begin++; |
134 | } else { |
135 | assert(i); |
136 | if (likely(i > 0)) { |
137 | geomID[i] = geomID[0]; // always valid geomIDs |
138 | primID[i] = -1; // indicates invalid data |
139 | v0[i] = v0[0]; |
140 | v1[i] = v0[0]; |
141 | v2[i] = v0[0]; |
142 | v3[i] = v0[0]; |
143 | } |
144 | } |
145 | if (begin<end) prim = &prims[begin]; |
146 | } |
147 | new (this) QuadMi(v0,v1,v2,v3,geomID,primID); // FIXME: use non temporal store |
148 | } |
149 | |
150 | __forceinline LBBox3fa fillMB(const PrimRef* prims, size_t& begin, size_t end, Scene* scene, size_t itime) |
151 | { |
152 | fill(prims, begin, end, scene); |
153 | return linearBounds(scene, itime); |
154 | } |
155 | |
156 | __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t end, Scene* scene, const BBox1f time_range) |
157 | { |
158 | fill(prims, begin, end, scene); |
159 | return linearBounds(scene, time_range); |
160 | } |
161 | |
162 | friend embree_ostream operator<<(embree_ostream cout, const QuadMi& quad) { |
163 | return cout << "QuadMi<" << M << ">( " |
164 | #if !defined(EMBREE_COMPACT_POLYS) |
165 | << "v0 = " << quad.v0_ << ", v1 = " << quad.v1_ << ", v2 = " << quad.v2_ << ", v3 = " << quad.v3_ << ", " |
166 | #endif |
167 | << "geomID = " << quad.geomIDs << ", primID = " << quad.primIDs << " )" ; |
168 | } |
169 | |
170 | protected: |
171 | #if !defined(EMBREE_COMPACT_POLYS) |
172 | vuint<M> v0_; // 4 byte offset of 1st vertex |
173 | vuint<M> v1_; // 4 byte offset of 2nd vertex |
174 | vuint<M> v2_; // 4 byte offset of 3rd vertex |
175 | vuint<M> v3_; // 4 byte offset of 4th vertex |
176 | #endif |
177 | vuint<M> geomIDs; // geometry ID of mesh |
178 | vuint<M> primIDs; // primitive ID of primitive inside mesh |
179 | }; |
180 | |
181 | namespace isa |
182 | { |
183 | |
184 | template<int M> |
185 | struct QuadMi : public embree::QuadMi<M> |
186 | { |
187 | #if !defined(EMBREE_COMPACT_POLYS) |
188 | using embree::QuadMi<M>::v0_; |
189 | using embree::QuadMi<M>::v1_; |
190 | using embree::QuadMi<M>::v2_; |
191 | using embree::QuadMi<M>::v3_; |
192 | #endif |
193 | using embree::QuadMi<M>::geomIDs; |
194 | using embree::QuadMi<M>::primIDs; |
195 | using embree::QuadMi<M>::geomID; |
196 | using embree::QuadMi<M>::primID; |
197 | using embree::QuadMi<M>::valid; |
198 | |
199 | template<int vid> |
200 | __forceinline Vec3f getVertex(const size_t index, const Scene *const scene) const |
201 | { |
202 | #if defined(EMBREE_COMPACT_POLYS) |
203 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); |
204 | const QuadMesh::Quad& quad = mesh->quad(primID(index)); |
205 | return (Vec3f) mesh->vertices[0][quad.v[vid]]; |
206 | #else |
207 | const vuint<M>& v = getVertexOffset<vid>(); |
208 | const float* vertices = scene->vertices[geomID(index)]; |
209 | return (Vec3f&) vertices[v[index]]; |
210 | #endif |
211 | } |
212 | |
213 | template<int vid, typename T> |
214 | __forceinline Vec3<T> getVertex(const size_t index, const Scene *const scene, const size_t itime, const T& ftime) const |
215 | { |
216 | #if defined(EMBREE_COMPACT_POLYS) |
217 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); |
218 | const QuadMesh::Quad& quad = mesh->quad(primID(index)); |
219 | const Vec3fa v0 = mesh->vertices[itime+0][quad.v[vid]]; |
220 | const Vec3fa v1 = mesh->vertices[itime+1][quad.v[vid]]; |
221 | #else |
222 | const vuint<M>& v = getVertexOffset<vid>(); |
223 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); |
224 | const float* vertices0 = (const float*) mesh->vertexPtr(0,itime+0); |
225 | const float* vertices1 = (const float*) mesh->vertexPtr(0,itime+1); |
226 | const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); |
227 | const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); |
228 | #endif |
229 | const Vec3<T> p0(v0.x,v0.y,v0.z); |
230 | const Vec3<T> p1(v1.x,v1.y,v1.z); |
231 | return lerp(p0,p1,ftime); |
232 | } |
233 | |
234 | template<int vid, int K, typename T> |
235 | __forceinline Vec3<T> getVertex(const vbool<K>& valid, const size_t index, const Scene *const scene, const vint<K>& itime, const T& ftime) const |
236 | { |
237 | Vec3<T> p0, p1; |
238 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); |
239 | |
240 | for (size_t mask=movemask(valid), i=bsf(mask); mask; mask=btc(mask,i), i=bsf(mask)) |
241 | { |
242 | #if defined(EMBREE_COMPACT_POLYS) |
243 | const QuadMesh::Quad& quad = mesh->quad(primID(index)); |
244 | const Vec3fa v0 = mesh->vertices[itime[i]+0][quad.v[vid]]; |
245 | const Vec3fa v1 = mesh->vertices[itime[i]+1][quad.v[vid]]; |
246 | #else |
247 | const vuint<M>& v = getVertexOffset<vid>(); |
248 | const float* vertices0 = (const float*) mesh->vertexPtr(0,itime[i]+0); |
249 | const float* vertices1 = (const float*) mesh->vertexPtr(0,itime[i]+1); |
250 | const Vec3fa v0 = Vec3fa::loadu(vertices0+v[index]); |
251 | const Vec3fa v1 = Vec3fa::loadu(vertices1+v[index]); |
252 | #endif |
253 | p0.x[i] = v0.x; p0.y[i] = v0.y; p0.z[i] = v0.z; |
254 | p1.x[i] = v1.x; p1.y[i] = v1.y; p1.z[i] = v1.z; |
255 | } |
256 | return (T(one)-ftime)*p0 + ftime*p1; |
257 | } |
258 | |
259 | struct Quad { |
260 | vfloat4 v0,v1,v2,v3; |
261 | }; |
262 | |
263 | #if defined(EMBREE_COMPACT_POLYS) |
264 | |
265 | __forceinline Quad loadQuad(const int i, const Scene* const scene) const |
266 | { |
267 | const unsigned int geomID = geomIDs[i]; |
268 | const unsigned int primID = primIDs[i]; |
269 | if (unlikely(primID == -1)) return { zero, zero, zero, zero }; |
270 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID); |
271 | const QuadMesh::Quad& quad = mesh->quad(primID); |
272 | const vfloat4 v0 = (vfloat4) mesh->vertices0[quad.v[0]]; |
273 | const vfloat4 v1 = (vfloat4) mesh->vertices0[quad.v[1]]; |
274 | const vfloat4 v2 = (vfloat4) mesh->vertices0[quad.v[2]]; |
275 | const vfloat4 v3 = (vfloat4) mesh->vertices0[quad.v[3]]; |
276 | return { v0, v1, v2, v3 }; |
277 | } |
278 | |
279 | __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const |
280 | { |
281 | const unsigned int geomID = geomIDs[i]; |
282 | const unsigned int primID = primIDs[i]; |
283 | if (unlikely(primID == -1)) return { zero, zero, zero, zero }; |
284 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID); |
285 | const QuadMesh::Quad& quad = mesh->quad(primID); |
286 | const vfloat4 v0 = (vfloat4) mesh->vertices[itime][quad.v[0]]; |
287 | const vfloat4 v1 = (vfloat4) mesh->vertices[itime][quad.v[1]]; |
288 | const vfloat4 v2 = (vfloat4) mesh->vertices[itime][quad.v[2]]; |
289 | const vfloat4 v3 = (vfloat4) mesh->vertices[itime][quad.v[3]]; |
290 | return { v0, v1, v2, v3 }; |
291 | } |
292 | |
293 | #else |
294 | |
295 | __forceinline Quad loadQuad(const int i, const Scene* const scene) const |
296 | { |
297 | const float* vertices = scene->vertices[geomID(i)]; |
298 | const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); |
299 | const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); |
300 | const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); |
301 | const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]); |
302 | return { v0, v1, v2, v3 }; |
303 | } |
304 | |
305 | __forceinline Quad loadQuad(const int i, const int itime, const Scene* const scene) const |
306 | { |
307 | const unsigned int geomID = geomIDs[i]; |
308 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID); |
309 | const float* vertices = (const float*) mesh->vertexPtr(0,itime); |
310 | const vfloat4 v0 = vfloat4::loadu(vertices + v0_[i]); |
311 | const vfloat4 v1 = vfloat4::loadu(vertices + v1_[i]); |
312 | const vfloat4 v2 = vfloat4::loadu(vertices + v2_[i]); |
313 | const vfloat4 v3 = vfloat4::loadu(vertices + v3_[i]); |
314 | return { v0, v1, v2, v3 }; |
315 | } |
316 | |
317 | #endif |
318 | |
319 | /* Gather the quads */ |
320 | __forceinline void gather(Vec3vf<M>& p0, |
321 | Vec3vf<M>& p1, |
322 | Vec3vf<M>& p2, |
323 | Vec3vf<M>& p3, |
324 | const Scene *const scene) const; |
325 | |
326 | #if defined(__AVX512F__) |
327 | __forceinline void gather(Vec3vf16& p0, |
328 | Vec3vf16& p1, |
329 | Vec3vf16& p2, |
330 | Vec3vf16& p3, |
331 | const Scene *const scene) const; |
332 | #endif |
333 | |
334 | template<int K> |
335 | #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 2000) // workaround for compiler bug in ICC 2019 |
336 | __noinline |
337 | #else |
338 | __forceinline |
339 | #endif |
340 | void gather(const vbool<K>& valid, |
341 | Vec3vf<K>& p0, |
342 | Vec3vf<K>& p1, |
343 | Vec3vf<K>& p2, |
344 | Vec3vf<K>& p3, |
345 | const size_t index, |
346 | const Scene* const scene, |
347 | const vfloat<K>& time) const |
348 | { |
349 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); |
350 | |
351 | vfloat<K> ftime; |
352 | const vint<K> itime = mesh->timeSegment<K>(time, ftime); |
353 | |
354 | const size_t first = bsf(movemask(valid)); |
355 | if (likely(all(valid,itime[first] == itime))) |
356 | { |
357 | p0 = getVertex<0>(index, scene, itime[first], ftime); |
358 | p1 = getVertex<1>(index, scene, itime[first], ftime); |
359 | p2 = getVertex<2>(index, scene, itime[first], ftime); |
360 | p3 = getVertex<3>(index, scene, itime[first], ftime); |
361 | } |
362 | else |
363 | { |
364 | p0 = getVertex<0,K>(valid, index, scene, itime, ftime); |
365 | p1 = getVertex<1,K>(valid, index, scene, itime, ftime); |
366 | p2 = getVertex<2,K>(valid, index, scene, itime, ftime); |
367 | p3 = getVertex<3,K>(valid, index, scene, itime, ftime); |
368 | } |
369 | } |
370 | |
371 | __forceinline void gather(Vec3vf<M>& p0, |
372 | Vec3vf<M>& p1, |
373 | Vec3vf<M>& p2, |
374 | Vec3vf<M>& p3, |
375 | const QuadMesh* mesh, |
376 | const Scene *const scene, |
377 | const int itime) const; |
378 | |
379 | __forceinline void gather(Vec3vf<M>& p0, |
380 | Vec3vf<M>& p1, |
381 | Vec3vf<M>& p2, |
382 | Vec3vf<M>& p3, |
383 | const Scene *const scene, |
384 | const float time) const; |
385 | |
386 | /* Updates the primitive */ |
387 | __forceinline BBox3fa update(QuadMesh* mesh) |
388 | { |
389 | BBox3fa bounds = empty; |
390 | for (size_t i=0; i<M; i++) |
391 | { |
392 | if (!valid(i)) break; |
393 | const unsigned primId = primID(i); |
394 | const QuadMesh::Quad& q = mesh->quad(primId); |
395 | const Vec3fa p0 = mesh->vertex(q.v[0]); |
396 | const Vec3fa p1 = mesh->vertex(q.v[1]); |
397 | const Vec3fa p2 = mesh->vertex(q.v[2]); |
398 | const Vec3fa p3 = mesh->vertex(q.v[3]); |
399 | bounds.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2),BBox3fa(p3))); |
400 | } |
401 | return bounds; |
402 | } |
403 | |
404 | private: |
405 | #if !defined(EMBREE_COMPACT_POLYS) |
406 | template<int N> const vuint<M>& getVertexOffset() const; |
407 | #endif |
408 | }; |
409 | |
410 | #if !defined(EMBREE_COMPACT_POLYS) |
411 | template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<0>() const { return v0_; } |
412 | template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<1>() const { return v1_; } |
413 | template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<2>() const { return v2_; } |
414 | template<> template<> __forceinline const vuint<4>& QuadMi<4>::getVertexOffset<3>() const { return v3_; } |
415 | #endif |
416 | |
417 | template<> |
418 | __forceinline void QuadMi<4>::gather(Vec3vf4& p0, |
419 | Vec3vf4& p1, |
420 | Vec3vf4& p2, |
421 | Vec3vf4& p3, |
422 | const Scene *const scene) const |
423 | { |
424 | prefetchL1(((char*)this)+0*64); |
425 | prefetchL1(((char*)this)+1*64); |
426 | const Quad tri0 = loadQuad(0,scene); |
427 | const Quad tri1 = loadQuad(1,scene); |
428 | const Quad tri2 = loadQuad(2,scene); |
429 | const Quad tri3 = loadQuad(3,scene); |
430 | transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); |
431 | transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); |
432 | transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); |
433 | transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z); |
434 | } |
435 | |
436 | template<> |
437 | __forceinline void QuadMi<4>::gather(Vec3vf4& p0, |
438 | Vec3vf4& p1, |
439 | Vec3vf4& p2, |
440 | Vec3vf4& p3, |
441 | const QuadMesh* mesh, |
442 | const Scene *const scene, |
443 | const int itime) const |
444 | { |
445 | // FIXME: for trianglei there all geometries are identical, is this the case here too? |
446 | |
447 | const Quad tri0 = loadQuad(0,itime,scene); |
448 | const Quad tri1 = loadQuad(1,itime,scene); |
449 | const Quad tri2 = loadQuad(2,itime,scene); |
450 | const Quad tri3 = loadQuad(3,itime,scene); |
451 | transpose(tri0.v0,tri1.v0,tri2.v0,tri3.v0,p0.x,p0.y,p0.z); |
452 | transpose(tri0.v1,tri1.v1,tri2.v1,tri3.v1,p1.x,p1.y,p1.z); |
453 | transpose(tri0.v2,tri1.v2,tri2.v2,tri3.v2,p2.x,p2.y,p2.z); |
454 | transpose(tri0.v3,tri1.v3,tri2.v3,tri3.v3,p3.x,p3.y,p3.z); |
455 | } |
456 | |
457 | template<> |
458 | __forceinline void QuadMi<4>::gather(Vec3vf4& p0, |
459 | Vec3vf4& p1, |
460 | Vec3vf4& p2, |
461 | Vec3vf4& p3, |
462 | const Scene *const scene, |
463 | const float time) const |
464 | { |
465 | const QuadMesh* mesh = scene->get<QuadMesh>(geomID(0)); // in mblur mode all geometries are identical |
466 | |
467 | float ftime; |
468 | const int itime = mesh->timeSegment(time, ftime); |
469 | |
470 | Vec3vf4 a0,a1,a2,a3; gather(a0,a1,a2,a3,mesh,scene,itime); |
471 | Vec3vf4 b0,b1,b2,b3; gather(b0,b1,b2,b3,mesh,scene,itime+1); |
472 | p0 = lerp(a0,b0,vfloat4(ftime)); |
473 | p1 = lerp(a1,b1,vfloat4(ftime)); |
474 | p2 = lerp(a2,b2,vfloat4(ftime)); |
475 | p3 = lerp(a3,b3,vfloat4(ftime)); |
476 | } |
477 | } |
478 | |
/* Out-of-class definition of the static member 'type' declared inside
 * QuadMi<M> (the embree-namespace struct, since namespace isa is already
 * closed at this point) */
template<int M>
typename QuadMi<M>::Type QuadMi<M>::type;
481 | |
482 | typedef QuadMi<4> Quad4i; |
483 | } |
484 | |