1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "default.h"
7#include "instance_stack.h"
8
9// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted
10
11namespace embree
12{
  /* NOTE(review): presumably the upper bound on the number of rays handled per
   * internal stream batch; the users of this constant are not visible in this
   * section - confirm before relying on this description */
  static const size_t MAX_INTERNAL_STREAM_SIZE = 32;
14
15 /* Ray structure for K rays */
16 template<int K>
17 struct RayK
18 {
19 /* Default construction does nothing */
20 __forceinline RayK() {}
21
22 /* Constructs a ray from origin, direction, and ray segment. Near
23 * has to be smaller than far */
24 __forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
25 const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
26 const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
27 : org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {}
28
29 /* Returns the size of the ray */
30 static __forceinline size_t size() { return K; }
31
32 /* Calculates if this is a valid ray that does not cause issues during traversal */
33 __forceinline vbool<K> valid() const
34 {
35 const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE));
36 const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE));
37 const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE));
38 const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf);
39 const vbool<K> vf = abs(tfar) <= vfloat<K>(inf);
40 return vx & vy & vz & vn & vf;
41 }
42
43 __forceinline void get(RayK<1>* ray) const;
44 __forceinline void get(size_t i, RayK<1>& ray) const;
45 __forceinline void set(const RayK<1>* ray);
46 __forceinline void set(size_t i, const RayK<1>& ray);
47
48 __forceinline void copy(size_t dest, size_t source);
49
50 __forceinline vint<K> octant() const
51 {
52 return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) |
53 select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) |
54 select(dir.z < 0.0f, vint<K>(4), vint<K>(zero));
55 }
56
57 /* Ray data */
58 Vec3vf<K> org; // ray origin
59 vfloat<K> _tnear; // start of ray segment
60 Vec3vf<K> dir; // ray direction
61 vfloat<K> _time; // time of this ray for motion blur
62 vfloat<K> tfar; // end of ray segment
63 vint<K> mask; // used to mask out objects during traversal
64 vint<K> id;
65 vint<K> flags;
66
67 __forceinline vfloat<K>& tnear() { return _tnear; }
68 __forceinline vfloat<K>& time() { return _time; }
69 __forceinline const vfloat<K>& tnear() const { return _tnear; }
70 __forceinline const vfloat<K>& time() const { return _time; }
71 };
72
73 /* Ray+hit structure for K rays */
74 template<int K>
75 struct RayHitK : RayK<K>
76 {
77 using RayK<K>::org;
78 using RayK<K>::_tnear;
79 using RayK<K>::dir;
80 using RayK<K>::_time;
81 using RayK<K>::tfar;
82 using RayK<K>::mask;
83 using RayK<K>::id;
84 using RayK<K>::flags;
85
86 using RayK<K>::tnear;
87 using RayK<K>::time;
88
89 /* Default construction does nothing */
90 __forceinline RayHitK() {}
91
92 /* Constructs a ray from origin, direction, and ray segment. Near
93 * has to be smaller than far */
94 __forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
95 const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
96 const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
97 : RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),
98 geomID(RTC_INVALID_GEOMETRY_ID)
99 {
100 for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
101 instID[l] = RTC_INVALID_GEOMETRY_ID;
102 }
103
104 __forceinline RayHitK(const RayK<K>& ray)
105 : RayK<K>(ray),
106 geomID(RTC_INVALID_GEOMETRY_ID)
107 {
108 for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
109 instID[l] = RTC_INVALID_GEOMETRY_ID;
110 }
111
112 __forceinline RayHitK<K>& operator =(const RayK<K>& ray)
113 {
114 org = ray.org;
115 _tnear = ray._tnear;
116 dir = ray.dir;
117 _time = ray._time;
118 tfar = ray.tfar;
119 mask = ray.mask;
120 id = ray.id;
121 flags = ray.flags;
122
123 geomID = RTC_INVALID_GEOMETRY_ID;
124 for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
125 instID[l] = RTC_INVALID_GEOMETRY_ID;
126
127 return *this;
128 }
129
130 /* Calculates if the hit is valid */
131 __forceinline void verifyHit(const vbool<K>& valid0) const
132 {
133 vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
134 const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
135 const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
136 const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE));
137 const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
138 const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
139 const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);
140 if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");
141 if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");
142 if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");
143 if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");
144 if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");
145 if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");
146 }
147
148 __forceinline void get(RayHitK<1>* ray) const;
149 __forceinline void get(size_t i, RayHitK<1>& ray) const;
150 __forceinline void set(const RayHitK<1>* ray);
151 __forceinline void set(size_t i, const RayHitK<1>& ray);
152
153 __forceinline void copy(size_t dest, size_t source);
154
155 /* Hit data */
156 Vec3vf<K> Ng; // geometry normal
157 vfloat<K> u; // barycentric u coordinate of hit
158 vfloat<K> v; // barycentric v coordinate of hit
159 vuint<K> primID; // primitive ID
160 vuint<K> geomID; // geometry ID
161 vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
162 };
163
164 /* Specialization for a single ray */
165 template<>
166 struct RayK<1>
167 {
168 /* Default construction does nothing */
169 __forceinline RayK() {}
170
171 /* Constructs a ray from origin, direction, and ray segment. Near
172 * has to be smaller than far */
173 __forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
174 : org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {}
175
176 /* Calculates if this is a valid ray that does not cause issues during traversal */
177 __forceinline bool valid() const {
178 return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf);
179 }
180
181 /* Ray data */
182 Vec3ff org; // 3 floats for ray origin, 1 float for tnear
183 //float tnear; // start of ray segment
184 Vec3ff dir; // 3 floats for ray direction, 1 float for time
185 // float time;
186 float tfar; // end of ray segment
187 int mask; // used to mask out objects during traversal
188 int id; // ray ID
189 int flags; // ray flags
190
191 __forceinline float& tnear() { return org.w; };
192 __forceinline const float& tnear() const { return org.w; };
193
194 __forceinline float& time() { return dir.w; };
195 __forceinline const float& time() const { return dir.w; };
196
197 };
198
199 template<>
200 struct RayHitK<1> : RayK<1>
201 {
202 /* Default construction does nothing */
203 __forceinline RayHitK() {}
204
205 /* Constructs a ray from origin, direction, and ray segment. Near
206 * has to be smaller than far */
207 __forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
208 : RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),
209 geomID(RTC_INVALID_GEOMETRY_ID) {}
210
211 __forceinline RayHitK(const RayK<1>& ray)
212 : RayK<1>(ray),
213 geomID(RTC_INVALID_GEOMETRY_ID) {}
214
215 __forceinline RayHitK<1>& operator =(const RayK<1>& ray)
216 {
217 org = ray.org;
218 dir = ray.dir;
219 tfar = ray.tfar;
220 mask = ray.mask;
221 id = ray.id;
222 flags = ray.flags;
223
224 geomID = RTC_INVALID_GEOMETRY_ID;
225
226 return *this;
227 }
228
229 /* Calculates if the hit is valid */
230 __forceinline void verifyHit() const
231 {
232 if (geomID == RTC_INVALID_GEOMETRY_ID) return;
233 const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));
234 const bool vu = (abs(u) <= FLT_LARGE);
235 const bool vv = (abs(u) <= FLT_LARGE);
236 const bool vnx = abs(Ng.x) <= FLT_LARGE;
237 const bool vny = abs(Ng.y) <= FLT_LARGE;
238 const bool vnz = abs(Ng.z) <= FLT_LARGE;
239 if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");
240 if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");
241 if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");
242 if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");
243 if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");
244 if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");
245 }
246
247 /* Hit data */
248 Vec3f Ng; // not normalized geometry normal
249 float u; // barycentric u coordinate of hit
250 float v; // barycentric v coordinate of hit
251 unsigned int primID; // primitive ID
252 unsigned int geomID; // geometry ID
253 unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
254 };
255
256 /* Converts ray packet to single rays */
257 template<int K>
258 __forceinline void RayK<K>::get(RayK<1>* ray) const
259 {
260 for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
261 {
262 ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i];
263 ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i];
264 ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i];
265 }
266 }
267
268 template<int K>
269 __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
270 {
271 // FIXME: use SIMD transpose
272 for (size_t i = 0; i < K; i++)
273 get(i, ray[i]);
274 }
275
276 /* Extracts a single ray out of a ray packet*/
277 template<int K>
278 __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
279 {
280 ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
281 ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i];
282 ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
283 }
284
285 template<int K>
286 __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
287 {
288 ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
289 ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i];
290 ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
291 ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i];
292 ray.u = u[i]; ray.v = v[i];
293 ray.primID = primID[i]; ray.geomID = geomID[i];
294
295 instance_id_stack::copy_VU<K>(instID, ray.instID, i);
296 }
297
298 /* Converts single rays to ray packet */
299 template<int K>
300 __forceinline void RayK<K>::set(const RayK<1>* ray)
301 {
302 // FIXME: use SIMD transpose
303 for (size_t i = 0; i < K; i++)
304 set(i, ray[i]);
305 }
306
307 template<int K>
308 __forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
309 {
310 // FIXME: use SIMD transpose
311 for (size_t i = 0; i < K; i++)
312 set(i, ray[i]);
313 }
314
315 /* inserts a single ray into a ray packet element */
316 template<int K>
317 __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
318 {
319 org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
320 dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
321 tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
322 }
323
324 template<int K>
325 __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
326 {
327 org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
328 dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
329 tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
330 Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z;
331 u[i] = ray.u; v[i] = ray.v;
332 primID[i] = ray.primID; geomID[i] = ray.geomID;
333
334 instance_id_stack::copy_UV<K>(ray.instID, instID, i);
335 }
336
337 /* copies a ray packet element into another element*/
338 template<int K>
339 __forceinline void RayK<K>::copy(size_t dest, size_t source)
340 {
341 org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
342 dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
343 tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
344 }
345
346 template<int K>
347 __forceinline void RayHitK<K>::copy(size_t dest, size_t source)
348 {
349 org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
350 dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
351 tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
352 Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source];
353 u[dest] = u[source]; v[dest] = v[source];
354 primID[dest] = primID[source]; geomID[dest] = geomID[source];
355
356 instance_id_stack::copy_VV<K>(instID, instID, source, dest);
357 }
358
359 /* Shortcuts */
360 typedef RayK<1> Ray;
361 typedef RayK<4> Ray4;
362 typedef RayK<8> Ray8;
363 typedef RayK<16> Ray16;
364 struct RayN;
365
366 typedef RayHitK<1> RayHit;
367 typedef RayHitK<4> RayHit4;
368 typedef RayHitK<8> RayHit8;
369 typedef RayHitK<16> RayHit16;
370 struct RayHitN;
371
372 template<int K, bool intersect>
373 struct RayTypeHelper;
374
375 template<int K>
376 struct RayTypeHelper<K, true>
377 {
378 typedef RayHitK<K> Ty;
379 };
380
381 template<int K>
382 struct RayTypeHelper<K, false>
383 {
384 typedef RayK<K> Ty;
385 };
386
387 template<bool intersect>
388 using RayType = typename RayTypeHelper<1, intersect>::Ty;
389
390 template<int K, bool intersect>
391 using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;
392
393 /* Outputs ray to stream */
394 template<int K>
395 __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
396 {
397 return cout << "{ " << embree_endl
398 << " org = " << ray.org << embree_endl
399 << " dir = " << ray.dir << embree_endl
400 << " near = " << ray.tnear() << embree_endl
401 << " far = " << ray.tfar << embree_endl
402 << " time = " << ray.time() << embree_endl
403 << " mask = " << ray.mask << embree_endl
404 << " id = " << ray.id << embree_endl
405 << " flags = " << ray.flags << embree_endl
406 << "}";
407 }
408
409 template<int K>
410 __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
411 {
412 cout << "{ " << embree_endl
413 << " org = " << ray.org << embree_endl
414 << " dir = " << ray.dir << embree_endl
415 << " near = " << ray.tnear() << embree_endl
416 << " far = " << ray.tfar << embree_endl
417 << " time = " << ray.time() << embree_endl
418 << " mask = " << ray.mask << embree_endl
419 << " id = " << ray.id << embree_endl
420 << " flags = " << ray.flags << embree_endl
421 << " Ng = " << ray.Ng
422 << " u = " << ray.u << embree_endl
423 << " v = " << ray.v << embree_endl
424 << " primID = " << ray.primID << embree_endl
425 << " geomID = " << ray.geomID << embree_endl
426 << " instID =";
427 for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
428 {
429 cout << " " << ray.instID[l];
430 }
431 cout << embree_endl;
432 return cout << "}";
433 }
434
435 struct RayStreamSOA
436 {
437 __forceinline RayStreamSOA(void* rays, size_t N)
438 : ptr((char*)rays), N(N) {}
439
440 /* ray data access functions */
441 __forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin
442 __forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin
443 __forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin
444 __forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment
445
446 __forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction
447 __forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction
448 __forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction
449 __forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur
450
451 __forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit distance)
452 __forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional)
453 __forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id
454 __forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags
455
456 /* hit data access functions */
457 __forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal
458 __forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal
459 __forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal
460
461 __forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit
462 __forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit
463
464 __forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID
465 __forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID
466 __forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID
467
468 __forceinline Ray getRayByOffset(size_t offset)
469 {
470 Ray ray;
471 ray.org.x = org_x(offset)[0];
472 ray.org.y = org_y(offset)[0];
473 ray.org.z = org_z(offset)[0];
474 ray.tnear() = tnear(offset)[0];
475 ray.dir.x = dir_x(offset)[0];
476 ray.dir.y = dir_y(offset)[0];
477 ray.dir.z = dir_z(offset)[0];
478 ray.time() = time(offset)[0];
479 ray.tfar = tfar(offset)[0];
480 ray.mask = mask(offset)[0];
481 ray.id = id(offset)[0];
482 ray.flags = flags(offset)[0];
483 return ray;
484 }
485
486 template<int K>
487 __forceinline RayK<K> getRayByOffset(size_t offset)
488 {
489 RayK<K> ray;
490 ray.org.x = vfloat<K>::loadu(org_x(offset));
491 ray.org.y = vfloat<K>::loadu(org_y(offset));
492 ray.org.z = vfloat<K>::loadu(org_z(offset));
493 ray.tnear = vfloat<K>::loadu(tnear(offset));
494 ray.dir.x = vfloat<K>::loadu(dir_x(offset));
495 ray.dir.y = vfloat<K>::loadu(dir_y(offset));
496 ray.dir.z = vfloat<K>::loadu(dir_z(offset));
497 ray.time = vfloat<K>::loadu(time(offset));
498 ray.tfar = vfloat<K>::loadu(tfar(offset));
499 ray.mask = vint<K>::loadu(mask(offset));
500 ray.id = vint<K>::loadu(id(offset));
501 ray.flags = vint<K>::loadu(flags(offset));
502 return ray;
503 }
504
505 template<int K>
506 __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
507 {
508 RayK<K> ray;
509 ray.org.x = vfloat<K>::loadu(valid, org_x(offset));
510 ray.org.y = vfloat<K>::loadu(valid, org_y(offset));
511 ray.org.z = vfloat<K>::loadu(valid, org_z(offset));
512 ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));
513 ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset));
514 ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset));
515 ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset));
516 ray.time() = vfloat<K>::loadu(valid, time(offset));
517 ray.tfar = vfloat<K>::loadu(valid, tfar(offset));
518
519#if !defined(__AVX__)
520 /* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,
521 because the SSE masked loads always access the entire vector */
522 if (unlikely(!all(valid)))
523 {
524 ray.mask = zero;
525 ray.id = zero;
526 ray.flags = zero;
527
528 for (size_t k = 0; k < K; k++)
529 {
530 if (likely(valid[k]))
531 {
532 ray.mask[k] = mask(offset)[k];
533 ray.id[k] = id(offset)[k];
534 ray.flags[k] = flags(offset)[k];
535 }
536 }
537 }
538 else
539#endif
540 {
541 ray.mask = vint<K>::loadu(valid, mask(offset));
542 ray.id = vint<K>::loadu(valid, id(offset));
543 ray.flags = vint<K>::loadu(valid, flags(offset));
544 }
545
546 return ray;
547 }
548
549 template<int K>
550 __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
551 {
552 /*
553 * valid_i: stores which of the input rays exist (do not access nonexistent rays!)
554 * valid: stores which of the rays actually hit something.
555 */
556 vbool<K> valid = valid_i;
557 valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
558
559 if (likely(any(valid)))
560 {
561 vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
562 vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);
563 vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);
564 vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);
565 vfloat<K>::storeu(valid, u(offset), ray.u);
566 vfloat<K>::storeu(valid, v(offset), ray.v);
567
568#if !defined(__AVX__)
569 /* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,
570 because the SSE masked stores always access the entire vector */
571 if (unlikely(!all(valid_i)))
572 {
573 for (size_t k = 0; k < K; k++)
574 {
575 if (likely(valid[k]))
576 {
577 primID(offset)[k] = ray.primID[k];
578 geomID(offset)[k] = ray.geomID[k];
579
580 instID(0, offset)[k] = ray.instID[0][k];
581#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
582 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
583 instID(l, offset)[k] = ray.instID[l][k];
584#endif
585 }
586 }
587 }
588 else
589#endif
590 {
591 vuint<K>::storeu(valid, primID(offset), ray.primID);
592 vuint<K>::storeu(valid, geomID(offset), ray.geomID);
593
594 vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);
595#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
596 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
597 vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);
598#endif
599 }
600 }
601 }
602
603 template<int K>
604 __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
605 {
606 vbool<K> valid = valid_i;
607 valid &= (ray.tfar < 0.0f);
608
609 if (likely(any(valid)))
610 vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
611 }
612
613 __forceinline size_t getOctantByOffset(size_t offset)
614 {
615 const float dx = dir_x(offset)[0];
616 const float dy = dir_y(offset)[0];
617 const float dz = dir_z(offset)[0];
618 const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
619 return octantID;
620 }
621
622 __forceinline bool isValidByOffset(size_t offset)
623 {
624 const float nnear = tnear(offset)[0];
625 const float ffar = tfar(offset)[0];
626 return nnear <= ffar;
627 }
628
629 template<int K>
630 __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
631 {
632 RayK<K> ray;
633
634#if defined(__AVX2__)
635 ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);
636 ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);
637 ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);
638 ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);
639 ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);
640 ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);
641 ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);
642 ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);
643 ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);
644 ray.mask = vint<K>::template gather<1>(valid, mask(), offset);
645 ray.id = vint<K>::template gather<1>(valid, id(), offset);
646 ray.flags = vint<K>::template gather<1>(valid, flags(), offset);
647#else
648 ray.org = zero;
649 ray.tnear() = zero;
650 ray.dir = zero;
651 ray.time() = zero;
652 ray.tfar = zero;
653 ray.mask = zero;
654 ray.id = zero;
655 ray.flags = zero;
656
657 for (size_t k = 0; k < K; k++)
658 {
659 if (likely(valid[k]))
660 {
661 const size_t ofs = offset[k];
662
663 ray.org.x[k] = *org_x(ofs);
664 ray.org.y[k] = *org_y(ofs);
665 ray.org.z[k] = *org_z(ofs);
666 ray.tnear()[k] = *tnear(ofs);
667 ray.dir.x[k] = *dir_x(ofs);
668 ray.dir.y[k] = *dir_y(ofs);
669 ray.dir.z[k] = *dir_z(ofs);
670 ray.time()[k] = *time(ofs);
671 ray.tfar[k] = *tfar(ofs);
672 ray.mask[k] = *mask(ofs);
673 ray.id[k] = *id(ofs);
674 ray.flags[k] = *flags(ofs);
675 }
676 }
677#endif
678
679 return ray;
680 }
681
682 template<int K>
683 __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
684 {
685 vbool<K> valid = valid_i;
686 valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
687
688 if (likely(any(valid)))
689 {
690#if defined(__AVX512F__)
691 vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
692 vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);
693 vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);
694 vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);
695 vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);
696 vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);
697 vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);
698 vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);
699
700 vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);
701#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
702 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
703 vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);
704#endif
705#else
706 size_t valid_bits = movemask(valid);
707 while (valid_bits != 0)
708 {
709 const size_t k = bscf(valid_bits);
710 const size_t ofs = offset[k];
711
712 *tfar(ofs) = ray.tfar[k];
713
714 *Ng_x(ofs) = ray.Ng.x[k];
715 *Ng_y(ofs) = ray.Ng.y[k];
716 *Ng_z(ofs) = ray.Ng.z[k];
717 *u(ofs) = ray.u[k];
718 *v(ofs) = ray.v[k];
719 *primID(ofs) = ray.primID[k];
720 *geomID(ofs) = ray.geomID[k];
721
722 *instID(0, ofs) = ray.instID[0][k];
723#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
724 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
725 *instID(l, ofs) = ray.instID[l][k];
726#endif
727 }
728#endif
729 }
730 }
731
732 template<int K>
733 __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
734 {
735 vbool<K> valid = valid_i;
736 valid &= (ray.tfar < 0.0f);
737
738 if (likely(any(valid)))
739 {
740#if defined(__AVX512F__)
741 vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
742#else
743 size_t valid_bits = movemask(valid);
744 while (valid_bits != 0)
745 {
746 const size_t k = bscf(valid_bits);
747 const size_t ofs = offset[k];
748
749 *tfar(ofs) = ray.tfar[k];
750 }
751#endif
752 }
753 }
754
755 char* __restrict__ ptr;
756 size_t N;
757 };
758
759 template<size_t MAX_K>
760 struct StackRayStreamSOA : public RayStreamSOA
761 {
762 __forceinline StackRayStreamSOA(size_t K)
763 : RayStreamSOA(data, K) { assert(K <= MAX_K); }
764
765 char data[MAX_K / 4 * sizeof(RayHit4)];
766 };
767
768
769 struct RayStreamSOP
770 {
771 template<class T>
772 __forceinline void init(T& t)
773 {
774 org_x = (float*)&t.org.x;
775 org_y = (float*)&t.org.y;
776 org_z = (float*)&t.org.z;
777 tnear = (float*)&t.tnear;
778 dir_x = (float*)&t.dir.x;
779 dir_y = (float*)&t.dir.y;
780 dir_z = (float*)&t.dir.z;
781 time = (float*)&t.time;
782 tfar = (float*)&t.tfar;
783 mask = (unsigned int*)&t.mask;
784 id = (unsigned int*)&t.id;
785 flags = (unsigned int*)&t.flags;
786
787 Ng_x = (float*)&t.Ng.x;
788 Ng_y = (float*)&t.Ng.y;
789 Ng_z = (float*)&t.Ng.z;
790 u = (float*)&t.u;
791 v = (float*)&t.v;
792 primID = (unsigned int*)&t.primID;
793 geomID = (unsigned int*)&t.geomID;
794
795 for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
796 instID[l] = (unsigned int*)&t.instID[l];
797 }
798
799 __forceinline Ray getRayByOffset(size_t offset)
800 {
801 Ray ray;
802 ray.org.x = *(float* __restrict__)((char*)org_x + offset);
803 ray.org.y = *(float* __restrict__)((char*)org_y + offset);
804 ray.org.z = *(float* __restrict__)((char*)org_z + offset);
805 ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);
806 ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);
807 ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);
808 ray.tfar = *(float* __restrict__)((char*)tfar + offset);
809 ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
810 ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f;
811 ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1;
812 ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;
813 ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1;
814 return ray;
815 }
816
817 template<int K>
818 __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
819 {
820 RayK<K> ray;
821 ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));
822 ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));
823 ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));
824 ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
825 ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
826 ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
827 ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
828 ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
829 ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;
830 ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;
831 ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;
832 ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;
833 return ray;
834 }
835
836 template<int K>
837 __forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)
838 {
839 Vec3vf<K> dir;
840 dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
841 dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
842 dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
843 return dir;
844 }
845
846 __forceinline void setHitByOffset(size_t offset, const RayHit& ray)
847 {
848 if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
849 {
850 *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
851
852 if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;
853 if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;
854 if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;
855 *(float* __restrict__)((char*)u + offset) = ray.u;
856 *(float* __restrict__)((char*)v + offset) = ray.v;
857 *(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;
858 *(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;
859
860 if (likely(instID[0])) {
861 *(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];
862#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
863 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l)
864 *(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];
865#endif
866 }
867 }
868 }
869
870 __forceinline void setHitByOffset(size_t offset, const Ray& ray)
871 {
872 *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
873 }
874
875 template<int K>
876 __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
877 {
878 vbool<K> valid = valid_i;
879 valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
880
881 if (likely(any(valid)))
882 {
883 vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
884
885 if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);
886 if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);
887 if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);
888 vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);
889 vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);
890 vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);
891 vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);
892
893 if (likely(instID[0])) {
894 vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);
895#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
896 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
897 vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);
898#endif
899 }
900 }
901 }
902
903 template<int K>
904 __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
905 {
906 vbool<K> valid = valid_i;
907 valid &= (ray.tfar < 0.0f);
908
909 if (likely(any(valid)))
910 vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
911 }
912
913 __forceinline size_t getOctantByOffset(size_t offset)
914 {
915 const float dx = *(float* __restrict__)((char*)dir_x + offset);
916 const float dy = *(float* __restrict__)((char*)dir_y + offset);
917 const float dz = *(float* __restrict__)((char*)dir_z + offset);
918 const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
919 return octantID;
920 }
921
922 __forceinline bool isValidByOffset(size_t offset)
923 {
924 const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
925 const float ffar = *(float* __restrict__)((char*)tfar + offset);
926 return nnear <= ffar;
927 }
928
929 template<int K>
930 __forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)
931 {
932 const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
933 const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
934 return nnear <= ffar;
935 }
936
937 template<int K>
938 __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
939 {
940 RayK<K> ray;
941
942#if defined(__AVX2__)
943 ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);
944 ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);
945 ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);
946 ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);
947 ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);
948 ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);
949 ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);
950 ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);
951 ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);
952 ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);
953 ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);
954 ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);
955#else
956 ray.org = zero;
957 ray.tnear() = zero;
958 ray.dir = zero;
959 ray.tfar = zero;
960 ray.time() = zero;
961 ray.mask = zero;
962 ray.id = zero;
963 ray.flags = zero;
964
965 for (size_t k = 0; k < K; k++)
966 {
967 if (likely(valid[k]))
968 {
969 const size_t ofs = offset[k];
970
971 ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);
972 ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);
973 ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);
974 ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);
975 ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);
976 ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);
977 ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);
978 ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;
979 ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;
980 ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;
981 ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;
982 ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1;
983 }
984 }
985#endif
986
987 return ray;
988 }
989
990 template<int K>
991 __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
992 {
993 vbool<K> valid = valid_i;
994 valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
995
996 if (likely(any(valid)))
997 {
998#if defined(__AVX512F__)
999 vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
1000
1001 if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);
1002 if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);
1003 if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);
1004 vfloat<K>::template scatter<1>(valid, u, offset, ray.u);
1005 vfloat<K>::template scatter<1>(valid, v, offset, ray.v);
1006 vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);
1007 vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);
1008
1009 if (likely(instID[0])) {
1010 vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);
1011#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
1012 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
1013 vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);
1014#endif
1015 }
1016#else
1017 size_t valid_bits = movemask(valid);
1018 while (valid_bits != 0)
1019 {
1020 const size_t k = bscf(valid_bits);
1021 const size_t ofs = offset[k];
1022
1023 *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
1024
1025 if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];
1026 if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];
1027 if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];
1028 *(float* __restrict__)((char*)u + ofs) = ray.u[k];
1029 *(float* __restrict__)((char*)v + ofs) = ray.v[k];
1030 *(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];
1031 *(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];
1032
1033 if (likely(instID[0])) {
1034 *(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];
1035#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
1036 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l)
1037 *(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];
1038#endif
1039 }
1040 }
1041#endif
1042 }
1043 }
1044
1045 template<int K>
1046 __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
1047 {
1048 vbool<K> valid = valid_i;
1049 valid &= (ray.tfar < 0.0f);
1050
1051 if (likely(any(valid)))
1052 {
1053#if defined(__AVX512F__)
1054 vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
1055#else
1056 size_t valid_bits = movemask(valid);
1057 while (valid_bits != 0)
1058 {
1059 const size_t k = bscf(valid_bits);
1060 const size_t ofs = offset[k];
1061
1062 *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
1063 }
1064#endif
1065 }
1066 }
1067
1068 /* ray data */
1069 float* __restrict__ org_x; // x coordinate of ray origin
1070 float* __restrict__ org_y; // y coordinate of ray origin
1071 float* __restrict__ org_z; // z coordinate of ray origin
1072 float* __restrict__ tnear; // start of ray segment (optional)
1073
1074 float* __restrict__ dir_x; // x coordinate of ray direction
1075 float* __restrict__ dir_y; // y coordinate of ray direction
1076 float* __restrict__ dir_z; // z coordinate of ray direction
1077 float* __restrict__ time; // time of this ray for motion blur (optional)
1078
1079 float* __restrict__ tfar; // end of ray segment (set to hit distance)
1080 unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional)
1081 unsigned int* __restrict__ id; // ray ID
1082 unsigned int* __restrict__ flags; // ray flags
1083
1084 /* hit data */
1085 float* __restrict__ Ng_x; // x coordinate of geometry normal (optional)
1086 float* __restrict__ Ng_y; // y coordinate of geometry normal (optional)
1087 float* __restrict__ Ng_z; // z coordinate of geometry normal (optional)
1088
1089 float* __restrict__ u; // barycentric u coordinate of hit
1090 float* __restrict__ v; // barycentric v coordinate of hit
1091
1092 unsigned int* __restrict__ primID; // primitive ID
1093 unsigned int* __restrict__ geomID; // geometry ID
1094 unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID (optional)
1095 };
1096
1097
1098 struct RayStreamAOS
1099 {
1100 __forceinline RayStreamAOS(void* rays)
1101 : ptr((Ray*)rays) {}
1102
1103 __forceinline Ray& getRayByOffset(size_t offset)
1104 {
1105 return *(Ray*)((char*)ptr + offset);
1106 }
1107
1108 template<int K>
1109 __forceinline RayK<K> getRayByOffset(const vint<K>& offset);
1110
1111 template<int K>
1112 __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
1113 {
1114 const vint<K> valid_offset = select(valid, offset, vintx(zero));
1115 return getRayByOffset<K>(valid_offset);
1116 }
1117
1118 template<int K>
1119 __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
1120 {
1121 vbool<K> valid = valid_i;
1122 valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
1123
1124 if (likely(any(valid)))
1125 {
1126#if defined(__AVX512F__)
1127 vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
1128 vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);
1129 vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);
1130 vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);
1131 vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);
1132 vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);
1133 vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);
1134 vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);
1135
1136 vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);
1137#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
1138 for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l)
1139 vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);
1140#endif
1141#else
1142 size_t valid_bits = movemask(valid);
1143 while (valid_bits != 0)
1144 {
1145 const size_t k = bscf(valid_bits);
1146 RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);
1147 ray_k->tfar = ray.tfar[k];
1148 ray_k->Ng.x = ray.Ng.x[k];
1149 ray_k->Ng.y = ray.Ng.y[k];
1150 ray_k->Ng.z = ray.Ng.z[k];
1151 ray_k->u = ray.u[k];
1152 ray_k->v = ray.v[k];
1153 ray_k->primID = ray.primID[k];
1154 ray_k->geomID = ray.geomID[k];
1155
1156 instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
1157 }
1158#endif
1159 }
1160 }
1161
1162 template<int K>
1163 __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
1164 {
1165 vbool<K> valid = valid_i;
1166 valid &= (ray.tfar < 0.0f);
1167
1168 if (likely(any(valid)))
1169 {
1170#if defined(__AVX512F__)
1171 vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
1172#else
1173 size_t valid_bits = movemask(valid);
1174 while (valid_bits != 0)
1175 {
1176 const size_t k = bscf(valid_bits);
1177 Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);
1178 ray_k->tfar = ray.tfar[k];
1179 }
1180#endif
1181 }
1182 }
1183
1184 Ray* __restrict__ ptr;
1185 };
1186
1187 template<>
1188 __forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset)
1189 {
1190 Ray4 ray;
1191
1192 /* load and transpose: org.x, org.y, org.z, tnear */
1193 const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
1194 const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
1195 const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
1196 const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
1197
1198 transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
1199
1200 /* load and transpose: dir.x, dir.y, dir.z, time */
1201 const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir);
1202 const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir);
1203 const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir);
1204 const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir);
1205
1206 transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1207
1208 /* load and transpose: tfar, mask, id, flags */
1209 const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
1210 const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
1211 const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
1212 const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
1213
1214 vfloat4 maskf, idf, flagsf;
1215 transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
1216 ray.mask = asInt(maskf);
1217 ray.id = asInt(idf);
1218 ray.flags = asInt(flagsf);
1219
1220 return ray;
1221 }
1222
#if defined(__AVX__)
  /* 8-wide gather: loads one ray per lane from the given byte offsets and
   * transposes the loaded rows into the component layout of Ray8. */
  template<>
  __forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset)
  {
    /* address of the ray record that belongs to lane i */
    const auto ray_at = [this, &offset](size_t i) { return (Ray*)((char*)ptr + offset[i]); };

    Ray8 result;

    /* 8-float rows of (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) */
    const vfloat8 row0 = vfloat8::loadu(&ray_at(0)->org);
    const vfloat8 row1 = vfloat8::loadu(&ray_at(1)->org);
    const vfloat8 row2 = vfloat8::loadu(&ray_at(2)->org);
    const vfloat8 row3 = vfloat8::loadu(&ray_at(3)->org);
    const vfloat8 row4 = vfloat8::loadu(&ray_at(4)->org);
    const vfloat8 row5 = vfloat8::loadu(&ray_at(5)->org);
    const vfloat8 row6 = vfloat8::loadu(&ray_at(6)->org);
    const vfloat8 row7 = vfloat8::loadu(&ray_at(7)->org);

    transpose(row0,row1,row2,row3,row4,row5,row6,row7,
              result.org.x, result.org.y, result.org.z, result.tnear(),
              result.dir.x, result.dir.y, result.dir.z, result.time());

    /* 4-float rows of (tfar, mask, id, flags); the integer fields travel as raw float bits */
    const vfloat4 tail0 = vfloat4::loadu(&ray_at(0)->tfar);
    const vfloat4 tail1 = vfloat4::loadu(&ray_at(1)->tfar);
    const vfloat4 tail2 = vfloat4::loadu(&ray_at(2)->tfar);
    const vfloat4 tail3 = vfloat4::loadu(&ray_at(3)->tfar);
    const vfloat4 tail4 = vfloat4::loadu(&ray_at(4)->tfar);
    const vfloat4 tail5 = vfloat4::loadu(&ray_at(5)->tfar);
    const vfloat4 tail6 = vfloat4::loadu(&ray_at(6)->tfar);
    const vfloat4 tail7 = vfloat4::loadu(&ray_at(7)->tfar);

    vfloat8 mask_bits, id_bits, flags_bits;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,
              result.tfar, mask_bits, id_bits, flags_bits);
    result.mask  = asInt(mask_bits);
    result.id    = asInt(id_bits);
    result.flags = asInt(flags_bits);

    return result;
  }
#endif
1260
#if defined(__AVX512F__)
  /* 16-wide gather: loads one ray per lane from the given byte offsets and
   * transposes the loaded rows into the component layout of Ray16. */
  template<>
  __forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset)
  {
    /* address of the ray record that belongs to lane i */
    const auto ray_at = [this, &offset](size_t i) { return (Ray*)((char*)ptr + offset[i]); };

    Ray16 result;

    /* 8-float rows of (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) */
    const vfloat8 row0  = vfloat8::loadu(&ray_at( 0)->org);
    const vfloat8 row1  = vfloat8::loadu(&ray_at( 1)->org);
    const vfloat8 row2  = vfloat8::loadu(&ray_at( 2)->org);
    const vfloat8 row3  = vfloat8::loadu(&ray_at( 3)->org);
    const vfloat8 row4  = vfloat8::loadu(&ray_at( 4)->org);
    const vfloat8 row5  = vfloat8::loadu(&ray_at( 5)->org);
    const vfloat8 row6  = vfloat8::loadu(&ray_at( 6)->org);
    const vfloat8 row7  = vfloat8::loadu(&ray_at( 7)->org);
    const vfloat8 row8  = vfloat8::loadu(&ray_at( 8)->org);
    const vfloat8 row9  = vfloat8::loadu(&ray_at( 9)->org);
    const vfloat8 row10 = vfloat8::loadu(&ray_at(10)->org);
    const vfloat8 row11 = vfloat8::loadu(&ray_at(11)->org);
    const vfloat8 row12 = vfloat8::loadu(&ray_at(12)->org);
    const vfloat8 row13 = vfloat8::loadu(&ray_at(13)->org);
    const vfloat8 row14 = vfloat8::loadu(&ray_at(14)->org);
    const vfloat8 row15 = vfloat8::loadu(&ray_at(15)->org);

    transpose(row0,row1,row2,row3,row4,row5,row6,row7,row8,row9,row10,row11,row12,row13,row14,row15,
              result.org.x, result.org.y, result.org.z, result.tnear(),
              result.dir.x, result.dir.y, result.dir.z, result.time());

    /* 4-float rows of (tfar, mask, id, flags); the integer fields travel as raw float bits */
    const vfloat4 tail0  = vfloat4::loadu(&ray_at( 0)->tfar);
    const vfloat4 tail1  = vfloat4::loadu(&ray_at( 1)->tfar);
    const vfloat4 tail2  = vfloat4::loadu(&ray_at( 2)->tfar);
    const vfloat4 tail3  = vfloat4::loadu(&ray_at( 3)->tfar);
    const vfloat4 tail4  = vfloat4::loadu(&ray_at( 4)->tfar);
    const vfloat4 tail5  = vfloat4::loadu(&ray_at( 5)->tfar);
    const vfloat4 tail6  = vfloat4::loadu(&ray_at( 6)->tfar);
    const vfloat4 tail7  = vfloat4::loadu(&ray_at( 7)->tfar);
    const vfloat4 tail8  = vfloat4::loadu(&ray_at( 8)->tfar);
    const vfloat4 tail9  = vfloat4::loadu(&ray_at( 9)->tfar);
    const vfloat4 tail10 = vfloat4::loadu(&ray_at(10)->tfar);
    const vfloat4 tail11 = vfloat4::loadu(&ray_at(11)->tfar);
    const vfloat4 tail12 = vfloat4::loadu(&ray_at(12)->tfar);
    const vfloat4 tail13 = vfloat4::loadu(&ray_at(13)->tfar);
    const vfloat4 tail14 = vfloat4::loadu(&ray_at(14)->tfar);
    const vfloat4 tail15 = vfloat4::loadu(&ray_at(15)->tfar);

    vfloat16 mask_bits, id_bits, flags_bits;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,tail8,tail9,tail10,tail11,tail12,tail13,tail14,tail15,
              result.tfar, mask_bits, id_bits, flags_bits);
    result.mask  = asInt(mask_bits);
    result.id    = asInt(id_bits);
    result.flags = asInt(flags_bits);

    return result;
  }
#endif
1316
1317
1318 struct RayStreamAOP
1319 {
1320 __forceinline RayStreamAOP(void* rays)
1321 : ptr((Ray**)rays) {}
1322
1323 __forceinline Ray& getRayByIndex(size_t index)
1324 {
1325 return *ptr[index];
1326 }
1327
1328 template<int K>
1329 __forceinline RayK<K> getRayByIndex(const vint<K>& index);
1330
1331 template<int K>
1332 __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)
1333 {
1334 const vint<K> valid_index = select(valid, index, vintx(zero));
1335 return getRayByIndex<K>(valid_index);
1336 }
1337
1338 template<int K>
1339 __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)
1340 {
1341 vbool<K> valid = valid_i;
1342 valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
1343
1344 if (likely(any(valid)))
1345 {
1346 size_t valid_bits = movemask(valid);
1347 while (valid_bits != 0)
1348 {
1349 const size_t k = bscf(valid_bits);
1350 RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];
1351
1352 ray_k->tfar = ray.tfar[k];
1353 ray_k->Ng.x = ray.Ng.x[k];
1354 ray_k->Ng.y = ray.Ng.y[k];
1355 ray_k->Ng.z = ray.Ng.z[k];
1356 ray_k->u = ray.u[k];
1357 ray_k->v = ray.v[k];
1358 ray_k->primID = ray.primID[k];
1359 ray_k->geomID = ray.geomID[k];
1360 instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
1361 }
1362 }
1363 }
1364
1365 template<int K>
1366 __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)
1367 {
1368 vbool<K> valid = valid_i;
1369 valid &= (ray.tfar < 0.0f);
1370
1371 if (likely(any(valid)))
1372 {
1373 size_t valid_bits = movemask(valid);
1374 while (valid_bits != 0)
1375 {
1376 const size_t k = bscf(valid_bits);
1377 Ray* __restrict__ ray_k = ptr[index[k]];
1378
1379 ray_k->tfar = ray.tfar[k];
1380 }
1381 }
1382 }
1383
1384 Ray** __restrict__ ptr;
1385 };
1386
1387 template<>
1388 __forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index)
1389 {
1390 Ray4 ray;
1391
1392 /* load and transpose: org.x, org.y, org.z, tnear */
1393 const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org);
1394 const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org);
1395 const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org);
1396 const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org);
1397
1398 transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
1399
1400 /* load and transpose: dir.x, dir.y, dir.z, time */
1401 const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir);
1402 const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir);
1403 const vfloat4 b2 = vfloat4::loadu(&ptr[index[2]]->dir);
1404 const vfloat4 b3 = vfloat4::loadu(&ptr[index[3]]->dir);
1405
1406 transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1407
1408 /* load and transpose: tfar, mask, id, flags */
1409 const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
1410 const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
1411 const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
1412 const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
1413
1414 vfloat4 maskf, idf, flagsf;
1415 transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
1416 ray.mask = asInt(maskf);
1417 ray.id = asInt(idf);
1418 ray.flags = asInt(flagsf);
1419
1420 return ray;
1421 }
1422
#if defined(__AVX__)
  /* 8-wide gather: loads the rays selected by `index` and transposes the
   * loaded rows into the component layout of Ray8. */
  template<>
  __forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index)
  {
    /* pointer to the ray that belongs to lane i */
    const auto ray_at = [this, &index](size_t i) { return ptr[index[i]]; };

    Ray8 result;

    /* 8-float rows of (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) */
    const vfloat8 row0 = vfloat8::loadu(&ray_at(0)->org);
    const vfloat8 row1 = vfloat8::loadu(&ray_at(1)->org);
    const vfloat8 row2 = vfloat8::loadu(&ray_at(2)->org);
    const vfloat8 row3 = vfloat8::loadu(&ray_at(3)->org);
    const vfloat8 row4 = vfloat8::loadu(&ray_at(4)->org);
    const vfloat8 row5 = vfloat8::loadu(&ray_at(5)->org);
    const vfloat8 row6 = vfloat8::loadu(&ray_at(6)->org);
    const vfloat8 row7 = vfloat8::loadu(&ray_at(7)->org);

    transpose(row0,row1,row2,row3,row4,row5,row6,row7,
              result.org.x, result.org.y, result.org.z, result.tnear(),
              result.dir.x, result.dir.y, result.dir.z, result.time());

    /* 4-float rows of (tfar, mask, id, flags); the integer fields travel as raw float bits */
    const vfloat4 tail0 = vfloat4::loadu(&ray_at(0)->tfar);
    const vfloat4 tail1 = vfloat4::loadu(&ray_at(1)->tfar);
    const vfloat4 tail2 = vfloat4::loadu(&ray_at(2)->tfar);
    const vfloat4 tail3 = vfloat4::loadu(&ray_at(3)->tfar);
    const vfloat4 tail4 = vfloat4::loadu(&ray_at(4)->tfar);
    const vfloat4 tail5 = vfloat4::loadu(&ray_at(5)->tfar);
    const vfloat4 tail6 = vfloat4::loadu(&ray_at(6)->tfar);
    const vfloat4 tail7 = vfloat4::loadu(&ray_at(7)->tfar);

    vfloat8 mask_bits, id_bits, flags_bits;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,
              result.tfar, mask_bits, id_bits, flags_bits);
    result.mask  = asInt(mask_bits);
    result.id    = asInt(id_bits);
    result.flags = asInt(flags_bits);

    return result;
  }
#endif
1460
#if defined(__AVX512F__)
  /* 16-wide gather: loads the rays selected by `index` and transposes the
   * loaded rows into the component layout of Ray16. */
  template<>
  __forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index)
  {
    /* pointer to the ray that belongs to lane i */
    const auto ray_at = [this, &index](size_t i) { return ptr[index[i]]; };

    Ray16 result;

    /* 8-float rows of (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) */
    const vfloat8 row0  = vfloat8::loadu(&ray_at( 0)->org);
    const vfloat8 row1  = vfloat8::loadu(&ray_at( 1)->org);
    const vfloat8 row2  = vfloat8::loadu(&ray_at( 2)->org);
    const vfloat8 row3  = vfloat8::loadu(&ray_at( 3)->org);
    const vfloat8 row4  = vfloat8::loadu(&ray_at( 4)->org);
    const vfloat8 row5  = vfloat8::loadu(&ray_at( 5)->org);
    const vfloat8 row6  = vfloat8::loadu(&ray_at( 6)->org);
    const vfloat8 row7  = vfloat8::loadu(&ray_at( 7)->org);
    const vfloat8 row8  = vfloat8::loadu(&ray_at( 8)->org);
    const vfloat8 row9  = vfloat8::loadu(&ray_at( 9)->org);
    const vfloat8 row10 = vfloat8::loadu(&ray_at(10)->org);
    const vfloat8 row11 = vfloat8::loadu(&ray_at(11)->org);
    const vfloat8 row12 = vfloat8::loadu(&ray_at(12)->org);
    const vfloat8 row13 = vfloat8::loadu(&ray_at(13)->org);
    const vfloat8 row14 = vfloat8::loadu(&ray_at(14)->org);
    const vfloat8 row15 = vfloat8::loadu(&ray_at(15)->org);

    transpose(row0,row1,row2,row3,row4,row5,row6,row7,row8,row9,row10,row11,row12,row13,row14,row15,
              result.org.x, result.org.y, result.org.z, result.tnear(),
              result.dir.x, result.dir.y, result.dir.z, result.time());

    /* 4-float rows of (tfar, mask, id, flags); the integer fields travel as raw float bits */
    const vfloat4 tail0  = vfloat4::loadu(&ray_at( 0)->tfar);
    const vfloat4 tail1  = vfloat4::loadu(&ray_at( 1)->tfar);
    const vfloat4 tail2  = vfloat4::loadu(&ray_at( 2)->tfar);
    const vfloat4 tail3  = vfloat4::loadu(&ray_at( 3)->tfar);
    const vfloat4 tail4  = vfloat4::loadu(&ray_at( 4)->tfar);
    const vfloat4 tail5  = vfloat4::loadu(&ray_at( 5)->tfar);
    const vfloat4 tail6  = vfloat4::loadu(&ray_at( 6)->tfar);
    const vfloat4 tail7  = vfloat4::loadu(&ray_at( 7)->tfar);
    const vfloat4 tail8  = vfloat4::loadu(&ray_at( 8)->tfar);
    const vfloat4 tail9  = vfloat4::loadu(&ray_at( 9)->tfar);
    const vfloat4 tail10 = vfloat4::loadu(&ray_at(10)->tfar);
    const vfloat4 tail11 = vfloat4::loadu(&ray_at(11)->tfar);
    const vfloat4 tail12 = vfloat4::loadu(&ray_at(12)->tfar);
    const vfloat4 tail13 = vfloat4::loadu(&ray_at(13)->tfar);
    const vfloat4 tail14 = vfloat4::loadu(&ray_at(14)->tfar);
    const vfloat4 tail15 = vfloat4::loadu(&ray_at(15)->tfar);

    vfloat16 mask_bits, id_bits, flags_bits;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,tail8,tail9,tail10,tail11,tail12,tail13,tail14,tail15,
              result.tfar, mask_bits, id_bits, flags_bits);
    result.mask  = asInt(mask_bits);
    result.id    = asInt(id_bits);
    result.flags = asInt(flags_bits);

    return result;
  }
#endif
1517}
1518