1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "../../common/sys/platform.h"
7#include "../../common/sys/sysinfo.h"
8#include "../../common/sys/thread.h"
9#include "../../common/sys/alloc.h"
10#include "../../common/sys/ref.h"
11#include "../../common/sys/intrinsics.h"
12#include "../../common/sys/atomic.h"
13#include "../../common/sys/mutex.h"
14#include "../../common/sys/vector.h"
15#include "../../common/sys/array.h"
16#include "../../common/sys/string.h"
17#include "../../common/sys/regression.h"
18#include "../../common/sys/vector.h"
19
20#include "../../common/math/math.h"
21#include "../../common/math/transcendental.h"
22#include "../../common/simd/simd.h"
23#include "../../common/math/vec2.h"
24#include "../../common/math/vec3.h"
25#include "../../common/math/vec4.h"
26#include "../../common/math/vec2fa.h"
27#include "../../common/math/vec3fa.h"
28#include "../../common/math/interval.h"
29#include "../../common/math/bbox.h"
30#include "../../common/math/obbox.h"
31#include "../../common/math/lbbox.h"
32#include "../../common/math/linearspace2.h"
33#include "../../common/math/linearspace3.h"
34#include "../../common/math/affinespace.h"
35#include "../../common/math/range.h"
36#include "../../common/lexers/tokenstream.h"
37
38#include "../../common/tasking/taskscheduler.h"
39
40#define COMMA ,
41
42#include "../config.h"
43#include "isa.h"
44#include "stat.h"
45#include "profile.h"
46#include "rtcore.h"
47#include "vector.h"
48#include "state.h"
49#include "instance_stack.h"
50
51#include <vector>
52#include <map>
53#include <algorithm>
54#include <functional>
55#include <utility>
56#include <sstream>
57
58namespace embree
59{
60 ////////////////////////////////////////////////////////////////////////////////
61 /// Vec2 shortcuts
62 ////////////////////////////////////////////////////////////////////////////////
63
64 template<int N> using Vec2vf = Vec2<vfloat<N>>;
65 template<int N> using Vec2vd = Vec2<vdouble<N>>;
66 template<int N> using Vec2vr = Vec2<vreal<N>>;
67 template<int N> using Vec2vi = Vec2<vint<N>>;
68 template<int N> using Vec2vl = Vec2<vllong<N>>;
69 template<int N> using Vec2vb = Vec2<vbool<N>>;
70 template<int N> using Vec2vbf = Vec2<vboolf<N>>;
71 template<int N> using Vec2vbd = Vec2<vboold<N>>;
72
73 typedef Vec2<vfloat4> Vec2vf4;
74 typedef Vec2<vdouble4> Vec2vd4;
75 typedef Vec2<vreal4> Vec2vr4;
76 typedef Vec2<vint4> Vec2vi4;
77 typedef Vec2<vllong4> Vec2vl4;
78 typedef Vec2<vbool4> Vec2vb4;
79 typedef Vec2<vboolf4> Vec2vbf4;
80 typedef Vec2<vboold4> Vec2vbd4;
81
82 typedef Vec2<vfloat8> Vec2vf8;
83 typedef Vec2<vdouble8> Vec2vd8;
84 typedef Vec2<vreal8> Vec2vr8;
85 typedef Vec2<vint8> Vec2vi8;
86 typedef Vec2<vllong8> Vec2vl8;
87 typedef Vec2<vbool8> Vec2vb8;
88 typedef Vec2<vboolf8> Vec2vbf8;
89 typedef Vec2<vboold8> Vec2vbd8;
90
91 typedef Vec2<vfloat16> Vec2vf16;
92 typedef Vec2<vdouble16> Vec2vd16;
93 typedef Vec2<vreal16> Vec2vr16;
94 typedef Vec2<vint16> Vec2vi16;
95 typedef Vec2<vllong16> Vec2vl16;
96 typedef Vec2<vbool16> Vec2vb16;
97 typedef Vec2<vboolf16> Vec2vbf16;
98 typedef Vec2<vboold16> Vec2vbd16;
99
100 typedef Vec2<vfloatx> Vec2vfx;
101 typedef Vec2<vdoublex> Vec2vdx;
102 typedef Vec2<vrealx> Vec2vrx;
103 typedef Vec2<vintx> Vec2vix;
104 typedef Vec2<vllongx> Vec2vlx;
105 typedef Vec2<vboolx> Vec2vbx;
106 typedef Vec2<vboolfx> Vec2vbfx;
107 typedef Vec2<vbooldx> Vec2vbdx;
108
109 ////////////////////////////////////////////////////////////////////////////////
110 /// Vec3 shortcuts
111 ////////////////////////////////////////////////////////////////////////////////
112
113 template<int N> using Vec3vf = Vec3<vfloat<N>>;
114 template<int N> using Vec3vd = Vec3<vdouble<N>>;
115 template<int N> using Vec3vr = Vec3<vreal<N>>;
116 template<int N> using Vec3vi = Vec3<vint<N>>;
117 template<int N> using Vec3vl = Vec3<vllong<N>>;
118 template<int N> using Vec3vb = Vec3<vbool<N>>;
119 template<int N> using Vec3vbf = Vec3<vboolf<N>>;
120 template<int N> using Vec3vbd = Vec3<vboold<N>>;
121
122 typedef Vec3<vfloat4> Vec3vf4;
123 typedef Vec3<vdouble4> Vec3vd4;
124 typedef Vec3<vreal4> Vec3vr4;
125 typedef Vec3<vint4> Vec3vi4;
126 typedef Vec3<vllong4> Vec3vl4;
127 typedef Vec3<vbool4> Vec3vb4;
128 typedef Vec3<vboolf4> Vec3vbf4;
129 typedef Vec3<vboold4> Vec3vbd4;
130
131 typedef Vec3<vfloat8> Vec3vf8;
132 typedef Vec3<vdouble8> Vec3vd8;
133 typedef Vec3<vreal8> Vec3vr8;
134 typedef Vec3<vint8> Vec3vi8;
135 typedef Vec3<vllong8> Vec3vl8;
136 typedef Vec3<vbool8> Vec3vb8;
137 typedef Vec3<vboolf8> Vec3vbf8;
138 typedef Vec3<vboold8> Vec3vbd8;
139
140 typedef Vec3<vfloat16> Vec3vf16;
141 typedef Vec3<vdouble16> Vec3vd16;
142 typedef Vec3<vreal16> Vec3vr16;
143 typedef Vec3<vint16> Vec3vi16;
144 typedef Vec3<vllong16> Vec3vl16;
145 typedef Vec3<vbool16> Vec3vb16;
146 typedef Vec3<vboolf16> Vec3vbf16;
147 typedef Vec3<vboold16> Vec3vbd16;
148
149 typedef Vec3<vfloatx> Vec3vfx;
150 typedef Vec3<vdoublex> Vec3vdx;
151 typedef Vec3<vrealx> Vec3vrx;
152 typedef Vec3<vintx> Vec3vix;
153 typedef Vec3<vllongx> Vec3vlx;
154 typedef Vec3<vboolx> Vec3vbx;
155 typedef Vec3<vboolfx> Vec3vbfx;
156 typedef Vec3<vbooldx> Vec3vbdx;
157
158 ////////////////////////////////////////////////////////////////////////////////
159 /// Vec4 shortcuts
160 ////////////////////////////////////////////////////////////////////////////////
161
162 template<int N> using Vec4vf = Vec4<vfloat<N>>;
163 template<int N> using Vec4vd = Vec4<vdouble<N>>;
164 template<int N> using Vec4vr = Vec4<vreal<N>>;
165 template<int N> using Vec4vi = Vec4<vint<N>>;
166 template<int N> using Vec4vl = Vec4<vllong<N>>;
167 template<int N> using Vec4vb = Vec4<vbool<N>>;
168 template<int N> using Vec4vbf = Vec4<vboolf<N>>;
169 template<int N> using Vec4vbd = Vec4<vboold<N>>;
170
171 typedef Vec4<vfloat4> Vec4vf4;
172 typedef Vec4<vdouble4> Vec4vd4;
173 typedef Vec4<vreal4> Vec4vr4;
174 typedef Vec4<vint4> Vec4vi4;
175 typedef Vec4<vllong4> Vec4vl4;
176 typedef Vec4<vbool4> Vec4vb4;
177 typedef Vec4<vboolf4> Vec4vbf4;
178 typedef Vec4<vboold4> Vec4vbd4;
179
180 typedef Vec4<vfloat8> Vec4vf8;
181 typedef Vec4<vdouble8> Vec4vd8;
182 typedef Vec4<vreal8> Vec4vr8;
183 typedef Vec4<vint8> Vec4vi8;
184 typedef Vec4<vllong8> Vec4vl8;
185 typedef Vec4<vbool8> Vec4vb8;
186 typedef Vec4<vboolf8> Vec4vbf8;
187 typedef Vec4<vboold8> Vec4vbd8;
188
189 typedef Vec4<vfloat16> Vec4vf16;
190 typedef Vec4<vdouble16> Vec4vd16;
191 typedef Vec4<vreal16> Vec4vr16;
192 typedef Vec4<vint16> Vec4vi16;
193 typedef Vec4<vllong16> Vec4vl16;
194 typedef Vec4<vbool16> Vec4vb16;
195 typedef Vec4<vboolf16> Vec4vbf16;
196 typedef Vec4<vboold16> Vec4vbd16;
197
198 typedef Vec4<vfloatx> Vec4vfx;
199 typedef Vec4<vdoublex> Vec4vdx;
200 typedef Vec4<vrealx> Vec4vrx;
201 typedef Vec4<vintx> Vec4vix;
202 typedef Vec4<vllongx> Vec4vlx;
203 typedef Vec4<vboolx> Vec4vbx;
204 typedef Vec4<vboolfx> Vec4vbfx;
205 typedef Vec4<vbooldx> Vec4vbdx;
206
207 ////////////////////////////////////////////////////////////////////////////////
208 /// Other shortcuts
209 ////////////////////////////////////////////////////////////////////////////////
210
211 template<int N> using BBox3vf = BBox<Vec3vf<N>>;
212 typedef BBox<Vec3vf4> BBox3vf4;
213 typedef BBox<Vec3vf8> BBox3vf8;
214 typedef BBox<Vec3vf16> BBox3vf16;
215
216 /* calculate time segment itime and fractional time ftime */
217 __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
218 {
219 const float timeScaled = time * numTimeSegments;
220 const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
221 ftime = timeScaled - itimef;
222 return int(itimef);
223 }
224
225 __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
226 {
227 const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
228 const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
229 ftime = timeScaled - itimef;
230 return int(itimef);
231 }
232
233 template<int N>
234 __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
235 {
236 const vfloat<N> timeScaled = time * numTimeSegments;
237 const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
238 ftime = timeScaled - itimef;
239 return vint<N>(itimef);
240 }
241
242 template<int N>
243 __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
244 {
245 const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
246 const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
247 ftime = timeScaled - itimef;
248 return vint<N>(itimef);
249 }
250
251 /* calculate overlapping time segment range */
252 __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
253 {
254 const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step
255 const float round_down = 1.0f-2.0f*float(ulp);
256 const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f);
257 const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments);
258 return make_range(itime_lower, itime_upper);
259 }
260
261 /* calculate overlapping time segment range */
262 __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
263 {
264 const float lower = (range.lower-time_range.lower)/time_range.size();
265 const float upper = (range.upper-time_range.lower)/time_range.size();
266 return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments);
267 }
268}
269