| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../../common/sys/platform.h" |
| 7 | #include "../../common/sys/sysinfo.h" |
| 8 | #include "../../common/sys/thread.h" |
| 9 | #include "../../common/sys/alloc.h" |
| 10 | #include "../../common/sys/ref.h" |
| 11 | #include "../../common/sys/intrinsics.h" |
| 12 | #include "../../common/sys/atomic.h" |
| 13 | #include "../../common/sys/mutex.h" |
| 14 | #include "../../common/sys/vector.h" |
| 15 | #include "../../common/sys/array.h" |
| 16 | #include "../../common/sys/string.h" |
| 17 | #include "../../common/sys/regression.h" |
| 18 | #include "../../common/sys/vector.h" |
| 19 | |
| 20 | #include "../../common/math/math.h" |
| 21 | #include "../../common/math/transcendental.h" |
| 22 | #include "../../common/simd/simd.h" |
| 23 | #include "../../common/math/vec2.h" |
| 24 | #include "../../common/math/vec3.h" |
| 25 | #include "../../common/math/vec4.h" |
| 26 | #include "../../common/math/vec2fa.h" |
| 27 | #include "../../common/math/vec3fa.h" |
| 28 | #include "../../common/math/interval.h" |
| 29 | #include "../../common/math/bbox.h" |
| 30 | #include "../../common/math/obbox.h" |
| 31 | #include "../../common/math/lbbox.h" |
| 32 | #include "../../common/math/linearspace2.h" |
| 33 | #include "../../common/math/linearspace3.h" |
| 34 | #include "../../common/math/affinespace.h" |
| 35 | #include "../../common/math/range.h" |
| 36 | #include "../../common/lexers/tokenstream.h" |
| 37 | |
| 38 | #include "../../common/tasking/taskscheduler.h" |
| 39 | |
| 40 | #define COMMA , |
| 41 | |
| 42 | #include "../config.h" |
| 43 | #include "isa.h" |
| 44 | #include "stat.h" |
| 45 | #include "profile.h" |
| 46 | #include "rtcore.h" |
| 47 | #include "vector.h" |
| 48 | #include "state.h" |
| 49 | #include "instance_stack.h" |
| 50 | |
| 51 | #include <vector> |
| 52 | #include <map> |
| 53 | #include <algorithm> |
| 54 | #include <functional> |
| 55 | #include <utility> |
| 56 | #include <sstream> |
| 57 | |
| 58 | namespace embree |
| 59 | { |
| 60 | //////////////////////////////////////////////////////////////////////////////// |
| 61 | /// Vec2 shortcuts |
| 62 | //////////////////////////////////////////////////////////////////////////////// |
| 63 | |
| 64 | template<int N> using Vec2vf = Vec2<vfloat<N>>; |
| 65 | template<int N> using Vec2vd = Vec2<vdouble<N>>; |
| 66 | template<int N> using Vec2vr = Vec2<vreal<N>>; |
| 67 | template<int N> using Vec2vi = Vec2<vint<N>>; |
| 68 | template<int N> using Vec2vl = Vec2<vllong<N>>; |
| 69 | template<int N> using Vec2vb = Vec2<vbool<N>>; |
| 70 | template<int N> using Vec2vbf = Vec2<vboolf<N>>; |
| 71 | template<int N> using Vec2vbd = Vec2<vboold<N>>; |
| 72 | |
| 73 | typedef Vec2<vfloat4> Vec2vf4; |
| 74 | typedef Vec2<vdouble4> Vec2vd4; |
| 75 | typedef Vec2<vreal4> Vec2vr4; |
| 76 | typedef Vec2<vint4> Vec2vi4; |
| 77 | typedef Vec2<vllong4> Vec2vl4; |
| 78 | typedef Vec2<vbool4> Vec2vb4; |
| 79 | typedef Vec2<vboolf4> Vec2vbf4; |
| 80 | typedef Vec2<vboold4> Vec2vbd4; |
| 81 | |
| 82 | typedef Vec2<vfloat8> Vec2vf8; |
| 83 | typedef Vec2<vdouble8> Vec2vd8; |
| 84 | typedef Vec2<vreal8> Vec2vr8; |
| 85 | typedef Vec2<vint8> Vec2vi8; |
| 86 | typedef Vec2<vllong8> Vec2vl8; |
| 87 | typedef Vec2<vbool8> Vec2vb8; |
| 88 | typedef Vec2<vboolf8> Vec2vbf8; |
| 89 | typedef Vec2<vboold8> Vec2vbd8; |
| 90 | |
| 91 | typedef Vec2<vfloat16> Vec2vf16; |
| 92 | typedef Vec2<vdouble16> Vec2vd16; |
| 93 | typedef Vec2<vreal16> Vec2vr16; |
| 94 | typedef Vec2<vint16> Vec2vi16; |
| 95 | typedef Vec2<vllong16> Vec2vl16; |
| 96 | typedef Vec2<vbool16> Vec2vb16; |
| 97 | typedef Vec2<vboolf16> Vec2vbf16; |
| 98 | typedef Vec2<vboold16> Vec2vbd16; |
| 99 | |
| 100 | typedef Vec2<vfloatx> Vec2vfx; |
| 101 | typedef Vec2<vdoublex> Vec2vdx; |
| 102 | typedef Vec2<vrealx> Vec2vrx; |
| 103 | typedef Vec2<vintx> Vec2vix; |
| 104 | typedef Vec2<vllongx> Vec2vlx; |
| 105 | typedef Vec2<vboolx> Vec2vbx; |
| 106 | typedef Vec2<vboolfx> Vec2vbfx; |
| 107 | typedef Vec2<vbooldx> Vec2vbdx; |
| 108 | |
| 109 | //////////////////////////////////////////////////////////////////////////////// |
| 110 | /// Vec3 shortcuts |
| 111 | //////////////////////////////////////////////////////////////////////////////// |
| 112 | |
| 113 | template<int N> using Vec3vf = Vec3<vfloat<N>>; |
| 114 | template<int N> using Vec3vd = Vec3<vdouble<N>>; |
| 115 | template<int N> using Vec3vr = Vec3<vreal<N>>; |
| 116 | template<int N> using Vec3vi = Vec3<vint<N>>; |
| 117 | template<int N> using Vec3vl = Vec3<vllong<N>>; |
| 118 | template<int N> using Vec3vb = Vec3<vbool<N>>; |
| 119 | template<int N> using Vec3vbf = Vec3<vboolf<N>>; |
| 120 | template<int N> using Vec3vbd = Vec3<vboold<N>>; |
| 121 | |
| 122 | typedef Vec3<vfloat4> Vec3vf4; |
| 123 | typedef Vec3<vdouble4> Vec3vd4; |
| 124 | typedef Vec3<vreal4> Vec3vr4; |
| 125 | typedef Vec3<vint4> Vec3vi4; |
| 126 | typedef Vec3<vllong4> Vec3vl4; |
| 127 | typedef Vec3<vbool4> Vec3vb4; |
| 128 | typedef Vec3<vboolf4> Vec3vbf4; |
| 129 | typedef Vec3<vboold4> Vec3vbd4; |
| 130 | |
| 131 | typedef Vec3<vfloat8> Vec3vf8; |
| 132 | typedef Vec3<vdouble8> Vec3vd8; |
| 133 | typedef Vec3<vreal8> Vec3vr8; |
| 134 | typedef Vec3<vint8> Vec3vi8; |
| 135 | typedef Vec3<vllong8> Vec3vl8; |
| 136 | typedef Vec3<vbool8> Vec3vb8; |
| 137 | typedef Vec3<vboolf8> Vec3vbf8; |
| 138 | typedef Vec3<vboold8> Vec3vbd8; |
| 139 | |
| 140 | typedef Vec3<vfloat16> Vec3vf16; |
| 141 | typedef Vec3<vdouble16> Vec3vd16; |
| 142 | typedef Vec3<vreal16> Vec3vr16; |
| 143 | typedef Vec3<vint16> Vec3vi16; |
| 144 | typedef Vec3<vllong16> Vec3vl16; |
| 145 | typedef Vec3<vbool16> Vec3vb16; |
| 146 | typedef Vec3<vboolf16> Vec3vbf16; |
| 147 | typedef Vec3<vboold16> Vec3vbd16; |
| 148 | |
| 149 | typedef Vec3<vfloatx> Vec3vfx; |
| 150 | typedef Vec3<vdoublex> Vec3vdx; |
| 151 | typedef Vec3<vrealx> Vec3vrx; |
| 152 | typedef Vec3<vintx> Vec3vix; |
| 153 | typedef Vec3<vllongx> Vec3vlx; |
| 154 | typedef Vec3<vboolx> Vec3vbx; |
| 155 | typedef Vec3<vboolfx> Vec3vbfx; |
| 156 | typedef Vec3<vbooldx> Vec3vbdx; |
| 157 | |
| 158 | //////////////////////////////////////////////////////////////////////////////// |
| 159 | /// Vec4 shortcuts |
| 160 | //////////////////////////////////////////////////////////////////////////////// |
| 161 | |
| 162 | template<int N> using Vec4vf = Vec4<vfloat<N>>; |
| 163 | template<int N> using Vec4vd = Vec4<vdouble<N>>; |
| 164 | template<int N> using Vec4vr = Vec4<vreal<N>>; |
| 165 | template<int N> using Vec4vi = Vec4<vint<N>>; |
| 166 | template<int N> using Vec4vl = Vec4<vllong<N>>; |
| 167 | template<int N> using Vec4vb = Vec4<vbool<N>>; |
| 168 | template<int N> using Vec4vbf = Vec4<vboolf<N>>; |
| 169 | template<int N> using Vec4vbd = Vec4<vboold<N>>; |
| 170 | |
| 171 | typedef Vec4<vfloat4> Vec4vf4; |
| 172 | typedef Vec4<vdouble4> Vec4vd4; |
| 173 | typedef Vec4<vreal4> Vec4vr4; |
| 174 | typedef Vec4<vint4> Vec4vi4; |
| 175 | typedef Vec4<vllong4> Vec4vl4; |
| 176 | typedef Vec4<vbool4> Vec4vb4; |
| 177 | typedef Vec4<vboolf4> Vec4vbf4; |
| 178 | typedef Vec4<vboold4> Vec4vbd4; |
| 179 | |
| 180 | typedef Vec4<vfloat8> Vec4vf8; |
| 181 | typedef Vec4<vdouble8> Vec4vd8; |
| 182 | typedef Vec4<vreal8> Vec4vr8; |
| 183 | typedef Vec4<vint8> Vec4vi8; |
| 184 | typedef Vec4<vllong8> Vec4vl8; |
| 185 | typedef Vec4<vbool8> Vec4vb8; |
| 186 | typedef Vec4<vboolf8> Vec4vbf8; |
| 187 | typedef Vec4<vboold8> Vec4vbd8; |
| 188 | |
| 189 | typedef Vec4<vfloat16> Vec4vf16; |
| 190 | typedef Vec4<vdouble16> Vec4vd16; |
| 191 | typedef Vec4<vreal16> Vec4vr16; |
| 192 | typedef Vec4<vint16> Vec4vi16; |
| 193 | typedef Vec4<vllong16> Vec4vl16; |
| 194 | typedef Vec4<vbool16> Vec4vb16; |
| 195 | typedef Vec4<vboolf16> Vec4vbf16; |
| 196 | typedef Vec4<vboold16> Vec4vbd16; |
| 197 | |
| 198 | typedef Vec4<vfloatx> Vec4vfx; |
| 199 | typedef Vec4<vdoublex> Vec4vdx; |
| 200 | typedef Vec4<vrealx> Vec4vrx; |
| 201 | typedef Vec4<vintx> Vec4vix; |
| 202 | typedef Vec4<vllongx> Vec4vlx; |
| 203 | typedef Vec4<vboolx> Vec4vbx; |
| 204 | typedef Vec4<vboolfx> Vec4vbfx; |
| 205 | typedef Vec4<vbooldx> Vec4vbdx; |
| 206 | |
| 207 | //////////////////////////////////////////////////////////////////////////////// |
| 208 | /// Other shortcuts |
| 209 | //////////////////////////////////////////////////////////////////////////////// |
| 210 | |
| 211 | template<int N> using BBox3vf = BBox<Vec3vf<N>>; |
| 212 | typedef BBox<Vec3vf4> BBox3vf4; |
| 213 | typedef BBox<Vec3vf8> BBox3vf8; |
| 214 | typedef BBox<Vec3vf16> BBox3vf16; |
| 215 | |
| 216 | /* calculate time segment itime and fractional time ftime */ |
| 217 | __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime) |
| 218 | { |
| 219 | const float timeScaled = time * numTimeSegments; |
| 220 | const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f); |
| 221 | ftime = timeScaled - itimef; |
| 222 | return int(itimef); |
| 223 | } |
| 224 | |
| 225 | __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime) |
| 226 | { |
| 227 | const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; |
| 228 | const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f); |
| 229 | ftime = timeScaled - itimef; |
| 230 | return int(itimef); |
| 231 | } |
| 232 | |
| 233 | template<int N> |
| 234 | __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) |
| 235 | { |
| 236 | const vfloat<N> timeScaled = time * numTimeSegments; |
| 237 | const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); |
| 238 | ftime = timeScaled - itimef; |
| 239 | return vint<N>(itimef); |
| 240 | } |
| 241 | |
| 242 | template<int N> |
| 243 | __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) |
| 244 | { |
| 245 | const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; |
| 246 | const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); |
| 247 | ftime = timeScaled - itimef; |
| 248 | return vint<N>(itimef); |
| 249 | } |
| 250 | |
| 251 | /* calculate overlapping time segment range */ |
| 252 | __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments) |
| 253 | { |
| 254 | const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step |
| 255 | const float round_down = 1.0f-2.0f*float(ulp); |
| 256 | const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f); |
| 257 | const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments); |
| 258 | return make_range(itime_lower, itime_upper); |
| 259 | } |
| 260 | |
| 261 | /* calculate overlapping time segment range */ |
| 262 | __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments) |
| 263 | { |
| 264 | const float lower = (range.lower-time_range.lower)/time_range.size(); |
| 265 | const float upper = (range.upper-time_range.lower)/time_range.size(); |
| 266 | return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments); |
| 267 | } |
| 268 | } |
| 269 | |