| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "math.h" |
| 7 | #include "vec3.h" |
| 8 | |
| 9 | namespace embree |
| 10 | { |
| 11 | //////////////////////////////////////////////////////////////////////////////// |
| 12 | /// Generic 4D vector Class |
| 13 | //////////////////////////////////////////////////////////////////////////////// |
| 14 | |
| 15 | template<typename T> struct Vec4 |
| 16 | { |
| 17 | enum { N = 4 }; |
| 18 | union { |
| 19 | struct { T x, y, z, w; }; |
| 20 | #if !(defined(__WIN32__) && _MSC_VER == 1800) // workaround for older VS 2013 compiler |
| 21 | T components[N]; |
| 22 | #endif |
| 23 | }; |
| 24 | |
| 25 | typedef T Scalar; |
| 26 | |
| 27 | //////////////////////////////////////////////////////////////////////////////// |
| 28 | /// Construction |
| 29 | //////////////////////////////////////////////////////////////////////////////// |
| 30 | |
| 31 | __forceinline Vec4( ) {} |
| 32 | __forceinline explicit Vec4( const T& a ) : x(a), y(a), z(a), w(a) {} |
| 33 | __forceinline Vec4( const T& x, const T& y, const T& z, const T& w ) : x(x), y(y), z(z), w(w) {} |
| 34 | __forceinline Vec4( const Vec3<T>& xyz, const T& w ) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {} |
| 35 | |
| 36 | __forceinline Vec4( const Vec4& other ) { x = other.x; y = other.y; z = other.z; w = other.w; } |
| 37 | __forceinline Vec4( const Vec3fx& other ); |
| 38 | |
| 39 | template<typename T1> __forceinline Vec4( const Vec4<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)), w(T(a.w)) {} |
| 40 | template<typename T1> __forceinline Vec4& operator =(const Vec4<T1>& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; } |
| 41 | |
| 42 | __forceinline Vec4& operator =(const Vec4& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; } |
| 43 | |
| 44 | __forceinline operator Vec3<T> () const { return Vec3<T>(x,y,z); } |
| 45 | |
| 46 | //////////////////////////////////////////////////////////////////////////////// |
| 47 | /// Constants |
| 48 | //////////////////////////////////////////////////////////////////////////////// |
| 49 | |
| 50 | __forceinline Vec4( ZeroTy ) : x(zero), y(zero), z(zero), w(zero) {} |
| 51 | __forceinline Vec4( OneTy ) : x(one), y(one), z(one), w(one) {} |
| 52 | __forceinline Vec4( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf), w(pos_inf) {} |
| 53 | __forceinline Vec4( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf), w(neg_inf) {} |
| 54 | |
| 55 | #if defined(__WIN32__) && (_MSC_VER == 1800) // workaround for older VS 2013 compiler |
| 56 | __forceinline const T& operator [](const size_t axis) const { assert(axis < 4); return (&x)[axis]; } |
| 57 | __forceinline T& operator [](const size_t axis) { assert(axis < 4); return (&x)[axis]; } |
| 58 | #else |
| 59 | __forceinline const T& operator [](const size_t axis ) const { assert(axis < 4); return components[axis]; } |
| 60 | __forceinline T& operator [](const size_t axis) { assert(axis < 4); return components[axis]; } |
| 61 | #endif |
| 62 | |
| 63 | //////////////////////////////////////////////////////////////////////////////// |
| 64 | /// Swizzles |
| 65 | //////////////////////////////////////////////////////////////////////////////// |
| 66 | |
| 67 | __forceinline Vec3<T> xyz() const { return Vec3<T>(x, y, z); } |
| 68 | }; |
| 69 | |
| 70 | //////////////////////////////////////////////////////////////////////////////// |
| 71 | /// Unary Operators |
| 72 | //////////////////////////////////////////////////////////////////////////////// |
| 73 | |
| 74 | template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a ) { return Vec4<T>(+a.x, +a.y, +a.z, +a.w); } |
| 75 | template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a ) { return Vec4<T>(-a.x, -a.y, -a.z, -a.w); } |
| 76 | template<typename T> __forceinline Vec4<T> abs ( const Vec4<T>& a ) { return Vec4<T>(abs (a.x), abs (a.y), abs (a.z), abs (a.w)); } |
| 77 | template<typename T> __forceinline Vec4<T> rcp ( const Vec4<T>& a ) { return Vec4<T>(rcp (a.x), rcp (a.y), rcp (a.z), rcp (a.w)); } |
| 78 | template<typename T> __forceinline Vec4<T> rsqrt ( const Vec4<T>& a ) { return Vec4<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z), rsqrt(a.w)); } |
| 79 | template<typename T> __forceinline Vec4<T> sqrt ( const Vec4<T>& a ) { return Vec4<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z), sqrt (a.w)); } |
| 80 | |
| 81 | //////////////////////////////////////////////////////////////////////////////// |
| 82 | /// Binary Operators |
| 83 | //////////////////////////////////////////////////////////////////////////////// |
| 84 | |
| 85 | template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } |
| 86 | template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } |
| 87 | template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); } |
| 88 | template<typename T> __forceinline Vec4<T> operator *( const T& a, const Vec4<T>& b ) { return Vec4<T>(a * b.x, a * b.y, a * b.z, a * b.w); } |
| 89 | template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x * b , a.y * b , a.z * b , a.w * b ); } |
| 90 | template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); } |
| 91 | template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x / b , a.y / b , a.z / b , a.w / b ); } |
| 92 | template<typename T> __forceinline Vec4<T> operator /( const T& a, const Vec4<T>& b ) { return Vec4<T>(a / b.x, a / b.y, a / b.z, a / b.w); } |
| 93 | |
| 94 | template<typename T> __forceinline Vec4<T> min(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); } |
| 95 | template<typename T> __forceinline Vec4<T> max(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); } |
| 96 | |
| 97 | //////////////////////////////////////////////////////////////////////////////// |
| 98 | /// Ternary Operators |
| 99 | //////////////////////////////////////////////////////////////////////////////// |
| 100 | |
| 101 | template<typename T> __forceinline Vec4<T> madd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); } |
| 102 | template<typename T> __forceinline Vec4<T> msub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); } |
| 103 | template<typename T> __forceinline Vec4<T> nmadd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z),nmadd(a.w,b.w,c.w)); } |
| 104 | template<typename T> __forceinline Vec4<T> nmsub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z),nmsub(a.w,b.w,c.w)); } |
| 105 | |
| 106 | template<typename T> __forceinline Vec4<T> madd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z), madd(a,b.w,c.w)); } |
| 107 | template<typename T> __forceinline Vec4<T> msub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z), msub(a,b.w,c.w)); } |
| 108 | template<typename T> __forceinline Vec4<T> nmadd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z),nmadd(a,b.w,c.w)); } |
| 109 | template<typename T> __forceinline Vec4<T> nmsub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z),nmsub(a,b.w,c.w)); } |
| 110 | |
| 111 | //////////////////////////////////////////////////////////////////////////////// |
| 112 | /// Assignment Operators |
| 113 | //////////////////////////////////////////////////////////////////////////////// |
| 114 | |
| 115 | template<typename T> __forceinline Vec4<T>& operator +=( Vec4<T>& a, const Vec4<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; } |
| 116 | template<typename T> __forceinline Vec4<T>& operator -=( Vec4<T>& a, const Vec4<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; return a; } |
| 117 | template<typename T> __forceinline Vec4<T>& operator *=( Vec4<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; a.w *= b ; return a; } |
| 118 | template<typename T> __forceinline Vec4<T>& operator /=( Vec4<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; a.w /= b ; return a; } |
| 119 | |
| 120 | //////////////////////////////////////////////////////////////////////////////// |
| 121 | /// Reduction Operators |
| 122 | //////////////////////////////////////////////////////////////////////////////// |
| 123 | |
| 124 | template<typename T> __forceinline T reduce_add( const Vec4<T>& a ) { return a.x + a.y + a.z + a.w; } |
| 125 | template<typename T> __forceinline T reduce_mul( const Vec4<T>& a ) { return a.x * a.y * a.z * a.w; } |
| 126 | template<typename T> __forceinline T reduce_min( const Vec4<T>& a ) { return min(a.x, a.y, a.z, a.w); } |
| 127 | template<typename T> __forceinline T reduce_max( const Vec4<T>& a ) { return max(a.x, a.y, a.z, a.w); } |
| 128 | |
| 129 | //////////////////////////////////////////////////////////////////////////////// |
| 130 | /// Comparison Operators |
| 131 | //////////////////////////////////////////////////////////////////////////////// |
| 132 | |
| 133 | template<typename T> __forceinline bool operator ==( const Vec4<T>& a, const Vec4<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; } |
| 134 | template<typename T> __forceinline bool operator !=( const Vec4<T>& a, const Vec4<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; } |
| 135 | template<typename T> __forceinline bool operator < ( const Vec4<T>& a, const Vec4<T>& b ) { |
| 136 | if (a.x != b.x) return a.x < b.x; |
| 137 | if (a.y != b.y) return a.y < b.y; |
| 138 | if (a.z != b.z) return a.z < b.z; |
| 139 | if (a.w != b.w) return a.w < b.w; |
| 140 | return false; |
| 141 | } |
| 142 | |
| 143 | //////////////////////////////////////////////////////////////////////////////// |
| 144 | /// Shift Operators |
| 145 | //////////////////////////////////////////////////////////////////////////////// |
| 146 | |
| 147 | template<typename T> __forceinline Vec4<T> shift_right_1( const Vec4<T>& a ) { |
| 148 | return Vec4<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z),shift_right_1(a.w)); |
| 149 | } |
| 150 | |
| 151 | //////////////////////////////////////////////////////////////////////////////// |
| 152 | /// Euclidean Space Operators |
| 153 | //////////////////////////////////////////////////////////////////////////////// |
| 154 | |
| 155 | template<typename T> __forceinline T dot ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); } |
| 156 | |
| 157 | template<typename T> __forceinline T length ( const Vec4<T>& a ) { return sqrt(dot(a,a)); } |
| 158 | template<typename T> __forceinline Vec4<T> normalize( const Vec4<T>& a ) { return a*rsqrt(dot(a,a)); } |
| 159 | template<typename T> __forceinline T distance ( const Vec4<T>& a, const Vec4<T>& b ) { return length(a-b); } |
| 160 | |
| 161 | //////////////////////////////////////////////////////////////////////////////// |
| 162 | /// Select |
| 163 | //////////////////////////////////////////////////////////////////////////////// |
| 164 | |
| 165 | template<typename T> __forceinline Vec4<T> select ( bool s, const Vec4<T>& t, const Vec4<T>& f ) { |
| 166 | return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w)); |
| 167 | } |
| 168 | |
| 169 | template<typename T> __forceinline Vec4<T> select ( const Vec4<bool>& s, const Vec4<T>& t, const Vec4<T>& f ) { |
| 170 | return Vec4<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z),select(s.w,t.w,f.w)); |
| 171 | } |
| 172 | |
| 173 | template<typename T> __forceinline Vec4<T> select ( const typename T::Bool& s, const Vec4<T>& t, const Vec4<T>& f ) { |
| 174 | return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w)); |
| 175 | } |
| 176 | |
| 177 | template<typename T> |
| 178 | __forceinline Vec4<T> lerp(const Vec4<T>& v0, const Vec4<T>& v1, const T& t) { |
| 179 | return madd(Vec4<T>(T(1.0f)-t),v0,t*v1); |
| 180 | } |
| 181 | |
| 182 | //////////////////////////////////////////////////////////////////////////////// |
| 183 | /// Output Operators |
| 184 | //////////////////////////////////////////////////////////////////////////////// |
| 185 | |
| 186 | template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Vec4<T>& a) { |
| 187 | return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")" ; |
| 188 | } |
| 189 | |
| 190 | //////////////////////////////////////////////////////////////////////////////// |
| 191 | /// Default template instantiations |
| 192 | //////////////////////////////////////////////////////////////////////////////// |
| 193 | |
| 194 | typedef Vec4<bool > Vec4b; |
| 195 | typedef Vec4<unsigned char> Vec4uc; |
| 196 | typedef Vec4<int > Vec4i; |
| 197 | typedef Vec4<float > Vec4f; |
| 198 | } |
| 199 | |
| 200 | #include "vec3ba.h" |
| 201 | #include "vec3ia.h" |
| 202 | #include "vec3fa.h" |
| 203 | |
| 204 | //////////////////////////////////////////////////////////////////////////////// |
/// SSE / AVX / AVX-512 specializations
| 206 | //////////////////////////////////////////////////////////////////////////////// |
| 207 | |
| 208 | #if defined(__SSE__) || defined(__ARM_NEON) |
| 209 | #include "../simd/sse.h" |
| 210 | #endif |
| 211 | |
| 212 | #if defined __AVX__ |
| 213 | #include "../simd/avx.h" |
| 214 | #endif |
| 215 | |
| 216 | #if defined __AVX512F__ |
| 217 | #include "../simd/avx512.h" |
| 218 | #endif |
| 219 | |
| 220 | namespace embree |
| 221 | { |
| 222 | template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; } |
| 223 | |
| 224 | #if defined(__AVX__) |
| 225 | template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) { |
| 226 | x = a.x; y = a.y; z = a.z; w = a.w; |
| 227 | } |
| 228 | #elif defined(__SSE__) || defined(__ARM_NEON) |
| 229 | template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) { |
| 230 | const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v); |
| 231 | } |
| 232 | #endif |
| 233 | |
/// Vec4<vfloat8> from a Vec3fx (AVX builds only): assigns the x, y, z and w
/// members of the source to the matching components.
#if defined(__AVX__)
template<>
__forceinline Vec4<vfloat8>::Vec4(const Vec3fx& src)
{
  x = src.x;
  y = src.y;
  z = src.z;
  w = src.w;
}
#endif
| 239 | |
/// Vec4<vfloat16> from a Vec3fx (AVX-512F builds only): initializes each
/// component from the matching member of the source.
#if defined(__AVX512F__)
template<>
__forceinline Vec4<vfloat16>::Vec4(const Vec3fx& src)
  : x(src.x),
    y(src.y),
    z(src.z),
    w(src.w)
{}
#endif
| 243 | } |
| 244 | |