| 1 | // Copyright 2009-2021 Intel Corporation |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| 4 | #pragma once |
| 5 | |
| 6 | #include "../sys/alloc.h" |
| 7 | #include "math.h" |
| 8 | #include "../simd/sse.h" |
| 9 | |
| 10 | namespace embree |
| 11 | { |
| 12 | //////////////////////////////////////////////////////////////////////////////// |
| 13 | /// SSE Vec3ba Type |
| 14 | //////////////////////////////////////////////////////////////////////////////// |
| 15 | |
| 16 | struct __aligned(16) Vec3ba |
| 17 | { |
| 18 | ALIGNED_STRUCT_(16); |
| 19 | |
| 20 | union { |
| 21 | __m128 m128; |
| 22 | struct { int x,y,z; }; |
| 23 | }; |
| 24 | |
| 25 | typedef int Scalar; |
| 26 | enum { N = 3 }; |
| 27 | |
| 28 | //////////////////////////////////////////////////////////////////////////////// |
| 29 | /// Constructors, Assignment & Cast Operators |
| 30 | //////////////////////////////////////////////////////////////////////////////// |
| 31 | |
| 32 | __forceinline Vec3ba( ) {} |
| 33 | __forceinline Vec3ba( const __m128 input ) : m128(input) {} |
| 34 | __forceinline Vec3ba( const Vec3ba& other ) : m128(other.m128) {} |
| 35 | __forceinline Vec3ba& operator =(const Vec3ba& other) { m128 = other.m128; return *this; } |
| 36 | |
| 37 | __forceinline explicit Vec3ba( bool a ) |
| 38 | : m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {} |
| 39 | __forceinline Vec3ba( bool a, bool b, bool c) |
| 40 | : m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {} |
| 41 | |
| 42 | __forceinline operator const __m128&() const { return m128; } |
| 43 | __forceinline operator __m128&() { return m128; } |
| 44 | |
| 45 | //////////////////////////////////////////////////////////////////////////////// |
| 46 | /// Constants |
| 47 | //////////////////////////////////////////////////////////////////////////////// |
| 48 | |
| 49 | __forceinline Vec3ba( FalseTy ) : m128(_mm_setzero_ps()) {} |
| 50 | __forceinline Vec3ba( TrueTy ) : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {} |
| 51 | |
| 52 | //////////////////////////////////////////////////////////////////////////////// |
| 53 | /// Array Access |
| 54 | //////////////////////////////////////////////////////////////////////////////// |
| 55 | |
| 56 | __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } |
| 57 | __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } |
| 58 | }; |
| 59 | |
| 60 | |
| 61 | //////////////////////////////////////////////////////////////////////////////// |
| 62 | /// Unary Operators |
| 63 | //////////////////////////////////////////////////////////////////////////////// |
| 64 | |
| 65 | __forceinline Vec3ba operator !( const Vec3ba& a ) { return _mm_xor_ps(a.m128, Vec3ba(embree::True)); } |
| 66 | |
| 67 | //////////////////////////////////////////////////////////////////////////////// |
| 68 | /// Binary Operators |
| 69 | //////////////////////////////////////////////////////////////////////////////// |
| 70 | |
| 71 | __forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return _mm_and_ps(a.m128, b.m128); } |
| 72 | __forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return _mm_or_ps (a.m128, b.m128); } |
| 73 | __forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return _mm_xor_ps(a.m128, b.m128); } |
| 74 | |
| 75 | //////////////////////////////////////////////////////////////////////////////// |
| 76 | /// Assignment Operators |
| 77 | //////////////////////////////////////////////////////////////////////////////// |
| 78 | |
| 79 | __forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; } |
| 80 | __forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; } |
| 81 | __forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; } |
| 82 | |
| 83 | //////////////////////////////////////////////////////////////////////////////// |
| 84 | /// Comparison Operators + Select |
| 85 | //////////////////////////////////////////////////////////////////////////////// |
| 86 | |
| 87 | __forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) { |
| 88 | return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) == 7; |
| 89 | } |
| 90 | __forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) { |
| 91 | return (_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(a.m128), _mm_castps_si128(b.m128)))) & 7) != 7; |
| 92 | } |
| 93 | __forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) { |
| 94 | if (a.x != b.x) return a.x < b.x; |
| 95 | if (a.y != b.y) return a.y < b.y; |
| 96 | if (a.z != b.z) return a.z < b.z; |
| 97 | return false; |
| 98 | } |
| 99 | |
| 100 | //////////////////////////////////////////////////////////////////////////////// |
| 101 | /// Reduction Operations |
| 102 | //////////////////////////////////////////////////////////////////////////////// |
| 103 | |
| 104 | __forceinline bool reduce_and( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) == 0x7; } |
| 105 | __forceinline bool reduce_or ( const Vec3ba& a ) { return (_mm_movemask_ps(a) & 0x7) != 0x0; } |
| 106 | |
| 107 | __forceinline bool all ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x7; } |
| 108 | __forceinline bool any ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) != 0x0; } |
| 109 | __forceinline bool none ( const Vec3ba& b ) { return (_mm_movemask_ps(b) & 0x7) == 0x0; } |
| 110 | |
| 111 | __forceinline size_t movemask(const Vec3ba& a) { return _mm_movemask_ps(a) & 0x7; } |
| 112 | |
| 113 | //////////////////////////////////////////////////////////////////////////////// |
| 114 | /// Output Operators |
| 115 | //////////////////////////////////////////////////////////////////////////////// |
| 116 | |
| 117 | __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) { |
| 118 | return cout << "(" << (a.x ? "1" : "0" ) << ", " << (a.y ? "1" : "0" ) << ", " << (a.z ? "1" : "0" ) << ")" ; |
| 119 | } |
| 120 | } |
| 121 | |