1 | /* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_TEST_BITS_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_TEST_BITS_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/expr.h> |
17 | #include <simdpp/core/bit_and.h> |
18 | #include <simdpp/core/extract.h> |
19 | #include <simdpp/core/move_l.h> |
20 | #include <simdpp/detail/insn/test_bits.h> |
21 | #include <simdpp/detail/null/bitwise.h> |
22 | |
23 | namespace simdpp { |
24 | namespace SIMDPP_ARCH_NAMESPACE { |
25 | namespace detail { |
26 | namespace insn { |
27 | |
28 | static SIMDPP_INL |
29 | bool i_test_bits_any(const uint32<4>& a) |
30 | { |
31 | #if SIMDPP_USE_NULL |
32 | return null::test_bits_any(a); |
33 | #elif SIMDPP_USE_SSE4_1 |
34 | return !_mm_testz_si128(a.native(), a.native()); |
35 | #elif SIMDPP_USE_SSE2 |
36 | uint32<4> r = _mm_cmpeq_epi32(a.native(), _mm_setzero_si128()); |
37 | return _mm_movemask_epi8(r.native()) != 0xffff; |
38 | #elif SIMDPP_USE_NEON |
39 | #if SIMDPP_64_BITS |
40 | uint64<2> r; |
41 | r = a; |
42 | r = bit_or(r, move2_l<1>(r)); |
43 | return extract<0>(r) != 0; |
44 | #else |
45 | uint32x4 r = bit_or(a, move4_l<2>(a)); |
46 | r = bit_or(r, move4_l<1>(r)); |
47 | return extract<0>(r) != 0; |
48 | #endif |
49 | #elif SIMDPP_USE_ALTIVEC |
50 | uint32<4> z = make_uint(0); |
51 | return vec_any_gt(a.native(), z.native()); |
52 | #elif SIMDPP_USE_MSA |
53 | return __msa_test_bnz_v((v16u8) a.native()); |
54 | #endif |
55 | } |
56 | |
57 | static SIMDPP_INL |
58 | bool i_test_bits_any(const uint16<8>& a) { return i_test_bits_any(uint32<4>(a)); } |
59 | static SIMDPP_INL |
60 | bool i_test_bits_any(const uint8<16>& a) { return i_test_bits_any(uint32<4>(a)); } |
61 | |
62 | static SIMDPP_INL |
63 | bool i_test_bits_any(const uint64<2>& a) |
64 | { |
65 | #if SIMDPP_USE_VSX_207 |
66 | uint64<2> z = make_zero(); |
67 | return vec_any_gt(a.native(), z.native()); |
68 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
69 | return null::test_bits_any(a); |
70 | #else |
71 | return i_test_bits_any(uint32<4>(a)); |
72 | #endif |
73 | } |
74 | |
75 | static SIMDPP_INL |
76 | bool i_test_bits_any(const float32<4>& a) |
77 | { |
78 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
79 | return null::test_bits_any(a); |
80 | #else |
81 | return i_test_bits_any(uint32<4>(a)); |
82 | #endif |
83 | } |
84 | |
85 | static SIMDPP_INL |
86 | bool i_test_bits_any(const float64<2>& a) |
87 | { |
88 | #if SIMDPP_USE_VSX_206 |
89 | return i_test_bits_any(uint32<4>(a)); |
90 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
91 | return null::test_bits_any(a); |
92 | #else |
93 | return i_test_bits_any(uint32<4>(a)); |
94 | #endif |
95 | } |
96 | |
#if SIMDPP_USE_AVX
/// 256-bit single-precision overload: returns true when any bit of @a a is
/// set. The register is viewed as integer data and tested against itself.
static SIMDPP_INL
bool i_test_bits_any(const float32<8>& a)
{
    const __m256i bits = _mm256_castps_si256(a.native());
    return _mm256_testz_si256(bits, bits) == 0;
}

/// 256-bit double-precision overload: returns true when any bit of @a a is
/// set. The register is viewed as integer data and tested against itself.
static SIMDPP_INL
bool i_test_bits_any(const float64<4>& a)
{
    const __m256i bits = _mm256_castpd_si256(a.native());
    return _mm256_testz_si256(bits, bits) == 0;
}
#endif
111 | |
#if SIMDPP_USE_AVX2
// 256-bit integer overloads. Each one tests the register against itself:
// _mm256_testz_si256 yields 1 only when the register is entirely zero.

static SIMDPP_INL
bool i_test_bits_any(const uint8<32>& a)
{
    const __m256i bits = a.native();
    return _mm256_testz_si256(bits, bits) == 0;
}

static SIMDPP_INL
bool i_test_bits_any(const uint16<16>& a)
{
    const __m256i bits = a.native();
    return _mm256_testz_si256(bits, bits) == 0;
}

static SIMDPP_INL
bool i_test_bits_any(const uint32<8>& a)
{
    const __m256i bits = a.native();
    return _mm256_testz_si256(bits, bits) == 0;
}

static SIMDPP_INL
bool i_test_bits_any(const uint64<4>& a)
{
    const __m256i bits = a.native();
    return _mm256_testz_si256(bits, bits) == 0;
}
#endif
122 | |
#if SIMDPP_USE_AVX512F
/// 512-bit overload: returns true when any bit of @a a is set.
static SIMDPP_INL
bool i_test_bits_any(const uint32<16>& a)
{
    // One mask bit per 64-bit lane whose (lane AND lane) is non-zero.
    const __m512i bits = a.native();
    const __mmask8 nonzero_lanes = _mm512_test_epi64_mask(bits, bits);
    return nonzero_lanes != 0;
}

// The remaining 512-bit types convert to uint32<16> and reuse the overload
// above.

static SIMDPP_INL
bool i_test_bits_any(const uint64<8>& a)
{
    return i_test_bits_any(uint32<16>(a));
}

static SIMDPP_INL
bool i_test_bits_any(const float32<16>& a)
{
    return i_test_bits_any(uint32<16>(a));
}

static SIMDPP_INL
bool i_test_bits_any(const float64<8>& a)
{
    return i_test_bits_any(uint32<16>(a));
}
#endif
136 | |
#if SIMDPP_USE_AVX512BW
// 512-bit vectors with 8- and 16-bit elements convert to uint32<16> and
// defer to that overload.

SIMDPP_INL bool i_test_bits_any(const uint8<64>& a)
{
    return i_test_bits_any(uint32<16>(a));
}

SIMDPP_INL bool i_test_bits_any(const uint16<32>& a)
{
    return i_test_bits_any(uint32<16>(a));
}
#endif
141 | |
142 | template<unsigned N, class V> |
143 | SIMDPP_INL bool i_test_bits_any(const any_vec<N,V>& a) |
144 | { |
145 | const V& wa = a.wrapped(); |
146 | typename V::base_vector_type r = wa.vec(0); |
147 | for (unsigned i = 1; i < wa.vec_length; ++i) |
148 | r = bit_or(r, wa.vec(i)); |
149 | return i_test_bits_any(r); |
150 | } |
151 | |
152 | } // namespace insn |
153 | } // namespace detail |
154 | } // namespace SIMDPP_ARCH_NAMESPACE |
155 | } // namespace simdpp |
156 | |
157 | #endif |
158 | |