1 | /* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_TO_MASK_INL |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_TO_MASK_INL |
10 | |
11 | #include <simdpp/types.h> |
12 | #include <simdpp/detail/insn/conv_to_mask.h> |
13 | #include <simdpp/core/cmp_neq.h> |
14 | #include <simdpp/detail/vector_array_macros.h> |
15 | |
16 | namespace simdpp { |
17 | namespace SIMDPP_ARCH_NAMESPACE { |
18 | namespace detail { |
19 | namespace insn { |
20 | |
// Converts a 16x8-bit vector to the corresponding mask type.
// Elements are presumably in canonical mask form (all bits set or all bits
// clear per element) — TODO(review): confirm callers guarantee this.
static SIMDPP_INL
mask_int8<16> i_to_mask(const uint8<16>& a)
{
#if SIMDPP_USE_NULL
    // Scalar fallback: derive the mask by explicit comparison against zero.
    return cmp_neq(a, (uint8<16>) make_zero());
#elif SIMDPP_USE_AVX512VL
    // AVX-512 masks live in dedicated mask registers; _mm_movepi8_mask
    // extracts the top (sign) bit of each byte into the mask bits.
    return _mm_movepi8_mask(a.native());
#else
    // On the remaining SIMD targets a mask shares the vector register
    // layout, so the native register is used directly.
    return a.native();
#endif
}
32 | |
#if SIMDPP_USE_AVX2
// Converts a 32x8-bit vector to its mask type (256-bit variant).
static SIMDPP_INL
mask_int8<32> i_to_mask(const uint8<32>& a)
{
#if SIMDPP_USE_AVX512VL
    // Extract the top bit of each byte into an AVX-512 mask register.
    return _mm256_movepi8_mask(a.native());
#else
    // Mask shares the vector register layout on plain AVX2.
    return a.native();
#endif
}
#endif
44 | |
#if SIMDPP_USE_AVX512BW
// Converts a 64x8-bit vector to its mask type. Byte-granularity mask
// intrinsics require AVX512BW, hence the guard.
static SIMDPP_INL
mask_int8<64> i_to_mask(const uint8<64>& a)
{
    // Top bit of each byte becomes one bit of the __mmask64.
    return _mm512_movepi8_mask(a.native());
}
#endif
52 | |
53 | // ----------------------------------------------------------------------------- |
54 | |
// Converts an 8x16-bit vector to the corresponding mask type.
// Elements are presumably in canonical mask form (all-ones/all-zeros per
// element) — TODO(review): confirm with callers.
static SIMDPP_INL
mask_int16<8> i_to_mask(const uint16<8>& a)
{
#if SIMDPP_USE_NULL
    // Scalar fallback: compare against zero to build the mask.
    return cmp_neq(a, (uint16<8>) make_zero());
#elif SIMDPP_USE_AVX512VL
    // Top bit of each 16-bit lane goes into a mask register.
    return _mm_movepi16_mask(a.native());
#else
    // Mask shares the vector register layout on other targets.
    return a.native();
#endif
}
66 | |
#if SIMDPP_USE_AVX2
// Converts a 16x16-bit vector to its mask type (256-bit variant).
static SIMDPP_INL
mask_int16<16> i_to_mask(const uint16<16>& a)
{
#if SIMDPP_USE_AVX512VL
    // Top bit of each 16-bit lane goes into a mask register.
    return _mm256_movepi16_mask(a.native());
#else
    // Mask shares the vector register layout on plain AVX2.
    return a.native();
#endif
}
#endif
78 | |
#if SIMDPP_USE_AVX512BW
// Converts a 32x16-bit vector to its mask type. Word-granularity mask
// intrinsics require AVX512BW, hence the guard.
static SIMDPP_INL
mask_int16<32> i_to_mask(const uint16<32>& a)
{
    // Top bit of each 16-bit lane becomes one bit of the __mmask32.
    return _mm512_movepi16_mask(a.native());
}
#endif
86 | |
87 | // ----------------------------------------------------------------------------- |
88 | |
// Converts a 4x32-bit vector to the corresponding mask type.
// Elements are presumably in canonical mask form (all-ones/all-zeros per
// element) — TODO(review): confirm with callers.
static SIMDPP_INL
mask_int32<4> i_to_mask(const uint32<4>& a)
{
#if SIMDPP_USE_NULL
    // Scalar fallback: compare against zero to build the mask.
    return cmp_neq(a, (uint32<4>) make_zero());
#elif SIMDPP_USE_AVX512VL
    // Top bit of each 32-bit lane goes into a mask register.
    return _mm_movepi32_mask(a.native());
#else
    // Mask shares the vector register layout on other targets.
    return a.native();
#endif
}
100 | |
#if SIMDPP_USE_AVX2
// Converts an 8x32-bit vector to its mask type (256-bit variant).
static SIMDPP_INL
mask_int32<8> i_to_mask(const uint32<8>& a)
{
#if SIMDPP_USE_AVX512VL
    // Top bit of each 32-bit lane goes into a mask register.
    return _mm256_movepi32_mask(a.native());
#else
    // Mask shares the vector register layout on plain AVX2.
    return a.native();
#endif
}
#endif
112 | |
#if SIMDPP_USE_AVX512F
// Converts a 16x32-bit vector to its mask type.
static SIMDPP_INL
mask_int32<16> i_to_mask(const uint32<16>& a)
{
    // _mm512_movepi32_mask requires AVX512DQ, which plain AVX512F lacks,
    // so test the vector against itself instead: a lane yields a set mask
    // bit iff it is non-zero (equivalent for canonical all-ones masks).
    return _mm512_test_epi32_mask(a.native(), a.native());
}
#endif
120 | |
121 | // ----------------------------------------------------------------------------- |
122 | |
// Converts a 2x64-bit vector to the corresponding mask type.
static SIMDPP_INL
mask_int64<2> i_to_mask(const uint64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    // Top bit of each 64-bit lane goes into a mask register.
    return _mm_movepi64_mask(a.native());
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
    // These targets have native 64-bit integer vectors; the mask shares
    // the vector register layout.
    return a.native();
#else
    // Fallback (NULL and targets without native 64-bit integer vector
    // support): build the mask by comparison against zero.
    return cmp_neq(a, (uint64<2>) make_zero());
#endif
}
134 | |
#if SIMDPP_USE_AVX2
// Converts a 4x64-bit vector to its mask type (256-bit variant).
static SIMDPP_INL
mask_int64<4> i_to_mask(const uint64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    // Top bit of each 64-bit lane goes into a mask register.
    return _mm256_movepi64_mask(a.native());
#else
    // Mask shares the vector register layout on plain AVX2.
    return a.native();
#endif
}
#endif
146 | |
#if SIMDPP_USE_AVX512F
// Converts an 8x64-bit vector to its mask type.
static SIMDPP_INL
mask_int64<8> i_to_mask(const uint64<8>& a)
{
    // _mm512_movepi64_mask requires AVX512DQ; under plain AVX512F test
    // the vector against itself — non-zero lanes produce set mask bits
    // (equivalent for canonical all-ones masks).
    return _mm512_test_epi64_mask(a.native(), a.native());
}
#endif
154 | |
155 | // ----------------------------------------------------------------------------- |
156 | |
// Converts a 4x float32 vector to the corresponding mask type.
static SIMDPP_INL
mask_float32<4> i_to_mask(const float32<4>& a)
{
#if SIMDPP_USE_AVX512VL
    // Bit-cast to integer and test against itself: lanes with any bit set
    // produce a set mask bit (equivalent for canonical all-ones masks).
    __m128i ia = _mm_castps_si128(a.native());
    return _mm_test_epi32_mask(ia, ia);
#elif SIMDPP_USE_NULL || (SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP)
    // Scalar / no-FP-SIMD fallback: compare against zero.
    return cmp_neq(a, (float32<4>) make_zero());
#else
    // Mask shares the vector register layout on other targets.
    return a.native();
#endif
}
169 | |
#if SIMDPP_USE_AVX
// Converts an 8x float32 vector to its mask type (256-bit variant).
static SIMDPP_INL
mask_float32<8> i_to_mask(const float32<8>& a)
{
#if SIMDPP_USE_AVX512VL
    // Bit-cast to integer, then self-test: non-zero lanes set mask bits.
    __m256i ia = _mm256_castps_si256(a.native());
    return _mm256_test_epi32_mask(ia, ia);
#else
    // Mask shares the vector register layout on plain AVX.
    return a.native();
#endif
}
#endif
182 | |
#if SIMDPP_USE_AVX512F
// Converts a 16x float32 vector to its mask type.
static SIMDPP_INL
mask_float32<16> i_to_mask(const float32<16>& a)
{
    // Bit-cast to integer, then self-test: non-zero lanes set mask bits
    // (equivalent for canonical all-ones masks).
    __m512i ia = _mm512_castps_si512(a.native());
    return _mm512_test_epi32_mask(ia, ia);
}
#endif
191 | |
192 | // ----------------------------------------------------------------------------- |
193 | |
// Converts a 2x float64 vector to the corresponding mask type.
static SIMDPP_INL
mask_float64<2> i_to_mask(const float64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    // Bit-cast to integer and test against itself: lanes with any bit set
    // produce a set mask bit (equivalent for canonical all-ones masks).
    __m128i ia = _mm_castpd_si128(a.native());
    return _mm_test_epi64_mask(ia, ia);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
    // These targets have native float64 vectors; the mask shares the
    // vector register layout.
    return a.native();
#else
    // Fallback for targets without native float64 vectors: compare
    // against zero.
    return cmp_neq(a, (float64<2>) make_zero());
#endif
}
206 | |
#if SIMDPP_USE_AVX
// Converts a 4x float64 vector to its mask type (256-bit variant).
static SIMDPP_INL
mask_float64<4> i_to_mask(const float64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    // Bit-cast to integer, then self-test: non-zero lanes set mask bits.
    __m256i ia = _mm256_castpd_si256(a.native());
    return _mm256_test_epi64_mask(ia, ia);
#else
    // Mask shares the vector register layout on plain AVX.
    return a.native();
#endif
}
#endif
219 | |
#if SIMDPP_USE_AVX512F
// Converts an 8x float64 vector to its mask type.
static SIMDPP_INL
mask_float64<8> i_to_mask(const float64<8>& a)
{
    // Bit-cast to integer, then self-test: non-zero lanes set mask bits
    // (equivalent for canonical all-ones masks).
    __m512i ia = _mm512_castpd_si512(a.native());
    return _mm512_test_epi64_mask(ia, ia);
}
#endif
228 | |
229 | // ----------------------------------------------------------------------------- |
230 | |
// Generic fallback for vectors wider than the native register width:
// applies i_to_mask element-wise over the vector array (one call per
// native-width sub-vector) via the SIMDPP_VEC_ARRAY_IMPL1 macro.
template<class V> SIMDPP_INL
typename V::mask_vector_type i_to_mask(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(typename V::mask_vector_type, i_to_mask, a)
}
236 | |
237 | } // namespace insn |
238 | } // namespace detail |
239 | } // namespace SIMDPP_ARCH_NAMESPACE |
240 | } // namespace simdpp |
241 | |
#endif // LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_TO_MASK_INL
243 | |