1/* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_TO_MASK_INL
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_TO_MASK_INL
10
11#include <simdpp/types.h>
12#include <simdpp/detail/insn/conv_to_mask.h>
13#include <simdpp/core/cmp_neq.h>
14#include <simdpp/detail/vector_array_macros.h>
15
16namespace simdpp {
17namespace SIMDPP_ARCH_NAMESPACE {
18namespace detail {
19namespace insn {
20
// Converts a 16 x 8-bit unsigned vector to the corresponding mask type.
// NOTE(review): the non-NULL, non-AVX512VL path reinterprets the register as
// a mask directly, so callers presumably pass vectors whose lanes are already
// all-zeros or all-ones -- TODO confirm against call sites.
static SIMDPP_INL
mask_int8<16> i_to_mask(const uint8<16>& a)
{
#if SIMDPP_USE_NULL
    // Scalar backend: build the mask with an explicit compare against zero.
    return cmp_neq(a, (uint8<16>) make_zero());
#elif SIMDPP_USE_AVX512VL
    // AVX-512 masks live in opmask registers; movepi8_mask gathers the sign
    // (top) bit of every byte lane into one.
    return _mm_movepi8_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
32
#if SIMDPP_USE_AVX2
// 32 x 8-bit variant; only compiled when AVX2 provides native 256-bit
// integer vectors.
static SIMDPP_INL
mask_int8<32> i_to_mask(const uint8<32>& a)
{
#if SIMDPP_USE_AVX512VL
    // Gather the top bit of each byte lane into an opmask register.
    return _mm256_movepi8_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
#endif
44
#if SIMDPP_USE_AVX512BW
// 64 x 8-bit variant. AVX-512BW masks are always opmask registers, so the
// sign bit of every byte lane is gathered unconditionally.
static SIMDPP_INL
mask_int8<64> i_to_mask(const uint8<64>& a)
{
    return _mm512_movepi8_mask(a.native());
}
#endif
52
53// -----------------------------------------------------------------------------
54
// Converts an 8 x 16-bit unsigned vector to the corresponding mask type.
// NOTE(review): the plain SIMD path reinterprets the register as a mask, so
// lanes are presumably already all-zeros or all-ones -- TODO confirm.
static SIMDPP_INL
mask_int16<8> i_to_mask(const uint16<8>& a)
{
#if SIMDPP_USE_NULL
    // Scalar backend: build the mask with an explicit compare against zero.
    return cmp_neq(a, (uint16<8>) make_zero());
#elif SIMDPP_USE_AVX512VL
    // Gather the sign bit of each 16-bit lane into an opmask register.
    return _mm_movepi16_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
66
#if SIMDPP_USE_AVX2
// 16 x 16-bit variant; only compiled when AVX2 provides native 256-bit
// integer vectors.
static SIMDPP_INL
mask_int16<16> i_to_mask(const uint16<16>& a)
{
#if SIMDPP_USE_AVX512VL
    // Gather the sign bit of each 16-bit lane into an opmask register.
    return _mm256_movepi16_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
#endif
78
#if SIMDPP_USE_AVX512BW
// 32 x 16-bit variant. AVX-512BW masks are always opmask registers, so the
// sign bit of every 16-bit lane is gathered unconditionally.
static SIMDPP_INL
mask_int16<32> i_to_mask(const uint16<32>& a)
{
    return _mm512_movepi16_mask(a.native());
}
#endif
86
87// -----------------------------------------------------------------------------
88
// Converts a 4 x 32-bit unsigned vector to the corresponding mask type.
// NOTE(review): the plain SIMD path reinterprets the register as a mask, so
// lanes are presumably already all-zeros or all-ones -- TODO confirm.
static SIMDPP_INL
mask_int32<4> i_to_mask(const uint32<4>& a)
{
#if SIMDPP_USE_NULL
    // Scalar backend: build the mask with an explicit compare against zero.
    return cmp_neq(a, (uint32<4>) make_zero());
#elif SIMDPP_USE_AVX512VL
    // Gather the sign bit of each 32-bit lane into an opmask register.
    return _mm_movepi32_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
100
#if SIMDPP_USE_AVX2
// 8 x 32-bit variant; only compiled when AVX2 provides native 256-bit
// integer vectors.
static SIMDPP_INL
mask_int32<8> i_to_mask(const uint32<8>& a)
{
#if SIMDPP_USE_AVX512VL
    // Gather the sign bit of each 32-bit lane into an opmask register.
    return _mm256_movepi32_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
#endif
112
#if SIMDPP_USE_AVX512F
// 16 x 32-bit variant. _mm512_movepi32_mask needs AVX-512DQ, which baseline
// AVX-512F does not guarantee, so a self-test is used instead: a mask bit is
// set for every lane that has any bit set.
static SIMDPP_INL
mask_int32<16> i_to_mask(const uint32<16>& a)
{
    return _mm512_test_epi32_mask(a.native(), a.native());
}
#endif
120
121// -----------------------------------------------------------------------------
122
// Converts a 2 x 64-bit unsigned vector to the corresponding mask type.
// NOTE(review): on the ISAs listed in the middle branch the register is
// reinterpreted as a mask, so lanes are presumably already all-zeros or
// all-ones -- TODO confirm against call sites.
static SIMDPP_INL
mask_int64<2> i_to_mask(const uint64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    // Gather the sign bit of each 64-bit lane into an opmask register.
    return _mm_movepi64_mask(a.native());
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
    // Mask and vector share the same register layout on these targets;
    // reinterpret in place.
    return a.native();
#else
    // Remaining backends (including NULL/scalar): build the mask with an
    // explicit compare against zero.
    return cmp_neq(a, (uint64<2>) make_zero());
#endif
}
134
#if SIMDPP_USE_AVX2
// 4 x 64-bit variant; only compiled when AVX2 provides native 256-bit
// integer vectors.
static SIMDPP_INL
mask_int64<4> i_to_mask(const uint64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    // Gather the sign bit of each 64-bit lane into an opmask register.
    return _mm256_movepi64_mask(a.native());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
#endif
146
#if SIMDPP_USE_AVX512F
// 8 x 64-bit variant. _mm512_movepi64_mask needs AVX-512DQ, which baseline
// AVX-512F does not guarantee, so a self-test is used instead: a mask bit is
// set for every lane that has any bit set.
static SIMDPP_INL
mask_int64<8> i_to_mask(const uint64<8>& a)
{
    return _mm512_test_epi64_mask(a.native(), a.native());
}
#endif
154
155// -----------------------------------------------------------------------------
156
// Converts a 4 x single-precision float vector to the corresponding mask
// type.
// NOTE(review): the final branch reinterprets the register as a mask, so
// lane bit patterns are presumably already all-zeros or all-ones -- TODO
// confirm against call sites.
static SIMDPP_INL
mask_float32<4> i_to_mask(const float32<4>& a)
{
#if SIMDPP_USE_AVX512VL
    // Opmask path: bit-cast the float register to integers and set a mask
    // bit for every lane that has any bit set.
    __m128i ia = _mm_castps_si128(a.native());
    return _mm_test_epi32_mask(ia, ia);
#elif SIMDPP_USE_NULL || (SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP)
    // Scalar backend, and NEON without single-precision float support:
    // build the mask with an explicit compare against zero.
    return cmp_neq(a, (float32<4>) make_zero());
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
169
#if SIMDPP_USE_AVX
// 8 x single-precision variant; only compiled when AVX provides native
// 256-bit float vectors.
static SIMDPP_INL
mask_float32<8> i_to_mask(const float32<8>& a)
{
#if SIMDPP_USE_AVX512VL
    // Bit-cast to integers and set a mask bit for every lane that has any
    // bit set.
    __m256i ia = _mm256_castps_si256(a.native());
    return _mm256_test_epi32_mask(ia, ia);
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
#endif
182
#if SIMDPP_USE_AVX512F
// 16 x single-precision variant: bit-cast to integers and set a mask bit
// for every lane that has any bit set.
static SIMDPP_INL
mask_float32<16> i_to_mask(const float32<16>& a)
{
    __m512i ia = _mm512_castps_si512(a.native());
    return _mm512_test_epi32_mask(ia, ia);
}
#endif
191
192// -----------------------------------------------------------------------------
193
// Converts a 2 x double-precision float vector to the corresponding mask
// type.
// NOTE(review): on the ISAs listed in the middle branch the register is
// reinterpreted as a mask, so lane bit patterns are presumably already
// all-zeros or all-ones -- TODO confirm against call sites.
static SIMDPP_INL
mask_float64<2> i_to_mask(const float64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    // Opmask path: bit-cast the double register to integers and set a mask
    // bit for every lane that has any bit set.
    __m128i ia = _mm_castpd_si128(a.native());
    return _mm_test_epi64_mask(ia, ia);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
    // Mask and vector share the same register layout on these targets;
    // reinterpret in place.
    return a.native();
#else
    // Remaining backends (including NULL/scalar): build the mask with an
    // explicit compare against zero.
    return cmp_neq(a, (float64<2>) make_zero());
#endif
}
206
#if SIMDPP_USE_AVX
// 4 x double-precision variant; only compiled when AVX provides native
// 256-bit float vectors.
static SIMDPP_INL
mask_float64<4> i_to_mask(const float64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    // Bit-cast to integers and set a mask bit for every lane that has any
    // bit set.
    __m256i ia = _mm256_castpd_si256(a.native());
    return _mm256_test_epi64_mask(ia, ia);
#else
    // Mask and vector share the same register layout; reinterpret in place.
    return a.native();
#endif
}
#endif
219
#if SIMDPP_USE_AVX512F
// 8 x double-precision variant: bit-cast to integers and set a mask bit for
// every lane that has any bit set.
static SIMDPP_INL
mask_float64<8> i_to_mask(const float64<8>& a)
{
    __m512i ia = _mm512_castpd_si512(a.native());
    return _mm512_test_epi64_mask(ia, ia);
}
#endif
228
229// -----------------------------------------------------------------------------
230
// Generic overload: forwards to i_to_mask element-wise through the
// SIMDPP_VEC_ARRAY_IMPL1 helper macro, producing the vector's associated
// mask type. Presumably used for vector types wider than a single native
// register, which are stored as arrays of sub-vectors -- TODO confirm
// against vector_array_macros.h.
template<class V> SIMDPP_INL
typename V::mask_vector_type i_to_mask(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(typename V::mask_vector_type, i_to_mask, a)
}
236
237} // namespace insn
238} // namespace detail
239} // namespace SIMDPP_ARCH_NAMESPACE
240} // namespace simdpp
241
#endif // LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_TO_MASK_INL
243