/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>

   Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
   http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H

#ifndef LIBSIMDPP_SIMD_H
#error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/insn/bit_not.h>
#include <simdpp/detail/insn/cmp_lt.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

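// Each i_cmp_ge overload below picks the best implementation available for
// the target instruction set: a native greater-or-equal comparison where the
// ISA provides one, otherwise a fallback composed from other operations.
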
SIMDPP_INL mask_int8<16> i_cmp_ge(const int8<16>& a, const int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
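    // MSA has no >= compare; cle_s (<=) with operands swapped yields a >= b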
    return (v16u8) __msa_cle_s_b(b.native(), a.native());
#else
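    // generic fallback: a >= b is the complement of a < b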
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_ge(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_ge(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmpge_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int8<16> i_cmp_ge(const uint8<16>& ca, const uint8<16>& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
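    // XOP provides a direct unsigned >= compare; the SIMDPP_WORKAROUND_XOP_COM
    // guard appears to disable it where the XOP "com" intrinsics are problematic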
    return _mm_comge_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_u_b(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_ge(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_ge(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpge_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_ge(const int16<8>& a, const int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_s_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_ge(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_ge(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmpge_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_ge(const uint16<8>& ca, const uint16<8>& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_u_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_ge(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_ge(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpge_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_ge(const int32<4>& a, const int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_s_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_ge(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_ge(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpge_epi32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_ge(const uint32<4>& ca, const uint32<4>& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_u_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_ge(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_ge(const uint32<16>& a, const uint32<16>& b)
{
    // GCC does not provide _mm512_cmpge_epu32_mask, so use the generic
    // compare with _MM_CMPINT_NLT ("not less than", i.e. >=)
    return _mm512_cmp_epu32_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_ge(const int64<2>& a, const int64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_s64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_s_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_ge(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_ge(const int64<8>& a, const int64<8>& b)
{
    // GCC does not provide _mm512_cmpge_epi64_mask; use the generic compare
    // with _MM_CMPINT_NLT instead
    return _mm512_cmp_epi64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_ge(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_u64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_u_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_ge(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_ge(const uint64<8>& a, const uint64<8>& b)
{
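    // same workaround as the signed 64-bit case: _MM_CMPINT_NLT ("not less
    // than") expresses >= via the generic compare intrinsic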
    return _mm512_cmp_epu64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float32<4> i_cmp_ge(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
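    // _CMP_GE_OQ: ordered, non-signaling >= (false if either operand is NaN)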
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcgeq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcle_w(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_cmp_ge(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_ge(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64<2> i_cmp_ge(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcgeq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcle_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_ge(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_cmp_ge(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_ge(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif

// -----------------------------------------------------------------------------

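// Vectors wider than the native register size: apply i_cmp_ge to each
// native-sized sub-vector of the operands.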
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_ge(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_ge, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif