/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/insn/bit_not.h>
#include <simdpp/detail/insn/cmp_lt.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

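// Each overload below picks the best native "greater than or equal"
// comparison for the enabled instruction set at preprocessing time. On
// AVX-512 targets the result is a register mask; elsewhere it is a
// full-width vector whose lanes are all-ones or all-zeros. Where no native
// instruction exists, >= is synthesized as NOT(a < b).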
SIMDPP_INL mask_int8<16> i_cmp_ge(const int8<16>& a, const int8<16>& b)
{
#if SIMDPP_USE_NULL
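    // Scalar fallback: the NULL backend executes element-wise in plain C++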
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
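    // MSA only provides "less than or equal", so swap the operands:
    // cle_s(b, a) computes b <= a, which is a >= b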
    return (v16u8) __msa_cle_s_b(b.native(), a.native());
#else
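    // No native >= on this target; for integer lanes a >= b is exactly !(a < b)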
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_ge(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_ge(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmpge_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int8<16> i_cmp_ge(const uint8<16>& ca, const uint8<16>& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
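    // AMD XOP's COM instructions provide a native unsigned >=; the workaround
    // macro opts out on compilers whose COM intrinsics are unusable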
    return _mm_comge_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_u_b(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_ge(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_ge(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpge_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_ge(const int16<8>& a, const int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_s_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_ge(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_ge(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmpge_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_ge(const uint16<8>& ca, const uint16<8>& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_u_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_ge(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_ge(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpge_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_ge(const int32<4>& a, const int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_s_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_ge(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_ge(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpge_epi32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_ge(const uint32<4>& ca, const uint32<4>& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_u_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_ge(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_ge(const uint32<16>& a, const uint32<16>& b)
{
    // GCC does not have _mm512_cmpge_epu32_mask
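    // (_MM_CMPINT_NLT, "not less than", is equivalent to >=)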
    return _mm512_cmp_epu32_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_ge(const int64<2>& a, const int64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_s64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_s_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_ge(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_ge(const int64<8>& a, const int64<8>& b)
{
    // GCC does not have _mm512_cmpge_epi64_mask
    return _mm512_cmp_epi64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_ge(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_u64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_u_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_ge(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_ge(const uint64<8>& a, const uint64<8>& b)
{
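    // Presumably the same GCC limitation as the epu32/epi64 cases above:
    // express >= as "not less than" via the generic compare intrinsic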
    return _mm512_cmp_epu64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float32<4> i_cmp_ge(const float32<4>& a, const float32<4>& b)
{
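    // _CMP_GE_OQ requests an ordered, quiet (non-signaling) greater-or-equal:
    // lanes where either operand is NaN compare false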
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
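    // vcgeq_f32 yields an all-ones/all-zeros uint32x4_t; reinterpret it as
    // float lanes, the representation the NEON float mask type expects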
    return vreinterpretq_f32_u32(vcgeq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcle_w(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_cmp_ge(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_ge(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64<2> i_cmp_ge(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcgeq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcle_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_ge(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_cmp_ge(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_ge(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif

// -----------------------------------------------------------------------------

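// Generic case: vectors wider than the native width are stored as arrays of
// native-width sub-vectors; apply i_cmp_ge to each sub-vector pair in turn.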
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_ge(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_ge, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif