/*  Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_LE_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_LE_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/insn/bit_not.h>
#include <simdpp/detail/insn/cmp_gt.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

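// Each i_cmp_le overload below selects a backend at preprocessor time. Where
// the target ISA has no native "less-or-equal" compare for the element type,
// the result is computed as NOT(a > b), which is equivalent for integer
// elements because they are totally ordered.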
SIMDPP_INL mask_int8<16> i_cmp_le(const int8<16>& a, const int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_s_b(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_le(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_le(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmple_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

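// Unsigned variants: AMD XOP provides a direct unsigned compare
// (_mm_comle_epu8 and friends); elsewhere the NOT(a > b) fallback relies on
// the unsigned overloads of i_cmp_gt.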
SIMDPP_INL mask_int8<16> i_cmp_le(const uint8<16>& ca, const uint8<16>& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_u8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_u_b(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_le(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_le(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmple_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

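// For the 512-bit overloads, the 8- and 16-bit element compares require
// AVX-512BW, while the 32- and 64-bit ones further below need only AVX-512F.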
SIMDPP_INL mask_int16<8> i_cmp_le(const int16<8>& a, const int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_s_h(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_le(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_le(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmple_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_le(const uint16<8>& ca, const uint16<8>& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_u16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_u_h(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_le(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_le(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmple_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_le(const int32<4>& a, const int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_s_w(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_le(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_le(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmple_epi32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_le(const uint32<4>& ca, const uint32<4>& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_u32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_u_w(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_le(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_le(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmple_epu32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

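// 64-bit element compares: x86 before AVX-512 has no packed less-or-equal
// instruction for integers, and the NEON intrinsics used here (vcleq_s64,
// vcleq_u64) are AArch64-only, so other targets take the NOT(a > b) path.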
SIMDPP_INL mask_int64<2> i_cmp_le(const int64<2>& a, const int64<2>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcleq_s64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_s_d(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_le(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_le(const int64<8>& a, const int64<8>& b)
{
    return _mm512_cmple_epi64_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_le(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmple_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcleq_u64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_u_d(a.native(), b.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_le(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_le(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmple_epu64_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

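// Floating-point variants: on AVX the _CMP_LE_OQ predicate is used (ordered,
// non-signaling), so any comparison involving NaN yields false; the legacy
// SSE2 _mm_cmple_ps/_mm_cmple_pd compares also return false when either
// operand is NaN.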
static SIMDPP_INL
mask_float32<4> i_cmp_le(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmple_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcleq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmple(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcle_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_cmp_le(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_LE_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_LE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_le(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_LE_OQ);
}
#endif

// -----------------------------------------------------------------------------

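// Double precision: AArch64 NEON and VSX 2.06 provide native compares;
// 32-bit NEON and pre-VSX AltiVec have no vector double support, so those
// targets fall back to the scalar null implementation.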
static SIMDPP_INL
mask_float64<2> i_cmp_le(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmple_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcleq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmple(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcle_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_le(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_cmp_le(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_LE_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_LE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_le(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_LE_OQ);
}
#endif

// -----------------------------------------------------------------------------

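// Generic overload for vectors wider than the native register width: the
// SIMDPP_VEC_ARRAY_IMPL2 macro applies i_cmp_le to each native-width
// sub-vector and collects the results into the corresponding mask type.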
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_le(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_le, a, b);
}
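
// Usage sketch (an assumption for illustration: the public simdpp::cmp_le
// wrapper in core/cmp_le.h is expected to forward to these overloads):
//
//     uint8<16> a = make_uint(1, 2, 3, 4);
//     uint8<16> b = make_uint(4, 3, 2, 1);
//     mask_int8<16> m = cmp_le(a, b); // lanes where a <= b are set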

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif
