/*  Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_LE_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_LE_H

#ifndef LIBSIMDPP_SIMD_H
#error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/insn/bit_not.h>
#include <simdpp/detail/insn/cmp_gt.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

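// Each i_cmp_le overload below selects the best native "compare less-than-or-
// equal" available for the enabled instruction set at compile time. Where no
// native form exists, the result is computed as NOT(a > b) via
// i_bit_not(i_cmp_gt(a, b)).
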
SIMDPP_INL mask_int8<16> i_cmp_le(const int8<16>& a, const int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_s_b(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_le(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_le(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmple_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
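// Unsigned 8-bit elements. XOP targets have a direct unsigned compare
// (_mm_comle_epu8), used unless the SIMDPP_WORKAROUND_XOP_COM workaround
// disables the COM intrinsics; other targets use a native unsigned <= or the
// generic NOT(a > b) fallback.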

SIMDPP_INL mask_int8<16> i_cmp_le(const uint8<16>& ca, const uint8<16>& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_u8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_u_b(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_le(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_le(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmple_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_le(const int16<8>& a, const int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_s_h(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_le(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_le(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmple_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_le(const uint16<8>& ca, const uint16<8>& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_u16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_u_h(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_le(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_le(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmple_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
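// 32-bit and 64-bit element types: at 512-bit width the mask compares only
// require AVX512F, whereas the 8/16-bit variants above need AVX512BW.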

SIMDPP_INL mask_int32<4> i_cmp_le(const int32<4>& a, const int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_s_w(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_le(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_le(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmple_epi32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_le(const uint32<4>& ca, const uint32<4>& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcleq_u32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_u_w(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_le(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_le(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmple_epu32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
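// 64-bit elements. A native 128-bit <= exists only on AVX512VL, XOP,
// AArch64 NEON (SIMDPP_USE_NEON64) and MSA; other targets use the
// NOT(a > b) fallback or the scalar null implementation.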

SIMDPP_INL mask_int64<2> i_cmp_le(const int64<2>& a, const int64<2>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmple_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcleq_s64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_s_d(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_le(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epi64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_le(const int64<8>& a, const int64<8>& b)
{
    return _mm512_cmple_epi64_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_le(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmple_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comle_epu64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcleq_u64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_u_d(a.native(), b.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_le(a, b);
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_le(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmple_epu64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_gt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_le(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmple_epu64_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
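// Single-precision floats. AVX targets use the ordered, non-signalling
// predicate _CMP_LE_OQ, so a comparison involving NaN yields false; plain
// SSE2 uses _mm_cmple_ps. Targets without native single-precision vectors
// (SIMDPP_USE_NEON_NO_FLT_SP) use the scalar null implementation.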

static SIMDPP_INL
mask_float32<4> i_cmp_le(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_le(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmple_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcleq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmple(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcle_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_cmp_le(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_LE_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_LE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_le(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_LE_OQ);
}
#endif

// -----------------------------------------------------------------------------
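// Double-precision floats. Native <= requires SSE2 or later, AArch64 NEON
// (SIMDPP_USE_NEON64), VSX 2.06 or MSA; 32-bit NEON and pre-VSX Altivec fall
// back to the scalar null implementation.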

static SIMDPP_INL
mask_float64<2> i_cmp_le(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_LE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmple_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcleq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmple(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcle_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_le(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_cmp_le(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_LE_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_LE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_le(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_LE_OQ);
}
#endif

// -----------------------------------------------------------------------------
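// Vectors wider than the native width: SIMDPP_VEC_ARRAY_IMPL2 applies
// i_cmp_le to each native-sized sub-vector of a and b and collects the
// results into the corresponding mask vector type.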

template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_le(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_le, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif