cmp_ge.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/cmp_ge.h]

1	/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/make_shuffle_bytes_mask.h>
17	#include <simdpp/detail/null/compare.h>
18	#include <simdpp/detail/insn/bit_not.h>
19	#include <simdpp/detail/insn/cmp_lt.h>
20	#include <simdpp/detail/vector_array_macros.h>
21
22	namespace simdpp {
23	namespace SIMDPP_ARCH_NAMESPACE {
24	namespace detail {
25	namespace insn {
26
27	SIMDPP_INL mask_int8<`16`> i_cmp_ge(const int8<`16`>& a, const int8<`16`>& b)
28	{
29	#if SIMDPP_USE_NULL
30	return detail::null::cmp_ge(a, b);
31	#elif SIMDPP_USE_AVX512VL
32	return _mm_cmpge_epi8_mask(a.native(), b.native());
33	#elif SIMDPP_USE_NEON
34	return vcgeq_s8(a.native(), b.native());
35	#elif SIMDPP_USE_MSA
36	return (v16u8) __msa_cle_s_b(b.native(), a.native());
37	#else
38	return i_bit_not(i_cmp_lt(a, b));
39	#endif
40	}
41
#if SIMDPP_USE_AVX2
// Element-wise signed 8-bit comparison `a >= b` on 32-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int8<32> i_cmp_ge(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epi8_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
52
#if SIMDPP_USE_AVX512BW
// Element-wise signed 8-bit comparison `a >= b` on 64-lane vectors.
// Only compiled when AVX512BW is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int8<64> i_cmp_ge(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmpge_epi8_mask(a.native(), b.native());
}
#endif
59
60	// -----------------------------------------------------------------------------
61
62	SIMDPP_INL mask_int8<`16`> i_cmp_ge(const uint8<`16`>& ca, const uint8<`16`>& cb)
63	{
64	uint8<`16`> a = ca, b = cb;
65	#if SIMDPP_USE_NULL
66	return detail::null::cmp_ge(a, b);
67	#elif SIMDPP_USE_AVX512VL
68	return _mm_cmpge_epu8_mask(a.native(), b.native());
69	#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
70	return _mm_comge_epu8(a.native(), b.native());
71	#elif SIMDPP_USE_NEON
72	return vcgeq_u8(a.native(), b.native());
73	#elif SIMDPP_USE_MSA
74	return (v16u8) __msa_cle_u_b(b.native(), a.native());
75	#else
76	return i_bit_not(i_cmp_lt(a, b));
77	#endif
78	}
79
#if SIMDPP_USE_AVX2
// Element-wise unsigned 8-bit comparison `a >= b` on 32-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int8<32> i_cmp_ge(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epu8_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
90
#if SIMDPP_USE_AVX512BW
// Element-wise unsigned 8-bit comparison `a >= b` on 64-lane vectors.
// Only compiled when AVX512BW is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int8<64> i_cmp_ge(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpge_epu8_mask(a.native(), b.native());
}
#endif
97
98	// -----------------------------------------------------------------------------
99
100	SIMDPP_INL mask_int16<`8`> i_cmp_ge(const int16<`8`>& a, const int16<`8`>& b)
101	{
102	#if SIMDPP_USE_NULL
103	return detail::null::cmp_ge(a, b);
104	#elif SIMDPP_USE_AVX512VL
105	return _mm_cmpge_epi16_mask(a.native(), b.native());
106	#elif SIMDPP_USE_NEON
107	return vcgeq_s16(a.native(), b.native());
108	#elif SIMDPP_USE_MSA
109	return (v8u16) __msa_cle_s_h(b.native(), a.native());
110	#else
111	return i_bit_not(i_cmp_lt(a, b));
112	#endif
113	}
114
#if SIMDPP_USE_AVX2
// Element-wise signed 16-bit comparison `a >= b` on 16-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int16<16> i_cmp_ge(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epi16_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
125
#if SIMDPP_USE_AVX512BW
// Element-wise signed 16-bit comparison `a >= b` on 32-lane vectors.
// Only compiled when AVX512BW is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int16<32> i_cmp_ge(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmpge_epi16_mask(a.native(), b.native());
}
#endif
132
133	// -----------------------------------------------------------------------------
134
135	SIMDPP_INL mask_int16<`8`> i_cmp_ge(const uint16<`8`>& ca, const uint16<`8`>& cb)
136	{
137	uint16<`8`> a = ca, b = cb;
138	#if SIMDPP_USE_NULL
139	return detail::null::cmp_ge(a, b);
140	#elif SIMDPP_USE_AVX512VL
141	return _mm_cmpge_epu16_mask(a.native(), b.native());
142	#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
143	return _mm_comge_epu16(a.native(), b.native());
144	#elif SIMDPP_USE_NEON
145	return vcgeq_u16(a.native(), b.native());
146	#elif SIMDPP_USE_MSA
147	return (v8u16) __msa_cle_u_h(b.native(), a.native());
148	#else
149	return i_bit_not(i_cmp_lt(a, b));
150	#endif
151	}
152
#if SIMDPP_USE_AVX2
// Element-wise unsigned 16-bit comparison `a >= b` on 16-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int16<16> i_cmp_ge(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epu16_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
163
#if SIMDPP_USE_AVX512BW
// Element-wise unsigned 16-bit comparison `a >= b` on 32-lane vectors.
// Only compiled when AVX512BW is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int16<32> i_cmp_ge(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpge_epu16_mask(a.native(), b.native());
}
#endif
170
171	// -----------------------------------------------------------------------------
172
173	SIMDPP_INL mask_int32<`4`> i_cmp_ge(const int32<`4`>& a, const int32<`4`>& b)
174	{
175	#if SIMDPP_USE_NULL
176	return detail::null::cmp_ge(a, b);
177	#elif SIMDPP_USE_AVX512VL
178	return _mm_cmpge_epi32_mask(a.native(), b.native());
179	#elif SIMDPP_USE_NEON
180	return vcgeq_s32(a.native(), b.native());
181	#elif SIMDPP_USE_MSA
182	return (v4u32) __msa_cle_s_w(b.native(), a.native());
183	#else
184	return i_bit_not(i_cmp_lt(a, b));
185	#endif
186	}
187
#if SIMDPP_USE_AVX2
// Element-wise signed 32-bit comparison `a >= b` on 8-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int32<8> i_cmp_ge(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epi32_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
198
#if SIMDPP_USE_AVX512F
// Element-wise signed 32-bit comparison `a >= b` on 16-lane vectors.
// Only compiled when AVX512F is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int32<16> i_cmp_ge(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpge_epi32_mask(a.native(), b.native());
}
#endif
205
206	// -----------------------------------------------------------------------------
207
208	SIMDPP_INL mask_int32<`4`> i_cmp_ge(const uint32<`4`>& ca, const uint32<`4`>& cb)
209	{
210	uint32<`4`> a = ca, b = cb;
211	#if SIMDPP_USE_NULL
212	return detail::null::cmp_ge(a, b);
213	#elif SIMDPP_USE_AVX512VL
214	return _mm_cmpge_epu32_mask(a.native(), b.native());
215	#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
216	return _mm_comge_epu32(a.native(), b.native());
217	#elif SIMDPP_USE_NEON
218	return vcgeq_u32(a.native(), b.native());
219	#elif SIMDPP_USE_MSA
220	return (v4u32) __msa_cle_u_w(b.native(), a.native());
221	#else
222	return i_bit_not(i_cmp_lt(a, b));
223	#endif
224	}
225
#if SIMDPP_USE_AVX2
// Element-wise unsigned 32-bit comparison `a >= b` on 8-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int32<8> i_cmp_ge(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epu32_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
236
#if SIMDPP_USE_AVX512F
// Element-wise unsigned 32-bit comparison `a >= b` on 16-lane vectors.
// Only compiled when AVX512F is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int32<16> i_cmp_ge(const uint32<16>& a, const uint32<16>& b)
{
    // FIXME: GCC does not have _mm512_cmpge_epu32_mask, so the generic
    // compare intrinsic is called with the "not less than" predicate instead.
    return _mm512_cmp_epu32_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif
244
245	// -----------------------------------------------------------------------------
246
247	SIMDPP_INL mask_int64<`2`> i_cmp_ge(const int64<`2`>& a, const int64<`2`>& b)
248	{
249	#if SIMDPP_USE_AVX512VL
250	return _mm_cmpge_epi64_mask(a.native(), b.native());
251	#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
252	return _mm_comge_epi64(a.native(), b.native());
253	#elif SIMDPP_USE_NEON64
254	return vcgeq_s64(a.native(), b.native());
255	#elif SIMDPP_USE_MSA
256	return (v2u64) __msa_cle_s_d(b.native(), a.native());
257	#elif SIMDPP_USE_NULL
258	return detail::null::cmp_ge(a, b);
259	#else
260	return i_bit_not(i_cmp_lt(a, b));
261	#endif
262	}
263
#if SIMDPP_USE_AVX2
// Element-wise signed 64-bit comparison `a >= b` on 4-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int64<4> i_cmp_ge(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epi64_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
274
#if SIMDPP_USE_AVX512F
// Element-wise signed 64-bit comparison `a >= b` on 8-lane vectors.
// Only compiled when AVX512F is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int64<8> i_cmp_ge(const int64<8>& a, const int64<8>& b)
{
    // GCC does not have _mm512_cmpge_epi64_mask, so the generic compare
    // intrinsic is called with the "not less than" predicate instead.
    return _mm512_cmp_epi64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif
282
283	// -----------------------------------------------------------------------------
284
285	SIMDPP_INL mask_int64<`2`> i_cmp_ge(const uint64<`2`>& a, const uint64<`2`>& b)
286	{
287	#if SIMDPP_USE_AVX512VL
288	return _mm_cmpge_epu64_mask(a.native(), b.native());
289	#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
290	return _mm_comge_epu64(a.native(), b.native());
291	#elif SIMDPP_USE_NEON64
292	return vcgeq_u64(a.native(), b.native());
293	#elif SIMDPP_USE_MSA
294	return (v2u64) __msa_cle_u_d(b.native(), a.native());
295	#elif SIMDPP_USE_NULL
296	return detail::null::cmp_ge(a, b);
297	#else
298	return i_bit_not(i_cmp_lt(a, b));
299	#endif
300	}
301
#if SIMDPP_USE_AVX2
// Element-wise unsigned 64-bit comparison `a >= b` on 4-lane vectors.
// Only compiled when AVX2 is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
SIMDPP_INL
mask_int64<4> i_cmp_ge(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmpge_epu64_mask(a.native(), b.native());
#else
    // Without AVX512VL: a >= b is computed as NOT(a < b).
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
312
#if SIMDPP_USE_AVX512F
// Element-wise unsigned 64-bit comparison `a >= b` on 8-lane vectors.
// Only compiled when AVX512F is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
SIMDPP_INL
mask_int64<8> i_cmp_ge(const uint64<8>& a, const uint64<8>& b)
{
    // Generic compare intrinsic with the "not less than" predicate.
    return _mm512_cmp_epu64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif
319
320	// -----------------------------------------------------------------------------
321
322	static SIMDPP_INL
323	mask_float32<`4`> i_cmp_ge(const float32<`4`>& a, const float32<`4`>& b)
324	{
325	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
326	return detail::null::cmp_ge(a, b);
327	#elif SIMDPP_USE_AVX512VL
328	return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
329	#elif SIMDPP_USE_AVX
330	return _mm_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
331	#elif SIMDPP_USE_SSE2
332	return _mm_cmpge_ps(a.native(), b.native());
333	#elif SIMDPP_USE_NEON
334	return vreinterpretq_f32_u32(vcgeq_f32(a.native(), b.native()));
335	#elif SIMDPP_USE_ALTIVEC
336	return vec_cmpge(a.native(), b.native());
337	#elif SIMDPP_USE_MSA
338	return (v4f32) __msa_fcle_w(b.native(), a.native());
339	#endif
340	}
341
#if SIMDPP_USE_AVX
// Element-wise single-precision comparison `a >= b` on 8-lane vectors.
// Only compiled when AVX is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
static SIMDPP_INL
mask_float32<8> i_cmp_ge(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    // Plain AVX compare with the same predicate.
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif
353
#if SIMDPP_USE_AVX512F
// Element-wise single-precision comparison `a >= b` on 16-lane vectors.
// Only compiled when AVX512F is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
static SIMDPP_INL
mask_float32<16> i_cmp_ge(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif
361
362	// -----------------------------------------------------------------------------
363
364	static SIMDPP_INL
365	mask_float64<`2`> i_cmp_ge(const float64<`2`>& a, const float64<`2`>& b)
366	{
367	#if SIMDPP_USE_AVX512VL
368	return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
369	#elif SIMDPP_USE_AVX
370	return _mm_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
371	#elif SIMDPP_USE_SSE2
372	return _mm_cmpge_pd(a.native(), b.native());
373	#elif SIMDPP_USE_NEON64
374	return vreinterpretq_f64_u64(vcgeq_f64(a.native(), b.native()));
375	#elif SIMDPP_USE_VSX_206
376	return (__vector double) vec_cmpge(a.native(), b.native());
377	#elif SIMDPP_USE_MSA
378	return (v2f64) __msa_fcle_d(b.native(), a.native());
379	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
380	return detail::null::cmp_ge(a, b);
381	#endif
382	}
383
#if SIMDPP_USE_AVX
// Element-wise double-precision comparison `a >= b` on 4-lane vectors.
// Only compiled when AVX is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask built from the per-lane comparison result
static SIMDPP_INL
mask_float64<4> i_cmp_ge(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    // Mask-producing compare intrinsic.
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    // Plain AVX compare with the same predicate.
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif
395
#if SIMDPP_USE_AVX512F
// Element-wise double-precision comparison `a >= b` on 8-lane vectors.
// Only compiled when AVX512F is enabled.
//
// @param a left-hand operand
// @param b right-hand operand
// @return mask produced by the native compare intrinsic
static SIMDPP_INL
mask_float64<8> i_cmp_ge(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif
403
404	// -----------------------------------------------------------------------------
405
406	template<class V> SIMDPP_INL
407	typename V::mask_vector_type i_cmp_ge(const V& a, const V& b)
408	{
409	SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_ge, a, b);
410	}
411
412	} // namespace insn
413	} // namespace detail
414	} // namespace SIMDPP_ARCH_NAMESPACE
415	} // namespace simdpp
416
417	#endif
418
419

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/cmp_ge.h