/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GT_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GT_H

#ifndef LIBSIMDPP_SIMD_H
#error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/core/make_uint.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/core/bit_xor.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

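// Per-instruction-set implementations of the element-wise "greater than"
// comparison (cmp_gt). Each overload returns a mask vector: on AVX-512
// targets this wraps a __mmask register, on other targets it is a
// full-width vector with all bits set in the elements where the comparison
// holds.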

static SIMDPP_INL
mask_int8x16 i_cmp_gt(const int8x16& a, const int8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmpgt_epi8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgtq_s8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
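    // MSA only provides "less than" comparisons, so the operands are
    // swapped: a > b is computed as b < a.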
    return (v16u8) __msa_clt_s_b(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_gt(const int8x32& a, const int8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epi8_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi8(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_gt(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmpgt_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

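// SSE2 and AVX2 have no unsigned integer "greater than" instructions. The
// unsigned overloads below XOR both operands with the sign bit (0x80,
// 0x8000, ...), which maps unsigned order onto signed order, and then use
// the signed comparison.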
static SIMDPP_INL
mask_int8x16 i_cmp_gt(const uint8x16& ca, const uint8x16& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comgt_epu8(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    a = bit_xor(a, 0x80); // flip sign bit
    b = bit_xor(b, 0x80); // flip sign bit
    return _mm_cmpgt_epi8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgtq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_clt_u_b(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_gt(const uint8x32& ca, const uint8x32& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epu8_mask(ca.native(), cb.native());
#else
    uint8<32> a = ca, b = cb;
    a = bit_xor(a, 0x80); // flip sign bit
    b = bit_xor(b, 0x80); // flip sign bit
    return _mm256_cmpgt_epi8(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_gt(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpgt_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int16x8 i_cmp_gt(const int16x8& a, const int16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmpgt_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgtq_s16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_clt_s_h(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_gt(const int16x16& a, const int16x16& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epi16_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi16(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_gt(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmpgt_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int16x8 i_cmp_gt(const uint16x8& ca, const uint16x8& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comgt_epu16(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    a = bit_xor(a, 0x8000); // flip sign bit
    b = bit_xor(b, 0x8000); // flip sign bit
    return _mm_cmpgt_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgtq_u16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_clt_u_h(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_gt(const uint16x16& ca, const uint16x16& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epu16_mask(ca.native(), cb.native());
#else
    uint16<16> a = ca, b = cb;
    a = bit_xor(a, 0x8000); // flip sign bit
    b = bit_xor(b, 0x8000); // flip sign bit
    return _mm256_cmpgt_epi16(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_gt(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpgt_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int32x4 i_cmp_gt(const int32x4& a, const int32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmpgt_epi32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgtq_s32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_clt_s_w(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_gt(const int32x8& a, const int32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epi32_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi32(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_gt(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpgt_epi32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int32x4 i_cmp_gt(const uint32x4& ca, const uint32x4& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comgt_epu32(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    a = bit_xor(a, 0x80000000); // flip sign bit
    b = bit_xor(b, 0x80000000); // flip sign bit
    return _mm_cmpgt_epi32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgtq_u32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_clt_u_w(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_gt(const uint32x8& ca, const uint32x8& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epu32_mask(ca.native(), cb.native());
#else
    uint32<8> a = ca, b = cb;
    a = bit_xor(a, 0x80000000); // flip sign bit
    b = bit_xor(b, 0x80000000); // flip sign bit
    return _mm256_cmpgt_epi32(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_gt(const uint32<16>& a, const uint32<16>& b)
{
    // FIXME: GCC does not provide _mm512_cmpgt_epu32_mask; use the generic
    // compare with _MM_CMPINT_NLE (not less-or-equal), which is equivalent
    // to greater-than.
    return _mm512_cmp_epu32_mask(a.native(), b.native(), _MM_CMPINT_NLE);
}
#endif

// -----------------------------------------------------------------------------

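// 64-bit integer comparisons: the native path on x86 requires XOP or AVX2;
// other SSE levels and 32-bit NEON report the operation as not implemented,
// while pre-VSX Altivec falls back to the scalar null implementation.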
static SIMDPP_INL
mask_int64x2 i_cmp_gt(const int64x2& a, const int64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comgt_epi64(a.native(), b.native());
#elif SIMDPP_USE_AVX2
    return _mm_cmpgt_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgtq_s64(a.native(), b.native());
#elif SIMDPP_USE_VSX_207
    return (__vector uint64_t) vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_clt_s_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_gt(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_gt(const int64x4& a, const int64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epi64_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi64(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_gt(const int64<8>& a, const int64<8>& b)
{
    // GCC does not provide _mm512_cmpgt_epi64_mask; _MM_CMPINT_NLE
    // (not less-or-equal) is equivalent to greater-than.
    return _mm512_cmp_epi64_mask(a.native(), b.native(), _MM_CMPINT_NLE);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int64x2 i_cmp_gt(const uint64x2& a, const uint64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpgt_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comgt_epu64(a.native(), b.native());
#elif SIMDPP_USE_AVX2
    uint64<2> ca = bit_xor(a, 0x8000000000000000); // flip sign bit
    uint64<2> cb = bit_xor(b, 0x8000000000000000); // flip sign bit
    return _mm_cmpgt_epi64(ca.native(), cb.native());
#elif SIMDPP_USE_NEON64
    return vcgtq_u64(a.native(), b.native());
#elif SIMDPP_USE_VSX_207
    return (__vector uint64_t) vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_clt_u_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_gt(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_gt(const uint64x4& ca, const uint64x4& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpgt_epu64_mask(ca.native(), cb.native());
#else
    uint64<4> a = ca, b = cb;
    a = bit_xor(a, 0x8000000000000000); // flip sign bit
    b = bit_xor(b, 0x8000000000000000); // flip sign bit
    return _mm256_cmpgt_epi64(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_gt(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmp_epu64_mask(a.native(), b.native(), _MM_CMPINT_NLE);
}
#endif

// -----------------------------------------------------------------------------

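// Floating-point comparisons on AVX and newer use the _CMP_GT_OQ predicate:
// ordered, non-signaling "greater than", so any comparison where either
// operand is NaN yields false.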
static SIMDPP_INL
mask_float32x4 i_cmp_gt(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_gt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_GT_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_GT_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpgt_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcgtq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fclt_w(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_cmp_gt(const float32x8& a, const float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_GT_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_GT_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_gt(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_GT_OQ);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64x2 i_cmp_gt(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_GT_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_GT_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpgt_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcgtq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmpgt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fclt_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_gt(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_cmp_gt(const float64x4& a, const float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_GT_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_GT_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_gt(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_GT_OQ);
}
#endif

// -----------------------------------------------------------------------------

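// Generic fallback for vectors wider than the largest natively supported
// width: the comparison is applied to each native-sized sub-vector in turn.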
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_gt(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_gt, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif