/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_LT_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_LT_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/bit_xor.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

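// Note on the overloads below: each i_cmp_lt() computes an element-wise
// "a < b" and returns a mask vector. On AVX-512 code paths the native result
// is a k-mask register, while the other ISAs produce a vector of all-ones /
// all-zeros elements; the mask_int* / mask_float* wrapper types hide that
// difference from callers.
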
static SIMDPP_INL
mask_int8x16 i_cmp_lt(const int8x16& a, const int8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmplt_epi8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcltq_s8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_clt_s_b(a.native(), b.native());
#endif
}

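// AVX2 has no packed "less-than" integer compare, only "greater-than", so the
// 256-bit overloads evaluate a < b as b > a with the operands swapped. When
// AVX-512VL is available, the dedicated *_cmplt_*_mask intrinsics are used
// instead and the result is a mask register.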
#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_lt(const int8x32& a, const int8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epi8_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi8(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_lt(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmplt_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

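// SSE2 and AVX2 only provide signed integer compares. The unsigned overloads
// below therefore XOR both operands with the sign bit of the element type
// (0x80, 0x8000, ...), which is equivalent to subtracting the bias and maps
// unsigned order onto signed order. For 8-bit elements, for example,
// 1u < 255u holds, and after the XOR the operands become -127 < 127, which
// still holds.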
static SIMDPP_INL
mask_int8x16 i_cmp_lt(const uint8x16& ca, const uint8x16& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comlt_epu8(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    a = bit_xor(a, 0x80); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, 0x80); // unsigned order matches signed order
    return _mm_cmplt_epi8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcltq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_clt_u_b(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_lt(const uint8x32& ca, const uint8x32& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epu8_mask(ca.native(), cb.native());
#else
    uint8<32> a = ca, b = cb;
    a = bit_xor(a, 0x80); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, 0x80); // unsigned order matches signed order
    return _mm256_cmpgt_epi8(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_lt(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmplt_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int16x8 i_cmp_lt(const int16x8& a, const int16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmplt_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcltq_s16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_clt_s_h(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_lt(const int16x16& a, const int16x16& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epi16_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi16(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_lt(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmplt_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int16x8 i_cmp_lt(const uint16x8& ca, const uint16x8& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comlt_epu16(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    uint16x8 bias = make_uint(0x8000);
    a = bit_xor(a, bias); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, bias); // unsigned order matches signed order
    return _mm_cmplt_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcltq_u16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_clt_u_h(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_lt(const uint16x16& ca, const uint16x16& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epu16_mask(ca.native(), cb.native());
#else
    uint16<16> a = ca, b = cb;
    a = bit_xor(a, 0x8000); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, 0x8000); // unsigned order matches signed order
    return _mm256_cmpgt_epi16(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_lt(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmplt_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int32x4 i_cmp_lt(const int32x4& a, const int32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmplt_epi32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcltq_s32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_clt_s_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_lt(const int32x8& a, const int32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epi32_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi32(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_lt(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpgt_epi32_mask(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int32x4 i_cmp_lt(const uint32x4& ca, const uint32x4& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comlt_epu32(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    a = bit_xor(a, 0x80000000); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, 0x80000000); // unsigned order matches signed order
    return _mm_cmplt_epi32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcltq_u32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_clt_u_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_lt(const uint32x8& ca, const uint32x8& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epu32_mask(ca.native(), cb.native());
#else
    uint32<8> a = ca, b = cb;
    a = bit_xor(a, 0x80000000); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, 0x80000000); // unsigned order matches signed order
    return _mm256_cmpgt_epi32(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_lt(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmplt_epu32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

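// 64-bit element compares have fewer native options: the paths below rely on
// XOP, AVX2 (via a swapped cmpgt), AVX-512, AArch64 NEON, POWER VSX 2.07 or
// MSA. Scalar (null) and plain ALTIVEC builds fall back to the element-wise
// null implementation; remaining configurations expand to
// SIMDPP_NOT_IMPLEMENTED2.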
static SIMDPP_INL
mask_int64x2 i_cmp_lt(const int64x2& a, const int64x2& b)
{
#if SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comlt_epi64(a.native(), b.native());
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_AVX2
    return _mm_cmpgt_epi64(b.native(), a.native());
#elif SIMDPP_USE_NEON64
    return vcltq_s64(a.native(), b.native());
#elif SIMDPP_USE_VSX_207
    return (__vector uint64_t) vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_clt_s_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_lt(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_lt(const int64x4& a, const int64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epi64_mask(a.native(), b.native());
#else
    return _mm256_cmpgt_epi64(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_lt(const int64<8>& a, const int64<8>& b)
{
    return _mm512_cmplt_epi64_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int64x2 i_cmp_lt(const uint64x2& a, const uint64x2& b)
{
#if SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comlt_epu64(a.native(), b.native());
#elif SIMDPP_USE_AVX512VL
    return _mm_cmplt_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_AVX2
    uint64<2> ca = bit_xor(a, 0x8000000000000000); // subtract the bias (flip the sign bit)
    uint64<2> cb = bit_xor(b, 0x8000000000000000); // so that unsigned order matches signed order
    return _mm_cmpgt_epi64(cb.native(), ca.native());
#elif SIMDPP_USE_NEON64
    return vcltq_u64(a.native(), b.native());
#elif SIMDPP_USE_VSX_207
    return (__vector uint64_t) vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_clt_u_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_lt(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_lt(const uint64x4& ca, const uint64x4& cb)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmplt_epu64_mask(ca.native(), cb.native());
#else
    uint64<4> a = ca, b = cb;
    a = bit_xor(a, 0x8000000000000000); // subtract the bias (flip the sign bit) so that
    b = bit_xor(b, 0x8000000000000000); // unsigned order matches signed order
    return _mm256_cmpgt_epi64(b.native(), a.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_lt(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmplt_epu64_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

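// Floating-point paths use an ordered "less-than" predicate: both the legacy
// cmplt instructions and the _CMP_LT_OQ predicate yield false whenever either
// operand is NaN. _CMP_LT_OQ is the quiet variant, so quiet NaN operands do
// not raise an invalid-operation exception.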
static SIMDPP_INL
mask_float32x4 i_cmp_lt(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_lt(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_LT_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_LT_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmplt_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcltq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fclt_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_cmp_lt(const float32x8& a, const float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_LT_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_LT_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_lt(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_LT_OQ);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64x2 i_cmp_lt(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_LT_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_LT_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmplt_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcltq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmplt(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fclt_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_lt(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_cmp_lt(const float64x4& a, const float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_LT_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_LT_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_lt(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_LT_OQ);
}
#endif

// -----------------------------------------------------------------------------

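// Generic fallback for vector types wider than the native register width:
// SIMDPP_VEC_ARRAY_IMPL2 applies i_cmp_lt() to each native-width sub-vector
// of a and b and gathers the results into the corresponding mask vector type.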
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_lt(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_lt, a, b);
}

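// Usage sketch (illustrative only, not part of this header): user code calls
// the public simdpp::cmp_lt() wrapper from simdpp/core/cmp_lt.h, which
// dispatches to the i_cmp_lt() overloads above. Something along these lines:
//
//     simdpp::int32<4> a = simdpp::make_int(1, 2, 3, 4);
//     simdpp::int32<4> b = simdpp::make_int(4, 3, 2, 1);
//     simdpp::mask_int32<4> lt = simdpp::cmp_lt(a, b); // true, true, false, false
//     simdpp::int32<4> mins = simdpp::blend(a, b, lt); // picks a where lt is set
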
} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif
