/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_NEQ_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_NEQ_H

#ifndef LIBSIMDPP_SIMD_H
#error "This file must be included through simd.h"
#endif
#include <simdpp/types.h>
#include <simdpp/core/bit_not.h>
#include <simdpp/core/bit_or.h>
#include <simdpp/core/cmp_eq.h>
#include <simdpp/core/i_shift_l.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

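// This header implements the backend of cmp_neq (element-wise "not equal").
// Each i_cmp_neq overload selects the best instruction for the enabled ISA at
// compile time and otherwise falls back to bit_not(cmp_eq(a, b)). The public
// entry point is cmp_neq() from simdpp/core/cmp_neq.h, e.g.:
//
//     uint32<4> a, b;                  // initialized elsewhere
//     mask_int32<4> m = cmp_neq(a, b);
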
static SIMDPP_INL
mask_int8x16 i_cmp_neq(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpneq_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi8(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}

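// For AVX-512 mask arguments both inputs are already k-register bitmasks, so
// per-lane "not equal" reduces to a bitwise XOR of the two masks.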
#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int8<16> i_cmp_neq(const mask_int8<16>& a, const mask_int8<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_neq(const uint8x32& a, const uint8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpneq_epi8_mask(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int8<32> i_cmp_neq(const mask_int8<32>& a, const mask_int8<32>& b)
{
    // 32-lane masks are __mmask32; use the width-correct kxor intrinsic
    return _kxor_mask32(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_neq(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpneq_epi8_mask(a.native(), b.native());
}

SIMDPP_INL mask_int8<64> i_cmp_neq(const mask_int8<64>& a, const mask_int8<64>& b)
{
    // 64-lane masks are __mmask64; _mm512_kxor would truncate to 16 bits
    return _kxor_mask64(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

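// 16-bit lanes: XOP provides a direct "not equal" comparison; everywhere else
// the result is synthesized as NOT(a == b).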
static SIMDPP_INL
mask_int16x8 i_cmp_neq(const uint16x8& a, const uint16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi16(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int16<8> i_cmp_neq(const mask_int16<8>& a, const mask_int16<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_neq(const uint16x16& a, const uint16x16& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int16<16> i_cmp_neq(const mask_int16<16>& a, const mask_int16<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_neq(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpneq_epi16_mask(a.native(), b.native());
}

SIMDPP_INL mask_int16<32> i_cmp_neq(const mask_int16<32>& a, const mask_int16<32>& b)
{
    // 32-lane masks are __mmask32; _mm512_kxor would truncate to 16 bits
    return _kxor_mask32(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

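// 32-bit lanes follow the same dispatch pattern; at 512-bit width AVX512F
// provides _mm512_cmpneq_epu32_mask directly.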
static SIMDPP_INL
mask_int32x4 i_cmp_neq(const uint32x4& a, const uint32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi32(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int32<4> i_cmp_neq(const mask_int32<4>& a, const mask_int32<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_neq(const uint32x8& a, const uint32x8& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int32<8> i_cmp_neq(const mask_int32<8>& a, const mask_int32<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_neq(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmpneq_epu32_mask(a.native(), b.native());
}

static SIMDPP_INL
mask_int32<16> i_cmp_neq(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

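// 64-bit lanes are the awkward case: plain SSE2 has no 64-bit integer compare
// at all, so the 128-bit implementation below emulates it by comparing the
// 32-bit halves and combining the per-half results.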
static SIMDPP_INL
mask_int64x2 i_cmp_neq(const uint64x2& a, const uint64x2& b)
{
#if SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi64(a.native(), b.native());
#elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
    return bit_not(cmp_eq(a, b));
#elif SIMDPP_USE_SSE2
    // a 64-bit lane differs if either of its 32-bit halves differs
    uint64x2 r32, r32s;
    r32 = (uint32x4) i_cmp_neq(uint32x4(a), uint32x4(b));
    // swap the 32-bit halves within each 64-bit lane
    r32s = bit_or(shift_l<32>(r32), shift_r<32>(r32));
    // OR each half with its neighbour so both halves of a lane agree
    r32 = bit_or(r32, r32s);
    return r32;
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_neq(a, b);
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int64<2> i_cmp_neq(const mask_int64<2>& a, const mask_int64<2>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_neq(const uint64x4& a, const uint64x4& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int64<4> i_cmp_neq(const mask_int64<4>& a, const mask_int64<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_neq(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmpneq_epi64_mask(a.native(), b.native());
}

static SIMDPP_INL
mask_int64<8> i_cmp_neq(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

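// Floating-point "not equal" uses the unordered predicate (_CMP_NEQ_UQ on
// x86, fcune on MSA): if either operand is NaN the lanes compare as not
// equal, matching the semantics of the IEEE 754 != operator.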
static SIMDPP_INL
mask_float32x4 i_cmp_neq(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpneq_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    return bit_not(cmp_eq(a, b));
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcune_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float32<4> i_cmp_neq(const mask_float32<4>& a, const mask_float32<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_cmp_neq(const float32x8& a, const float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_NEQ_UQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float32<8> i_cmp_neq(const mask_float32<8>& a, const mask_float32<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_neq(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
}

static SIMDPP_INL
mask_float32<16> i_cmp_neq(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

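// 64-bit floating point: NEON32 and pre-VSX Altivec have no native f64
// vectors, so those configurations take the scalar null fallback.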
static SIMDPP_INL
mask_float64x2 i_cmp_neq(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpneq_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206
    return bit_not(cmp_eq(a, b));
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcune_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_neq(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float64<2> i_cmp_neq(const mask_float64<2>& a, const mask_float64<2>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_cmp_neq(const float64x4& a, const float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_NEQ_UQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float64<4> i_cmp_neq(const mask_float64<4>& a, const mask_float64<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_neq(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
}

static SIMDPP_INL
mask_float64<8> i_cmp_neq(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

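// Generic fallback for vectors wider than the native width: split the
// operands into native-width parts and apply i_cmp_neq to each pair.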
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_neq(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_neq, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif