/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_NEQ_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_NEQ_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif
#include <simdpp/types.h>
#include <simdpp/core/bit_not.h>
#include <simdpp/core/bit_or.h>
#include <simdpp/core/cmp_eq.h>
#include <simdpp/core/shift_l.h>
#include <simdpp/core/shift_r.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

static SIMDPP_INL
mask_int8x16 i_cmp_neq(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpneq_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi8(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}

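// For mask arguments under AVX512, inequality reduces to a bitwise XOR of the
// mask registers: a lane differs exactly when the corresponding mask bits differ.
// This applies to all of the mask overloads below.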
#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int8<16> i_cmp_neq(const mask_int8<16>& a, const mask_int8<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_neq(const uint8x32& a, const uint8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpneq_epi8_mask(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int8<32> i_cmp_neq(const mask_int8<32>& a, const mask_int8<32>& b)
{
    // XOR the full __mmask32; _mm512_kxor only operates on the low 16 bits
    return a.native() ^ b.native();
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_neq(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpneq_epi8_mask(a.native(), b.native());
}

SIMDPP_INL mask_int8<64> i_cmp_neq(const mask_int8<64>& a, const mask_int8<64>& b)
{
    // XOR the full __mmask64; _mm512_kxor only operates on the low 16 bits
    return a.native() ^ b.native();
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int16x8 i_cmp_neq(const uint16x8& a, const uint16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi16(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int16<8> i_cmp_neq(const mask_int16<8>& a, const mask_int16<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_neq(const uint16x16& a, const uint16x16& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int16<16> i_cmp_neq(const mask_int16<16>& a, const mask_int16<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_neq(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpneq_epi16_mask(a.native(), b.native());
}

SIMDPP_INL mask_int16<32> i_cmp_neq(const mask_int16<32>& a, const mask_int16<32>& b)
{
    // XOR the full __mmask32; _mm512_kxor only operates on the low 16 bits
    return a.native() ^ b.native();
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int32x4 i_cmp_neq(const uint32x4& a, const uint32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi32(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int32<4> i_cmp_neq(const mask_int32<4>& a, const mask_int32<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_neq(const uint32x8& a, const uint32x8& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int32<8> i_cmp_neq(const mask_int32<8>& a, const mask_int32<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_neq(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmpneq_epu32_mask(a.native(), b.native());
}

static SIMDPP_INL
mask_int32<16> i_cmp_neq(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

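// SSE2 has no 64-bit integer compare instruction, so the SSE2 branch below
// assembles the result from 32-bit comparisons of the two halves of each lane.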
static SIMDPP_INL
mask_int64x2 i_cmp_neq(const uint64x2& a, const uint64x2& b)
{
#if SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comneq_epi64(a.native(), b.native());
#elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
    return bit_not(cmp_eq(a, b));
#elif SIMDPP_USE_SSE2
    uint64x2 r32, r32s;
    // compare the 32-bit halves; a 64-bit lane differs if either half differs
    r32 = (uint32x4) bit_not(cmp_eq(uint32x4(a), uint32x4(b)));
    // swap the 32-bit halves within each 64-bit lane
    r32s = bit_or(shift_l<32>(r32), shift_r<32>(r32));
    // combine the results: each 32-bit half is ORed with the neighbouring half
    r32 = bit_or(r32, r32s);
    return r32;
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_neq(a, b);
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int64<2> i_cmp_neq(const mask_int64<2>& a, const mask_int64<2>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_neq(const uint64x4& a, const uint64x4& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int64<4> i_cmp_neq(const mask_int64<4>& a, const mask_int64<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_neq(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmpneq_epi64_mask(a.native(), b.native());
}

static SIMDPP_INL
mask_int64<8> i_cmp_neq(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

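// The floating-point variants use an unordered, non-signaling not-equal
// predicate (_CMP_NEQ_UQ / fcune), so lanes containing NaN compare as not-equal.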
static SIMDPP_INL
mask_float32x4 i_cmp_neq(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_neq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpneq_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    return bit_not(cmp_eq(a, b));
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcune_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float32<4> i_cmp_neq(const mask_float32<4>& a, const mask_float32<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_cmp_neq(const float32x8& a, const float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_NEQ_UQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float32<8> i_cmp_neq(const mask_float32<8>& a, const mask_float32<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_neq(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
}

static SIMDPP_INL
mask_float32<16> i_cmp_neq(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64x2 i_cmp_neq(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_NEQ_UQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpneq_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206
    return bit_not(cmp_eq(a, b));
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcune_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_neq(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float64<2> i_cmp_neq(const mask_float64<2>& a, const mask_float64<2>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_cmp_neq(const float64x4& a, const float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_NEQ_UQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float64<4> i_cmp_neq(const mask_float64<4>& a, const mask_float64<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_neq(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
}

static SIMDPP_INL
mask_float64<8> i_cmp_neq(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

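// Generic case: vectors not matched by the overloads above (e.g. wider than the
// native width) are compared by applying i_cmp_neq to each native sub-vector.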
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_neq(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_neq, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif