/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_XOR_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_XOR_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/to_mask.h>
#include <simdpp/detail/null/bitwise.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {
// -----------------------------------------------------------------------------
// uint8, uint8
static SIMDPP_INL
uint8x16 i_bit_xor(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor(a, uint8x16(b));
#elif SIMDPP_USE_SSE2
    return _mm_xor_si128(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return veorq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_xor(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_xor_v(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_bit_xor(const uint8x32& a, const uint8x32& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_bit_xor(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_xor_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int8, mask_int8
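// When the mask is held in an AVX-512 k-register (the SIMDPP_USE_AVX512VL
// branches), it is xored directly. Elsewhere the mask is xored as a full-width
// vector and the result converted back with to_mask(). The same pattern
// repeats for the wider mask types below.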
static SIMDPP_INL
mask_int8x16 i_bit_xor(const mask_int8x16& a, const mask_int8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(uint8x16(a), uint8x16(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_bit_xor(const mask_int8x32& a, const mask_int8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return _mm256_xor_si256(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_bit_xor(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return a.native() ^ b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint16, uint16
static SIMDPP_INL
uint16<8> i_bit_xor(const uint16<8>& a, const uint16<8>& b)
{
    return (uint16<8>) i_bit_xor(uint8<16>(a), uint8<16>(b));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_bit_xor(const uint16<16>& a, const uint16<16>& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_xor(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_xor_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int16, mask_int16
static SIMDPP_INL
mask_int16<8> i_bit_xor(const mask_int16<8>& a, const mask_int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint16<8>) i_bit_xor(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16<16> i_bit_xor(const mask_int16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint16<16>) i_bit_xor(uint16<16>(a), uint16<16>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_bit_xor(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return a.native() ^ b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint32, uint32
static SIMDPP_INL
uint32<4> i_bit_xor(const uint32<4>& a, const uint32<4>& b)
{
    return (uint32<4>) i_bit_xor(uint8<16>(a), uint8<16>(b));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_xor(const uint32<8>& a, const uint32<8>& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_xor(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_xor_epi32(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int32, mask_int32
static SIMDPP_INL
mask_int32<4> i_bit_xor(const mask_int32<4>& a, const mask_int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint32<4>) i_bit_xor(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32<8> i_bit_xor(const mask_int32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint32<8>) i_bit_xor(uint32<8>(a), uint32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_bit_xor(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint64, uint64
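// PowerPC gained 64-bit integer vector operations only with VSX 2.07 (POWER8),
// so older AltiVec targets use the scalar fallback here.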
static SIMDPP_INL
uint64<2> i_bit_xor(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_xor(a, b);
#else
    return (uint64<2>) i_bit_xor(uint8<16>(a), uint8<16>(b));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_xor(const uint64<4>& a, const uint64<4>& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_xor(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_xor_epi64(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int64, mask_int64
static SIMDPP_INL
mask_int64<2> i_bit_xor(const mask_int64<2>& a, const mask_int64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint64<2>) i_bit_xor(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64<4> i_bit_xor(const mask_int64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint64<4>) i_bit_xor(uint64<4>(a), uint64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_bit_xor(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float32, float32

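// NEON and MSA have no dedicated floating-point xor, so the operands are
// reinterpreted as integer vectors and xored there; only the bit pattern
// of the elements is affected.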
static SIMDPP_INL
float32x4 i_bit_xor(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_xor(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_xor_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(a.native()),
                                           vreinterpretq_s32_f32(b.native())));
#elif SIMDPP_USE_MSA
    return (float32<4>) i_bit_xor(uint8<16>(a), uint8<16>(b));
#elif SIMDPP_USE_ALTIVEC
    return vec_xor(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32x8 i_bit_xor(const float32x8& a, const float32x8& b)
{
    return _mm256_xor_ps(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_xor(const float32<16>& a, const float32<16>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_xor_ps(a.native(), b.native());
#else
    return (float32<16>) i_bit_xor(uint32<16>(a), uint32<16>(b));
#endif
}
#endif

// -----------------------------------------------------------------------------
// mask_float32, mask_float32

static SIMDPP_INL
mask_float32x4 i_bit_xor(const mask_float32x4& a, const mask_float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float32x4(a), float32x4(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_bit_xor(const mask_float32x8& a, const mask_float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float32x8(a), float32x8(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_xor(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float64, float64
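// Double-precision vectors are unavailable on 32-bit NEON and on AltiVec
// without VSX 2.06, so those targets use the scalar fallback.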

static SIMDPP_INL
float64x2 i_bit_xor(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_SSE2
    return _mm_xor_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.native()),
                                           vreinterpretq_u64_f64(b.native())));
#elif SIMDPP_USE_VSX_206
    return vec_xor(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (float64<2>) i_bit_xor(uint8<16>(a), uint8<16>(b));
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::bit_xor(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64x4 i_bit_xor(const float64x4& a, const float64x4& b)
{
    return _mm256_xor_pd(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_xor(const float64<8>& a, const float64<8>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_xor_pd(a.native(), b.native());
#else
    return (float64<8>) i_bit_xor(uint64<8>(a), uint64<8>(b));
#endif
}
#endif

// -----------------------------------------------------------------------------
// mask_float64, mask_float64

static SIMDPP_INL
mask_float64x2 i_bit_xor(const mask_float64x2& a, const mask_float64x2& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float64x2(a), float64x2(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_bit_xor(const mask_float64x4& a, const mask_float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float64x4(a), float64x4(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_xor(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
template<class V, class VM> SIMDPP_INL
V i_bit_xor(const V& a, const VM& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_xor, a, b)
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif