/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_XOR_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_XOR_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/to_mask.h>
#include <simdpp/detail/null/bitwise.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {
// -----------------------------------------------------------------------------
// uint8, uint8
static SIMDPP_INL
uint8x16 i_bit_xor(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor(a, uint8x16(b));
#elif SIMDPP_USE_SSE2
    return _mm_xor_si128(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return veorq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_xor(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_xor_v(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_bit_xor(const uint8x32& a, const uint8x32& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_bit_xor(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_xor_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int8, mask_int8
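// For the mask overloads in this file: the NULL backend xors masks
// element-wise via detail::null::bit_xor_mm. On AVX-512 (VL/BW/F) masks are
// opmask registers, so the built-in ^ operator or _mm512_kxor is used;
// otherwise the mask is xor-ed as a full-width vector and converted back
// with to_mask().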
static SIMDPP_INL
mask_int8x16 i_bit_xor(const mask_int8x16& a, const mask_int8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(uint8x16(a), uint8x16(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_bit_xor(const mask_int8x32& a, const mask_int8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return _mm256_xor_si256(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_bit_xor(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return a.native() ^ b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint16, uint16
static SIMDPP_INL
uint16<8> i_bit_xor(const uint16<8>& a, const uint16<8>& b)
{
    return (uint16<8>) i_bit_xor(uint8<16>(a), uint8<16>(b));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_bit_xor(const uint16<16>& a, const uint16<16>& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_xor(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_xor_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int16, mask_int16
static SIMDPP_INL
mask_int16<8> i_bit_xor(const mask_int16<8>& a, const mask_int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint16<8>) i_bit_xor(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16<16> i_bit_xor(const mask_int16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint16<16>) i_bit_xor(uint16<16>(a), uint16<16>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_bit_xor(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return a.native() ^ b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint32, uint32
static SIMDPP_INL
uint32<4> i_bit_xor(const uint32<4>& a, const uint32<4>& b)
{
    return (uint32<4>) i_bit_xor(uint8<16>(a), uint8<16>(b));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_xor(const uint32<8>& a, const uint32<8>& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_xor(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_xor_epi32(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int32, mask_int32
static SIMDPP_INL
mask_int32<4> i_bit_xor(const mask_int32<4>& a, const mask_int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint32<4>) i_bit_xor(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32<8> i_bit_xor(const mask_int32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint32<8>) i_bit_xor(uint32<8>(a), uint32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_bit_xor(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint64, uint64
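// AltiVec without VSX 2.07 has no native 64-bit integer vectors, so those
// builds (like the NULL backend) fall back to the scalar emulation.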
static SIMDPP_INL
uint64<2> i_bit_xor(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_xor(a, b);
#else
    return (uint64<2>) i_bit_xor(uint8<16>(a), uint8<16>(b));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_xor(const uint64<4>& a, const uint64<4>& b)
{
    return _mm256_xor_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_xor(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_xor_epi64(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int64, mask_int64
static SIMDPP_INL
mask_int64<2> i_bit_xor(const mask_int64<2>& a, const mask_int64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint64<2>) i_bit_xor(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64<4> i_bit_xor(const mask_int64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask((uint64<4>) i_bit_xor(uint64<4>(a), uint64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_bit_xor(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
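// float32, float32
// NEON and MSA have no floating-point xor, so the operands are reinterpreted
// as integer vectors, xor-ed there and cast back. On AVX-512 the 512-bit
// single-precision xor intrinsic requires AVX512DQ; without it the uint32
// overload is used instead.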

static SIMDPP_INL
float32x4 i_bit_xor(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_xor(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_xor_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(a.native()),
                                           vreinterpretq_s32_f32(b.native())));
#elif SIMDPP_USE_MSA
    return (float32<4>) i_bit_xor(uint8<16>(a), uint8<16>(b));
#elif SIMDPP_USE_ALTIVEC
    return vec_xor(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32x8 i_bit_xor(const float32x8& a, const float32x8& b)
{
    return _mm256_xor_ps(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_xor(const float32<16>& a, const float32<16>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_xor_ps(a.native(), b.native());
#else
    return (float32<16>) i_bit_xor(uint32<16>(a), uint32<16>(b));
#endif
}
#endif

// -----------------------------------------------------------------------------
// mask_float32, mask_float32

static SIMDPP_INL
mask_float32x4 i_bit_xor(const mask_float32x4& a, const mask_float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float32x4(a), float32x4(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_bit_xor(const mask_float32x8& a, const mask_float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float32x8(a), float32x8(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_xor(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float64, float64

static SIMDPP_INL
float64x2 i_bit_xor(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_SSE2
    return _mm_xor_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.native()),
                                           vreinterpretq_u64_f64(b.native())));
#elif SIMDPP_USE_VSX_206
    return vec_xor(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (float64<2>) i_bit_xor(uint8<16>(a), uint8<16>(b));
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::bit_xor(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64x4 i_bit_xor(const float64x4& a, const float64x4& b)
{
    return _mm256_xor_pd(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_xor(const float64<8>& a, const float64<8>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_xor_pd(a.native(), b.native());
#else
    return (float64<8>) i_bit_xor(uint64<8>(a), uint64<8>(b));
#endif
}
#endif

// -----------------------------------------------------------------------------
// mask_float64, mask_float64

static SIMDPP_INL
mask_float64x2 i_bit_xor(const mask_float64x2& a, const mask_float64x2& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    return detail::null::bit_xor_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float64x2(a), float64x2(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_bit_xor(const mask_float64x4& a, const mask_float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() ^ b.native();
#else
    return to_mask(i_bit_xor(float64x4(a), float64x4(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_xor(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

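// Generic fallback for vectors wider than the largest natively supported
// width: SIMDPP_VEC_ARRAY_IMPL2 applies i_bit_xor to each pair of
// native-width sub-vectors of a and b and gathers the results into V.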
template<class V, class VM> SIMDPP_INL
V i_bit_xor(const V& a, const VM& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_xor, a, b)
}
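
// A minimal usage sketch (assuming the public wrappers in <simdpp/simd.h>;
// user code normally reaches these overloads through simdpp::bit_xor or
// operator^ rather than calling them directly):
//
//     uint8<16> a = splat(0xFF);
//     uint8<16> b = splat(0x0F);
//     uint8<16> c = bit_xor(a, b);   // every lane becomes 0xF0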

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif