bit_xor.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/bit_xor.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_XOR_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_XOR_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/to_mask.h>
17	#include <simdpp/detail/null/bitwise.h>
18	#include <simdpp/detail/vector_array_macros.h>
19
20	namespace simdpp {
21	namespace SIMDPP_ARCH_NAMESPACE {
22	namespace detail {
23	namespace insn {
24
25	// -----------------------------------------------------------------------------
26	// uint8, uint8
27	static SIMDPP_INL
28	uint8x16 i_bit_xor(const uint8x16& a, const uint8x16& b)
29	{
30	#if SIMDPP_USE_NULL
31	return detail::null::bit_xor(a, uint8x16(b));
32	#elif SIMDPP_USE_SSE2
33	return _mm_xor_si128(a.native(), b.native());
34	#elif SIMDPP_USE_NEON
35	return veorq_u8(a.native(), b.native());
36	#elif SIMDPP_USE_ALTIVEC
37	return vec_xor(a.native(), b.native());
38	#elif SIMDPP_USE_MSA
39	return __msa_xor_v(a.native(), b.native());
40	#endif
41	}
42
43	#if SIMDPP_USE_AVX2
44	static SIMDPP_INL
45	uint8x32 i_bit_xor(const uint8x32& a, const uint8x32& b)
46	{
47	return _mm256_xor_si256(a.native(), b.native());
48	}
49	#endif
50
51	#if SIMDPP_USE_AVX512BW
52	SIMDPP_INL uint8<`64`> i_bit_xor(const uint8<`64`>& a, const uint8<`64`>& b)
53	{
54	return _mm512_xor_si512(a.native(), b.native());
55	}
56	#endif
57
58	// -----------------------------------------------------------------------------
59	// mask_int8, mask_int8
60	static SIMDPP_INL
61	mask_int8x16 i_bit_xor(const mask_int8x16& a, const mask_int8x16& b)
62	{
63	#if SIMDPP_USE_NULL
64	return detail::null::bit_xor_mm(a, b);
65	#elif SIMDPP_USE_AVX512VL
66	return a.native() ^ b.native();
67	#else
68	return to_mask(i_bit_xor(uint8x16 (a), uint8x16 (b)));
69	#endif
70	}
71
72	#if SIMDPP_USE_AVX2
73	static SIMDPP_INL
74	mask_int8x32 i_bit_xor(const mask_int8x32& a, const mask_int8x32& b)
75	{
76	#if SIMDPP_USE_AVX512VL
77	return a.native() ^ b.native();
78	#else
79	return _mm256_xor_si256(a.native(), b.native());
80	#endif
81	}
82	#endif
83
84	#if SIMDPP_USE_AVX512BW
85	SIMDPP_INL mask_int8<`64`> i_bit_xor(const mask_int8<`64`>& a, const mask_int8<`64`>& b)
86	{
87	return a.native() ^ b.native();
88	}
89	#endif
90
91	// -----------------------------------------------------------------------------
92	// uint16, uint16
93	static SIMDPP_INL
94	uint16<`8`> i_bit_xor(const uint16<`8`>& a, const uint16<`8`>& b)
95	{
96	return (uint16<`8`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b));
97	}
98
99	#if SIMDPP_USE_AVX2
100	static SIMDPP_INL
101	uint16<`16`> i_bit_xor(const uint16<`16`>& a, const uint16<`16`>& b)
102	{
103	return _mm256_xor_si256(a.native(), b.native());
104	}
105	#endif
106
107	#if SIMDPP_USE_AVX512BW
108	SIMDPP_INL uint16<`32`> i_bit_xor(const uint16<`32`>& a, const uint16<`32`>& b)
109	{
110	return _mm512_xor_si512(a.native(), b.native());
111	}
112	#endif
113
114	// -----------------------------------------------------------------------------
115	// mask_int16, mask_int16
116	static SIMDPP_INL
117	mask_int16<`8`> i_bit_xor(const mask_int16<`8`>& a, const mask_int16<`8`>& b)
118	{
119	#if SIMDPP_USE_NULL
120	return detail::null::bit_xor_mm(a, b);
121	#elif SIMDPP_USE_AVX512VL
122	return a.native() ^ b.native();
123	#else
124	return to_mask((uint16<`8`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b)));
125	#endif
126	}
127
128	#if SIMDPP_USE_AVX2
129	static SIMDPP_INL
130	mask_int16<`16`> i_bit_xor(const mask_int16<`16`>& a, const mask_int16<`16`>& b)
131	{
132	#if SIMDPP_USE_AVX512VL
133	return a.native() ^ b.native();
134	#else
135	return to_mask((uint16<`16`>) i_bit_xor(uint16<`16`>(a), uint16<`16`>(b)));
136	#endif
137	}
138	#endif
139
140	#if SIMDPP_USE_AVX512BW
141	SIMDPP_INL mask_int16<`32`> i_bit_xor(const mask_int16<`32`>& a, const mask_int16<`32`>& b)
142	{
143	return a.native() ^ b.native();
144	}
145	#endif
146
147	// -----------------------------------------------------------------------------
148	// uint32, uint32
149	static SIMDPP_INL
150	uint32<`4`> i_bit_xor(const uint32<`4`>& a, const uint32<`4`>& b)
151	{
152	return (uint32<`4`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b));
153	}
154
155	#if SIMDPP_USE_AVX2
156	static SIMDPP_INL
157	uint32<`8`> i_bit_xor(const uint32<`8`>& a, const uint32<`8`>& b)
158	{
159	return _mm256_xor_si256(a.native(), b.native());
160	}
161	#endif
162
163	#if SIMDPP_USE_AVX512F
164	static SIMDPP_INL
165	uint32<`16`> i_bit_xor(const uint32<`16`>& a, const uint32<`16`>& b)
166	{
167	return _mm512_xor_epi32(a.native(), b.native());
168	}
169	#endif
170
171	// -----------------------------------------------------------------------------
172	// mask_int32, mask_int32
173	static SIMDPP_INL
174	mask_int32<`4`> i_bit_xor(const mask_int32<`4`>& a, const mask_int32<`4`>& b)
175	{
176	#if SIMDPP_USE_NULL
177	return detail::null::bit_xor_mm(a, b);
178	#elif SIMDPP_USE_AVX512VL
179	return a.native() ^ b.native();
180	#else
181	return to_mask((uint32<`4`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b)));
182	#endif
183	}
184
185	#if SIMDPP_USE_AVX2
186	static SIMDPP_INL
187	mask_int32<`8`> i_bit_xor(const mask_int32<`8`>& a, const mask_int32<`8`>& b)
188	{
189	#if SIMDPP_USE_AVX512VL
190	return a.native() ^ b.native();
191	#else
192	return to_mask((uint32<`8`>) i_bit_xor(uint32<`8`>(a), uint32<`8`>(b)));
193	#endif
194	}
195	#endif
196
197	#if SIMDPP_USE_AVX512F
198	static SIMDPP_INL
199	mask_int32<`16`> i_bit_xor(const mask_int32<`16`>& a, const mask_int32<`16`>& b)
200	{
201	return _mm512_kxor(a.native(), b.native());
202	}
203	#endif
204
205	// -----------------------------------------------------------------------------
206	// uint64, uint64
207	static SIMDPP_INL
208	uint64<`2`> i_bit_xor(const uint64<`2`>& a, const uint64<`2`>& b)
209	{
210	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
211	return detail::null::bit_xor(a, b);
212	#else
213	return (uint64<`2`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b));
214	#endif
215	}
216
217	#if SIMDPP_USE_AVX2
218	static SIMDPP_INL
219	uint64<`4`> i_bit_xor(const uint64<`4`>& a, const uint64<`4`>& b)
220	{
221	return _mm256_xor_si256(a.native(), b.native());
222	}
223	#endif
224
225	#if SIMDPP_USE_AVX512F
226	static SIMDPP_INL
227	uint64<`8`> i_bit_xor(const uint64<`8`>& a, const uint64<`8`>& b)
228	{
229	return _mm512_xor_epi64(a.native(), b.native());
230	}
231	#endif
232
233	// -----------------------------------------------------------------------------
234	// mask_int64, mask_int64
235	static SIMDPP_INL
236	mask_int64<`2`> i_bit_xor(const mask_int64<`2`>& a, const mask_int64<`2`>& b)
237	{
238	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
239	return detail::null::bit_xor_mm(a, b);
240	#elif SIMDPP_USE_AVX512VL
241	return a.native() ^ b.native();
242	#else
243	return to_mask((uint64<`2`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b)));
244	#endif
245	}
246
247	#if SIMDPP_USE_AVX2
248	static SIMDPP_INL
249	mask_int64<`4`> i_bit_xor(const mask_int64<`4`>& a, const mask_int64<`4`>& b)
250	{
251	#if SIMDPP_USE_AVX512VL
252	return a.native() ^ b.native();
253	#else
254	return to_mask((uint64<`4`>) i_bit_xor(uint64<`4`>(a), uint64<`4`>(b)));
255	#endif
256	}
257	#endif
258
259	#if SIMDPP_USE_AVX512F
260	static SIMDPP_INL
261	mask_int64<`8`> i_bit_xor(const mask_int64<`8`>& a, const mask_int64<`8`>& b)
262	{
263	return _mm512_kxor(a.native(), b.native());
264	}
265	#endif
266
// -----------------------------------------------------------------------------
// float32, float32
268
269	static SIMDPP_INL
270	float32x4 i_bit_xor(const float32x4& a, const float32x4& b)
271	{
272	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
273	return detail::null::bit_xor(a, b);
274	#elif SIMDPP_USE_SSE2
275	return _mm_xor_ps(a.native(), b.native());
276	#elif SIMDPP_USE_NEON
277	return vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(a.native()),
278	vreinterpretq_s32_f32(b.native())));
279	#elif SIMDPP_USE_MSA
280	return (float32<`4`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b));
281	#elif SIMDPP_USE_ALTIVEC
282	return vec_xor(a.native(), b.native());
283	#endif
284	}
285
286	#if SIMDPP_USE_AVX
287	static SIMDPP_INL
288	float32x8 i_bit_xor(const float32x8& a, const float32x8& b)
289	{
290	return _mm256_xor_ps(a.native(), b.native());
291	}
292	#endif
293
294	#if SIMDPP_USE_AVX512F
295	static SIMDPP_INL
296	float32<`16`> i_bit_xor(const float32<`16`>& a, const float32<`16`>& b)
297	{
298	#if SIMDPP_USE_AVX512DQ
299	return _mm512_xor_ps(a.native(), b.native());
300	#else
301	return (float32<`16`>) i_bit_xor(uint32<`16`>(a), uint32<`16`>(b));
302	#endif
303	}
304	#endif
305
306	// -----------------------------------------------------------------------------
307	// mask_float32, mask_float32
308
309	static SIMDPP_INL
310	mask_float32x4 i_bit_xor(const mask_float32x4& a, const mask_float32x4& b)
311	{
312	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
313	return detail::null::bit_xor_mm(a, b);
314	#elif SIMDPP_USE_AVX512VL
315	return a.native() ^ b.native();
316	#else
317	return to_mask(i_bit_xor(float32x4 (a), float32x4 (b)));
318	#endif
319	}
320
321	#if SIMDPP_USE_AVX
322	static SIMDPP_INL
323	mask_float32x8 i_bit_xor(const mask_float32x8& a, const mask_float32x8& b)
324	{
325	#if SIMDPP_USE_AVX512VL
326	return a.native() ^ b.native();
327	#else
328	return to_mask(i_bit_xor(float32x8(a), float32x8(b)));
329	#endif
330	}
331	#endif
332
333	#if SIMDPP_USE_AVX512F
334	static SIMDPP_INL
335	mask_float32<`16`> i_bit_xor(const mask_float32<`16`>& a, const mask_float32<`16`>& b)
336	{
337	return _mm512_kxor(a.native(), b.native());
338	}
339	#endif
340
341	// -----------------------------------------------------------------------------
342	// float64, float64
343
344	static SIMDPP_INL
345	float64x2 i_bit_xor(const float64x2& a, const float64x2& b)
346	{
347	#if SIMDPP_USE_SSE2
348	return _mm_xor_pd(a.native(), b.native());
349	#elif SIMDPP_USE_NEON64
350	return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.native()),
351	vreinterpretq_u64_f64(b.native())));
352	#elif SIMDPP_USE_VSX_206
353	return vec_xor(a.native(), b.native());
354	#elif SIMDPP_USE_MSA
355	return (float64<`2`>) i_bit_xor(uint8<`16`>(a), uint8<`16`>(b));
356	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
357	return detail::null::bit_xor(a, b);
358	#endif
359	}
360
361	#if SIMDPP_USE_AVX
362	static SIMDPP_INL
363	float64x4 i_bit_xor(const float64x4& a, const float64x4& b)
364	{
365	return _mm256_xor_pd(a.native(), b.native());
366	}
367	#endif
368
369	#if SIMDPP_USE_AVX512F
370	static SIMDPP_INL
371	float64<`8`> i_bit_xor(const float64<`8`>& a, const float64<`8`>& b)
372	{
373	#if SIMDPP_USE_AVX512DQ
374	return _mm512_xor_pd(a.native(), b.native());
375	#else
376	return (float64<`8`>) i_bit_xor(uint64<`8`>(a), uint64<`8`>(b));
377	#endif
378	}
379	#endif
380
381	// -----------------------------------------------------------------------------
382	// mask_float64, mask_float64
383
384	static SIMDPP_INL
385	mask_float64x2 i_bit_xor(const mask_float64x2& a, const mask_float64x2& b)
386	{
387	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
388	return detail::null::bit_xor_mm(a, b);
389	#elif SIMDPP_USE_AVX512VL
390	return a.native() ^ b.native();
391	#else
392	return to_mask(i_bit_xor(float64x2 (a), float64x2 (b)));
393	#endif
394	}
395
396	#if SIMDPP_USE_AVX
397	static SIMDPP_INL
398	mask_float64x4 i_bit_xor(const mask_float64x4& a, const mask_float64x4& b)
399	{
400	#if SIMDPP_USE_AVX512VL
401	return a.native() ^ b.native();
402	#else
403	return to_mask(i_bit_xor(float64x4(a), float64x4(b)));
404	#endif
405	}
406	#endif
407
408	#if SIMDPP_USE_AVX512F
409	static SIMDPP_INL
410	mask_float64<`8`> i_bit_xor(const mask_float64<`8`>& a, const mask_float64<`8`>& b)
411	{
412	return _mm512_kxor(a.native(), b.native());
413	}
414	#endif
415
416	// -----------------------------------------------------------------------------
417
418	template<class V, class VM> SIMDPP_INL
419	V i_bit_xor(const V& a, const VM& b)
420	{
421	SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_xor, a, b)
422	}
423
424	} // namespace insn
425	} // namespace detail
426	} // namespace SIMDPP_ARCH_NAMESPACE
427	} // namespace simdpp
428
429	#endif
430
431
432

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/bit_xor.h