/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_EQ_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_EQ_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/core/bit_and.h>
#include <simdpp/core/bit_or.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/core/i_shift_l.h>
#include <simdpp/core/transpose.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {


static SIMDPP_INL
mask_int8x16 i_cmp_eq(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_eq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpeq_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmpeq_epi8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vceqq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpeq(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_ceq_b((v16i8) a.native(), (v16i8) b.native());
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int8<16> i_cmp_eq(const mask_int8<16>& a, const mask_int8<16>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8x32 i_cmp_eq(const uint8x32& a, const uint8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpeq_epi8_mask(a.native(), b.native());
#else
    return _mm256_cmpeq_epi8(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int8<32> i_cmp_eq(const mask_int8<32>& a, const mask_int8<32>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_eq(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpeq_epi8_mask(a.native(), b.native());
}

SIMDPP_INL mask_int8<64> i_cmp_eq(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int16x8 i_cmp_eq(const uint16x8& a, const uint16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_eq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpeq_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmpeq_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vceqq_u16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpeq(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_ceq_h((v8i16) a.native(), (v8i16) b.native());
#endif
}

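// Mask-mask overloads for 16-bit elements: assumed to follow the same
// kxnor pattern as the 8-bit and 32-bit sections in this file.
#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int16<8> i_cmp_eq(const mask_int16<8>& a, const mask_int16<8>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif
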
#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16x16 i_cmp_eq(const uint16x16& a, const uint16x16& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpeq_epi16_mask(a.native(), b.native());
#else
    return _mm256_cmpeq_epi16(a.native(), b.native());
#endif
}
#endif

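// As above: assumed kxnor-based mask-mask overload, here for mask_int16<16>.
#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int16<16> i_cmp_eq(const mask_int16<16>& a, const mask_int16<16>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif
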
#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_eq(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpeq_epi16_mask(a.native(), b.native());
}
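
// Assumed mask-mask counterpart, mirroring the int8 AVX512BW overload above.
SIMDPP_INL mask_int16<32> i_cmp_eq(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}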
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int32x4 i_cmp_eq(const uint32x4& a, const uint32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_eq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpeq_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    return _mm_cmpeq_epi32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vceqq_u32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpeq(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_ceq_w((v4i32) a.native(), (v4i32) b.native());
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int32<4> i_cmp_eq(const mask_int32<4>& a, const mask_int32<4>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32x8 i_cmp_eq(const uint32x8& a, const uint32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpeq_epi32_mask(a.native(), b.native());
#else
    return _mm256_cmpeq_epi32(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int32<8> i_cmp_eq(const mask_int32<8>& a, const mask_int32<8>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_cmp_eq(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmpeq_epi32_mask(a.native(), b.native());
}

static SIMDPP_INL
mask_int32<16> i_cmp_eq(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_int64x2 i_cmp_eq(const uint64x2& a, const uint64x2& b)
{
#if SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comeq_epi64(a.native(), b.native());
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpeq_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_SSE4_1
    return _mm_cmpeq_epi64(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    uint64x2 r32, r32s;
    r32 = i_cmp_eq(uint32x4(a), uint32x4(b));
    // swap the 32-bit halves
    r32s = bit_or(shift_l<32>(r32), shift_r<32>(r32));
    // combine the results: each 32-bit half is ANDed with its neighbouring half
    r32 = bit_and(r32, r32s);
    return r32;
#elif SIMDPP_USE_NEON64
    return vceqq_u64(a.native(), b.native());
#elif SIMDPP_USE_NEON32
    uint32x4 r32, r32s;
    r32 = i_cmp_eq(uint32x4(a), uint32x4(b));
    r32s = r32;
    // swap the 32-bit halves
    transpose2(r32, r32s);
    // combine the results: each 32-bit half is ANDed with its neighbouring half
    r32 = bit_and(r32, r32s);
    return uint64x2(r32);
#elif SIMDPP_USE_VSX_207
    return (__vector uint64_t) vec_cmpeq(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_ceq_d((v2i64) a.native(), (v2i64) b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_eq(a, b);
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int64<2> i_cmp_eq(const mask_int64<2>& a, const mask_int64<2>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64x4 i_cmp_eq(const uint64x4& a, const uint64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpeq_epi64_mask(a.native(), b.native());
#else
    return _mm256_cmpeq_epi64(a.native(), b.native());
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_int64<4> i_cmp_eq(const mask_int64<4>& a, const mask_int64<4>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_cmp_eq(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmpeq_epi64_mask(a.native(), b.native());
}

static SIMDPP_INL
mask_int64<8> i_cmp_eq(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float32x4 i_cmp_eq(const float32x4& a, const float32x4& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_eq(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_EQ_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_EQ_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpeq_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vceqq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpeq(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fceq_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float32<4> i_cmp_eq(const mask_float32<4>& a, const mask_float32<4>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif


#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_cmp_eq(const float32x8& a, const float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_EQ_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_EQ_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float32<8> i_cmp_eq(const mask_float32<8>& a, const mask_float32<8>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_eq(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_EQ_OQ);
}

static SIMDPP_INL
mask_float32<16> i_cmp_eq(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64x2 i_cmp_eq(const float64x2& a, const float64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_EQ_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_EQ_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpeq_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vceqq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmpeq(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fceq_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_eq(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float64<2> i_cmp_eq(const mask_float64<2>& a, const mask_float64<2>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif


#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_cmp_eq(const float64x4& a, const float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_EQ_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_EQ_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
static SIMDPP_INL
mask_float64<4> i_cmp_eq(const mask_float64<4>& a, const mask_float64<4>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_eq(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_EQ_OQ);
}

static SIMDPP_INL
mask_float64<8> i_cmp_eq(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kxnor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

// Generic case: vectors wider than the native width are processed one
// native-width sub-vector at a time.
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_eq(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_eq, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif