zip_lo.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/zip_lo.h]

/*  Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_ZIP_LO_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_ZIP_LO_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/shuffle_bytes16.h>
17	#include <simdpp/detail/null/shuffle.h>
18	#include <simdpp/detail/neon/shuffle.h>
19
20	namespace simdpp {
21	namespace SIMDPP_ARCH_NAMESPACE {
22	namespace detail {
23	namespace insn {
24
25	static SIMDPP_INL
26	uint8x16 i_zip16_lo(const uint8x16& a, const uint8x16& b)
27	{
28	#if SIMDPP_USE_NULL
29	return detail::null::zip16_lo(a, b);
30	#elif SIMDPP_USE_SSE2
31	return _mm_unpacklo_epi8(a.native(), b.native());
32	#elif SIMDPP_USE_NEON
33	// the compiler will optimize multiple vzip instructions if both zip_lo
34	// and zip_hi are used on the same arguments
35	return vzipq_u8(a.native(), b.native()).val[`0`];
36	#elif SIMDPP_USE_ALTIVEC
37	return vec_mergeh(a.native(), b.native());
38	#elif SIMDPP_USE_MSA
39	return (v16u8) __msa_ilvr_b((v16i8)b.native(), (v16i8)a.native());
40	#endif
41	}
42
#if SIMDPP_USE_AVX2
/** 256-bit AVX2 overload: forwards both native registers to
    _mm256_unpacklo_epi8, which interleaves the 8-bit elements of the low half
    of every 128-bit lane of @a a and @a b.
*/
static SIMDPP_INL
uint8x32 i_zip16_lo(const uint8x32& a, const uint8x32& b)
{
    const uint8x32 interleaved = _mm256_unpacklo_epi8(a.native(), b.native());
    return interleaved;
}
#endif
50
#if SIMDPP_USE_AVX512BW
/** 512-bit AVX512BW overload: interleaves the 8-bit elements of the low half
    of every 128-bit lane of @a a and @a b via _mm512_unpacklo_epi8.

    @param a vector supplying the even-indexed result elements
    @param b vector supplying the odd-indexed result elements
    @return the interleaved vector
*/
SIMDPP_INL uint8<64> i_zip16_lo(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_unpacklo_epi8(a.native(), b.native());
}
#endif
57
58	template<unsigned N> SIMDPP_INL
59	uint8<N> i_zip16_lo(const uint8<N>& a, const uint8<N>& b)
60	{
61	SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_zip16_lo, a, b)
62	}
63
64	// -----------------------------------------------------------------------------
65
66	static SIMDPP_INL
67	uint16x8 i_zip8_lo(const uint16x8& a, const uint16x8& b)
68	{
69	#if SIMDPP_USE_NULL
70	return detail::null::zip8_lo(a, b);
71	#elif SIMDPP_USE_SSE2
72	return _mm_unpacklo_epi16(a.native(), b.native());
73	#elif SIMDPP_USE_NEON
74	return vzipq_u16(a.native(), b.native()).val[`0`];
75	#elif SIMDPP_USE_ALTIVEC
76	return vec_mergeh(a.native(), b.native());
77	#elif SIMDPP_USE_MSA
78	return (v8u16) __msa_ilvr_h((v8i16)b.native(), (v8i16)a.native());
79	#endif
80	}
81
#if SIMDPP_USE_AVX2
/** 256-bit AVX2 overload: forwards both native registers to
    _mm256_unpacklo_epi16, which interleaves the 16-bit elements of the low
    half of every 128-bit lane of @a a and @a b.
*/
static SIMDPP_INL
uint16x16 i_zip8_lo(const uint16x16& a, const uint16x16& b)
{
    const uint16x16 interleaved = _mm256_unpacklo_epi16(a.native(), b.native());
    return interleaved;
}
#endif
89
#if SIMDPP_USE_AVX512BW
/** 512-bit AVX512BW overload: interleaves the 16-bit elements of the low half
    of every 128-bit lane of @a a and @a b via _mm512_unpacklo_epi16.

    @param a vector supplying the even-indexed result elements
    @param b vector supplying the odd-indexed result elements
    @return the interleaved vector
*/
SIMDPP_INL uint16<32> i_zip8_lo(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_unpacklo_epi16(a.native(), b.native());
}
#endif
96
97	template<unsigned N> SIMDPP_INL
98	uint16<N> i_zip8_lo(const uint16<N>& a, const uint16<N>& b)
99	{
100	SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_zip8_lo, a, b)
101	}
102
103	// -----------------------------------------------------------------------------
104
105	static SIMDPP_INL
106	uint32x4 i_zip4_lo(const uint32x4& a, const uint32x4& b)
107	{
108	#if SIMDPP_USE_NULL
109	return detail::null::zip4_lo(a, b);
110	#elif SIMDPP_USE_SSE2
111	return _mm_unpacklo_epi32(a.native(), b.native());
112	#elif SIMDPP_USE_NEON
113	return vzipq_u32(a.native(), b.native()).val[`0`];
114	#elif SIMDPP_USE_ALTIVEC
115	return vec_mergeh(a.native(), b.native());
116	#elif SIMDPP_USE_MSA
117	return (v4u32) __msa_ilvr_w((v4i32)b.native(), (v4i32)a.native());
118	#endif
119	}
120
#if SIMDPP_USE_AVX2
/** 256-bit AVX2 overload: forwards both native registers to
    _mm256_unpacklo_epi32, which interleaves the 32-bit elements of the low
    half of every 128-bit lane of @a a and @a b.
*/
static SIMDPP_INL
uint32x8 i_zip4_lo(const uint32x8& a, const uint32x8& b)
{
    const uint32x8 interleaved = _mm256_unpacklo_epi32(a.native(), b.native());
    return interleaved;
}
#endif
128
#if SIMDPP_USE_AVX512F
/** 512-bit AVX512F overload: interleaves the 32-bit elements of the low half
    of every 128-bit lane of @a a and @a b via _mm512_unpacklo_epi32.

    @param a vector supplying the even-indexed result elements
    @param b vector supplying the odd-indexed result elements
    @return the interleaved vector
*/
static SIMDPP_INL
uint32<16> i_zip4_lo(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_unpacklo_epi32(a.native(), b.native());
}
#endif
136
137	template<unsigned N> SIMDPP_INL
138	uint32<N> i_zip4_lo(const uint32<N>& a, const uint32<N>& b)
139	{
140	SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_zip4_lo, a, b)
141	}
142
143	// -----------------------------------------------------------------------------
144
145	static SIMDPP_INL
146	uint64x2 i_zip2_lo(const uint64x2& a, const uint64x2& b)
147	{
148	#if SIMDPP_USE_SSE2
149	return _mm_unpacklo_epi64(a.native(), b.native());
150	#elif SIMDPP_USE_NEON
151	return neon::zip2_lo(a, b);
152	#elif SIMDPP_USE_VSX_207
153	return vec_mergeh(a.native(), b.native());
154	#elif SIMDPP_USE_MSA
155	return (v2u64) __msa_ilvr_d((v2i64) b.native(), (v2i64) a.native());
156	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
157	return detail::null::zip2_lo(a, b);
158	#endif
159	}
160
#if SIMDPP_USE_AVX2
/** 256-bit AVX2 overload: forwards both native registers to
    _mm256_unpacklo_epi64, which interleaves the low 64-bit element of every
    128-bit lane of @a a and @a b.
*/
static SIMDPP_INL
uint64x4 i_zip2_lo(const uint64x4& a, const uint64x4& b)
{
    const uint64x4 interleaved = _mm256_unpacklo_epi64(a.native(), b.native());
    return interleaved;
}
#endif
168
#if SIMDPP_USE_AVX512F
/** 512-bit AVX512F overload: interleaves the low 64-bit element of every
    128-bit lane of @a a and @a b via _mm512_unpacklo_epi64.

    @param a vector supplying the even-indexed result elements
    @param b vector supplying the odd-indexed result elements
    @return the interleaved vector
*/
static SIMDPP_INL
uint64<8> i_zip2_lo(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_unpacklo_epi64(a.native(), b.native());
}
#endif
176
177	template<unsigned N> SIMDPP_INL
178	uint64<N> i_zip2_lo(const uint64<N>& a, const uint64<N>& b)
179	{
180	SIMDPP_VEC_ARRAY_IMPL2(uint64<N>, i_zip2_lo, a, b)
181	}
182
183	// -----------------------------------------------------------------------------
184
185	static SIMDPP_INL
186	float32x4 i_zip4_lo(const float32x4& a, const float32x4& b)
187	{
188	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
189	return detail::null::zip4_lo(a, b);
190	#elif SIMDPP_USE_SSE2
191	return _mm_unpacklo_ps(a.native(), b.native());
192	#elif SIMDPP_USE_NEON
193	return vzipq_f32(a.native(), b.native()).val[`0`];
194	#elif SIMDPP_USE_ALTIVEC
195	return vec_mergeh(a.native(), b.native());
196	#elif SIMDPP_USE_MSA
197	return (v4f32) __msa_ilvr_w((v4i32) b.native(), (v4i32) a.native());
198	#endif
199	}
200
#if SIMDPP_USE_AVX
/** 256-bit AVX overload: forwards both native registers to
    _mm256_unpacklo_ps, which interleaves the 32-bit float elements of the low
    half of every 128-bit lane of @a a and @a b.
*/
static SIMDPP_INL
float32x8 i_zip4_lo(const float32x8& a, const float32x8& b)
{
    const float32x8 interleaved = _mm256_unpacklo_ps(a.native(), b.native());
    return interleaved;
}
#endif
208
#if SIMDPP_USE_AVX512F
/** 512-bit AVX512F overload: interleaves the 32-bit float elements of the low
    half of every 128-bit lane of @a a and @a b via _mm512_unpacklo_ps.

    @param a vector supplying the even-indexed result elements
    @param b vector supplying the odd-indexed result elements
    @return the interleaved vector
*/
static SIMDPP_INL
float32<16> i_zip4_lo(const float32<16>& a, const float32<16>& b)
{
    return _mm512_unpacklo_ps(a.native(), b.native());
}
#endif
216
217	template<unsigned N> SIMDPP_INL
218	float32<N> i_zip4_lo(const float32<N>& a, const float32<N>& b)
219	{
220	SIMDPP_VEC_ARRAY_IMPL2(float32<N>, i_zip4_lo, a, b)
221	}
222
223	// -----------------------------------------------------------------------------
224
225	static SIMDPP_INL
226	float64x2 i_zip2_lo(const float64x2& a, const float64x2& b)
227	{
228	#if SIMDPP_USE_SSE2
229	return _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(a.native()),
230	_mm_castpd_ps(b.native())));
231	#elif SIMDPP_USE_NEON64
232	return vtrn1q_f64(a.native(), b.native());
233	#elif SIMDPP_USE_VSX_206
234	return (__vector double) vec_mergeh((__vector uint64_t)a.native(),
235	(__vector uint64_t)b.native());
236	#elif SIMDPP_USE_MSA
237	return (v2f64) __msa_ilvr_d((v2i64) b.native(), (v2i64) a.native());
238	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_NEON
239	return detail::null::zip2_lo(a, b);
240	#endif
241	}
242
#if SIMDPP_USE_AVX
/** 256-bit AVX overload: forwards both native registers to
    _mm256_unpacklo_pd, which interleaves the low 64-bit float element of
    every 128-bit lane of @a a and @a b.
*/
static SIMDPP_INL
float64x4 i_zip2_lo(const float64x4& a, const float64x4& b)
{
    const float64x4 interleaved = _mm256_unpacklo_pd(a.native(), b.native());
    return interleaved;
}
#endif
250
#if SIMDPP_USE_AVX512F
/** 512-bit AVX512F overload: interleaves the low 64-bit float element of
    every 128-bit lane of @a a and @a b via _mm512_unpacklo_pd.

    @param a vector supplying the even-indexed result elements
    @param b vector supplying the odd-indexed result elements
    @return the interleaved vector
*/
static SIMDPP_INL
float64<8> i_zip2_lo(const float64<8>& a, const float64<8>& b)
{
    return _mm512_unpacklo_pd(a.native(), b.native());
}
#endif
258
259	template<unsigned N> SIMDPP_INL
260	float64<N> i_zip2_lo(const float64<N>& a, const float64<N>& b)
261	{
262	SIMDPP_VEC_ARRAY_IMPL2(float64<N>, i_zip2_lo, a, b)
263	}
264
265
266	} // namespace insn
267	} // namespace detail
268	} // namespace SIMDPP_ARCH_NAMESPACE
269	} // namespace simdpp
270
271	#endif
272

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/zip_lo.h