unzip_lo.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/unzip_lo.h]

1	/ Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>*
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_UNZIP_LO_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_UNZIP_LO_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/bit_and.h>
17	#include <simdpp/core/shuffle2.h>
18	#include <simdpp/detail/insn/zip_lo.h>
19	#include <simdpp/detail/null/shuffle.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26
27	static SIMDPP_INL
28	uint8x16 i_unzip16_lo(const uint8x16& ca, const uint8x16& cb)
29	{
30	uint8<`16`> a = ca, b = cb;
31	#if SIMDPP_USE_NULL
32	return detail::null::unzip16_lo(a, b);
33	#elif SIMDPP_USE_SSE2
34	uint16x8 mask, r;
35	mask = make_ones();
36	mask = _mm_srli_epi16(mask.native(), `8`);
37	a = bit_and(a, mask);
38	b = bit_and(b, mask);
39	r = _mm_packus_epi16(a.native(), b.native());
40	return (uint8x16)r;
41	#elif SIMDPP_USE_NEON
42	return vuzpq_u8(a.native(), b.native()).val[`0`];
43	#elif SIMDPP_USE_ALTIVEC
44	#if SIMDPP_BIG_ENDIAN
45	uint8x16 mask = make_shuffle_bytes16_mask<`0`,`2`,`4`,`6`,`8`,`10`,`12`,`14`,
46	`16`,`18`,`20`,`22`,`24`,`26`,`28`,`30`>(mask);
47	return shuffle_bytes16(a, b, mask);
48	#else
49	return vec_pack((__vector uint16_t) a.native(),
50	(__vector uint16_t) b.native());
51	#endif
52	#elif SIMDPP_USE_MSA
53	return (v16u8) __msa_pckev_b((v16i8) b.native(), (v16i8) a.native());
54	#endif
55	}
56
57	#if SIMDPP_USE_AVX2
58	static SIMDPP_INL
59	uint8x32 i_unzip16_lo(const uint8x32& ca, const uint8x32& cb)
60	{
61	uint8<`32`> a = ca, b = cb;
62	uint16x16 mask, r;
63	mask = make_ones();
64	mask = _mm256_srli_epi16(mask.native(), `8`);
65	a = bit_and(a, mask);
66	b = bit_and(b, mask);
67	r = _mm256_packus_epi16(a.native(), b.native());
68	return uint8x32(r);
69	}
70	#endif
71
72	#if SIMDPP_USE_AVX512BW
73	SIMDPP_INL uint8<`64`> i_unzip16_lo(const uint8<`64`>& ca, const uint8<`64`>& cb)
74	{
75	uint8<`64`> a = ca, b = cb;
76	uint16<`32`> mask, r;
77	mask = make_ones();
78	mask = _mm512_srli_epi16(mask.native(), `8`);
79	a = bit_and(a, mask);
80	b = bit_and(b, mask);
81	r = _mm512_packus_epi16(a.native(), b.native());
82	return uint8<`64`>(r);
83	}
84	#endif
85
86	template<unsigned N> SIMDPP_INL
87	uint8<N> i_unzip16_lo(const uint8<N>& a, const uint8<N>& b)
88	{
89	SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_unzip16_lo, a, b);
90	}
91
92	// -----------------------------------------------------------------------------
93
94	static SIMDPP_INL
95	uint16x8 i_unzip8_lo(const uint16x8& ca, const uint16x8& cb)
96	{
97	uint16<`8`> a = ca, b = cb;
98	#if SIMDPP_USE_NULL
99	return detail::null::unzip8_lo(a, b);
100	#elif SIMDPP_USE_SSE4_1
101	uint32x4 mask, r;
102	mask = make_ones();
103	mask = _mm_srli_epi32(mask.native(), `16`);
104	a = bit_and(a, mask);
105	b = bit_and(b, mask);
106	r = _mm_packus_epi32(a.native(), b.native());
107	return uint16x8 (r);
108	#elif SIMDPP_USE_SSE2
109	uint32x4 r;
110	a = _mm_slli_epi32(a.native(), `16`);
111	b = _mm_slli_epi32(b.native(), `16`);
112	a = _mm_srai_epi32(a.native(), `16`);
113	b = _mm_srai_epi32(b.native(), `16`);
114	r = _mm_packs_epi32(a.native(), b.native());
115	return uint16x8(r);
116	#elif SIMDPP_USE_NEON
117	return vuzpq_u16(a.native(), b.native()).val[`0`];
118	#elif SIMDPP_USE_ALTIVEC
119	#if SIMDPP_BIG_ENDIAN
120	uint16x8 mask = make_shuffle_bytes16_mask<`0`,`2`,`4`,`6`,`8`,`10`,`12`,`14`>(mask);
121	return shuffle_bytes16(a, b, mask);
122	#else
123	return vec_pack((__vector uint32_t) a.native(),
124	(__vector uint32_t) b.native());
125	#endif
126	#elif SIMDPP_USE_MSA
127	return (v8u16) __msa_pckev_h((v8i16) b.native(), (v8i16) a.native());
128	#endif
129	}
130
131	#if SIMDPP_USE_AVX2
132	static SIMDPP_INL
133	uint16x16 i_unzip8_lo(const uint16x16& ca, const uint16x16& cb)
134	{
135	uint16<`16`> a = ca, b = cb;
136	uint32x8 mask, r;
137	mask = make_ones();
138	mask = _mm256_srli_epi32(mask.native(), `16`);
139	a = bit_and(a, mask);
140	b = bit_and(b, mask);
141	r = _mm256_packus_epi32(a.native(), b.native());
142	return uint16x16(r);
143	}
144	#endif
145
146	#if SIMDPP_USE_AVX512BW
147	SIMDPP_INL uint16<`32`> i_unzip8_lo(const uint16<`32`>& ca, const uint16<`32`>& cb)
148	{
149	uint16<`32`> a = ca, b = cb;
150	uint32<`16`> mask, r;
151	mask = make_ones();
152	mask = _mm512_srli_epi32(mask.native(), `16`);
153	a = bit_and(a, mask);
154	b = bit_and(b, mask);
155	r = _mm512_packus_epi32(a.native(), b.native());
156	return uint16<`32`>(r);
157	}
158	#endif
159
160	template<unsigned N> SIMDPP_INL
161	uint16<N> i_unzip8_lo(const uint16<N>& a, const uint16<N>& b)
162	{
163	SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_unzip8_lo, a, b);
164	}
165
166	// -----------------------------------------------------------------------------
167
168	static SIMDPP_INL
169	uint32x4 i_unzip4_lo(const uint32x4& a, const uint32x4& b)
170	{
171	#if SIMDPP_USE_NULL
172	return detail::null::unzip4_lo(a, b);
173	#elif SIMDPP_USE_SSE2
174	return shuffle2<`0`,`2`,`0`,`2`>(a,b);
175	#elif SIMDPP_USE_NEON
176	return vuzpq_u32(a.native(), b.native()).val[`0`];
177	#elif SIMDPP_USE_ALTIVEC
178	uint32x4 mask = make_shuffle_bytes16_mask<`0`,`2`,`4`,`6`>(mask);
179	return shuffle_bytes16(a, b, mask);
180	#elif SIMDPP_USE_MSA
181	return (v4u32) __msa_pckev_w((v4i32) b.native(), (v4i32) a.native());
182	#endif
183	}
184
185	#if SIMDPP_USE_AVX2
186	static SIMDPP_INL
187	uint32x8 i_unzip4_lo(const uint32x8& a, const uint32x8& b)
188	{
189	return shuffle2<`0`,`2`,`0`,`2`>(a,b);
190	}
191	#endif
192
193	#if SIMDPP_USE_AVX512F
194	static SIMDPP_INL
195	uint32<`16`> i_unzip4_lo(const uint32<`16`>& a, const uint32<`16`>& b)
196	{
197	return shuffle2<`0`,`2`,`0`,`2`>(a,b);
198	}
199	#endif
200
201	template<unsigned N> SIMDPP_INL
202	uint32<N> i_unzip4_lo(const uint32<N>& a, const uint32<N>& b)
203	{
204	SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_unzip4_lo, a, b);
205	}
206
207	// -----------------------------------------------------------------------------
208
209	template<unsigned N> SIMDPP_INL
210	uint64<N> i_unzip2_lo(const uint64<N>& a, const uint64<N>& b)
211	{
212	return zip2_lo(a, b);
213	}
214
215	// -----------------------------------------------------------------------------
216
217	static SIMDPP_INL
218	float32x4 i_unzip4_lo(const float32x4& a, const float32x4& b)
219	{
220	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
221	return detail::null::unzip4_lo(a, b);
222	#elif SIMDPP_USE_SSE2
223	return shuffle2<`0`,`2`,`0`,`2`>(a,b);
224	#elif SIMDPP_USE_NEON
225	return vuzpq_f32(a.native(), b.native()).val[`0`];
226	#elif SIMDPP_USE_ALTIVEC
227	uint32x4 mask = make_shuffle_bytes16_mask<`0`,`2`,`4`,`6`>(mask);
228	return shuffle_bytes16(a, b, mask);
229	#elif SIMDPP_USE_MSA
230	return (v4f32) __msa_pckev_w((v4i32) b.native(), (v4i32) a.native());
231	#endif
232	}
233
234	#if SIMDPP_USE_AVX
235	static SIMDPP_INL
236	float32x8 i_unzip4_lo(const float32x8& a, const float32x8& b)
237	{
238	return shuffle2<`0`,`2`,`0`,`2`>(a,b);
239	}
240	#endif
241
242	#if SIMDPP_USE_AVX512F
243	static SIMDPP_INL
244	float32<`16`> i_unzip4_lo(const float32<`16`>& a, const float32<`16`>& b)
245	{
246	return shuffle2<`0`,`2`,`0`,`2`>(a,b);
247	}
248	#endif
249
250	template<unsigned N> SIMDPP_INL
251	float32<N> i_unzip4_lo(const float32<N>& a, const float32<N>& b)
252	{
253	SIMDPP_VEC_ARRAY_IMPL2(float32<N>, i_unzip4_lo, a, b);
254	}
255
256	// -----------------------------------------------------------------------------
257
258	template<unsigned N> SIMDPP_INL
259	float64<N> i_unzip2_lo(const float64<N>& a, const float64<N>& b)
260	{
261	return i_zip2_lo(a, b);
262	}
263
264	} // namespace insn
265	} // namespace detail
266	} // namespace SIMDPP_ARCH_NAMESPACE
267	} // namespace simdpp
268
269	#endif
270
271

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/unzip_lo.h