1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_UNZIP_HI_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_UNZIP_HI_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/shuffle2.h> |
17 | #include <simdpp/core/zip_hi.h> |
18 | #include <simdpp/detail/null/shuffle.h> |
19 | |
20 | namespace simdpp { |
21 | namespace SIMDPP_ARCH_NAMESPACE { |
22 | namespace detail { |
23 | namespace insn { |
24 | |
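// unzip_hi de-interleaves the odd-indexed elements of the two inputs:
//   i_unzip16_hi([a0 a1 ... a15], [b0 b1 ... b15])
//       -> [ a1 a3 ... a15  b1 b3 ... b15 ]
// Each overload below realizes this pattern with the cheapest instruction
// sequence available on the target ISA.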
25 | static SIMDPP_INL |
26 | uint8x16 i_unzip16_hi(const uint8x16& ca, const uint8x16& cb) |
27 | { |
28 | uint8<16> a = ca, b = cb; |
29 | #if SIMDPP_USE_NULL |
30 | return detail::null::unzip16_hi(a, b); |
31 | #elif SIMDPP_USE_SSE2 |
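    // Arithmetic right shift by 8 moves the high (odd-indexed) byte of each
    // 16-bit lane into the low byte, sign-extended. The shifted values always
    // fit into int8, so the signed saturation of packs_epi16 never triggers
    // and the pack reduces to an exact truncation.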
32 | a = _mm_srai_epi16(a.native(), 8); |
33 | b = _mm_srai_epi16(b.native(), 8); |
34 | a = _mm_packs_epi16(a.native(), b.native()); |
35 | return a; |
36 | #elif SIMDPP_USE_NEON |
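    // vuzpq_u8 de-interleaves both inputs at once; val[1] holds the
    // odd-indexed elements.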
37 | return vuzpq_u8(a.native(), b.native()).val[1]; |
38 | #elif SIMDPP_USE_ALTIVEC |
39 | #if SIMDPP_BIG_ENDIAN |
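    // On big-endian targets the low byte of each 16-bit element is the
    // odd-indexed byte, so a truncating pack extracts exactly those bytes.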
40 | return vec_pack((__vector uint16_t) a.native(), |
41 | (__vector uint16_t) b.native()); |
42 | #else |
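    // On little-endian targets select the odd bytes 1,3,...,31 of the
    // concatenation [a, b] explicitly. The self-referential initialization of
    // the mask is the idiom used throughout the library.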
43 | uint8x16 mask = make_shuffle_bytes16_mask<1,3,5,7,9,11,13,15, |
44 | 17,19,21,23,25,27,29,31>(mask); |
45 | return shuffle_bytes16(a, b, mask); |
46 | #endif |
47 | #elif SIMDPP_USE_MSA |
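    // pckod picks the odd-indexed elements of both operands; the swapped
    // argument order places the elements taken from a in the low half of
    // the result.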
48 | return (v16u8) __msa_pckod_b((v16i8) b.native(), (v16i8) a.native()); |
49 | #endif |
50 | } |
51 | |
52 | #if SIMDPP_USE_AVX2 |
53 | static SIMDPP_INL |
54 | uint8x32 i_unzip16_hi(const uint8x32& ca, const uint8x32& cb) |
55 | { |
56 | uint8<32> a = ca, b = cb; |
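    // Same shift-and-pack trick as the SSE2 path. _mm256_packs_epi16 packs
    // within each 128-bit lane, which matches the libsimdpp convention of
    // applying shuffles to each 128-bit half independently.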
57 | a = _mm256_srai_epi16(a.native(), 8); |
58 | b = _mm256_srai_epi16(b.native(), 8); |
59 | a = _mm256_packs_epi16(a.native(), b.native()); |
60 | return a; |
61 | } |
62 | #endif |
63 | |
64 | #if SIMDPP_USE_AVX512BW |
static SIMDPP_INL
uint8<64> i_unzip16_hi(const uint8<64>& ca, const uint8<64>& cb)
66 | { |
67 | uint8<64> a = ca, b = cb; |
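    // As above; _mm512_packs_epi16 also packs within each 128-bit lane.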
68 | a = _mm512_srai_epi16(a.native(), 8); |
69 | b = _mm512_srai_epi16(b.native(), 8); |
70 | a = _mm512_packs_epi16(a.native(), b.native()); |
71 | return a; |
72 | } |
73 | #endif |
74 | |
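// Generic case: apply the operation to each native vector of the array.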
75 | template<unsigned N> SIMDPP_INL |
76 | uint8<N> i_unzip16_hi(const uint8<N>& a, const uint8<N>& b) |
77 | { |
78 | SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_unzip16_hi, a, b) |
79 | } |
80 | |
81 | // ----------------------------------------------------------------------------- |
82 | |
83 | static SIMDPP_INL |
84 | uint16x8 i_unzip8_hi(const uint16x8& ca, const uint16x8& cb) |
85 | { |
86 | uint16<8> a = ca, b = cb; |
87 | #if SIMDPP_USE_NULL |
88 | return detail::null::unzip8_hi(a, b); |
89 | #elif SIMDPP_USE_SSE2 |
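    // Same shift-and-pack approach as i_unzip16_hi, on 32-bit lanes.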
90 | a = _mm_srai_epi32(a.native(), 16); |
91 | b = _mm_srai_epi32(b.native(), 16); |
92 | a = _mm_packs_epi32(a.native(), b.native()); |
93 | return a; |
94 | #elif SIMDPP_USE_NEON |
95 | return vuzpq_u16(a.native(), b.native()).val[1]; |
96 | #elif SIMDPP_USE_ALTIVEC |
97 | #if SIMDPP_BIG_ENDIAN |
    return vec_pack((__vector uint32_t) a.native(),
                    (__vector uint32_t) b.native());
99 | #else |
100 | uint16x8 mask = make_shuffle_bytes16_mask<1,3,5,7,9,11,13,15>(mask); |
101 | return shuffle_bytes16(a, b, mask); |
102 | #endif |
103 | #elif SIMDPP_USE_MSA |
104 | return (v8u16) __msa_pckod_h((v8i16) b.native(), (v8i16) a.native()); |
105 | #endif |
106 | } |
107 | |
108 | #if SIMDPP_USE_AVX2 |
109 | static SIMDPP_INL |
110 | uint16x16 i_unzip8_hi(const uint16x16& ca, const uint16x16& cb) |
111 | { |
112 | uint16<16> a = ca, b = cb; |
113 | a = _mm256_srai_epi32(a.native(), 16); |
114 | b = _mm256_srai_epi32(b.native(), 16); |
115 | a = _mm256_packs_epi32(a.native(), b.native()); |
116 | return a; |
117 | } |
118 | #endif |
119 | |
120 | #if SIMDPP_USE_AVX512BW |
static SIMDPP_INL
uint16<32> i_unzip8_hi(const uint16<32>& ca, const uint16<32>& cb)
122 | { |
123 | uint16<32> a = ca, b = cb; |
124 | a = _mm512_srai_epi32(a.native(), 16); |
125 | b = _mm512_srai_epi32(b.native(), 16); |
126 | a = _mm512_packs_epi32(a.native(), b.native()); |
127 | return a; |
128 | } |
129 | #endif |
130 | |
131 | template<unsigned N> SIMDPP_INL |
132 | uint16<N> i_unzip8_hi(const uint16<N>& a, const uint16<N>& b) |
133 | { |
134 | SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_unzip8_hi, a, b) |
135 | } |
136 | |
137 | // ----------------------------------------------------------------------------- |
138 | |
139 | static SIMDPP_INL |
140 | uint32x4 i_unzip4_hi(const uint32x4& a, const uint32x4& b) |
141 | { |
142 | #if SIMDPP_USE_NULL |
143 | return detail::null::unzip4_hi(a, b); |
144 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC |
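    // shuffle2<1,3,1,3> selects elements 1,3 of a followed by elements
    // 1,3 of b, i.e. the odd-indexed elements of both inputs.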
145 | return shuffle2<1,3,1,3>(a, b); |
146 | #elif SIMDPP_USE_NEON |
147 | return vuzpq_u32(a.native(), b.native()).val[1]; |
148 | #elif SIMDPP_USE_MSA |
149 | return (v4u32) __msa_pckod_w((v4i32) b.native(), (v4i32) a.native()); |
150 | #endif |
151 | } |
152 | |
153 | #if SIMDPP_USE_AVX2 |
154 | static SIMDPP_INL |
155 | uint32x8 i_unzip4_hi(const uint32x8& a, const uint32x8& b) |
156 | { |
157 | return shuffle2<1,3,1,3>(a, b); |
158 | } |
159 | #endif |
160 | |
161 | #if SIMDPP_USE_AVX512F |
162 | static SIMDPP_INL |
163 | uint32<16> i_unzip4_hi(const uint32<16>& a, const uint32<16>& b) |
164 | { |
165 | return shuffle2<1,3,1,3>(a, b); |
166 | } |
167 | #endif |
168 | |
169 | template<unsigned N> SIMDPP_INL |
170 | uint32<N> i_unzip4_hi(const uint32<N>& a, const uint32<N>& b) |
171 | { |
172 | SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_unzip4_hi, a, b) |
173 | } |
174 | |
175 | // ----------------------------------------------------------------------------- |
176 | |
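// For 2-element lanes unzip_hi and zip_hi coincide: both select
// r = [ a1, b1 ].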
177 | template<unsigned N> SIMDPP_INL |
178 | uint64<N> i_unzip2_hi(const uint64<N>& a, const uint64<N>& b) |
179 | { |
180 | return i_zip2_hi(a, b); |
181 | } |
182 | |
183 | // ----------------------------------------------------------------------------- |
184 | |
185 | static SIMDPP_INL |
186 | float32x4 i_unzip4_hi(const float32x4& a, const float32x4& b) |
187 | { |
188 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
189 | return detail::null::unzip4_hi(a, b); |
190 | #elif SIMDPP_USE_SSE2 |
    return shuffle2<1,3,1,3>(a, b);
192 | #elif SIMDPP_USE_NEON |
193 | return vuzpq_f32(a.native(), b.native()).val[1]; |
194 | #elif SIMDPP_USE_ALTIVEC |
195 | return float32x4(i_unzip4_hi((uint32x4)a, (uint32x4)b)); |
196 | #elif SIMDPP_USE_MSA |
197 | return (v4f32) __msa_pckod_w((v4i32) b.native(), (v4i32) a.native()); |
198 | #endif |
199 | } |
200 | |
201 | #if SIMDPP_USE_AVX |
202 | static SIMDPP_INL |
203 | float32x8 i_unzip4_hi(const float32x8& a, const float32x8& b) |
204 | { |
205 | return shuffle2<1,3,1,3>(a, b); |
206 | } |
207 | #endif |
208 | |
209 | #if SIMDPP_USE_AVX512F |
210 | static SIMDPP_INL |
211 | float32<16> i_unzip4_hi(const float32<16>& a, const float32<16>& b) |
212 | { |
213 | return shuffle2<1,3,1,3>(a, b); |
214 | } |
215 | #endif |
216 | |
217 | template<unsigned N> SIMDPP_INL |
218 | float32<N> i_unzip4_hi(const float32<N>& a, const float32<N>& b) |
219 | { |
220 | SIMDPP_VEC_ARRAY_IMPL2(float32<N>, i_unzip4_hi, a, b) |
221 | } |
222 | |
223 | // ----------------------------------------------------------------------------- |
224 | |
225 | template<unsigned N> SIMDPP_INL |
226 | float64<N> i_unzip2_hi(const float64<N>& a, const float64<N>& b) |
227 | { |
228 | return i_zip2_hi(a, b); |
229 | } |
230 | |
231 | } // namespace insn |
232 | } // namespace detail |
233 | } // namespace SIMDPP_ARCH_NAMESPACE |
234 | } // namespace simdpp |
235 | |
236 | #endif |