1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_UNZIP_HI_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_UNZIP_HI_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/shuffle2.h>
17#include <simdpp/core/zip_hi.h>
18#include <simdpp/detail/null/shuffle.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace insn {
24
25static SIMDPP_INL
26uint8x16 i_unzip16_hi(const uint8x16& ca, const uint8x16& cb)
27{
28 uint8<16> a = ca, b = cb;
29#if SIMDPP_USE_NULL
30 return detail::null::unzip16_hi(a, b);
31#elif SIMDPP_USE_SSE2
32 a = _mm_srai_epi16(a.native(), 8);
33 b = _mm_srai_epi16(b.native(), 8);
34 a = _mm_packs_epi16(a.native(), b.native());
35 return a;
36#elif SIMDPP_USE_NEON
37 return vuzpq_u8(a.native(), b.native()).val[1];
38#elif SIMDPP_USE_ALTIVEC
39#if SIMDPP_BIG_ENDIAN
40 return vec_pack((__vector uint16_t) a.native(),
41 (__vector uint16_t) b.native());
42#else
43 uint8x16 mask = make_shuffle_bytes16_mask<1,3,5,7,9,11,13,15,
44 17,19,21,23,25,27,29,31>(mask);
45 return shuffle_bytes16(a, b, mask);
46#endif
47#elif SIMDPP_USE_MSA
48 return (v16u8) __msa_pckod_b((v16i8) b.native(), (v16i8) a.native());
49#endif
50}
51
#if SIMDPP_USE_AVX2
// AVX2 overload for 32-byte vectors. The 256-bit pack instruction works on
// each 128-bit lane separately, so every lane is unzipped independently.
static SIMDPP_INL
uint8x32 i_unzip16_hi(const uint8x32& ca, const uint8x32& cb)
{
    uint8<32> lhs = ca;
    uint8<32> rhs = cb;
    // Sign-extend the odd byte of each 16-bit lane, then narrow with a signed
    // pack that cannot saturate on such values.
    lhs = _mm256_packs_epi16(_mm256_srai_epi16(lhs.native(), 8),
                             _mm256_srai_epi16(rhs.native(), 8));
    return lhs;
}
#endif
63
#if SIMDPP_USE_AVX512BW
// AVX-512BW overload for 64-byte vectors. The 512-bit pack instruction works
// on each 128-bit lane separately, so every lane is unzipped independently.
SIMDPP_INL uint8<64> i_unzip16_hi(const uint8<64>& ca, const uint8<64>& cb)
{
    uint8<64> lhs = ca;
    uint8<64> rhs = cb;
    // Sign-extend the odd byte of each 16-bit lane, then narrow with a signed
    // pack that cannot saturate on such values.
    lhs = _mm512_packs_epi16(_mm512_srai_epi16(lhs.native(), 8),
                             _mm512_srai_epi16(rhs.native(), 8));
    return lhs;
}
#endif
74
75template<unsigned N> SIMDPP_INL
76uint8<N> i_unzip16_hi(const uint8<N>& a, const uint8<N>& b)
77{
78 SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_unzip16_hi, a, b)
79}
80
81// -----------------------------------------------------------------------------
82
83static SIMDPP_INL
84uint16x8 i_unzip8_hi(const uint16x8& ca, const uint16x8& cb)
85{
86 uint16<8> a = ca, b = cb;
87#if SIMDPP_USE_NULL
88 return detail::null::unzip8_hi(a, b);
89#elif SIMDPP_USE_SSE2
90 a = _mm_srai_epi32(a.native(), 16);
91 b = _mm_srai_epi32(b.native(), 16);
92 a = _mm_packs_epi32(a.native(), b.native());
93 return a;
94#elif SIMDPP_USE_NEON
95 return vuzpq_u16(a.native(), b.native()).val[1];
96#elif SIMDPP_USE_ALTIVEC
97#if SIMDPP_BIG_ENDIAN
98 return vec_pack((__vector uint32_t)a.native(), (__vector uint32_t)b.native());
99#else
100 uint16x8 mask = make_shuffle_bytes16_mask<1,3,5,7,9,11,13,15>(mask);
101 return shuffle_bytes16(a, b, mask);
102#endif
103#elif SIMDPP_USE_MSA
104 return (v8u16) __msa_pckod_h((v8i16) b.native(), (v8i16) a.native());
105#endif
106}
107
#if SIMDPP_USE_AVX2
// AVX2 overload for sixteen 16-bit elements. The 256-bit pack instruction
// works on each 128-bit lane separately, so every lane is unzipped
// independently.
static SIMDPP_INL
uint16x16 i_unzip8_hi(const uint16x16& ca, const uint16x16& cb)
{
    uint16<16> lhs = ca;
    uint16<16> rhs = cb;
    // Sign-extend the odd 16-bit element of each 32-bit lane, then narrow with
    // a signed pack that cannot saturate on such values.
    lhs = _mm256_packs_epi32(_mm256_srai_epi32(lhs.native(), 16),
                             _mm256_srai_epi32(rhs.native(), 16));
    return lhs;
}
#endif
119
#if SIMDPP_USE_AVX512BW
// AVX-512BW overload for thirty-two 16-bit elements. The 512-bit pack
// instruction works on each 128-bit lane separately, so every lane is
// unzipped independently.
SIMDPP_INL uint16<32> i_unzip8_hi(const uint16<32>& ca, const uint16<32>& cb)
{
    uint16<32> lhs = ca;
    uint16<32> rhs = cb;
    // Sign-extend the odd 16-bit element of each 32-bit lane, then narrow with
    // a signed pack that cannot saturate on such values.
    lhs = _mm512_packs_epi32(_mm512_srai_epi32(lhs.native(), 16),
                             _mm512_srai_epi32(rhs.native(), 16));
    return lhs;
}
#endif
130
131template<unsigned N> SIMDPP_INL
132uint16<N> i_unzip8_hi(const uint16<N>& a, const uint16<N>& b)
133{
134 SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_unzip8_hi, a, b)
135}
136
137// -----------------------------------------------------------------------------
138
139static SIMDPP_INL
140uint32x4 i_unzip4_hi(const uint32x4& a, const uint32x4& b)
141{
142#if SIMDPP_USE_NULL
143 return detail::null::unzip4_hi(a, b);
144#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
145 return shuffle2<1,3,1,3>(a, b);
146#elif SIMDPP_USE_NEON
147 return vuzpq_u32(a.native(), b.native()).val[1];
148#elif SIMDPP_USE_MSA
149 return (v4u32) __msa_pckod_w((v4i32) b.native(), (v4i32) a.native());
150#endif
151}
152
#if SIMDPP_USE_AVX2
// AVX2 overload for eight 32-bit elements; uses the same <1,3,1,3> selection
// as the four-element overload.
static SIMDPP_INL
uint32x8 i_unzip4_hi(const uint32x8& a, const uint32x8& b)
{
    return shuffle2<1,3,1,3>(a, b);
}
#endif
160
#if SIMDPP_USE_AVX512F
// AVX-512F overload for sixteen 32-bit elements; uses the same <1,3,1,3>
// selection as the four-element overload.
static SIMDPP_INL
uint32<16> i_unzip4_hi(const uint32<16>& a, const uint32<16>& b)
{
    return shuffle2<1,3,1,3>(a, b);
}
#endif
168
169template<unsigned N> SIMDPP_INL
170uint32<N> i_unzip4_hi(const uint32<N>& a, const uint32<N>& b)
171{
172 SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_unzip4_hi, a, b)
173}
174
175// -----------------------------------------------------------------------------
176
177template<unsigned N> SIMDPP_INL
178uint64<N> i_unzip2_hi(const uint64<N>& a, const uint64<N>& b)
179{
180 return i_zip2_hi(a, b);
181}
182
183// -----------------------------------------------------------------------------
184
185static SIMDPP_INL
186float32x4 i_unzip4_hi(const float32x4& a, const float32x4& b)
187{
188#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
189 return detail::null::unzip4_hi(a, b);
190#elif SIMDPP_USE_SSE2
191 return shuffle2<1,3,1,3>(a,b);
192#elif SIMDPP_USE_NEON
193 return vuzpq_f32(a.native(), b.native()).val[1];
194#elif SIMDPP_USE_ALTIVEC
195 return float32x4(i_unzip4_hi((uint32x4)a, (uint32x4)b));
196#elif SIMDPP_USE_MSA
197 return (v4f32) __msa_pckod_w((v4i32) b.native(), (v4i32) a.native());
198#endif
199}
200
#if SIMDPP_USE_AVX
// AVX overload for eight floats; uses the same <1,3,1,3> selection as the
// four-element overload.
static SIMDPP_INL
float32x8 i_unzip4_hi(const float32x8& a, const float32x8& b)
{
    return shuffle2<1,3,1,3>(a, b);
}
#endif
208
#if SIMDPP_USE_AVX512F
// AVX-512F overload for sixteen floats; uses the same <1,3,1,3> selection as
// the four-element overload.
static SIMDPP_INL
float32<16> i_unzip4_hi(const float32<16>& a, const float32<16>& b)
{
    return shuffle2<1,3,1,3>(a, b);
}
#endif
216
217template<unsigned N> SIMDPP_INL
218float32<N> i_unzip4_hi(const float32<N>& a, const float32<N>& b)
219{
220 SIMDPP_VEC_ARRAY_IMPL2(float32<N>, i_unzip4_hi, a, b)
221}
222
223// -----------------------------------------------------------------------------
224
225template<unsigned N> SIMDPP_INL
226float64<N> i_unzip2_hi(const float64<N>& a, const float64<N>& b)
227{
228 return i_zip2_hi(a, b);
229}
230
231} // namespace insn
232} // namespace detail
233} // namespace SIMDPP_ARCH_NAMESPACE
234} // namespace simdpp
235
236#endif
237
238