1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_UNZIP_HI_H
9#define LIBSIMDPP_SIMDPP_CORE_UNZIP_HI_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/unzip_hi.h>
17
18namespace simdpp {
19namespace SIMDPP_ARCH_NAMESPACE {
20
/** De-interleaves the higher elements (those at odd zero-based positions: a1, a3, ...) of two vectors
22
23 For example, in case of int8x16:
24
25 @code
26 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
27 r = [ a1 a3 a5 a7 a9 a11 a13 a15 b1 b3 b5 b7 b9 b11 b13 b15 ]
28 @endcode
29
30 @par int8
31
32 @par 128-bit version:
33 @icost{SSE2-AVX2, 3}
34
35 @par 256-bit version:
 The lower and higher 128-bit halves are processed as if a 128-bit instruction
 were applied to each of them separately.
38
39 @icost{SSE2-AVX, 6}
40 @icost{NEON, ALTIVEC, 2}
41 @icost{AVX2, 3}
42
43 @par int16
44
45 @par 128-bit version:
46 @icost{SSE2-AVX2, 3}
47
48 @par 256-bit version:
 The lower and higher 128-bit halves are processed as if a 128-bit instruction
 were applied to each of them separately.
51
52 @icost{SSE2-AVX, 6}
53 @icost{NEON, ALTIVEC, 2}
54 @icost{AVX2, 3}
55
56 @par int32
57
58 @par 128-bit version:
59 @icost{ALTIVEC, 1-2}
60
61 @par 256-bit version:
 The lower and higher 128-bit halves are processed as if a 128-bit instruction
 were applied to each of them separately.
64
65 @icost{ALTIVEC, 2-3}
66 @icost{SSE2-AVX, NEON, 2}
67
68 @par int64
69
70 @par 128-bit version:
71 @icost{ALTIVEC, 1-2}
72
73 @par 256-bit version:
 The lower and higher 128-bit halves are processed as if a 128-bit instruction
 were applied to each of them separately.
76
77 @icost{ALTIVEC, 2-3}
78 @icost{SSE2-AVX, NEON, 2}
79
80 @par float32
81 @par 256-bit version:
 The lower and higher 128-bit halves are processed as if a 128-bit instruction
 were applied to each of them separately.
84
85 @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
86
87 @par float64
88 @par 128-bit version:
89 @novec{NEON, ALTIVEC}
90
91 @par 256-bit version:
 The lower and higher 128-bit halves are processed as if a 128-bit instruction
 were applied to each of them separately.
94
95 @novec{NEON, ALTIVEC}
96 @icost{SSE2-AVX, 2}
97*/
98template<unsigned N, class V1, class V2> SIMDPP_INL
99typename detail::get_expr2_nomask<V1, V2>::empty
100 unzip16_hi(const any_vec8<N,V1>& a, const any_vec8<N,V2>& b)
101{
102 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
103 ra = a.wrapped().eval();
104 rb = b.wrapped().eval();
105 return detail::insn::i_unzip16_hi(ra, rb);
106}
107
108template<unsigned N, class V1, class V2> SIMDPP_INL
109typename detail::get_expr2_nomask<V1, V2>::empty
110 unzip8_hi(const any_vec16<N,V1>& a, const any_vec16<N,V2>& b)
111{
112 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
113 ra = a.wrapped().eval();
114 rb = b.wrapped().eval();
115 return detail::insn::i_unzip8_hi(ra, rb);
116}
117
118template<unsigned N, class V1, class V2> SIMDPP_INL
119typename detail::get_expr2_nomask<V1, V2>::empty
120 unzip4_hi(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
121{
122 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
123 ra = a.wrapped().eval();
124 rb = b.wrapped().eval();
125 return detail::insn::i_unzip4_hi(ra, rb);
126}
127
128template<unsigned N, class V1, class V2> SIMDPP_INL
129typename detail::get_expr2_nomask<V1, V2>::empty
130 unzip2_hi(const any_vec64<N,V1>& a, const any_vec64<N,V2>& b)
131{
132 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
133 ra = a.wrapped().eval();
134 rb = b.wrapped().eval();
135 return detail::insn::i_unzip2_hi(ra, rb);
136}
137
138} // namespace SIMDPP_ARCH_NAMESPACE
139} // namespace simdpp
140
141#endif
142
143