1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_UNZIP_LO_H
9#define LIBSIMDPP_SIMDPP_CORE_UNZIP_LO_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/unzip_lo.h>
17
18namespace simdpp {
19namespace SIMDPP_ARCH_NAMESPACE {
20
/** De-interleaves the odd (lower) elements of two vectors.
22
23 For example, in case of int8x16:
24
25 @code
26 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
27 r = [ a0 a2 a4 a6 a8 a10 a12 a14 b0 b2 b4 b6 b8 b10 b12 b14 ]
28 @endcode
29
30 @par int8
31
32 @par 128-bit version:
33 @icost{SSE2-AVX2, 4-5}
34 @icost{ALTIVEC, 1-2}
35
36 @par 256-bit version:
37 The lower and higher 128-bit halves are processed as if 128-bit instruction
38 was applied to each of them separately.
39
40 @icost{SSE2-AVX, 8-9}
41 @icost{NEON, 2}
42 @icost{AVX2, 4-5}
43 @icost{ALTIVEC, 2-3}
44
45 @par int16
46
47 @par 128-bit version:
48 @icost{SSE2-SSSE3, 5}
49 @icost{SSE4.1-AVX2, 4-5}
50 @icost{ALTIVEC, 1-2}
51
52 @par 256-bit version:
53 The lower and higher 128-bit halves are processed as if 128-bit instruction
54 was applied to each of them separately.
55
56 @icost{SSE2-SSSE3, 5}
57 @icost{SSE4.1-AVX, 8-9}
58 @icost{AVX2, 4-5}
59 @icost{NEON, 2}
60 @icost{ALTIVEC, 2-3}
61
62 @par int32
63
64 @par 128-bit version:
65 @icost{ALTIVEC, 1-2}
66
67 @par 256-bit version:
68 @icost{SSE2-AVX, NEON, 2}
69 @icost{ALTIVEC, 2-3}
70
71 The lower and higher 128-bit halves are processed as if 128-bit instruction
72 was applied to each of them separately.
73
74 @par int64
75
76 @par 256-bit version:
77 The lower and higher 128-bit halves are processed as if 128-bit instruction
78 was applied to each of them separately.
79
80 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
81
82 @par float32
83
84 @par 256-bit version:
85 The lower and higher 128-bit halves are processed as if 128-bit instruction
86 was applied to each of them separately.
87
88 @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
89
90 @par float64
91
92 @par 128-bit version:
93 @novec{NEON, ALTIVEC}
94
95 @par 256-bit version:
96 @icost{SSE2-AVX, 2}
97 @novec{NEON, ALTIVEC}
98
99 The lower and higher 128-bit halves are processed as if 128-bit instruction
100 was applied to each of them separately.
101*/
102template<unsigned N, class V1, class V2> SIMDPP_INL
103typename detail::get_expr2_nomask<V1, V2>::empty
104 unzip16_lo(const any_vec8<N,V1>& a, const any_vec8<N,V2>& b)
105{
106 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
107 ra = a.wrapped().eval();
108 rb = b.wrapped().eval();
109 return detail::insn::i_unzip16_lo(ra, rb);
110}
111
112template<unsigned N, class V1, class V2> SIMDPP_INL
113typename detail::get_expr2_nomask<V1, V2>::empty
114 unzip8_lo(const any_vec16<N,V1>& a, const any_vec16<N,V2>& b)
115{
116 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
117 ra = a.wrapped().eval();
118 rb = b.wrapped().eval();
119 return detail::insn::i_unzip8_lo(ra, rb);
120}
121
122template<unsigned N, class V1, class V2> SIMDPP_INL
123typename detail::get_expr2_nomask<V1, V2>::empty
124 unzip4_lo(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
125{
126 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
127 ra = a.wrapped().eval();
128 rb = b.wrapped().eval();
129 return detail::insn::i_unzip4_lo(ra, rb);
130}
131
132template<unsigned N, class V1, class V2> SIMDPP_INL
133typename detail::get_expr2_nomask<V1, V2>::empty
134 unzip2_lo(const any_vec64<N,V1>& a, const any_vec64<N,V2>& b)
135{
136 typename detail::get_expr2_nomask_nosign<V1, V2>::type ra, rb;
137 ra = a.wrapped().eval();
138 rb = b.wrapped().eval();
139 return detail::insn::i_unzip2_lo(ra, rb);
140}
141
142} // namespace SIMDPP_ARCH_NAMESPACE
143} // namespace simdpp
144
145#endif
146
147