1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_PERMUTE2_H
9#define LIBSIMDPP_SIMDPP_CORE_PERMUTE2_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/permute2.h>
17#include <simdpp/detail/get_expr.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21
22/** Permutes the 16-bit values within sets of two consecutive elements of the
23 vector. The selector values must be in range [0; 1].
24
25 @code
26 r0 = a[s0]
27 r1 = a[s1]
28 r2 = a[s0+2]
29 r3 = a[s1+2]
30 r4 = a[s0+4]
31 r5 = a[s1+4]
32 ...
33 @endcode
34
 @par 128-bit version:
36 @icost{SSE2-AVX2, 2}
37 @icost{NEON, ALTIVEC, 1-2}
38
 @par 256-bit version:
40 @icost{SSE2-AVX, 4}
41 @icost{AVX2, 2}
42 @icost{NEON, 2-4}
43 @icost{ALTIVEC, 2-3}
44*/
45template<unsigned s0, unsigned s1, unsigned N, class V> SIMDPP_INL
46typename detail::get_expr_nomask<V>::empty
47 permute2(const any_vec16<N,V>& a)
48{
49 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
50 typename detail::get_expr_nomask<V>::type ra;
51 ra = a.wrapped().eval();
52 return detail::insn::i_permute2<s0,s1>(ra);
53}
54
/** Permutes the 32-bit values within sets of two consecutive elements of the
 vector. The selector values must be in range [0; 1].
57
58 @code
59 r0 = a[s0]
60 r1 = a[s1]
61 r2 = a[s0+2]
62 r3 = a[s1+2]
63 256-bit version:
64 r4 = a[s0+4]
65 r5 = a[s1+4]
66 r6 = a[s0+6]
67 r7 = a[s1+6]
68 @endcode
69
70 @par integer
71 @par 128-bit version:
72 @icost{NEON, 2-4}
73 @icost{ALTIVEC, 1-2}
74
75 @par 256-bit version:
76 @icost{SSE2-AVX, 2}
77 @icost{NEON, 4-8}
78 @icost{ALTIVEC, 2-3}
79
80 @par floating-point
81 @par 128-bit version:
82 @icost{NEON, 2-4}
83 @icost{ALTIVEC, 1-2}
84
85 @par 256-bit version:
86 @icost{SSE2-AVX, 2}
87 @icost{NEON, 4-8}
88 @icost{ALTIVEC, 2-3}
89*/
90template<unsigned s0, unsigned s1, unsigned N, class V> SIMDPP_INL
91typename detail::get_expr_nomask<V>::empty
92 permute2(const any_vec32<N,V>& a)
93{
94 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
95 typename detail::get_expr_nomask<V>::type ra;
96 ra = a.wrapped().eval();
97 return detail::insn::i_permute2<s0,s1>(ra);
98}
99
/** Permutes the 64-bit values within sets of two consecutive elements of the
 vector. The selector values must be in range [0; 1].
102
103 @code
104 r0 = a[s0]
105 r1 = a[s1]
106
107 256-bit version:
108 r2 = a[s0+2]
109 r3 = a[s1+2]
110 @endcode
111
112 @par 128-bit version:
113 @icost{NEON, 1-2}
114 @icost{ALTIVEC, 1-2}
115
116 @par 256-bit version:
117 @icost{SSE2-AVX, 2}
118 @icost{NEON, 2-4}
119 @icost{ALTIVEC, 2-4}
120*/
121template<unsigned s0, unsigned s1, unsigned N, class V> SIMDPP_INL
122typename detail::get_expr_nomask<V>::empty
123 permute2(const any_vec64<N,V>& a)
124{
125 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
126 typename detail::get_expr_nomask<V>::type ra;
127 ra = a.wrapped().eval();
128 return detail::insn::i_permute2<s0,s1>(ra);
129}
130
131} // namespace SIMDPP_ARCH_NAMESPACE
132} // namespace simdpp
133
134#endif
135
136