1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_PERMUTE4_H
9#define LIBSIMDPP_SIMDPP_CORE_PERMUTE4_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/permute4.h>
17
18namespace simdpp {
19namespace SIMDPP_ARCH_NAMESPACE {
20
/** Permutes the 16-bit values within each set of four consecutive values of
    the vector. The selector values must be in range [0; 3].
23
24 @code
25 r0 = a[s0]
26 ...
27 r3 = a[s3]
28 r4 = a[s0+4]
29 ...
30 r7 = a[s3+4]
31
32 256-bit version:
33
34 r8 = a[s0+8]
35 ...
36 r11 = a[s3+8]
37 r12 = a[s0+12]
38 ...
39 r15 = a[s3+12]
40 @endcode
41
    @par 128-bit version:
43 @icost{SSE2-AVX2, 2}
44 @icost{NEON, 1-5}
45 @icost{ALTIVEC, 1-2}
46
    @par 256-bit version:
48 @icost{SSE2-AVX, 4}
49 @icost{AVX2, 2}
50 @icost{NEON, 2-10}
51 @icost{ALTIVEC, 2-3}
52*/
53template<unsigned s0, unsigned s1, unsigned s2, unsigned s3,
54 unsigned N, class V> SIMDPP_INL
55typename detail::get_expr_nomask<V>::empty
56 permute4(const any_vec16<N,V>& a)
57{
58 static_assert(s0 < 4 && s1 < 4 && s2 < 4 && s3 < 4, "Selector out of range");
59 typename detail::get_expr_nomask<V>::type ra;
60 ra = a.wrapped().eval();
61 return detail::insn::i_permute4<s0,s1,s2,s3>(ra);
62}
63
64/** Permutes the values of each set of four consecutive 32-bit values. The
65 selector values must be in range [0; 3].
66
67 @code
68 r0 = a[s0]
69 ...
70 r3 = a[s3]
71
72 256-bit version:
73 r4 = a[s0+4]
74 ...
75 r7 = a[s3+4]
76 @endcode
77
78 @par integer
79 @par 128-bit version:
80 @icost{NEON, 1-4}
81 @icost{ALTIVEC, 1-2}
82
83 @par 256-bit version:
84 @icost{SSE2-AVX, 2}
85 @icost{NEON, 2-8}
86 @icost{ALTIVEC, 2-3}
87
88 @par floating-point
89 @par 128-bit version:
90 @icost{NEON, 1-4}
91 @icost{ALTIVEC, 1-2}
92
93 @par 256-bit version:
94 @icost{SSE2-SSE4.1, 2}
95 @icost{NEON, 2-8}
96 @icost{ALTIVEC, 2-3}
97*/
98template<unsigned s0, unsigned s1, unsigned s2, unsigned s3,
99 unsigned N, class V> SIMDPP_INL
100typename detail::get_expr_nomask<V>::empty
101 permute4(const any_vec32<N,V>& a)
102{
103 static_assert(s0 < 4 && s1 < 4 && s2 < 4 && s3 < 4, "Selector out of range");
104 typename detail::get_expr_nomask<V>::type ra;
105 ra = a.wrapped().eval();
106 return detail::insn::i_permute4<s0,s1,s2,s3>(ra);
107}
108
109/** Permutes the values of each set of four consecutive 64-bit values. The
110 selector values must be in range [0; 3].
111
112 @code
113 r0 = a[s0]
114 r1 = a[s1]
115 r2 = a[s2]
116 r3 = a[s3]
117 @endcode
118
119 @par integer
120 @icost{SSE2-AVX, 2}
121
122 @par floating-point
123 @icost{SSE2-AVX, 1-2}
124 @icost{NEON, 1-4}
125 @icost{ALTIVEC, 1-4}
126*/
127template<unsigned s0, unsigned s1, unsigned s2, unsigned s3,
128 unsigned N, class V> SIMDPP_INL
129typename detail::get_expr_nomask<V>::empty
130 permute4(const any_vec64<N,V>& a)
131{
132 static_assert(s0 < 4 && s1 < 4 && s2 < 4 && s3 < 4, "Selector out of range");
133 typename detail::get_expr_nomask<V>::type ra;
134 ra = a.wrapped().eval();
135 return detail::insn::i_permute4<s0,s1,s2,s3>(ra);
136}
137
138} // namespace SIMDPP_ARCH_NAMESPACE
139} // namespace simdpp
140
141#endif
142
143