1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE2_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE2_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/make_shuffle_bytes_mask.h> |
17 | #include <simdpp/core/permute4.h> |
18 | #include <simdpp/detail/null/shuffle.h> |
19 | #include <simdpp/detail/shuffle/neon_int16x8.h> |
20 | #include <simdpp/detail/shuffle/neon_int32x4.h> |
21 | #include <simdpp/detail/shuffle/neon_int64x2.h> |
22 | #include <simdpp/detail/shuffle/shuffle_mask.h> |
23 | #include <simdpp/detail/vector_array_macros.h> |
24 | |
25 | namespace simdpp { |
26 | namespace SIMDPP_ARCH_NAMESPACE { |
27 | namespace detail { |
28 | namespace insn { |
29 | |
30 | |
31 | template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL |
32 | uint16<N> i_permute2(const uint16<N>& a) |
33 | { |
34 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
35 | return i_permute4<s0,s1,s0+2,s1+2>(a); |
36 | } |
37 | |
38 | |
39 | template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL |
40 | uint32<N> i_permute2(const uint32<N>& a) |
41 | { |
42 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
43 | return i_permute4<s0,s1,s0+2,s1+2>(a); |
44 | } |
45 | |
46 | |
47 | template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL |
48 | float32<N> i_permute2(const float32<N>& a) |
49 | { |
50 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
51 | return i_permute4<s0,s1,s0+2,s1+2>(a); |
52 | } |
53 | |
54 | |
55 | template<unsigned s0, unsigned s1> SIMDPP_INL |
56 | uint64x2 i_permute2(const uint64x2& a) |
57 | { |
58 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
59 | #if SIMDPP_USE_SSE2 || SIMDPP_USE_MSA |
60 | return (uint64x2) i_permute4<s0*2, s0*2+1, s1*2, s1*2+1>(int32x4(a)); |
61 | #elif SIMDPP_USE_NEON |
62 | return detail::neon_shuffle_int64x2::permute2<s0,s1>(a); |
63 | #elif SIMDPP_USE_VSX_207 |
64 | return vec_xxpermdi(a.native(), a.native(), |
65 | SIMDPP_VSX_SHUFFLE_MASK_2x2(s0, s1)); |
66 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
67 | return detail::null::permute<s0,s1>(a); |
68 | #endif |
69 | } |
70 | |
#if SIMDPP_USE_AVX2
/** Pair-wise permutation of a 256-bit vector of 64-bit integers (AVX2 only).

    Delegates to i_permute4, reusing the two selectors (offset by two) for the
    upper pair of elements.

    @tparam s0 first selector, must be 0 or 1 (checked at compile time)
    @tparam s1 second selector, must be 0 or 1 (checked at compile time)
    @param a source vector
    @return result of i_permute4<s0, s1, s0+2, s1+2> applied to @a a
*/
template<unsigned s0, unsigned s1>
SIMDPP_INL uint64x4 i_permute2(const uint64x4& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");

    // Selectors for the upper pair of elements.
    const unsigned upper0 = s0 + 2;
    const unsigned upper1 = s1 + 2;
    return i_permute4<s0, s1, upper0, upper1>(a);
}
#endif
79 | |
#if SIMDPP_USE_AVX512F
/** Pair-wise permutation of a 512-bit vector of 64-bit integers
    (AVX-512F only).

    Delegates to i_permute4, reusing the two selectors (offset by two) for the
    second pair of every four-element group.

    @tparam s0 first selector, must be 0 or 1 (checked at compile time)
    @tparam s1 second selector, must be 0 or 1 (checked at compile time)
    @param a source vector
    @return result of i_permute4<s0, s1, s0+2, s1+2> applied to @a a
*/
template<unsigned s0, unsigned s1>
SIMDPP_INL uint64<8> i_permute2(const uint64<8>& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");

    // Selectors for the second pair of each four-element group.
    const unsigned upper0 = s0 + 2;
    const unsigned upper1 = s1 + 2;
    return i_permute4<s0, s1, upper0, upper1>(a);
}
#endif
88 | |
89 | template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL |
90 | uint64<N> i_permute2(const uint64<N>& a) |
91 | { |
92 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
93 | SIMDPP_VEC_ARRAY_IMPL1(uint64<N>, (i_permute2<s0,s1>), a); |
94 | } |
95 | |
96 | // ----------------------------------------------------------------------------- |
97 | |
98 | template<unsigned s0, unsigned s1> SIMDPP_INL |
99 | float64x2 i_permute2(const float64x2& a) |
100 | { |
101 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
102 | #if SIMDPP_USE_SSE2 |
103 | return _mm_shuffle_pd(a.native(), a.native(), SIMDPP_SHUFFLE_MASK_2x2(s0, s1)); |
104 | #elif SIMDPP_USE_VSX_206 |
105 | return vec_xxpermdi(a.native(), a.native(), |
106 | SIMDPP_VSX_SHUFFLE_MASK_2x2(s0, s1)); |
107 | #elif SIMDPP_USE_NEON64 || SIMDPP_USE_MSA |
108 | return float64x2(i_permute2<s0,s1>(int64x2(a))); |
109 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC |
110 | return detail::null::permute<s0,s1>(a); |
111 | #endif |
112 | } |
113 | |
#if SIMDPP_USE_AVX
/** Pair-wise permutation of a 256-bit vector of 64-bit floating-point
    elements (AVX and later).

    @tparam s0 first selector, must be 0 or 1 (checked at compile time)
    @tparam s1 second selector, must be 0 or 1 (checked at compile time)
    @param a source vector
    @return the permuted vector
*/
template<unsigned s0, unsigned s1>
SIMDPP_INL float64x4 i_permute2(const float64x4& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");

#if SIMDPP_USE_AVX2
    // The immediate carries one two-bit source index per result element,
    // counted across the whole vector, hence the +2 for the upper pair.
    return _mm256_permute4x64_pd(a.native(),
                                 (s0 << 0) |
                                 (s1 << 2) |
                                 ((s0 + 2) << 4) |
                                 ((s1 + 2) << 6));
#else // SIMDPP_USE_AVX
    // The immediate carries one bit per result element; each bit picks an
    // element within the corresponding 128-bit lane.
    return _mm256_permute_pd(a.native(),
                             (s0 << 0) | (s1 << 1) |    // lower lane
                             (s0 << 2) | (s1 << 3));    // upper lane
#endif
}
#endif
126 | |
#if SIMDPP_USE_AVX512F
/** Pair-wise permutation of a 512-bit vector of 64-bit floating-point
    elements (AVX-512F only).

    @tparam s0 first selector, must be 0 or 1 (checked at compile time)
    @tparam s1 second selector, must be 0 or 1 (checked at compile time)
    @param a source vector
    @return the permuted vector
*/
template<unsigned s0, unsigned s1>
SIMDPP_INL float64<8> i_permute2(const float64<8>& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");

    // The immediate carries one bit per result element; the (s0, s1) bit pair
    // is repeated for each of the four 128-bit lanes.
    return _mm512_permute_pd(a.native(),
                             (s0 << 0) | (s1 << 1) |    // lane 0
                             (s0 << 2) | (s1 << 3) |    // lane 1
                             (s0 << 4) | (s1 << 5) |    // lane 2
                             (s0 << 6) | (s1 << 7));    // lane 3
}
#endif
135 | |
136 | template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL |
137 | float64<N> i_permute2(const float64<N>& a) |
138 | { |
139 | static_assert(s0 < 2 && s1 < 2, "Selector out of range" ); |
140 | SIMDPP_VEC_ARRAY_IMPL1(float64<N>, (i_permute2<s0,s1>), a); |
141 | } |
142 | |
143 | } // namespace insn |
144 | } // namespace detail |
145 | } // namespace SIMDPP_ARCH_NAMESPACE |
146 | } // namespace simdpp |
147 | |
148 | #endif |
149 | |
150 | |