1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_SHUFFLE2_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_SHUFFLE2_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/shuffle4x2.h> |
17 | |
18 | namespace simdpp { |
19 | namespace SIMDPP_ARCH_NAMESPACE { |
20 | |
21 | /** Selects 32-bit floating-point values from two vectors. The first two values |
22 | in each four consecutive values must come from @a a, the last two - from @a |
23 | b. The selector values must be in range [0; 3]. |
24 | |
25 | @code |
26 | r0 = a[a0] |
27 | r1 = a[a1] |
28 | r2 = b[b0] |
29 | r3 = b[b1] |
30 | |
31 | 256-bit version: |
32 | r4 = a[a0+4] |
33 | r5 = a[a1+4] |
34 | r6 = b[b0+4] |
35 | r7 = b[b1+4] |
36 | @endcode |
37 | |
38 | @par floating-point |
39 | @par 128-bit version: |
40 | @icost{ALTIVEC, 1-2} |
41 | @icost{NEON, 1-4} |
42 | |
43 | @par 256-bit version: |
44 | @icost{SSE2-SSE4.1, 2} |
45 | @icost{NEON, 2-8} |
46 | @icost{ALTIVEC, 2-3} |
47 | |
48 | @par integer |
49 | @par 128-bit version: |
50 | @icost{NEON, 1-4} |
51 | @icost{ALTIVEC, 1-2} |
52 | |
53 | @par 256-bit version: |
54 | @icost{SSE2-AVX, 2} |
55 | @icost{NEON, 2-8} |
56 | @icost{ALTIVEC, 2-3} |
57 | */ |
58 | template<unsigned sa0, unsigned sa1, unsigned sb0, unsigned sb1, unsigned N, |
59 | class V1, class V2> SIMDPP_INL |
60 | typename detail::get_expr2_nomask<V1, V2>::empty |
61 | shuffle2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b) |
62 | { |
63 | static_assert(sa0 < 4 && sa1 < 4 && sb0 < 4 && sb1 < 4, "Selector out of range" ); |
64 | typename detail::get_expr2_nomask<V1,V2,void>::type a0 = a.wrapped().eval(), |
65 | b0 = b.wrapped().eval(); |
66 | return detail::insn::i_shuffle4x2<sa0,sa1,sb0+4,sb1+4>(a0, b0); |
67 | } |
68 | |
69 | /** Selects 32-bit values from two vectors. The first two values in each four |
70 | consecutive values must come from @a a, the last two - from @a b. The |
71 | selector values must be in range [0; 3]. |
72 | |
73 | @code |
74 | r0 = a[s0] |
75 | r1 = a[s1] |
76 | r2 = b[s0] |
77 | r3 = b[s1] |
78 | |
79 | 256-bit version: |
80 | r4 = a[s0+4] |
81 | r5 = a[s1+4] |
82 | r6 = b[s0+4] |
83 | r7 = b[s1+4] |
84 | @endcode |
85 | |
86 | @par floating-point |
87 | @par 128-bit version: |
88 | @icost{ALTIVEC, 1-2} |
89 | @icost{NEON, 2-4} |
90 | |
91 | @par 256-bit version: |
92 | @icost{SSE2-SSE4.1, 2} |
93 | @icost{NEON, 4-8} |
94 | @icost{ALTIVEC, 2-3} |
95 | |
96 | @par integer |
97 | @par 128-bit version: |
98 | @icost{NEON, 2-4} |
99 | @icost{ALTIVEC, 1-2} |
100 | |
101 | @par 256-bit version: |
102 | @icost{SSE2-AVX, 2} |
103 | @icost{NEON, 4-8} |
104 | @icost{ALTIVEC, 2-3} |
105 | */ |
106 | template<unsigned s0, unsigned s1, unsigned N, |
107 | class V1, class V2> SIMDPP_INL |
108 | typename detail::get_expr2_nomask<V1, V2>::empty |
109 | shuffle2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b) |
110 | { |
111 | static_assert(s0 < 4 && s1 < 4, "Selector out of range" ); |
112 | typename detail::get_expr2_nomask<V1,V2,void>::type a0 = a.wrapped().eval(), |
113 | b0 = b.wrapped().eval(); |
114 | return detail::insn::i_shuffle4x2<s0,s1,s0+4,s1+4>(a0, b0); |
115 | } |
116 | |
117 | |
118 | } // namespace SIMDPP_ARCH_NAMESPACE |
119 | } // namespace simdpp |
120 | |
121 | #endif |
122 | |
123 | |