1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_SHUFFLE2_H
9#define LIBSIMDPP_SIMDPP_CORE_SHUFFLE2_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/shuffle4x2.h>
17
18namespace simdpp {
19namespace SIMDPP_ARCH_NAMESPACE {
20
21/** Selects 32-bit floating-point values from two vectors. The first two values
22 in each four consecutive values must come from @a a, the last two - from @a
23 b. The selector values must be in range [0; 3].
24
25 @code
26 r0 = a[a0]
27 r1 = a[a1]
28 r2 = b[b0]
29 r3 = b[b1]
30
31 256-bit version:
32 r4 = a[a0+4]
33 r5 = a[a1+4]
34 r6 = b[b0+4]
35 r7 = b[b1+4]
36 @endcode
37
38 @par floating-point
39 @par 128-bit version:
40 @icost{ALTIVEC, 1-2}
41 @icost{NEON, 1-4}
42
43 @par 256-bit version:
44 @icost{SSE2-SSE4.1, 2}
45 @icost{NEON, 2-8}
46 @icost{ALTIVEC, 2-3}
47
48 @par integer
49 @par 128-bit version:
50 @icost{NEON, 1-4}
51 @icost{ALTIVEC, 1-2}
52
53 @par 256-bit version:
54 @icost{SSE2-AVX, 2}
55 @icost{NEON, 2-8}
56 @icost{ALTIVEC, 2-3}
57*/
58template<unsigned sa0, unsigned sa1, unsigned sb0, unsigned sb1, unsigned N,
59 class V1, class V2> SIMDPP_INL
60typename detail::get_expr2_nomask<V1, V2>::empty
61 shuffle2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
62{
63 static_assert(sa0 < 4 && sa1 < 4 && sb0 < 4 && sb1 < 4, "Selector out of range");
64 typename detail::get_expr2_nomask<V1,V2,void>::type a0 = a.wrapped().eval(),
65 b0 = b.wrapped().eval();
66 return detail::insn::i_shuffle4x2<sa0,sa1,sb0+4,sb1+4>(a0, b0);
67}
68
69/** Selects 32-bit values from two vectors. The first two values in each four
70 consecutive values must come from @a a, the last two - from @a b. The
71 selector values must be in range [0; 3].
72
73 @code
74 r0 = a[s0]
75 r1 = a[s1]
76 r2 = b[s0]
77 r3 = b[s1]
78
79 256-bit version:
80 r4 = a[s0+4]
81 r5 = a[s1+4]
82 r6 = b[s0+4]
83 r7 = b[s1+4]
84 @endcode
85
86 @par floating-point
87 @par 128-bit version:
88 @icost{ALTIVEC, 1-2}
89 @icost{NEON, 2-4}
90
91 @par 256-bit version:
92 @icost{SSE2-SSE4.1, 2}
93 @icost{NEON, 4-8}
94 @icost{ALTIVEC, 2-3}
95
96 @par integer
97 @par 128-bit version:
98 @icost{NEON, 2-4}
99 @icost{ALTIVEC, 1-2}
100
101 @par 256-bit version:
102 @icost{SSE2-AVX, 2}
103 @icost{NEON, 4-8}
104 @icost{ALTIVEC, 2-3}
105*/
106template<unsigned s0, unsigned s1, unsigned N,
107 class V1, class V2> SIMDPP_INL
108typename detail::get_expr2_nomask<V1, V2>::empty
109 shuffle2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
110{
111 static_assert(s0 < 4 && s1 < 4, "Selector out of range");
112 typename detail::get_expr2_nomask<V1,V2,void>::type a0 = a.wrapped().eval(),
113 b0 = b.wrapped().eval();
114 return detail::insn::i_shuffle4x2<s0,s1,s0+4,s1+4>(a0, b0);
115}
116
117
118} // namespace SIMDPP_ARCH_NAMESPACE
119} // namespace simdpp
120
121#endif
122
123