1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_SPLIT_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_SPLIT_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/split.h> |
17 | |
18 | namespace simdpp { |
19 | namespace SIMDPP_ARCH_NAMESPACE { |
20 | |
21 | namespace detail { |
22 | |
23 | template<class A, class R> SIMDPP_INL |
24 | void v256_split(A a, R& r1, R& r2) |
25 | { |
26 | #if SIMDPP_USE_AVX2 |
27 | r1 = _mm256_extracti128_si256(a.native(), 0); |
28 | r2 = _mm256_extracti128_si256(a.native(), 1); |
29 | #else |
30 | r1 = a[0]; |
31 | r2 = a[1]; |
32 | #endif |
33 | } |
34 | |
35 | } // namespcae detail |
36 | |
37 | /** Splits a vector into two vectors half as wide. This function is useful when |
38 | the ISA supports multiple vector sizes and the user does some operations |
39 | with vectors that are narrower than the widest native vector. |
40 | |
41 | For example, on AVX, a __m256 vector can be split into two __m128 vectors. |
42 | |
43 | @code |
44 | [ r1, r2 ] = a |
45 | @endcode |
46 | |
47 | @icost{AVX2, 1} |
48 | @icost{SSE2-AVX, NEON, ALTIVEC, 0} |
49 | */ |
50 | template<unsigned N> SIMDPP_INL |
51 | void split(const uint8<N>& a, uint8<N/2>& r1, uint8<N/2>& r2) |
52 | { |
53 | detail::insn::i_split(a, r1, r2); |
54 | } |
55 | |
56 | template<unsigned N> SIMDPP_INL |
57 | void split(const uint16<N>& a, uint16<N/2>& r1, uint16<N/2>& r2) |
58 | { |
59 | detail::insn::i_split(a, r1, r2); |
60 | } |
61 | |
62 | template<unsigned N> SIMDPP_INL |
63 | void split(const uint32<N>& a, uint32<N/2>& r1, uint32<N/2>& r2) |
64 | { |
65 | detail::insn::i_split(a, r1, r2); |
66 | } |
67 | |
68 | template<unsigned N> SIMDPP_INL |
69 | void split(const uint64<N>& a, uint64<N/2>& r1, uint64<N/2>& r2) |
70 | { |
71 | detail::insn::i_split(a, r1, r2); |
72 | } |
73 | |
74 | template<unsigned N> SIMDPP_INL |
75 | void split(const int8<N>& a, int8<N/2>& r1, int8<N/2>& r2) |
76 | { |
77 | uint8<N/2> q1, q2; q1 = r1; q2 = r2; |
78 | detail::insn::i_split(uint8<N>(a), q1, q2); |
79 | r1 = q1; r2 = q2; |
80 | } |
81 | |
82 | template<unsigned N> SIMDPP_INL |
83 | void split(const int16<N>& a, int16<N/2>& r1, int16<N/2>& r2) |
84 | { |
85 | uint16<N/2> q1, q2; q1 = r1; q2 = r2; |
86 | detail::insn::i_split(uint16<N>(a), q1, q2); |
87 | r1 = q1; r2 = q2; |
88 | } |
89 | |
90 | template<unsigned N> SIMDPP_INL |
91 | void split(const int32<N>& a, int32<N/2>& r1, int32<N/2>& r2) |
92 | { |
93 | uint32<N/2> q1, q2; q1 = r1; q2 = r2; |
94 | detail::insn::i_split(uint32<N>(a), q1, q2); |
95 | r1 = q1; r2 = q2; |
96 | } |
97 | |
98 | template<unsigned N> SIMDPP_INL |
99 | void split(const int64<N>& a, int64<N/2>& r1, int64<N/2>& r2) |
100 | { |
101 | uint64<N/2> q1, q2; q1 = r1; q2 = r2; |
102 | detail::insn::i_split(uint64<N>(a), q1, q2); |
103 | r1 = q1; r2 = q2; |
104 | } |
105 | |
106 | template<unsigned N> SIMDPP_INL |
107 | void split(const float32<N>& a, float32<N/2>& r1, float32<N/2>& r2) |
108 | { |
109 | detail::insn::i_split(a, r1, r2); |
110 | } |
111 | |
112 | template<unsigned N> SIMDPP_INL |
113 | void split(const float64<N>& a, float64<N/2>& r1, float64<N/2>& r2) |
114 | { |
115 | detail::insn::i_split(a, r1, r2); |
116 | } |
117 | |
118 | } // namespace SIMDPP_ARCH_NAMESPACE |
119 | } // namespace simdpp |
120 | |
121 | #endif |
122 | |