1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_SPLIT_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_SPLIT_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | |
17 | namespace simdpp { |
18 | namespace SIMDPP_ARCH_NAMESPACE { |
19 | namespace detail { |
20 | namespace insn { |
21 | |
22 | |
23 | #if SIMDPP_USE_AVX2 |
24 | static SIMDPP_INL |
25 | void i_split(const uint8<32>& a, uint8<16>& r1, uint8<16>& r2) |
26 | { |
27 | r1 = _mm256_castsi256_si128(a.native()); |
28 | r2 = _mm256_extracti128_si256(a.native(), 1); |
29 | } |
30 | #endif |
31 | |
32 | #if SIMDPP_USE_AVX512BW |
33 | SIMDPP_INL void i_split(const uint8<64>& a, uint8<32>& r1, uint8<32>& r2) |
34 | { |
35 | r1 = _mm512_castsi512_si256(a.native()); |
36 | r2 = _mm512_extracti64x4_epi64(a.native(), 1); |
37 | } |
38 | #endif |
39 | |
40 | // ----------------------------------------------------------------------------- |
41 | |
42 | #if SIMDPP_USE_AVX2 |
43 | static SIMDPP_INL |
44 | void i_split(const uint16<16>& a, uint16<8>& r1, uint16<8>& r2) |
45 | { |
46 | r1 = _mm256_castsi256_si128(a.native()); |
47 | r2 = _mm256_extracti128_si256(a.native(), 1); |
48 | } |
49 | #endif |
50 | |
51 | #if SIMDPP_USE_AVX512BW |
52 | SIMDPP_INL void i_split(const uint16<32>& a, uint16<16>& r1, uint16<16>& r2) |
53 | { |
54 | r1 = _mm512_castsi512_si256(a.native()); |
55 | r2 = _mm512_extracti64x4_epi64(a.native(), 1); |
56 | } |
57 | #endif |
58 | |
59 | // ----------------------------------------------------------------------------- |
60 | |
61 | #if SIMDPP_USE_AVX2 |
62 | static SIMDPP_INL |
63 | void i_split(const uint32<8>& a, uint32<4>& r1, uint32<4>& r2) |
64 | { |
65 | r1 = _mm256_castsi256_si128(a.native()); |
66 | r2 = _mm256_extracti128_si256(a.native(), 1); |
67 | } |
68 | #endif |
69 | |
70 | #if SIMDPP_USE_AVX512F |
71 | static SIMDPP_INL |
72 | void i_split(const uint32<16>& a, uint32<8>& r1, uint32<8>& r2) |
73 | { |
74 | r1 = _mm512_castsi512_si256(a.native()); |
75 | r2 = _mm512_extracti64x4_epi64(a.native(), 1); |
76 | } |
77 | #endif |
78 | |
79 | // ----------------------------------------------------------------------------- |
80 | |
81 | #if SIMDPP_USE_AVX2 |
82 | static SIMDPP_INL |
83 | void i_split(const uint64<4>& a, uint64<2>& r1, uint64<2>& r2) |
84 | { |
85 | r1 = _mm256_castsi256_si128(a.native()); |
86 | r2 = _mm256_extracti128_si256(a.native(), 1); |
87 | } |
88 | #endif |
89 | |
90 | #if SIMDPP_USE_AVX512F |
91 | static SIMDPP_INL |
92 | void i_split(const uint64<8>& a, uint64<4>& r1, uint64<4>& r2) |
93 | { |
94 | r1 = _mm512_castsi512_si256(a.native()); |
95 | r2 = _mm512_extracti64x4_epi64(a.native(), 1); |
96 | } |
97 | #endif |
98 | |
99 | // ----------------------------------------------------------------------------- |
100 | |
101 | #if SIMDPP_USE_AVX |
102 | static SIMDPP_INL |
103 | void i_split(const float32<8>& a, float32<4>& r1, float32<4>& r2) |
104 | { |
105 | r1 = _mm256_castps256_ps128(a.native()); |
106 | r2 = _mm256_extractf128_ps(a.native(), 1); |
107 | } |
108 | #endif |
109 | |
110 | #if SIMDPP_USE_AVX512F |
111 | static SIMDPP_INL |
112 | void i_split(const float32<16>& a, float32<8>& r1, float32<8>& r2) |
113 | { |
114 | r1 = _mm512_castps512_ps256(a.native()); |
115 | r2 = _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a.native()), 1)); |
116 | } |
117 | #endif |
118 | |
119 | // ----------------------------------------------------------------------------- |
120 | |
121 | #if SIMDPP_USE_AVX |
122 | static SIMDPP_INL |
123 | void i_split(const float64<4>& a, float64<2>& r1, float64<2>& r2) |
124 | { |
125 | r1 = _mm256_castpd256_pd128(a.native()); |
126 | r2 = _mm256_extractf128_pd(a.native(), 1); |
127 | } |
128 | #endif |
129 | |
130 | #if SIMDPP_USE_AVX512F |
131 | static SIMDPP_INL |
132 | void i_split(const float64<8>& a, float64<4>& r1, float64<4>& r2) |
133 | { |
134 | // r1 = _mm512_castpd512_pd256(a.native()); GCC BUG |
135 | r1 = _mm512_extractf64x4_pd(a.native(), 0); |
136 | r2 = _mm512_extractf64x4_pd(a.native(), 1); |
137 | } |
138 | #endif |
139 | |
140 | // ----------------------------------------------------------------------------- |
141 | // generic version -- picked up if none of the above matches the arguments |
142 | |
143 | template<class V, class H> SIMDPP_INL |
144 | void i_split(const V& a, H& r1, H& r2) |
145 | { |
146 | unsigned h = H::vec_length; |
147 | for (unsigned i = 0; i < h; ++i) { r1.vec(i) = a.vec(i); } |
148 | for (unsigned i = 0; i < h; ++i) { r2.vec(i) = a.vec(i+h); } |
149 | } |
150 | |
151 | |
152 | } // namespace insn |
153 | } // namespace detail |
154 | } // namespace SIMDPP_ARCH_NAMESPACE |
155 | } // namespace simdpp |
156 | |
157 | #endif |
158 | |