1 | /* Copyright (C) 2013 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_DETAIL_EXTRACT128_H |
9 | #define |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | |
17 | namespace simdpp { |
18 | namespace SIMDPP_ARCH_NAMESPACE { |
19 | namespace detail { |
20 | |
21 | #if SIMDPP_USE_AVX2 |
22 | template<unsigned s> |
23 | SIMDPP_INL uint8x16 extract128(const uint8x32& a) |
24 | { |
25 | return s == 0 ? _mm256_castsi256_si128(a.native()) : _mm256_extracti128_si256(a.native(), 1); |
26 | } |
27 | |
28 | template<unsigned s> |
29 | SIMDPP_INL uint16x8 extract128(const uint16x16& a) { return (uint16x8) extract128<s>(uint8x32(a)); } |
30 | template<unsigned s> |
31 | SIMDPP_INL uint32x4 extract128(const uint32x8& a) { return (uint32x4) extract128<s>(uint8x32(a)); } |
32 | template<unsigned s> |
33 | SIMDPP_INL uint64x2 extract128(const uint64x4& a) { return (uint64x2) extract128<s>(uint8x32(a)); } |
34 | |
35 | template<unsigned s> |
36 | SIMDPP_INL int8x16 extract128(const int8x32& a) { return (int8x16) extract128<s>(uint8x32(a)); } |
37 | template<unsigned s> |
38 | SIMDPP_INL int16x8 extract128(const int16x16& a) { return (int16x8) extract128<s>(uint8x32(a)); } |
39 | template<unsigned s> |
40 | SIMDPP_INL int32x4 extract128(const int32x8& a) { return (int32x4) extract128<s>(uint8x32(a)); } |
41 | template<unsigned s> |
42 | SIMDPP_INL int64x2 extract128(const int64x4& a) { return (int64x2) extract128<s>(uint8x32(a)); } |
43 | #endif |
44 | |
45 | #if SIMDPP_USE_AVX |
46 | template<unsigned s> |
47 | SIMDPP_INL float32x4 extract128(const float32x8& a) |
48 | { |
49 | return s == 0 ? _mm256_castps256_ps128(a.native()) : _mm256_extractf128_ps(a.native(), 1); |
50 | } |
51 | |
52 | template<unsigned s> |
53 | SIMDPP_INL float64x2 extract128(const float64x4& a) |
54 | { |
55 | return s == 0 ? _mm256_castpd256_pd128(a.native()) : _mm256_extractf128_pd(a.native(), 1); |
56 | } |
57 | #endif |
58 | |
59 | #if SIMDPP_USE_AVX512BW |
60 | template<unsigned s> |
61 | SIMDPP_INL uint8<16> extract128(const uint8<64>& a) |
62 | { |
63 | return _mm512_extracti32x4_epi32(a.native(), s); |
64 | } |
65 | template<unsigned s> |
66 | SIMDPP_INL int8<16> extract128(const int8<64>& a) { return (int8<16>) extract128<s>(uint8<64>(a)); } |
67 | |
68 | template<unsigned s> |
69 | SIMDPP_INL uint16<8> extract128(const uint16<32>& a) { return (uint16<8>) extract128<s>(uint8<64>(a)); } |
70 | template<unsigned s> |
71 | SIMDPP_INL int16<8> extract128(const int16<32>& a) { return (int16<8>) extract128<s>(uint8<64>(a)); } |
72 | #endif |
73 | |
74 | #if SIMDPP_USE_AVX512F |
75 | template<unsigned s> |
76 | SIMDPP_INL uint32x4 extract128(const uint32<16>& a) |
77 | { |
78 | return _mm512_extracti32x4_epi32(a.native(), s); |
79 | } |
80 | |
81 | template<unsigned s> |
82 | SIMDPP_INL uint64x2 extract128(const uint64<8>& a) { return (uint64x2) extract128<s>(uint32<16>(a)); } |
83 | |
84 | template<unsigned s> |
85 | SIMDPP_INL int32x4 extract128(const int32<16>& a) { return (int32x4) extract128<s>(uint32<16>(a)); } |
86 | template<unsigned s> |
87 | SIMDPP_INL int64x2 extract128(const int64<8>& a) { return (int64x2) extract128<s>(uint32<16>(a)); } |
88 | |
89 | template<unsigned s> |
90 | SIMDPP_INL float32x4 extract128(const float32<16>& a) |
91 | { |
92 | return _mm512_extractf32x4_ps(a.native(), s); |
93 | } |
94 | |
95 | template<unsigned s> |
96 | SIMDPP_INL float64x2 extract128(const float64<8>& a) |
97 | { |
98 | return _mm_castps_pd(_mm512_extractf32x4_ps(_mm512_castpd_ps(a.native()), s)); |
99 | } |
100 | |
101 | template<unsigned s> |
102 | SIMDPP_INL uint32x8 extract256(const uint32<16>& a) |
103 | { |
104 | return _mm512_extracti64x4_epi64(a.native(), s); |
105 | } |
106 | |
107 | template<unsigned s> |
108 | SIMDPP_INL uint64x4 extract256(const uint64<8>& a) { return (uint64x4) extract256<s>(uint32<16>(a)); } |
109 | |
110 | template<unsigned s> |
111 | SIMDPP_INL int32x8 extract256(const int32<16>& a) { return (int32x8) extract256<s>(uint32<16>(a)); } |
112 | template<unsigned s> |
113 | SIMDPP_INL int64x4 extract256(const int64<8>& a) { return (int64x4) extract256<s>(uint32<16>(a)); } |
114 | |
115 | template<unsigned s> |
116 | SIMDPP_INL float32<8> extract256(const float32<16>& a) |
117 | { |
118 | return _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a.native()), s)); |
119 | } |
120 | |
121 | template<unsigned s> |
122 | SIMDPP_INL float64<4> extract256(const float64<8>& a) |
123 | { |
124 | return _mm512_extractf64x4_pd(a.native(), s); |
125 | } |
126 | #endif |
127 | |
128 | #if SIMDPP_USE_AVX512BW |
129 | template<unsigned s> |
130 | SIMDPP_INL uint8<32> extract256(const uint8<64>& a) |
131 | { |
132 | return _mm512_extracti64x4_epi64(a.native(), s); |
133 | } |
134 | |
135 | template<unsigned s> |
136 | SIMDPP_INL uint16<16> extract256(const uint16<32>& a) |
137 | { |
138 | return _mm512_extracti64x4_epi64(a.native(), s); |
139 | } |
140 | |
141 | template<unsigned s> |
142 | SIMDPP_INL int8<32> extract256(const int8<64>& a) |
143 | { |
144 | return _mm512_extracti64x4_epi64(a.native(), s); |
145 | } |
146 | |
147 | template<unsigned s> |
148 | SIMDPP_INL int16<16> extract256(const int16<32>& a) |
149 | { |
150 | return _mm512_extracti64x4_epi64(a.native(), s); |
151 | } |
152 | #endif |
153 | |
154 | } // namespace detail |
155 | } // namespace SIMDPP_ARCH_NAMESPACE |
156 | } // namespace simdpp |
157 | |
158 | #endif |
159 | |