1/* Copyright (C) 2013 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_DETAIL_EXTRACT128_H
9#define LIBSIMDPP_DETAIL_EXTRACT128_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16
17namespace simdpp {
18namespace SIMDPP_ARCH_NAMESPACE {
19namespace detail {
20
21#if SIMDPP_USE_AVX2
22template<unsigned s>
23SIMDPP_INL uint8x16 extract128(const uint8x32& a)
24{
25 return s == 0 ? _mm256_castsi256_si128(a.native()) : _mm256_extracti128_si256(a.native(), 1);
26}
27
28template<unsigned s>
29SIMDPP_INL uint16x8 extract128(const uint16x16& a) { return (uint16x8) extract128<s>(uint8x32(a)); }
30template<unsigned s>
31SIMDPP_INL uint32x4 extract128(const uint32x8& a) { return (uint32x4) extract128<s>(uint8x32(a)); }
32template<unsigned s>
33SIMDPP_INL uint64x2 extract128(const uint64x4& a) { return (uint64x2) extract128<s>(uint8x32(a)); }
34
35template<unsigned s>
36SIMDPP_INL int8x16 extract128(const int8x32& a) { return (int8x16) extract128<s>(uint8x32(a)); }
37template<unsigned s>
38SIMDPP_INL int16x8 extract128(const int16x16& a) { return (int16x8) extract128<s>(uint8x32(a)); }
39template<unsigned s>
40SIMDPP_INL int32x4 extract128(const int32x8& a) { return (int32x4) extract128<s>(uint8x32(a)); }
41template<unsigned s>
42SIMDPP_INL int64x2 extract128(const int64x4& a) { return (int64x2) extract128<s>(uint8x32(a)); }
43#endif
44
45#if SIMDPP_USE_AVX
46template<unsigned s>
47SIMDPP_INL float32x4 extract128(const float32x8& a)
48{
49 return s == 0 ? _mm256_castps256_ps128(a.native()) : _mm256_extractf128_ps(a.native(), 1);
50}
51
52template<unsigned s>
53SIMDPP_INL float64x2 extract128(const float64x4& a)
54{
55 return s == 0 ? _mm256_castpd256_pd128(a.native()) : _mm256_extractf128_pd(a.native(), 1);
56}
57#endif
58
59#if SIMDPP_USE_AVX512BW
60template<unsigned s>
61SIMDPP_INL uint8<16> extract128(const uint8<64>& a)
62{
63 return _mm512_extracti32x4_epi32(a.native(), s);
64}
65template<unsigned s>
66SIMDPP_INL int8<16> extract128(const int8<64>& a) { return (int8<16>) extract128<s>(uint8<64>(a)); }
67
68template<unsigned s>
69SIMDPP_INL uint16<8> extract128(const uint16<32>& a) { return (uint16<8>) extract128<s>(uint8<64>(a)); }
70template<unsigned s>
71SIMDPP_INL int16<8> extract128(const int16<32>& a) { return (int16<8>) extract128<s>(uint8<64>(a)); }
72#endif
73
74#if SIMDPP_USE_AVX512F
75template<unsigned s>
76SIMDPP_INL uint32x4 extract128(const uint32<16>& a)
77{
78 return _mm512_extracti32x4_epi32(a.native(), s);
79}
80
81template<unsigned s>
82SIMDPP_INL uint64x2 extract128(const uint64<8>& a) { return (uint64x2) extract128<s>(uint32<16>(a)); }
83
84template<unsigned s>
85SIMDPP_INL int32x4 extract128(const int32<16>& a) { return (int32x4) extract128<s>(uint32<16>(a)); }
86template<unsigned s>
87SIMDPP_INL int64x2 extract128(const int64<8>& a) { return (int64x2) extract128<s>(uint32<16>(a)); }
88
89template<unsigned s>
90SIMDPP_INL float32x4 extract128(const float32<16>& a)
91{
92 return _mm512_extractf32x4_ps(a.native(), s);
93}
94
95template<unsigned s>
96SIMDPP_INL float64x2 extract128(const float64<8>& a)
97{
98 return _mm_castps_pd(_mm512_extractf32x4_ps(_mm512_castpd_ps(a.native()), s));
99}
100
101template<unsigned s>
102SIMDPP_INL uint32x8 extract256(const uint32<16>& a)
103{
104 return _mm512_extracti64x4_epi64(a.native(), s);
105}
106
107template<unsigned s>
108SIMDPP_INL uint64x4 extract256(const uint64<8>& a) { return (uint64x4) extract256<s>(uint32<16>(a)); }
109
110template<unsigned s>
111SIMDPP_INL int32x8 extract256(const int32<16>& a) { return (int32x8) extract256<s>(uint32<16>(a)); }
112template<unsigned s>
113SIMDPP_INL int64x4 extract256(const int64<8>& a) { return (int64x4) extract256<s>(uint32<16>(a)); }
114
115template<unsigned s>
116SIMDPP_INL float32<8> extract256(const float32<16>& a)
117{
118 return _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a.native()), s));
119}
120
121template<unsigned s>
122SIMDPP_INL float64<4> extract256(const float64<8>& a)
123{
124 return _mm512_extractf64x4_pd(a.native(), s);
125}
126#endif
127
128#if SIMDPP_USE_AVX512BW
129template<unsigned s>
130SIMDPP_INL uint8<32> extract256(const uint8<64>& a)
131{
132 return _mm512_extracti64x4_epi64(a.native(), s);
133}
134
135template<unsigned s>
136SIMDPP_INL uint16<16> extract256(const uint16<32>& a)
137{
138 return _mm512_extracti64x4_epi64(a.native(), s);
139}
140
141template<unsigned s>
142SIMDPP_INL int8<32> extract256(const int8<64>& a)
143{
144 return _mm512_extracti64x4_epi64(a.native(), s);
145}
146
147template<unsigned s>
148SIMDPP_INL int16<16> extract256(const int16<32>& a)
149{
150 return _mm512_extracti64x4_epi64(a.native(), s);
151}
152#endif
153
154} // namespace detail
155} // namespace SIMDPP_ARCH_NAMESPACE
156} // namespace simdpp
157
158#endif
159