1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_SPLIT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_SPLIT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16
17namespace simdpp {
18namespace SIMDPP_ARCH_NAMESPACE {
19namespace detail {
20namespace insn {
21
22
23#if SIMDPP_USE_AVX2
24static SIMDPP_INL
25void i_split(const uint8<32>& a, uint8<16>& r1, uint8<16>& r2)
26{
27 r1 = _mm256_castsi256_si128(a.native());
28 r2 = _mm256_extracti128_si256(a.native(), 1);
29}
30#endif
31
32#if SIMDPP_USE_AVX512BW
33SIMDPP_INL void i_split(const uint8<64>& a, uint8<32>& r1, uint8<32>& r2)
34{
35 r1 = _mm512_castsi512_si256(a.native());
36 r2 = _mm512_extracti64x4_epi64(a.native(), 1);
37}
38#endif
39
40// -----------------------------------------------------------------------------
41
42#if SIMDPP_USE_AVX2
43static SIMDPP_INL
44void i_split(const uint16<16>& a, uint16<8>& r1, uint16<8>& r2)
45{
46 r1 = _mm256_castsi256_si128(a.native());
47 r2 = _mm256_extracti128_si256(a.native(), 1);
48}
49#endif
50
51#if SIMDPP_USE_AVX512BW
52SIMDPP_INL void i_split(const uint16<32>& a, uint16<16>& r1, uint16<16>& r2)
53{
54 r1 = _mm512_castsi512_si256(a.native());
55 r2 = _mm512_extracti64x4_epi64(a.native(), 1);
56}
57#endif
58
59// -----------------------------------------------------------------------------
60
61#if SIMDPP_USE_AVX2
62static SIMDPP_INL
63void i_split(const uint32<8>& a, uint32<4>& r1, uint32<4>& r2)
64{
65 r1 = _mm256_castsi256_si128(a.native());
66 r2 = _mm256_extracti128_si256(a.native(), 1);
67}
68#endif
69
70#if SIMDPP_USE_AVX512F
71static SIMDPP_INL
72void i_split(const uint32<16>& a, uint32<8>& r1, uint32<8>& r2)
73{
74 r1 = _mm512_castsi512_si256(a.native());
75 r2 = _mm512_extracti64x4_epi64(a.native(), 1);
76}
77#endif
78
79// -----------------------------------------------------------------------------
80
81#if SIMDPP_USE_AVX2
82static SIMDPP_INL
83void i_split(const uint64<4>& a, uint64<2>& r1, uint64<2>& r2)
84{
85 r1 = _mm256_castsi256_si128(a.native());
86 r2 = _mm256_extracti128_si256(a.native(), 1);
87}
88#endif
89
90#if SIMDPP_USE_AVX512F
91static SIMDPP_INL
92void i_split(const uint64<8>& a, uint64<4>& r1, uint64<4>& r2)
93{
94 r1 = _mm512_castsi512_si256(a.native());
95 r2 = _mm512_extracti64x4_epi64(a.native(), 1);
96}
97#endif
98
99// -----------------------------------------------------------------------------
100
101#if SIMDPP_USE_AVX
102static SIMDPP_INL
103void i_split(const float32<8>& a, float32<4>& r1, float32<4>& r2)
104{
105 r1 = _mm256_castps256_ps128(a.native());
106 r2 = _mm256_extractf128_ps(a.native(), 1);
107}
108#endif
109
110#if SIMDPP_USE_AVX512F
111static SIMDPP_INL
112void i_split(const float32<16>& a, float32<8>& r1, float32<8>& r2)
113{
114 r1 = _mm512_castps512_ps256(a.native());
115 r2 = _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a.native()), 1));
116}
117#endif
118
119// -----------------------------------------------------------------------------
120
121#if SIMDPP_USE_AVX
122static SIMDPP_INL
123void i_split(const float64<4>& a, float64<2>& r1, float64<2>& r2)
124{
125 r1 = _mm256_castpd256_pd128(a.native());
126 r2 = _mm256_extractf128_pd(a.native(), 1);
127}
128#endif
129
130#if SIMDPP_USE_AVX512F
131static SIMDPP_INL
132void i_split(const float64<8>& a, float64<4>& r1, float64<4>& r2)
133{
134 // r1 = _mm512_castpd512_pd256(a.native()); GCC BUG
135 r1 = _mm512_extractf64x4_pd(a.native(), 0);
136 r2 = _mm512_extractf64x4_pd(a.native(), 1);
137}
138#endif
139
140// -----------------------------------------------------------------------------
141// generic version -- picked up if none of the above matches the arguments
142
143template<class V, class H> SIMDPP_INL
144void i_split(const V& a, H& r1, H& r2)
145{
146 unsigned h = H::vec_length;
147 for (unsigned i = 0; i < h; ++i) { r1.vec(i) = a.vec(i); }
148 for (unsigned i = 0; i < h; ++i) { r2.vec(i) = a.vec(i+h); }
149}
150
151
152} // namespace insn
153} // namespace detail
154} // namespace SIMDPP_ARCH_NAMESPACE
155} // namespace simdpp
156
157#endif
158