1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BROADCAST_W_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BROADCAST_W_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/splat_n.h>
17#include <simdpp/detail/extract128.h>
18#include <simdpp/detail/insn/shuffle128.h>
19#include <simdpp/detail/shuffle/shuffle_mask.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23namespace detail {
24namespace insn {
25
26// -----------------------------------------------------------------------------
27
28template<unsigned s> SIMDPP_INL
29uint8x16 i_splat(const uint8x16& a)
30{
31 return i_splat16<s>(a);
32}
33
34#if SIMDPP_USE_AVX2
35template<unsigned s> SIMDPP_INL
36uint8x32 i_splat(const uint8x32& a)
37{
38 static_assert(s < 32, "Access out of bounds");
39 uint8x16 lo;
40 lo = s < 16 ? detail::extract128<0>(a) : detail::extract128<1>(a);
41 lo = move16_l<s % 16>(lo);
42 return _mm256_broadcastb_epi8(lo.native());
43}
44#endif
45
46#if SIMDPP_USE_AVX512BW
47template<unsigned s> SIMDPP_INL
48uint8<64> i_splat(const uint8<64>& a)
49{
50 static_assert(s < 64, "Access out of bounds");
51 uint8<16> lo;
52 lo = detail::extract128<s / 16>(a);
53 lo = move16_l<s % 16>(lo);
54 return _mm512_broadcastb_epi8(lo.native());
55}
56#endif
57
58// -----------------------------------------------------------------------------
59
60template<unsigned s> SIMDPP_INL
61uint16x8 i_splat(const uint16x8& a)
62{
63 return i_splat8<s>(a);
64}
65
66#if SIMDPP_USE_AVX2
67template<unsigned s> SIMDPP_INL
68uint16x16 i_splat(const uint16x16& a)
69{
70 static_assert(s < 16, "Access out of bounds");
71 uint16x8 lo;
72 lo = s < 8 ? detail::extract128<0>(a) : detail::extract128<1>(a);
73 lo = move8_l<s % 8>(lo);
74 return _mm256_broadcastw_epi16(lo.native());
75}
76#endif
77
78#if SIMDPP_USE_AVX512BW
79template<unsigned s> SIMDPP_INL
80uint16<32> i_splat(const uint16<32>& a)
81{
82 static_assert(s < 32, "Access out of bounds");
83 uint16<8> lo;
84 lo = detail::extract128<s / 8>(a);
85 lo = move8_l<s % 8>(lo);
86 return _mm512_broadcastw_epi16(lo.native());
87}
88#endif
89
90// -----------------------------------------------------------------------------
91
92template<unsigned s> SIMDPP_INL
93uint32x4 i_splat(const uint32x4& a)
94{
95 return i_splat4<s>(a);
96}
97
98#if SIMDPP_USE_AVX2
99template<unsigned s> SIMDPP_INL
100uint32x8 i_splat(const uint32x8& ca)
101{
102 static_assert(s < 8, "Access out of bounds");
103 uint32<8> a = ca;
104 a = permute4<s%4,s%4,s%4,s%4>(a);
105 a = detail::shuffle1_128<s/4, s/4>(a, a);
106 return a;
107}
108#endif
109
110#if SIMDPP_USE_AVX512F
111template<unsigned s> SIMDPP_INL
112uint32<16> i_splat(const uint32<16>& ca)
113{
114 static_assert(s < 16, "Access out of bounds");
115 uint32<16> a = ca;
116 a = permute4<s%4,s%4,s%4,s%4>(a);
117 a = detail::shuffle2_128<s/4, s/4, s/4, s/4>(a, a);
118 return a;
119}
120#endif
121
122// -----------------------------------------------------------------------------
123
124template<unsigned s> SIMDPP_INL
125uint64x2 i_splat(const uint64x2& a)
126{
127 return i_splat2<s>(a);
128}
129
130#if SIMDPP_USE_AVX2
131template<unsigned s> SIMDPP_INL
132uint64x4 i_splat(const uint64x4& a)
133{
134 static_assert(s < 4, "Access out of bounds");
135 return permute4<s,s,s,s>(a);
136}
137#endif
138
139#if SIMDPP_USE_AVX512F
140template<unsigned s> SIMDPP_INL
141uint64<8> i_splat(const uint64<8>& ca)
142{
143 static_assert(s < 8, "Access out of bounds");
144 uint64<8> a = ca;
145 a = permute2<s%2,s%2>(a);
146 a = _mm512_shuffle_i64x2(a.native(), a.native(),
147 SIMDPP_SHUFFLE_MASK_4x4(s/2, s/2, s/2, s/2)); // TODO extract
148 return a;
149}
150#endif
151
152// -----------------------------------------------------------------------------
153
154template<unsigned s> SIMDPP_INL
155float32x4 i_splat(const float32x4& a)
156{
157 return i_splat4<s>(a);
158}
159
160#if SIMDPP_USE_AVX
161template<unsigned s> SIMDPP_INL
162float32x8 i_splat(const float32x8& ca)
163{
164 static_assert(s < 8, "Access out of bounds");
165 float32<8> a = ca;
166 a = shuffle1_128<s/4,s/4>(a, a);
167 return permute4<s%4,s%4,s%4,s%4>(a);
168}
169#endif
170
171#if SIMDPP_USE_AVX512F
172template<unsigned s> SIMDPP_INL
173float32<16> i_splat(const float32<16>& ca)
174{
175 static_assert(s < 16, "Access out of bounds");
176 float32<16> a = ca;
177 a = permute4<s%4,s%4,s%4,s%4>(a);
178 a = _mm512_shuffle_f32x4(a.native(), a.native(),
179 SIMDPP_SHUFFLE_MASK_4x4(s/4, s/4, s/4, s/4));
180 return a;
181}
182#endif
183
184// -----------------------------------------------------------------------------
185
186template<unsigned s> SIMDPP_INL
187float64x2 i_splat(const float64x2& a)
188{
189 return i_splat2<s>(a);
190}
191
192#if SIMDPP_USE_AVX
193template<unsigned s> SIMDPP_INL
194float64x4 i_splat(const float64x4& a)
195{
196 static_assert(s < 4, "Access out of bounds");
197#if SIMDPP_USE_AVX2
198 return permute4<s,s,s,s>(a);
199#else // SIMDPP_USE_AVX
200 float64<4> b;
201 b = detail::shuffle1_128<s/2,s/2>(a, a);
202 b = permute2<s%2,s%2>(b);
203 return b;
204#endif
205}
206#endif
207
208#if SIMDPP_USE_AVX512F
209template<unsigned s> SIMDPP_INL
210float64<8> i_splat(const float64<8>& ca)
211{
212 static_assert(s < 8, "Access out of bounds");
213 float64<8> a = ca;
214 a = permute2<s%2,s%2>(a);
215 a = _mm512_shuffle_f64x2(a.native(), a.native(),
216 SIMDPP_SHUFFLE_MASK_4x4(s/2, s/2, s/2, s/2)); // TODO extract
217 return a;
218}
219#endif
220
221// -----------------------------------------------------------------------------
222
223template<unsigned s, class V> SIMDPP_INL
224V i_splat(const V& a)
225{
226 static_assert(s < V::length, "Access out of bounds");
227
228 using U = typename V::base_vector_type;
229 U one = a.vec(s / U::length);
230
231 one = i_splat<s % U::length>(one);
232
233 V r;
234 for (unsigned i = 0; i < V::vec_length; ++i) {
235 r.vec(i) = one;
236 }
237 return r;
238}
239
240} // namespace insn
241} // namespace detail
242} // namespace SIMDPP_ARCH_NAMESPACE
243} // namespace simdpp
244
245#endif
246
247