1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_SPLAT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_SPLAT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/load_u.h>
17#include <simdpp/detail/insn/set_splat.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21namespace detail {
22namespace insn {
23
24static SIMDPP_INL
25void i_load_splat(uint8x16& v, const char* p0)
26{
27 const uint8_t* v0 = reinterpret_cast<const uint8_t*>(p0);
28#if SIMDPP_USE_NULL
29 v = detail::null::make_vec<uint8x16>(*v0);
30#elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA
31 i_set_splat(v, *v0);
32#elif SIMDPP_USE_NEON
33 v = vld1q_dup_u8(v0);
34#elif SIMDPP_USE_ALTIVEC
35 v = altivec::load1_u(v, v0);
36 v = splat<0>(v);
37#endif
38}
39
#if SIMDPP_USE_AVX2
// AVX2-only overload: reads one 8-bit element from p0 and forwards it to
// i_set_splat to fill the 32-element vector v.
static SIMDPP_INL
void i_load_splat(uint8x32& v, const char* p0)
{
    const uint8_t* src = reinterpret_cast<const uint8_t*>(p0);
    i_set_splat(v, *src);
}
#endif
47
#if SIMDPP_USE_AVX512BW
// AVX512BW-only overload: reads one 8-bit element from p0 and forwards it to
// i_set_splat to fill the 64-element vector v.
//
// Declared `static` so that its linkage matches every other non-template
// i_load_splat overload in this header.
static SIMDPP_INL
void i_load_splat(uint8<64>& v, const char* p0)
{
    i_set_splat(v, *reinterpret_cast<const uint8_t*>(p0));
}
#endif
54
55// -----------------------------------------------------------------------------
56
57static SIMDPP_INL
58void i_load_splat(uint16x8& v, const char* p0)
59{
60 const uint16_t* v0 = reinterpret_cast<const uint16_t*>(p0);
61#if SIMDPP_USE_NULL
62 v = detail::null::make_vec<uint16x8>(*v0);
63#elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA
64 i_set_splat(v, *v0);
65#elif SIMDPP_USE_NEON
66 v = vld1q_dup_u16(v0);
67#elif SIMDPP_USE_ALTIVEC
68 v = altivec::load1_u(v, v0);
69 v = splat<0>(v);
70#endif
71}
72
#if SIMDPP_USE_AVX2
// AVX2-only overload: reads one 16-bit element from p0 and forwards it to
// i_set_splat to fill the 16-element vector v.
static SIMDPP_INL
void i_load_splat(uint16x16& v, const char* p0)
{
    const uint16_t* src = reinterpret_cast<const uint16_t*>(p0);
    i_set_splat(v, *src);
}
#endif
80
#if SIMDPP_USE_AVX512BW
// AVX512BW-only overload: reads one 16-bit element from p0 and forwards it to
// i_set_splat to fill the 32-element vector v.
//
// Declared `static` so that its linkage matches every other non-template
// i_load_splat overload in this header.
static SIMDPP_INL
void i_load_splat(uint16<32>& v, const char* p0)
{
    i_set_splat(v, *reinterpret_cast<const uint16_t*>(p0));
}
#endif
87
88// -----------------------------------------------------------------------------
89
90static SIMDPP_INL
91void i_load_splat(uint32x4& v, const char* p0)
92{
93 const uint32_t* v0 = reinterpret_cast<const uint32_t*>(p0);
94#if SIMDPP_USE_NULL
95 v = detail::null::make_vec<uint32x4>(*v0);
96#elif SIMDPP_USE_SSE2
97 v = _mm_cvtsi32_si128(*v0);
98 v = permute4<0,0,0,0>(v);
99#elif SIMDPP_USE_NEON
100 v = vld1q_dup_u32(v0);
101#elif SIMDPP_USE_ALTIVEC
102 v = altivec::load1_u(v, v0);
103 v = splat<0>(v);
104#elif SIMDPP_USE_MSA
105 i_set_splat(v, *v0);
106#endif
107}
108
#if SIMDPP_USE_AVX2
// AVX2-only overload: reads one 32-bit element from p0 and forwards it to
// i_set_splat to fill the 8-element vector v.
static SIMDPP_INL
void i_load_splat(uint32x8& v, const char* p0)
{
    const uint32_t* src = reinterpret_cast<const uint32_t*>(p0);
    i_set_splat(v, *src);
}
#endif
116
#if SIMDPP_USE_AVX512F
// AVX512F-only overload: loads 32 bits from p0 and broadcasts them to all
// 16 elements of v.
static SIMDPP_INL
void i_load_splat(uint32<16>& v, const char* p0)
{
    const float* src = reinterpret_cast<const float*>(p0);
    // The 32-bit payload is fetched with the scalar float load and then
    // reinterpreted as an integer vector; no value conversion takes place.
    const __m128i low = _mm_castps_si128(_mm_load_ss(src));
    v = _mm512_broadcastd_epi32(low);
}
#endif
125
126// -----------------------------------------------------------------------------
127
128static SIMDPP_INL
129void i_load_splat(uint64x2& v, const char* p0)
130{
131 const uint64_t* v0 = reinterpret_cast<const uint64_t*>(p0);
132#if SIMDPP_USE_SSE2
133 v = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(v0));
134 v = permute2<0,0>(v);
135#elif SIMDPP_USE_NEON
136#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 7)
137 // BUG: GCC 4.7 loads only the first element
138 uint64x1_t x = vld1_dup_u64(v0);
139 v = vdupq_lane_u64(x, 0);
140#else
141 v = vld1q_dup_u64(v0);
142#endif
143#elif SIMDPP_USE_VSX_207
144 v = load_u(v0);
145 v = splat<0>(v);
146#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
147 v = detail::null::make_vec<uint64x2>(*v0);
148#elif SIMDPP_USE_MSA
149 i_set_splat(v, *v0);
150#endif
151}
152
#if SIMDPP_USE_AVX2
// AVX2-only overload: loads 64 bits from p0 and broadcasts them to all
// 4 elements of v.
static SIMDPP_INL
void i_load_splat(uint64x4& v, const char* p0)
{
    const __m128i* src = reinterpret_cast<const __m128i*>(p0);
    // _mm_loadl_epi64 fetches the 64-bit value into the low half of a
    // 128-bit register, which the broadcast then replicates.
    v = _mm256_broadcastq_epi64(_mm_loadl_epi64(src));
}
#endif
161
#if SIMDPP_USE_AVX512F
// AVX512F-only overload: loads 64 bits from p0 and broadcasts them to all
// 8 elements of v.
static SIMDPP_INL
void i_load_splat(uint64<8>& v, const char* p0)
{
    const __m128i* src = reinterpret_cast<const __m128i*>(p0);
    // _mm_loadl_epi64 fetches the 64-bit value into the low half of a
    // 128-bit register, which the broadcast then replicates.
    v = _mm512_broadcastq_epi64(_mm_loadl_epi64(src));
}
#endif
170
171// -----------------------------------------------------------------------------
172
173static SIMDPP_INL
174void i_load_splat(float32x4& v, const char* p0)
175{
176 const float* v0 = reinterpret_cast<const float*>(p0);
177#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
178 v = detail::null::make_vec<float32x4>(*v0);
179#elif SIMDPP_USE_AVX
180 v = _mm_broadcast_ss(v0);
181#elif SIMDPP_USE_SSE2
182 v = _mm_load_ss(v0);
183 v = permute4<0,0,0,0>(v);
184#elif SIMDPP_USE_NEON
185 v = vld1q_dup_f32(v0);
186#elif SIMDPP_USE_ALTIVEC
187 v = altivec::load1_u(v, v0);
188 v = splat<0>(v);
189#elif SIMDPP_USE_MSA
190 i_set_splat(v, *v0);
191#endif
192}
193
#if SIMDPP_USE_AVX
// AVX-only overload: broadcasts the float stored at p0 to all 8 elements
// of v using the broadcast-from-memory intrinsic.
static SIMDPP_INL
void i_load_splat(float32x8& v, const char* p0)
{
    const float* src = reinterpret_cast<const float*>(p0);
    v = _mm256_broadcast_ss(src);
}
#endif
201
#if SIMDPP_USE_AVX512F
// AVX512F-only overload: loads the float stored at p0 and broadcasts it to
// all 16 elements of v.
static SIMDPP_INL
void i_load_splat(float32<16>& v, const char* p0)
{
    const float* src = reinterpret_cast<const float*>(p0);
    // Scalar load into a 128-bit register first; the broadcast takes its
    // lowest element as the source.
    v = _mm512_broadcastss_ps(_mm_load_ss(src));
}
#endif
210
211// -----------------------------------------------------------------------------
212
213static SIMDPP_INL
214void i_load_splat(float64x2& v, const char* p0)
215{
216 const double* v0 = reinterpret_cast<const double*>(p0);
217
218#if SIMDPP_USE_SSE3
219 v = _mm_loaddup_pd(v0);
220#elif SIMDPP_USE_SSE2
221 v = _mm_load_sd(v0);
222 v = permute2<0,0>(v);
223#elif SIMDPP_USE_NEON64
224 v = vld1q_dup_f64(v0);
225#elif SIMDPP_USE_VSX_206
226 v = load_u(v0);
227 v = splat<0>(v);
228#elif SIMDPP_USE_MSA
229 i_set_splat(v, *v0);
230#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
231 v = detail::null::make_vec<float64x2>(*v0);
232#endif
233}
234
#if SIMDPP_USE_AVX
// AVX-only overload: broadcasts the double stored at p0 to all 4 elements
// of v using the broadcast-from-memory intrinsic.
static SIMDPP_INL
void i_load_splat(float64x4& v, const char* p0)
{
    const double* src = reinterpret_cast<const double*>(p0);
    v = _mm256_broadcast_sd(src);
}
#endif
242
#if SIMDPP_USE_AVX512F
// AVX512F-only overload: loads the double stored at p0 and broadcasts it to
// all 8 elements of v.
static SIMDPP_INL
void i_load_splat(float64<8>& v, const char* p0)
{
    const double* src = reinterpret_cast<const double*>(p0);
    // Scalar load into a 128-bit register first; the broadcast takes its
    // lowest element as the source.
    v = _mm512_broadcastsd_pd(_mm_load_sd(src));
}
#endif
251
252// -----------------------------------------------------------------------------
253
254template<class V> SIMDPP_INL
255void i_load_splat(V& v, const char* p0)
256{
257 typename V::base_vector_type tv;
258 i_load_splat(tv, p0);
259 for (unsigned i = 0; i < v.vec_length; ++i) {
260 v.vec(i) = tv;
261 }
262}
263
264// -----------------------------------------------------------------------------
265
266template<class V> SIMDPP_INL
267V i_load_splat_any(const char* p)
268{
269 typename detail::remove_sign<V>::type r;
270 i_load_splat(r, p);
271 return V(r);
272}
273
274// -----------------------------------------------------------------------------
275} // namespace insn
276
277template<class V> SIMDPP_INL
278void construct_eval(V& v, const expr_vec_load_splat& e)
279{
280 v = insn::i_load_splat_any<V>(e.a);
281}
282
283} // namespace detail
284} // namespace SIMDPP_ARCH_NAMESPACE
285} // namespace simdpp
286
287#endif
288
289