1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/types/traits.h>
17#include <simdpp/detail/align.h>
18#include <simdpp/detail/insn/mem_unpack.h>
19#include <simdpp/core/transpose.h>
20#include <simdpp/detail/null/memory.h>
21
22namespace simdpp {
23namespace SIMDPP_ARCH_NAMESPACE {
24namespace detail {
25namespace insn {
26
27static SIMDPP_INL
28void i_load(uint8x16& a, const char* p)
29{
30 p = detail::assume_aligned(p, 16);
31#if SIMDPP_USE_NULL
32 detail::null::load(a, p);
33#elif SIMDPP_USE_SSE2
34 a = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
35#elif SIMDPP_USE_NEON
36 a = vreinterpretq_u8_u64(vld1q_u64(reinterpret_cast<const uint64_t*>(p)));
37#elif SIMDPP_USE_ALTIVEC
38 a = vec_ld(0, reinterpret_cast<const uint8_t*>(p));
39#elif SIMDPP_USE_MSA
40 a = (v16u8) __msa_ld_b(p, 0);
41#endif
42}
43
44static SIMDPP_INL
45void i_load(uint16x8& a, const char* p) { uint8x16 r; i_load(r, p); a = r; }
46static SIMDPP_INL
47void i_load(uint32x4& a, const char* p) { uint8x16 r; i_load(r, p); a = r; }
48
49static SIMDPP_INL
50void i_load(uint64x2& a, const char* p)
51{
52#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
53 p = detail::assume_aligned(p, 16);
54 detail::null::load(a, p);
55#else
56 uint8x16 r; i_load(r, p); a = r;
57#endif
58}
59
60static SIMDPP_INL
61void i_load(float32x4& a, const char* p)
62{
63 p = detail::assume_aligned(p, 16);
64 const float* q = reinterpret_cast<const float*>(p);
65 (void) q;
66#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
67 detail::null::load(a, p);
68#elif SIMDPP_USE_SSE2
69 a = _mm_load_ps(q);
70#elif SIMDPP_USE_NEON
71 a = vld1q_f32(q);
72#elif SIMDPP_USE_ALTIVEC
73 a = vec_ld(0, q);
74#elif SIMDPP_USE_MSA
75 a = (v4f32) __msa_ld_w(q, 0);
76#endif
77}
78
79static SIMDPP_INL
80void i_load(float64x2& a, const char* p)
81{
82 p = detail::assume_aligned(p, 16);
83 const double* q = reinterpret_cast<const double*>(p);
84 (void) q;
85#if SIMDPP_USE_SSE2
86 a = _mm_load_pd(q);
87#elif SIMDPP_USE_NEON64
88 a = vld1q_f64(q);
89#elif SIMDPP_USE_VSX_206
90 a = vec_ld(0, reinterpret_cast<const __vector double*>(q));
91#elif SIMDPP_USE_MSA
92 a = (v2f64) __msa_ld_d(q, 0);
93#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC || SIMDPP_USE_NEON32
94 detail::null::load(a, p);
95#endif
96}
97
#if SIMDPP_USE_AVX2
// 256-bit integer loads. All four overloads issue the same aligned load
// (_mm256_load_si256) and differ only in the destination vector type.

/// Loads a @c uint8x32 vector from @a p.
static SIMDPP_INL
void i_load(uint8x32& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}

/// Loads a @c uint16x16 vector from @a p.
static SIMDPP_INL
void i_load(uint16x16& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}

/// Loads a @c uint32x8 vector from @a p.
static SIMDPP_INL
void i_load(uint32x8& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}

/// Loads a @c uint64x4 vector from @a p.
static SIMDPP_INL
void i_load(uint64x4& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}
#endif
#if SIMDPP_USE_AVX
/// Loads a @c float32x8 vector from @a p with the aligned load
/// _mm256_load_ps.
static SIMDPP_INL
void i_load(float32x8& a, const char* p)
{
    const float* src = reinterpret_cast<const float*>(p);
    a = _mm256_load_ps(src);
}

/// Loads a @c float64x4 vector from @a p with the aligned load
/// _mm256_load_pd.
static SIMDPP_INL
void i_load(float64x4& a, const char* p)
{
    const double* src = reinterpret_cast<const double*>(p);
    a = _mm256_load_pd(src);
}
#endif
132
#if SIMDPP_USE_AVX512BW
// 512-bit loads for 8-bit and 16-bit element vectors. They are declared
// `static SIMDPP_INL` like every other i_load overload in this file, and
// use the element-type-neutral aligned load _mm512_load_si512 rather than
// the 32-bit-element variant, since the data is not 32-bit elements.

/// Loads a @c uint8<64> vector from @a p.
static SIMDPP_INL
void i_load(uint8<64>& a, const char* p)
{
    a = _mm512_load_si512(p);
}

/// Loads a @c uint16<32> vector from @a p.
static SIMDPP_INL
void i_load(uint16<32>& a, const char* p)
{
    a = _mm512_load_si512(p);
}
#endif
143
#if SIMDPP_USE_AVX512F
// 512-bit loads, one aligned-load intrinsic per element type.

/// Loads a @c uint32<16> vector from @a p.
static SIMDPP_INL
void i_load(uint32<16>& a, const char* p)
{
    const void* src = p;
    a = _mm512_load_epi32(src);
}

/// Loads a @c uint64<8> vector from @a p.
static SIMDPP_INL
void i_load(uint64<8>& a, const char* p)
{
    const void* src = p;
    a = _mm512_load_epi64(src);
}

/// Loads a @c float32<16> vector from @a p.
static SIMDPP_INL
void i_load(float32<16>& a, const char* p)
{
    const float* src = reinterpret_cast<const float*>(p);
    a = _mm512_load_ps(src);
}

/// Loads a @c float64<8> vector from @a p.
static SIMDPP_INL
void i_load(float64<8>& a, const char* p)
{
    const double* src = reinterpret_cast<const double*>(p);
    a = _mm512_load_pd(src);
}
#endif
166
167template<class V> SIMDPP_INL
168void i_load(V& a, const char* p)
169{
170 const unsigned veclen = V::base_vector_type::length_bytes;
171
172 for (unsigned i = 0; i < V::vec_length; ++i) {
173 i_load(a.vec(i), p);
174 p += veclen;
175 }
176}
177
178template<class V> SIMDPP_INL
179V i_load_any(const char* p)
180{
181 typename detail::remove_sign<V>::type r;
182 i_load(r, p);
183 return V(r);
184}
185
186} // namespace insn
187
188template<class V> SIMDPP_INL
189void construct_eval(V& v, const expr_vec_load& e)
190{
191 v = insn::i_load_any<V>(e.a);
192}
193
194} // namespace detail
195} // namespace SIMDPP_ARCH_NAMESPACE
196} // namespace simdpp
197
198#endif
199
200