1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/types/traits.h> |
17 | #include <simdpp/detail/align.h> |
18 | #include <simdpp/detail/insn/mem_unpack.h> |
19 | #include <simdpp/core/transpose.h> |
20 | #include <simdpp/detail/null/memory.h> |
21 | |
22 | namespace simdpp { |
23 | namespace SIMDPP_ARCH_NAMESPACE { |
24 | namespace detail { |
25 | namespace insn { |
26 | |
27 | static SIMDPP_INL |
28 | void i_load(uint8x16& a, const char* p) |
29 | { |
30 | p = detail::assume_aligned(p, 16); |
31 | #if SIMDPP_USE_NULL |
32 | detail::null::load(a, p); |
33 | #elif SIMDPP_USE_SSE2 |
34 | a = _mm_load_si128(reinterpret_cast<const __m128i*>(p)); |
35 | #elif SIMDPP_USE_NEON |
36 | a = vreinterpretq_u8_u64(vld1q_u64(reinterpret_cast<const uint64_t*>(p))); |
37 | #elif SIMDPP_USE_ALTIVEC |
38 | a = vec_ld(0, reinterpret_cast<const uint8_t*>(p)); |
39 | #elif SIMDPP_USE_MSA |
40 | a = (v16u8) __msa_ld_b(p, 0); |
41 | #endif |
42 | } |
43 | |
44 | static SIMDPP_INL |
45 | void i_load(uint16x8& a, const char* p) { uint8x16 r; i_load(r, p); a = r; } |
46 | static SIMDPP_INL |
47 | void i_load(uint32x4& a, const char* p) { uint8x16 r; i_load(r, p); a = r; } |
48 | |
49 | static SIMDPP_INL |
50 | void i_load(uint64x2& a, const char* p) |
51 | { |
52 | #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
53 | p = detail::assume_aligned(p, 16); |
54 | detail::null::load(a, p); |
55 | #else |
56 | uint8x16 r; i_load(r, p); a = r; |
57 | #endif |
58 | } |
59 | |
60 | static SIMDPP_INL |
61 | void i_load(float32x4& a, const char* p) |
62 | { |
63 | p = detail::assume_aligned(p, 16); |
64 | const float* q = reinterpret_cast<const float*>(p); |
65 | (void) q; |
66 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
67 | detail::null::load(a, p); |
68 | #elif SIMDPP_USE_SSE2 |
69 | a = _mm_load_ps(q); |
70 | #elif SIMDPP_USE_NEON |
71 | a = vld1q_f32(q); |
72 | #elif SIMDPP_USE_ALTIVEC |
73 | a = vec_ld(0, q); |
74 | #elif SIMDPP_USE_MSA |
75 | a = (v4f32) __msa_ld_w(q, 0); |
76 | #endif |
77 | } |
78 | |
79 | static SIMDPP_INL |
80 | void i_load(float64x2& a, const char* p) |
81 | { |
82 | p = detail::assume_aligned(p, 16); |
83 | const double* q = reinterpret_cast<const double*>(p); |
84 | (void) q; |
85 | #if SIMDPP_USE_SSE2 |
86 | a = _mm_load_pd(q); |
87 | #elif SIMDPP_USE_NEON64 |
88 | a = vld1q_f64(q); |
89 | #elif SIMDPP_USE_VSX_206 |
90 | a = vec_ld(0, reinterpret_cast<const __vector double*>(q)); |
91 | #elif SIMDPP_USE_MSA |
92 | a = (v2f64) __msa_ld_d(q, 0); |
93 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC || SIMDPP_USE_NEON32 |
94 | detail::null::load(a, p); |
95 | #endif |
96 | } |
97 | |
98 | #if SIMDPP_USE_AVX2 |
99 | static SIMDPP_INL |
100 | void i_load(uint8x32& a, const char* p) |
101 | { |
102 | a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p)); |
103 | } |
104 | static SIMDPP_INL |
105 | void i_load(uint16x16& a, const char* p) |
106 | { |
107 | a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p)); |
108 | } |
109 | static SIMDPP_INL |
110 | void i_load(uint32x8& a, const char* p) |
111 | { |
112 | a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p)); |
113 | } |
114 | static SIMDPP_INL |
115 | void i_load(uint64x4& a, const char* p) |
116 | { |
117 | a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p)); |
118 | } |
119 | #endif |
120 | #if SIMDPP_USE_AVX |
121 | static SIMDPP_INL |
122 | void i_load(float32x8& a, const char* p) |
123 | { |
124 | a = _mm256_load_ps(reinterpret_cast<const float*>(p)); |
125 | } |
126 | static SIMDPP_INL |
127 | void i_load(float64x4& a, const char* p) |
128 | { |
129 | a = _mm256_load_pd(reinterpret_cast<const double*>(p)); |
130 | } |
131 | #endif |
132 | |
133 | #if SIMDPP_USE_AVX512BW |
134 | SIMDPP_INL void i_load(uint8<64>& a, const char* p) |
135 | { |
136 | a = _mm512_load_epi32(p); |
137 | } |
138 | SIMDPP_INL void i_load(uint16<32>& a, const char* p) |
139 | { |
140 | a = _mm512_load_epi32(p); |
141 | } |
142 | #endif |
143 | |
144 | #if SIMDPP_USE_AVX512F |
145 | static SIMDPP_INL |
146 | void i_load(uint32<16>& a, const char* p) |
147 | { |
148 | a = _mm512_load_epi32(p); |
149 | } |
150 | static SIMDPP_INL |
151 | void i_load(uint64<8>& a, const char* p) |
152 | { |
153 | a = _mm512_load_epi64(p); |
154 | } |
155 | static SIMDPP_INL |
156 | void i_load(float32<16>& a, const char* p) |
157 | { |
158 | a = _mm512_load_ps(reinterpret_cast<const float*>(p)); |
159 | } |
160 | static SIMDPP_INL |
161 | void i_load(float64<8>& a, const char* p) |
162 | { |
163 | a = _mm512_load_pd(reinterpret_cast<const double*>(p)); |
164 | } |
165 | #endif |
166 | |
167 | template<class V> SIMDPP_INL |
168 | void i_load(V& a, const char* p) |
169 | { |
170 | const unsigned veclen = V::base_vector_type::length_bytes; |
171 | |
172 | for (unsigned i = 0; i < V::vec_length; ++i) { |
173 | i_load(a.vec(i), p); |
174 | p += veclen; |
175 | } |
176 | } |
177 | |
178 | template<class V> SIMDPP_INL |
179 | V i_load_any(const char* p) |
180 | { |
181 | typename detail::remove_sign<V>::type r; |
182 | i_load(r, p); |
183 | return V(r); |
184 | } |
185 | |
186 | } // namespace insn |
187 | |
188 | template<class V> SIMDPP_INL |
189 | void construct_eval(V& v, const expr_vec_load& e) |
190 | { |
191 | v = insn::i_load_any<V>(e.a); |
192 | } |
193 | |
194 | } // namespace detail |
195 | } // namespace SIMDPP_ARCH_NAMESPACE |
196 | } // namespace simdpp |
197 | |
198 | #endif |
199 | |
200 | |