1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_PACKED2_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_PACKED2_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/align.h>
17#include <simdpp/detail/insn/mem_unpack.h>
18#include <simdpp/core/load.h>
19#include <simdpp/detail/null/memory.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23namespace detail {
24namespace insn {
25
26
27// collect some boilerplate
28template<class V> SIMDPP_INL
29void v128_load_packed2(V& a, V& b, const char* p);
30template<class V> SIMDPP_INL
31void v256_load_packed2(V& a, V& b, const char* p);
32template<class V> SIMDPP_INL
33void v512_load_packed2(V& a, V& b, const char* p);
34
35// -----------------------------------------------------------------------------
36
37static SIMDPP_INL
38void i_load_packed2(uint8x16& a, uint8x16& b, const char* p)
39{
40 p = detail::assume_aligned(p, 16);
41#if SIMDPP_USE_NULL
42 detail::null::load_packed2(a, b, p);
43#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
44 v128_load_packed2(a, b, p);
45#elif SIMDPP_USE_NEON
46 auto r = vld2q_u8(reinterpret_cast<const uint8_t*>(p));
47 a = r.val[0];
48 b = r.val[1];
49#endif
50}
51
#if SIMDPP_USE_AVX2
// 32 x 8-bit overload (compiled only with SIMDPP_USE_AVX2): forwards to the
// generic 256-bit helper.
static SIMDPP_INL void i_load_packed2(uint8x32& a, uint8x32& b, const char* p)
{
    v256_load_packed2<uint8x32>(a, b, p);
}
#endif
59
#if SIMDPP_USE_AVX512BW
// 64 x 8-bit overload (compiled only with SIMDPP_USE_AVX512BW): forwards to
// the generic 512-bit helper. Declared static like the other non-template
// overloads of i_load_packed2 in this file.
static SIMDPP_INL
void i_load_packed2(uint8<64>& a, uint8<64>& b, const char* p)
{
    v512_load_packed2(a, b, p);
}
#endif
66
67// -----------------------------------------------------------------------------
68
69static SIMDPP_INL
70void i_load_packed2(uint16x8& a, uint16x8& b, const char* p)
71{
72 p = detail::assume_aligned(p, 16);
73#if SIMDPP_USE_NULL
74 detail::null::load_packed2(a, b, p);
75#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
76 v128_load_packed2(a, b, p);
77#elif SIMDPP_USE_NEON
78 auto r = vld2q_u16(reinterpret_cast<const uint16_t*>(p));
79 a = r.val[0];
80 b = r.val[1];
81#endif
82}
83
#if SIMDPP_USE_AVX2
// 16 x 16-bit overload (compiled only with SIMDPP_USE_AVX2): forwards to the
// generic 256-bit helper.
static SIMDPP_INL void i_load_packed2(uint16x16& a, uint16x16& b, const char* p)
{
    v256_load_packed2<uint16x16>(a, b, p);
}
#endif
91
#if SIMDPP_USE_AVX512BW
// 32 x 16-bit overload (compiled only with SIMDPP_USE_AVX512BW): forwards to
// the generic 512-bit helper. Declared static like the other non-template
// overloads of i_load_packed2 in this file.
static SIMDPP_INL
void i_load_packed2(uint16<32>& a, uint16<32>& b, const char* p)
{
    v512_load_packed2(a, b, p);
}
#endif
98
99// -----------------------------------------------------------------------------
100
101static SIMDPP_INL
102void i_load_packed2(uint32x4& a, uint32x4& b, const char* p)
103{
104 p = detail::assume_aligned(p, 16);
105#if SIMDPP_USE_NULL
106 detail::null::load_packed2(a, b, p);
107#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
108 v128_load_packed2(a, b, p);
109#elif SIMDPP_USE_NEON
110 auto r = vld2q_u32(reinterpret_cast<const uint32_t*>(p));
111 a = r.val[0];
112 b = r.val[1];
113#endif
114}
115
#if SIMDPP_USE_AVX2
// 8 x 32-bit overload (compiled only with SIMDPP_USE_AVX2): forwards to the
// generic 256-bit helper.
static SIMDPP_INL void i_load_packed2(uint32x8& a, uint32x8& b, const char* p)
{
    v256_load_packed2<uint32x8>(a, b, p);
}
#endif
123
#if SIMDPP_USE_AVX512F
// 16 x 32-bit overload (compiled only with SIMDPP_USE_AVX512F): forwards to
// the generic 512-bit helper.
static SIMDPP_INL void i_load_packed2(uint32<16>& a, uint32<16>& b, const char* p)
{
    v512_load_packed2< uint32<16> >(a, b, p);
}
#endif
131
132// -----------------------------------------------------------------------------
133
134static SIMDPP_INL
135void i_load_packed2(uint64x2& a, uint64x2& b, const char* p)
136{
137 p = detail::assume_aligned(p, 16);
138#if SIMDPP_USE_NEON64
139 auto r = vld2q_u64(reinterpret_cast<const uint64_t*>(p));
140 a = r.val[0];
141 b = r.val[1];
142#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
143 v128_load_packed2(a, b, p);
144#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
145 detail::null::load_packed2(a, b, p);
146#endif
147}
148
#if SIMDPP_USE_AVX2
// 4 x 64-bit overload (compiled only with SIMDPP_USE_AVX2): forwards to the
// generic 256-bit helper.
static SIMDPP_INL void i_load_packed2(uint64x4& a, uint64x4& b, const char* p)
{
    v256_load_packed2<uint64x4>(a, b, p);
}
#endif
156
#if SIMDPP_USE_AVX512F
// 8 x 64-bit overload (compiled only with SIMDPP_USE_AVX512F): forwards to
// the generic 512-bit helper.
static SIMDPP_INL void i_load_packed2(uint64<8>& a, uint64<8>& b, const char* p)
{
    v512_load_packed2< uint64<8> >(a, b, p);
}
#endif
164
165// -----------------------------------------------------------------------------
166
167static SIMDPP_INL
168void i_load_packed2(float32x4& a, float32x4& b, const char* p)
169{
170 p = detail::assume_aligned(p, 16);
171#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
172 detail::null::load_packed2(a, b, p);
173#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
174 v128_load_packed2(a, b, p);
175#elif SIMDPP_USE_NEON
176 auto r = vld2q_f32(reinterpret_cast<const float*>(p));
177 a = r.val[0];
178 b = r.val[1];
179#endif
180}
181
#if SIMDPP_USE_AVX
// 8 x float32 overload (compiled only with SIMDPP_USE_AVX): forwards to the
// generic 256-bit helper.
static SIMDPP_INL void i_load_packed2(float32x8& a, float32x8& b, const char* p)
{
    v256_load_packed2<float32x8>(a, b, p);
}
#endif
189
#if SIMDPP_USE_AVX512F
// 16 x float32 overload (compiled only with SIMDPP_USE_AVX512F): forwards to
// the generic 512-bit helper.
static SIMDPP_INL void i_load_packed2(float32<16>& a, float32<16>& b, const char* p)
{
    v512_load_packed2< float32<16> >(a, b, p);
}
#endif
197
198// -----------------------------------------------------------------------------
199
200static SIMDPP_INL
201void i_load_packed2(float64x2& a, float64x2& b, const char* p)
202{
203 p = detail::assume_aligned(p, 16);
204#if SIMDPP_USE_NEON64
205 auto r = vld2q_f64(reinterpret_cast<const double*>(p));
206 a = r.val[0];
207 b = r.val[1];
208#elif SIMDPP_USE_SSE2 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
209 v128_load_packed2(a, b, p);
210#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
211 detail::null::load_packed2(a, b, p);
212#endif
213}
214
#if SIMDPP_USE_AVX
// 4 x float64 overload (compiled only with SIMDPP_USE_AVX): forwards to the
// generic 256-bit helper.
static SIMDPP_INL void i_load_packed2(float64x4& a, float64x4& b, const char* p)
{
    v256_load_packed2<float64x4>(a, b, p);
}
#endif
222
#if SIMDPP_USE_AVX512F
// 8 x float64 overload (compiled only with SIMDPP_USE_AVX512F): forwards to
// the generic 512-bit helper.
static SIMDPP_INL void i_load_packed2(float64<8>& a, float64<8>& b, const char* p)
{
    v512_load_packed2< float64<8> >(a, b, p);
}
#endif
230
231// -----------------------------------------------------------------------------
232
233template<class V> SIMDPP_INL
234void v128_load_packed2(V& a, V& b, const char* p)
235{
236 p = detail::assume_aligned(p, 16);
237 a = load(p);
238 b = load(p + 16);
239 mem_unpack2(a, b);
240}
241
242template<class V> SIMDPP_INL
243void v256_load_packed2(V& a, V& b, const char* p)
244{
245 p = detail::assume_aligned(p, 32);
246 a = load(p);
247 b = load(p + 32);
248 mem_unpack2(a, b);
249}
250
251template<class V> SIMDPP_INL
252void v512_load_packed2(V& a, V& b, const char* p)
253{
254 p = detail::assume_aligned(p, 64);
255 a = load(p);
256 b = load(p + 64);
257 mem_unpack2(a, b);
258}
259
260template<class V> SIMDPP_INL
261void i_load_packed2(V& a, V& b, const char* p)
262{
263 const unsigned veclen = V::base_vector_type::length_bytes;
264
265 p = detail::assume_aligned(p, veclen);
266 for (unsigned i = 0; i < V::vec_length; ++i) {
267 i_load_packed2(a.vec(i), b.vec(i), p);
268 p += veclen*2;
269 }
270}
271
272
273} // namespace insn
274} // namespace detail
275} // namespace SIMDPP_ARCH_NAMESPACE
276} // namespace simdpp
277
278#endif
279
280