load_packed2.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/load_packed2.h]

/*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_PACKED2_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_PACKED2_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/align.h>
17	#include <simdpp/detail/insn/mem_unpack.h>
18	#include <simdpp/core/load.h>
19	#include <simdpp/detail/null/memory.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26
27	// collect some boilerplate
28	template<class V> SIMDPP_INL
29	void v128_load_packed2(V& a, V& b, const char* p);
30	template<class V> SIMDPP_INL
31	void v256_load_packed2(V& a, V& b, const char* p);
32	template<class V> SIMDPP_INL
33	void v512_load_packed2(V& a, V& b, const char* p);
34
35	// -----------------------------------------------------------------------------
36
37	static SIMDPP_INL
38	void i_load_packed2(uint8x16& a, uint8x16& b, const char* p)
39	{
40	p = detail::assume_aligned(p, `16`);
41	#if SIMDPP_USE_NULL
42	detail::null::load_packed2(a, b, p);
43	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
44	v128_load_packed2(a, b, p);
45	#elif SIMDPP_USE_NEON
46	auto r = vld2q_u8(reinterpret_cast<const uint8_t*>(p));
47	a = r.val[`0`];
48	b = r.val[`1`];
49	#endif
50	}
51
#if SIMDPP_USE_AVX2
// uint8x32 overload, compiled only when AVX2 is enabled.
// Forwards directly to v256_load_packed2.
static SIMDPP_INL void i_load_packed2(uint8x32& a, uint8x32& b, const char* p)
{
    v256_load_packed2<uint8x32>(a, b, p);
}
#endif
59
#if SIMDPP_USE_AVX512BW
// uint8<64> overload, compiled only when AVX512BW is enabled.
// Forwards directly to v512_load_packed2.
// Declared `static` like the other non-template i_load_packed2 overloads in
// this header, so that all of them share the same (internal) linkage.
static SIMDPP_INL
void i_load_packed2(uint8<64>& a, uint8<64>& b, const char* p)
{
    v512_load_packed2(a, b, p);
}
#endif
66
67	// -----------------------------------------------------------------------------
68
69	static SIMDPP_INL
70	void i_load_packed2(uint16x8& a, uint16x8& b, const char* p)
71	{
72	p = detail::assume_aligned(p, `16`);
73	#if SIMDPP_USE_NULL
74	detail::null::load_packed2(a, b, p);
75	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
76	v128_load_packed2(a, b, p);
77	#elif SIMDPP_USE_NEON
78	auto r = vld2q_u16(reinterpret_cast<const uint16_t*>(p));
79	a = r.val[`0`];
80	b = r.val[`1`];
81	#endif
82	}
83
#if SIMDPP_USE_AVX2
// uint16x16 overload, compiled only when AVX2 is enabled.
// Forwards directly to v256_load_packed2.
static SIMDPP_INL void i_load_packed2(uint16x16& a, uint16x16& b, const char* p)
{
    v256_load_packed2<uint16x16>(a, b, p);
}
#endif
91
#if SIMDPP_USE_AVX512BW
// uint16<32> overload, compiled only when AVX512BW is enabled.
// Forwards directly to v512_load_packed2.
// Declared `static` like the other non-template i_load_packed2 overloads in
// this header, so that all of them share the same (internal) linkage.
static SIMDPP_INL
void i_load_packed2(uint16<32>& a, uint16<32>& b, const char* p)
{
    v512_load_packed2(a, b, p);
}
#endif
98
99	// -----------------------------------------------------------------------------
100
101	static SIMDPP_INL
102	void i_load_packed2(uint32x4& a, uint32x4& b, const char* p)
103	{
104	p = detail::assume_aligned(p, `16`);
105	#if SIMDPP_USE_NULL
106	detail::null::load_packed2(a, b, p);
107	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
108	v128_load_packed2(a, b, p);
109	#elif SIMDPP_USE_NEON
110	auto r = vld2q_u32(reinterpret_cast<const uint32_t*>(p));
111	a = r.val[`0`];
112	b = r.val[`1`];
113	#endif
114	}
115
#if SIMDPP_USE_AVX2
// uint32x8 overload, compiled only when AVX2 is enabled.
// Forwards directly to v256_load_packed2.
static SIMDPP_INL void i_load_packed2(uint32x8& a, uint32x8& b, const char* p)
{
    v256_load_packed2<uint32x8>(a, b, p);
}
#endif
123
#if SIMDPP_USE_AVX512F
// uint32<16> overload, compiled only when AVX512F is enabled.
// Forwards directly to v512_load_packed2.
static SIMDPP_INL void i_load_packed2(uint32<16>& a, uint32<16>& b, const char* p)
{
    v512_load_packed2<uint32<16>>(a, b, p);
}
#endif
131
132	// -----------------------------------------------------------------------------
133
134	static SIMDPP_INL
135	void i_load_packed2(uint64x2& a, uint64x2& b, const char* p)
136	{
137	p = detail::assume_aligned(p, `16`);
138	#if SIMDPP_USE_NEON64
139	auto r = vld2q_u64(reinterpret_cast<const uint64_t*>(p));
140	a = r.val[`0`];
141	b = r.val[`1`];
142	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_VSX_207 \|\| SIMDPP_USE_MSA
143	v128_load_packed2(a, b, p);
144	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
145	detail::null::load_packed2(a, b, p);
146	#endif
147	}
148
#if SIMDPP_USE_AVX2
// uint64x4 overload, compiled only when AVX2 is enabled.
// Forwards directly to v256_load_packed2.
static SIMDPP_INL void i_load_packed2(uint64x4& a, uint64x4& b, const char* p)
{
    v256_load_packed2<uint64x4>(a, b, p);
}
#endif
156
#if SIMDPP_USE_AVX512F
// uint64<8> overload, compiled only when AVX512F is enabled.
// Forwards directly to v512_load_packed2.
static SIMDPP_INL void i_load_packed2(uint64<8>& a, uint64<8>& b, const char* p)
{
    v512_load_packed2<uint64<8>>(a, b, p);
}
#endif
164
165	// -----------------------------------------------------------------------------
166
167	static SIMDPP_INL
168	void i_load_packed2(float32x4& a, float32x4& b, const char* p)
169	{
170	p = detail::assume_aligned(p, `16`);
171	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
172	detail::null::load_packed2(a, b, p);
173	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
174	v128_load_packed2(a, b, p);
175	#elif SIMDPP_USE_NEON
176	auto r = vld2q_f32(reinterpret_cast<const float*>(p));
177	a = r.val[`0`];
178	b = r.val[`1`];
179	#endif
180	}
181
#if SIMDPP_USE_AVX
// float32x8 overload, compiled only when AVX is enabled.
// Forwards directly to v256_load_packed2.
static SIMDPP_INL void i_load_packed2(float32x8& a, float32x8& b, const char* p)
{
    v256_load_packed2<float32x8>(a, b, p);
}
#endif
189
#if SIMDPP_USE_AVX512F
// float32<16> overload, compiled only when AVX512F is enabled.
// Forwards directly to v512_load_packed2.
static SIMDPP_INL void i_load_packed2(float32<16>& a, float32<16>& b, const char* p)
{
    v512_load_packed2<float32<16>>(a, b, p);
}
#endif
197
198	// -----------------------------------------------------------------------------
199
200	static SIMDPP_INL
201	void i_load_packed2(float64x2& a, float64x2& b, const char* p)
202	{
203	p = detail::assume_aligned(p, `16`);
204	#if SIMDPP_USE_NEON64
205	auto r = vld2q_f64(reinterpret_cast<const double*>(p));
206	a = r.val[`0`];
207	b = r.val[`1`];
208	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
209	v128_load_packed2(a, b, p);
210	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
211	detail::null::load_packed2(a, b, p);
212	#endif
213	}
214
#if SIMDPP_USE_AVX
// float64x4 overload, compiled only when AVX is enabled.
// Forwards directly to v256_load_packed2.
static SIMDPP_INL void i_load_packed2(float64x4& a, float64x4& b, const char* p)
{
    v256_load_packed2<float64x4>(a, b, p);
}
#endif
222
#if SIMDPP_USE_AVX512F
// float64<8> overload, compiled only when AVX512F is enabled.
// Forwards directly to v512_load_packed2.
static SIMDPP_INL void i_load_packed2(float64<8>& a, float64<8>& b, const char* p)
{
    v512_load_packed2<float64<8>>(a, b, p);
}
#endif
230
231	// -----------------------------------------------------------------------------
232
233	template<class V> SIMDPP_INL
234	void v128_load_packed2(V& a, V& b, const char* p)
235	{
236	p = detail::assume_aligned(p, `16`);
237	a = load(p);
238	b = load(p + `16`);
239	mem_unpack2(a, b);
240	}
241
242	template<class V> SIMDPP_INL
243	void v256_load_packed2(V& a, V& b, const char* p)
244	{
245	p = detail::assume_aligned(p, `32`);
246	a = load(p);
247	b = load(p + `32`);
248	mem_unpack2(a, b);
249	}
250
251	template<class V> SIMDPP_INL
252	void v512_load_packed2(V& a, V& b, const char* p)
253	{
254	p = detail::assume_aligned(p, `64`);
255	a = load(p);
256	b = load(p + `64`);
257	mem_unpack2(a, b);
258	}
259
260	template<class V> SIMDPP_INL
261	void i_load_packed2(V& a, V& b, const char* p)
262	{
263	const unsigned veclen = V::base_vector_type::length_bytes;
264
265	p = detail::assume_aligned(p, veclen);
266	for (unsigned i = `0`; i < V::vec_length; ++i) {
267	i_load_packed2(a.vec(i), b.vec(i), p);
268	p += veclen*`2`;
269	}
270	}
271
272
273	} // namespace insn
274	} // namespace detail
275	} // namespace SIMDPP_ARCH_NAMESPACE
276	} // namespace simdpp
277
278	#endif
279
280

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/load_packed2.h