load.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/load.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/types/traits.h>
17	#include <simdpp/detail/align.h>
18	#include <simdpp/detail/insn/mem_unpack.h>
19	#include <simdpp/core/transpose.h>
20	#include <simdpp/detail/null/memory.h>
21
22	namespace simdpp {
23	namespace SIMDPP_ARCH_NAMESPACE {
24	namespace detail {
25	namespace insn {
26
27	static SIMDPP_INL
28	void i_load(uint8x16& a, const char* p)
29	{
30	p = detail::assume_aligned(p, `16`);
31	#if SIMDPP_USE_NULL
32	detail::null::load(a, p);
33	#elif SIMDPP_USE_SSE2
34	a = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
35	#elif SIMDPP_USE_NEON
36	a = vreinterpretq_u8_u64(vld1q_u64(reinterpret_cast<const uint64_t*>(p)));
37	#elif SIMDPP_USE_ALTIVEC
38	a = vec_ld(`0`, reinterpret_cast<const uint8_t*>(p));
39	#elif SIMDPP_USE_MSA
40	a = (v16u8) __msa_ld_b(p, `0`);
41	#endif
42	}
43
44	static SIMDPP_INL
45	void i_load(uint16x8& a, const char* p) { uint8x16 r; i_load(r, p); a = r; }
46	static SIMDPP_INL
47	void i_load(uint32x4& a, const char* p) { uint8x16 r; i_load(r, p); a = r; }
48
49	static SIMDPP_INL
50	void i_load(uint64x2& a, const char* p)
51	{
52	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
53	p = detail::assume_aligned(p, `16`);
54	detail::null::load(a, p);
55	#else
56	uint8x16 r; i_load(r, p); a = r;
57	#endif
58	}
59
60	static SIMDPP_INL
61	void i_load(float32x4& a, const char* p)
62	{
63	p = detail::assume_aligned(p, `16`);
64	const float* q = reinterpret_cast<const float*>(p);
65	(void) q;
66	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
67	detail::null::load(a, p);
68	#elif SIMDPP_USE_SSE2
69	a = _mm_load_ps(q);
70	#elif SIMDPP_USE_NEON
71	a = vld1q_f32(q);
72	#elif SIMDPP_USE_ALTIVEC
73	a = vec_ld(`0`, q);
74	#elif SIMDPP_USE_MSA
75	a = (v4f32) __msa_ld_w(q, `0`);
76	#endif
77	}
78
79	static SIMDPP_INL
80	void i_load(float64x2& a, const char* p)
81	{
82	p = detail::assume_aligned(p, `16`);
83	const double* q = reinterpret_cast<const double*>(p);
84	(void) q;
85	#if SIMDPP_USE_SSE2
86	a = _mm_load_pd(q);
87	#elif SIMDPP_USE_NEON64
88	a = vld1q_f64(q);
89	#elif SIMDPP_USE_VSX_206
90	a = vec_ld(`0`, reinterpret_cast<const __vector double*>(q));
91	#elif SIMDPP_USE_MSA
92	a = (v2f64) __msa_ld_d(q, `0`);
93	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_NEON32
94	detail::null::load(a, p);
95	#endif
96	}
97
#if SIMDPP_USE_AVX2
// 256-bit integer loads, only compiled when AVX2 is enabled. All four lane
// widths issue the same _mm256_load_si256 on `p` reinterpreted as a
// __m256i pointer; only the destination vector type differs.

static SIMDPP_INL
void i_load(uint8x32& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}

static SIMDPP_INL
void i_load(uint16x16& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}

static SIMDPP_INL
void i_load(uint32x8& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}

static SIMDPP_INL
void i_load(uint64x4& a, const char* p)
{
    const __m256i* src = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(src);
}
#endif
#if SIMDPP_USE_AVX
// 256-bit floating-point loads, only compiled when AVX is enabled. `p` is
// reinterpreted as a pointer to the element type and passed to the matching
// _mm256_load_* intrinsic.

static SIMDPP_INL
void i_load(float32x8& a, const char* p)
{
    const float* src = reinterpret_cast<const float*>(p);
    a = _mm256_load_ps(src);
}

static SIMDPP_INL
void i_load(float64x4& a, const char* p)
{
    const double* src = reinterpret_cast<const double*>(p);
    a = _mm256_load_pd(src);
}
#endif
132
#if SIMDPP_USE_AVX512BW
// 512-bit loads for 8-bit and 16-bit integer vectors, only compiled when
// AVX512BW is enabled.
//
// Declared `static` like every other non-template i_load overload in this
// file. The load uses _mm512_load_si512, the lane-width-neutral aligned
// 512-bit integer load, rather than the 32-bit-lane-named _mm512_load_epi32;
// both read the same 64 bytes, so the loaded value is unchanged.

static SIMDPP_INL
void i_load(uint8<64>& a, const char* p)
{
    a = _mm512_load_si512(p);
}

static SIMDPP_INL
void i_load(uint16<32>& a, const char* p)
{
    a = _mm512_load_si512(p);
}
#endif
143
#if SIMDPP_USE_AVX512F
// 512-bit loads for 32/64-bit integer and float/double vectors, only compiled
// when AVX512F is enabled. Each overload forwards `p` to the _mm512_load_*
// intrinsic that matches its element type.

static SIMDPP_INL
void i_load(uint32<16>& a, const char* p)
{
    const void* src = p;
    a = _mm512_load_epi32(src);
}

static SIMDPP_INL
void i_load(uint64<8>& a, const char* p)
{
    const void* src = p;
    a = _mm512_load_epi64(src);
}

static SIMDPP_INL
void i_load(float32<16>& a, const char* p)
{
    const float* src = reinterpret_cast<const float*>(p);
    a = _mm512_load_ps(src);
}

static SIMDPP_INL
void i_load(float64<8>& a, const char* p)
{
    const double* src = reinterpret_cast<const double*>(p);
    a = _mm512_load_pd(src);
}
#endif
166
167	template<class V> SIMDPP_INL
168	void i_load(V& a, const char* p)
169	{
170	const unsigned veclen = V::base_vector_type::length_bytes;
171
172	for (unsigned i = `0`; i < V::vec_length; ++i) {
173	i_load(a.vec(i), p);
174	p += veclen;
175	}
176	}
177
178	template<class V> SIMDPP_INL
179	V i_load_any(const char* p)
180	{
181	typename detail::remove_sign<V>::type r;
182	i_load(r, p);
183	return V(r);
184	}
185
186	} // namespace insn
187
188	template<class V> SIMDPP_INL
189	void construct_eval(V& v, const expr_vec_load& e)
190	{
191	v = insn::i_load_any<V>(e.a);
192	}
193
194	} // namespace detail
195	} // namespace SIMDPP_ARCH_NAMESPACE
196	} // namespace simdpp
197
198	#endif
199
200

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/load.h