store_u.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_u.h]

/*  Copyright (C) 2015

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_U_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_U_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/null/memory.h>
17	#include <simdpp/detail/align.h>
18	#include <simdpp/core/cast.h>
19
20	namespace simdpp {
21	#ifndef SIMDPP_DOXYGEN
22	namespace SIMDPP_ARCH_NAMESPACE {
23	#endif
24	namespace detail {
25	namespace insn {
26
27	static SIMDPP_INL
28	void i_store_u(char* p, const uint8<`16`>& a)
29	{
30	#if SIMDPP_USE_NULL
31	detail::null::store(p, a);
32	#elif SIMDPP_USE_SSE2
33	_mm_storeu_si128(reinterpret_cast<__m128i*>(p), a.native());
34	#elif SIMDPP_USE_NEON
35	vst1q_u8(reinterpret_cast<uint8_t*>(p), a.native());
36	#elif SIMDPP_USE_VSX_206
37	uint8_t* q = reinterpret_cast<uint8_t*>(p);
38	vec_vsx_st(a.native(), `0`, q);
39	#elif SIMDPP_USE_ALTIVEC
40	// From https://web.archive.org/web/20110305043420/http://developer.apple.com/hardwaredrivers/ve/alignment.html
41	uint8_t* q = reinterpret_cast<uint8_t*>(p);
42	__vector uint8_t msq, lsq, edges;
43	__vector uint8_t edge_align, align;
44	msq = vec_ld(`0`, q); // most significant quadword
45	lsq = vec_ld(`15`, q); // least significant quadword
46	// The address offset is 15 to take into account storing into aligned
47	// addresses. If 16 were used, then when q is 16-byte aligned we would
48	// access the next second 16-byte block, which could be on different page
49	// and inaccessible.
50	#pragma GCC diagnostic push
51	#pragma GCC diagnostic ignored "-Wdeprecated"
52	edge_align = vec_lvsl(`0`, q); // permute map to extract edges
53	edges = vec_perm(lsq, msq, edge_align); // extract the edges
54	align = vec_lvsr(`0`, q); // permute map to misalign data
55	#pragma GCC diagnostic pop
56	msq = vec_perm(edges, a.native(), align); // misalign the data (msq)
57	lsq = vec_perm(a.native(), edges, align); // misalign the data (lsq)
58	vec_st(lsq, `15`, q); // Store the lsq part first
59	vec_st(msq, `0`, q); // Store the msq part
60	#elif SIMDPP_USE_MSA
61	__msa_st_b((v16i8) a.native(), p, `0`);
62	#endif
63	}
64
65	static SIMDPP_INL
66	void i_store_u(char* p, const uint16<`8`>& a)
67	{
68	#if SIMDPP_USE_NEON
69	vst1q_u16(reinterpret_cast<uint16_t*>(p), a.native());
70	#elif SIMDPP_USE_MSA
71	__msa_st_h((v8i16) a.native(), p, `0`);
72	#else
73	i_store_u(p, uint8<`16`>(a));
74	#endif
75	}
76
77	static SIMDPP_INL
78	void i_store_u(char* p, const uint32<`4`>& a)
79	{
80	#if SIMDPP_USE_NEON
81	vst1q_u32(reinterpret_cast<uint32_t*>(p), a.native());
82	#elif SIMDPP_USE_VSX_206
83	vec_vsx_st(a.native(), `0`, reinterpret_cast<__vector uint32_t*>(p));
84	#elif SIMDPP_USE_MSA
85	__msa_st_w((v4i32) a.native(), p, `0`);
86	#else
87	i_store_u(p, uint8<`16`>(a));
88	#endif
89	}
90
91	static SIMDPP_INL
92	void i_store_u(char* p, const uint64<`2`>& a)
93	{
94	#if SIMDPP_USE_NEON
95	vst1q_u64(reinterpret_cast<uint64_t*>(p), a.native());
96	#elif SIMDPP_USE_VSX_207
97	vec_vsx_st((__vector uint32_t) a.native(), `0`,
98	reinterpret_cast<__vector uint32_t*>(p));
99	#elif SIMDPP_USE_MSA
100	__msa_st_d((v2i64) a.native(), p, `0`);
101	#else
102	i_store_u(p, uint8<`16`>(a));
103	#endif
104	}
105
106	static SIMDPP_INL
107	void i_store_u(char* p, const float32x4& a)
108	{
109	float* q = reinterpret_cast<float*>(p);
110	(void) q;
111	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
112	detail::null::store(p, a);
113	#elif SIMDPP_USE_SSE2
114	_mm_storeu_ps(q, a.native());
115	#elif SIMDPP_USE_NEON
116	vst1q_f32(q, a.native());
117	#elif SIMDPP_USE_VSX_206
118	vec_vsx_st(a.native(), `0`, q);
119	#elif SIMDPP_USE_ALTIVEC
120	uint32x4 b = bit_cast<uint32x4>(a.eval());
121	i_store_u(reinterpret_cast<char*>(q), b);
122	#elif SIMDPP_USE_MSA
123	__msa_st_w((v4i32) a.native(), q, `0`);
124	#endif
125	}
126
127	static SIMDPP_INL
128	void i_store_u(char* p, const float64x2& a)
129	{
130	double* q = reinterpret_cast<double*>(p);
131	(void) q;
132	#if SIMDPP_USE_SSE2
133	_mm_storeu_pd(q, a.native());
134	#elif SIMDPP_USE_NEON64
135	vst1q_f64(q, a.native());
136	#elif SIMDPP_USE_VSX_206
137	vec_vsx_st(a.native(), `0`, q);
138	#elif SIMDPP_USE_MSA
139	__msa_st_d((v2i64) a.native(), q, `0`);
140	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
141	detail::null::store(p, a);
142	#endif
143	}
144
145
146	#if SIMDPP_USE_AVX2
147	static SIMDPP_INL
148	void i_store_u(char* p, const uint8<`32`>& a)
149	{
150	_mm256_storeu_si256(reinterpret_cast<__m256i*>(p), a.native());
151	}
152
153	static SIMDPP_INL
154	void i_store_u(char* p, const uint16<`16`>& a)
155	{
156	_mm256_storeu_si256(reinterpret_cast<__m256i*>(p), a.native());
157	}
158
159	static SIMDPP_INL
160	void i_store_u(char* p, const uint32<`8`>& a)
161	{
162	_mm256_storeu_si256(reinterpret_cast<__m256i*>(p), a.native());
163	}
164
165	static SIMDPP_INL
166	void i_store_u(char* p, const uint64<`4`>& a)
167	{
168	_mm256_storeu_si256(reinterpret_cast<__m256i*>(p), a.native());
169	}
170	#endif
171
172	#if SIMDPP_USE_AVX
173	static SIMDPP_INL
174	void i_store_u(char* p, const float32x8& a)
175	{
176	_mm256_storeu_ps(reinterpret_cast<float*>(p), a.native());
177	}
178
179	static SIMDPP_INL
180	void i_store_u(char* p, const float64x4& a)
181	{
182	_mm256_storeu_pd(reinterpret_cast<double*>(p), a.native());
183	}
184	#endif
185
186	#if SIMDPP_USE_AVX512BW
187	SIMDPP_INL void i_store_u(char* p, const uint8<`64`>& a)
188	{
189	_mm512_storeu_si512(reinterpret_cast<__m512i*>(p), a.native());
190	}
191
192	SIMDPP_INL void i_store_u(char* p, const uint16<`32`>& a)
193	{
194	_mm512_storeu_si512(reinterpret_cast<__m512i*>(p), a.native());
195	}
196	#endif
197
198	#if SIMDPP_USE_AVX512F
199	static SIMDPP_INL
200	void i_store_u(char* p, const uint32<`16`>& a)
201	{
202	_mm512_storeu_si512(reinterpret_cast<__m512i*>(p), a.native());
203	}
204
205	static SIMDPP_INL
206	void i_store_u(char* p, const uint64<`8`>& a)
207	{
208	_mm512_storeu_si512(reinterpret_cast<__m512i*>(p), a.native());
209	}
210
211	static SIMDPP_INL
212	void i_store_u(char* p, const float32<`16`>& a)
213	{
214	_mm512_storeu_ps(p, a.native());
215	}
216
217	static SIMDPP_INL
218	void i_store_u(char* p, const float64<`8`>& a)
219	{
220	_mm512_storeu_pd(p, a.native());
221	}
222	#endif
223
224	template<class V> SIMDPP_INL
225	void v_store_u(char* p, const V& a)
226	{
227	const unsigned veclen = V::base_vector_type::length_bytes;
228
229	for (unsigned i = `0`; i < V::vec_length; ++i) {
230	i_store_u(p, a.vec(i));
231	p += veclen;
232	}
233	}
234
235	} // namespace insn
236	} // namespace detail
237	#ifndef SIMDPP_DOXYGEN
238	} // namespace SIMDPP_ARCH_NAMESPACE
239	#endif
240	} // namespace simdpp
241
242	#endif
243
244

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_u.h