store_packed2.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_packed2.h]

1	/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_PACKED2_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_PACKED2_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/align.h>
17	#include <simdpp/detail/insn/mem_pack.h>
18	#include <simdpp/core/store.h>
19	#include <simdpp/detail/null/memory.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26	// collect some boilerplate
27	template<class V> SIMDPP_INL
28	void v128_store_pack2(char* p, const V& ca, const V& cb);
29	template<class V> SIMDPP_INL
30	void v256_store_pack2(char* p, const V& ca, const V& cb);
31	template<class V> SIMDPP_INL
32	void v512_store_pack2(char* p, const V& ca, const V& cb);
33
34	// -----------------------------------------------------------------------------
35
36	static SIMDPP_INL
37	void i_store_packed2(char* p, const uint8x16& a, const uint8x16& b)
38	{
39	p = detail::assume_aligned(p, `16`);
40	#if SIMDPP_USE_NULL
41	detail::null::store_packed2(p, a, b);
42	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
43	v128_store_pack2(p, a, b);
44	#elif SIMDPP_USE_NEON
45	uint8x16x2_t t;
46	t.val[`0`] = a.native();
47	t.val[`1`] = b.native();
48	vst2q_u8(reinterpret_cast<uint8_t*>(p), t);
49	#endif
50	}
51
#if SIMDPP_USE_AVX2
// uint8x32 overload, compiled only for AVX2 builds: forwards unchanged to
// the shared v256_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const uint8x32& a, const uint8x32& b)
{
    v256_store_pack2<uint8x32>(p, a, b);
}
#endif
59
#if SIMDPP_USE_AVX512BW
// uint8<64> overload, compiled only when AVX512BW is enabled: forwards
// unchanged to the shared v512_store_pack2 helper.
// Declared `static` so that it has the same linkage as every other
// non-template i_store_packed2 overload in this header.
static SIMDPP_INL
void i_store_packed2(char* p, const uint8<64>& a, const uint8<64>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
66
67	// -----------------------------------------------------------------------------
68
69	static SIMDPP_INL
70	void i_store_packed2(char* p, const uint16x8& a, const uint16x8& b)
71	{
72	p = detail::assume_aligned(p, `16`);
73	#if SIMDPP_USE_NULL
74	detail::null::store_packed2(p, a, b);
75	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
76	v128_store_pack2(p, a, b);
77	#elif SIMDPP_USE_NEON
78	uint16x8x2_t t;
79	t.val[`0`] = a.native();
80	t.val[`1`] = b.native();
81	vst2q_u16(reinterpret_cast<uint16_t*>(p), t);
82	#endif
83	}
84
#if SIMDPP_USE_AVX2
// uint16x16 overload, compiled only for AVX2 builds: forwards unchanged to
// the shared v256_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const uint16x16& a, const uint16x16& b)
{
    v256_store_pack2<uint16x16>(p, a, b);
}
#endif
92
#if SIMDPP_USE_AVX512BW
// uint16<32> overload, compiled only when AVX512BW is enabled: forwards
// unchanged to the shared v512_store_pack2 helper.
// Declared `static` so that it has the same linkage as every other
// non-template i_store_packed2 overload in this header.
static SIMDPP_INL
void i_store_packed2(char* p, const uint16<32>& a, const uint16<32>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
99
100	// -----------------------------------------------------------------------------
101
102	static SIMDPP_INL
103	void i_store_packed2(char* p, const uint32x4& a, const uint32x4& b)
104	{
105	p = detail::assume_aligned(p, `16`);
106	#if SIMDPP_USE_NULL
107	detail::null::store_packed2(p, a, b);
108	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
109	v128_store_pack2(p, a, b);
110	#elif SIMDPP_USE_NEON
111	uint32x4x2_t t;
112	t.val[`0`] = a.native();
113	t.val[`1`] = b.native();
114	vst2q_u32(reinterpret_cast<uint32_t*>(p), t);
115	#endif
116	}
117
#if SIMDPP_USE_AVX2
// uint32x8 overload, compiled only for AVX2 builds: forwards unchanged to
// the shared v256_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const uint32x8& a, const uint32x8& b)
{
    v256_store_pack2<uint32x8>(p, a, b);
}
#endif
125
#if SIMDPP_USE_AVX512F
// uint32<16> overload, compiled only when AVX512F is enabled: forwards
// unchanged to the shared v512_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const uint32<16>& a, const uint32<16>& b)
{
    v512_store_pack2< uint32<16> >(p, a, b);
}
#endif
133
134	// -----------------------------------------------------------------------------
135
136	static SIMDPP_INL
137	void i_store_packed2(char* p, const uint64x2& a, const uint64x2& b)
138	{
139	#if SIMDPP_USE_NEON64
140	uint64x2x2_t t;
141	t.val[`0`] = a.native();
142	t.val[`1`] = b.native();
143	vst2q_u64(reinterpret_cast<uint64_t*>(p), t);
144	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_VSX_207 \|\| SIMDPP_USE_MSA
145	v128_store_pack2(p, a, b);
146	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
147	detail::null::store_packed2(p, a, b);
148	#endif
149	}
150
#if SIMDPP_USE_AVX2
// uint64x4 overload, compiled only for AVX2 builds: forwards unchanged to
// the shared v256_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const uint64x4& a, const uint64x4& b)
{
    v256_store_pack2<uint64x4>(p, a, b);
}
#endif
158
#if SIMDPP_USE_AVX512F
// uint64<8> overload, compiled only when AVX512F is enabled: forwards
// unchanged to the shared v512_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const uint64<8>& a, const uint64<8>& b)
{
    v512_store_pack2< uint64<8> >(p, a, b);
}
#endif
166
167	// -----------------------------------------------------------------------------
168
169	static SIMDPP_INL
170	void i_store_packed2(char* p, const float32x4& a, const float32x4& b)
171	{
172	p = detail::assume_aligned(p, `16`);
173	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
174	detail::null::store_packed2(p, a, b);
175	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
176	v128_store_pack2(p, a, b);
177	#elif SIMDPP_USE_NEON
178	float32x4x2_t t;
179	t.val[`0`] = a.native();
180	t.val[`1`] = b.native();
181	vst2q_f32(reinterpret_cast<float*>(p), t);
182	#endif
183	}
184
#if SIMDPP_USE_AVX
// float32x8 overload, compiled only for AVX builds: forwards unchanged to
// the shared v256_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const float32x8& a, const float32x8& b)
{
    v256_store_pack2<float32x8>(p, a, b);
}
#endif
192
#if SIMDPP_USE_AVX512F
// float32<16> overload, compiled only when AVX512F is enabled: forwards
// unchanged to the shared v512_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const float32<16>& a, const float32<16>& b)
{
    v512_store_pack2< float32<16> >(p, a, b);
}
#endif
200
201	// -----------------------------------------------------------------------------
202
203	static SIMDPP_INL
204	void i_store_packed2(char* p, const float64x2& a, const float64x2& b)
205	{
206	#if SIMDPP_USE_NEON64
207	float64x2x2_t t;
208	t.val[`0`] = a.native();
209	t.val[`1`] = b.native();
210	vst2q_f64(reinterpret_cast<double*>(p), t);
211	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
212	v128_store_pack2(p, a, b);
213	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
214	detail::null::store_packed2(p, a, b);
215	#endif
216	}
217
#if SIMDPP_USE_AVX
// float64x4 overload, compiled only for AVX builds: forwards unchanged to
// the shared v256_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const float64x4& a, const float64x4& b)
{
    v256_store_pack2<float64x4>(p, a, b);
}
#endif
225
#if SIMDPP_USE_AVX512F
// float64<8> overload, compiled only when AVX512F is enabled: forwards
// unchanged to the shared v512_store_pack2 helper.
static SIMDPP_INL void i_store_packed2(char* p, const float64<8>& a, const float64<8>& b)
{
    v512_store_pack2< float64<8> >(p, a, b);
}
#endif
233
234	// -----------------------------------------------------------------------------
235
236	template<class V> SIMDPP_INL
237	void v128_store_pack2(char* p, const V& ca, const V& cb)
238	{
239	p = detail::assume_aligned(p, `32`);
240	V a = ca, b = cb;
241	mem_pack2(a, b);
242	i_store(p, a);
243	i_store(p + `16`, b);
244	}
245
246	template<class V> SIMDPP_INL
247	void v256_store_pack2(char* p, const V& ca, const V& cb)
248	{
249	p = detail::assume_aligned(p, `32`);
250	V a = ca, b = cb;
251	mem_pack2(a, b);
252	i_store(p, a);
253	i_store(p + `32`, b);
254	}
255
256	template<class V> SIMDPP_INL
257	void v512_store_pack2(char* p, const V& ca, const V& cb)
258	{
259	p = detail::assume_aligned(p, `32`);
260	V a = ca, b = cb;
261	mem_pack2(a, b);
262	i_store(p, a);
263	i_store(p + `64`, b);
264	}
265
266
267	template<class V> SIMDPP_INL
268	void i_store_packed2(char* p, const V& ca, const V& cb)
269	{
270	const unsigned veclen = V::base_vector_type::length_bytes;
271	typename detail::remove_sign<V>::type a = ca, b = cb;
272
273	p = detail::assume_aligned(p, veclen);
274	for (unsigned i = `0`; i < V::vec_length; ++i) {
275	i_store_packed2(p, a.vec(i), b.vec(i));
276	p += veclen*`2`;
277	}
278	}
279
280	} // namespace insn
281	} // namespace detail
282	} // namespace SIMDPP_ARCH_NAMESPACE
283	} // namespace simdpp
284
285	#endif
286

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_packed2.h