// store_packed3.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_packed3.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_PACKED3_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_PACKED3_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/align.h>
17	#include <simdpp/detail/insn/mem_pack.h>
18	#include <simdpp/core/store.h>
19	#include <simdpp/detail/null/memory.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26	// collect some boilerplate
27	template<class V> SIMDPP_INL
28	void v128_store_pack3(char* p, const V& ca, const V& cb, const V& cc);
29	template<class V> SIMDPP_INL
30	void v256_store_pack3(char* p, const V& ca, const V& cb, const V& cc);
31	template<class V> SIMDPP_INL
32	void v512_store_pack3(char* p, const V& ca, const V& cb, const V& cc);
33
34	// -----------------------------------------------------------------------------
35
36	static SIMDPP_INL
37	void i_store_packed3(char* p, const uint8x16& a, const uint8x16& b, const uint8x16& c)
38	{
39	p = detail::assume_aligned(p, `16`);
40	#if SIMDPP_USE_NULL
41	detail::null::store_packed3(p, a, b, c);
42	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
43	v128_store_pack3(p, a, b, c);
44	#elif SIMDPP_USE_NEON
45	uint8x16x3_t t;
46	t.val[`0`] = a.native();
47	t.val[`1`] = b.native();
48	t.val[`2`] = c.native();
49	vst3q_u8(reinterpret_cast<uint8_t*>(p), t);
50	#endif
51	}
52
#if SIMDPP_USE_AVX2
// Stores the three uint8x32 vectors a, b, c to p by forwarding to
// v256_store_pack3. Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint8x32& a, const uint8x32& b, const uint8x32& c)
{
    v256_store_pack3(p, a, b, c);
}
#endif
60
#if SIMDPP_USE_AVX512BW
// Stores the three uint8<64> vectors a, b, c to p by forwarding to
// v512_store_pack3. Only compiled when SIMDPP_USE_AVX512BW is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint8<64>& a, const uint8<64>& b, const uint8<64>& c)
{
    v512_store_pack3(p, a, b, c);
}
#endif
68
69	// -----------------------------------------------------------------------------
70
71	static SIMDPP_INL
72	void i_store_packed3(char* p, const uint16x8& a, const uint16x8& b, const uint16x8& c)
73	{
74	p = detail::assume_aligned(p, `16`);
75	#if SIMDPP_USE_NULL
76	detail::null::store_packed3(p, a, b, c);
77	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
78	v128_store_pack3(p, a, b, c);
79	#elif SIMDPP_USE_NEON
80	uint16x8x3_t t;
81	t.val[`0`] = a.native();
82	t.val[`1`] = b.native();
83	t.val[`2`] = c.native();
84	vst3q_u16(reinterpret_cast<uint16_t*>(p), t);
85	#endif
86	}
87
#if SIMDPP_USE_AVX2
// Stores the three uint16x16 vectors a, b, c to p by forwarding to
// v256_store_pack3. Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint16x16& a, const uint16x16& b, const uint16x16& c)
{
    v256_store_pack3(p, a, b, c);
}
#endif
95
#if SIMDPP_USE_AVX512BW
// Stores the three uint16<32> vectors a, b, c to p by forwarding to
// v512_store_pack3. Only compiled when SIMDPP_USE_AVX512BW is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint16<32>& a, const uint16<32>& b, const uint16<32>& c)
{
    v512_store_pack3(p, a, b, c);
}
#endif
103
104	// -----------------------------------------------------------------------------
105
106	static SIMDPP_INL
107	void i_store_packed3(char* p, const uint32x4& a, const uint32x4& b, const uint32x4& c)
108	{
109	p = detail::assume_aligned(p, `16`);
110	#if SIMDPP_USE_NULL
111	detail::null::store_packed3(p, a, b, c);
112	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
113	v128_store_pack3(p, a, b, c);
114	#elif SIMDPP_USE_NEON
115	uint32x4x3_t t;
116	t.val[`0`] = a.native();
117	t.val[`1`] = b.native();
118	t.val[`2`] = c.native();
119	vst3q_u32(reinterpret_cast<uint32_t*>(p), t);
120	#endif
121	}
122
#if SIMDPP_USE_AVX2
// Stores the three uint32x8 vectors a, b, c to p by forwarding to
// v256_store_pack3. Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint32x8& a, const uint32x8& b, const uint32x8& c)
{
    v256_store_pack3(p, a, b, c);
}
#endif
130
#if SIMDPP_USE_AVX512F
// Stores the three uint32<16> vectors a, b, c to p by forwarding to
// v512_store_pack3. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint32<16>& a, const uint32<16>& b, const uint32<16>& c)
{
    v512_store_pack3(p, a, b, c);
}
#endif
138
139	// -----------------------------------------------------------------------------
140
141	static SIMDPP_INL
142	void i_store_packed3(char* p, const uint64x2& a, const uint64x2& b, const uint64x2& c)
143	{
144	p = detail::assume_aligned(p, `16`);
145	#if SIMDPP_USE_SSE2 \|\| SIMDPP_USE_VSX_207 \|\| SIMDPP_USE_MSA
146	v128_store_pack3(p, a, b, c);
147	#elif SIMDPP_USE_NEON32
148	uint64_t* q = reinterpret_cast<uint64_t*>(p);
149	uint64x1x2_t t1, t2, t3;
150	t1.val[`0`] = vget_low_u64(a.native()); t1.val[`1`] = vget_low_u64(b.native());
151	t2.val[`0`] = vget_low_u64(c.native()); t2.val[`1`] = vget_high_u64(a.native());
152	t3.val[`0`] = vget_high_u64(b.native()); t3.val[`1`] = vget_high_u64(c.native());
153
154	vst2_u64(q, t1);
155	vst2_u64(q+`2`, t2);
156	vst2_u64(q+`4`, t3);
157	#elif SIMDPP_USE_NEON64
158	uint64x2x3_t t;
159	t.val[`0`] = a.native();
160	t.val[`1`] = b.native();
161	t.val[`2`] = c.native();
162	vst3q_u64(reinterpret_cast<uint64_t*>(p), t);
163	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
164	detail::null::store_packed3(p, a, b, c);
165	#endif
166	}
167
#if SIMDPP_USE_AVX2
// Stores the three uint64x4 vectors a, b, c to p by forwarding to
// v256_store_pack3. Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint64x4& a, const uint64x4& b, const uint64x4& c)
{
    v256_store_pack3(p, a, b, c);
}
#endif
175
#if SIMDPP_USE_AVX512F
// Stores the three uint64<8> vectors a, b, c to p by forwarding to
// v512_store_pack3. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL
void i_store_packed3(char* p, const uint64<8>& a, const uint64<8>& b, const uint64<8>& c)
{
    v512_store_pack3(p, a, b, c);
}
#endif
183
184	// -----------------------------------------------------------------------------
185
186	static SIMDPP_INL
187	void i_store_packed3(char* p, const float32x4& a, const float32x4& b, const float32x4& c)
188	{
189	p = detail::assume_aligned(p, `16`);
190	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
191	detail::null::store_packed3(p, a, b, c);
192	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
193	v128_store_pack3(p, a, b, c);
194	#elif SIMDPP_USE_NEON
195	float32x4x3_t t;
196	t.val[`0`] = a.native();
197	t.val[`1`] = b.native();
198	t.val[`2`] = c.native();
199	vst3q_f32(reinterpret_cast<float*>(p), t);
200	#endif
201	}
202
#if SIMDPP_USE_AVX
// Stores the three float32x8 vectors a, b, c to p by forwarding to
// v256_store_pack3. Only compiled when SIMDPP_USE_AVX is set.
static SIMDPP_INL
void i_store_packed3(char* p, const float32x8& a, const float32x8& b, const float32x8& c)
{
    v256_store_pack3(p, a, b, c);
}
#endif
210
#if SIMDPP_USE_AVX512F
// Stores the three float32<16> vectors a, b, c to p by forwarding to
// v512_store_pack3. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL
void i_store_packed3(char* p, const float32<16>& a, const float32<16>& b, const float32<16>& c)
{
    v512_store_pack3(p, a, b, c);
}
#endif
218
219	// -----------------------------------------------------------------------------
220
221	static SIMDPP_INL
222	void i_store_packed3(char* p, const float64x2& a, const float64x2& b, const float64x2& c)
223	{
224	p = detail::assume_aligned(p, `16`);
225	#if SIMDPP_USE_SSE2 \|\| SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
226	v128_store_pack3(p, a, b, c);
227	#elif SIMDPP_USE_NEON64
228	float64x2x3_t t;
229	t.val[`0`] = a.native();
230	t.val[`1`] = b.native();
231	t.val[`2`] = c.native();
232	vst3q_f64(reinterpret_cast<double*>(p), t);
233	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_NEON
234	detail::null::store_packed3(p, a, b, c);
235	#endif
236	}
237
#if SIMDPP_USE_AVX
// Stores the three float64x4 vectors a, b, c to p by forwarding to
// v256_store_pack3. Only compiled when SIMDPP_USE_AVX is set.
static SIMDPP_INL
void i_store_packed3(char* p, const float64x4& a, const float64x4& b, const float64x4& c)
{
    v256_store_pack3(p, a, b, c);
}
#endif
245
#if SIMDPP_USE_AVX512F
// Stores the three float64<8> vectors a, b, c to p by forwarding to
// v512_store_pack3. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL
void i_store_packed3(char* p, const float64<8>& a, const float64<8>& b, const float64<8>& c)
{
    v512_store_pack3(p, a, b, c);
}
#endif
253
254	// -----------------------------------------------------------------------------
255
256	template<class V> SIMDPP_INL
257	void v128_store_pack3(char* p, const V& ca, const V& cb, const V& cc)
258	{
259	p = detail::assume_aligned(p, `16`);
260	V a = ca, b = cb, c = cc;
261	mem_pack3(a, b, c);
262	i_store(p, a);
263	i_store(p + `16`, b);
264	i_store(p + `32`, c);
265	}
266
267	template<class V> SIMDPP_INL
268	void v256_store_pack3(char* p, const V& ca, const V& cb, const V& cc)
269	{
270	p = detail::assume_aligned(p, `32`);
271	V a = ca, b = cb, c = cc;
272	mem_pack3(a, b, c);
273	i_store(p, a);
274	i_store(p + `32`, b);
275	i_store(p + `64`, c);
276	}
277
278	template<class V> SIMDPP_INL
279	void v512_store_pack3(char* p, const V& ca, const V& cb, const V& cc)
280	{
281	p = detail::assume_aligned(p, `64`);
282	V a = ca, b = cb, c = cc;
283	mem_pack3(a, b, c);
284	i_store(p, a);
285	i_store(p + `64`, b);
286	i_store(p + `128`, c);
287	}
288
289	template<class V> SIMDPP_INL
290	void i_store_packed3(char* p, const V& ca, const V& cb, const V& cc)
291	{
292	const unsigned veclen = V::base_vector_type::length_bytes;
293	typename detail::remove_sign<V>::type a = ca, b = cb, c = cc;
294
295	p = detail::assume_aligned(p, veclen);
296	for (unsigned i = `0`; i < V::vec_length; ++i) {
297	i_store_packed3(p, a.vec(i), b.vec(i), c.vec(i));
298	p += veclen*`3`;
299	}
300	}
301
302	} // namespace insn
303	} // namespace detail
304	} // namespace SIMDPP_ARCH_NAMESPACE
305	} // namespace simdpp
306
307	#endif
308

// Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_packed3.h