move_r.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/move_r.h]

1	/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_MOVE_R_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_MOVE_R_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/shuffle/shuffle_mask.h>
17	#include <simdpp/detail/vector_array_macros.h>
18
19	namespace simdpp {
20	namespace SIMDPP_ARCH_NAMESPACE {
21	namespace detail {
22	namespace insn {
23
24
25	template<unsigned shift> SIMDPP_INL
26	uint8x16 i_move16_r(const uint8x16& a)
27	{
28	static_assert(shift <= `16`, "Selector out of range");
29
30	#if SIMDPP_USE_NULL
31	return detail::null::move_n_r<shift>(a);
32	#elif SIMDPP_USE_SSE2
33	return _mm_slli_si128(a.native(), shift);
34	#elif SIMDPP_USE_NEON
35	uint8x16 zero = make_zero();
36	return vextq_u8(zero.native(), a.native(), `16`-shift);
37	#elif SIMDPP_USE_ALTIVEC
38	// return align<16-shift>((uint8x16) make_zero(), a);
39	return vec_sld_biendian<`16`-shift>((uint8<`16`>)make_zero(), a);
40	#elif SIMDPP_USE_MSA
41	uint8x16 zero = make_zero();
42	return (v16u8) __msa_sldi_b((v16i8)a.native(), (v16i8)zero.native(), `16`-shift);
43	#endif
44	}
45
#if SIMDPP_USE_AVX2
// AVX2 overload for 256-bit vectors of 8-bit elements. _mm256_slli_si256
// performs the byte shift independently within each 128-bit lane.
//
// shift: number of byte positions, 0..16 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint8x32 i_move16_r(const uint8x32& a)
{
    static_assert(shift <= 16, "Selector out of range");
    uint8x32 moved = _mm256_slli_si256(a.native(), shift);
    return moved;
}
#endif
54
#if SIMDPP_USE_AVX512BW
// AVX512BW overload for 512-bit vectors of 8-bit elements.
// _mm512_bslli_epi128 performs the byte shift independently within each
// 128-bit lane.
//
// shift: number of byte positions, 0..16 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint8<64> i_move16_r(const uint8<64>& a)
{
    static_assert(shift <= 16, "Selector out of range");
    uint8<64> moved = _mm512_bslli_epi128(a.native(), shift);
    return moved;
}
#endif
63
64	template<unsigned shift, unsigned N> SIMDPP_INL
65	uint8<N> i_move16_r(const uint8<N>& a)
66	{
67	static_assert(shift <= `16`, "Selector out of range");
68	SIMDPP_VEC_ARRAY_IMPL1(uint8<N>, i_move16_r<shift>, a);
69	}
70
71	// -----------------------------------------------------------------------------
72
73	template<unsigned shift> SIMDPP_INL
74	uint16<`8`> i_move8_r(const uint16<`8`>& a)
75	{
76	#if SIMDPP_USE_NULL
77	return detail::null::move_n_r<shift>(a);
78	#else
79	return (uint16<`8`>) i_move16_r<shift*`2`>(uint8<`16`>(a));
80	#endif
81	}
82
#if SIMDPP_USE_AVX2
// AVX2 overload for 256-bit vectors of 16-bit elements. The element count is
// converted to bytes (two per element) for the per-lane byte shift.
//
// shift: number of element positions, 0..8 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint16<16> i_move8_r(const uint16<16>& a)
{
    static_assert(shift <= 8, "Selector out of range");
    uint16<16> moved = _mm256_slli_si256(a.native(), shift * 2);
    return moved;
}
#endif
91
#if SIMDPP_USE_AVX512BW
// AVX512BW overload for 512-bit vectors of 16-bit elements. The element count
// is converted to bytes (two per element) for the per-lane byte shift.
//
// shift: number of element positions, 0..8 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint16<32> i_move8_r(const uint16<32>& a)
{
    static_assert(shift <= 8, "Selector out of range");
    uint16<32> moved = _mm512_bslli_epi128(a.native(), shift * 2);
    return moved;
}
#endif
100
101	template<unsigned shift, unsigned N> SIMDPP_INL
102	uint16<N> i_move8_r(const uint16<N>& a)
103	{
104	SIMDPP_VEC_ARRAY_IMPL1(uint16<N>, i_move8_r<shift>, a);
105	}
106
107	// -----------------------------------------------------------------------------
108
109	template<unsigned shift> SIMDPP_INL
110	uint32<`4`> i_move4_r(const uint32<`4`>& a)
111	{
112	#if SIMDPP_USE_NULL
113	return detail::null::move_n_r<shift>(a);
114	#else
115	return (uint32<`4`>) i_move16_r<shift*`4`>(uint8<`16`>(a));
116	#endif
117	}
118
#if SIMDPP_USE_AVX2
// AVX2 overload for 256-bit vectors of 32-bit elements. The element count is
// converted to bytes (four per element) for the per-lane byte shift.
//
// shift: number of element positions, 0..4 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint32<8> i_move4_r(const uint32<8>& a)
{
    static_assert(shift <= 4, "Selector out of range");
    uint32<8> moved = _mm256_slli_si256(a.native(), shift * 4);
    return moved;
}
#endif
127
#if SIMDPP_USE_AVX512F
// AVX512F overload for 512-bit vectors of 32-bit elements.
// Uses a zero-masked in-lane shuffle: the shuffle selector places the source
// elements at their new positions and the 16-bit mask (one bit per element,
// repeated for each group of four) clears the vacated low positions.
//
// shift: number of element positions, 0..4 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint32<16> i_move4_r(const uint32<16>& a)
{
    static_assert(shift <= 4, "Selector out of range");
    switch (shift) {
    case 1:
        // 0xeeee: keep elements 1..3 of each group, zero element 0.
        return _mm512_maskz_shuffle_epi32(0xeeee, a.native(),
                                          _MM_PERM_ENUM(_MM_SHUFFLE(2, 1, 0, 0)));
    case 2:
        // 0xcccc: keep elements 2..3 of each group, zero elements 0..1.
        return _mm512_maskz_shuffle_epi32(0xcccc, a.native(),
                                          _MM_PERM_ENUM(_MM_SHUFFLE(1, 0, 0, 0)));
    case 3:
        // 0x8888: keep only element 3 of each group.
        return _mm512_maskz_shuffle_epi32(0x8888, a.native(),
                                          _MM_PERM_ENUM(_MM_SHUFFLE(0, 0, 0, 0)));
    case 4:
        // Everything is moved out: the result is all zeros.
        return make_zero();
    case 0:
    default:
        // Nothing to move.
        return a;
    }
}
#endif
146
147	template<unsigned shift, unsigned N> SIMDPP_INL
148	uint32<N> i_move4_r(const uint32<N>& a)
149	{
150	SIMDPP_VEC_ARRAY_IMPL1(uint32<N>, i_move4_r<shift>, a);
151	}
152
153	// -----------------------------------------------------------------------------
154
155	template<unsigned shift> SIMDPP_INL
156	uint64<`2`> i_move2_r(const uint64<`2`>& a)
157	{
158	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
159	return detail::null::move_n_r<shift>(a);
160	#else
161	return (uint64<`2`>) i_move16_r<shift*`8`>(uint8<`16`>(a));
162	#endif
163	}
164
#if SIMDPP_USE_AVX2
// AVX2 overload for 256-bit vectors of 64-bit elements. The element count is
// converted to bytes (eight per element) for the per-lane byte shift.
//
// shift: number of element positions, 0..2 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
uint64<4> i_move2_r(const uint64<4>& a)
{
    static_assert(shift <= 2, "Selector out of range");
    uint64<4> moved = _mm256_slli_si256(a.native(), shift * 8);
    return moved;
}
#endif
173
#if SIMDPP_USE_AVX512F
// AVX512F overload for 512-bit vectors of 64-bit elements.
// Implemented as a move of twice as many 32-bit elements on the same register
// reinterpreted as uint32<16>.
//
// shift: number of element positions, 0..2 inclusive.
template<unsigned shift> SIMDPP_INL
uint64<8> i_move2_r(const uint64<8>& a)
{
    // A 128-bit lane holds two 64-bit elements, so the valid range is 0..2.
    // (The bound was previously 4, which let shift 3 and 4 pass this check
    // and only fail later inside i_move4_r.)
    static_assert(shift <= 2, "Selector out of range");
    return (uint64<8>) i_move4_r<shift*2>(uint32<16>(a));
}
#endif
182
183	template<unsigned shift, unsigned N> SIMDPP_INL
184	uint64<N> i_move2_r(const uint64<N>& a)
185	{
186	SIMDPP_VEC_ARRAY_IMPL1(uint64<N>, i_move2_r<shift>, a);
187	}
188
189	// -----------------------------------------------------------------------------
190
191	template<unsigned shift> SIMDPP_INL
192	float32<`4`> i_move4_r(const float32<`4`>& a)
193	{
194	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
195	return detail::null::move_n_r<shift>(a);
196	#else
197	return (float32<`4`>) i_move16_r<shift*`4`>(uint8<`16`>(a));
198	#endif
199	}
200
#if SIMDPP_USE_AVX
// AVX overload for 256-bit vectors of 32-bit floats. The register is
// reinterpreted as bytes, moved by four bytes per element, and converted back.
//
// shift: number of element positions, 0..4 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
float32<8> i_move4_r(const float32<8>& a)
{
    static_assert(shift <= 4, "Selector out of range");
    const uint8<32> bytes(a);
    return float32<8>(i_move16_r<shift * 4>(bytes));
}
#endif
209
#if SIMDPP_USE_AVX512F
// AVX512F overload for 512-bit vectors of 32-bit floats.
// Uses a zero-masked in-lane shuffle: the shuffle selector places the source
// elements at their new positions and the 16-bit mask (one bit per element,
// repeated for each group of four) clears the vacated low positions.
//
// shift: number of element positions, 0..4 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
float32<16> i_move4_r(const float32<16>& a)
{
    static_assert(shift <= 4, "Selector out of range");
    switch (shift) {
    case 1:
        // 0xeeee: keep elements 1..3 of each group, zero element 0.
        return _mm512_maskz_shuffle_ps(0xeeee, a.native(), a.native(),
                                       _MM_SHUFFLE(2, 1, 0, 0));
    case 2:
        // 0xcccc: keep elements 2..3 of each group, zero elements 0..1.
        return _mm512_maskz_shuffle_ps(0xcccc, a.native(), a.native(),
                                       _MM_SHUFFLE(1, 0, 0, 0));
    case 3:
        // 0x8888: keep only element 3 of each group.
        return _mm512_maskz_shuffle_ps(0x8888, a.native(), a.native(),
                                       _MM_SHUFFLE(0, 0, 0, 0));
    case 4:
        // Everything is moved out: the result is all zeros.
        return make_zero();
    case 0:
    default:
        // Nothing to move.
        return a;
    }
}
#endif
228
229	template<unsigned shift, unsigned N> SIMDPP_INL
230	float32<N> i_move4_r(const float32<N>& a)
231	{
232	SIMDPP_VEC_ARRAY_IMPL1(float32<N>, i_move4_r<shift>, a);
233	}
234
235	// -----------------------------------------------------------------------------
236
237	template<unsigned shift> SIMDPP_INL
238	float64<`2`> i_move2_r(const float64<`2`>& a)
239	{
240	#if SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON64 \|\| SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
241	return (float64<`2`>) i_move16_r<shift*`8`>(uint8<`16`>(a));
242	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
243	return detail::null::move_n_r<shift>(a);
244	#endif
245	}
246
#if SIMDPP_USE_AVX
// AVX overload for 256-bit vectors of 64-bit floats. The register is
// reinterpreted as bytes, moved by eight bytes per element, and converted back.
//
// shift: number of element positions, 0..2 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
float64<4> i_move2_r(const float64<4>& a)
{
    static_assert(shift <= 2, "Selector out of range");
    const uint8<32> bytes(a);
    return float64<4>(i_move16_r<shift * 8>(bytes));
}
#endif
255
#if SIMDPP_USE_AVX512F
// AVX512F overload for 512-bit vectors of 64-bit floats.
// A move by one element is a zero-masked in-lane shuffle that copies the low
// element of each pair into the high position and clears the low position.
//
// shift: number of element positions, 0..2 inclusive (checked at compile time).
template<unsigned shift> SIMDPP_INL
float64<8> i_move2_r(const float64<8>& a)
{
    static_assert(shift <= 2, "Selector out of range");
    switch (shift) {
    case 1:
        // 0xaa: keep the odd (upper) element of each pair, zero the even one.
        return _mm512_maskz_shuffle_pd(0xaa, a.native(), a.native(),
                                       SIMDPP_SHUFFLE_MASK_2x2_4(0, 0));
    case 2:
        // Everything is moved out: the result is all zeros.
        return make_zero();
    case 0:
    default:
        // Nothing to move.
        return a;
    }
}
#endif
269
270	template<unsigned shift, unsigned N> SIMDPP_INL
271	float64<N> i_move2_r(const float64<N>& a)
272	{
273	SIMDPP_VEC_ARRAY_IMPL1(float64<N>, i_move2_r<shift>, a);
274	}
275
276	// -----------------------------------------------------------------------------
277	// Certain compilers don't like zero or full vector width moves. The templates
278	// below offer a workaround
279
280	template<unsigned count>
281	struct i_move2_r_wrapper {
282	template<class V>
283	static SIMDPP_INL V run(const V& arg) { return i_move2_r<count>(arg); }
284	};
285	template<>
286	struct i_move2_r_wrapper<`0`> {
287	template<class V>
288	static SIMDPP_INL V run(const V& arg) { return arg; }
289	};
290	template<>
291	struct i_move2_r_wrapper<`2`> {
292	template<class V>
293	static SIMDPP_INL V run(const V&) { return (V) make_zero(); }
294	};
295
296	template<unsigned count>
297	struct i_move4_r_wrapper {
298	template<class V>
299	static SIMDPP_INL V run(const V& arg) { return i_move4_r<count>(arg); }
300	};
301	template<>
302	struct i_move4_r_wrapper<`0`> {
303	template<class V>
304	static SIMDPP_INL V run(const V& arg) { return arg; }
305	};
306	template<>
307	struct i_move4_r_wrapper<`4`> {
308	template<class V>
309	static SIMDPP_INL V run(const V&) { return (V) make_zero(); }
310	};
311
312	template<unsigned count>
313	struct i_move8_r_wrapper {
314	template<class V>
315	static SIMDPP_INL V run(const V& arg) { return i_move8_r<count>(arg); }
316	};
317	template<>
318	struct i_move8_r_wrapper<`0`> {
319	template<class V>
320	static SIMDPP_INL V run(const V& arg) { return arg; }
321	};
322	template<>
323	struct i_move8_r_wrapper<`8`> {
324	template<class V>
325	static SIMDPP_INL V run(const V&) { return (V) make_zero(); }
326	};
327
328	template<unsigned count>
329	struct i_move16_r_wrapper {
330	template<class V>
331	static SIMDPP_INL V run(const V& arg) { return i_move16_r<count>(arg); }
332	};
333	template<>
334	struct i_move16_r_wrapper<`0`> {
335	template<class V>
336	static SIMDPP_INL V run(const V& arg) { return arg; }
337	};
338	template<>
339	struct i_move16_r_wrapper<`16`> {
340	template<class V>
341	static SIMDPP_INL V run(const V&) { return (V) make_zero(); }
342	};
343
344	} // namespace insn
345	} // namespace detail
346	} // namespace SIMDPP_ARCH_NAMESPACE
347	} // namespace simdpp
348
349	#endif
350
351

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/move_r.h