1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_MOVE_L_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_MOVE_L_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/permute4.h> |
17 | #include <simdpp/detail/null/shuffle.h> |
18 | #include <simdpp/detail/shuffle/shuffle_mask.h> |
19 | #include <simdpp/detail/vector_array_macros.h> |
20 | |
21 | namespace simdpp { |
22 | namespace SIMDPP_ARCH_NAMESPACE { |
23 | namespace detail { |
24 | namespace insn { |
25 | |
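// The i_moveN_l family shifts vector elements towards element 0 ("left" in
// libsimdpp terms) by a compile-time element count, shifting in zeros at the
// high end. For vectors wider than 128 bits the operation applies to each
// 128-bit sub-vector independently, matching the per-lane behavior of the
// intrinsics used below. Informally, for a 16-byte vector:
//
//   r = i_move16_l<shift>(a)  =>  r[i] = a[i + shift]  for i <  16 - shift
//                                 r[i] = 0             for i >= 16 - shift
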
26 | #if SIMDPP_USE_ALTIVEC |
27 | template<unsigned shift> SIMDPP_INL |
28 | uint8<16> vec_sld_biendian(const uint8<16>& lower, const uint8<16>& upper) |
29 | { |
30 | #if SIMDPP_BIG_ENDIAN |
31 | return vec_sld(lower.native(), upper.native(), shift); |
32 | #else |
    // By default GCC adjusts vec_sld element order to match the endianness of
    // the target, so on little-endian targets the operands must be swapped
    // and the shift amount inverted.
    return vec_sld(upper.native(), lower.native(), 16 - shift);
35 | #endif |
36 | } |
37 | #endif |
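
// A worked example of the endian adjustment above (informal): with shift == 3
// on a big-endian target, vec_sld(lower, upper, 3) yields the bytes
// { lower[3..15], upper[0..2] }. On a little-endian target the same result is
// produced by vec_sld(upper, lower, 16 - 3), hence the swapped operands and
// inverted shift amount.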
38 | |
39 | template<unsigned shift> SIMDPP_INL |
40 | uint8x16 i_move16_l(const uint8x16& a) |
41 | { |
    static_assert(shift <= 16, "Selector out of range");
43 | #if SIMDPP_USE_NULL |
44 | return detail::null::move_n_l<shift>(a); |
45 | #elif SIMDPP_USE_SSE2 |
46 | return _mm_srli_si128(a.native(), shift); |
47 | #elif SIMDPP_USE_NEON |
48 | uint8x16 z = make_zero(); |
49 | return vextq_u8(a.native(), z.native(), shift); |
50 | #elif SIMDPP_USE_ALTIVEC |
51 | // return align<shift>(a, (uint8x16) make_zero()); |
52 | return vec_sld_biendian<shift>((uint8<16>)a, (uint8<16>)make_zero()); |
53 | #elif SIMDPP_USE_MSA |
54 | uint8x16 zero = make_zero(); |
55 | return (v16u8) __msa_sldi_b((v16i8)zero.native(), (v16i8)a.native(), shift); |
56 | #endif |
57 | } |
58 | |
59 | #if SIMDPP_USE_AVX2 |
60 | template<unsigned shift> SIMDPP_INL |
61 | uint8x32 i_move16_l(const uint8x32& a) |
62 | { |
    static_assert(shift <= 16, "Selector out of range");
64 | return _mm256_srli_si256(a.native(), shift); |
65 | } |
66 | #endif |
67 | |
68 | #if SIMDPP_USE_AVX512BW |
69 | template<unsigned shift> SIMDPP_INL |
70 | uint8<64> i_move16_l(const uint8<64>& a) |
71 | { |
    static_assert(shift <= 16, "Selector out of range");
73 | return _mm512_bsrli_epi128(a.native(), shift); |
74 | } |
75 | #endif |
76 | |
77 | template<unsigned shift, unsigned N> SIMDPP_INL |
78 | uint8<N> i_move16_l(const uint8<N>& a) |
79 | { |
    static_assert(shift <= 16, "Selector out of range");
81 | SIMDPP_VEC_ARRAY_IMPL1(uint8<N>, i_move16_l<shift>, a); |
82 | } |
83 | |
84 | // ----------------------------------------------------------------------------- |
85 | |
86 | template<unsigned shift> SIMDPP_INL |
87 | uint16<8> i_move8_l(const uint16<8>& a) |
88 | { |
89 | #if SIMDPP_USE_NULL |
90 | return detail::null::move_n_l<shift>(a); |
91 | #else |
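    // Each 16-bit element spans two bytes, so an element shift of 'shift'
    // equals a byte shift of shift*2. The 32-bit and 64-bit variants below
    // use the same reduction with shift*4 and shift*8.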
92 | return (uint16<8>) i_move16_l<shift*2>(uint8<16>(a)); |
93 | #endif |
94 | } |
95 | |
96 | #if SIMDPP_USE_AVX2 |
97 | template<unsigned shift> SIMDPP_INL |
98 | uint16<16> i_move8_l(const uint16<16>& a) |
99 | { |
    static_assert(shift <= 8, "Selector out of range");
101 | return _mm256_srli_si256(a.native(), shift*2); |
102 | } |
103 | #endif |
104 | |
105 | #if SIMDPP_USE_AVX512BW |
106 | template<unsigned shift> SIMDPP_INL |
107 | uint16<32> i_move8_l(const uint16<32>& a) |
108 | { |
    static_assert(shift <= 8, "Selector out of range");
110 | return _mm512_bsrli_epi128(a.native(), shift*2); |
111 | } |
112 | #endif |
113 | |
114 | template<unsigned shift, unsigned N> SIMDPP_INL |
115 | uint16<N> i_move8_l(const uint16<N>& a) |
116 | { |
117 | SIMDPP_VEC_ARRAY_IMPL1(uint16<N>, i_move8_l<shift>, a); |
118 | } |
119 | |
120 | // ----------------------------------------------------------------------------- |
121 | |
122 | template<unsigned shift> SIMDPP_INL |
123 | uint32<4> i_move4_l(const uint32<4>& a) |
124 | { |
125 | #if SIMDPP_USE_NULL |
126 | return detail::null::move_n_l<shift>(a); |
127 | #else |
128 | return (uint32<4>) i_move16_l<shift*4>(uint8<16>(a)); |
129 | #endif |
130 | } |
131 | |
132 | #if SIMDPP_USE_AVX2 |
133 | template<unsigned shift> SIMDPP_INL |
134 | uint32<8> i_move4_l(const uint32<8>& a) |
135 | { |
    static_assert(shift <= 4, "Selector out of range");
137 | return _mm256_srli_si256(a.native(), shift*4); |
138 | } |
139 | #endif |
140 | |
141 | #if SIMDPP_USE_AVX512F |
142 | template<unsigned shift> SIMDPP_INL |
143 | uint32<16> i_move4_l(const uint32<16>& a) |
144 | { |
    static_assert(shift <= 4, "Selector out of range");
146 | switch (shift) { |
147 | default: |
148 | case 0: return a; |
149 | case 1: return _mm512_maskz_shuffle_epi32(0x7777, a.native(), |
150 | _MM_PERM_ENUM(_MM_SHUFFLE(3, 3, 2, 1))); |
151 | case 2: return _mm512_maskz_shuffle_epi32(0x3333, a.native(), |
152 | _MM_PERM_ENUM(_MM_SHUFFLE(3, 3, 3, 2))); |
153 | case 3: return _mm512_maskz_shuffle_epi32(0x1111, a.native(), |
154 | _MM_PERM_ENUM(_MM_SHUFFLE(3, 3, 3, 3))); |
155 | case 4: return make_zero(); |
156 | } |
157 | } |
158 | #endif |
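
// Note on the shift == 1 case above: _MM_SHUFFLE(3, 3, 2, 1) makes element i
// of each 128-bit lane take the value of element i + 1 (element 3 is a
// don't-care), and the zeroing mask 0x7777 then clears element 3 of each of
// the four lanes, yielding { a1, a2, a3, 0 } per lane. The shift == 2 and
// shift == 3 cases follow the same pattern with fewer surviving elements.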
159 | |
160 | template<unsigned shift, unsigned N> SIMDPP_INL |
161 | uint32<N> i_move4_l(const uint32<N>& a) |
162 | { |
163 | SIMDPP_VEC_ARRAY_IMPL1(uint32<N>, i_move4_l<shift>, a); |
164 | } |
165 | |
166 | // ----------------------------------------------------------------------------- |
167 | |
168 | template<unsigned shift> SIMDPP_INL |
169 | uint64<2> i_move2_l(const uint64<2>& a) |
170 | { |
171 | #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
172 | return detail::null::move_n_l<shift>(a); |
173 | #else |
174 | return (uint64<2>) i_move16_l<shift*8>(uint8<16>(a)); |
175 | #endif |
176 | } |
177 | |
178 | #if SIMDPP_USE_AVX2 |
179 | template<unsigned shift> SIMDPP_INL |
180 | uint64<4> i_move2_l(const uint64<4>& a) |
181 | { |
    static_assert(shift <= 2, "Selector out of range");
183 | return _mm256_srli_si256(a.native(), shift*8); |
184 | } |
185 | #endif |
186 | |
187 | #if SIMDPP_USE_AVX512F |
188 | template<unsigned shift> SIMDPP_INL |
189 | uint64<8> i_move2_l(const uint64<8>& a) |
190 | { |
    static_assert(shift <= 2, "Selector out of range");
192 | return (uint64<8>) i_move4_l<shift*2>(uint32<16>(a)); |
193 | } |
194 | #endif |
195 | |
196 | template<unsigned shift, unsigned N> SIMDPP_INL |
197 | uint64<N> i_move2_l(const uint64<N>& a) |
198 | { |
199 | SIMDPP_VEC_ARRAY_IMPL1(uint64<N>, i_move2_l<shift>, a); |
200 | } |
201 | |
202 | // ----------------------------------------------------------------------------- |
203 | |
204 | template<unsigned shift> SIMDPP_INL |
205 | float32<4> i_move4_l(const float32<4>& a) |
206 | { |
207 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
208 | return detail::null::move_n_l<shift>(a); |
209 | #else |
210 | return (float32<4>) i_move16_l<shift*4>(uint8<16>(a)); |
211 | #endif |
212 | } |
213 | |
214 | #if SIMDPP_USE_AVX |
215 | template<unsigned shift> SIMDPP_INL |
216 | float32<8> i_move4_l(const float32<8>& a) |
217 | { |
    static_assert(shift <= 4, "Selector out of range");
219 | return (float32<8>) i_move16_l<shift*4>(uint8<32>(a)); |
220 | } |
221 | #endif |
222 | |
223 | #if SIMDPP_USE_AVX512F |
224 | template<unsigned shift> SIMDPP_INL |
225 | float32<16> i_move4_l(const float32<16>& a) |
226 | { |
    static_assert(shift <= 4, "Selector out of range");
228 | switch (shift) { |
229 | default: |
230 | case 0: return a; |
231 | case 1: return _mm512_maskz_shuffle_ps(0x7777, a.native(), a.native(), |
232 | _MM_SHUFFLE(3, 3, 2, 1)); |
233 | case 2: return _mm512_maskz_shuffle_ps(0x3333, a.native(), a.native(), |
234 | _MM_SHUFFLE(3, 3, 3, 2)); |
235 | case 3: return _mm512_maskz_shuffle_ps(0x1111, a.native(), a.native(), |
236 | _MM_SHUFFLE(3, 3, 3, 3)); |
237 | case 4: return make_zero(); |
238 | } |
239 | } |
240 | #endif |
241 | |
242 | template<unsigned shift, unsigned N> SIMDPP_INL |
243 | float32<N> i_move4_l(const float32<N>& a) |
244 | { |
245 | SIMDPP_VEC_ARRAY_IMPL1(float32<N>, i_move4_l<shift>, a); |
246 | } |
247 | |
248 | // ----------------------------------------------------------------------------- |
249 | |
250 | template<unsigned shift> SIMDPP_INL |
251 | float64<2> i_move2_l(const float64<2>& a) |
252 | { |
253 | #if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA |
254 | return (float64<2>) i_move16_l<shift*8>(uint8<16>(a)); |
255 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
256 | return detail::null::move_n_l<shift>(a); |
257 | #endif |
258 | } |
259 | |
260 | #if SIMDPP_USE_AVX |
261 | template<unsigned shift> SIMDPP_INL |
262 | float64<4> i_move2_l(const float64<4>& a) |
263 | { |
    static_assert(shift <= 2, "Selector out of range");
265 | return (float64<4>) i_move16_l<shift*8>(uint8<32>(a)); |
266 | } |
267 | #endif |
268 | |
269 | #if SIMDPP_USE_AVX512F |
270 | template<unsigned shift> SIMDPP_INL |
271 | float64<8> i_move2_l(const float64<8>& a) |
272 | { |
    static_assert(shift <= 2, "Selector out of range");
274 | switch (shift) { |
275 | default: |
276 | case 0: return a; |
277 | case 1: return _mm512_maskz_shuffle_pd(0x55, a.native(), a.native(), |
278 | SIMDPP_SHUFFLE_MASK_2x2_4(1, 1)); |
279 | case 2: return make_zero(); |
280 | } |
281 | } |
282 | #endif |
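
// In the shift == 1 case above the shuffle selects element 1 of each
// two-element lane for both positions, and the zeroing mask 0x55 keeps only
// the lower position, yielding { a1, 0 } per 128-bit lane.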
283 | |
284 | template<unsigned shift, unsigned N> SIMDPP_INL |
285 | float64<N> i_move2_l(const float64<N>& a) |
286 | { |
287 | SIMDPP_VEC_ARRAY_IMPL1(float64<N>, i_move2_l<shift>, a); |
288 | } |
289 | |
290 | // ----------------------------------------------------------------------------- |
// Certain compilers reject moves by zero elements or by the full vector
// width. The wrapper templates below offer a workaround.
293 | |
294 | template<unsigned count> |
295 | struct i_move2_l_wrapper { |
296 | template<class V> |
297 | static SIMDPP_INL V run(const V& arg) { return i_move2_l<count>(arg); } |
298 | }; |
299 | template<> |
300 | struct i_move2_l_wrapper<0> { |
301 | template<class V> |
302 | static SIMDPP_INL V run(const V& arg) { return arg; } |
303 | }; |
304 | template<> |
305 | struct i_move2_l_wrapper<2> { |
306 | template<class V> |
307 | static SIMDPP_INL V run(const V&) { return (V) make_zero(); } |
308 | }; |
309 | |
310 | template<unsigned count> |
311 | struct i_move4_l_wrapper { |
312 | template<class V> |
313 | static SIMDPP_INL V run(const V& arg) { return i_move4_l<count>(arg); } |
314 | }; |
315 | template<> |
316 | struct i_move4_l_wrapper<0> { |
317 | template<class V> |
318 | static SIMDPP_INL V run(const V& arg) { return arg; } |
319 | }; |
320 | template<> |
321 | struct i_move4_l_wrapper<4> { |
322 | template<class V> |
323 | static SIMDPP_INL V run(const V&) { return (V) make_zero(); } |
324 | }; |
325 | |
326 | template<unsigned count> |
327 | struct i_move8_l_wrapper { |
328 | template<class V> |
329 | static SIMDPP_INL V run(const V& arg) { return i_move8_l<count>(arg); } |
330 | }; |
331 | template<> |
332 | struct i_move8_l_wrapper<0> { |
333 | template<class V> |
334 | static SIMDPP_INL V run(const V& arg) { return arg; } |
335 | }; |
336 | template<> |
337 | struct i_move8_l_wrapper<8> { |
338 | template<class V> |
339 | static SIMDPP_INL V run(const V&) { return (V) make_zero(); } |
340 | }; |
341 | |
342 | template<unsigned count> |
343 | struct i_move16_l_wrapper { |
344 | template<class V> |
345 | static SIMDPP_INL V run(const V& arg) { return i_move16_l<count>(arg); } |
346 | }; |
347 | template<> |
348 | struct i_move16_l_wrapper<0> { |
349 | template<class V> |
350 | static SIMDPP_INL V run(const V& arg) { return arg; } |
351 | }; |
352 | template<> |
353 | struct i_move16_l_wrapper<16> { |
354 | template<class V> |
355 | static SIMDPP_INL V run(const V&) { return (V) make_zero(); } |
356 | }; |
357 | |
358 | } // namespace insn |
359 | } // namespace detail |
360 | } // namespace SIMDPP_ARCH_NAMESPACE |
361 | } // namespace simdpp |
362 | |
363 | #endif |