i_shift_l.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_shift_l.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SHIFT_L_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SHIFT_L_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/not_implemented.h>
17	#include <simdpp/core/bit_and.h>
18	#include <simdpp/core/bit_andnot.h>
19	#include <simdpp/core/set_splat.h>
20	#include <simdpp/detail/insn/i_shift.h>
21	#include <simdpp/detail/null/math.h>
22	#include <simdpp/detail/vector_array_macros.h>
23
24	namespace simdpp {
25	namespace SIMDPP_ARCH_NAMESPACE {
26	namespace detail {
27	namespace insn {
28
29
30	static SIMDPP_INL
31	uint8x16 i_shift_l(const uint8x16& a, unsigned count)
32	{
33	#if SIMDPP_USE_NULL
34	return detail::null::shift_l(a, count);
35	#elif SIMDPP_USE_AVX2
36	uint16x8 mask, a16;
37	uint16_t mask1 = (`0x00ff` >> (`8`-count)) << `8`;
38
39	a16 = a;
40	mask = splat(mask1);
41	a16 = shift_l(a16, count);
42	a16 = bit_andnot(a16, mask);
43	return uint8x16(a16);
44	#elif SIMDPP_USE_SSE2
45	uint16x8 mask, a16;
46	mask = make_ones();
47	mask = shift_r(mask, `16`-count);
48	mask = shift_l(mask, `8`);
49
50	a16 = a;
51	a16 = shift_l(a16, count);
52	a16 = bit_andnot(a16, mask);
53	return uint8x16 (a16);
54	#elif SIMDPP_USE_NEON
55	int8x16 shift = splat(count);
56	return vshlq_u8(a.native(), shift.native());
57	#elif SIMDPP_USE_ALTIVEC
58	uint8x16 shift = splat(count);
59	return vec_sl(a.native(), shift.native());
60	#elif SIMDPP_USE_MSA
61	int8x16 shift = splat(count);
62	return (v16u8) __msa_sll_b((v16i8)a.native(), shift.native());
63	#endif
64	}
65
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 8-bit lane of the 256-bit vector @a a left by
    @a count bits.

    The shift is done on 16-bit lanes; bits that crossed from the low byte of
    a 16-bit lane into its high byte are cleared afterwards.

    @param a      vector to shift
    @param count  number of bits to shift by, chosen at run time
    @return the shifted vector
*/
static SIMDPP_INL
uint8x32 i_shift_l(const uint8x32& a, unsigned count)
{
    // Clamp the value used to build the mask so that `8 - spill_bits` cannot
    // wrap around in unsigned arithmetic (that would be an out-of-range
    // scalar shift, i.e. undefined behaviour). For count >= 8 the mask covers
    // the whole high byte.
    const unsigned spill_bits = count < 8 ? count : 8;
    uint16_t mask1 = (0x00ff >> (8 - spill_bits)) << 8;

    uint16x16 mask, a16;
    a16 = a;
    mask = splat(mask1);
    a16 = shift_l(a16, count);
    a16 = bit_andnot(a16, mask);
    return uint8<32>(a16);
}
#endif
80
#if SIMDPP_USE_AVX512BW
/** Shifts every unsigned 8-bit lane of the 512-bit vector @a a left by
    @a count bits.

    The shift is done on 16-bit lanes; bits that crossed from the low byte of
    a 16-bit lane into its high byte are cleared afterwards.

    @param a      vector to shift
    @param count  number of bits to shift by, chosen at run time
    @return the shifted vector
*/
SIMDPP_INL uint8<64> i_shift_l(const uint8<64>& a, unsigned count)
{
    // Clamp the value used to build the mask so that `8 - spill_bits` cannot
    // wrap around in unsigned arithmetic (that would be an out-of-range
    // scalar shift, i.e. undefined behaviour). For count >= 8 the mask covers
    // the whole high byte.
    const unsigned spill_bits = count < 8 ? count : 8;
    uint16_t mask1 = (0x00ff >> (8 - spill_bits)) << 8;

    uint16<32> mask, a16;
    a16 = a;
    mask = splat(mask1);
    a16 = shift_l(a16, count);
    a16 = bit_andnot(a16, mask);
    return uint8<64>(a16);
}
#endif
94
95	// -----------------------------------------------------------------------------
96
97	static SIMDPP_INL
98	uint16x8 i_shift_l(const uint16x8& a, unsigned count)
99	{
100	#if SIMDPP_USE_NULL
101	return detail::null::shift_l(a, count);
102	#elif SIMDPP_USE_SSE2
103	return _mm_sll_epi16(a.native(), _mm_cvtsi32_si128(count));
104	#elif SIMDPP_USE_NEON
105	int16x8 shift = splat(count);
106	return vshlq_u16(a.native(), shift.native());
107	#elif SIMDPP_USE_ALTIVEC
108	uint16x8 shift = splat(count);
109	return vec_sl(a.native(), shift.native());
110	#elif SIMDPP_USE_MSA
111	int16x8 shift = splat(count);
112	return (v8u16) __msa_sll_h((v8i16) a.native(), shift.native());
113	#endif
114	}
115
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 16-bit lane of the 256-bit vector @a a left by the
    run-time amount @a count.

    When SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS is set, the instruction is
    emitted through inline assembly instead of the intrinsic.
*/
static SIMDPP_INL
uint16x16 i_shift_l(const uint16x16& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    __m256i result = a.native();
    __m128i amount = _mm_cvtsi32_si128(count);
    // Operand 0 is the output, operand 1 the shift amount, operand 2 the data.
    __asm("vpsllw %1, %2, %0" : "=x"(result) : "x"(amount), "x"(result));
    return result;
#else
    const __m128i amount = _mm_cvtsi32_si128(count);
    return _mm256_sll_epi16(a.native(), amount);
#endif
}
#endif
130
#if SIMDPP_USE_AVX512BW
/** Shifts every unsigned 16-bit lane of the 512-bit vector @a a left by the
    run-time amount @a count.
*/
SIMDPP_INL uint16<32> i_shift_l(const uint16<32>& a, unsigned count)
{
    // The intrinsic takes the shift amount as a 128-bit vector operand.
    const __m128i amount = _mm_cvtsi32_si128(count);
    return _mm512_sll_epi16(a.native(), amount);
}
#endif
137
138	// -----------------------------------------------------------------------------
139
140	static SIMDPP_INL
141	uint32x4 i_shift_l(const uint32x4& a, unsigned count)
142	{
143	#if SIMDPP_USE_NULL
144	return detail::null::shift_l(a, count);
145	#elif SIMDPP_USE_SSE2
146	return _mm_sll_epi32(a.native(), _mm_cvtsi32_si128(count));
147	#elif SIMDPP_USE_NEON
148	int32x4 shift = splat(count);
149	return vshlq_u32(a.native(), shift.native());
150	#elif SIMDPP_USE_ALTIVEC
151	uint32x4 shift = splat(count);
152	return vec_sl(a.native(), shift.native());
153	#elif SIMDPP_USE_MSA
154	int32x4 shift = splat(count);
155	return (v4u32) __msa_sll_w((v4i32) a.native(), shift.native());
156	#endif
157	}
158
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 32-bit lane of the 256-bit vector @a a left by the
    run-time amount @a count.

    When SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS is set, the instruction is
    emitted through inline assembly instead of the intrinsic.
*/
static SIMDPP_INL
uint32x8 i_shift_l(const uint32x8& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    __m256i result = a.native();
    __m128i amount = _mm_cvtsi32_si128(count);
    // Operand 0 is the output, operand 1 the shift amount, operand 2 the data.
    __asm("vpslld %1, %2, %0" : "=x"(result) : "x"(amount), "x"(result));
    return result;
#else
    const __m128i amount = _mm_cvtsi32_si128(count);
    return _mm256_sll_epi32(a.native(), amount);
#endif
}
#endif
173
#if SIMDPP_USE_AVX512F
/** Shifts every unsigned 32-bit lane of the 512-bit vector @a a left by the
    run-time amount @a count.
*/
static SIMDPP_INL
uint32<16> i_shift_l(const uint32<16>& a, unsigned count)
{
    // The intrinsic takes the shift amount as a 128-bit vector operand.
    const __m128i amount = _mm_cvtsi32_si128(count);
    return _mm512_sll_epi32(a.native(), amount);
}
#endif
181
182	// -----------------------------------------------------------------------------
183
184	static SIMDPP_INL
185	uint64x2 i_shift_l(const uint64x2& a, unsigned count)
186	{
187	#if SIMDPP_USE_SSE2
188	return _mm_sll_epi64(a.native(), _mm_cvtsi32_si128(count));
189	#elif SIMDPP_USE_NEON
190	int64x2 shift = splat(count);
191	return vshlq_u64(a.native(), shift.native());
192	#elif SIMDPP_USE_VSX_207
193	uint64x2 shift = splat(count);
194	return vec_sl(a.native(), shift.native());
195	#elif SIMDPP_USE_MSA
196	int32x4 shift = splat(count);
197	return (v2u64) __msa_sll_d((v2i64) a.native(), (v2i64) shift.native());
198	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
199	return detail::null::shift_l(a, count);
200	#endif
201	}
202
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 64-bit lane of the 256-bit vector @a a left by the
    run-time amount @a count.

    When SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS is set, the instruction is
    emitted through inline assembly instead of the intrinsic.
*/
static SIMDPP_INL
uint64x4 i_shift_l(const uint64x4& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    __m256i result = a.native();
    __m128i amount = _mm_cvtsi32_si128(count);
    // Operand 0 is the output, operand 1 the shift amount, operand 2 the data.
    __asm("vpsllq %1, %2, %0" : "=x"(result) : "x"(amount), "x"(result));
    return result;
#else
    const __m128i amount = _mm_cvtsi32_si128(count);
    return _mm256_sll_epi64(a.native(), amount);
#endif
}
#endif
217
#if SIMDPP_USE_AVX512F
/** Shifts every unsigned 64-bit lane of the 512-bit vector @a a left by the
    run-time amount @a count.
*/
static SIMDPP_INL
uint64<8> i_shift_l(const uint64<8>& a, unsigned count)
{
    // The intrinsic takes the shift amount as a 128-bit vector operand.
    const __m128i amount = _mm_cvtsi32_si128(count);
    return _mm512_sll_epi64(a.native(), amount);
}
#endif
225
226	// -----------------------------------------------------------------------------
227
228	template<class V> SIMDPP_INL
229	V i_shift_l(const V& a, unsigned count)
230	{
231	SIMDPP_VEC_ARRAY_IMPL2S(V, i_shift_l, a, count);
232	}
233
234	// -----------------------------------------------------------------------------
235
236	template<unsigned count, unsigned N> SIMDPP_INL
237	uint8<N> sse_shift_l_8(const uint8<N>& a)
238	{
239	uint8_t mask1 = `0xff` >> count;
240	uint8<N> mask = make_uint(mask1);
241
242	uint16<N/`2`> a16 = (uint16<N/`2`>) bit_and(a, mask);
243	a16 = shift_l<count>(a16);
244
245	return uint8<N>(a16);
246	}
247
248	template<unsigned count> SIMDPP_INL
249	uint8x16 i_shift_l(const uint8x16& a)
250	{
251	static_assert(count < `8`, "Shift out of bounds");
252	#if SIMDPP_USE_NULL
253	return i_shift_l(a, count);
254	#elif SIMDPP_USE_SSE2
255	return sse_shift_l_8<count>(a);
256	#elif SIMDPP_USE_NEON
257	return vshlq_n_u8(a.native(), count);
258	#elif SIMDPP_USE_ALTIVEC
259	uint8x16 shift = make_uint(count);
260	return vec_sl(a.native(), shift.native());
261	#elif SIMDPP_USE_MSA
262	return (v16u8) __msa_slli_b((v16i8) a.native(), count);
263	#endif
264	}
265
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 8-bit lane of the 256-bit vector @a a left by the
    compile-time amount @a count (less than 8) via sse_shift_l_8.
*/
template<unsigned count>
SIMDPP_INL uint8<32> i_shift_l(const uint8<32>& a)
{
    static_assert(count < 8, "Shift out of bounds");
    return sse_shift_l_8<count>(a);
}
#endif
274
#if SIMDPP_USE_AVX512BW
/** Shifts every unsigned 8-bit lane of the 512-bit vector @a a left by the
    compile-time amount @a count (less than 8) via sse_shift_l_8.
*/
template<unsigned count>
SIMDPP_INL uint8<64> i_shift_l(const uint8<64>& a)
{
    static_assert(count < 8, "Shift out of bounds");
    return sse_shift_l_8<count>(a);
}
#endif
283
284	// -----------------------------------------------------------------------------
285
286	template<unsigned count> SIMDPP_INL
287	uint16x8 i_shift_l(const uint16x8& a)
288	{
289	static_assert(count < `16`, "Shift out of bounds");
290	#if SIMDPP_USE_NULL
291	return i_shift_l(a, count);
292	#elif SIMDPP_USE_SSE2
293	return _mm_slli_epi16(a.native(), count);
294	#elif SIMDPP_USE_NEON
295	return vshlq_n_u16(a.native(), count);
296	#elif SIMDPP_USE_ALTIVEC
297	uint16x8 shift = make_uint(count);
298	return vec_sl(a.native(), shift.native());
299	#elif SIMDPP_USE_MSA
300	return (v8u16) __msa_slli_h((v8i16) a.native(), count);
301	#endif
302	}
303
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 16-bit lane of the 256-bit vector @a a left by the
    compile-time amount @a count, which must be less than 16.
*/
template<unsigned count>
SIMDPP_INL uint16x16 i_shift_l(const uint16x16& a)
{
    static_assert(count < 16, "Shift out of bounds");
    const __m256i shifted = _mm256_slli_epi16(a.native(), count);
    return shifted;
}
#endif
312
#if SIMDPP_USE_AVX512BW
/** Shifts every unsigned 16-bit lane of the 512-bit vector @a a left by the
    compile-time amount @a count, which must be less than 16.
*/
template<unsigned count>
SIMDPP_INL uint16<32> i_shift_l(const uint16<32>& a)
{
    static_assert(count < 16, "Shift out of bounds");
    const __m512i shifted = _mm512_slli_epi16(a.native(), count);
    return shifted;
}
#endif
321
322	// -----------------------------------------------------------------------------
323
324	template<unsigned count> SIMDPP_INL
325	uint32x4 i_shift_l(const uint32x4& a)
326	{
327	static_assert(count < `32`, "Shift out of bounds");
328	#if SIMDPP_USE_NULL
329	return i_shift_l(a, count);
330	#elif SIMDPP_USE_SSE2
331	return _mm_slli_epi32(a.native(), count);
332	#elif SIMDPP_USE_NEON
333	return vshlq_n_u32(a.native(), count);
334	#elif SIMDPP_USE_ALTIVEC
335	uint32x4 shift = make_uint(count);
336	return vec_sl(a.native(), shift.native());
337	#elif SIMDPP_USE_MSA
338	return (v4u32) __msa_slli_w((v4i32) a.native(), count);
339	#endif
340	}
341
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 32-bit lane of the 256-bit vector @a a left by the
    compile-time amount @a count, which must be less than 32.
*/
template<unsigned count>
SIMDPP_INL uint32x8 i_shift_l(const uint32x8& a)
{
    static_assert(count < 32, "Shift out of bounds");
    const __m256i shifted = _mm256_slli_epi32(a.native(), count);
    return shifted;
}
#endif
350
#if SIMDPP_USE_AVX512F
/** Shifts every unsigned 32-bit lane of the 512-bit vector @a a left by the
    compile-time amount @a count, which must be less than 32.
*/
template<unsigned count>
SIMDPP_INL uint32<16> i_shift_l(const uint32<16>& a)
{
    static_assert(count < 32, "Shift out of bounds");
    const __m512i shifted = _mm512_slli_epi32(a.native(), count);
    return shifted;
}
#endif
359
360	// -----------------------------------------------------------------------------
361
362	template<unsigned count> SIMDPP_INL
363	uint64x2 i_shift_l(const uint64x2& a)
364	{
365	static_assert(count < `64`, "Shift out of bounds");
366	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
367	return i_shift_l(a, count);
368	#elif SIMDPP_USE_SSE2
369	return _mm_slli_epi64(a.native(), count);
370	#elif SIMDPP_USE_NEON
371	return vshlq_n_u64(a.native(), count);
372	#elif SIMDPP_USE_MSA
373	return (v2u64) __msa_slli_d((v2i64) a.native(), count);
374	#else
375	return SIMDPP_NOT_IMPLEMENTED1(a);
376	#endif
377	}
378
#if SIMDPP_USE_AVX2
/** Shifts every unsigned 64-bit lane of the 256-bit vector @a a left by the
    compile-time amount @a count, which must be less than 64.
*/
template<unsigned count>
SIMDPP_INL uint64x4 i_shift_l(const uint64x4& a)
{
    static_assert(count < 64, "Shift out of bounds");
    const __m256i shifted = _mm256_slli_epi64(a.native(), count);
    return shifted;
}
#endif
387
#if SIMDPP_USE_AVX512F
/** Shifts every unsigned 64-bit lane of the 512-bit vector @a a left by the
    compile-time amount @a count, which must be less than 64.
*/
template<unsigned count>
SIMDPP_INL uint64<8> i_shift_l(const uint64<8>& a)
{
    static_assert(count < 64, "Shift out of bounds");
    const __m512i shifted = _mm512_slli_epi64(a.native(), count);
    return shifted;
}
#endif
396
397	// -----------------------------------------------------------------------------
398
399	template<unsigned count, class V> SIMDPP_INL
400	V i_shift_l(const V& a)
401	{
402	SIMDPP_VEC_ARRAY_IMPL1(V, i_shift_l<count>, a);
403	}
404
405	template<bool no_shift>
406	struct i_shift_l_wrapper {
407	template<unsigned count, class V>
408	static SIMDPP_INL V run(const V& arg) { return i_shift_l<count>(arg); }
409	};
410	template<>
411	struct i_shift_l_wrapper<true> {
412	template<unsigned count, class V>
413	static SIMDPP_INL V run(const V& arg) { return arg; }
414	};
415
416	} // namespace insn
417	} // namespace detail
418	} // namespace SIMDPP_ARCH_NAMESPACE
419	} // namespace simdpp
420
421	#endif
422
423

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_shift_l.h