i_shift_r.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_shift_r.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SHIFT_R_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SHIFT_R_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/not_implemented.h>
17	#include <simdpp/core/bit_and.h>
18	#include <simdpp/core/bit_andnot.h>
19	#include <simdpp/core/bit_or.h>
20	#include <simdpp/core/i_add.h>
21	#include <simdpp/core/i_sub.h>
22	#include <simdpp/core/splat.h>
23	#include <simdpp/core/set_splat.h>
24	#include <simdpp/core/permute4.h>
25	#include <simdpp/core/shuffle2.h>
26	#include <simdpp/detail/insn/i_shift.h>
27	#include <simdpp/detail/null/math.h>
28	#include <simdpp/detail/vector_array_macros.h>
29
30	namespace simdpp {
31	namespace SIMDPP_ARCH_NAMESPACE {
32	namespace detail {
33	namespace insn {
34
35
36	static SIMDPP_INL
37	int8x16 i_shift_r(const int8x16& a, unsigned count)
38	{
39	#if SIMDPP_USE_NULL
40	return detail::null::shift_r(a, count);
41	#elif SIMDPP_USE_SSE2
42	uint16x8 hi, lo;
43	lo = hi = a;
44
45	lo = shift_l<`8`>(lo);
46	lo = shift_r(int16x8 (lo), count);
47	lo = shift_r<`8`>(lo);
48
49	hi = shift_r(int16x8 (hi), `8`+count);
50	hi = shift_l<`8`>(hi);
51	return (int8<`16`>) bit_or(lo, hi); //higher part of lo is already clear
52	#elif SIMDPP_USE_NEON
53	int8x16 shift = splat(-int(count));
54	return vshlq_s8(a.native(), shift.native());
55	#elif SIMDPP_USE_ALTIVEC
56	uint8x16 shift = splat(count);
57	return vec_sra(a.native(), shift.native());
58	#elif SIMDPP_USE_MSA
59	int8x16 shift = splat(count);
60	return __msa_sra_b(a.native(), shift.native());
61	#endif
62	}
63
#if SIMDPP_USE_AVX2
// AVX2: right shift of thirty-two signed 8-bit elements by a run-time count,
// done through 16-bit lanes.
static SIMDPP_INL
int8x32 i_shift_r(const int8x32& a, unsigned count)
{
    uint16x16 low_bytes, high_bytes;
    high_bytes = a;
    low_bytes = high_bytes;

    // Low byte of every lane: lift, shift as int16, drop back down.
    low_bytes = shift_l<8>(low_bytes);
    low_bytes = shift_r(int16x16(low_bytes), count);
    low_bytes = shift_r<8>(low_bytes);

    // High byte of every lane: shift as int16 by 8 extra bits, then restore.
    high_bytes = shift_r(int16x16(high_bytes), 8 + count);
    high_bytes = shift_l<8>(high_bytes);

    // The upper byte of low_bytes is already clear, so OR merges both parts.
    return (int8<32>) bit_or(low_bytes, high_bytes);
}
#endif
80
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of sixty-four signed 8-bit elements by a run-time
// count, done through 16-bit lanes.
SIMDPP_INL int8<64> i_shift_r(const int8<64>& a, unsigned count)
{
    uint16<32> low_bytes, high_bytes;
    high_bytes = a;
    low_bytes = high_bytes;

    // Low byte of every lane: lift, shift as int16, drop back down.
    low_bytes = shift_l<8>(low_bytes);
    low_bytes = shift_r(int16<32>(low_bytes), count);
    low_bytes = shift_r<8>(low_bytes);

    // High byte of every lane: shift as int16 by 8 extra bits, then restore.
    high_bytes = shift_r(int16<32>(high_bytes), 8 + count);
    high_bytes = shift_l<8>(high_bytes);

    // The upper byte of low_bytes is already clear, so OR merges both parts.
    return (int8<64>) bit_or(low_bytes, high_bytes);
}
#endif
96
97	// -----------------------------------------------------------------------------
98
99	static SIMDPP_INL
100	uint8x16 i_shift_r(const uint8x16& a, unsigned count)
101	{
102	#if SIMDPP_USE_NULL
103	return detail::null::shift_r(a, count);
104	#elif SIMDPP_USE_SSE2
105	uint16x8 mask, a16;
106	mask = make_ones();
107	mask = shift_l(mask, `16`-count);
108	mask = shift_r<`8`>(mask);
109
110	a16 = a;
111	a16 = shift_r(a16, count);
112	a16 = bit_andnot(a16, mask);
113	return uint8x16 (a16);
114	#elif SIMDPP_USE_NEON
115	int8x16 shift = splat(-int(count));
116	return vshlq_u8(a.native(), shift.native());
117	#elif SIMDPP_USE_ALTIVEC
118	uint8x16 shift = splat(count);
119	return vec_sr(a.native(), shift.native());
120	#elif SIMDPP_USE_MSA
121	int8x16 shift = splat(count);
122	return (v16u8) __msa_srl_b((v16i8) a.native(), shift.native());
123	#endif
124	}
125
#if SIMDPP_USE_AVX2
// AVX2: right shift of thirty-two unsigned 8-bit elements by a run-time count.
static SIMDPP_INL
uint8x32 i_shift_r(const uint8x32& a, unsigned count)
{
    // Scalar mask with the top `count` bits of a byte set: those are the
    // low-byte positions that receive bits from the high byte of the lane.
    unsigned keep_bits = 8 - count;
    uint16_t spill_bits = (0xff >> keep_bits) << keep_bits;

    uint16x16 spill_mask, wide;
    spill_mask = splat(spill_bits);

    // Shift as 16-bit lanes, then clear the bits that crossed the byte border.
    wide = a;
    wide = shift_r(wide, count);
    wide = bit_andnot(wide, spill_mask);
    return uint8x32(wide);
}
#endif
141
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of sixty-four unsigned 8-bit elements by a run-time
// count.
SIMDPP_INL uint8<64> i_shift_r(const uint8<64>& a, unsigned count)
{
    // Scalar mask with the top `count` bits of a byte set: those are the
    // low-byte positions that receive bits from the high byte of the lane.
    unsigned keep_bits = 8 - count;
    uint16_t spill_bits = (0xff >> keep_bits) << keep_bits;

    uint16<32> spill_mask, wide;
    spill_mask = splat(spill_bits);

    // Shift as 16-bit lanes, then clear the bits that crossed the byte border.
    wide = a;
    wide = shift_r(wide, count);
    wide = bit_andnot(wide, spill_mask);
    return uint8<64>(wide);
}
#endif
156
157	// -----------------------------------------------------------------------------
158
159	static SIMDPP_INL
160	int16x8 i_shift_r(const int16x8& a, unsigned count)
161	{
162	#if SIMDPP_USE_NULL
163	return detail::null::shift_r(a, count);
164	#elif SIMDPP_USE_SSE2
165	return _mm_sra_epi16(a.native(), _mm_cvtsi32_si128(count));
166	#elif SIMDPP_USE_NEON
167	int16x8 shift = splat(-int(count));
168	return vshlq_s16(a.native(), shift.native());
169	#elif SIMDPP_USE_ALTIVEC
170	uint16x8 shift = splat(count);
171	return vec_sra(a.native(), shift.native());
172	#elif SIMDPP_USE_MSA
173	int16x8 shift = splat(count);
174	return __msa_sra_h(a.native(), shift.native());
175	#endif
176	}
177
#if SIMDPP_USE_AVX2
// AVX2: right shift of sixteen signed 16-bit elements by a run-time count.
static SIMDPP_INL
int16x16 i_shift_r(const int16x16& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Workaround path: the instruction is emitted via inline assembly instead
    // of the intrinsic.
    __m256i result = a.native();
    __m128i count_reg = _mm_cvtsi32_si128(count);
    __asm("vpsraw %1, %2, %0" : "=x"(result) : "x"(count_reg), "x"(result));
    return result;
#else
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm256_sra_epi16(a.native(), count_reg);
#endif
}
#endif
192
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of thirty-two signed 16-bit elements by a run-time
// count.
SIMDPP_INL int16<32> i_shift_r(const int16<32>& a, unsigned count)
{
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm512_sra_epi16(a.native(), count_reg);
}
#endif
199
200	// -----------------------------------------------------------------------------
201
202	static SIMDPP_INL
203	uint16x8 i_shift_r(const uint16x8& a, unsigned count)
204	{
205	#if SIMDPP_USE_NULL
206	return detail::null::shift_r(a, count);
207	#elif SIMDPP_USE_SSE2
208	return _mm_srli_epi16(a.native(), count);
209	#elif SIMDPP_USE_NEON
210	int16x8 shift = splat(-int(count));
211	return vshlq_u16(a.native(), shift.native());
212	#elif SIMDPP_USE_ALTIVEC
213	uint16x8 shift = splat(count);
214	return vec_sr(a.native(), shift.native());
215	#elif SIMDPP_USE_MSA
216	int16x8 shift = splat(count);
217	return (v8u16) __msa_srl_h((v8i16) a.native(), shift.native());
218	#endif
219	}
220
#if SIMDPP_USE_AVX2
// AVX2: right shift of sixteen unsigned 16-bit elements by a run-time count.
static SIMDPP_INL
uint16x16 i_shift_r(const uint16x16& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Workaround path: the instruction is emitted via inline assembly instead
    // of the intrinsic.
    __m256i result = a.native();
    __m128i count_reg = _mm_cvtsi32_si128(count);
    __asm("vpsrlw %1, %2, %0" : "=x"(result) : "x"(count_reg), "x"(result));
    return result;
#else
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm256_srl_epi16(a.native(), count_reg);
#endif
}
#endif
235
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of thirty-two unsigned 16-bit elements by a run-time
// count.
SIMDPP_INL uint16<32> i_shift_r(const uint16<32>& a, unsigned count)
{
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm512_srl_epi16(a.native(), count_reg);
}
#endif
242
243	// -----------------------------------------------------------------------------
244
245	static SIMDPP_INL
246	int32x4 i_shift_r(const int32x4& a, unsigned count)
247	{
248	#if SIMDPP_USE_NULL
249	return detail::null::shift_r(a, count);
250	#elif SIMDPP_USE_SSE2
251	return _mm_sra_epi32(a.native(), _mm_cvtsi32_si128(count));
252	#elif SIMDPP_USE_NEON
253	int32x4 shift = splat(-int(count));
254	return vshlq_s32(a.native(), shift.native());
255	#elif SIMDPP_USE_ALTIVEC
256	uint32x4 shift = splat(count);
257	return vec_sra(a.native(), shift.native());
258	#elif SIMDPP_USE_MSA
259	int32x4 shift = splat(count);
260	return __msa_sra_w(a.native(), shift.native());
261	#endif
262	}
263
#if SIMDPP_USE_AVX2
// AVX2: right shift of eight signed 32-bit elements by a run-time count.
static SIMDPP_INL
int32x8 i_shift_r(const int32x8& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Workaround path: the instruction is emitted via inline assembly instead
    // of the intrinsic.
    __m256i result = a.native();
    __m128i count_reg = _mm_cvtsi32_si128(count);
    __asm("vpsrad %1, %2, %0" : "=x"(result) : "x"(count_reg), "x"(result));
    return result;
#else
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm256_sra_epi32(a.native(), count_reg);
#endif
}
#endif
278
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of sixteen signed 32-bit elements by a run-time count.
static SIMDPP_INL
int32<16> i_shift_r(const int32<16>& a, unsigned count)
{
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm512_sra_epi32(a.native(), count_reg);
}
#endif
286
287	// -----------------------------------------------------------------------------
288
289	static SIMDPP_INL
290	uint32x4 i_shift_r(const uint32x4& a, unsigned count)
291	{
292	#if SIMDPP_USE_NULL
293	return detail::null::shift_r(a, count);
294	#elif SIMDPP_USE_SSE2
295	return _mm_srl_epi32(a.native(), _mm_cvtsi32_si128(count));
296	#elif SIMDPP_USE_NEON
297	int32x4 shift = splat(-int(count));
298	return vshlq_u32(a.native(), shift.native());
299	#elif SIMDPP_USE_ALTIVEC
300	uint32x4 shift = splat(count);
301	return vec_sr(a.native(), shift.native());
302	#elif SIMDPP_USE_MSA
303	int32x4 shift = splat(count);
304	return (v4u32) __msa_srl_w((v4i32) a.native(), shift.native());
305	#endif
306	}
307
#if SIMDPP_USE_AVX2
// AVX2: right shift of eight unsigned 32-bit elements by a run-time count.
static SIMDPP_INL
uint32x8 i_shift_r(const uint32x8& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Workaround path: the instruction is emitted via inline assembly instead
    // of the intrinsic.
    __m256i result = a.native();
    __m128i count_reg = _mm_cvtsi32_si128(count);
    __asm("vpsrld %1, %2, %0" : "=x"(result) : "x"(count_reg), "x"(result));
    return result;
#else
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm256_srl_epi32(a.native(), count_reg);
#endif
}
#endif
322
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of sixteen unsigned 32-bit elements by a run-time
// count.
static SIMDPP_INL
uint32<16> i_shift_r(const uint32<16>& a, unsigned count)
{
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm512_srl_epi32(a.native(), count_reg);
}
#endif
330
331	// -----------------------------------------------------------------------------
332
333	static SIMDPP_INL
334	uint64x2 i_shift_r(const uint64x2& a, unsigned count)
335	{
336	#if SIMDPP_USE_SSE2
337	return _mm_srl_epi64(a.native(), _mm_cvtsi32_si128(count));
338	#elif SIMDPP_USE_NEON
339	int64x2 shift = splat(-int(count));
340	return vshlq_u64(a.native(), shift.native());
341	#elif SIMDPP_USE_VSX_207
342	uint64x2 shift = splat(count);
343	return vec_sr(a.native(), shift.native());
344	#elif SIMDPP_USE_MSA
345	int32x4 shift = splat(count);
346	return (v2u64) __msa_srl_d((v2i64) a.native(), (v2i64) shift.native());
347	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
348	return detail::null::shift_r(a, count);
349	#endif
350	}
351
#if SIMDPP_USE_AVX2
// AVX2: right shift of four unsigned 64-bit elements by a run-time count.
static SIMDPP_INL
uint64x4 i_shift_r(const uint64x4& a, unsigned count)
{
#if SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS
    // Workaround path: the instruction is emitted via inline assembly instead
    // of the intrinsic.
    __m256i result = a.native();
    __m128i count_reg = _mm_cvtsi32_si128(count);
    __asm("vpsrlq %1, %2, %0" : "=x"(result) : "x"(count_reg), "x"(result));
    return result;
#else
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm256_srl_epi64(a.native(), count_reg);
#endif
}
#endif
366
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of eight unsigned 64-bit elements by a run-time count.
static SIMDPP_INL
uint64<8> i_shift_r(const uint64<8>& a, unsigned count)
{
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm512_srl_epi64(a.native(), count_reg);
}
#endif
374
375	// -----------------------------------------------------------------------------
376
377	static SIMDPP_INL
378	int64x2 i_shift_r(const int64x2& a, unsigned count)
379	{
380	#if SIMDPP_USE_SSE2
381	uint64<`2`> ret, bias;
382	bias = make_uint(`0x8000000000000000`);
383	ret = shift_r(add(uint64<`2`>(a), bias), count);
384	ret = sub(ret, shift_r(bias, count));
385	return (int64<`2`>) ret;
386	#elif SIMDPP_USE_NEON
387	int64x2 shift = splat(-int(count));
388	return vshlq_s64(a.native(), shift.native());
389	#elif SIMDPP_USE_VSX_207
390	uint64x2 shift = splat(count);
391	return vec_sra(a.native(), shift.native());
392	#elif SIMDPP_USE_MSA
393	int32x4 shift = splat(count);
394	return __msa_sra_d(a.native(), (v2i64) shift.native());
395	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
396	return detail::null::shift_r(a, count);
397	#endif
398	}
399
#if SIMDPP_USE_AVX2
// AVX2: right shift of four signed 64-bit elements by a run-time count.
static SIMDPP_INL
int64x4 i_shift_r(const int64x4& a, unsigned count)
{
    // Built from unsigned operations: add a bias with only the top bit set,
    // shift as unsigned, then subtract the equally shifted bias.
    uint64<4> shifted, sign_bias;
    sign_bias = make_uint(0x8000000000000000);
    shifted = shift_r(add(uint64<4>(a), sign_bias), count);
    shifted = sub(shifted, shift_r(sign_bias, count));
    return (int64<4>) shifted;
}
#endif
411
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of eight signed 64-bit elements by a run-time count.
static SIMDPP_INL
int64<8> i_shift_r(const int64<8>& a, unsigned count)
{
    __m128i count_reg = _mm_cvtsi32_si128(count);
    return _mm512_sra_epi64(a.native(), count_reg);
}
#endif
419
420	// -----------------------------------------------------------------------------
421
422	template<class V> SIMDPP_INL
423	V i_shift_r(const V& a, unsigned count)
424	{
425	SIMDPP_VEC_ARRAY_IMPL2S(V, i_shift_r, a, count);
426	}
427
428
429	// -----------------------------------------------------------------------------
430
431	template<unsigned count, unsigned N> SIMDPP_INL
432	uint8<N> shift_r_u8(const uint8<N>& a);
433
434
435	template<unsigned count> SIMDPP_INL
436	int8x16 i_shift_r(const int8x16& a)
437	{
438	static_assert(count < `8`, "Shift out of bounds");
439	#if SIMDPP_USE_NULL
440	return i_shift_r(a, count);
441	#elif SIMDPP_USE_SSE2
442	uint16<`8`> hi, lo;
443	lo = hi = a;
444
445	lo = shift_l<`8`>(lo);
446	lo = shift_r<count>(int16<`8`>(lo));
447	lo = shift_r<`8`>(lo);
448
449	hi = shift_r<`8`+count>(int16<`8`>(hi));
450	hi = shift_l<`8`>(hi);
451	return (int8<`16`>) bit_or(lo, hi); //higher part of lo is already clear
452	#elif SIMDPP_USE_NEON
453	return vshrq_n_s8(a.native(), count);
454	#elif SIMDPP_USE_ALTIVEC
455	uint8x16 shift = make_uint(count);
456	return vec_sra(a.native(), shift.native());
457	#elif SIMDPP_USE_MSA
458	return __msa_srai_b(a.native(), count);
459	#endif
460	}
461
#if SIMDPP_USE_AVX2
// AVX2: right shift of thirty-two signed 8-bit elements by a compile-time
// count, done through 16-bit lanes.
template<unsigned count> SIMDPP_INL
int8x32 i_shift_r(const int8x32& a)
{
    static_assert(count < 8, "Shift out of bounds");
    uint16<16> low_bytes, high_bytes;
    high_bytes = a;
    low_bytes = high_bytes;

    // Low byte of every lane: lift, shift as int16, drop back down.
    low_bytes = shift_l<8>(low_bytes);
    low_bytes = shift_r<count>(int16<16>(low_bytes));
    low_bytes = shift_r<8>(low_bytes);

    // High byte of every lane: shift as int16 by 8 extra bits, then restore.
    high_bytes = shift_r<8+count>(int16<16>(high_bytes));
    high_bytes = shift_l<8>(high_bytes);

    // The upper byte of low_bytes is already clear, so OR merges both parts.
    return (int8<32>) bit_or(low_bytes, high_bytes);
}
#endif
479
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of sixty-four signed 8-bit elements by a
// compile-time count, done through 16-bit lanes.
template<unsigned count> SIMDPP_INL
int8<64> i_shift_r(const int8<64>& a)
{
    static_assert(count < 8, "Shift out of bounds");
    uint16<32> low_bytes, high_bytes;
    high_bytes = a;
    low_bytes = high_bytes;

    // Low byte of every lane: lift, shift as int16, drop back down.
    low_bytes = shift_l<8>(low_bytes);
    low_bytes = shift_r<count>(int16<32>(low_bytes));
    low_bytes = shift_r<8>(low_bytes);

    // High byte of every lane: shift as int16 by 8 extra bits, then restore.
    high_bytes = shift_r<8+count>(int16<32>(high_bytes));
    high_bytes = shift_l<8>(high_bytes);

    // The upper byte of low_bytes is already clear, so OR merges both parts.
    return (int8<64>) bit_or(low_bytes, high_bytes);
}
#endif
497
498	// -----------------------------------------------------------------------------
499
500	template<unsigned count, unsigned N> SIMDPP_INL
501	uint8<N> sse_shift_r_u8(const uint8<N>& a)
502	{
503	uint8_t mask1 = (`0xff` << count) & `0xff`;
504	uint8<N> mask = make_uint(mask1);
505
506	uint16<N/`2`> a16 = (uint16<N/`2`>) bit_and(a, mask);
507	a16 = shift_r<count>(a16);
508
509	return uint8<N>(a16);
510	}
511
512	template<unsigned count> SIMDPP_INL
513	uint8x16 i_shift_r(const uint8x16& a)
514	{
515	static_assert(count < `8`, "Shift out of bounds");
516	#if SIMDPP_USE_NULL
517	return i_shift_r(a, count);
518	#elif SIMDPP_USE_SSE2
519	return sse_shift_r_u8<count>(a);
520	#elif SIMDPP_USE_NEON
521	return vshrq_n_u8(a.native(), count);
522	#elif SIMDPP_USE_ALTIVEC
523	uint8x16 shift = make_uint(count);
524	return vec_sr(a.native(), shift.native());
525	#elif SIMDPP_USE_MSA
526	return (v16u8) __msa_srli_b((v16i8) a.native(), count);
527	#endif
528	}
529
#if SIMDPP_USE_AVX2
// AVX2: right shift of thirty-two unsigned 8-bit elements by a compile-time
// count; delegates to the 16-bit-lane helper.
template<unsigned count> SIMDPP_INL
uint8x32 i_shift_r(const uint8x32& a)
{
    static_assert(count < 8, "Shift out of bounds");
    return sse_shift_r_u8<count>(a);
}
#endif
538
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of sixty-four unsigned 8-bit elements by a
// compile-time count; delegates to the 16-bit-lane helper.
template<unsigned count> SIMDPP_INL
uint8<64> i_shift_r(const uint8<64>& a)
{
    static_assert(count < 8, "Shift out of bounds");
    return sse_shift_r_u8<count>(a);
}
#endif
547
548	// -----------------------------------------------------------------------------
549
550	template<unsigned count> SIMDPP_INL
551	int16x8 i_shift_r(const int16x8& a)
552	{
553	static_assert(count < `16`, "Shift out of bounds");
554	#if SIMDPP_USE_NULL
555	return detail::null::shift_r(a, count);
556	#elif SIMDPP_USE_SSE2
557	return _mm_srai_epi16(a.native(), count);
558	#elif SIMDPP_USE_NEON
559	return vshrq_n_s16(a.native(), count);
560	#elif SIMDPP_USE_ALTIVEC
561	uint16x8 shift = make_uint(count);
562	return vec_sra(a.native(), shift.native());
563	#elif SIMDPP_USE_MSA
564	return __msa_srai_h(a.native(), count);
565	#endif
566	}
567
#if SIMDPP_USE_AVX2
// AVX2: right shift of sixteen signed 16-bit elements by a compile-time count.
template<unsigned count> SIMDPP_INL
int16x16 i_shift_r(const int16x16& a)
{
    static_assert(count < 16, "Shift out of bounds");
    __m256i src = a.native();
    return _mm256_srai_epi16(src, count);
}
#endif
576
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of thirty-two signed 16-bit elements by a
// compile-time count.
template<unsigned count> SIMDPP_INL
int16<32> i_shift_r(const int16<32>& a)
{
    static_assert(count < 16, "Shift out of bounds");
    __m512i src = a.native();
    return _mm512_srai_epi16(src, count);
}
#endif
585
586	// -----------------------------------------------------------------------------
587
588	template<unsigned count> SIMDPP_INL
589	uint16x8 i_shift_r(const uint16x8& a)
590	{
591	static_assert(count < `16`, "Shift out of bounds");
592	#if SIMDPP_USE_NULL
593	return i_shift_r(a, count);
594	#elif SIMDPP_USE_SSE2
595	return _mm_srli_epi16(a.native(), count);
596	#elif SIMDPP_USE_NEON
597	return vshrq_n_u16(a.native(), count);
598	#elif SIMDPP_USE_ALTIVEC
599	uint16x8 shift = make_uint(count);
600	return vec_sr(a.native(), shift.native());
601	#elif SIMDPP_USE_MSA
602	return (v8u16) __msa_srli_h((v8i16) a.native(), count);
603	#endif
604	}
605
#if SIMDPP_USE_AVX2
// AVX2: right shift of sixteen unsigned 16-bit elements by a compile-time
// count.
template<unsigned count> SIMDPP_INL
uint16x16 i_shift_r(const uint16x16& a)
{
    static_assert(count < 16, "Shift out of bounds");
    __m256i src = a.native();
    return _mm256_srli_epi16(src, count);
}
#endif
614
#if SIMDPP_USE_AVX512BW
// AVX-512BW: right shift of thirty-two unsigned 16-bit elements by a
// compile-time count.
template<unsigned count> SIMDPP_INL
uint16<32> i_shift_r(const uint16<32>& a)
{
    static_assert(count < 16, "Shift out of bounds");
    __m512i src = a.native();
    return _mm512_srli_epi16(src, count);
}
#endif
623
624	// -----------------------------------------------------------------------------
625
626	template<unsigned count> SIMDPP_INL
627	int32x4 i_shift_r(const int32x4& a)
628	{
629	static_assert(count < `32`, "Shift out of bounds");
630	#if SIMDPP_USE_NULL
631	return i_shift_r(a, count);
632	#elif SIMDPP_USE_SSE2
633	return _mm_srai_epi32(a.native(), count);
634	#elif SIMDPP_USE_NEON
635	return vshrq_n_s32(a.native(), count);
636	#elif SIMDPP_USE_ALTIVEC
637	uint32x4 shift = make_uint(count);
638	return vec_sra(a.native(), shift.native());
639	#elif SIMDPP_USE_MSA
640	return __msa_srai_w(a.native(), count);
641	#endif
642	}
643
#if SIMDPP_USE_AVX2
// AVX2: right shift of eight signed 32-bit elements by a compile-time count.
template<unsigned count> SIMDPP_INL
int32x8 i_shift_r(const int32x8& a)
{
    static_assert(count < 32, "Shift out of bounds");
    __m256i src = a.native();
    return _mm256_srai_epi32(src, count);
}
#endif
652
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of sixteen signed 32-bit elements by a compile-time
// count.
template<unsigned count> SIMDPP_INL
int32<16> i_shift_r(const int32<16>& a)
{
    static_assert(count < 32, "Shift out of bounds");
    __m512i src = a.native();
    return _mm512_srai_epi32(src, count);
}
#endif
661
662	// -----------------------------------------------------------------------------
663
664	template<unsigned count> SIMDPP_INL
665	uint32x4 i_shift_r(const uint32x4& a)
666	{
667	static_assert(count < `32`, "Shift out of bounds");
668	#if SIMDPP_USE_NULL
669	return i_shift_r(a, count);
670	#elif SIMDPP_USE_SSE2
671	return _mm_srli_epi32(a.native(), count);
672	#elif SIMDPP_USE_NEON
673	return vshrq_n_u32(a.native(), count);
674	#elif SIMDPP_USE_ALTIVEC
675	uint32x4 shift = make_uint(count);
676	return vec_sr(a.native(), shift.native());
677	#elif SIMDPP_USE_MSA
678	return (v4u32) __msa_srli_w((v4i32) a.native(), count);
679	#endif
680	}
681
#if SIMDPP_USE_AVX2
// AVX2: right shift of eight unsigned 32-bit elements by a compile-time count.
template<unsigned count> SIMDPP_INL
uint32x8 i_shift_r(const uint32x8& a)
{
    static_assert(count < 32, "Shift out of bounds");
    __m256i src = a.native();
    return _mm256_srli_epi32(src, count);
}
#endif
690
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of sixteen unsigned 32-bit elements by a compile-time
// count.
template<unsigned count> SIMDPP_INL
uint32<16> i_shift_r(const uint32<16>& a)
{
    static_assert(count < 32, "Shift out of bounds");
    __m512i src = a.native();
    return _mm512_srli_epi32(src, count);
}
#endif
699
700	// -----------------------------------------------------------------------------
701
702	template<unsigned count> SIMDPP_INL
703	int64x2 i_shift_r(const int64x2& a)
704	{
705	static_assert(count < `64`, "Shift out of bounds");
706	#if SIMDPP_USE_NEON
707	return vshrq_n_s64(a.native(), count);
708	#elif SIMDPP_USE_VSX_207
709	uint64x2 shift = splat(count);
710	return vec_sra(a.native(), shift.native());
711	#elif SIMDPP_USE_MSA
712	return __msa_srai_d(a.native(), count);
713	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_SSE2 \|\| SIMDPP_USE_ALTIVEC
714	return i_shift_r(a, count);
715	#else
716	return SIMDPP_NOT_IMPLEMENTED_TEMPLATE1(int64<count>, a);
717	#endif
718	}
719
#if SIMDPP_USE_AVX2
// AVX2: right shift of four signed 64-bit elements by a compile-time count.
// Forwards to the run-time-count overload for int64x4.
template<unsigned count> SIMDPP_INL
int64x4 i_shift_r(const int64x4& a)
{
    // Same bounds check as every other compile-time-count overload; it was
    // missing here.
    static_assert(count < 64, "Shift out of bounds");
    return i_shift_r(a, count);
}
#endif
727
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of eight signed 64-bit elements by a compile-time
// count.
template<unsigned count> SIMDPP_INL
int64<8> i_shift_r(const int64<8>& a)
{
    static_assert(count < 64, "Shift out of bounds");
    __m512i src = a.native();
    return _mm512_srai_epi64(src, count);
}
#endif
736
737	// -----------------------------------------------------------------------------
738
739	template<unsigned count> SIMDPP_INL
740	uint64x2 i_shift_r(const uint64x2& a)
741	{
742	static_assert(count < `64`, "Shift out of bounds");
743	#if SIMDPP_USE_SSE2
744	return _mm_srli_epi64(a.native(), count);
745	#elif SIMDPP_USE_NEON
746	return vshrq_n_u64(a.native(), count);
747	#elif SIMDPP_USE_VSX_207
748	uint64x2 shift = splat(count);
749	return vec_sr(a.native(), shift.native());
750	#elif SIMDPP_USE_MSA
751	return (v2u64) __msa_srli_d((v2i64) a.native(), count);
752	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
753	return i_shift_r(a, count);
754	#else
755	return SIMDPP_NOT_IMPLEMENTED_TEMPLATE1(int64<count>, a);
756	#endif
757	}
758
#if SIMDPP_USE_AVX2
// AVX2: right shift of four unsigned 64-bit elements by a compile-time count.
template<unsigned count> SIMDPP_INL
uint64x4 i_shift_r(const uint64x4& a)
{
    static_assert(count < 64, "Shift out of bounds");
    __m256i src = a.native();
    return _mm256_srli_epi64(src, count);
}
#endif
767
#if SIMDPP_USE_AVX512F
// AVX-512F: right shift of eight unsigned 64-bit elements by a compile-time
// count.
template<unsigned count> SIMDPP_INL
uint64<8> i_shift_r(const uint64<8>& a)
{
    static_assert(count < 64, "Shift out of bounds");
    __m512i src = a.native();
    return _mm512_srli_epi64(src, count);
}
#endif
776
777	// -----------------------------------------------------------------------------
778
779	template<unsigned count, class V> SIMDPP_INL
780	V i_shift_r(const V& a)
781	{
782	static_assert(count < `64`, "Shift out of bounds");
783	SIMDPP_VEC_ARRAY_IMPL1(V, i_shift_r<count>, a);
784	}
785
786	// -----------------------------------------------------------------------------
787
788	template<bool no_shift>
789	struct i_shift_r_wrapper {
790	template<unsigned count, class V>
791	static SIMDPP_INL V run(const V& arg) { return i_shift_r<count>(arg); }
792	};
793	template<>
794	struct i_shift_r_wrapper<true> {
795	template<unsigned count, class V>
796	static SIMDPP_INL V run(const V& arg) { return arg; }
797	};
798
799	} // namespace insn
800	} // namespace detail
801	} // namespace SIMDPP_ARCH_NAMESPACE
802	} // namespace simdpp
803
804	#endif
805
806

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_shift_r.h