shuffle128.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/shuffle128.h]

1	/*  Copyright (C) 2012-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_SHUFFLE128_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_SHUFFLE128_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16
17	namespace simdpp {
18	namespace SIMDPP_ARCH_NAMESPACE {
19	namespace detail {
20
21	/** Shuffles 128 bit parts within the vectors.
22
23	@code
24	switch(s0):
25	case 0: r[0..127] = a[0..127]
26	case 1: r[0..127] = a[128..255]
27	case 2: r[0..127] = a[256..383]
28	case 3: r[0..127] = a[384..511]
29
30	switch(s1):
31	case 0: r[128..255] = a[0..127]
32	case 1: r[128..255] = a[128..255]
33	case 2: r[128..255] = a[256..383]
34	case 3: r[128..255] = a[384..511]
35
36	switch(s2):
37	case 0: r[256..383] = b[0..127]
38	case 1: r[256..383] = b[128..255]
39	case 2: r[256..383] = b[256..383]
40	case 3: r[256..383] = b[384..511]
41
42	switch(s3):
43	case 0: r[384..511] = b[0..127]
44	case 1: r[384..511] = b[128..255]
45	case 2: r[384..511] = b[256..383]
46	case 3: r[384..511] = b[384..511]
47	@endcode
48	*/
49	#if SIMDPP_USE_AVX512BW
50	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> SIMDPP_INL
51	uint8<`64`> shuffle2_128(const uint8<`64`>& a, const uint8<`64`>& b)
52	{
53	static_assert(s0 < `4` && s1 < `4` && s2 < `4` && s3 < `4`, "Selector out of range");
54	return _mm512_shuffle_i32x4(a.native(), b.native(), (s3<<`6`) + (s2<<`4`) + (s1<<`2`) + s0);
55	}
56
57	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> SIMDPP_INL
58	uint16<`32`> shuffle2_128(const uint16<`32`>& a, const uint16<`32`>& b)
59	{
60	static_assert(s0 < `4` && s1 < `4` && s2 < `4` && s3 < `4`, "Selector out of range");
61	return _mm512_shuffle_i32x4(a.native(), b.native(), (s3<<`6`) + (s2<<`4`) + (s1<<`2`) + s0);
62	}
63	#endif
64	#if SIMDPP_USE_AVX512F
65	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> SIMDPP_INL
66	uint32<`16`> shuffle2_128(const uint32<`16`>& a, const uint32<`16`>& b)
67	{
68	static_assert(s0 < `4` && s1 < `4` && s2 < `4` && s3 < `4`, "Selector out of range");
69	return _mm512_shuffle_i32x4(a.native(), b.native(), (s3<<`6`) + (s2<<`4`) + (s1<<`2`) + s0);
70	}
71	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> SIMDPP_INL
72	uint64<`8`> shuffle2_128(const uint64<`8`>& a, const uint64<`8`>& b)
73	{
74	static_assert(s0 < `4` && s1 < `4` && s2 < `4` && s3 < `4`, "Selector out of range");
75	return _mm512_shuffle_i64x2(a.native(), b.native(), (s3<<`6`) + (s2<<`4`) + (s1<<`2`) + s0);
76	}
77	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> SIMDPP_INL
78	float32<`16`> shuffle2_128(const float32<`16`>& a, const float32<`16`>& b)
79	{
80	static_assert(s0 < `4` && s1 < `4` && s2 < `4` && s3 < `4`, "Selector out of range");
81	return _mm512_shuffle_f32x4(a.native(), b.native(), (s3<<`6`) + (s2<<`4`) + (s1<<`2`) + s0);
82	}
83	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> SIMDPP_INL
84	float64<`8`> shuffle2_128(const float64<`8`>& a, const float64<`8`>& b)
85	{
86	static_assert(s0 < `4` && s1 < `4` && s2 < `4` && s3 < `4`, "Selector out of range");
87	return _mm512_shuffle_f64x2(a.native(), b.native(), (s3<<`6`) + (s2<<`4`) + (s1<<`2`) + s0);
88	}
89	#endif
90
91	/** Shuffles 128 bit parts within the vectors.
92
93	For vectors larger than 256 bits the same operation is applied to each
94	256-bit element.
95
96	@code
97	switch(s0):
98	case 0: r[0..127] = a[0..127]
99	case 1: r[0..127] = a[128..255]
100
101	switch(s1):
102	case 0: r[128..255] = b[0..127]
103	case 1: r[128..255] = b[128..255]
104	@endcode
105	*/
106	template<unsigned s0, unsigned s1> SIMDPP_INL
107	uint8x32 shuffle1_128(const uint8x32& a, const uint8x32& b)
108	{
109	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
110	#if SIMDPP_USE_AVX2
111	return _mm256_permute2x128_si256(a.native(), b.native(), ((s1+`2`)<<`4`) + s0);
112	#else
113	uint8x32 r;
114	r.vec(`0`) = a.vec(s0);
115	r.vec(`1`) = b.vec(s1);
116	return r;
117	#endif
118	}
119	template<unsigned s0, unsigned s1> SIMDPP_INL
120	uint16x16 shuffle1_128(const uint16x16& a, const uint16x16& b) { return (uint16x16)shuffle1_128<s0,s1>(uint8x32 (a), uint8x32 (b)); }
121	template<unsigned s0, unsigned s1> SIMDPP_INL
122	uint32x8 shuffle1_128(const uint32x8& a, const uint32x8& b) { return (uint32x8)shuffle1_128<s0,s1>(uint8x32 (a), uint8x32 (b)); }
123	template<unsigned s0, unsigned s1> SIMDPP_INL
124	uint64x4 shuffle1_128(const uint64x4& a, const uint64x4& b) { return (uint64x4)shuffle1_128<s0,s1>(uint8x32 (a), uint8x32 (b)); }
125
126	template<unsigned s0, unsigned s1> SIMDPP_INL
127	float32x8 shuffle1_128(const float32x8& a, const float32x8& b)
128	{
129	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
130	#if SIMDPP_USE_AVX
131	return _mm256_permute2f128_ps(a.native(), b.native(), ((s1+`2`)<<`4`) + s0);
132	#else
133	float32x8 r;
134	r.vec(`0`) = a.vec(s0);
135	r.vec(`1`) = b.vec(s1);
136	return r;
137	#endif
138	}
139	template<unsigned s0, unsigned s1> SIMDPP_INL
140	float64x4 shuffle1_128(const float64x4& a, const float64x4& b)
141	{
142	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
143	#if SIMDPP_USE_AVX
144	return _mm256_permute2f128_pd(a.native(), b.native(), ((s1+`2`)<<`4`) + s0);
145	#else
146	float64x4 r;
147	r.vec(`0`) = a.vec(s0);
148	r.vec(`1`) = b.vec(s1);
149	return r;
150	#endif
151	}
152
153	#if SIMDPP_USE_AVX512F
154	template<unsigned s0, unsigned s1> SIMDPP_INL
155	uint32<`16`> shuffle1_128(const uint32<`16`>& a, const uint32<`16`>& b)
156	{
157	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
158	return shuffle2_128<s0,s1,s0+`2`,s1+`2`>(a, b);
159	}
160
161	template<unsigned s0, unsigned s1> SIMDPP_INL
162	uint64<`8`> shuffle1_128(const uint64<`8`>& a, const uint64<`8`>& b)
163	{
164	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
165	return shuffle2_128<s0,s1,s0+`2`,s1+`2`>(a, b);
166	}
167
168	template<unsigned s0, unsigned s1> SIMDPP_INL
169	float32<`16`> shuffle1_128(const float32<`16`>& a, const float32<`16`>& b)
170	{
171	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
172	return shuffle2_128<s0,s1,s0+`2`,s1+`2`>(a, b);
173	}
174
175	template<unsigned s0, unsigned s1> SIMDPP_INL
176	float64<`8`> shuffle1_128(const float64<`8`>& a, const float64<`8`>& b)
177	{
178	static_assert(s0 < `2` && s1 < `2`, "Selector out of range");
179	return shuffle2_128<s0,s1,s0+`2`,s1+`2`>(a, b);
180	}
181	#endif
182
183	#if SIMDPP_USE_AVX512F
184	template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, class V> SIMDPP_INL
185	V permute4_128(const V& a)
186	{
187	return shuffle2_128<s0,s1,s2,s3>(a, a);
188	}
189	#endif
190
191	} // namespace detail
192	} // namespace SIMDPP_ARCH_NAMESPACE
193	} // namespace simdpp
194
195	#endif
196
197

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/shuffle128.h