conv_extend_to_int32.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_extend_to_int32.h]

1	/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT32_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT32_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/combine.h>
17	#include <simdpp/detail/insn/conv_extend_to_int16.h>
18	#include <simdpp/core/i_shift_r.h>
19	#include <simdpp/core/move_l.h>
20	#include <simdpp/core/zip_hi.h>
21	#include <simdpp/core/zip_lo.h>
22	#include <simdpp/core/unzip_lo.h>
23	#include <simdpp/detail/vector_array_conv_macros.h>
24
25	namespace simdpp {
26	namespace SIMDPP_ARCH_NAMESPACE {
27	namespace detail {
28	namespace insn {
29
30	// -----------------------------------------------------------------------------
31
32	static SIMDPP_INL
33	uint32<`8`> i_to_uint32(const uint16<`8`>& a)
34	{
35	#if SIMDPP_USE_NULL
36	uint32<`8`> r;
37	for (unsigned i = `0`; i < r.length; i++) {
38	r.vec(i/`4`).el(i%`4`) = uint32_t(a.vec(`0`).el(i));
39	}
40	return r;
41	#elif SIMDPP_USE_AVX2
42	return _mm256_cvtepu16_epi32(a.native());
43	#elif SIMDPP_USE_SSE4_1
44	uint32<`8`> r;
45	r.vec(`0`) = _mm_cvtepu16_epi32(a.native());
46	r.vec(`1`) = _mm_cvtepu16_epi32(move8_l<`4`>(a).eval().native());
47	return r;
48	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_MSA \|\| (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN)
49	uint16<`8`> zero = make_zero();
50	uint32<`8`> r;
51	r.vec(`0`) = zip8_lo(a, zero);
52	r.vec(`1`) = zip8_hi(a, zero);
53	return r;
54	#elif (SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN)
55	uint16<`8`> zero = make_zero();
56	uint32<`8`> r;
57	r.vec(`0`) = zip8_lo(zero, a);
58	r.vec(`1`) = zip8_hi(zero, a);
59	return r;
60	#elif SIMDPP_USE_NEON
61	uint32<`8`> r;
62	r.vec(`0`) = vmovl_u16(vget_low_u16(a.vec(`0`).native()));
63	r.vec(`1`) = vmovl_u16(vget_high_u16(a.vec(`1`).native()));
64	return r;
65	#endif
66	}
67
#if SIMDPP_USE_AVX2
/** Zero-extends sixteen unsigned 16-bit lanes to 32 bits each.
    Only compiled when AVX2 is enabled.
*/
SIMDPP_INL uint32<16> i_to_uint32(const uint16<16>& a)
{
#if SIMDPP_USE_AVX512F
    // A single 512-bit conversion handles all sixteen lanes.
    return _mm512_cvtepu16_epi32(a.native());
#else
    // Split into two 8-lane halves and widen each to a 256-bit register.
    uint16<8> lower, upper;
    split(a, lower, upper);

    uint32<16> widened;
    widened.vec(0) = _mm256_cvtepu16_epi32(lower.native());
    widened.vec(1) = _mm256_cvtepu16_epi32(upper.native());
    return widened;
#endif
}
#endif
83
#if SIMDPP_USE_AVX512BW
/** Zero-extends thirty-two unsigned 16-bit lanes to 32 bits each.
    Only compiled when AVX512BW is enabled.
*/
SIMDPP_INL uint32<32> i_to_uint32(const uint16<32>& a)
{
    // Each 16-lane half is widened into its own 512-bit result register.
    uint16<16> lower, upper;
    split(a, lower, upper);

    uint32<32> widened;
    widened.vec(0) = _mm512_cvtepu16_epi32(lower.native());
    widened.vec(1) = _mm512_cvtepu16_epi32(upper.native());
    return widened;
}
#endif
95
96	template<unsigned N> SIMDPP_INL
97	uint32<N> i_to_uint32(const uint16<N>& a)
98	{
99	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint32<N>, i_to_uint32, a)
100	}
101
102	// -----------------------------------------------------------------------------
103
104	static SIMDPP_INL
105	uint32<`16`> i_to_uint32(const uint8<`16`>& a)
106	{
107	#if SIMDPP_USE_NULL
108	uint32<`16`> r;
109	for (unsigned i = `0`; i < r.length; i++) {
110	r.vec(i/`4`).el(i%`4`) = uint32_t(a.vec(`0`).el(i));
111	}
112	return r;
113	#elif SIMDPP_USE_AVX512F
114	return _mm512_cvtepu8_epi32(a.native());
115	#elif SIMDPP_USE_AVX2
116	uint32<`16`> r;
117	r.vec(`0`) = _mm256_cvtepu8_epi32(a.native());
118	r.vec(`1`) = _mm256_cvtepu8_epi32(move16_l<`8`>(a).eval().native());
119	return r;
120	#elif SIMDPP_USE_SSE4_1
121	uint32<`16`> r;
122	r.vec(`0`) = _mm_cvtepu8_epi32(a.native());
123	r.vec(`1`) = _mm_cvtepu8_epi32(move16_l<`4`>(a).eval().native());
124	r.vec(`2`) = _mm_cvtepu8_epi32(move16_l<`8`>(a).eval().native());
125	r.vec(`3`) = _mm_cvtepu8_epi32(move16_l<`12`>(a).eval().native());
126	return r;
127	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
128	return i_to_uint32(i_to_uint16(a));
129	#endif
130	}
131
#if SIMDPP_USE_AVX2
/** Zero-extends thirty-two unsigned 8-bit lanes to 32 bits each.
    Only compiled when AVX2 is enabled.
*/
SIMDPP_INL uint32<32> i_to_uint32(const uint8<32>& a)
{
    // Both code paths start from the two 16-byte halves of the input.
    uint8<16> lower, upper;
    split(a, lower, upper);

    uint32<32> widened;
#if SIMDPP_USE_AVX512F
    // One 512-bit conversion per 16-byte half.
    widened.vec(0) = _mm512_cvtepu8_epi32(lower.native());
    widened.vec(1) = _mm512_cvtepu8_epi32(upper.native());
#else
    // Each 256-bit conversion takes eight bytes, so every half needs two:
    // one on the half itself and one on the half moved down by eight bytes.
    widened.vec(0) = _mm256_cvtepu8_epi32(lower.native());
    widened.vec(1) = _mm256_cvtepu8_epi32(move16_l<8>(lower).eval().native());
    widened.vec(2) = _mm256_cvtepu8_epi32(upper.native());
    widened.vec(3) = _mm256_cvtepu8_epi32(move16_l<8>(upper).eval().native());
#endif
    return widened;
}
#endif
154
#if SIMDPP_USE_AVX512BW
/** Zero-extends sixty-four unsigned 8-bit lanes to 32 bits each.
    Only compiled when AVX512BW is enabled.
*/
SIMDPP_INL uint32<64> i_to_uint32(const uint8<64>& a)
{
    // Break the 64-byte input into halves, then each half into 16-byte quarters.
    uint8<32> half_lo, half_hi;
    split(a, half_lo, half_hi);

    uint8<16> q0, q1, q2, q3;
    split(half_lo, q0, q1);
    split(half_hi, q2, q3);

    // Each quarter widens into one full 512-bit result register.
    uint32<64> widened;
    widened.vec(0) = _mm512_cvtepu8_epi32(q0.native());
    widened.vec(1) = _mm512_cvtepu8_epi32(q1.native());
    widened.vec(2) = _mm512_cvtepu8_epi32(q2.native());
    widened.vec(3) = _mm512_cvtepu8_epi32(q3.native());
    return widened;
}
#endif
172
173	template<unsigned N> SIMDPP_INL
174	uint32<N> i_to_uint32(const uint8<N>& a)
175	{
176	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint32<N>, i_to_uint32, a)
177	}
178
179	// -----------------------------------------------------------------------------
180
181	static SIMDPP_INL
182	int32<`8`> i_to_int32(const int16<`8`>& a)
183	{
184	#if SIMDPP_USE_NULL
185	int32<`8`> r;
186	for (unsigned i = `0`; i < r.length; i++) {
187	r.vec(i/`4`).el(i%`4`) = int32_t(a.vec(`0`).el(i));
188	}
189	return r;
190	#elif SIMDPP_USE_AVX2
191	return _mm256_cvtepi16_epi32(a.native());
192	#elif SIMDPP_USE_SSE4_1
193	int32x8 r;
194	r.vec(`0`) = _mm_cvtepi16_epi32(a.native());
195	r.vec(`1`) = _mm_cvtepi16_epi32(move8_l<`4`>(a).eval().native());
196	return r;
197	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_MSA
198	int16x8 sign = shift_r<`15`>(a);
199	int32x4 lo, hi;
200	lo = zip8_lo(a, sign);
201	hi = zip8_hi(a, sign);
202	return combine(lo, hi);
203	#elif SIMDPP_USE_NEON
204	int32x8 r;
205	r.vec(`0`) = vmovl_s16(vget_low_s16(a.vec(`0`).native()));
206	r.vec(`1`) = vmovl_s16(vget_high_s16(a.vec(`1`).native()));
207	return r;
208	#elif SIMDPP_USE_ALTIVEC
209	int32x4 b0, b1;
210	b0 = vec_unpackh((__vector int16_t)a.vec(`0`).native());
211	b1 = vec_unpackl((__vector int16_t)a.vec(`0`).native());
212	return combine(b0, b1);
213	#endif
214	}
215
#if SIMDPP_USE_AVX2
/** Sign-extends sixteen signed 16-bit lanes to 32 bits each.
    Only compiled when AVX2 is enabled.
*/
static SIMDPP_INL
int32<16> i_to_int32(const int16<16>& a)
{
#if SIMDPP_USE_AVX512F
    // A single 512-bit conversion handles all sixteen lanes.
    return _mm512_cvtepi16_epi32(a.native());
#else
    // Widen each 8-lane half to a 256-bit register, then join the results.
    int16<8> lower, upper;
    split(a, lower, upper);

    int32<8> wide_lo, wide_hi;
    wide_lo = _mm256_cvtepi16_epi32(lower.native());
    wide_hi = _mm256_cvtepi16_epi32(upper.native());
    return combine(wide_lo, wide_hi);
#endif
}
#endif
232
#if SIMDPP_USE_AVX512BW
/** Sign-extends thirty-two signed 16-bit lanes to 32 bits each.
    Only compiled when AVX512BW is enabled.
*/
SIMDPP_INL int32<32> i_to_int32(const int16<32>& a)
{
    // Widen each 16-lane half to a 512-bit register, then join the results.
    int16<16> lower, upper;
    split(a, lower, upper);

    int32<16> wide_lo, wide_hi;
    wide_lo = _mm512_cvtepi16_epi32(lower.native());
    wide_hi = _mm512_cvtepi16_epi32(upper.native());
    return combine(wide_lo, wide_hi);
}
#endif
244
245	template<unsigned N> SIMDPP_INL
246	int32<N> i_to_int32(const int16<N>& a)
247	{
248	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int32<N>, i_to_int32, a)
249	}
250
251	// -----------------------------------------------------------------------------
252
253	static SIMDPP_INL
254	int32<`16`> i_to_int32(const int8<`16`>& a)
255	{
256	#if SIMDPP_USE_NULL
257	int32<`16`> r;
258	for (unsigned i = `0`; i < r.length; i++) {
259	r.vec(i/`4`).el(i%`4`) = int32_t(a.vec(`0`).el(i));
260	}
261	return r;
262	#elif SIMDPP_USE_AVX512F
263	return _mm512_cvtepi8_epi32(a.native());
264	#elif SIMDPP_USE_AVX2
265	int32<`16`> r;
266	r.vec(`0`) = _mm256_cvtepi8_epi32(a.native());
267	r.vec(`1`) = _mm256_cvtepi8_epi32(move16_l<`8`>(a).eval().native());
268	return r;
269	#elif SIMDPP_USE_SSE4_1
270	int32<`16`> r;
271	r.vec(`0`) = _mm_cvtepi8_epi32(a.native());
272	r.vec(`1`) = _mm_cvtepi8_epi32(move16_l<`4`>(a).eval().native());
273	r.vec(`2`) = _mm_cvtepi8_epi32(move16_l<`8`>(a).eval().native());
274	r.vec(`3`) = _mm_cvtepi8_epi32(move16_l<`12`>(a).eval().native());
275	return r;
276	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
277	return i_to_int32(i_to_int16(a));
278	#endif
279	}
280
#if SIMDPP_USE_AVX2
/** Sign-extends thirty-two signed 8-bit lanes to 32 bits each.
    Only compiled when AVX2 is enabled.
*/
static SIMDPP_INL
int32<32> i_to_int32(const int8<32>& a)
{
    // Both code paths start from the two 16-byte halves of the input.
    int8<16> lower, upper;
    split(a, lower, upper);

    int32<32> widened;
#if SIMDPP_USE_AVX512F
    // One 512-bit conversion per 16-byte half.
    widened.vec(0) = _mm512_cvtepi8_epi32(lower.native());
    widened.vec(1) = _mm512_cvtepi8_epi32(upper.native());
#else
    // Each 256-bit conversion takes eight bytes, so every half needs two:
    // one on the half itself and one on the half moved down by eight bytes.
    widened.vec(0) = _mm256_cvtepi8_epi32(lower.native());
    widened.vec(1) = _mm256_cvtepi8_epi32(move16_l<8>(lower).eval().native());
    widened.vec(2) = _mm256_cvtepi8_epi32(upper.native());
    widened.vec(3) = _mm256_cvtepi8_epi32(move16_l<8>(upper).eval().native());
#endif
    return widened;
}
#endif
304
#if SIMDPP_USE_AVX512BW
/** Sign-extends sixty-four signed 8-bit lanes to 32 bits each.
    Only compiled when AVX512BW is enabled.
*/
SIMDPP_INL int32<64> i_to_int32(const int8<64>& a)
{
    // Break the 64-byte input into halves, then each half into 16-byte quarters.
    int8<32> half_lo, half_hi;
    split(a, half_lo, half_hi);

    int8<16> q0, q1, q2, q3;
    split(half_lo, q0, q1);
    split(half_hi, q2, q3);

    // Each quarter widens into one full 512-bit result register.
    int32<64> widened;
    widened.vec(0) = _mm512_cvtepi8_epi32(q0.native());
    widened.vec(1) = _mm512_cvtepi8_epi32(q1.native());
    widened.vec(2) = _mm512_cvtepi8_epi32(q2.native());
    widened.vec(3) = _mm512_cvtepi8_epi32(q3.native());
    return widened;
}
#endif
322
323	template<unsigned N> SIMDPP_INL
324	int32<N> i_to_int32(const int8<N>& a)
325	{
326	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int32<N>, i_to_int32, a)
327	}
328
329
330	} // namespace insn
331	} // namespace detail
332	} // namespace SIMDPP_ARCH_NAMESPACE
333	} // namespace simdpp
334
335	#endif
336
337
338

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_extend_to_int32.h