/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT16_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT16_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/combine.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/core/move_l.h>
#include <simdpp/core/zip_hi.h>
#include <simdpp/core/zip_lo.h>
#include <simdpp/detail/vector_array_conv_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

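// Zero-extends each unsigned 8-bit element of a to 16 bits. The widened
// result occupies twice the register space, so most backends produce two
// native vectors; the dispatch below picks the cheapest widening idiom
// each ISA offers.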
SIMDPP_INL uint16<16> i_to_uint16(const uint8<16>& a)
{
#if SIMDPP_USE_NULL
    uint16x16 r;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/8).el(i%8) = uint16_t(a.el(i));
    }
    return r;
#elif SIMDPP_USE_AVX2
    return _mm256_cvtepu8_epi16(a.native());
#elif SIMDPP_USE_SSE4_1
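    // PMOVZXBW only widens the low 8 bytes, so shift the upper 8 bytes
    // down with move16_l<8> and widen them separately.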
    uint16x8 r1, r2;
    r1 = _mm_cvtepu8_epi16(a.native());
    r2 = _mm_cvtepu8_epi16(move16_l<8>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_SSE2 || (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN) || SIMDPP_USE_MSA
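    // No dedicated widening instruction: interleaving with a zero vector
    // places each source byte in the low half of a 16-bit lane, which
    // zero-extends it given little-endian element order.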
    uint16x8 r1, r2;
    r1 = zip16_lo(a, (uint8x16) make_zero());
    r2 = zip16_hi(a, (uint8x16) make_zero());
    return combine(r1, r2);
#elif SIMDPP_USE_NEON
    uint16x16 r;
    r.vec(0) = vmovl_u8(vget_low_u8(a.native()));
    r.vec(1) = vmovl_u8(vget_high_u8(a.native()));
    return r;
#elif (SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN)
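    // On big-endian the zero byte must come first in each pair so that it
    // lands in the high half of the 16-bit lane.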
    uint16x8 r1, r2;
    r1 = zip16_lo((uint8x16) make_zero(), a);
    r2 = zip16_hi((uint8x16) make_zero(), a);
    return combine(r1, r2);
#endif
}

#if SIMDPP_USE_AVX2
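// 32-element source: AVX-512BW widens all 32 bytes with a single
// instruction; otherwise split the vector and widen each 128-bit half.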
SIMDPP_INL uint16<32> i_to_uint16(const uint8<32>& a)
{
#if SIMDPP_USE_AVX512BW
    return _mm512_cvtepu8_epi16(a.native());
#else
    uint16<16> r0, r1;
    uint8<16> a0, a1;
    split(a, a0, a1);
    r0 = _mm256_cvtepu8_epi16(a0.native());
    r1 = _mm256_cvtepu8_epi16(a1.native());
    return combine(r0, r1);
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<64> i_to_uint16(const uint8<64>& a)
{
    uint16<32> r0, r1;
    uint8<32> a0, a1;
    split(a, a0, a1);
    r0 = _mm512_cvtepu8_epi16(a0.native());
    r1 = _mm512_cvtepu8_epi16(a1.native());
    return combine(r0, r1);
}
#endif

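// Vectors longer than any native width: convert each subvector separately
// and assemble the result.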
template<unsigned N> SIMDPP_INL
uint16<N> i_to_uint16(const uint8<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint16<N>, i_to_uint16, a)
}

// -----------------------------------------------------------------------------

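// Sign-extends each signed 8-bit element of a to 16 bits.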
SIMDPP_INL int16<16> i_to_int16(const int8<16>& a)
{
#if SIMDPP_USE_NULL
    int16x16 r;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/8).el(i%8) = int16_t(a.el(i));
    }
    return r;
#elif SIMDPP_USE_AVX2
    return _mm256_cvtepi8_epi16(a.native());
#elif SIMDPP_USE_SSE4_1
    int16x8 r1, r2;
    r1 = _mm_cvtepi8_epi16(a.native());
    r2 = _mm_cvtepi8_epi16(move16_l<8>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_SSE2
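    // SSE2 has no PMOVSXBW: place each byte in the high half of a 16-bit
    // lane, then shift right arithmetically by 8 to replicate the sign bit.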
    int16x8 r1, r2;
    r1 = zip16_lo((int8x16) make_zero(), a);
    r1 = shift_r<8>(r1);
    r2 = zip16_hi((int8x16) make_zero(), a);
    r2 = shift_r<8>(r2);
    return combine(r1, r2);
#elif SIMDPP_USE_NEON
    int16x16 r;
    r.vec(0) = vmovl_s8(vget_low_s8(a.native()));
    r.vec(1) = vmovl_s8(vget_high_s8(a.native()));
    return r;
#elif SIMDPP_USE_MSA
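    // Materialize each element's sign (0x00 or 0xFF) with an arithmetic
    // shift and interleave it in as the high byte of every 16-bit lane.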
    int8x16 sign = shift_r<7>(a);
    int16x8 lo, hi;
    lo = zip16_lo(a, sign);
    hi = zip16_hi(a, sign);
    return combine(lo, hi);
#elif SIMDPP_USE_ALTIVEC
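    // vec_unpackh/vec_unpackl sign-extend each half of the vector directly.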
    int16x16 r;
    r.vec(0) = vec_unpackh(a.vec(0).native());
    r.vec(1) = vec_unpackl(a.vec(0).native());
    return r;
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL int16<32> i_to_int16(const int8<32>& a)
{
#if SIMDPP_USE_AVX512BW
    return _mm512_cvtepi8_epi16(a.native());
#else
    int16<16> r0, r1;
    int8<16> a0, a1;
    split(a, a0, a1);
    r0 = _mm256_cvtepi8_epi16(a0.native());
    r1 = _mm256_cvtepi8_epi16(a1.native());
    return combine(r0, r1);
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL int16<64> i_to_int16(const int8<64>& a)
{
    int16<32> r0, r1;
    int8<32> a0, a1;
    split(a, a0, a1);
    r0 = _mm512_cvtepi8_epi16(a0.native());
    r1 = _mm512_cvtepi8_epi16(a1.native());
    return combine(r0, r1);
}
#endif

template<unsigned N> SIMDPP_INL
int16<N> i_to_int16(const int8<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int16<N>, i_to_int16, a)
}

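// These helpers back the public to_int16()/to_uint16() conversions. A
// minimal usage sketch (hypothetical data pointer, assuming the public
// wrappers from the core headers):
//
//     const uint8_t* src = ...;               // 16 packed bytes
//     uint8<16> bytes = load(src);            // load one native vector
//     uint16<16> widened = to_uint16(bytes);  // zero-extend to 16 bits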

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif