1/* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT16_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT16_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/combine.h>
17#include <simdpp/detail/not_implemented.h>
18#include <simdpp/core/i_shift_r.h>
19#include <simdpp/core/move_l.h>
20#include <simdpp/core/zip_hi.h>
21#include <simdpp/core/zip_lo.h>
22#include <simdpp/detail/vector_array_conv_macros.h>
23
24namespace simdpp {
25namespace SIMDPP_ARCH_NAMESPACE {
26namespace detail {
27namespace insn {
28
// Zero-extends each unsigned 8-bit element of 'a' to an unsigned 16-bit
// element. The 16 results fill the returned uint16<16> (two 128-bit halves
// on backends without native 256-bit vectors).
SIMDPP_INL uint16<16> i_to_uint16(const uint8<16>& a)
{
#if SIMDPP_USE_NULL
    // Portable scalar fallback: input element i maps to element i%8 of
    // sub-vector i/8 of the result.
    uint16x16 r;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/8).el(i%8) = uint16_t(a.el(i));
    }
    return r;
#elif SIMDPP_USE_AVX2
    // Single VPMOVZXBW: zero-extends all 16 bytes into one 256-bit vector.
    return _mm256_cvtepu8_epi16(a.native());
#elif SIMDPP_USE_SSE4_1
    // PMOVZXBW only consumes the low 8 bytes of its source, so convert the
    // low half directly and shift the high 8 bytes down before converting
    // them as the second half.
    uint16x8 r1, r2;
    r1 = _mm_cvtepu8_epi16(a.native());
    r2 = _mm_cvtepu8_epi16(move16_l<8>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_SSE2 || (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN) || SIMDPP_USE_MSA
    // Zero-extend by interleaving with a zero vector. With 'a' as the first
    // zip operand the data byte lands in the low byte of each 16-bit lane,
    // which is the correct placement on these little-endian configurations.
    uint16x8 r1, r2;
    r1 = zip16_lo(a, (uint8x16) make_zero());
    r2 = zip16_hi(a, (uint8x16) make_zero());
    return combine(r1, r2);
#elif SIMDPP_USE_NEON
    // vmovl_u8 widens one 64-bit half at a time.
    uint16x16 r;
    r.vec(0) = vmovl_u8(vget_low_u8(a.native()));
    r.vec(1) = vmovl_u8(vget_high_u8(a.native()));
    return r;
#elif (SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN)
    // Big-endian: the zero must come first so it occupies the high byte of
    // each 16-bit lane (mirror of the little-endian zip above).
    uint16x8 r1, r2;
    r1 = zip16_lo((uint8x16) make_zero(), a);
    r2 = zip16_hi((uint8x16) make_zero(), a);
    return combine(r1, r2);
#endif
}
61
#if SIMDPP_USE_AVX2
// 32-element overload: zero-extends uint8<32> to uint16<32>.
SIMDPP_INL uint16<32> i_to_uint16(const uint8<32>& a)
{
#if SIMDPP_USE_AVX512BW
    // One VPMOVZXBW widens all 32 bytes into a single 512-bit vector.
    return _mm512_cvtepu8_epi16(a.native());
#else
    // The 256-bit VPMOVZXBW consumes a 128-bit source, so split the input
    // and widen each half into its own 256-bit vector.
    uint16<16> r0, r1;
    uint8<16> a0, a1;
    split(a, a0, a1);
    r0 = _mm256_cvtepu8_epi16(a0.native());
    r1 = _mm256_cvtepu8_epi16(a1.native());
    return combine(r0, r1);
#endif
}
#endif
77
#if SIMDPP_USE_AVX512BW
// 64-element overload: zero-extends uint8<64> to uint16<64>.
// The 512-bit VPMOVZXBW consumes a 256-bit source, so split the input and
// widen each half into its own 512-bit vector.
SIMDPP_INL uint16<64> i_to_uint16(const uint8<64>& a)
{
    uint16<32> r0, r1;
    uint8<32> a0, a1;
    split(a, a0, a1);
    r0 = _mm512_cvtepu8_epi16(a0.native());
    r1 = _mm512_cvtepu8_epi16(a1.native());
    return combine(r0, r1);
}
#endif
89
// Generic-width overload: applies i_to_uint16 to each native-width
// sub-vector of 'a' and gathers the widened results into uint16<N>.
template<unsigned N> SIMDPP_INL
uint16<N> i_to_uint16(const uint8<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint16<N>, i_to_uint16, a)
}
95
96// -----------------------------------------------------------------------------
97
// Sign-extends each signed 8-bit element of 'a' to a signed 16-bit element.
SIMDPP_INL int16x16 i_to_int16(const int8x16& a)
{
#if SIMDPP_USE_NULL
    // Portable scalar fallback: input element i maps to element i%8 of
    // sub-vector i/8 of the result.
    int16x16 r;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/8).el(i%8) = int16_t(a.el(i));
    }
    return r;
#elif SIMDPP_USE_AVX2
    // Single VPMOVSXBW: sign-extends all 16 bytes into one 256-bit vector.
    return _mm256_cvtepi8_epi16(a.native());
#elif SIMDPP_USE_SSE4_1
    // PMOVSXBW only consumes the low 8 bytes, so convert the low half
    // directly and shift the high 8 bytes down for the second conversion.
    int16x8 r1, r2;
    r1 = _mm_cvtepi8_epi16(a.native());
    r2 = _mm_cvtepi8_epi16(move16_l<8>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_SSE2
    // No PMOVSX before SSE4.1: interleave so each data byte sits in the
    // high byte of a 16-bit lane, then arithmetic-shift right by 8 to
    // replicate the sign bit through the low byte.
    int16x8 r1, r2;
    r1 = zip16_lo((int8x16) make_zero(), a);
    r1 = shift_r<8>(r1);
    r2 = zip16_hi((int8x16) make_zero(), a);
    r2 = shift_r<8>(r2);
    return combine(r1, r2);
#elif SIMDPP_USE_NEON
    // vmovl_s8 sign-extends one 64-bit half at a time.
    int16x16 r;
    r.vec(0) = vmovl_s8(vget_low_s8(a.native()));
    r.vec(1) = vmovl_s8(vget_high_s8(a.native()));
    return r;
#elif SIMDPP_USE_MSA
    // Arithmetic shift by 7 yields 0x00 or 0xFF per element; interleaving
    // it as the other byte of each 16-bit lane produces the sign extension.
    int8x16 sign = shift_r<7>(a);
    int16x8 lo, hi;
    lo = zip16_lo(a, sign);
    hi = zip16_hi(a, sign);
    return combine(lo, hi);
#elif SIMDPP_USE_ALTIVEC
    // vec_unpackh/vec_unpackl sign-extend the two byte halves directly.
    int16x16 r;
    r.vec(0) = vec_unpackh(a.vec(0).native());
    r.vec(1) = vec_unpackl(a.vec(0).native());
    return r;
#endif
}
138
#if SIMDPP_USE_AVX2
// 32-element overload: sign-extends int8<32> to int16<32>.
SIMDPP_INL int16<32> i_to_int16(const int8<32>& a)
{
#if SIMDPP_USE_AVX512BW
    // One VPMOVSXBW widens all 32 bytes into a single 512-bit vector.
    return _mm512_cvtepi8_epi16(a.native());
#else
    // The 256-bit VPMOVSXBW consumes a 128-bit source, so split the input
    // and widen each half into its own 256-bit vector.
    int16<16> r0, r1;
    int8<16> a0, a1;
    split(a, a0, a1);
    r0 = _mm256_cvtepi8_epi16(a0.native());
    r1 = _mm256_cvtepi8_epi16(a1.native());
    return combine(r0, r1);
#endif
}
#endif
154
#if SIMDPP_USE_AVX512BW
// 64-element overload: sign-extends int8<64> to int16<64>.
// The 512-bit VPMOVSXBW consumes a 256-bit source, so split the input and
// widen each half into its own 512-bit vector.
SIMDPP_INL int16<64> i_to_int16(const int8<64>& a)
{
    int16<32> r0, r1;
    int8<32> a0, a1;
    split(a, a0, a1);
    r0 = _mm512_cvtepi8_epi16(a0.native());
    r1 = _mm512_cvtepi8_epi16(a1.native());
    return combine(r0, r1);
}
#endif
166
// Generic-width overload: applies i_to_int16 to each native-width
// sub-vector of 'a' and gathers the widened results into int16<N>.
template<unsigned N> SIMDPP_INL
int16<N> i_to_int16(const int8<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int16<N>, i_to_int16, a)
}
172
173
174} // namespace insn
175} // namespace detail
176} // namespace SIMDPP_ARCH_NAMESPACE
177} // namespace simdpp
178
179#endif
180
181
182