1/* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_SHRINK_TO_INT32_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_SHRINK_TO_INT32_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/permute4.h>
17#include <simdpp/core/unzip_lo.h>
18#include <simdpp/core/zip_lo.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace insn {
24
25
26SIMDPP_INL uint32<4> i_to_uint32(const uint64<4>& a)
27{
28#if SIMDPP_USE_NULL
29 uint32<4> r;
30 for (unsigned i = 0; i < 4; i++) {
31 r.el(i) = uint32_t(a.vec(i/2).el(i%2));
32 }
33 return r;
34#elif SIMDPP_USE_AVX512VL
35 return _mm256_cvtepi64_epi32(a.native());
36#elif SIMDPP_USE_AVX2
37 uint64<4> a64;
38 a64 = permute4<0,2,0,2>((uint32<8>) a);
39 a64 = permute4<0,2,0,2>(a64);
40 return _mm256_castsi256_si128(a64.native());
41#elif SIMDPP_USE_NEON64
42 uint32x2_t low = vmovn_u64(a.vec(0).native());
43 return vmovn_high_u64(low, a.vec(1).native());
44#elif SIMDPP_USE_NEON
45 uint32x2_t low = vmovn_u64(a.vec(0).native());
46 uint32x2_t high = vmovn_u64(a.vec(1).native());
47 return vcombine_u32(low, high);
48#elif SIMDPP_USE_VSX_207
49 return vec_pack(a.vec(0).native(), a.vec(1).native());
50#elif SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN
51 uint32<4> r1, r2;
52 r1 = a.vec(0);
53 r2 = a.vec(1);
54 return unzip4_hi(r1, r2);
55#elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA || (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN)
56 uint32<4> r1, r2;
57 r1 = a.vec(0);
58 r2 = a.vec(1);
59 return unzip4_lo(r1, r2);
60#endif
61}
62
#if SIMDPP_USE_AVX2
/** Narrows the eight 64-bit elements of @a a to 32 bits each.

    Only available when AVX2 is enabled. With AVX512F a single conversion
    instruction is used; otherwise the result is assembled from the two
    256-bit halves of the source with shuffles.

    @param a source vector of eight 64-bit unsigned integers
    @return vector of eight 32-bit unsigned integers
*/
SIMDPP_INL uint32<8> i_to_uint32(const uint64<8>& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtepi64_epi32(a.native());
#else
    // Shuffle each half viewed as 32-bit elements.
    uint64<4> lower, upper;
    lower = permute4<0,2,0,2>(static_cast<uint32<8>>(a.vec(0)));
    upper = permute4<0,2,0,2>(static_cast<uint32<8>>(a.vec(1)));

    // Interleave the two halves, then reorder the 64-bit elements of the
    // interleaved vector.
    uint64<4> merged;
    merged = zip2_lo(lower, upper);
    merged = permute4<0,2,1,3>(merged);
    return static_cast<uint32<8>>(merged);
#endif
}
#endif
78
#if SIMDPP_USE_AVX512F
/** Narrows the sixteen 64-bit elements of @a a to 32 bits each.

    Only available when AVX512F is enabled. Each of the two sub-vectors of
    the source is converted separately and the results are joined with
    combine().

    @param a source vector of sixteen 64-bit unsigned integers
    @return vector of sixteen 32-bit unsigned integers
*/
SIMDPP_INL uint32<16> i_to_uint32(const uint64<16>& a)
{
    const uint32<8> lower = _mm512_cvtepi64_epi32(a.vec(0).native());
    const uint32<8> upper = _mm512_cvtepi64_epi32(a.vec(1).native());
    return combine(lower, upper);
}
#endif
87
88template<unsigned N> SIMDPP_INL
89uint32<N> i_to_uint32(const uint64<N>& a)
90{
91 SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint32<N>, i_to_uint32, a)
92}
93
94
95} // namespace insn
96} // namespace detail
97} // namespace SIMDPP_ARCH_NAMESPACE
98} // namespace simdpp
99
100#endif
101
102
103