/* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/detail/mem_block.h>
#include <simdpp/detail/insn/conv_any_to_float64.h>
#include <simdpp/detail/vector_array_conv_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {
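
// Truncating (round-toward-zero) conversions from floating-point vectors to
// 64-bit integer vectors. Each overload uses the widest conversion instruction
// available on the enabled instruction set; the generic fallback converts
// element by element through a memory block. As with the underlying
// instructions, results for inputs outside the range of the destination type
// are target-specific.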


static SIMDPP_INL
uint64<2> i_to_uint64(const float64<2>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm_cvttpd_epu64(a.native());
#elif SIMDPP_USE_AVX512DQ
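    // Without AVX512VL only the 512-bit form of the conversion is available:
    // widen the input to 512 bits, convert, and keep the low 128-bit lane of
    // the result.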
    __m512d a512 = _mm512_castpd128_pd512(a.native());
    return _mm512_castsi512_si128(_mm512_cvttpd_epu64(a512));
#elif SIMDPP_USE_NEON64
    return vcvtq_u64_f64(a.native());
#elif SIMDPP_USE_VSX_207
    return vec_ctu(a.native(), 0);
#elif SIMDPP_USE_VSX_206
    uint32<4> r;
    r = (__vector uint32_t) vec_ctu(a.native(), 0);
    return (uint64<2>) r;
#elif SIMDPP_USE_MSA
    return __msa_ftrunc_u_d(a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    detail::mem_block<uint64<2>> r;
    r[0] = uint64_t(a.el(0));
    r[1] = uint64_t(a.el(1));
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
uint64<4> i_to_uint64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epu64(a.native());
#elif SIMDPP_USE_AVX512DQ
    __m512d a512 = _mm512_castpd256_pd512(a.native());
    return _mm512_castsi512_si256(_mm512_cvttpd_epu64(a512));
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_to_uint64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvttpd_epu64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint64<N>, i_to_uint64, a)
}

// -----------------------------------------------------------------------------
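// Signed counterpart of the conversions above: the dispatch structure is the
// same, using the signed truncating instructions on each target.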

static SIMDPP_INL
int64<2> i_to_int64(const float64<2>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm_cvttpd_epi64(a.native());
#elif SIMDPP_USE_AVX512DQ
    __m512d a512 = _mm512_castpd128_pd512(a.native());
    return _mm512_castsi512_si128(_mm512_cvttpd_epi64(a512));
#elif SIMDPP_USE_NEON64
    return vcvtq_s64_f64(a.native());
#elif SIMDPP_USE_VSX_207
    return vec_cts(a.native(), 0);
#elif SIMDPP_USE_VSX_206
    int32<4> r;
    r = (__vector int32_t) vec_cts(a.native(), 0);
    return (int64<2>) r;
#elif SIMDPP_USE_MSA
    return __msa_ftrunc_s_d(a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    detail::mem_block<int64<2>> r;
    r[0] = int64_t(a.el(0));
    r[1] = int64_t(a.el(1));
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
int64<4> i_to_int64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epi64(a.native());
#elif SIMDPP_USE_AVX512DQ
    __m512d a512 = _mm512_castpd256_pd512(a.native());
    return _mm512_castsi512_si256(_mm512_cvttpd_epi64(a512));
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<8> i_to_int64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvttpd_epi64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(int64<N>, i_to_int64, a)
}

// -----------------------------------------------------------------------------
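// Conversions from float32 double the element width: a float32<4> input
// produces a uint64<4> result. Targets without a direct float32-to-uint64
// instruction first widen to float64 with i_to_float64() and reuse the
// float64 paths above.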

static SIMDPP_INL
uint64<4> i_to_uint64(const float32<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttps_epu64(a.native());
#elif SIMDPP_USE_AVX512DQ
    __m256 a256 = _mm256_castps128_ps256(a.native());
    return _mm512_castsi512_si256(_mm512_cvttps_epu64(a256));
#elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
    return i_to_uint64(i_to_float64(a));
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    detail::mem_block<uint64<4>> r;
    detail::mem_block<float32<4>> mi(a);
    r[0] = uint64_t(mi[0]);
    r[1] = uint64_t(mi[1]);
    r[2] = uint64_t(mi[2]);
    r[3] = uint64_t(mi[3]);
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
uint64<8> i_to_uint64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvttps_epu64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<16> i_to_uint64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
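    // Each 256-bit half of the input converts into a full 512-bit vector of
    // 64-bit elements, filling the two halves of the uint64<16> result.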
    float32<8> a0, a1;
    uint64<16> r;
    split(a, a0, a1);

    r.vec(0) = _mm512_cvttps_epu64(a0.native());
    r.vec(1) = _mm512_cvttps_epu64(a1.native());

    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
}

// -----------------------------------------------------------------------------
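// Signed float32-to-int64 conversions; these mirror the unsigned versions
// above.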

static SIMDPP_INL
int64<4> i_to_int64(const float32<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttps_epi64(a.native());
#elif SIMDPP_USE_AVX512DQ
    __m256 a256 = _mm256_castps128_ps256(a.native());
    return _mm512_castsi512_si256(_mm512_cvttps_epi64(a256));
#elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
    return i_to_int64(i_to_float64(a));
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    detail::mem_block<int64<4>> r;
    detail::mem_block<float32<4>> mi(a);
    r[0] = int64_t(mi[0]);
    r[1] = int64_t(mi[1]);
    r[2] = int64_t(mi[2]);
    r[3] = int64_t(mi[3]);
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
int64<8> i_to_int64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvttps_epi64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<16> i_to_int64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    float32<8> a0, a1;
    int64<16> r;
    split(a, a0, a1);

    r.vec(0) = _mm512_cvttps_epi64(a0.native());
    r.vec(1) = _mm512_cvttps_epi64(a1.native());

    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif