1 | /* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/conv_any_to_float64.h> |
17 | #include <simdpp/detail/vector_array_conv_macros.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | namespace detail { |
22 | namespace insn { |
23 | |
24 | |
25 | static SIMDPP_INL |
26 | uint64<2> i_to_uint64(const float64<2>& a) |
27 | { |
28 | #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL |
29 | return _mm_cvttpd_epu64(a.native()); |
30 | #elif SIMDPP_USE_AVX512DQ |
31 | __m512d a512 = _mm512_castpd128_pd512(a.native()); |
32 | return _mm512_castsi512_si128(_mm512_cvttpd_epu64(a512)); |
33 | #elif SIMDPP_USE_NEON64 |
34 | return vcvtq_u64_f64(a.native()); |
35 | #elif SIMDPP_USE_VSX_206 |
36 | uint32<4> r; |
37 | r = (__vector uint32_t) vec_ctu(a.native(), 0); |
38 | return (uint64<2>) r; |
39 | #elif SIMDPP_USE_VSX_207 |
40 | return vec_ctu(a.native(), 0); |
41 | #elif SIMDPP_USE_MSA |
42 | return __msa_ftrunc_u_d(a.native()); |
43 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
44 | detail::mem_block<uint64<2>> r; |
45 | r[0] = uint64_t(a.el(0)); |
46 | r[1] = uint64_t(a.el(1)); |
47 | return r; |
48 | #else |
49 | return SIMDPP_NOT_IMPLEMENTED1(a); |
50 | #endif |
51 | } |
52 | |
#if SIMDPP_USE_AVX
// Converts four double-precision values to uint64 with truncation toward zero.
static SIMDPP_INL
uint64<4> i_to_uint64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epu64(a.native());
#elif SIMDPP_USE_AVX512DQ
    // Only the 512-bit conversion is available without AVX512VL; widen the
    // input, convert, then keep the low 256 bits.
    const __m512d wide = _mm512_castpd256_pd512(a.native());
    return _mm512_castsi512_si256(_mm512_cvttpd_epu64(wide));
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
67 | |
#if SIMDPP_USE_AVX512F
// Converts eight double-precision values to uint64 with truncation toward zero.
static SIMDPP_INL
uint64<8> i_to_uint64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // Direct 512-bit truncating conversion.
    const __m512d src = a.native();
    return _mm512_cvttpd_epu64(src);
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
79 | |
// Generic width: processes the vector per sub-vector using the fixed-size
// overloads above.
template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint64<N>, i_to_uint64, a)
}
85 | |
86 | // ----------------------------------------------------------------------------- |
87 | |
88 | static SIMDPP_INL |
89 | int64<2> i_to_int64(const float64<2>& a) |
90 | { |
91 | #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL |
92 | return _mm_cvttpd_epi64(a.native()); |
93 | #elif SIMDPP_USE_AVX512DQ |
94 | __m512d a512 = _mm512_castpd128_pd512(a.native()); |
95 | return _mm512_castsi512_si128(_mm512_cvttpd_epi64(a512)); |
96 | #elif SIMDPP_USE_NEON64 |
97 | return vcvtq_s64_f64(a.native()); |
98 | #elif SIMDPP_USE_VSX_207 |
99 | return vec_cts(a.native(), 0); |
100 | #elif SIMDPP_USE_VSX_206 |
101 | int32<4> r; |
102 | r = (__vector int32_t) vec_cts(a.native(), 0); |
103 | return (int64<2>) r; |
104 | #elif SIMDPP_USE_MSA |
105 | return __msa_ftrunc_s_d(a.native()); |
106 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
107 | detail::mem_block<int64<2>> r; |
108 | r[0] = int64_t(a.el(0)); |
109 | r[1] = int64_t(a.el(1)); |
110 | return r; |
111 | #else |
112 | return SIMDPP_NOT_IMPLEMENTED1(a); |
113 | #endif |
114 | } |
115 | |
#if SIMDPP_USE_AVX
// Converts four double-precision values to int64 with truncation toward zero.
static SIMDPP_INL
int64<4> i_to_int64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epi64(a.native());
#elif SIMDPP_USE_AVX512DQ
    // Only the 512-bit conversion is available without AVX512VL; widen the
    // input, convert, then keep the low 256 bits.
    const __m512d wide = _mm512_castpd256_pd512(a.native());
    return _mm512_castsi512_si256(_mm512_cvttpd_epi64(wide));
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
130 | |
#if SIMDPP_USE_AVX512F
// Converts eight double-precision values to int64 with truncation toward zero.
static SIMDPP_INL
int64<8> i_to_int64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // Direct 512-bit truncating conversion.
    const __m512d src = a.native();
    return _mm512_cvttpd_epi64(src);
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
142 | |
// Generic width: processes the vector per sub-vector using the fixed-size
// overloads above.
template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(int64<N>, i_to_int64, a)
}
148 | |
149 | // ---------------------------------------------------------------------------- |
150 | |
151 | static SIMDPP_INL |
152 | uint64<4> i_to_uint64(const float32<4>& a) |
153 | { |
154 | #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL |
155 | return _mm256_cvttps_epu64(a.native()); |
156 | #elif SIMDPP_USE_AVX512DQ |
157 | __m256 a256 = _mm256_castps128_ps256(a.native()); |
158 | return _mm512_castsi512_si256(_mm512_cvttps_epu64(a256)); |
159 | #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA |
160 | return i_to_uint64(i_to_float64(a)); |
161 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
162 | detail::mem_block<uint64<4>> r; |
163 | detail::mem_block<float32<4>> mi(a); |
164 | r[0] = int64_t(mi[0]); |
165 | r[1] = int64_t(mi[1]); |
166 | r[2] = int64_t(mi[2]); |
167 | r[3] = int64_t(mi[3]); |
168 | return r; |
169 | #else |
170 | return SIMDPP_NOT_IMPLEMENTED1(a); |
171 | #endif |
172 | } |
173 | |
#if SIMDPP_USE_AVX
// Converts eight single-precision values to uint64 with truncation toward zero.
static SIMDPP_INL
uint64<8> i_to_uint64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // _mm512_cvttps_epu64 produces a 512-bit result and requires only
    // AVX512DQ (see the float32<16> overload below, which already uses it
    // under DQ alone); additionally requiring AVX512VL needlessly disabled
    // this path on DQ-only configurations.
    return _mm512_cvttps_epu64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
185 | |
#if SIMDPP_USE_AVX512F
// Converts sixteen single-precision values to uint64 with truncation toward
// zero.
static SIMDPP_INL
uint64<16> i_to_uint64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    // Each conversion consumes 8 floats and yields a full 512-bit vector of
    // 8 uint64 lanes, so process the input as two halves.
    float32<8> lo, hi;
    split(a, lo, hi);

    uint64<16> out;
    out.vec(0) = _mm512_cvttps_epu64(lo.native());
    out.vec(1) = _mm512_cvttps_epu64(hi.native());
    return out;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
204 | |
// Generic width: processes the vector per sub-vector using the fixed-size
// overloads above.
template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
}
210 | |
211 | // ----------------------------------------------------------------------------- |
212 | |
213 | static SIMDPP_INL |
214 | int64<4> i_to_int64(const float32<4>& a) |
215 | { |
216 | #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL |
217 | return _mm256_cvttps_epi64(a.native()); |
218 | #elif SIMDPP_USE_AVX512DQ |
219 | __m256 a256 = _mm256_castps128_ps256(a.native()); |
220 | return _mm512_castsi512_si256(_mm512_cvttps_epi64(a256)); |
221 | #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA |
222 | return i_to_int64(i_to_float64(a)); |
223 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
224 | detail::mem_block<int64<4>> r; |
225 | detail::mem_block<float32<4>> mi(a); |
226 | r[0] = int64_t(mi[0]); |
227 | r[1] = int64_t(mi[1]); |
228 | r[2] = int64_t(mi[2]); |
229 | r[3] = int64_t(mi[3]); |
230 | return r; |
231 | #else |
232 | return SIMDPP_NOT_IMPLEMENTED1(a); |
233 | #endif |
234 | } |
235 | |
#if SIMDPP_USE_AVX
// Converts eight single-precision values to int64 with truncation toward zero.
static SIMDPP_INL
int64<8> i_to_int64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // _mm512_cvttps_epi64 produces a 512-bit result and requires only
    // AVX512DQ (see the float32<16> overload below, which already uses it
    // under DQ alone); additionally requiring AVX512VL needlessly disabled
    // this path on DQ-only configurations.
    return _mm512_cvttps_epi64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
247 | |
#if SIMDPP_USE_AVX512F
// Converts sixteen single-precision values to int64 with truncation toward
// zero.
static SIMDPP_INL
int64<16> i_to_int64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    // Each conversion consumes 8 floats and yields a full 512-bit vector of
    // 8 int64 lanes, so process the input as two halves.
    float32<8> lo, hi;
    split(a, lo, hi);

    int64<16> out;
    out.vec(0) = _mm512_cvttps_epi64(lo.native());
    out.vec(1) = _mm512_cvttps_epi64(hi.native());
    return out;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
266 | |
// Generic width: processes the vector per sub-vector using the fixed-size
// overloads above.
template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
}
272 | |
273 | } // namespace insn |
274 | } // namespace detail |
275 | } // namespace SIMDPP_ARCH_NAMESPACE |
276 | } // namespace simdpp |
277 | |
278 | #endif |
279 | |
280 | |
281 | |