/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_ANY_TO_FLOAT64_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_ANY_TO_FLOAT64_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/combine.h>
#include <simdpp/core/f_add.h>
#include <simdpp/core/f_neg.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/core/move_l.h>
#include <simdpp/core/zip_lo.h>
#include <simdpp/core/zip_hi.h>
#include <simdpp/core/detail/subvec_insert.h>
#include <simdpp/detail/mem_block.h>
#include <simdpp/detail/insn/conv_extend_to_int32.h>
#include <simdpp/detail/insn/conv_extend_to_int64.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {


static SIMDPP_INL
float64x4 i_to_float64(const float32x4& a)
{
#if SIMDPP_USE_AVX
    return _mm256_cvtps_pd(a.native());
#elif SIMDPP_USE_SSE2
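    // _mm_cvtps_pd converts only the two lowest floats of the source, so the
    // low pair is converted directly and move4_l<2> shifts elements 2 and 3
    // into the low positions for the second conversion.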
    float64x2 r1, r2;
    r1 = _mm_cvtps_pd(a.native());
    r2 = _mm_cvtps_pd(move4_l<2>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_NEON64
    float64<2> r1, r2;
    r1 = vcvt_f64_f32(vget_low_f32(a.native()));
    r2 = vcvt_high_f64_f32(a.native());
    return combine(r1, r2);
#elif SIMDPP_USE_VSX_206
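    // xvcvspdp reads one float out of each 64-bit lane of its source. The
    // interleave with zero (operand order depends on endianness) places two
    // of the source floats into those positions, so the two conversions
    // together yield {a0, a1} and {a2, a3} as doubles.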
    float32<4> lo, hi;
#if SIMDPP_BIG_ENDIAN
    lo = zip4_lo(a, (float32<4>) make_zero());
    hi = zip4_hi(a, (float32<4>) make_zero());
#else
    lo = zip4_lo((float32<4>) make_zero(), a);
    hi = zip4_hi((float32<4>) make_zero(), a);
#endif
    float64<2> lo_f, hi_f;
    lo_f = __builtin_vsx_xvcvspdp(lo.native());
    hi_f = __builtin_vsx_xvcvspdp(hi.native());
    return combine(lo_f, hi_f);
#elif SIMDPP_USE_MSA
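    // fexupr_d widens the floats in the lower half of the vector to doubles,
    // fexupl_d widens those in the upper half.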
    float64<2> lo, hi;
    lo = __msa_fexupr_d(a.native());
    hi = __msa_fexupl_d(a.native());
    return combine(lo, hi);
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    detail::mem_block<float32x4> ax(a);
    float64x4 r;
    r.vec(0).el(0) = double(ax[0]);
    r.vec(0).el(1) = double(ax[1]);
    r.vec(1).el(0) = double(ax[2]);
    r.vec(1).el(1) = double(ax[3]);
    return r;
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<8> i_to_float64(const float32x8& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtps_pd(a.native());
#else
    float64x4 r1, r2;
    float32x4 a1, a2;
    split(a, a1, a2);
    r1 = _mm256_cvtps_pd(a1.native());
    r2 = _mm256_cvtps_pd(a2.native());
    return combine(r1, r2);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<16> i_to_float64(const float32<16>& a)
{
    float64<8> r1, r2;
    float32<8> a1, a2;
    split(a, a1, a2);
    r1 = _mm512_cvtps_pd(a1.native());
    r2 = _mm512_cvtps_pd(a2.native());
    return combine(r1, r2);
}
#endif

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const float32<N>& a)
{
    float64<N> r;
    for (unsigned i = 0; i < a.vec_length; ++i) {
        detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
    }
    return r;
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
float64x4 i_to_float64(const int32x4& a)
{
#if SIMDPP_USE_AVX
    return _mm256_cvtepi32_pd(a.native());
#elif SIMDPP_USE_SSE2
    float64x2 r1, r2;
    r1 = _mm_cvtepi32_pd(a.native());
    r2 = _mm_cvtepi32_pd(move4_l<2>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_NEON64
    float64<2> r1, r2;
    r1 = vcvtq_f64_s64(vmovl_s32(vget_low_s32(a.native())));
    r2 = vcvtq_f64_s64(vmovl_s32(vget_high_s32(a.native())));
    return combine(r1, r2);
#elif SIMDPP_USE_VSX_206
#if SIMDPP_USE_VSX_207
    int64<4> a64 = i_to_int64(a);
    __vector int64_t b0 = a64.vec(0).native();
    __vector int64_t b1 = a64.vec(1).native();
#else
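    // Without 64-bit integer vector support each 32-bit lane is widened to
    // 64 bits by pairing it with its sign word (arithmetic shift right by 31)
    // before vec_ctf converts the 64-bit integers to doubles.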
    int32<4> sign = shift_r<31>(a);
    __vector int64_t b0 = (__vector int64_t) vec_mergeh(a.native(), sign.native());
    __vector int64_t b1 = (__vector int64_t) vec_mergel(a.native(), sign.native());
#endif

    float64<4> r;
    r.vec(0) = vec_ctf(b0, 0);
    r.vec(1) = vec_ctf(b1, 0);
    return r;
#elif SIMDPP_USE_MSA
    int64<4> a64 = i_to_int64(a);
    float64<4> r;
    r.vec(0) = __msa_ffint_s_d(a64.vec(0).native());
    r.vec(1) = __msa_ffint_s_d(a64.vec(1).native());
    return r;
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    detail::mem_block<int32x4> ax(a);
    float64x4 r;
    r.vec(0).el(0) = double(ax[0]);
    r.vec(0).el(1) = double(ax[1]);
    r.vec(1).el(0) = double(ax[2]);
    r.vec(1).el(1) = double(ax[3]);
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
float64<8> i_to_float64(const int32x8& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtepi32_pd(a.native());
#else
    float64x4 r1, r2;
    int32x4 a1, a2;
    split(a, a1, a2);
    r1 = _mm256_cvtepi32_pd(a1.native());
    r2 = _mm256_cvtepi32_pd(a2.native());
    return combine(r1, r2);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<16> i_to_float64(const int32<16>& a)
{
    float64<8> r1, r2;
    r1 = _mm512_cvtepi32_pd(_mm512_castsi512_si256(a.native()));
    r2 = _mm512_cvtepi32_pd(_mm512_extracti64x4_epi64(a.native(), 1));
    return combine(r1, r2);
}
#endif

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const int32<N>& a)
{
    float64<N> r;
    for (unsigned i = 0; i < a.vec_length; ++i) {
        detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
    }
    return r;
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
float64<4> i_to_float64(const uint32<4>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cvtepu32_pd(a.native());
#elif SIMDPP_USE_SSE2
    float64<4> f;
#if SIMDPP_USE_AVX
    f = _mm256_cvtepi32_pd(a.native());
#else
    f.vec(0) = _mm_cvtepi32_pd(a.native());
    f.vec(1) = _mm_cvtepi32_pd(move4_l<2>(a).eval().native());
#endif
    // The signed conversion interprets values above 0x7fffffff as negative,
    // so a negative result means the source was in the upper half of the
    // unsigned range; adding 2^32 corrects it (e.g. 0xffffffff converts to
    // -1.0, and -1.0 + 0x100000000 == 4294967295.0).
    mask_float64<4> is_large = cmp_lt(f, 0);
    return blend(add(f, (double)0x100000000), f, is_large);
#elif SIMDPP_USE_NEON64
    float64<2> r1, r2;
    r1 = vcvtq_f64_u64(vmovl_u32(vget_low_u32(a.native())));
    r2 = vcvtq_f64_u64(vmovl_u32(vget_high_u32(a.native())));
    return combine(r1, r2);
#elif SIMDPP_USE_VSX_206
#if SIMDPP_USE_VSX_207
    uint64<4> a64 = i_to_uint64(a);
    __vector uint64_t b0 = a64.vec(0).native();
    __vector uint64_t b1 = a64.vec(1).native();
#else
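    // Zero-extend each 32-bit lane to 64 bits by pairing it with a zero word
    // before converting with vec_ctf.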
    uint32<4> zero = make_zero();
    __vector uint64_t b0 = (__vector uint64_t) vec_mergeh(a.native(), zero.native());
    __vector uint64_t b1 = (__vector uint64_t) vec_mergel(a.native(), zero.native());
#endif

    float64<4> r;
    r.vec(0) = vec_ctf(b0, 0);
    r.vec(1) = vec_ctf(b1, 0);
    return r;
#elif SIMDPP_USE_MSA
    uint64<4> a64 = i_to_uint64(a);
    float64<4> r;
    r.vec(0) = __msa_ffint_u_d(a64.vec(0).native());
    r.vec(1) = __msa_ffint_u_d(a64.vec(1).native());
    return r;
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    detail::mem_block<uint32<4>> ax(a);
    float64<4> r;
    r.vec(0).el(0) = double(ax[0]);
    r.vec(0).el(1) = double(ax[1]);
    r.vec(1).el(0) = double(ax[2]);
    r.vec(1).el(1) = double(ax[3]);
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
float64<8> i_to_float64(const uint32<8>& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtepu32_pd(a.native());
#else
    uint32<4> a0, a1;
    float64<8> f;
    split(a, a0, a1);

    f.vec(0) = _mm256_cvtepi32_pd(a0.native());
    f.vec(1) = _mm256_cvtepi32_pd(a1.native());

    // if the result is negative, the source integer was larger than 0x7fffffff
    mask_float64<8> is_large = cmp_lt(f, 0);
    return blend(add(f, (double)0x100000000), f, is_large);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<16> i_to_float64(const uint32<16>& a)
{
    float64<16> r;
    uint32<8> a0, a1;
    split(a, a0, a1);

    r.vec(0) = _mm512_cvtepu32_pd(a0.native());
    r.vec(1) = _mm512_cvtepu32_pd(a1.native());
    return r;
}
#endif

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const uint32<N>& a)
{
    float64<N> r;
    for (unsigned i = 0; i < a.vec_length; ++i) {
        detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
    }
    return r;
}

// -----------------------------------------------------------------------------

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const uint16<N>& a)
{
    return i_to_float64(i_to_uint32(a));
}

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const int16<N>& a)
{
    return i_to_float64(i_to_int32(a));
}

// -----------------------------------------------------------------------------

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const uint8<N>& a)
{
    return i_to_float64(i_to_uint32(a));
}

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const int8<N>& a)
{
    return i_to_float64(i_to_int32(a));
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
float64<2> i_to_float64(const int64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cvtepi64_pd(a.native());
#elif SIMDPP_USE_NEON64
    return vcvtq_f64_s64(a.native());
#elif SIMDPP_USE_VSX_207
    return vec_ctf(a.native(), 0);
#elif SIMDPP_USE_VSX_206
    int32<4> a32; a32 = a; // a stores 64-bit values in GPR
    return vec_ctf((__vector int64_t)a32.native(), 0);
#elif SIMDPP_USE_MSA
    return __msa_ffint_s_d(a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    detail::mem_block<int64<2>> ax(a);
    float64<2> r;
    r.el(0) = double(ax[0]);
    r.el(1) = double(ax[1]);
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<4> i_to_float64(const int64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cvtepi64_pd(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_to_float64(const int64<8>& a)
{
    return _mm512_cvtepi64_pd(a.native());
}
#endif

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const int64<N>& a)
{
    float64<N> r;
    for (unsigned i = 0; i < a.vec_length; ++i) {
        detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
    }
    return r;
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
float64<2> i_to_float64(const uint64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cvtepu64_pd(a.native());
#elif SIMDPP_USE_NEON64
    return vcvtq_f64_u64(a.native());
#elif SIMDPP_USE_VSX_207
    return vec_ctf(a.native(), 0);
#elif SIMDPP_USE_VSX_206
    uint32<4> a32; a32 = a; // a stores 64-bit values in GPR
    return vec_ctf((__vector uint64_t)a32.native(), 0);
#elif SIMDPP_USE_MSA
    return __msa_ffint_u_d(a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    detail::mem_block<uint64<2>> ax(a);
    float64<2> r;
    r.el(0) = double(ax[0]);
    r.el(1) = double(ax[1]);
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<4> i_to_float64(const uint64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cvtepu64_pd(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_to_float64(const uint64<8>& a)
{
    return _mm512_cvtepu64_pd(a.native());
}
#endif

template<unsigned N> SIMDPP_INL
float64<N> i_to_float64(const uint64<N>& a)
{
    float64<N> r;
    for (unsigned i = 0; i < a.vec_length; ++i) {
        detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
    }
    return r;
}

// -----------------------------------------------------------------------------

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif