conv_any_to_float64.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_any_to_float64.h]

1	/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_ANY_TO_FLOAT64_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_ANY_TO_FLOAT64_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/combine.h>
17	#include <simdpp/core/f_add.h>
18	#include <simdpp/core/f_neg.h>
19	#include <simdpp/core/i_shift_r.h>
20	#include <simdpp/core/move_l.h>
21	#include <simdpp/core/zip_lo.h>
22	#include <simdpp/core/zip_hi.h>
23	#include <simdpp/core/detail/subvec_insert.h>
24	#include <simdpp/detail/mem_block.h>
25	#include <simdpp/detail/insn/conv_extend_to_int64.h>
26
27	namespace simdpp {
28	namespace SIMDPP_ARCH_NAMESPACE {
29	namespace detail {
30	namespace insn {
31
32
33	static SIMDPP_INL
34	float64x4 i_to_float64(const float32x4& a)
35	{
36	#if SIMDPP_USE_AVX
37	return _mm256_cvtps_pd(a.native());
38	#elif SIMDPP_USE_SSE2
39	float64x2 r1, r2;
40	r1 = _mm_cvtps_pd(a.native());
41	r2 = _mm_cvtps_pd(move4_l<`2`>(a).eval().native());
42	return combine(r1, r2);
43	#elif SIMDPP_USE_NEON64
44	float64<`2`> r1, r2;
45	r1 = vcvt_f64_f32(vget_low_f32(a.native()));
46	r2 = vcvt_high_f64_f32(a.native());
47	return combine(r1, r2);
48	#elif SIMDPP_USE_VSX_206
49	float32<`4`> lo, hi;
50	#if SIMDPP_BIG_ENDIAN
51	lo = zip4_lo(a, (float32<`4`>) make_zero());
52	hi = zip4_hi(a, (float32<`4`>) make_zero());
53	#else
54	lo = zip4_lo((float32<`4`>) make_zero(), a);
55	hi = zip4_hi((float32<`4`>) make_zero(), a);
56	#endif
57	float64<`2`> lo_f, hi_f;
58	lo_f = __builtin_vsx_xvcvspdp(lo.native());
59	hi_f = __builtin_vsx_xvcvspdp(hi.native());
60	return combine(lo_f, hi_f);
61	#elif SIMDPP_USE_MSA
62	float64<`2`> lo, hi;
63	lo = __msa_fexupr_d(a.native());
64	hi = __msa_fexupl_d(a.native());
65	return combine(lo, hi);
66	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
67	detail::mem_block<float32x4> ax(a);
68	float64x4 r;
69	r.vec(`0`).el(`0`) = double(ax[`0`]);
70	r.vec(`0`).el(`1`) = double(ax[`1`]);
71	r.vec(`1`).el(`0`) = double(ax[`2`]);
72	r.vec(`1`).el(`1`) = double(ax[`3`]);
73	return r;
74	#endif
75	}
76
#if SIMDPP_USE_AVX
/** Converts eight 32-bit floating-point lanes to eight 64-bit floating-point
    lanes. Only compiled when AVX is enabled.
*/
static SIMDPP_INL
float64<8> i_to_float64(const float32x8& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtps_pd(a.native());
#else
    // Without AVX512F, split into two 4-lane halves and widen each separately.
    float32x4 lower, upper;
    split(a, lower, upper);
    const float64x4 wide_lower = _mm256_cvtps_pd(lower.native());
    const float64x4 wide_upper = _mm256_cvtps_pd(upper.native());
    return combine(wide_lower, wide_upper);
#endif
}
#endif
93
#if SIMDPP_USE_AVX512F
/** Converts sixteen 32-bit floating-point lanes to sixteen 64-bit
    floating-point lanes. Only compiled when AVX512F is enabled.
*/
static SIMDPP_INL
float64<16> i_to_float64(const float32<16>& a)
{
    // Split into two 8-lane halves, widen each, then join the results.
    float32<8> lower, upper;
    split(a, lower, upper);
    const float64<8> wide_lower = _mm512_cvtps_pd(lower.native());
    const float64<8> wide_upper = _mm512_cvtps_pd(upper.native());
    return combine(wide_lower, wide_upper);
}
#endif
106
107	template<unsigned N> SIMDPP_INL
108	float64<N> i_to_float64(const float32<N>& a)
109	{
110	float64<N> r;
111	for (unsigned i = `0`; i < a.vec_length; ++i) {
112	detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
113	}
114	return r;
115	}
116
117	// -----------------------------------------------------------------------------
118
119	static SIMDPP_INL
120	float64x4 i_to_float64(const int32x4& a)
121	{
122	#if SIMDPP_USE_AVX
123	return _mm256_cvtepi32_pd(a.native());
124	#elif SIMDPP_USE_SSE2
125	float64x2 r1, r2;
126	r1 = _mm_cvtepi32_pd(a.native());
127	r2 = _mm_cvtepi32_pd(move4_l<`2`>(a).eval().native());
128	return combine(r1, r2);
129	#elif SIMDPP_USE_NEON64
130	float64<`2`> r1, r2;
131	r1 = vcvtq_f64_s64(vmovl_s32(vget_low_s32(a.native())));
132	r2 = vcvtq_f64_s64(vmovl_s32(vget_high_s32(a.native())));
133	return combine(r1, r2);
134	#elif SIMDPP_USE_VSX_206
135	#if SIMDPP_USE_VSX_207
136	int64<`4`> a64 = i_to_int64(a);
137	__vector int64_t b0 = a64.vec(`0`).native();
138	__vector int64_t b1 = a64.vec(`1`).native();
139	#else
140	int32<`4`> sign = shift_r<`31`>(a);
141	__vector int64_t b0 = (__vector int64_t) vec_mergeh(a.native(), sign.native());
142	__vector int64_t b1 = (__vector int64_t) vec_mergel(a.native(), sign.native());
143	#endif
144
145	float64<`4`> r;
146	r.vec(`0`) = vec_ctf(b0, `0`);
147	r.vec(`1`) = vec_ctf(b1, `0`);
148	return r;
149	#elif SIMDPP_USE_MSA
150	int64<`4`> a64 = i_to_int64(a);
151	float64<`4`> r;
152	r.vec(`0`) = __msa_ffint_s_d(a64.vec(`0`).native());
153	r.vec(`1`) = __msa_ffint_s_d(a64.vec(`1`).native());
154	return r;
155	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
156	detail::mem_block<int32x4> ax(a);
157	float64x4 r;
158	r.vec(`0`).el(`0`) = double(ax[`0`]);
159	r.vec(`0`).el(`1`) = double(ax[`1`]);
160	r.vec(`1`).el(`0`) = double(ax[`2`]);
161	r.vec(`1`).el(`1`) = double(ax[`3`]);
162	return r;
163	#endif
164	}
165
#if SIMDPP_USE_AVX2
/** Converts eight signed 32-bit integer lanes to eight 64-bit floating-point
    lanes. Only compiled when AVX2 is enabled.
*/
static SIMDPP_INL
float64<8> i_to_float64(const int32x8& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtepi32_pd(a.native());
#else
    // Without AVX512F, split into two 4-lane halves and convert each separately.
    int32x4 lower, upper;
    split(a, lower, upper);
    const float64x4 wide_lower = _mm256_cvtepi32_pd(lower.native());
    const float64x4 wide_upper = _mm256_cvtepi32_pd(upper.native());
    return combine(wide_lower, wide_upper);
#endif
}
#endif
182
#if SIMDPP_USE_AVX512F
/** Converts sixteen signed 32-bit integer lanes to sixteen 64-bit
    floating-point lanes. Only compiled when AVX512F is enabled.
*/
static SIMDPP_INL
float64<16> i_to_float64(const int32<16>& a)
{
    // The lower 256 bits are obtained with a cast, the upper 256 bits with an
    // explicit extract; each half is then converted independently.
    const float64<8> wide_lower = _mm512_cvtepi32_pd(_mm512_castsi512_si256(a.native()));
    const float64<8> wide_upper = _mm512_cvtepi32_pd(_mm512_extracti64x4_epi64(a.native(), 1));
    return combine(wide_lower, wide_upper);
}
#endif
193
194	template<unsigned N> SIMDPP_INL
195	float64<N> i_to_float64(const int32<N>& a)
196	{
197	float64<N> r;
198	for (unsigned i = `0`; i < a.vec_length; ++i) {
199	detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
200	}
201	return r;
202	}
203
204	// -----------------------------------------------------------------------------
205
206	static SIMDPP_INL
207	float64<`4`> i_to_float64(const uint32<`4`>& a)
208	{
209	#if SIMDPP_USE_AVX512VL
210	return _mm256_cvtepu32_pd(a.native());
211	#elif SIMDPP_USE_SSE2
212	float64<`4`> f;
213	#if SIMDPP_USE_AVX
214	f = _mm256_cvtepi32_pd(a.native());
215	#else
216	f.vec(`0`) = _mm_cvtepi32_pd(a.native());
217	f.vec(`1`) = _mm_cvtepi32_pd(move4_l<`2`>(a).eval().native());
218	#endif
219	// if result is negative, we converted integer larger than 0x7fffffff
220	mask_float64<`4`> is_large = cmp_lt(f, `0`);
221	return blend(add(f, (double)`0x100000000`), f, is_large);
222	#elif SIMDPP_USE_NEON64
223	float64<`2`> r1, r2;
224	r1 = vcvtq_f64_u64(vmovl_u32(vget_low_u32(a.native())));
225	r2 = vcvtq_f64_u64(vmovl_u32(vget_high_u32(a.native())));
226	return combine(r1, r2);
227	#elif SIMDPP_USE_VSX_206
228	#if SIMDPP_USE_VSX_207
229	uint64<`4`> a64 = i_to_uint64(a);
230	__vector uint64_t b0 = a64.vec(`0`).native();
231	__vector uint64_t b1 = a64.vec(`1`).native();
232	#else
233	uint32<`4`> zero = make_zero();
234	__vector uint64_t b0 = (__vector uint64_t) vec_mergeh(a.native(), zero.native());
235	__vector uint64_t b1 = (__vector uint64_t) vec_mergel(a.native(), zero.native());
236	#endif
237
238	float64<`4`> r;
239	r.vec(`0`) = vec_ctf(b0, `0`);
240	r.vec(`1`) = vec_ctf(b1, `0`);
241	return r;
242	#elif SIMDPP_USE_MSA
243	uint64<`4`> a64 = i_to_uint64(a);
244	float64<`4`> r;
245	r.vec(`0`) = __msa_ffint_u_d(a64.vec(`0`).native());
246	r.vec(`1`) = __msa_ffint_u_d(a64.vec(`1`).native());
247	return r;
248	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
249	detail::mem_block<uint32<`4`>> ax(a);
250	float64<`4`> r;
251	r.vec(`0`).el(`0`) = double(ax[`0`]);
252	r.vec(`0`).el(`1`) = double(ax[`1`]);
253	r.vec(`1`).el(`0`) = double(ax[`2`]);
254	r.vec(`1`).el(`1`) = double(ax[`3`]);
255	return r;
256	#endif
257	}
258
#if SIMDPP_USE_AVX2
/** Converts eight unsigned 32-bit integer lanes to eight 64-bit
    floating-point lanes. Only compiled when AVX2 is enabled.
*/
static SIMDPP_INL
float64<8> i_to_float64(const uint32<8>& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtepu32_pd(a.native());
#else
    uint32<4> lower, upper;
    split(a, lower, upper);

    // Convert as if the lanes were signed, then repair the negative results.
    float64<8> as_signed;
    as_signed.vec(0) = _mm256_cvtepi32_pd(lower.native());
    as_signed.vec(1) = _mm256_cvtepi32_pd(upper.native());

    // A negative result means the input was above 0x7fffffff; adding 2^32
    // to exactly those lanes restores the unsigned value.
    const mask_float64<8> wrapped = cmp_lt(as_signed, 0);
    return blend(add(as_signed, static_cast<double>(0x100000000)), as_signed, wrapped);
#endif
}
#endif
279
#if SIMDPP_USE_AVX512F
/** Converts sixteen unsigned 32-bit integer lanes to sixteen 64-bit
    floating-point lanes. Only compiled when AVX512F is enabled.
*/
static SIMDPP_INL
float64<16> i_to_float64(const uint32<16>& a)
{
    // Split into two 8-lane halves and convert each into its own sub-vector.
    uint32<8> lower, upper;
    split(a, lower, upper);

    float64<16> result;
    result.vec(0) = _mm512_cvtepu32_pd(lower.native());
    result.vec(1) = _mm512_cvtepu32_pd(upper.native());
    return result;
}
#endif
293
294	template<unsigned N> SIMDPP_INL
295	float64<N> i_to_float64(const uint32<N>& a)
296	{
297	float64<N> r;
298	for (unsigned i = `0`; i < a.vec_length; ++i) {
299	detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
300	}
301	return r;
302	}
303
304	// -----------------------------------------------------------------------------
305
306	template<unsigned N> SIMDPP_INL
307	float64<N> i_to_float64(const uint16<N>& a)
308	{
309	return i_to_float64(i_to_uint32(a));
310	}
311
312	template<unsigned N> SIMDPP_INL
313	float64<N> i_to_float64(const int16<N>& a)
314	{
315	return i_to_float64(i_to_int32(a));
316	}
317
318	// -----------------------------------------------------------------------------
319
320	template<unsigned N> SIMDPP_INL
321	float64<N> i_to_float64(const uint8<N>& a)
322	{
323	return i_to_float64(i_to_uint32(a));
324	}
325
326	template<unsigned N> SIMDPP_INL
327	float64<N> i_to_float64(const int8<N>& a)
328	{
329	return i_to_float64(i_to_int32(a));
330	}
331
332	// -----------------------------------------------------------------------------
333
334	static SIMDPP_INL
335	float64<`2`> i_to_float64(const int64<`2`>& a)
336	{
337	#if SIMDPP_USE_AVX512VL
338	return _mm_cvtepi64_pd(a.native());
339	#elif SIMDPP_USE_NEON64
340	return vcvtq_f64_s64(a.native());
341	#elif SIMDPP_USE_VSX_207
342	return vec_ctf(a.native(), `0`);
343	#elif SIMDPP_USE_VSX_206
344	int32<`4`> a32; a32 = a; // a stores 64-bit values in GPR
345	return vec_ctf((__vector int64_t)a32.native(), `0`);
346	#elif SIMDPP_USE_MSA
347	return __msa_ffint_s_d(a.native());
348	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
349	detail::mem_block<int64<`2`>> ax(a);
350	float64<`2`> r;
351	r.el(`0`) = double(ax[`0`]);
352	r.el(`1`) = double(ax[`1`]);
353	return r;
354	#else
355	return SIMDPP_NOT_IMPLEMENTED1(a);
356	#endif
357	}
358
#if SIMDPP_USE_AVX
/** Converts four signed 64-bit integer lanes to four 64-bit floating-point
    lanes. Only compiled when AVX is enabled; resolves to
    SIMDPP_NOT_IMPLEMENTED1 unless the required AVX-512 subsets are available.
*/
static SIMDPP_INL
float64<4> i_to_float64(const int64<4>& a)
{
    // _mm256_cvtepi64_pd is an AVX512DQ + AVX512VL intrinsic, so both feature
    // macros must be set; checking VL alone selects it on targets that lack DQ.
    // NOTE(review): SIMDPP_USE_AVX512DQ is expected to come from the library's
    // architecture setup header - confirm it is defined in this version.
#if SIMDPP_USE_AVX512VL && SIMDPP_USE_AVX512DQ
    return _mm256_cvtepi64_pd(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
370
#if SIMDPP_USE_AVX512F
/** Converts eight signed 64-bit integer lanes to eight 64-bit floating-point
    lanes. Only compiled when AVX512F is enabled; resolves to
    SIMDPP_NOT_IMPLEMENTED1 when AVX512DQ is not available.
*/
static SIMDPP_INL
float64<8> i_to_float64(const int64<8>& a)
{
    // _mm512_cvtepi64_pd belongs to AVX512DQ, not AVX512F, so it must not be
    // used on targets that only enable the foundation subset.
    // NOTE(review): SIMDPP_USE_AVX512DQ is expected to come from the library's
    // architecture setup header - confirm it is defined in this version.
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvtepi64_pd(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
378
379	template<unsigned N> SIMDPP_INL
380	float64<N> i_to_float64(const int64<N>& a)
381	{
382	float64<N> r;
383	for (unsigned i = `0`; i < a.vec_length; ++i) {
384	detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
385	}
386	return r;
387	}
388
389	// -----------------------------------------------------------------------------
390
391	static SIMDPP_INL
392	float64<`2`> i_to_float64(const uint64<`2`>& a)
393	{
394	#if SIMDPP_USE_AVX512VL
395	return _mm_cvtepu64_pd(a.native());
396	#elif SIMDPP_USE_NEON64
397	return vcvtq_f64_u64(a.native());
398	#elif SIMDPP_USE_VSX_207
399	return vec_ctf(a.native(), `0`);
400	#elif SIMDPP_USE_VSX_206
401	uint32<`4`> a32; a32 = a; // a stores 64-bit values in GPR
402	return vec_ctf((__vector uint64_t)a32.native(), `0`);
403	#elif SIMDPP_USE_MSA
404	return __msa_ffint_u_d(a.native());
405	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
406	detail::mem_block<uint64<`2`>> ax(a);
407	float64<`2`> r;
408	r.el(`0`) = double(ax[`0`]);
409	r.el(`1`) = double(ax[`1`]);
410	return r;
411	#else
412	return SIMDPP_NOT_IMPLEMENTED1(a);
413	#endif
414	}
415
#if SIMDPP_USE_AVX
/** Converts four unsigned 64-bit integer lanes to four 64-bit floating-point
    lanes. Only compiled when AVX is enabled; resolves to
    SIMDPP_NOT_IMPLEMENTED1 unless the required AVX-512 subsets are available.
*/
static SIMDPP_INL
float64<4> i_to_float64(const uint64<4>& a)
{
    // _mm256_cvtepu64_pd is an AVX512DQ + AVX512VL intrinsic, so both feature
    // macros must be set; checking VL alone selects it on targets that lack DQ.
    // NOTE(review): SIMDPP_USE_AVX512DQ is expected to come from the library's
    // architecture setup header - confirm it is defined in this version.
#if SIMDPP_USE_AVX512VL && SIMDPP_USE_AVX512DQ
    return _mm256_cvtepu64_pd(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
427
#if SIMDPP_USE_AVX512F
/** Converts eight unsigned 64-bit integer lanes to eight 64-bit
    floating-point lanes. Only compiled when AVX512F is enabled; resolves to
    SIMDPP_NOT_IMPLEMENTED1 when AVX512DQ is not available.
*/
static SIMDPP_INL
float64<8> i_to_float64(const uint64<8>& a)
{
    // _mm512_cvtepu64_pd belongs to AVX512DQ, not AVX512F, so it must not be
    // used on targets that only enable the foundation subset.
    // NOTE(review): SIMDPP_USE_AVX512DQ is expected to come from the library's
    // architecture setup header - confirm it is defined in this version.
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvtepu64_pd(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
435
436	template<unsigned N> SIMDPP_INL
437	float64<N> i_to_float64(const uint64<N>& a)
438	{
439	float64<N> r;
440	for (unsigned i = `0`; i < a.vec_length; ++i) {
441	detail::subvec_insert(r, i_to_float64(a.vec(i)), i);
442	}
443	return r;
444	}
445
446	// -----------------------------------------------------------------------------
447
448	} // namespace insn
449	} // namespace detail
450	} // namespace SIMDPP_ARCH_NAMESPACE
451	} // namespace simdpp
452
453	#endif
454
455
456

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_any_to_float64.h