conv_any_to_float32.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_any_to_float32.h]

/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_ANY_TO_FLOAT32_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_ANY_TO_FLOAT32_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/combine.h>
17	#include <simdpp/core/zip_hi.h>
18	#include <simdpp/core/zip_lo.h>
19	#include <simdpp/core/insert.h>
20	#include <simdpp/core/f_mul.h>
21	#include <simdpp/core/make_shuffle_bytes_mask.h>
22	#include <simdpp/core/shuffle_bytes16.h>
23	#include <simdpp/core/detail/subvec_extract.h>
24	#include <simdpp/detail/mem_block.h>
25	#include <simdpp/detail/insn/conv_extend_to_int32.h>
26	#include <simdpp/detail/insn/conv_any_to_float64.h>
27
28	namespace simdpp {
29	namespace SIMDPP_ARCH_NAMESPACE {
30	namespace detail {
31	namespace insn {
32
33
34	static SIMDPP_INL
35	float32<`4`> i_to_float32(const float64<`4`>& a)
36	{
37	#if SIMDPP_USE_AVX
38	return _mm256_cvtpd_ps(a.native());
39	#elif SIMDPP_USE_SSE2
40	float32x4 r1, r2;
41	r1 = _mm_cvtpd_ps(a.vec(`0`).native());
42	r2 = _mm_cvtpd_ps(a.vec(`1`).native());
43	return _mm_movelh_ps(r1.native(), r2.native());
44	#elif SIMDPP_USE_NEON64
45	float32<`4`> r;
46	r = vcvt_high_f32_f64(vcvt_f32_f64(a.vec(`0`).native()),
47	a.vec(`1`).native());
48	return r;
49	#elif SIMDPP_USE_VSX_206
50	float32<`4`> lo, hi;
51	uint32<`4`> shuffle_mask;
52	lo = __builtin_vsx_xvcvdpsp(a.vec(`0`).native());
53	hi = __builtin_vsx_xvcvdpsp(a.vec(`1`).native());
54	shuffle_mask = make_shuffle_bytes16_mask<`0`,`2`,`4`,`6`>(shuffle_mask);
55	return shuffle_bytes16(lo, hi, shuffle_mask);
56	#elif SIMDPP_USE_MSA
57	return __msa_fexdo_w(a.vec(`0`).native(), a.vec(`1`).native());
58	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
59	detail::mem_block<float32x4> r;
60	r[`0`] = float(a.vec(`0`).el(`0`));
61	r[`1`] = float(a.vec(`0`).el(`1`));
62	r[`2`] = float(a.vec(`1`).el(`0`));
63	r[`3`] = float(a.vec(`1`).el(`1`));
64	return r;
65	#endif
66	}
67
#if SIMDPP_USE_AVX
/*  Narrows eight 64-bit floats to eight 32-bit floats.

    With AVX-512F the whole vector is converted by one instruction; otherwise
    the two four-element halves are converted separately and combined.

    Both paths round inexact results according to the current rounding mode.
    The AVX-512F path used to force round-toward-zero, which made float64<8>
    (and float64<16>, which is built on top of it) round differently from the
    four-element overload within the same build.
*/
static SIMDPP_INL
float32<8> i_to_float32(const float64<8>& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtpd_ps(a.native());
#else
    float32x4 r1, r2;
    r1 = i_to_float32(a.vec(0));
    r2 = i_to_float32(a.vec(1));
    return combine(r1, r2);
#endif
}
#endif
82
#if SIMDPP_USE_AVX512F
/*  Narrows sixteen 64-bit floats to sixteen 32-bit floats by converting the
    two eight-element halves and joining the results, a.vec(0) first.
*/
static SIMDPP_INL
float32<16> i_to_float32(const float64<16>& a)
{
    float32<8> lo_half = i_to_float32(a.vec(0));
    float32<8> hi_half = i_to_float32(a.vec(1));
    return combine(lo_half, hi_half);
}
#endif
93
// Generic float64 -> float32 conversion for vector lengths that have no
// dedicated overload above.
// NOTE(review): SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT is defined outside this
// file; it presumably applies i_to_float32 to each native-width sub-vector of
// `a` and assembles the pieces into a float32<N> - confirm against the macro
// definition.
94	template<unsigned N> SIMDPP_INL
95	float32<N> i_to_float32(const float64<N>& a)
96	{
97	SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(float32<N>, i_to_float32, a)
98	}
99
100	// -----------------------------------------------------------------------------
101
102	static SIMDPP_INL
103	float32<`4`> i_to_float32(const int64<`4`>& a)
104	{
105	#if SIMDPP_USE_NULL
106	float32<`4`> r;
107	for (unsigned i = `0`; i < a.length; i++) {
108	r.el(i) = float(a.vec(i/`2`).el(i%`2`));
109	}
110	return r;
111	#elif SIMDPP_USE_AVX512VL
112	return _mm256_cvtepi64_ps(a.native());
113	#else
114	return i_to_float32(i_to_float64(a));
115	#endif
116	}
117
#if SIMDPP_USE_AVX
/*  Converts eight signed 64-bit integers to eight 32-bit floats.

    AVX512DQ provides a direct instruction. Without it the conversion goes
    through float64, the same fallback the 4-, 16- and N-element overloads
    use, instead of reporting the operation as not implemented.
*/
static SIMDPP_INL
float32<8> i_to_float32(const int64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvtepi64_ps(a.native());
#else
    return i_to_float32(i_to_float64(a));
#endif
}
#endif
129
#if SIMDPP_USE_AVX512F
/*  Converts sixteen signed 64-bit integers to sixteen 32-bit floats.

    With AVX512DQ each eight-element half is converted directly and the two
    results are joined; otherwise the conversion goes through float64.
*/
static SIMDPP_INL
float32<16> i_to_float32(const int64<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    float32<8> lo_half = _mm512_cvtepi64_ps(a.vec(0).native());
    float32<8> hi_half = _mm512_cvtepi64_ps(a.vec(1).native());
    return combine(lo_half, hi_half);
#else
    return i_to_float32(i_to_float64(a));
#endif
}
#endif
143
144	template<unsigned N> SIMDPP_INL
145	float32<N> i_to_float32(const int64<N>& a)
146	{
147	return i_to_float32(i_to_float64(a));
148	}
149
150	// -----------------------------------------------------------------------------
151
152	static SIMDPP_INL
153	float32<`4`> i_to_float32(const uint64<`4`>& a)
154	{
155	#if SIMDPP_USE_NULL
156	float32<`4`> r;
157	for (unsigned i = `0`; i < a.length; i++) {
158	r.el(i) = float(a.vec(i/`2`).el(i%`2`));
159	}
160	return r;
161	#elif SIMDPP_USE_AVX512VL
162	return _mm256_cvtepu64_ps(a.native());
163	#else
164	return i_to_float32(i_to_float64(a));
165	#endif
166	}
167
#if SIMDPP_USE_AVX
/*  Converts eight unsigned 64-bit integers to eight 32-bit floats.

    AVX512DQ provides a direct instruction. Without it the conversion goes
    through float64, the same fallback the 4-, 16- and N-element overloads
    use, instead of reporting the operation as not implemented.
*/
static SIMDPP_INL
float32<8> i_to_float32(const uint64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_cvtepu64_ps(a.native());
#else
    return i_to_float32(i_to_float64(a));
#endif
}
#endif
179
#if SIMDPP_USE_AVX512F
/*  Converts sixteen unsigned 64-bit integers to sixteen 32-bit floats.

    With AVX512DQ each eight-element half is converted directly and the two
    results are joined; otherwise the conversion goes through float64.
*/
static SIMDPP_INL
float32<16> i_to_float32(const uint64<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    float32<8> lo_half = _mm512_cvtepu64_ps(a.vec(0).native());
    float32<8> hi_half = _mm512_cvtepu64_ps(a.vec(1).native());
    return combine(lo_half, hi_half);
#else
    return i_to_float32(i_to_float64(a));
#endif
}
#endif
193
194	template<unsigned N> SIMDPP_INL
195	float32<N> i_to_float32(const uint64<N>& a)
196	{
197	return i_to_float32(i_to_float64(a));
198	}
199
200	// -----------------------------------------------------------------------------
201
202	static SIMDPP_INL
203	float32<`4`> i_to_float32(const int32<`4`>& a)
204	{
205	#if SIMDPP_USE_NULL
206	float32<`4`> r;
207	for (unsigned i = `0`; i < a.length; i++) {
208	r.el(i) = float(a.el(i));
209	}
210	return r;
211	#elif SIMDPP_USE_SSE2
212	return _mm_cvtepi32_ps(a.native());
213	#elif SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP
214	detail::mem_block<int32<`4`>> mi(a);
215	float32<`4`> r;
216	r.el(`0`) = float(mi[`0`]);
217	r.el(`1`) = float(mi[`1`]);
218	r.el(`2`) = float(mi[`2`]);
219	r.el(`3`) = float(mi[`3`]);
220	return r;
221	#elif SIMDPP_USE_NEON_FLT_SP
222	return vcvtq_f32_s32(a.native());
223	#elif SIMDPP_USE_ALTIVEC
224	return vec_ctf(a.native(), `0`);
225	#elif SIMDPP_USE_MSA
226	return __msa_ffint_s_w(a.native());
227	#endif
228	}
229
#if SIMDPP_USE_AVX
/*  Converts eight signed 32-bit integers to eight 32-bit floats.

    With AVX2 the source is already a single 256-bit register. With plain AVX
    it is held as two 128-bit halves, which are first joined into one 256-bit
    integer register and then converted.
*/
static SIMDPP_INL
float32x8 i_to_float32(const int32x8& a)
{
#if SIMDPP_USE_AVX2
    return _mm256_cvtepi32_ps(a.native());
#else
    __m256i joined = _mm256_castsi128_si256(a.vec(0).native());
    joined = _mm256_insertf128_si256(joined, a.vec(1).native(), 1);
    return _mm256_cvtepi32_ps(joined);
#endif
}
#endif
244
#if SIMDPP_USE_AVX512F
// Converts sixteen signed 32-bit integers to sixteen 32-bit floats with one
// AVX-512F conversion.
static SIMDPP_INL
float32<16> i_to_float32(const int32<16>& a)
{
    float32<16> converted = _mm512_cvtepi32_ps(a.native());
    return converted;
}
#endif
252
// Generic int32 -> float32 conversion for vector lengths that have no
// dedicated overload above.
// NOTE(review): SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT is defined outside this
// file; it presumably applies i_to_float32 to each native-width sub-vector of
// `a` and assembles the pieces into a float32<N> - confirm against the macro
// definition.
253	template<unsigned N> SIMDPP_INL
254	float32<N> i_to_float32(const int32<N>& a)
255	{
256	SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(float32<N>, i_to_float32, a)
257	}
258
259	// -----------------------------------------------------------------------------
260
261	static SIMDPP_INL
262	float32<`4`> i_to_float32(const uint32<`4`>& a)
263	{
264	#if SIMDPP_USE_NULL
265	float32<`4`> r;
266	for (unsigned i = `0`; i < a.length; i++) {
267	r.el(i) = float(a.el(i));
268	}
269	return r;
270	#elif SIMDPP_USE_SSE2
271	// true when a is in the range [0x80000000, 0xffffffff)
272	mask_float32<`4`> is_large = mask_float32<`4`>(cmp_lt(int32<`4`>(a), `0`));
273
274	float32<`4`> f_a = _mm_cvtepi32_ps(a.native());
275	// f_a has values in the range [0x80000000, 0xffffffff) wrapped around to
276	// negative values. Conditionally bias the result to fix that. Note, that
277	// the result is in sufficient precision even for large argument values.
278	// The result has lowest precision around 0x80000000, and the precision
279	// increases going towards 0xffffffff. The final result after bias will
280	// have lower precision in this whole range.
281	return add(f_a, bit_and(is_large, splat<float32<`4`>>(`0x100000000`)));
282	#elif SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP
283	detail::mem_block<uint32<`4`>> mi(a);
284	detail::mem_block<float32<`4`>> mf;
285	mf[`0`] = float(mi[`0`]);
286	mf[`1`] = float(mi[`1`]);
287	mf[`2`] = float(mi[`2`]);
288	mf[`3`] = float(mi[`3`]);
289	return mf;
290	#elif SIMDPP_USE_NEON_FLT_SP
291	return vcvtq_f32_u32(a.native());
292	#elif SIMDPP_USE_ALTIVEC
293	return vec_ctf(a.native(), `0`);
294	#elif SIMDPP_USE_MSA
295	return __msa_ffint_u_w(a.native());
296	#endif
297	}
298
#if SIMDPP_USE_AVX
/*  Converts eight unsigned 32-bit integers to eight 32-bit floats.

    AVX-512F: the value is cast up to a 512-bit register, converted with the
    unsigned instruction, and the low 256 bits of the result are kept.
    AVX2: signed conversion followed by a 2^32 correction of the lanes whose
    top bit was set.
    Plain AVX: the two four-element halves are converted and joined.
*/
static SIMDPP_INL
float32x8 i_to_float32(const uint32x8& a)
{
#if SIMDPP_USE_AVX512F
    __m512i widened = _mm512_castsi256_si512(a.native());
    __m512 converted = _mm512_cvtepu32_ps(widened);
    return _mm512_castps512_ps256(converted);
#elif SIMDPP_USE_AVX2
    // Set for lanes in the range [0x80000000, 0xffffffff].
    mask_float32<8> top_bit_set = mask_float32<8>(cmp_lt(int32<8>(a), 0));

    // Those lanes come out of the signed conversion 2^32 too small.
    float32<8> as_signed = _mm256_cvtepi32_ps(a.native());
    float32<8> bias = bit_and(top_bit_set, splat<float32<8>>(0x100000000));
    return add(as_signed, bias);
#else
    float32<4> lo_half = i_to_float32(a.vec(0));
    float32<4> hi_half = i_to_float32(a.vec(1));
    return combine(lo_half, hi_half);
#endif
}
#endif
317
#if SIMDPP_USE_AVX512F
// Converts sixteen unsigned 32-bit integers to sixteen 32-bit floats with one
// AVX-512F conversion.
static SIMDPP_INL
float32<16> i_to_float32(const uint32<16>& a)
{
    float32<16> converted = _mm512_cvtepu32_ps(a.native());
    return converted;
}
#endif
325
// Generic uint32 -> float32 conversion for vector lengths that have no
// dedicated overload above.
// NOTE(review): SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT is defined outside this
// file; it presumably applies i_to_float32 to each native-width sub-vector of
// `a` and assembles the pieces into a float32<N> - confirm against the macro
// definition.
326	template<unsigned N> SIMDPP_INL
327	float32<N> i_to_float32(const uint32<N>& a)
328	{
329	SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(float32<N>, i_to_float32, a)
330	}
331
332	// -----------------------------------------------------------------------------
333
334	template<unsigned N> SIMDPP_INL
335	float32<N> i_to_float32(const uint16<N>& a)
336	{
337	return i_to_float32(i_to_uint32(a));
338	}
339
340	template<unsigned N> SIMDPP_INL
341	float32<N> i_to_float32(const int16<N>& a)
342	{
343	return i_to_float32(i_to_int32(a));
344	}
345
346	// -----------------------------------------------------------------------------
347
348	template<unsigned N> SIMDPP_INL
349	float32<N> i_to_float32(const uint8<N>& a)
350	{
351	return i_to_float32(i_to_uint32(a));
352	}
353
354	template<unsigned N> SIMDPP_INL
355	float32<N> i_to_float32(const int8<N>& a)
356	{
357	return i_to_float32(i_to_int32(a));
358	}
359
360	// -----------------------------------------------------------------------------
361
362	} // namespace insn
363	} // namespace detail
364	} // namespace SIMDPP_ARCH_NAMESPACE
365	} // namespace simdpp
366
367	#endif
368
369
370

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_any_to_float32.h