conv_float_to_int32.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_float_to_int32.h]

1	/*  Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT32_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT32_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/i_add.h>
17	#include <simdpp/core/f_sub.h>
18	#include <simdpp/core/cmp_ge.h>
19	#include <simdpp/detail/vector_array_conv_macros.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26	// -----------------------------------------------------------------------------
27
28	static SIMDPP_INL
29	int32<`4`> i_to_int32(const float32<`4`>& a)
30	{
31	#if SIMDPP_USE_NULL
32	int32x4 r;
33	for (unsigned i = `0`; i < a.length; i++) {
34	r.el(i) = int32_t(a.el(i));
35	}
36	return r;
37	#elif SIMDPP_USE_SSE2
38	return _mm_cvttps_epi32(a.native());
39	#elif SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP
40	detail::mem_block<float32x4> mf(a);
41	detail::mem_block<int32x4> mi;
42	mi[`0`] = int(mf[`0`]);
43	mi[`1`] = int(mf[`1`]);
44	mi[`2`] = int(mf[`2`]);
45	mi[`3`] = int(mf[`3`]);
46	return mi;
47	#elif SIMDPP_USE_NEON_FLT_SP
48	return vcvtq_s32_f32(a.native());
49	#elif SIMDPP_USE_MSA
50	return __msa_ftrunc_s_w(a.native());
51	#elif SIMDPP_USE_ALTIVEC
52	return vec_cts(a.native(), `0`);
53	#endif
54	}
55
#if SIMDPP_USE_AVX
// Converts eight float32 lanes to signed 32-bit integers with the truncating
// AVX conversion instruction.
static SIMDPP_INL
int32<8> i_to_int32(const float32<8>& a)
{
#if SIMDPP_USE_AVX2
    return _mm256_cvttps_epi32(a.native());
#else
    // AVX without AVX2: the conversion itself still runs on the full 256-bit
    // register, but the result is handed back as two 128-bit halves that are
    // joined with combine(). The halves are kept as int32<4> so that they
    // match the signed return type and no unsigned -> signed vector
    // conversion is needed on return.
    const __m256i wide = _mm256_cvttps_epi32(a.native());
    int32<4> lo, hi;
    lo = _mm256_castsi256_si128(wide);
    hi = _mm256_extractf128_si256(wide, 1);
    return combine(lo, hi);
#endif
}
#endif
71
#if SIMDPP_USE_AVX512F
// Converts sixteen float32 lanes to signed 32-bit integers with a single
// truncating AVX-512F conversion.
static SIMDPP_INL
int32<16> i_to_int32(const float32<16>& a)
{
    const __m512i converted = _mm512_cvttps_epi32(a.native());
    return converted;
}
#endif
79
80	template<unsigned N> SIMDPP_INL
81	int32<N> i_to_int32(const float32<N>& a)
82	{
83	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int32<N>, i_to_int32, a)
84	}
85
86	// -----------------------------------------------------------------------------
87
88	static SIMDPP_INL
89	uint32<`4`> i_to_uint32(const float32<`4`>& a)
90	{
91	#if SIMDPP_USE_NULL
92	uint32x4 r;
93	for (unsigned i = `0`; i < a.length; i++) {
94	r.el(i) = uint32_t(a.el(i));
95	}
96	return r;
97	#elif SIMDPP_USE_AVX512VL
98	return _mm_cvttps_epu32(a.native());
99	#elif SIMDPP_USE_AVX512F
100	__m512 a512 = _mm512_castps128_ps512(a.native());
101	return _mm512_castsi512_si128(_mm512_cvttps_epu32(a512));
102	#elif SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP
103	detail::mem_block<float32x4> mf(a);
104	detail::mem_block<uint32x4> mi;
105	mi[`0`] = unsigned(mf[`0`]);
106	mi[`1`] = unsigned(mf[`1`]);
107	mi[`2`] = unsigned(mf[`2`]);
108	mi[`3`] = unsigned(mf[`3`]);
109	return mi;
110	#elif SIMDPP_USE_NEON_FLT_SP
111	return vcvtq_u32_f32(a.native());
112	#elif SIMDPP_USE_MSA
113	return __msa_ftrunc_u_w(a.native());
114	#elif SIMDPP_USE_ALTIVEC
115	return vec_ctu(a.native(), `0`);
116	#else
117	// Smaller than 0x80000000 numbers can be represented as int32, so we can
118	// use i_to_int32 which is available as instruction on all supported
119	// architectures. Values >= 0x80000000 are biased into the range -0x80000000..0xffffffff.
120	// These conveniently convert through i_to_int32() to 0x80000000..0xffffffff. No further
121	// unbiasing is required. No attempt is made to produce a reliable overflow value for
122	// values outside the range 0 .. 0xffffffff.
123	mask_float32<`4`> is_large = cmp_ge(a, `0x80000000`);
124	return uint32<`4`>( i_to_int32(sub(a, bit_and(is_large, splat<float32<`4`>>(`0x100000000`)))) );
125	#endif
126	}
127
#if SIMDPP_USE_AVX
// Converts eight float32 lanes to unsigned 32-bit integers.
static SIMDPP_INL
uint32<8> i_to_uint32(const float32<8>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cvttps_epu32(a.native());
#elif SIMDPP_USE_AVX512F
    // No 256-bit form without AVX512VL: view the input as the low part of a
    // 512-bit register, convert, and keep only the low 256 bits of the result.
    const __m512 widened = _mm512_castps256_ps512(a.native());
    const __m512i converted = _mm512_cvttps_epu32(widened);
    return _mm512_castsi512_si256(converted);
#else
    // No unsigned conversion instruction: lanes that are >= 0x80000000 have
    // 0x100000000 subtracted so that they land in the int32 range; the signed
    // conversion then yields the bit pattern of the wanted unsigned value.
    // Inputs outside 0..0xffffffff get no defined overflow value.
    mask_float32<8> needs_bias = cmp_ge(a, 0x80000000);
    return uint32<8>(
        i_to_int32(
            sub(a, bit_and(needs_bias, splat<float32<8>>(0x100000000)))
        )
    );
#endif
}
#endif
143
#if SIMDPP_USE_AVX512F
// Converts sixteen float32 lanes to unsigned 32-bit integers with a single
// truncating AVX-512F conversion.
static SIMDPP_INL
uint32<16> i_to_uint32(const float32<16>& a)
{
    const __m512i converted = _mm512_cvttps_epu32(a.native());
    return converted;
}
#endif
151
152	template<unsigned N> SIMDPP_INL
153	uint32<N> i_to_uint32(const float32<N>& a)
154	{
155	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint32<N>, i_to_uint32, a)
156	}
157
158	// -----------------------------------------------------------------------------
159
160	static SIMDPP_INL
161	uint32<`4`> i_to_uint32(const float64<`4`>& a)
162	{
163	#if SIMDPP_USE_AVX512VL
164	return _mm256_cvttpd_epu32(a.native());
165	#elif SIMDPP_USE_AVX512F
166	__m512d a512 = _mm512_castpd256_pd512(a.native());
167	return _mm256_castsi256_si128(_mm512_cvttpd_epu32(a512));
168	#elif SIMDPP_USE_NEON64
169	uint64x2_t r1, r2;
170	r1 = vcvtq_u64_f64(a.vec(`0`).native());
171	r2 = vcvtq_u64_f64(a.vec(`1`).native());
172	// FIXME: saturation
173	uint32<`4`> r = vcombine_u32(vqmovn_u64(r1), vqmovn_u64(r2));
174	return r;
175	#elif SIMDPP_USE_VSX_206
176	uint32<`4`> r, r1, r2;
177	r1 = (__vector uint32_t) vec_ctu(a.vec(`0`).native(), `0`);
178	r2 = (__vector uint32_t) vec_ctu(a.vec(`1`).native(), `0`);
179	r = unzip4_lo(r1, r2);
180	return r;
181	#elif SIMDPP_USE_MSA
182	uint64<`2`> r1, r2;
183	r1 = __msa_ftrunc_u_d(a.vec(`0`).native());
184	r2 = __msa_ftrunc_u_d(a.vec(`1`).native());
185	return unzip4_lo((uint32<`4`>)r1, (uint32<`4`>)r2);
186	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
187	detail::mem_block<uint32x4> r;
188	r[`0`] = uint32_t(a.vec(`0`).el(`0`));
189	r[`1`] = uint32_t(a.vec(`0`).el(`1`));
190	r[`2`] = uint32_t(a.vec(`1`).el(`0`));
191	r[`3`] = uint32_t(a.vec(`1`).el(`1`));
192	return r;
193	#else
194	return SIMDPP_NOT_IMPLEMENTED1(a);
195	#endif
196	}
197
#if SIMDPP_USE_AVX
// Converts eight float64 lanes to eight unsigned 32-bit integers. Only
// AVX-512F provides an implementation; other AVX configurations end up in
// SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
uint32<8> i_to_uint32(const float64<8>& a)
{
#if !SIMDPP_USE_AVX512F
    return SIMDPP_NOT_IMPLEMENTED1(a);
#else
    return _mm512_cvttpd_epu32(a.native());
#endif
}
#endif
209
#if SIMDPP_USE_AVX512F
// Converts sixteen float64 lanes to sixteen unsigned 32-bit integers: each
// of the two sub-vectors of `a` is converted on its own and the two
// eight-lane results are joined with combine().
static SIMDPP_INL
uint32<16> i_to_uint32(const float64<16>& a)
{
    uint32<8> lo, hi;
    lo = _mm512_cvttpd_epu32(a.vec(0).native());
    hi = _mm512_cvttpd_epu32(a.vec(1).native());
    return combine(lo, hi);
}
#endif
220
221	template<unsigned N> SIMDPP_INL
222	uint32<N> i_to_uint32(const float64<N>& a)
223	{
224	SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint32<N>, i_to_uint32, a)
225	}
226
227	// -----------------------------------------------------------------------------
228
229	static SIMDPP_INL
230	int32x4 i_to_int32(const float64x4& a)
231	{
232	#if SIMDPP_USE_AVX512VL
233	return _mm256_cvttpd_epi32(a.native());
234	#elif SIMDPP_USE_SSE2
235	int32x4 r, r1, r2;
236	float64x2 a1, a2;
237	split(a, a1, a2);
238	r1 = _mm_cvttpd_epi32(a1.native());
239	r2 = _mm_cvttpd_epi32(a2.native());
240	r = zip2_lo(int64<`2`>(r1), int64<`2`>(r2));
241	return r;
242	#elif SIMDPP_USE_NEON64
243	int64x2_t r1, r2;
244	r1 = vcvtq_s64_f64(a.vec(`0`).native());
245	r2 = vcvtq_s64_f64(a.vec(`1`).native());
246	// FIXME: saturation
247	int32<`4`> r = vcombine_s32(vqmovn_s64(r1), vqmovn_s64(r2));
248	return r;
249	#elif SIMDPP_USE_VSX_206
250	int32<`4`> r, r1, r2;
251	r1 = (__vector int32_t) vec_cts(a.vec(`0`).native(), `0`);
252	r2 = (__vector int32_t) vec_cts(a.vec(`1`).native(), `0`);
253	r = unzip4_lo(r1, r2);
254	return r;
255	#elif SIMDPP_USE_MSA
256	int64<`2`> r1, r2;
257	r1 = __msa_ftrunc_s_d(a.vec(`0`).native());
258	r2 = __msa_ftrunc_s_d(a.vec(`1`).native());
259	return unzip4_lo((int32<`4`>)r1, (int32<`4`>)r2);
260	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
261	detail::mem_block<int32x4> r;
262	r[`0`] = int32_t(a.vec(`0`).el(`0`));
263	r[`1`] = int32_t(a.vec(`0`).el(`1`));
264	r[`2`] = int32_t(a.vec(`1`).el(`0`));
265	r[`3`] = int32_t(a.vec(`1`).el(`1`));
266	return r;
267	#else
268	return SIMDPP_NOT_IMPLEMENTED1(a);
269	#endif
270	}
271
#if SIMDPP_USE_AVX
// Converts eight float64 lanes to eight signed 32-bit integers.
static SIMDPP_INL
int32<8> i_to_int32(const float64<8>& a)
{
#if !SIMDPP_USE_AVX512F
    // No 512-bit registers: convert each 256-bit sub-vector of `a` to four
    // integers and join the two halves with combine().
    int32<4> lo, hi;
    lo = _mm256_cvttpd_epi32(a.vec(0).native());
    hi = _mm256_cvttpd_epi32(a.vec(1).native());
    return combine(lo, hi);
#else
    return _mm512_cvttpd_epi32(a.native());
#endif
}
#endif
286
#if SIMDPP_USE_AVX512F
// Converts sixteen float64 lanes to sixteen signed 32-bit integers: each of
// the two sub-vectors of `a` is converted on its own and the two eight-lane
// results are joined with combine().
static SIMDPP_INL
int32<16> i_to_int32(const float64<16>& a)
{
    int32<8> lo, hi;
    lo = _mm512_cvttpd_epi32(a.vec(0).native());
    hi = _mm512_cvttpd_epi32(a.vec(1).native());
    return combine(lo, hi);
}
#endif
297
298	template<unsigned N> SIMDPP_INL
299	int32<N> i_to_int32(const float64<N>& a)
300	{
301	SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(int32<N>, i_to_int32, a)
302	}
303
304
305	} // namespace insn
306	} // namespace detail
307	} // namespace SIMDPP_ARCH_NAMESPACE
308	} // namespace simdpp
309
310	#endif
311
312
313

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_float_to_int32.h