| 1 | /* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt> |
| 2 | |
| 3 | Distributed under the Boost Software License, Version 1.0. |
| 4 | (See accompanying file LICENSE_1_0.txt or copy at |
| 5 | http://www.boost.org/LICENSE_1_0.txt) |
| 6 | */ |
| 7 | |
| 8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ABS_H |
| 9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ABS_H |
| 10 | |
| 11 | #ifndef LIBSIMDPP_SIMD_H |
| 12 | #error "This file must be included through simd.h" |
| 13 | #endif |
| 14 | |
| 15 | #include <simdpp/types.h> |
| 16 | #include <simdpp/core/bit_xor.h> |
| 17 | #include <simdpp/core/cmp_lt.h> |
| 18 | #include <simdpp/core/cmp_neq.h> |
| 19 | #include <simdpp/core/i_shift_r.h> |
| 20 | #include <simdpp/core/i_sub.h> |
| 21 | #include <simdpp/core/move_r.h> |
| 22 | #include <simdpp/detail/null/math.h> |
| 23 | #include <simdpp/detail/vector_array_macros.h> |
| 24 | |
| 25 | namespace simdpp { |
| 26 | namespace SIMDPP_ARCH_NAMESPACE { |
| 27 | namespace detail { |
| 28 | namespace insn { |
| 29 | |
static SIMDPP_INL
uint8<16> i_iabs(const int8<16>& a)
{
    // Element-wise absolute value of signed 8-bit lanes. The result is
    // unsigned, so |INT8_MIN| (bit pattern 0x80 = 128) is representable.
#if SIMDPP_USE_NULL
    return detail::null::abs(a);
#elif SIMDPP_USE_SSSE3
    return _mm_abs_epi8(a.native());
#elif SIMDPP_USE_SSE2
    // SSE2 has no byte abs instruction. Build an all-ones mask in every
    // negative lane, then apply the identity |a| = (a ^ mask) - mask:
    // for negative a this is ~a - (-1) = ~a + 1 = -a; for non-negative a
    // the mask is zero and a passes through unchanged.
    int8<16> t, r;
    t = cmp_lt(a, 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    return int8x16(vabsq_s8(a.native()));
#elif SIMDPP_USE_ALTIVEC
    // expands to 3 instructions
    return (__vector uint8_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // add_a computes |a| + |b| per lane; adding zero yields plain |a|
    int8<16> zero = make_zero();
    return (v16u8) __msa_add_a_b(a.native(), zero.native());
#endif
}
| 53 | |
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8<32> i_iabs(const int8<32>& a)
{
    // AVX2 provides a direct 8-bit abs for 256-bit vectors
    return _mm256_abs_epi8(a.native());
}
#endif
| 61 | |
#if SIMDPP_USE_AVX512BW
static SIMDPP_INL
uint8<64> i_iabs(const int8<64>& a)
{
    // AVX-512BW provides a direct 8-bit abs for 512-bit vectors
    return _mm512_abs_epi8(a.native());
}
#endif
| 69 | |
| 70 | // ----------------------------------------------------------------------------- |
| 71 | |
static SIMDPP_INL
uint16<8> i_iabs(const int16<8>& a)
{
    // Element-wise absolute value of signed 16-bit lanes. The result is
    // unsigned, so |INT16_MIN| is representable.
#if SIMDPP_USE_NULL
    return detail::null::abs(a);
#elif SIMDPP_USE_SSSE3
    return _mm_abs_epi16(a.native());
#elif SIMDPP_USE_SSE2
    // no 16-bit abs before SSSE3: use |a| = (a ^ mask) - mask, where
    // mask is all-ones in negative lanes (see 8-bit variant)
    int16<8> t, r;
    t = cmp_lt(a, 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    return int16x8(vabsq_s16(a.native()));
#elif SIMDPP_USE_ALTIVEC
    // expands to 3 instructions
    return (__vector uint16_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // add_a computes |a| + |b| per lane; adding zero yields plain |a|
    int16<8> zero = make_zero();
    return (v8u16) __msa_add_a_h(a.native(), zero.native());
#endif
}
| 95 | |
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_iabs(const int16<16>& a)
{
    // AVX2 provides a direct 16-bit abs for 256-bit vectors
    return _mm256_abs_epi16(a.native());
}
#endif
| 103 | |
#if SIMDPP_USE_AVX512BW
static SIMDPP_INL
uint16<32> i_iabs(const int16<32>& a)
{
    // AVX-512BW provides a direct 16-bit abs for 512-bit vectors
    return _mm512_abs_epi16(a.native());
}
#endif
| 111 | |
| 112 | // ----------------------------------------------------------------------------- |
| 113 | |
static SIMDPP_INL
uint32<4> i_iabs(const int32<4>& a)
{
    // Element-wise absolute value of signed 32-bit lanes. The result is
    // unsigned, so |INT32_MIN| is representable.
#if SIMDPP_USE_NULL
    return detail::null::abs(a);
#elif SIMDPP_USE_SSSE3
    return _mm_abs_epi32(a.native());
#elif SIMDPP_USE_SSE2
    // no 32-bit abs before SSSE3: use |a| = (a ^ mask) - mask, where
    // mask is all-ones in negative lanes (see 8-bit variant)
    int32<4> t, r;
    t = cmp_lt(a, 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    return int32x4(vabsq_s32(a.native()));
#elif SIMDPP_USE_ALTIVEC
    // expands to 3 instructions
    return (__vector uint32_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // add_a computes |a| + |b| per lane; adding zero yields plain |a|
    int32<4> zero = make_zero();
    return (v4u32) __msa_add_a_w(a.native(), zero.native());
#endif
}
| 137 | |
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_iabs(const int32<8>& a)
{
    // AVX2 provides a direct 32-bit abs for 256-bit vectors
    return _mm256_abs_epi32(a.native());
}
#endif
| 145 | |
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_iabs(const int32<16>& a)
{
    // AVX-512F provides a direct 32-bit abs for 512-bit vectors
    return _mm512_abs_epi32(a.native());
}
#endif
| 153 | |
| 154 | // ----------------------------------------------------------------------------- |
| 155 | |
static SIMDPP_INL
uint64<2> i_iabs(const int64<2>& a)
{
    // Element-wise absolute value of signed 64-bit lanes. The result is
    // unsigned, so |INT64_MIN| is representable. Pre-AVX-512 x86 and
    // 32-bit NEON lack both a 64-bit abs and a 64-bit compare, hence the
    // sign-bit tricks below.
#if SIMDPP_USE_AVX512VL
    return _mm_abs_epi64(a.native());
#elif SIMDPP_USE_SSE2
    // Build an all-ones mask in negative lanes without a 64-bit integer
    // compare, then apply |a| = (a ^ mask) - mask.
    uint32x4 ta;
    int64x2 t, r;
    // keep only the sign bit of each 64-bit lane
    ta = (uint32x4) bit_and(a, 0x8000000000000000);
    // shift it down one position: bit-cast as double this reads as
    // 0x4000...0 (2.0) rather than -0.0, which would compare equal to 0
    ta = shift_r<1>(ta);
    // negative lanes are now non-zero doubles -> all-ones 64-bit mask
    t = cmp_neq(float64x2(ta), 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    // Derive the negative-lane mask from the sign bit using 32-bit ops.
    int32x4 z;
    int64<2> r;
    z = shift_r<63>(uint64x2(a));   // 1 in each negative 64-bit lane
    z = cmp_eq(z, 0);               // low 32-bit half is 0 iff lane negative
    z = permute4<0,0,2,2>(z);       // broadcast low half across each 64-bit lane
    z = bit_not(z);                 // all-ones mask in negative lanes
    int64x2 t;
    t = z;
    // |a| = (a ^ mask) - mask, as in the SSE2 path
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_VSX_207
    // expands to 3 instructions
    return (__vector uint64_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // add_a computes |a| + |b| per lane; adding zero yields plain |a|
    int64<2> zero = make_zero();
    return (v2u64) __msa_add_a_d(a.native(), zero.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::abs(a);
#endif
}
| 192 | |
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_iabs(const int64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_abs_epi64(a.native());
#else
    // AVX2 has no 64-bit abs instruction, but it does have a 64-bit
    // signed compare: 0 > a yields an all-ones mask in negative lanes.
    // Then |a| = (a ^ mask) - mask flips and increments negative lanes.
    int64x4 zero = make_zero();
    int64x4 mask, result;
    mask = _mm256_cmpgt_epi64(zero.native(), a.native());
    result = bit_xor(a, mask);
    result = sub(result, mask);
    return result;
#endif
}
#endif
| 209 | |
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_iabs(const int64<8>& a)
{
    // AVX-512F provides a direct 64-bit abs for 512-bit vectors
    return _mm512_abs_epi64(a.native());
}
#endif
| 217 | |
| 218 | // ----------------------------------------------------------------------------- |
| 219 | |
// Generic fallback for wider vector types: the macro expands to code that
// applies i_iabs to each native-width sub-vector of V and reassembles the
// result (see vector_array_macros.h).
template<class V> SIMDPP_INL
V i_iabs(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(V, i_iabs, a)
}
| 225 | |
| 226 | } // namespace insn |
| 227 | } // namespace detail |
| 228 | } // namespace SIMDPP_ARCH_NAMESPACE |
| 229 | } // namespace simdpp |
| 230 | |
| 231 | #endif |
| 232 | |
| 233 | |