1/* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ABS_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ABS_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/bit_xor.h>
17#include <simdpp/core/cmp_lt.h>
18#include <simdpp/core/cmp_neq.h>
19#include <simdpp/core/i_shift_r.h>
20#include <simdpp/core/i_sub.h>
21#include <simdpp/core/move_r.h>
22#include <simdpp/detail/null/math.h>
23#include <simdpp/detail/vector_array_macros.h>
24
25namespace simdpp {
26namespace SIMDPP_ARCH_NAMESPACE {
27namespace detail {
28namespace insn {
29
// Absolute value of signed 8-bit lanes. The result is returned as an
// unsigned vector because abs(INT8_MIN) == 128 does not fit into int8.
static SIMDPP_INL
uint8<16> i_iabs(const int8<16>& a)
{
#if SIMDPP_USE_NULL
    // Scalar reference implementation.
    return detail::null::abs(a);
#elif SIMDPP_USE_SSSE3
    return _mm_abs_epi8(a.native());
#elif SIMDPP_USE_SSE2
    // No byte abs before SSSE3. Build an all-ones mask for negative lanes
    // and use the identity abs(x) == (x ^ mask) - mask: with mask == -1
    // this is (~x) + 1 == -x, with mask == 0 it is x unchanged.
    int8<16> t, r;
    t = cmp_lt(a, 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    return int8x16(vabsq_s8(a.native()));
#elif SIMDPP_USE_ALTIVEC
    // expands to 3 instructions
    return (__vector uint8_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // __msa_add_a_b computes |a| + |b| per lane; adding to zero gives |a|.
    int8<16> zero = make_zero();
    return (v16u8) __msa_add_a_b(a.native(), zero.native());
#endif
}
53
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8<32> i_iabs(const int8<32>& a)
{
    // AVX2 provides a single-instruction abs for signed bytes; the result
    // is unsigned since abs(INT8_MIN) does not fit into int8.
    return uint8<32>(_mm256_abs_epi8(a.native()));
}
#endif
61
#if SIMDPP_USE_AVX512BW
static SIMDPP_INL
uint8<64> i_iabs(const int8<64>& a)
{
    // AVX-512BW: single-instruction byte abs over the full 512-bit vector.
    return uint8<64>(_mm512_abs_epi8(a.native()));
}
#endif
69
70// -----------------------------------------------------------------------------
71
// Absolute value of signed 16-bit lanes. The result is returned as an
// unsigned vector because abs(INT16_MIN) == 32768 does not fit into int16.
static SIMDPP_INL
uint16<8> i_iabs(const int16<8>& a)
{
#if SIMDPP_USE_NULL
    // Scalar reference implementation.
    return detail::null::abs(a);
#elif SIMDPP_USE_SSSE3
    return _mm_abs_epi16(a.native());
#elif SIMDPP_USE_SSE2
    // No 16-bit abs before SSSE3. Build an all-ones mask for negative
    // lanes and use the identity abs(x) == (x ^ mask) - mask.
    int16<8> t, r;
    t = cmp_lt(a, 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    return int16x8(vabsq_s16(a.native()));
#elif SIMDPP_USE_ALTIVEC
    // expands to 3 instructions
    return (__vector uint16_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // __msa_add_a_h computes |a| + |b| per lane; adding to zero gives |a|.
    int16<8> zero = make_zero();
    return (v8u16) __msa_add_a_h(a.native(), zero.native());
#endif
}
95
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_iabs(const int16<16>& a)
{
    // AVX2: single-instruction 16-bit abs.
    return uint16<16>(_mm256_abs_epi16(a.native()));
}
#endif
103
#if SIMDPP_USE_AVX512BW
static SIMDPP_INL
uint16<32> i_iabs(const int16<32>& a)
{
    // AVX-512BW: single-instruction 16-bit abs over the 512-bit vector.
    return uint16<32>(_mm512_abs_epi16(a.native()));
}
#endif
111
112// -----------------------------------------------------------------------------
113
// Absolute value of signed 32-bit lanes. The result is returned as an
// unsigned vector because abs(INT32_MIN) does not fit into int32.
static SIMDPP_INL
uint32<4> i_iabs(const int32<4>& a)
{
#if SIMDPP_USE_NULL
    // Scalar reference implementation.
    return detail::null::abs(a);
#elif SIMDPP_USE_SSSE3
    return _mm_abs_epi32(a.native());
#elif SIMDPP_USE_SSE2
    // No 32-bit abs before SSSE3. Build an all-ones mask for negative
    // lanes and use the identity abs(x) == (x ^ mask) - mask.
    int32<4> t, r;
    t = cmp_lt(a, 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    return int32x4(vabsq_s32(a.native()));
#elif SIMDPP_USE_ALTIVEC
    // expands to 3 instructions
    return (__vector uint32_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // __msa_add_a_w computes |a| + |b| per lane; adding to zero gives |a|.
    int32<4> zero = make_zero();
    return (v4u32) __msa_add_a_w(a.native(), zero.native());
#endif
}
137
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_iabs(const int32<8>& a)
{
    // AVX2: single-instruction 32-bit abs.
    return uint32<8>(_mm256_abs_epi32(a.native()));
}
#endif
145
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_iabs(const int32<16>& a)
{
    // AVX-512F: single-instruction 32-bit abs over the 512-bit vector.
    return uint32<16>(_mm512_abs_epi32(a.native()));
}
#endif
153
154// -----------------------------------------------------------------------------
155
// Absolute value of signed 64-bit lanes. Pre-AVX512 x86 and 32-bit NEON
// have neither a 64-bit abs nor (on SSE2) a 64-bit integer compare, so the
// fallbacks synthesize a per-lane "is negative" all-ones mask and apply the
// identity abs(x) == (x ^ mask) - mask.
static SIMDPP_INL
uint64<2> i_iabs(const int64<2>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm_abs_epi64(a.native());
#elif SIMDPP_USE_SSE2
    // Isolate the sign bit of each 64-bit lane, then shift the 32-bit
    // lanes right by one so that a set sign bit lands in the exponent
    // field when the lane is reinterpreted as a double. The floating-point
    // compare then produces an all-ones mask exactly for negative lanes.
    // (Without the shift, the bit pattern would be -0.0, which compares
    // equal to 0.0 and would yield a wrong mask.)
    uint32x4 ta;
    int64x2 t, r;
    ta = (uint32x4) bit_and(a, 0x8000000000000000);
    ta = shift_r<1>(ta);
    t = cmp_neq(float64x2(ta), 0);
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_NEON
    // 32-bit NEON lacks a 64-bit abs. Bring each lane's sign bit to the
    // bottom, compare the 32-bit halves against zero, broadcast the half
    // holding the sign bit across the full 64-bit lane, then invert so the
    // mask is all-ones for negative lanes.
    int32x4 z;
    int64<2> r;
    z = shift_r<63>(uint64x2(a));
    z = cmp_eq(z, 0);
    z = permute4<0,0,2,2>(z);
    z = bit_not(z);
    int64x2 t;
    t = z;
    r = bit_xor(a, t);
    r = sub(r, t);
    return r;
#elif SIMDPP_USE_VSX_207
    // expands to 3 instructions
    return (__vector uint64_t) vec_abs(a.native());
#elif SIMDPP_USE_MSA
    // __msa_add_a_d computes |a| + |b| per lane; adding to zero gives |a|.
    int64<2> zero = make_zero();
    return (v2u64) __msa_add_a_d(a.native(), zero.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    // No suitable 64-bit vector support (plain Altivec has no 64-bit
    // compare); fall back to the scalar reference implementation.
    return detail::null::abs(a);
#endif
}
192
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_iabs(const int64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_abs_epi64(a.native());
#else
    // AVX2 has no 64-bit abs instruction. Produce an all-ones mask for
    // the negative lanes via a signed compare against zero, then use the
    // identity abs(x) == (x ^ mask) - mask.
    int64x4 zero = make_zero();
    int64x4 neg_mask, res;
    neg_mask = _mm256_cmpgt_epi64(zero.native(), a.native());
    res = bit_xor(a, neg_mask);
    res = sub(res, neg_mask);
    return res;
#endif
}
#endif
209
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_iabs(const int64<8>& a)
{
    // AVX-512F: single-instruction 64-bit abs over the 512-bit vector.
    return uint64<8>(_mm512_abs_epi64(a.native()));
}
#endif
217
218// -----------------------------------------------------------------------------
219
// Generic fallback for vector types that are physically stored as an array
// of smaller native vectors: expands to a loop applying i_iabs to each
// constituent sub-vector.
template<class V> SIMDPP_INL
V i_iabs(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(V, i_iabs, a)
}
225
226} // namespace insn
227} // namespace detail
228} // namespace SIMDPP_ARCH_NAMESPACE
229} // namespace simdpp
230
231#endif
232
233