1 | /* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ABS_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ABS_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/bit_xor.h> |
17 | #include <simdpp/core/cmp_lt.h> |
18 | #include <simdpp/core/cmp_neq.h> |
19 | #include <simdpp/core/i_shift_r.h> |
20 | #include <simdpp/core/i_sub.h> |
21 | #include <simdpp/core/move_r.h> |
22 | #include <simdpp/detail/null/math.h> |
23 | #include <simdpp/detail/vector_array_macros.h> |
24 | |
25 | namespace simdpp { |
26 | namespace SIMDPP_ARCH_NAMESPACE { |
27 | namespace detail { |
28 | namespace insn { |
29 | |
30 | static SIMDPP_INL |
31 | uint8<16> i_iabs(const int8<16>& a) |
32 | { |
33 | #if SIMDPP_USE_NULL |
34 | return detail::null::abs(a); |
35 | #elif SIMDPP_USE_SSSE3 |
36 | return _mm_abs_epi8(a.native()); |
37 | #elif SIMDPP_USE_SSE2 |
38 | int8<16> t, r; |
39 | t = cmp_lt(a, 0); |
40 | r = bit_xor(a, t); |
41 | r = sub(r, t); |
42 | return r; |
43 | #elif SIMDPP_USE_NEON |
44 | return int8x16(vabsq_s8(a.native())); |
45 | #elif SIMDPP_USE_ALTIVEC |
46 | // expands to 3 instructions |
47 | return (__vector uint8_t) vec_abs(a.native()); |
48 | #elif SIMDPP_USE_MSA |
49 | int8<16> zero = make_zero(); |
50 | return (v16u8) __msa_add_a_b(a.native(), zero.native()); |
51 | #endif |
52 | } |
53 | |
#if SIMDPP_USE_AVX2
/** Element-wise absolute value of thirty-two signed 8-bit lanes (AVX2 only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint8<32> i_iabs(const int8<32>& a)
{
    const __m256i abs_lanes = _mm256_abs_epi8(a.native());
    return abs_lanes;
}
#endif
61 | |
#if SIMDPP_USE_AVX512BW
/** Element-wise absolute value of sixty-four signed 8-bit lanes
    (AVX512BW only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint8<64> i_iabs(const int8<64>& a)
{
    const __m512i abs_lanes = _mm512_abs_epi8(a.native());
    return abs_lanes;
}
#endif
69 | |
70 | // ----------------------------------------------------------------------------- |
71 | |
72 | static SIMDPP_INL |
73 | uint16<8> i_iabs(const int16<8>& a) |
74 | { |
75 | #if SIMDPP_USE_NULL |
76 | return detail::null::abs(a); |
77 | #elif SIMDPP_USE_SSSE3 |
78 | return _mm_abs_epi16(a.native()); |
79 | #elif SIMDPP_USE_SSE2 |
80 | int16<8> t, r; |
81 | t = cmp_lt(a, 0); |
82 | r = bit_xor(a, t); |
83 | r = sub(r, t); |
84 | return r; |
85 | #elif SIMDPP_USE_NEON |
86 | return int16x8(vabsq_s16(a.native())); |
87 | #elif SIMDPP_USE_ALTIVEC |
88 | // expands to 3 instructions |
89 | return (__vector uint16_t) vec_abs(a.native()); |
90 | #elif SIMDPP_USE_MSA |
91 | int16<8> zero = make_zero(); |
92 | return (v8u16) __msa_add_a_h(a.native(), zero.native()); |
93 | #endif |
94 | } |
95 | |
#if SIMDPP_USE_AVX2
/** Element-wise absolute value of sixteen signed 16-bit lanes (AVX2 only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint16<16> i_iabs(const int16<16>& a)
{
    const __m256i abs_lanes = _mm256_abs_epi16(a.native());
    return abs_lanes;
}
#endif
103 | |
#if SIMDPP_USE_AVX512BW
/** Element-wise absolute value of thirty-two signed 16-bit lanes
    (AVX512BW only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint16<32> i_iabs(const int16<32>& a)
{
    const __m512i abs_lanes = _mm512_abs_epi16(a.native());
    return abs_lanes;
}
#endif
111 | |
112 | // ----------------------------------------------------------------------------- |
113 | |
114 | static SIMDPP_INL |
115 | uint32<4> i_iabs(const int32<4>& a) |
116 | { |
117 | #if SIMDPP_USE_NULL |
118 | return detail::null::abs(a); |
119 | #elif SIMDPP_USE_SSSE3 |
120 | return _mm_abs_epi32(a.native()); |
121 | #elif SIMDPP_USE_SSE2 |
122 | int32<4> t, r; |
123 | t = cmp_lt(a, 0); |
124 | r = bit_xor(a, t); |
125 | r = sub(r, t); |
126 | return r; |
127 | #elif SIMDPP_USE_NEON |
128 | return int32x4(vabsq_s32(a.native())); |
129 | #elif SIMDPP_USE_ALTIVEC |
130 | // expands to 3 instructions |
131 | return (__vector uint32_t) vec_abs(a.native()); |
132 | #elif SIMDPP_USE_MSA |
133 | int32<4> zero = make_zero(); |
134 | return (v4u32) __msa_add_a_w(a.native(), zero.native()); |
135 | #endif |
136 | } |
137 | |
#if SIMDPP_USE_AVX2
/** Element-wise absolute value of eight signed 32-bit lanes (AVX2 only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint32<8> i_iabs(const int32<8>& a)
{
    const __m256i abs_lanes = _mm256_abs_epi32(a.native());
    return abs_lanes;
}
#endif
145 | |
#if SIMDPP_USE_AVX512F
/** Element-wise absolute value of sixteen signed 32-bit lanes
    (AVX512F only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint32<16> i_iabs(const int32<16>& a)
{
    const __m512i abs_lanes = _mm512_abs_epi32(a.native());
    return abs_lanes;
}
#endif
153 | |
154 | // ----------------------------------------------------------------------------- |
155 | |
156 | static SIMDPP_INL |
157 | uint64<2> i_iabs(const int64<2>& a) |
158 | { |
159 | #if SIMDPP_USE_AVX512VL |
160 | return _mm_abs_epi64(a.native()); |
161 | #elif SIMDPP_USE_SSE2 |
162 | uint32x4 ta; |
163 | int64x2 t, r; |
164 | ta = (uint32x4) bit_and(a, 0x8000000000000000); |
165 | ta = shift_r<1>(ta); |
166 | t = cmp_neq(float64x2(ta), 0); |
167 | r = bit_xor(a, t); |
168 | r = sub(r, t); |
169 | return r; |
170 | #elif SIMDPP_USE_NEON |
171 | int32x4 z; |
172 | int64<2> r; |
173 | z = shift_r<63>(uint64x2(a)); |
174 | z = cmp_eq(z, 0); |
175 | z = permute4<0,0,2,2>(z); |
176 | z = bit_not(z); |
177 | int64x2 t; |
178 | t = z; |
179 | r = bit_xor(a, t); |
180 | r = sub(r, t); |
181 | return r; |
182 | #elif SIMDPP_USE_VSX_207 |
183 | // expands to 3 instructions |
184 | return (__vector uint64_t) vec_abs(a.native()); |
185 | #elif SIMDPP_USE_MSA |
186 | int64<2> zero = make_zero(); |
187 | return (v2u64) __msa_add_a_d(a.native(), zero.native()); |
188 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
189 | return detail::null::abs(a); |
190 | #endif |
191 | } |
192 | |
#if SIMDPP_USE_AVX2
/** Element-wise absolute value of four signed 64-bit lanes (AVX2 only).

    The result is handed back as an unsigned vector. With AVX512VL the
    dedicated instruction is used; otherwise the value is emulated with a
    sign mask m as (a ^ m) - m.
*/
static SIMDPP_INL
uint64<4> i_iabs(const int64<4>& a)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_abs_epi64(a.native());
#else
    int64x4 zeros = make_zero();

    // 0 > a is true exactly for the negative lanes.
    int64x4 sign_mask;
    sign_mask = _mm256_cmpgt_epi64(zeros.native(), a.native());

    int64x4 magnitude;
    magnitude = bit_xor(a, sign_mask);
    magnitude = sub(magnitude, sign_mask);
    return magnitude;
#endif
}
#endif
209 | |
#if SIMDPP_USE_AVX512F
/** Element-wise absolute value of eight signed 64-bit lanes (AVX512F only).

    The result is handed back as an unsigned vector.
*/
static SIMDPP_INL
uint64<8> i_iabs(const int64<8>& a)
{
    const __m512i abs_lanes = _mm512_abs_epi64(a.native());
    return abs_lanes;
}
#endif
217 | |
218 | // ----------------------------------------------------------------------------- |
219 | |
/** Generic overload, selected for vector types that have no dedicated
    non-template i_iabs overload.

    The whole body is produced by SIMDPP_VEC_ARRAY_IMPL1, which is defined
    outside this file. It is invoked with the result type V, the operation
    name i_iabs and the argument a; presumably it applies i_iabs to each
    sub-vector of a and collects the results -- confirm against the macro
    definition.

    NOTE(review): this overload returns V itself, whereas the fixed-width
    overloads in this file return the unsigned counterpart of their argument
    type -- confirm that the difference is intended.
*/
template<class V> SIMDPP_INL
V i_iabs(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(V, i_iabs, a)
}
225 | |
226 | } // namespace insn |
227 | } // namespace detail |
228 | } // namespace SIMDPP_ARCH_NAMESPACE |
229 | } // namespace simdpp |
230 | |
231 | #endif |
232 | |
233 | |