/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/bit_xor.h>
#include <simdpp/core/to_mask.h>
#include <simdpp/detail/null/bitwise.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

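// Bitwise NOT of a vector. Each overload below picks an implementation for
// the active backend at preprocessor time: a scalar element loop on the
// NULL (non-SIMD) backend, a native inversion where one exists, and a short
// equivalent instruction sequence elsewhere.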
static SIMDPP_INL
uint8x16 i_bit_not(const uint8x16& a)
{
#if SIMDPP_USE_NULL
    uint8x16 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = ~a.el(i);
    }
    return r;
#elif SIMDPP_USE_SSE2
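    // SSE2 has no vector NOT instruction; XOR against all-ones (0xff
    // broadcast to every byte) inverts every bit.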
    return bit_xor(a, 0xff);
#elif SIMDPP_USE_NEON
    return vmvnq_u8(a.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_MSA
    return __msa_nor_v(a.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_bit_not(const uint8x32& a)
{
    return bit_xor(a, 0xff);
}
#endif

#if SIMDPP_USE_AVX512BW
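// _mm512_ternarylogic_epi32 with imm8 = 0x01 produces 1 only where all
// three inputs are 0, i.e. ~(a | b | c); passing the same register three
// times turns that into a plain NOT. Bitwise ops ignore lane width, so the
// epi32 form is also used for 8- and 16-bit elements.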
SIMDPP_INL uint8<64> i_bit_not(const uint8<64>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

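// Bitwise NOT is element-width agnostic, so the wider-element overloads
// below mostly forward to the 8-bit implementation.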
SIMDPP_INL uint16<8> i_bit_not(const uint16<8>& a)
{
    return uint16<8>(i_bit_not(uint8<16>(a)));
}

#if SIMDPP_USE_AVX2
SIMDPP_INL uint16<16> i_bit_not(const uint16<16>& a)
{
    return uint16<16>(i_bit_not(uint8<32>(a)));
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_not(const uint16<32>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint32<4> i_bit_not(const uint32<4>& a)
{
    return uint32<4>(i_bit_not(uint8<16>(a)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_not(const uint32<8>& a)
{
    return uint32<8>(i_bit_not(uint8<32>(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_not(const uint32<16>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint64<2> i_bit_not(const uint64<2>& a)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
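    // The NULL backend and pre-2.07 AltiVec (no native 64-bit integer
    // vectors) invert element by element.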
    uint64x2 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = ~a.el(i);
    }
    return r;
#else
    return uint64<2>(i_bit_not(uint8<16>(a)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_not(const uint64<4>& a)
{
    return uint64<4>(i_bit_not(uint8<32>(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_not(const uint64<8>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi64(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

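// Mask overloads forward to the corresponding integer implementation and
// convert back with to_mask(). The NULL backend keeps masks in a separate
// representation and uses a dedicated helper instead.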
static SIMDPP_INL
mask_int8x16 i_bit_not(const mask_int8x16& a)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint8x16(a)));
#endif
}

static SIMDPP_INL
mask_int16x8 i_bit_not(const mask_int16x8& a)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint16x8(a)));
#endif
}

static SIMDPP_INL
mask_int32x4 i_bit_not(const mask_int32x4& a)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint32x4(a)));
#endif
}

static SIMDPP_INL
mask_int64x2 i_bit_not(const mask_int64x2& a)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint64x2(a)));
#endif
}

#if SIMDPP_USE_AVX2 && !SIMDPP_USE_AVX512VL
static SIMDPP_INL mask_int8x32 i_bit_not(const mask_int8x32& a) { return i_bit_not(uint8x32(a)); }
static SIMDPP_INL mask_int16x16 i_bit_not(const mask_int16x16& a) { return i_bit_not(uint16x16(a)); }
static SIMDPP_INL mask_int32x8 i_bit_not(const mask_int32x8& a) { return i_bit_not(uint32x8(a)); }
static SIMDPP_INL mask_int64x4 i_bit_not(const mask_int64x4& a) { return i_bit_not(uint64x4(a)); }
#endif

#if SIMDPP_USE_AVX512VL
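// With AVX-512VL, 256-bit masks are plain bitmasks held in k-registers, so
// the ordinary integer ~ operator is the entire implementation.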
static SIMDPP_INL mask_int8x32 i_bit_not(const mask_int8x32& a) { return ~a.native(); }
static SIMDPP_INL mask_int16x16 i_bit_not(const mask_int16x16& a) { return ~a.native(); }
static SIMDPP_INL mask_int32x8 i_bit_not(const mask_int32x8& a) { return ~a.native(); }
static SIMDPP_INL mask_int64x4 i_bit_not(const mask_int64x4& a) { return ~a.native(); }
#endif

#if SIMDPP_USE_AVX512F
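// _mm512_knot inverts a __mmask16. For mask_int64<8> the __mmask8 value is
// implicitly widened; the surplus high bits are discarded on conversion
// back, so the result is still correct.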
static SIMDPP_INL
mask_int32<16> i_bit_not(const mask_int32<16>& a)
{
    return _mm512_knot(a.native());
}

static SIMDPP_INL
mask_int64<8> i_bit_not(const mask_int64<8>& a)
{
    return _mm512_knot(a.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_bit_not(const mask_int8<64>& a)
{
    return ~a.native();
}

SIMDPP_INL mask_int16<32> i_bit_not(const mask_int16<32>& a)
{
    return ~a.native();
}
#endif

// -----------------------------------------------------------------------------

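// For floating-point vectors NOT flips the raw bit pattern: XOR with
// all-ones on SSE/AVX, reinterpret-and-invert on NEON, vec_nor on AltiVec,
// and a fall-through to the integer implementation elsewhere.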
static SIMDPP_INL
float32x4 i_bit_not(const float32x4& a)
{
#if SIMDPP_USE_SSE2
    return bit_xor(a, 0xffffffff);
#elif SIMDPP_USE_NEON_FLT_SP
    return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a.native())));
#elif SIMDPP_USE_ALTIVEC
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_MSA || SIMDPP_USE_NEON_NO_FLT_SP
    return float32x4(i_bit_not(uint32x4(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32x8 i_bit_not(const float32x8& a)
{
    return bit_xor(a, 0xffffffff);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_not(const float32<16>& a)
{
    __m512i n = _mm512_castps_si512(a.native());
    n = _mm512_ternarylogic_epi32(n, n, n, 0x1);
    return _mm512_castsi512_ps(n);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
float64x2 i_bit_not(const float64x2& a)
{
#if SIMDPP_USE_SSE2
    return bit_xor(a, 0xffffffffffffffff);
#elif SIMDPP_USE_NEON64
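    // vmvn has no 64-bit form, so the f64 lanes are reinterpreted as u32
    // lanes for the inversion.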
    return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a.native())));
#elif SIMDPP_USE_VSX_206
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    return float64x2(i_bit_not(uint64x2(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64x4 i_bit_not(const float64x4& a)
{
    return bit_xor(a, 0xffffffffffffffff);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_not(const float64<8>& a)
{
    __m512i n = _mm512_castpd_si512(a.native());
    n = _mm512_ternarylogic_epi64(n, n, n, 0x1);
    return _mm512_castsi512_pd(n);
}
#endif

// -----------------------------------------------------------------------------

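// Floating-point mask overloads mirror the integer masks: invert the value
// representation, then re-mask the result.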
static SIMDPP_INL
mask_float32x4 i_bit_not(const mask_float32x4& a)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(float32<4>(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_bit_not(const mask_float32x8& a)
{
    return to_mask(i_bit_not(float32x8(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_not(const mask_float32<16>& a)
{
    return _mm512_knot(a.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64x2 i_bit_not(const mask_float64x2& a)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(float64x2(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_bit_not(const mask_float64x4& a)
{
    return to_mask(i_bit_not(float64x4(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_not(const mask_float64<8>& a)
{
    return _mm512_knot(a.native());
}
#endif

// -----------------------------------------------------------------------------

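// Catch-all for vectors wider than the native width: SIMDPP_VEC_ARRAY_IMPL1
// expands to a loop that applies i_bit_not to each native-width subvector
// of V and returns the combined result.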
template<class V> SIMDPP_INL
V i_bit_not(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(V, i_bit_not, a)
}
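
// A minimal usage sketch. User code reaches these overloads through the
// public simdpp::bit_not wrapper rather than by calling i_bit_not directly;
// the value 0x0f here is an arbitrary example:
//
//     simdpp::uint8<16> v = simdpp::make_uint(0x0f);
//     simdpp::uint8<16> r = simdpp::bit_not(v);  // every byte becomes 0xf0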

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif