1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/bit_xor.h>
17#include <simdpp/core/to_mask.h>
18#include <simdpp/detail/null/bitwise.h>
19#include <simdpp/detail/vector_array_macros.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23namespace detail {
24namespace insn {
25
26
27static SIMDPP_INL
28uint8x16 i_bit_not(const uint8x16& a)
29{
30#if SIMDPP_USE_NULL
31 uint8x16 r;
32 for (unsigned i = 0; i < a.length; i++) {
33 r.el(i) = ~a.el(i);
34 }
35 return r;
36#elif SIMDPP_USE_SSE2
37 return bit_xor(a, 0xff);
38#elif SIMDPP_USE_NEON
39 return vmvnq_u8(a.native());
40#elif SIMDPP_USE_ALTIVEC
41 return vec_nor(a.native(), a.native());
42#elif SIMDPP_USE_MSA
43 return __msa_nor_v(a.native(), a.native());
44#endif
45}
46
#if SIMDPP_USE_AVX2
// Complements a 256-bit vector of 8-bit elements by XOR-ing each byte
// with 0xff.
static SIMDPP_INL
uint8x32 i_bit_not(const uint8x32& a)
{
    const uint8x32 inverted = bit_xor(a, 0xff);
    return inverted;
}
#endif
54
#if SIMDPP_USE_AVX512BW
// Complements a 512-bit vector of 8-bit elements.
// The same register is passed as all three ternary-logic operands, so only
// truth-table entries 0 (all bits clear) and 7 (all bits set) can be selected;
// table 0x1 maps 0 -> 1 and 1 -> 0, which is a bitwise NOT.
SIMDPP_INL uint8<64> i_bit_not(const uint8<64>& a)
{
    const __m512i src = a.native();
    return _mm512_ternarylogic_epi32(src, src, src, 0x1);
}
#endif
62
63// -----------------------------------------------------------------------------
64
65SIMDPP_INL uint16<8> i_bit_not(const uint16<8>& a)
66{
67 return uint16<8>(i_bit_not(uint8<16>(a)));
68}
69
#if SIMDPP_USE_AVX2
// Complements a 256-bit vector of 16-bit elements via the 8-bit overload.
SIMDPP_INL uint16<16> i_bit_not(const uint16<16>& a)
{
    const uint8<32> as_bytes(a);
    return uint16<16>(i_bit_not(as_bytes));
}
#endif
76
#if SIMDPP_USE_AVX512BW
// Complements a 512-bit vector of 16-bit elements.
// Ternary logic with three identical operands and truth table 0x1 yields NOT.
SIMDPP_INL uint16<32> i_bit_not(const uint16<32>& a)
{
    const __m512i src = a.native();
    return _mm512_ternarylogic_epi32(src, src, src, 0x1);
}
#endif
84
85// -----------------------------------------------------------------------------
86
87static SIMDPP_INL
88uint32<4> i_bit_not(const uint32<4>& a)
89{
90 return uint32<4>(i_bit_not(uint8<16>(a)));
91}
92
#if SIMDPP_USE_AVX2
// Complements a 256-bit vector of 32-bit elements via the 8-bit overload.
static SIMDPP_INL
uint32<8> i_bit_not(const uint32<8>& a)
{
    const uint8<32> as_bytes(a);
    return uint32<8>(i_bit_not(as_bytes));
}
#endif
100
#if SIMDPP_USE_AVX512F
// Complements a 512-bit vector of 32-bit elements.
// Ternary logic with three identical operands and truth table 0x1 yields NOT.
static SIMDPP_INL
uint32<16> i_bit_not(const uint32<16>& a)
{
    const __m512i src = a.native();
    return _mm512_ternarylogic_epi32(src, src, src, 0x1);
}
#endif
109
110// -----------------------------------------------------------------------------
111
112static SIMDPP_INL
113uint64<2> i_bit_not(const uint64<2>& a)
114{
115#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
116 uint64x2 r;
117 for (unsigned i = 0; i < a.length; i++) {
118 r.el(i) = ~a.el(i);
119 }
120 return r;
121#else
122 return uint64<2>(i_bit_not(uint8<16>(a)));
123#endif
124}
125
#if SIMDPP_USE_AVX2
// Complements a 256-bit vector of 64-bit elements via the 8-bit overload.
static SIMDPP_INL
uint64<4> i_bit_not(const uint64<4>& a)
{
    const uint8<32> as_bytes(a);
    return uint64<4>(i_bit_not(as_bytes));
}
#endif
133
#if SIMDPP_USE_AVX512F
// Complements a 512-bit vector of 64-bit elements.
// Ternary logic with three identical operands and truth table 0x1 yields NOT.
static SIMDPP_INL
uint64<8> i_bit_not(const uint64<8>& a)
{
    const __m512i src = a.native();
    return _mm512_ternarylogic_epi64(src, src, src, 0x1);
}
#endif
142
143// -----------------------------------------------------------------------------
144
145static SIMDPP_INL
146mask_int8x16 i_bit_not(const mask_int8x16& a)
147{
148#if SIMDPP_USE_NULL
149 return detail::null::bit_not_mm(a);
150#else
151 return to_mask(i_bit_not(uint8x16(a)));
152#endif
153}
154
155static SIMDPP_INL
156mask_int16x8 i_bit_not(const mask_int16x8& a)
157{
158#if SIMDPP_USE_NULL
159 return detail::null::bit_not_mm(a);
160#else
161 return to_mask(i_bit_not(uint16x8(a)));
162#endif
163}
164
165static SIMDPP_INL
166mask_int32x4 i_bit_not(const mask_int32x4& a)
167{
168#if SIMDPP_USE_NULL
169 return detail::null::bit_not_mm(a);
170#else
171 return to_mask(i_bit_not(uint32x4(a)));
172#endif
173}
174
175static SIMDPP_INL
176mask_int64x2 i_bit_not(const mask_int64x2& a)
177{
178#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
179 return detail::null::bit_not_mm(a);
180#else
181 return to_mask(i_bit_not(uint64x2(a)));
182#endif
183}
184
#if SIMDPP_USE_AVX2 && !SIMDPP_USE_AVX512VL
// 256-bit integer masks on AVX2 without AVX-512VL: each mask is converted to
// the unsigned vector of the same element width and complemented; the result
// is converted to the mask return type by the return statement.
static SIMDPP_INL
mask_int8x32 i_bit_not(const mask_int8x32& a)
{
    return i_bit_not(uint8x32(a));
}

static SIMDPP_INL
mask_int16x16 i_bit_not(const mask_int16x16& a)
{
    return i_bit_not(uint16x16(a));
}

static SIMDPP_INL
mask_int32x8 i_bit_not(const mask_int32x8& a)
{
    return i_bit_not(uint32x8(a));
}

static SIMDPP_INL
mask_int64x4 i_bit_not(const mask_int64x4& a)
{
    return i_bit_not(uint64x4(a));
}
#endif
191
#if SIMDPP_USE_AVX512VL
// 256-bit integer masks on AVX-512VL: the native mask value is complemented
// directly with operator~.
static SIMDPP_INL
mask_int8x32 i_bit_not(const mask_int8x32& a)
{
    return ~a.native();
}

static SIMDPP_INL
mask_int16x16 i_bit_not(const mask_int16x16& a)
{
    return ~a.native();
}

static SIMDPP_INL
mask_int32x8 i_bit_not(const mask_int32x8& a)
{
    return ~a.native();
}

static SIMDPP_INL
mask_int64x4 i_bit_not(const mask_int64x4& a)
{
    return ~a.native();
}
#endif
198
#if SIMDPP_USE_AVX512F
// 512-bit integer masks for 32-bit and 64-bit elements: the native mask is
// inverted with the _mm512_knot mask intrinsic.
static SIMDPP_INL
mask_int32<16> i_bit_not(const mask_int32<16>& a)
{
    return _mm512_knot(a.native());
}

static SIMDPP_INL
mask_int64<8> i_bit_not(const mask_int64<8>& a)
{
    return _mm512_knot(a.native());
}
#endif
212
#if SIMDPP_USE_AVX512BW
// 512-bit integer masks for 8-bit and 16-bit elements: the native mask value
// is complemented directly with operator~.
SIMDPP_INL mask_int8<64> i_bit_not(const mask_int8<64>& a)
{
    return ~a.native();
}

SIMDPP_INL mask_int16<32> i_bit_not(const mask_int16<32>& a)
{
    return ~a.native();
}
#endif
224
225// -----------------------------------------------------------------------------
226
227static SIMDPP_INL
228float32x4 i_bit_not(const float32x4& a)
229{
230#if SIMDPP_USE_SSE2
231 return bit_xor(a, 0xffffffff);
232#elif SIMDPP_USE_NEON_FLT_SP
233 return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a.native())));
234#elif SIMDPP_USE_ALTIVEC
235 return vec_nor(a.native(), a.native());
236#elif SIMDPP_USE_NULL || SIMDPP_USE_MSA || SIMDPP_USE_NEON_NO_FLT_SP
237 return float32x4(i_bit_not(uint32x4(a)));
238#endif
239}
240
#if SIMDPP_USE_AVX
// Complements the bit pattern of a 256-bit vector of 32-bit floats by XOR-ing
// with an all-ones 32-bit pattern.
static SIMDPP_INL
float32x8 i_bit_not(const float32x8& a)
{
    const float32x8 inverted = bit_xor(a, 0xffffffff);
    return inverted;
}
#endif
248
#if SIMDPP_USE_AVX512F
// Complements the bit pattern of a 512-bit vector of 32-bit floats.
// The value is cast to the integer domain, inverted with ternary logic
// (three identical operands, truth table 0x1 == NOT) and cast back.
static SIMDPP_INL
float32<16> i_bit_not(const float32<16>& a)
{
    const __m512i bits = _mm512_castps_si512(a.native());
    const __m512i flipped = _mm512_ternarylogic_epi32(bits, bits, bits, 0x1);
    return _mm512_castsi512_ps(flipped);
}
#endif
258
259// -----------------------------------------------------------------------------
260
261static SIMDPP_INL
262float64x2 i_bit_not(const float64x2& a)
263{
264#if SIMDPP_USE_SSE2
265 return bit_xor(a, 0xffffffffffffffff);
266#elif SIMDPP_USE_NEON64
267 return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a.native())));
268#elif SIMDPP_USE_VSX_206
269 return vec_nor(a.native(), a.native());
270#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
271 return float64x2(i_bit_not(uint64x2(a)));
272#endif
273}
274
#if SIMDPP_USE_AVX
// Complements the bit pattern of a 256-bit vector of 64-bit floats by XOR-ing
// with an all-ones 64-bit pattern.
static SIMDPP_INL
float64x4 i_bit_not(const float64x4& a)
{
    const float64x4 inverted = bit_xor(a, 0xffffffffffffffff);
    return inverted;
}
#endif
282
#if SIMDPP_USE_AVX512F
// Complements the bit pattern of a 512-bit vector of 64-bit floats.
// The value is cast to the integer domain, inverted with ternary logic
// (three identical operands, truth table 0x1 == NOT) and cast back.
static SIMDPP_INL
float64<8> i_bit_not(const float64<8>& a)
{
    const __m512i bits = _mm512_castpd_si512(a.native());
    const __m512i flipped = _mm512_ternarylogic_epi64(bits, bits, bits, 0x1);
    return _mm512_castsi512_pd(flipped);
}
#endif
292
293// -----------------------------------------------------------------------------
294
295static SIMDPP_INL
296mask_float32x4 i_bit_not(const mask_float32x4& a)
297{
298#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
299 return detail::null::bit_not_mm(a);
300#else
301 return to_mask(i_bit_not(float32<4>(a)));
302#endif
303}
304
#if SIMDPP_USE_AVX
// Inverts a 256-bit mask for 32-bit float elements: convert to a data vector,
// complement it, then convert back to a mask.
static SIMDPP_INL
mask_float32x8 i_bit_not(const mask_float32x8& a)
{
    const float32x8 bits(a);
    return to_mask(i_bit_not(bits));
}
#endif
312
#if SIMDPP_USE_AVX512F
// Inverts a 512-bit mask for 32-bit float elements using the _mm512_knot
// mask intrinsic on the native mask value.
static SIMDPP_INL
mask_float32<16> i_bit_not(const mask_float32<16>& a)
{
    return _mm512_knot(a.native());
}
#endif
320
321// -----------------------------------------------------------------------------
322
323static SIMDPP_INL
324mask_float64x2 i_bit_not(const mask_float64x2& a)
325{
326#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
327 return detail::null::bit_not_mm(a);
328#else
329 return to_mask(i_bit_not(float64x2(a)));
330#endif
331}
332
#if SIMDPP_USE_AVX
// Inverts a 256-bit mask for 64-bit float elements: convert to a data vector,
// complement it, then convert back to a mask.
static SIMDPP_INL
mask_float64x4 i_bit_not(const mask_float64x4& a)
{
    const float64x4 bits(a);
    return to_mask(i_bit_not(bits));
}
#endif
340
#if SIMDPP_USE_AVX512F
// Inverts a 512-bit mask for 64-bit float elements using the _mm512_knot
// mask intrinsic on the native mask value.
static SIMDPP_INL
mask_float64<8> i_bit_not(const mask_float64<8>& a)
{
    return _mm512_knot(a.native());
}
#endif
348
349// -----------------------------------------------------------------------------
350
351template<class V> SIMDPP_INL
352V i_bit_not(const V& a)
353{
354 SIMDPP_VEC_ARRAY_IMPL1(V, i_bit_not, a)
355}
356
357} // namespace insn
358} // namespace detail
359} // namespace SIMDPP_ARCH_NAMESPACE
360} // namespace simdpp
361
362#endif
363
364