1/* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_TEST_BITS_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_TEST_BITS_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/expr.h>
17#include <simdpp/core/bit_and.h>
18#include <simdpp/core/extract.h>
19#include <simdpp/core/move_l.h>
20#include <simdpp/detail/insn/test_bits.h>
21#include <simdpp/detail/null/bitwise.h>
22
23namespace simdpp {
24namespace SIMDPP_ARCH_NAMESPACE {
25namespace detail {
26namespace insn {
27
28static SIMDPP_INL
29bool i_test_bits_any(const uint32<4>& a)
30{
31#if SIMDPP_USE_NULL
32 return null::test_bits_any(a);
33#elif SIMDPP_USE_SSE4_1
34 return !_mm_testz_si128(a.native(), a.native());
35#elif SIMDPP_USE_SSE2
36 uint32<4> r = _mm_cmpeq_epi32(a.native(), _mm_setzero_si128());
37 return _mm_movemask_epi8(r.native()) != 0xffff;
38#elif SIMDPP_USE_NEON
39#if SIMDPP_64_BITS
40 uint64<2> r;
41 r = a;
42 r = bit_or(r, move2_l<1>(r));
43 return extract<0>(r) != 0;
44#else
45 uint32x4 r = bit_or(a, move4_l<2>(a));
46 r = bit_or(r, move4_l<1>(r));
47 return extract<0>(r) != 0;
48#endif
49#elif SIMDPP_USE_ALTIVEC
50 uint32<4> z = make_uint(0);
51 return vec_any_gt(a.native(), z.native());
52#elif SIMDPP_USE_MSA
53 return __msa_test_bnz_v((v16u8) a.native());
54#endif
55}
56
57static SIMDPP_INL
58bool i_test_bits_any(const uint16<8>& a) { return i_test_bits_any(uint32<4>(a)); }
59static SIMDPP_INL
60bool i_test_bits_any(const uint8<16>& a) { return i_test_bits_any(uint32<4>(a)); }
61
62static SIMDPP_INL
63bool i_test_bits_any(const uint64<2>& a)
64{
65#if SIMDPP_USE_VSX_207
66 uint64<2> z = make_zero();
67 return vec_any_gt(a.native(), z.native());
68#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
69 return null::test_bits_any(a);
70#else
71 return i_test_bits_any(uint32<4>(a));
72#endif
73}
74
75static SIMDPP_INL
76bool i_test_bits_any(const float32<4>& a)
77{
78#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
79 return null::test_bits_any(a);
80#else
81 return i_test_bits_any(uint32<4>(a));
82#endif
83}
84
85static SIMDPP_INL
86bool i_test_bits_any(const float64<2>& a)
87{
88#if SIMDPP_USE_VSX_206
89 return i_test_bits_any(uint32<4>(a));
90#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
91 return null::test_bits_any(a);
92#else
93 return i_test_bits_any(uint32<4>(a));
94#endif
95}
96
97#if SIMDPP_USE_AVX
98static SIMDPP_INL
99bool i_test_bits_any(const float32<8>& a)
100{
101 return !_mm256_testz_si256(_mm256_castps_si256(a.native()),
102 _mm256_castps_si256(a.native()));
103}
104static SIMDPP_INL
105bool i_test_bits_any(const float64<4>& a)
106{
107 return !_mm256_testz_si256(_mm256_castpd_si256(a.native()),
108 _mm256_castpd_si256(a.native()));
109}
110#endif
111
112#if SIMDPP_USE_AVX2
113static SIMDPP_INL
114bool i_test_bits_any(const uint8<32>& a) { return !_mm256_testz_si256(a.native(), a.native()); }
115static SIMDPP_INL
116bool i_test_bits_any(const uint16<16>& a) { return !_mm256_testz_si256(a.native(), a.native()); }
117static SIMDPP_INL
118bool i_test_bits_any(const uint32<8>& a) { return !_mm256_testz_si256(a.native(), a.native()); }
119static SIMDPP_INL
120bool i_test_bits_any(const uint64<4>& a) { return !_mm256_testz_si256(a.native(), a.native()); }
121#endif
122
123#if SIMDPP_USE_AVX512F
124static SIMDPP_INL
125bool i_test_bits_any(const uint32<16>& a)
126{
127 return _mm512_test_epi64_mask(a.native(), a.native()) != 0;
128}
129static SIMDPP_INL
130bool i_test_bits_any(const uint64<8>& a) { return i_test_bits_any(uint32<16>(a)); }
131static SIMDPP_INL
132bool i_test_bits_any(const float32<16>& a) { return i_test_bits_any(uint32<16>(a)); }
133static SIMDPP_INL
134bool i_test_bits_any(const float64<8>& a) { return i_test_bits_any(uint32<16>(a)); }
135#endif
136
137#if SIMDPP_USE_AVX512BW
138SIMDPP_INL bool i_test_bits_any(const uint8<64>& a) { return i_test_bits_any(uint32<16>(a)); }
139SIMDPP_INL bool i_test_bits_any(const uint16<32>& a) { return i_test_bits_any(uint32<16>(a)); }
140#endif
141
142template<unsigned N, class V>
143SIMDPP_INL bool i_test_bits_any(const any_vec<N,V>& a)
144{
145 const V& wa = a.wrapped();
146 typename V::base_vector_type r = wa.vec(0);
147 for (unsigned i = 1; i < wa.vec_length; ++i)
148 r = bit_or(r, wa.vec(i));
149 return i_test_bits_any(r);
150}
151
152} // namespace insn
153} // namespace detail
154} // namespace SIMDPP_ARCH_NAMESPACE
155} // namespace simdpp
156
157#endif
158