1 | /* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_DETAIL_NULL_BITWISE_H |
9 | #define LIBSIMDPP_DETAIL_NULL_BITWISE_H |
10 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC |
11 | |
12 | #ifndef LIBSIMDPP_SIMD_H |
13 | #error "This file must be included through simd.h" |
14 | #endif |
15 | |
16 | #include <simdpp/types.h> |
17 | #include <simdpp/core/cast.h> |
18 | #include <simdpp/detail/null/mask.h> |
19 | |
20 | namespace simdpp { |
21 | namespace SIMDPP_ARCH_NAMESPACE { |
22 | namespace detail { |
23 | namespace null { |
24 | |
25 | /* *_vm variants accept a vector as the first argument and a mask as the second |
26 | *_mm variants accept masks as both arguments |
27 | */ |
28 | |
29 | template<class V> SIMDPP_INL |
30 | V bit_and(const V& a, const V& b) |
31 | { |
32 | V r; |
33 | using E = typename V::element_type; |
34 | using U = typename V::uint_element_type; |
35 | for (unsigned i = 0; i < V::length; i++) { |
36 | U a1 = bit_cast<U, E>(a.el(i)); |
37 | U b1 = bit_cast<U, E>(b.el(i)); |
38 | r.el(i) = bit_cast<E, U>(a1 & b1); |
39 | } |
40 | return r; |
41 | } |
42 | |
/** Applies a mask to a vector: AND of a vector with a mask.

    Elements whose mask entry evaluates to true are copied from @a a,
    all other elements of the result are set to 0.

    @tparam V vector type providing length and el()
    @tparam M mask type providing el()
    @param a vector operand
    @param m mask operand
    @return vector holding a.el(i) where m.el(i) is set and 0 elsewhere
*/
template<class V, class M> SIMDPP_INL
V bit_and_vm(const V& a, const M& m)
{
    V result;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        if (m.el(idx)) {
            result.el(idx) = a.el(idx);
        } else {
            result.el(idx) = 0;
        }
    }
    return result;
}
52 | |
/** Computes the element-wise AND of two masks.

    @tparam M mask type providing length and el()
    @param a first mask
    @param b second mask
    @return mask whose i-th element is a.el(i) & b.el(i)
*/
template<class M> SIMDPP_INL
M bit_and_mm(const M& a, const M& b)
{
    M result;
    for (unsigned idx = 0; idx < M::length; ++idx) {
        const auto lhs = a.el(idx);
        const auto rhs = b.el(idx);
        result.el(idx) = lhs & rhs;
    }
    return result;
}
62 | |
63 | |
64 | template<class V> SIMDPP_INL |
65 | V bit_andnot(const V& a, const V& b) |
66 | { |
67 | V r; |
68 | using E = typename V::element_type; |
69 | using U = typename V::uint_element_type; |
70 | for (unsigned i = 0; i < V::length; i++) { |
71 | U a1 = bit_cast<U, E>(a.el(i)); |
72 | U b1 = bit_cast<U, E>(b.el(i)); |
73 | r.el(i) = bit_cast<E, U>(a1 & ~b1); |
74 | } |
75 | return r; |
76 | } |
77 | |
/** Applies an inverted mask to a vector: AND of a vector with NOT mask.

    Elements whose mask entry evaluates to true are set to 0, all other
    elements are copied from @a a.

    @tparam V vector type providing length and el()
    @tparam M mask type providing el()
    @param a vector operand
    @param m mask selecting the elements to clear
    @return vector holding 0 where m.el(i) is set and a.el(i) elsewhere
*/
template<class V, class M> SIMDPP_INL
V bit_andnot_vm(const V& a, const M& m)
{
    V result;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        if (m.el(idx)) {
            result.el(idx) = 0;
        } else {
            result.el(idx) = a.el(idx);
        }
    }
    return result;
}
87 | |
/** Computes the element-wise AND of mask @a a with the inverse of mask @a b.

    The inverse of each element of @a b is obtained by XOR-ing it with 1.
    NOTE(review): this presumably relies on mask elements holding only
    0 or 1 -- confirm against the mask type definition.

    @tparam M mask type providing length and el()
    @param a first mask
    @param b mask that is inverted before being combined
    @return mask whose i-th element is a.el(i) & (b.el(i) ^ 1)
*/
template<class M> SIMDPP_INL
M bit_andnot_mm(const M& a, const M& b)
{
    M result;
    for (unsigned idx = 0; idx < M::length; ++idx) {
        const auto inverted = b.el(idx) ^ 1;
        result.el(idx) = a.el(idx) & inverted;
    }
    return result;
}
97 | |
98 | |
99 | template<class V> SIMDPP_INL |
100 | V bit_or(const V& a, const V& b) |
101 | { |
102 | V r; |
103 | using E = typename V::element_type; |
104 | using U = typename V::uint_element_type; |
105 | for (unsigned i = 0; i < V::length; i++) { |
106 | U a1 = bit_cast<U, E>(a.el(i)); |
107 | U b1 = bit_cast<U, E>(b.el(i)); |
108 | r.el(i) = bit_cast<E, U>(a1 | b1); |
109 | } |
110 | return r; |
111 | } |
112 | |
/** Computes the element-wise OR of two masks.

    @tparam M mask type providing length and el()
    @param a first mask
    @param b second mask
    @return mask whose i-th element is a.el(i) | b.el(i)
*/
template<class M> SIMDPP_INL
M bit_or_mm(const M& a, const M& b)
{
    M result;
    for (unsigned idx = 0; idx < M::length; ++idx) {
        const auto lhs = a.el(idx);
        const auto rhs = b.el(idx);
        result.el(idx) = lhs | rhs;
    }
    return result;
}
122 | |
123 | template<class V> SIMDPP_INL |
124 | V bit_xor(const V& a, const V& b) |
125 | { |
126 | V r; |
127 | using E = typename V::element_type; |
128 | using U = typename V::uint_element_type; |
129 | for (unsigned i = 0; i < V::length; i++) { |
130 | U a1 = bit_cast<U, E>(a.el(i)); |
131 | U b1 = bit_cast<U, E>(b.el(i)); |
132 | r.el(i) = bit_cast<E, U>(a1 ^ b1); |
133 | } |
134 | return r; |
135 | } |
136 | |
/** Computes the element-wise XOR of two masks.

    @tparam M mask type providing length and el()
    @param a first mask
    @param b second mask
    @return mask whose i-th element is a.el(i) ^ b.el(i)
*/
template<class M> SIMDPP_INL
M bit_xor_mm(const M& a, const M& b)
{
    M result;
    for (unsigned idx = 0; idx < M::length; ++idx) {
        const auto lhs = a.el(idx);
        const auto rhs = b.el(idx);
        result.el(idx) = lhs ^ rhs;
    }
    return result;
}
146 | |
/** Inverts every element of a mask.

    Each element is XOR-ed with 1.
    NOTE(review): this presumably relies on mask elements holding only
    0 or 1 -- confirm against the mask type definition.

    @tparam M mask type providing length and el()
    @param a mask to invert
    @return mask whose i-th element is a.el(i) ^ 1
*/
template<class M> SIMDPP_INL
M bit_not_mm(const M& a)
{
    M result;
    for (unsigned idx = 0; idx < M::length; ++idx) {
        const auto flipped = a.el(idx) ^ 1;
        result.el(idx) = flipped;
    }
    return result;
}
156 | |
157 | template<class V> SIMDPP_INL |
158 | bool test_bits_any(const V& a) |
159 | { |
160 | using U = typename V::uint_element_type; |
161 | U r = 0; |
162 | for (unsigned i = 0; i < a.length; ++i) { |
163 | r |= bit_cast<U>(a.el(i)); |
164 | } |
165 | return r != 0; |
166 | } |
167 | |
/** Counts the set bits of an 8-bit value.

    Bit counts are accumulated in parallel inside the value: first per
    2-bit group, then per nibble, and finally the two nibble counts are
    added together. The arithmetic is carried out in unsigned int; no
    intermediate value exceeds 8 bits, so the final narrowing is lossless.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..8)
*/
static SIMDPP_INL
uint8_t el_popcnt8(uint8_t v)
{
    unsigned bits = v;
    // each 2-bit group now holds the number of set bits it contained
    bits -= (bits >> 1) & 0x55u;
    // each nibble now holds the number of set bits it contained
    bits = ((bits >> 2) & 0x33u) + (bits & 0x33u);
    // add the high nibble count onto the low one and drop the rest
    bits = (bits + (bits >> 4)) & 0x0fu;
    return static_cast<uint8_t>(bits);
}
179 | |
/** Counts the set bits of a 16-bit value.

    Bit counts are accumulated in parallel inside the value: first per
    2-bit group, then per nibble, then per byte, and finally the two byte
    counts are added together. The arithmetic is carried out in unsigned
    int; no intermediate value exceeds 16 bits, so the final narrowing is
    lossless.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..16)
*/
static SIMDPP_INL
uint16_t el_popcnt16(uint16_t v)
{
    unsigned bits = v;
    // each 2-bit group now holds the number of set bits it contained
    bits -= (bits >> 1) & 0x5555u;
    // each nibble now holds the number of set bits it contained
    bits = ((bits >> 2) & 0x3333u) + (bits & 0x3333u);
    // each byte now holds the number of set bits it contained
    bits = (bits + (bits >> 4)) & 0x0f0fu;
    // add the high byte count onto the low one; only the low byte is kept
    bits += bits >> 8;
    return static_cast<uint16_t>(bits & 0x00ffu);
}
193 | |
/** Counts the set bits of a 32-bit value.

    Bit counts are accumulated in parallel inside the value: first per
    2-bit group, then per nibble, then per byte. The four byte counts are
    finally added together with a single multiplication.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..32)
*/
static SIMDPP_INL
uint32_t el_popcnt32(uint32_t v)
{
    const uint32_t pair_mask = 0x55555555;
    const uint32_t quad_mask = 0x33333333;
    const uint32_t nibble_mask = 0x0f0f0f0f;
    const uint32_t byte_ones = 0x01010101;

    // each 2-bit group now holds the number of set bits it contained
    v -= (v >> 1) & pair_mask;
    // each nibble now holds the number of set bits it contained
    v = ((v >> 2) & quad_mask) + (v & quad_mask);
    // each byte now holds the number of set bits it contained
    v += v >> 4;
    v &= nibble_mask;
    // The multiplication sums all four byte counts into the top byte, so
    // 1 mul + 1 shift replaces 2 adds + 2 shifts.
    return (v * byte_ones) >> 24;
}
207 | |
/** Counts the set bits of a 64-bit value.

    Bit counts are accumulated in parallel inside the value: first per
    2-bit group, then per nibble, then per byte. The eight byte counts are
    finally added together with a single multiplication.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..64)
*/
static SIMDPP_INL
uint64_t el_popcnt64(uint64_t v)
{
    const uint64_t pair_mask = 0x5555555555555555;
    const uint64_t quad_mask = 0x3333333333333333;
    const uint64_t nibble_mask = 0x0f0f0f0f0f0f0f0f;
    const uint64_t byte_ones = 0x0101010101010101;

    // each 2-bit group now holds the number of set bits it contained
    v -= (v >> 1) & pair_mask;
    // each nibble now holds the number of set bits it contained
    v = ((v >> 2) & quad_mask) + (v & quad_mask);
    // each byte now holds the number of set bits it contained
    v += v >> 4;
    v &= nibble_mask;
    // The multiplication sums all eight byte counts into the top byte, so
    // 1 mul + 1 shift replaces 3 adds + 3 shifts.
    return (v * byte_ones) >> 56;
}
221 | |
222 | } // namespace null |
223 | } // namespace detail |
224 | } // namespace SIMDPP_ARCH_NAMESPACE |
225 | } // namespace simdpp |
226 | |
227 | #endif |
228 | #endif |
229 | |