1/* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_DETAIL_NULL_BITWISE_H
9#define LIBSIMDPP_DETAIL_NULL_BITWISE_H
10#if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
11
12#ifndef LIBSIMDPP_SIMD_H
13 #error "This file must be included through simd.h"
14#endif
15
16#include <simdpp/types.h>
17#include <simdpp/core/cast.h>
18#include <simdpp/detail/null/mask.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace null {
24
25/* *_vm variants accept a vector as the first argument and a mask as the second
26 *_mm variants accept masks as both arguments
27*/
28
29template<class V> SIMDPP_INL
30V bit_and(const V& a, const V& b)
31{
32 V r;
33 using E = typename V::element_type;
34 using U = typename V::uint_element_type;
35 for (unsigned i = 0; i < V::length; i++) {
36 U a1 = bit_cast<U, E>(a.el(i));
37 U b1 = bit_cast<U, E>(b.el(i));
38 r.el(i) = bit_cast<E, U>(a1 & b1);
39 }
40 return r;
41}
42
/* AND of a vector with a mask.

   Elements of `a` whose mask element in `m` evaluates to true are copied to
   the result; every other element of the result is set to 0.
*/
template<class V, class M> SIMDPP_INL
V bit_and_vm(const V& a, const M& m)
{
    V result;
    for (unsigned lane = 0; lane < V::length; ++lane) {
        if (m.el(lane)) {
            result.el(lane) = a.el(lane);
        } else {
            result.el(lane) = 0;
        }
    }
    return result;
}
52
/* Element-wise AND of two masks: each result element is a.el(i) & b.el(i). */
template<class M> SIMDPP_INL
M bit_and_mm(const M& a, const M& b)
{
    M result;
    for (unsigned lane = 0; lane < M::length; ++lane) {
        const auto& lhs = a.el(lane);
        const auto& rhs = b.el(lane);
        result.el(lane) = lhs & rhs;
    }
    return result;
}
62
63
64template<class V> SIMDPP_INL
65V bit_andnot(const V& a, const V& b)
66{
67 V r;
68 using E = typename V::element_type;
69 using U = typename V::uint_element_type;
70 for (unsigned i = 0; i < V::length; i++) {
71 U a1 = bit_cast<U, E>(a.el(i));
72 U b1 = bit_cast<U, E>(b.el(i));
73 r.el(i) = bit_cast<E, U>(a1 & ~b1);
74 }
75 return r;
76}
77
/* AND-NOT of a vector with a mask.

   Elements of `a` whose mask element in `m` evaluates to true are replaced by
   0 in the result; all other elements are copied through unchanged.
*/
template<class V, class M> SIMDPP_INL
V bit_andnot_vm(const V& a, const M& m)
{
    V result;
    for (unsigned lane = 0; lane < V::length; ++lane) {
        if (m.el(lane)) {
            result.el(lane) = 0;
        } else {
            result.el(lane) = a.el(lane);
        }
    }
    return result;
}
87
/* Element-wise AND-NOT of two masks.

   Each element of `b` is flipped by XOR-ing it with 1 and then ANDed with the
   corresponding element of `a`.
*/
template<class M> SIMDPP_INL
M bit_andnot_mm(const M& a, const M& b)
{
    M result;
    for (unsigned lane = 0; lane < M::length; ++lane) {
        const auto flipped = b.el(lane) ^ 1;
        result.el(lane) = a.el(lane) & flipped;
    }
    return result;
}
97
98
99template<class V> SIMDPP_INL
100V bit_or(const V& a, const V& b)
101{
102 V r;
103 using E = typename V::element_type;
104 using U = typename V::uint_element_type;
105 for (unsigned i = 0; i < V::length; i++) {
106 U a1 = bit_cast<U, E>(a.el(i));
107 U b1 = bit_cast<U, E>(b.el(i));
108 r.el(i) = bit_cast<E, U>(a1 | b1);
109 }
110 return r;
111}
112
/* Element-wise OR of two masks: each result element is a.el(i) | b.el(i). */
template<class M> SIMDPP_INL
M bit_or_mm(const M& a, const M& b)
{
    M result;
    for (unsigned lane = 0; lane < M::length; ++lane) {
        const auto& lhs = a.el(lane);
        const auto& rhs = b.el(lane);
        result.el(lane) = lhs | rhs;
    }
    return result;
}
122
123template<class V> SIMDPP_INL
124V bit_xor(const V& a, const V& b)
125{
126 V r;
127 using E = typename V::element_type;
128 using U = typename V::uint_element_type;
129 for (unsigned i = 0; i < V::length; i++) {
130 U a1 = bit_cast<U, E>(a.el(i));
131 U b1 = bit_cast<U, E>(b.el(i));
132 r.el(i) = bit_cast<E, U>(a1 ^ b1);
133 }
134 return r;
135}
136
/* Element-wise XOR of two masks: each result element is a.el(i) ^ b.el(i). */
template<class M> SIMDPP_INL
M bit_xor_mm(const M& a, const M& b)
{
    M result;
    for (unsigned lane = 0; lane < M::length; ++lane) {
        const auto& lhs = a.el(lane);
        const auto& rhs = b.el(lane);
        result.el(lane) = lhs ^ rhs;
    }
    return result;
}
146
/* Inverts a mask: each result element is the source element XOR-ed with 1. */
template<class M> SIMDPP_INL
M bit_not_mm(const M& a)
{
    M result;
    for (unsigned lane = 0; lane < M::length; ++lane) {
        const auto& source = a.el(lane);
        result.el(lane) = source ^ 1;
    }
    return result;
}
156
157template<class V> SIMDPP_INL
158bool test_bits_any(const V& a)
159{
160 using U = typename V::uint_element_type;
161 U r = 0;
162 for (unsigned i = 0; i < a.length; ++i) {
163 r |= bit_cast<U>(a.el(i));
164 }
165 return r != 0;
166}
167
/* Counts the set bits of an 8-bit value using the parallel (SWAR) reduction:
   the counts of progressively wider bit fields are summed in place.
*/
static SIMDPP_INL
uint8_t el_popcnt8(uint8_t v)
{
    // each 2-bit field now holds the number of bits that were set in it
    v = static_cast<uint8_t>(v - ((v >> 1) & 0x55));
    // combine adjacent 2-bit counts into 4-bit counts
    v = static_cast<uint8_t>((v & 0x33) + ((v >> 2) & 0x33));
    // add the two nibble counts; the total ends up in the low nibble
    return static_cast<uint8_t>((v + (v >> 4)) & 0x0f);
}
179
/* Counts the set bits of a 16-bit value using the parallel (SWAR) reduction:
   the counts of progressively wider bit fields are summed in place.
*/
static SIMDPP_INL
uint16_t el_popcnt16(uint16_t v)
{
    // each 2-bit field now holds the number of bits that were set in it
    v = static_cast<uint16_t>(v - ((v >> 1) & 0x5555));
    // combine adjacent 2-bit counts into 4-bit counts
    v = static_cast<uint16_t>((v & 0x3333) + ((v >> 2) & 0x3333));
    // combine adjacent 4-bit counts into per-byte counts
    v = static_cast<uint16_t>((v + (v >> 4)) & 0x0f0f);
    // add the high byte count to the low byte count and drop the high byte
    return static_cast<uint16_t>((v + (v >> 8)) & 0x00ff);
}
193
/* Counts the set bits of a 32-bit value using the parallel (SWAR) reduction.
   Per-byte counts are built first and then summed with a single multiply.
*/
static SIMDPP_INL
uint32_t el_popcnt32(uint32_t v)
{
    const uint32_t pair_mask = 0x55555555;
    const uint32_t quad_mask = 0x33333333;
    const uint32_t byte_mask = 0x0f0f0f0f;

    // each 2-bit field now holds the number of bits that were set in it
    v -= (v >> 1) & pair_mask;
    // combine adjacent 2-bit counts into 4-bit counts
    v = (v & quad_mask) + ((v >> 2) & quad_mask);
    // combine adjacent 4-bit counts into per-byte counts
    v = (v + (v >> 4)) & byte_mask;
    // one multiply accumulates all four byte counts in the top byte, which
    // the shift then extracts (replaces 2 adds + 2 shifts)
    return (v * 0x01010101) >> 24;
}
207
/* Counts the set bits of a 64-bit value using the parallel (SWAR) reduction.
   Per-byte counts are built first and then summed with a single multiply.
*/
static SIMDPP_INL
uint64_t el_popcnt64(uint64_t v)
{
    const uint64_t pair_mask = 0x5555555555555555;
    const uint64_t quad_mask = 0x3333333333333333;
    const uint64_t byte_mask = 0x0f0f0f0f0f0f0f0f;

    // each 2-bit field now holds the number of bits that were set in it
    v -= (v >> 1) & pair_mask;
    // combine adjacent 2-bit counts into 4-bit counts
    v = (v & quad_mask) + ((v >> 2) & quad_mask);
    // combine adjacent 4-bit counts into per-byte counts
    v = (v + (v >> 4)) & byte_mask;
    // one multiply accumulates all eight byte counts in the top byte, which
    // the shift then extracts (replaces 3 adds + 3 shifts)
    return (v * 0x0101010101010101) >> 56;
}
221
222} // namespace null
223} // namespace detail
224} // namespace SIMDPP_ARCH_NAMESPACE
225} // namespace simdpp
226
227#endif
228#endif
229