bitfield.h source code [ClickHouse/contrib/hyperscan/src/util/bitfield.h]

1	/*
2	* Copyright (c) 2015-2018, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/* \file*
30	* \brief Fast bitset class with find_first and find_next operations.
31	*/
32
33	#ifndef BITFIELD_H
34	#define BITFIELD_H
35
36	#include "ue2common.h"
37	#include "popcount.h"
38	#include "util/bitutils.h"
39	#include "util/hash.h"
40
41	#include <array>
42	#include <cassert>
43
44	#include <boost/dynamic_bitset.hpp>
45
46	namespace ue2 {
47
48	/**
49	* \brief Templated bitset class with find_first and find_next operations.
50	*
51	* This is a simple (but hopefully fast) class to replace our use of
52	* std::bitset<>.
53	*
54	* Note: underlying storage is allocated as an array of 64-bit blocks. All
55	* mutating operations MUST ensure that the trailer (the bits between
56	* requested_size and the end of the array) is filled with zeroes; there's a
57	* clear_trailer member function for this.
58	*/
59	template<size_t requested_size>
60	class bitfield {
61	public:
62	/// Empty constructor, zero initializes all bits.
63	bitfield() : bits{{`0`}} {
64	assert(none());
65	}
66
67	bitfield(const boost::dynamic_bitset<> &a) : bits{{`0`}} {
68	assert(a.size() == requested_size);
69	assert(none());
70	for (auto i = a.find_first(); i != a.npos; i = a.find_next(i)) {
71	set(i);
72	}
73	}
74
75	/// Complete bitset equality.
76	bool operator==(const bitfield &a) const {
77	return bits == a.bits;
78	}
79
80	/// Inequality.
81	bool operator!=(const bitfield &a) const {
82	return bits != a.bits;
83	}
84
85	/// Ordering.
86	bool operator<(const bitfield &a) const {
87	return bits < a.bits;
88	}
89
90	/// Set all bits.
91	void setall() {
92	for (auto &e : bits) {
93	e = all_ones;
94	}
95	clear_trailer();
96	}
97
98	/// Set all bits (alias for bitset::setall, to match dynamic_bitset).
99	void set() {
100	setall();
101	}
102
103	/// Clear all bits.
104	void clear() {
105	for (auto &e : bits) {
106	e = `0`;
107	}
108	}
109
110	/// Clear all bits (alias for bitset::clear).
111	void reset() {
112	clear();
113	}
114
115	/// Clear bit N.
116	void clear(size_t n) {
117	assert(n < size());
118	bits[getword(n)] &= ~maskbit(n);
119	}
120
121	/// Set bit N.
122	void set(size_t n) {
123	assert(n < size());
124	bits[getword(n)] \|= maskbit(n);
125	}
126
127	/// Test bit N.
128	bool test(size_t n) const {
129	assert(n < size());
130	return bits[getword(n)] & maskbit(n);
131	}
132
133	/// Flip bit N.
134	void flip(size_t n) {
135	assert(n < size());
136	bits[getword(n)] ^= maskbit(n);
137	}
138
139	/// Flip all bits.
140	void flip() {
141	for (auto &e : bits) {
142	e = ~e;
143	}
144	clear_trailer();
145	}
146
147	/// Switch on the bit in the range [from, to], inclusive.
148	void set_range(size_t from, size_t to) {
149	assert(from <= to);
150	assert(to < requested_size);
151
152	if (from / block_size == to / block_size) {
153	// Small case, our indices are in the same block.
154	block_type block = all_ones << (from % block_size);
155	if (to % block_size != block_size - `1`) {
156	block &= maskbit(to + `1`) - `1`;
157	}
158	bits[from / block_size] \|= block;
159	return;
160	}
161
162	// Large case, work in block units. Write a partial mask, then a
163	// run of all-ones blocks, then a partial mask at the end.
164	size_t i = from;
165	if (i % block_size) {
166	block_type block = all_ones << (i % block_size);
167	bits[i / block_size] \|= block;
168	i = ROUNDUP_N(i, block_size);
169	}
170
171	for (; i + block_size <= to + `1`; i += block_size) {
172	bits[i / block_size] = all_ones;
173	}
174
175	if (i <= to) {
176	assert(to - i + `1` < block_size);
177	bits[i / block_size] \|= (maskbit(to + `1`) - `1`);
178	}
179	}
180
181	/// Returns total number of bits.
182	static constexpr size_t size() {
183	return requested_size;
184	}
185
186	/// Returns number of bits set on.
187	size_t count() const {
188	static_assert(block_size == `64`, "adjust popcount for block_type");
189	size_t sum = `0`;
190	size_t i = `0`;
191	for (; i + `4` <= num_blocks; i += `4`) {
192	sum += popcount64(bits[i]);
193	sum += popcount64(bits[i + `1`]);
194	sum += popcount64(bits[i + `2`]);
195	sum += popcount64(bits[i + `3`]);
196	}
197	for (; i < num_blocks; i++) {
198	sum += popcount64(bits[i]);
199	}
200	assert(sum <= size());
201	return sum;
202	}
203
204	/// Are no bits set?
205	bool none() const {
206	for (const auto &e : bits) {
207	if (e != `0`) {
208	return false;
209	}
210	}
211	return true;
212	}
213
214	/// Is any bit set?
215	bool any() const {
216	return !none();
217	}
218
219	/// Are all bits set?
220	bool all() const {
221	for (size_t i = `0`; i < bits.size() - `1`; i++) {
222	if (bits[i] != all_ones) {
223	return false;
224	}
225	}
226	size_t rem = requested_size % block_size;
227	block_type exp = rem ? ((block_type{`1`} << rem) - `1`) : all_ones;
228	return *bits.rbegin() == exp;
229	}
230
231	/// Returns first bit set, or bitfield::npos if none set.
232	size_t find_first() const {
233	for (size_t i = `0`; i < bits.size(); i++) {
234	if (bits[i] != `0`) {
235	return (i * block_size) + word_ctz(i);
236	}
237	}
238	return npos;
239	}
240
241	// Returns last bit set, or bitfield::npos if none set.
242	size_t find_last() const {
243	for (int i = bits.size() - `1`; i >= `0`; i--) {
244	if (bits[i]) {
245	static_assert(block_size == `64`, "adjust clz for block_type");
246	return (i * block_size) + block_size - `1` - clz64(bits[i]);
247	}
248	}
249	return npos;
250	}
251
252	/// Returns next bit set, or bitfield::npos if none set after 'last'.
253	size_t find_next(size_t last) const {
254	if (last >= size()) {
255	return npos;
256	}
257
258	// check current word.
259	size_t i = getword(last);
260	block_type lastword = bits[i];
261
262	if ((last % block_size) != (block_size - `1`)) {
263	lastword &= (all_ones << ((last % block_size) + `1`));
264
265	if (lastword) {
266	static_assert(block_size == `64`, "adjust ctz for block_type");
267	return (i * block_size) + ctz64(lastword);
268	}
269	}
270
271	// check the rest.
272	for (i++; i < bits.size(); i++) {
273	if (bits[i]) {
274	return (i * block_size) + word_ctz(i);
275	}
276	}
277
278	return npos;
279	}
280
281	size_t find_nth(size_t n) const {
282	assert(n < npos);
283
284	static_assert(block_size == `64`, "adjust for block_type");
285
286	size_t sum = `0`;
287	for (size_t i = `0`; i < bits.size(); i++) {
288	block_type block = bits[i];
289	size_t aftersum = sum + popcount64(block);
290	if (aftersum > n) { // Block contains the nth bit.
291	for (; sum < n; sum++) {
292	assert(block);
293	block &= (block - `1`);
294	}
295	assert(block);
296	size_t bit = (i * block_size) + ctz64(block);
297	assert(test(bit));
298	return bit;
299	}
300	sum = aftersum;
301	}
302
303	assert(count() < n + `1`);
304	return npos;
305	}
306
307	/// Bitwise OR.
308	bitfield operator\|(const bitfield &a) const {
309	bitfield b = a;
310	b \|= *this;
311	return b;
312	}
313
314	/// Bitwise OR-equals.
315	void operator\|=(const bitfield &a) {
316	size_t i = `0`;
317	for (; i + `4` <= num_blocks; i += `4`) {
318	bits[i] \|= a.bits[i];
319	bits[i + `1`] \|= a.bits[i + `1`];
320	bits[i + `2`] \|= a.bits[i + `2`];
321	bits[i + `3`] \|= a.bits[i + `3`];
322	}
323	for (; i < num_blocks; i++) {
324	bits[i] \|= a.bits[i];
325	}
326	}
327
328	/// Bitwise AND.
329	bitfield operator&(const bitfield &a) const {
330	bitfield b = a;
331	b &= *this;
332	return b;
333	}
334
335	/// Bitwise AND-equals.
336	void operator&=(const bitfield &a) {
337	size_t i = `0`;
338	for (; i + `4` <= num_blocks; i += `4`) {
339	bits[i] &= a.bits[i];
340	bits[i + `1`] &= a.bits[i + `1`];
341	bits[i + `2`] &= a.bits[i + `2`];
342	bits[i + `3`] &= a.bits[i + `3`];
343	}
344	for (; i < num_blocks; i++) {
345	bits[i] &= a.bits[i];
346	}
347	}
348
349	/// Bitwise XOR.
350	bitfield operator^(bitfield a) const {
351	a ^= *this;
352	return a;
353	}
354
355	/// Bitwise XOR-equals.
356	void operator^=(bitfield a) {
357	size_t i = `0`;
358	for (; i + `4` <= num_blocks; i += `4`) {
359	bits[i] ^= a.bits[i];
360	bits[i + `1`] ^= a.bits[i + `1`];
361	bits[i + `2`] ^= a.bits[i + `2`];
362	bits[i + `3`] ^= a.bits[i + `3`];
363	}
364	for (; i < num_blocks; i++) {
365	bits[i] ^= a.bits[i];
366	}
367	}
368
369	/// Bitwise complement.
370	bitfield operator~(void) const {
371	bitfield cr(*this);
372	cr.flip();
373	return cr;
374	}
375
376	/// Simple hash.
377	size_t hash() const {
378	return ue2_hasher ()(bits);
379	}
380
381	/// Sentinel value meaning "no more bits", used by find_first and
382	/// find_next.
383	static constexpr size_t npos = requested_size;
384
385	private:
386	/// Underlying block type.
387	using block_type = u64a;
388
389	/// A block filled with on bits.
390	static constexpr block_type all_ones = ~block_type{`0`};
391
392	/// Size of a block.
393	static constexpr size_t block_size = sizeof(block_type) * `8`;
394
395	static size_t getword(size_t n) {
396	return n / block_size;
397	}
398
399	static block_type maskbit(size_t n) {
400	return (block_type{`1`} << (n % block_size));
401	}
402
403	size_t word_ctz(size_t n) const {
404	static_assert(block_size == `64`, "adjust ctz call for block type");
405	return ctz64(bits[n]);
406	}
407
408	/// Ensures that bits between our requested size and the end of storage are
409	/// zero.
410	void clear_trailer() {
411	size_t final_bits = requested_size % block_size;
412	if (final_bits) {
413	bits.back() &= ((block_type{`1`} << final_bits) - `1`);
414	}
415	}
416
417	/// Size of storage array of blocks.
418	static constexpr size_t num_blocks =
419	(requested_size + block_size - `1`) / block_size;
420
421	/// Underlying storage.
422	std::array<block_type, num_blocks> bits;
423	};
424
425	} // namespace ue2
426
427	namespace std {
428
429	template<size_t requested_size>
430	struct hash<ue2::bitfield<requested_size>> {
431	size_t operator()(const ue2::bitfield<requested_size> &b) const {
432	return b.hash();
433	}
434	};
435
436	} // namespace std
437
438	#endif // BITFIELD_H
439

Browse the source code of ClickHouse/contrib/hyperscan/src/util/bitfield.h