1/*
2 * bitset.h
3 *
4 */
5
6#ifndef INCLUDE_CONTAINERS_BITSET_H_
7#define INCLUDE_CONTAINERS_BITSET_H_
8
9#include <roaring/portability.h>
10#include <roaring/roaring_types.h>
11#include <roaring/utilasm.h>
12#include <stdbool.h>
13#include <stdint.h>
14
15#ifdef USEAVX
16#define ALIGN_AVX __attribute__((aligned(sizeof(__m256i))))
17#else
18#define ALIGN_AVX
19#endif
20
21enum {
22 BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64,
23 BITSET_UNKNOWN_CARDINALITY = -1
24};
25
26struct bitset_container_s {
27 int32_t cardinality;
28 uint64_t *array;
29};
30
31typedef struct bitset_container_s bitset_container_t;
32
33/* Create a new bitset. Return NULL in case of failure. */
34bitset_container_t *bitset_container_create(void);
35
36/* Free memory. */
37void bitset_container_free(bitset_container_t *bitset);
38
39/* Clear bitset (sets bits to 0). */
40void bitset_container_clear(bitset_container_t *bitset);
41
42/* Set all bits to 1. */
43void bitset_container_set_all(bitset_container_t *bitset);
44
45/* Duplicate bitset */
46bitset_container_t *bitset_container_clone(const bitset_container_t *src);
47
48int32_t bitset_container_serialize(const bitset_container_t *container,
49 char *buf) WARN_UNUSED;
50
51uint32_t bitset_container_serialization_len(void);
52
53void *bitset_container_deserialize(const char *buf, size_t buf_len);
54
55/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow
56 * and
57 * should not be used in performance-sensitive code. Ever. */
58void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
59 uint32_t end);
60
61#ifdef ASMBITMANIPOPTIMIZATION
62/* Set the ith bit. */
63static inline void bitset_container_set(bitset_container_t *bitset,
64 uint16_t pos) {
65 uint64_t shift = 6;
66 uint64_t offset;
67 uint64_t p = pos;
68 ASM_SHIFT_RIGHT(p, shift, offset);
69 uint64_t load = bitset->array[offset];
70 ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
71 bitset->array[offset] = load;
72}
73
74/* Unset the ith bit. */
75static inline void bitset_container_unset(bitset_container_t *bitset,
76 uint16_t pos) {
77 uint64_t shift = 6;
78 uint64_t offset;
79 uint64_t p = pos;
80 ASM_SHIFT_RIGHT(p, shift, offset);
81 uint64_t load = bitset->array[offset];
82 ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
83 bitset->array[offset] = load;
84}
85
86/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
87 * than bitset_container_set. */
88static inline bool bitset_container_add(bitset_container_t *bitset,
89 uint16_t pos) {
90 uint64_t shift = 6;
91 uint64_t offset;
92 uint64_t p = pos;
93 ASM_SHIFT_RIGHT(p, shift, offset);
94 uint64_t load = bitset->array[offset];
95 // could be possibly slightly further optimized
96 const int32_t oldcard = bitset->cardinality;
97 ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
98 bitset->array[offset] = load;
99 return bitset->cardinality - oldcard;
100}
101
102/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be
103 * slower than bitset_container_unset. */
104static inline bool bitset_container_remove(bitset_container_t *bitset,
105 uint16_t pos) {
106 uint64_t shift = 6;
107 uint64_t offset;
108 uint64_t p = pos;
109 ASM_SHIFT_RIGHT(p, shift, offset);
110 uint64_t load = bitset->array[offset];
111 // could be possibly slightly further optimized
112 const int32_t oldcard = bitset->cardinality;
113 ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
114 bitset->array[offset] = load;
115 return oldcard - bitset->cardinality;
116}
117
118/* Get the value of the ith bit. */
119inline bool bitset_container_get(const bitset_container_t *bitset,
120 uint16_t pos) {
121 uint64_t word = bitset->array[pos >> 6];
122 const uint64_t p = pos;
123 ASM_INPLACESHIFT_RIGHT(word, p);
124 return word & 1;
125}
126
127#else
128
129/* Set the ith bit. */
130static inline void bitset_container_set(bitset_container_t *bitset,
131 uint16_t pos) {
132 const uint64_t old_word = bitset->array[pos >> 6];
133 const int index = pos & 63;
134 const uint64_t new_word = old_word | (UINT64_C(1) << index);
135 bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index);
136 bitset->array[pos >> 6] = new_word;
137}
138
139/* Unset the ith bit. */
140static inline void bitset_container_unset(bitset_container_t *bitset,
141 uint16_t pos) {
142 const uint64_t old_word = bitset->array[pos >> 6];
143 const int index = pos & 63;
144 const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
145 bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index);
146 bitset->array[pos >> 6] = new_word;
147}
148
149/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
150 * than bitset_container_set. */
151static inline bool bitset_container_add(bitset_container_t *bitset,
152 uint16_t pos) {
153 const uint64_t old_word = bitset->array[pos >> 6];
154 const int index = pos & 63;
155 const uint64_t new_word = old_word | (UINT64_C(1) << index);
156 const uint64_t increment = (old_word ^ new_word) >> index;
157 bitset->cardinality += (uint32_t)increment;
158 bitset->array[pos >> 6] = new_word;
159 return increment > 0;
160}
161
162/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be
163 * slower than bitset_container_unset. */
164static inline bool bitset_container_remove(bitset_container_t *bitset,
165 uint16_t pos) {
166 const uint64_t old_word = bitset->array[pos >> 6];
167 const int index = pos & 63;
168 const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
169 const uint64_t increment = (old_word ^ new_word) >> index;
170 bitset->cardinality -= (uint32_t)increment;
171 bitset->array[pos >> 6] = new_word;
172 return increment > 0;
173}
174
175/* Get the value of the ith bit. */
176inline bool bitset_container_get(const bitset_container_t *bitset,
177 uint16_t pos) {
178 const uint64_t word = bitset->array[pos >> 6];
179 return (word >> (pos & 63)) & 1;
180}
181
182#endif
183
184/*
185* Check if all bits are set in a range of positions from pos_start (included) to
186* pos_end (excluded).
187*/
188static inline bool bitset_container_get_range(const bitset_container_t *bitset,
189 uint32_t pos_start, uint32_t pos_end) {
190
191 const uint32_t start = pos_start >> 6;
192 const uint32_t end = pos_end >> 6;
193
194 const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1);
195 const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1;
196
197 if (start == end) return ((bitset->array[end] & first & last) == (first & last));
198 if ((bitset->array[start] & first) != first) return false;
199
200 if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->array[end] & last) != last)){
201
202 return false;
203 }
204
205 for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){
206
207 if (bitset->array[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false;
208 }
209
210 return true;
211}
212
213/* Check whether `bitset' is present in `array'. Calls bitset_container_get. */
214inline bool bitset_container_contains(const bitset_container_t *bitset,
215 uint16_t pos) {
216 return bitset_container_get(bitset, pos);
217}
218
219/*
220* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded)
221* is present in `bitset'. Calls bitset_container_get_all.
222*/
223static inline bool bitset_container_contains_range(const bitset_container_t *bitset,
224 uint32_t pos_start, uint32_t pos_end) {
225 return bitset_container_get_range(bitset, pos_start, pos_end);
226}
227
228/* Get the number of bits set */
229static inline int bitset_container_cardinality(
230 const bitset_container_t *bitset) {
231 return bitset->cardinality;
232}
233
234
235
236
237/* Copy one container into another. We assume that they are distinct. */
238void bitset_container_copy(const bitset_container_t *source,
239 bitset_container_t *dest);
240
241/* Add all the values [min,max) at a distance k*step from min: min,
242 * min+step,.... */
243void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
244 uint32_t max, uint16_t step);
245
246/* Get the number of bits set (force computation). This does not modify bitset.
247 * To update the cardinality, you should do
248 * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/
249int bitset_container_compute_cardinality(const bitset_container_t *bitset);
250
251/* Get whether there is at least one bit set (see bitset_container_empty for the reverse),
252 when the cardinality is unknown, it is computed and stored in the struct */
253static inline bool bitset_container_nonzero_cardinality(
254 bitset_container_t *bitset) {
255 // account for laziness
256 if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
257 // could bail early instead with a nonzero result
258 bitset->cardinality = bitset_container_compute_cardinality(bitset);
259 }
260 return bitset->cardinality > 0;
261}
262
263/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse),
264 * it never modifies the bitset struct. */
265static inline bool bitset_container_empty(
266 const bitset_container_t *bitset) {
267 if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
268 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
269 if((bitset->array[i]) != 0) return false;
270 }
271 return true;
272 }
273 return bitset->cardinality == 0;
274}
275
276
277/* Get whether there is at least one bit set (see bitset_container_empty for the reverse),
278 the bitset is never modified */
279static inline bool bitset_container_const_nonzero_cardinality(
280 const bitset_container_t *bitset) {
281 return !bitset_container_empty(bitset);
282}
283
284/*
285 * Check whether the two bitsets intersect
286 */
287bool bitset_container_intersect(const bitset_container_t *src_1,
288 const bitset_container_t *src_2);
289
290/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
291 * cardinality. */
292int bitset_container_or(const bitset_container_t *src_1,
293 const bitset_container_t *src_2,
294 bitset_container_t *dst);
295
296/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality.
297 */
298int bitset_container_or_justcard(const bitset_container_t *src_1,
299 const bitset_container_t *src_2);
300
301/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
302 * cardinality. Same as bitset_container_or. */
303int bitset_container_union(const bitset_container_t *src_1,
304 const bitset_container_t *src_2,
305 bitset_container_t *dst);
306
307/* Computes the union of bitsets `src_1' and `src_2' and return the
308 * cardinality. Same as bitset_container_or_justcard. */
309int bitset_container_union_justcard(const bitset_container_t *src_1,
310 const bitset_container_t *src_2);
311
312/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
313 * update the cardinality. Provided to optimize chained operations. */
314int bitset_container_or_nocard(const bitset_container_t *src_1,
315 const bitset_container_t *src_2,
316 bitset_container_t *dst);
317
318/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
319 * return the cardinality. */
320int bitset_container_and(const bitset_container_t *src_1,
321 const bitset_container_t *src_2,
322 bitset_container_t *dst);
323
324/* Computes the intersection of bitsets `src_1' and `src_2' and return the
325 * cardinality. */
326int bitset_container_and_justcard(const bitset_container_t *src_1,
327 const bitset_container_t *src_2);
328
329/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
330 * return the cardinality. Same as bitset_container_and. */
331int bitset_container_intersection(const bitset_container_t *src_1,
332 const bitset_container_t *src_2,
333 bitset_container_t *dst);
334
335/* Computes the intersection of bitsets `src_1' and `src_2' and return the
336 * cardinality. Same as bitset_container_and_justcard. */
337int bitset_container_intersection_justcard(const bitset_container_t *src_1,
338 const bitset_container_t *src_2);
339
340/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
341 * not update the cardinality. Provided to optimize chained operations. */
342int bitset_container_and_nocard(const bitset_container_t *src_1,
343 const bitset_container_t *src_2,
344 bitset_container_t *dst);
345
346/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and
347 * return the cardinality. */
348int bitset_container_xor(const bitset_container_t *src_1,
349 const bitset_container_t *src_2,
350 bitset_container_t *dst);
351
352/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the
353 * cardinality. */
354int bitset_container_xor_justcard(const bitset_container_t *src_1,
355 const bitset_container_t *src_2);
356
357/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does
358 * not update the cardinality. Provided to optimize chained operations. */
359int bitset_container_xor_nocard(const bitset_container_t *src_1,
360 const bitset_container_t *src_2,
361 bitset_container_t *dst);
362
363/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the
364 * cardinality. */
365int bitset_container_andnot(const bitset_container_t *src_1,
366 const bitset_container_t *src_2,
367 bitset_container_t *dst);
368
369/* Computes the and not of bitsets `src_1' and `src_2' and return the
370 * cardinality. */
371int bitset_container_andnot_justcard(const bitset_container_t *src_1,
372 const bitset_container_t *src_2);
373
374/* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does
375 * not update the cardinality. Provided to optimize chained operations. */
376int bitset_container_andnot_nocard(const bitset_container_t *src_1,
377 const bitset_container_t *src_2,
378 bitset_container_t *dst);
379
380/*
381 * Write out the 16-bit integers contained in this container as a list of 32-bit
382 * integers using base
383 * as the starting value (it might be expected that base has zeros in its 16
384 * least significant bits).
385 * The function returns the number of values written.
386 * The caller is responsible for allocating enough memory in out.
387 * The out pointer should point to enough memory (the cardinality times 32
388 * bits).
389 */
390int bitset_container_to_uint32_array(void *out, const bitset_container_t *cont,
391 uint32_t base);
392
393/*
394 * Print this container using printf (useful for debugging).
395 */
396void bitset_container_printf(const bitset_container_t *v);
397
398/*
399 * Print this container using printf as a comma-separated list of 32-bit
400 * integers starting at base.
401 */
402void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
403 uint32_t base);
404
405/**
406 * Return the serialized size in bytes of a container.
407 */
408static inline int32_t bitset_container_serialized_size_in_bytes(void) {
409 return BITSET_CONTAINER_SIZE_IN_WORDS * 8;
410}
411
412/**
413 * Return the the number of runs.
414 */
415int bitset_container_number_of_runs(bitset_container_t *b);
416
417bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base,
418 roaring_iterator iterator, void *ptr);
419bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base,
420 roaring_iterator64 iterator, uint64_t high_bits,
421 void *ptr);
422
423/**
424 * Writes the underlying array to buf, outputs how many bytes were written.
425 * This is meant to be byte-by-byte compatible with the Java and Go versions of
426 * Roaring.
427 * The number of bytes written should be
428 * bitset_container_size_in_bytes(container).
429 */
430int32_t bitset_container_write(const bitset_container_t *container, char *buf);
431
432/**
433 * Reads the instance from buf, outputs how many bytes were read.
434 * This is meant to be byte-by-byte compatible with the Java and Go versions of
435 * Roaring.
436 * The number of bytes read should be bitset_container_size_in_bytes(container).
437 * You need to provide the (known) cardinality.
438 */
439int32_t bitset_container_read(int32_t cardinality,
440 bitset_container_t *container, const char *buf);
441/**
442 * Return the serialized size in bytes of a container (see
443 * bitset_container_write).
444 * This is meant to be compatible with the Java and Go versions of Roaring and
445 * assumes
446 * that the cardinality of the container is already known or can be computed.
447 */
448static inline int32_t bitset_container_size_in_bytes(
449 const bitset_container_t *container) {
450 (void)container;
451 return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
452}
453
454/**
455 * Return true if the two containers have the same content.
456 */
457bool bitset_container_equals(const bitset_container_t *container1,
458 const bitset_container_t *container2);
459
460/**
461* Return true if container1 is a subset of container2.
462*/
463bool bitset_container_is_subset(const bitset_container_t *container1,
464 const bitset_container_t *container2);
465
466/**
467 * If the element of given rank is in this container, supposing that the first
468 * element has rank start_rank, then the function returns true and sets element
469 * accordingly.
470 * Otherwise, it returns false and update start_rank.
471 */
472bool bitset_container_select(const bitset_container_t *container,
473 uint32_t *start_rank, uint32_t rank,
474 uint32_t *element);
475
476/* Returns the smallest value (assumes not empty) */
477uint16_t bitset_container_minimum(const bitset_container_t *container);
478
479/* Returns the largest value (assumes not empty) */
480uint16_t bitset_container_maximum(const bitset_container_t *container);
481
482/* Returns the number of values equal or smaller than x */
483int bitset_container_rank(const bitset_container_t *container, uint16_t x);
484
485/* Returns the index of the first value equal or larger than x, or -1 */
486int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
487#endif /* INCLUDE_CONTAINERS_BITSET_H_ */
488