1/*
2A C++ header for Roaring Bitmaps.
3*/
4#ifndef INCLUDE_ROARING_HH_
5#define INCLUDE_ROARING_HH_
6
7#include <stdarg.h>
8
9#include <roaring/roaring.h>
10#include <algorithm>
11#include <new>
12#include <stdexcept>
13#include <string>
14
15class RoaringSetBitForwardIterator;
16
17class Roaring {
18 public:
19 /**
20 * Create an empty bitmap
21 */
22 Roaring() {
23 ra_init(&roaring.high_low_container);
24 }
25
26 /**
27 * Construct a bitmap from a list of integer values.
28 */
29 Roaring(size_t n, const uint32_t *data) : Roaring() {
30 roaring_bitmap_add_many(&roaring, n, data);
31 }
32
33 /**
34 * Copy constructor
35 */
36 Roaring(const Roaring &r) {
37 bool is_ok =
38 ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
39 roaring_bitmap_get_copy_on_write(&r.roaring));
40 if (!is_ok) {
41 throw std::runtime_error("failed memory alloc in constructor");
42 }
43 roaring_bitmap_set_copy_on_write(&roaring,
44 roaring_bitmap_get_copy_on_write(&r.roaring));
45 }
46
47 /**
48 * Move constructor. The moved object remains valid, i.e.
49 * all methods can still be called on it.
50 */
51 Roaring(Roaring &&r) noexcept {
52 roaring = std::move(r.roaring);
53 ra_init(&r.roaring.high_low_container);
54 }
55
56 /**
57 * Construct a roaring object from the C struct.
58 *
59 * Passing a NULL point is unsafe.
60 * the pointer to the C struct will be invalid after the call.
61 */
62 Roaring(roaring_bitmap_t *s) noexcept {
63 // steal the interior struct
64 roaring.high_low_container = s->high_low_container;
65 // deallocate the old container
66 free(s);
67 }
68
69 /**
70 * Construct a bitmap from a list of integer values.
71 */
72 static Roaring bitmapOf(size_t n, ...) {
73 Roaring ans;
74 va_list vl;
75 va_start(vl, n);
76 for (size_t i = 0; i < n; i++) {
77 ans.add(va_arg(vl, uint32_t));
78 }
79 va_end(vl);
80 return ans;
81 }
82
83 /**
84 * Add value x
85 *
86 */
87 void add(uint32_t x) { roaring_bitmap_add(&roaring, x); }
88
89 /**
90 * Add value x
91 * Returns true if a new value was added, false if the value was already existing.
92 */
93 bool addChecked(uint32_t x) {
94 return roaring_bitmap_add_checked(&roaring, x);
95 }
96
97 /**
98 * add if all values from x (included) to y (excluded)
99 */
100 void addRange(const uint64_t x, const uint64_t y) {
101 return roaring_bitmap_add_range(&roaring, x, y);
102 }
103
104 /**
105 * Add value n_args from pointer vals
106 *
107 */
108 void addMany(size_t n_args, const uint32_t *vals) {
109 roaring_bitmap_add_many(&roaring, n_args, vals);
110 }
111
112 /**
113 * Remove value x
114 *
115 */
116 void remove(uint32_t x) { roaring_bitmap_remove(&roaring, x); }
117
118 /**
119 * Remove value x
120 * Returns true if a new value was removed, false if the value was not existing.
121 */
122 bool removeChecked(uint32_t x) {
123 return roaring_bitmap_remove_checked(&roaring, x);
124 }
125
126 /**
127 * Return the largest value (if not empty)
128 *
129 */
130 uint32_t maximum() const { return roaring_bitmap_maximum(&roaring); }
131
132 /**
133 * Return the smallest value (if not empty)
134 *
135 */
136 uint32_t minimum() const { return roaring_bitmap_minimum(&roaring); }
137
138 /**
139 * Check if value x is present
140 */
141 bool contains(uint32_t x) const {
142 return roaring_bitmap_contains(&roaring, x);
143 }
144
145 /**
146 * Check if all values from x (included) to y (excluded) are present
147 */
148 bool containsRange(const uint64_t x, const uint64_t y) const {
149 return roaring_bitmap_contains_range(&roaring, x, y);
150 }
151
152 /**
153 * Destructor
154 */
155 ~Roaring() { ra_clear(&roaring.high_low_container); }
156
157 /**
158 * Copies the content of the provided bitmap, and
159 * discard the current content.
160 */
161 Roaring &operator=(const Roaring &r) {
162 ra_clear(&roaring.high_low_container);
163 bool is_ok =
164 ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
165 roaring_bitmap_get_copy_on_write(&r.roaring));
166 if (!is_ok) {
167 throw std::runtime_error("failed memory alloc in assignment");
168 }
169 roaring_bitmap_set_copy_on_write(&roaring,
170 roaring_bitmap_get_copy_on_write(&r.roaring));
171 return *this;
172 }
173
174 /**
175 * Moves the content of the provided bitmap, and
176 * discard the current content.
177 */
178 Roaring &operator=(Roaring &&r) noexcept {
179 ra_clear(&roaring.high_low_container);
180 roaring = std::move(r.roaring);
181 ra_init(&r.roaring.high_low_container);
182 return *this;
183 }
184
185 /**
186 * Compute the intersection between the current bitmap and the provided
187 * bitmap,
188 * writing the result in the current bitmap. The provided bitmap is not
189 * modified.
190 */
191 Roaring &operator&=(const Roaring &r) {
192 roaring_bitmap_and_inplace(&roaring, &r.roaring);
193 return *this;
194 }
195
196 /**
197 * Compute the difference between the current bitmap and the provided
198 * bitmap,
199 * writing the result in the current bitmap. The provided bitmap is not
200 * modified.
201 */
202 Roaring &operator-=(const Roaring &r) {
203 roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
204 return *this;
205 }
206
207 /**
208 * Compute the union between the current bitmap and the provided bitmap,
209 * writing the result in the current bitmap. The provided bitmap is not
210 * modified.
211 *
212 * See also the fastunion function to aggregate many bitmaps more quickly.
213 */
214 Roaring &operator|=(const Roaring &r) {
215 roaring_bitmap_or_inplace(&roaring, &r.roaring);
216 return *this;
217 }
218
219 /**
220 * Compute the symmetric union between the current bitmap and the provided
221 * bitmap,
222 * writing the result in the current bitmap. The provided bitmap is not
223 * modified.
224 */
225 Roaring &operator^=(const Roaring &r) {
226 roaring_bitmap_xor_inplace(&roaring, &r.roaring);
227 return *this;
228 }
229
230 /**
231 * Exchange the content of this bitmap with another.
232 */
233 void swap(Roaring &r) { std::swap(r.roaring, roaring); }
234
235 /**
236 * Get the cardinality of the bitmap (number of elements).
237 */
238 uint64_t cardinality() const {
239 return roaring_bitmap_get_cardinality(&roaring);
240 }
241
242 /**
243 * Returns true if the bitmap is empty (cardinality is zero).
244 */
245 bool isEmpty() const { return roaring_bitmap_is_empty(&roaring); }
246
247 /**
248 * Returns true if the bitmap is subset of the other.
249 */
250 bool isSubset(const Roaring &r) const {
251 return roaring_bitmap_is_subset(&roaring, &r.roaring);
252 }
253
254 /**
255 * Returns true if the bitmap is strict subset of the other.
256 */
257 bool isStrictSubset(const Roaring &r) const {
258 return roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
259 }
260
261 /**
262 * Convert the bitmap to an array. Write the output to "ans",
263 * caller is responsible to ensure that there is enough memory
264 * allocated
265 * (e.g., ans = new uint32[mybitmap.cardinality()];)
266 */
267 void toUint32Array(uint32_t *ans) const {
268 roaring_bitmap_to_uint32_array(&roaring, ans);
269 }
270 /**
271 * to int array with pagination
272 *
273 */
274 void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const {
275 roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans);
276 }
277
278 /**
279 * Return true if the two bitmaps contain the same elements.
280 */
281 bool operator==(const Roaring &r) const {
282 return roaring_bitmap_equals(&roaring, &r.roaring);
283 }
284
285 /**
286 * compute the negation of the roaring bitmap within a specified interval.
287 * areas outside the range are passed through unchanged.
288 */
289 void flip(uint64_t range_start, uint64_t range_end) {
290 roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
291 }
292
293 /**
294 * Remove run-length encoding even when it is more space efficient
295 * return whether a change was applied
296 */
297 bool removeRunCompression() {
298 return roaring_bitmap_remove_run_compression(&roaring);
299 }
300
301 /** convert array and bitmap containers to run containers when it is more
302 * efficient;
303 * also convert from run containers when more space efficient. Returns
304 * true if the result has at least one run container.
305 * Additional savings might be possible by calling shrinkToFit().
306 */
307 bool runOptimize() { return roaring_bitmap_run_optimize(&roaring); }
308
309 /**
310 * If needed, reallocate memory to shrink the memory usage. Returns
311 * the number of bytes saved.
312 */
313 size_t shrinkToFit() { return roaring_bitmap_shrink_to_fit(&roaring); }
314
315 /**
316 * Iterate over the bitmap elements. The function iterator is called once for
317 * all the values with ptr (can be NULL) as the second parameter of each call.
318 *
319 * roaring_iterator is simply a pointer to a function that returns bool
320 * (true means that the iteration should continue while false means that it
321 * should stop), and takes (uint32_t,void*) as inputs.
322 */
323 void iterate(roaring_iterator iterator, void *ptr) const {
324 roaring_iterate(&roaring, iterator, ptr);
325 }
326
327 /**
328 * If the size of the roaring bitmap is strictly greater than rank, then
329 * this function returns true and set element to the element of given rank.
330 * Otherwise, it returns false.
331 */
332 bool select(uint32_t rnk, uint32_t *element) const {
333 return roaring_bitmap_select(&roaring, rnk, element);
334 }
335
336 /**
337 * Computes the size of the intersection between two bitmaps.
338 *
339 */
340 uint64_t and_cardinality(const Roaring &r) const {
341 return roaring_bitmap_and_cardinality(&roaring, &r.roaring);
342 }
343
344 /**
345 * Check whether the two bitmaps intersect.
346 *
347 */
348 bool intersect(const Roaring &r) const {
349 return roaring_bitmap_intersect(&roaring, &r.roaring);
350 }
351
352 /**
353 * Computes the Jaccard index between two bitmaps. (Also known as the
354 * Tanimoto distance,
355 * or the Jaccard similarity coefficient)
356 *
357 * The Jaccard index is undefined if both bitmaps are empty.
358 *
359 */
360 double jaccard_index(const Roaring &r) const {
361 return roaring_bitmap_jaccard_index(&roaring, &r.roaring);
362 }
363
364 /**
365 * Computes the size of the union between two bitmaps.
366 *
367 */
368 uint64_t or_cardinality(const Roaring &r) const {
369 return roaring_bitmap_or_cardinality(&roaring, &r.roaring);
370 }
371
372 /**
373 * Computes the size of the difference (andnot) between two bitmaps.
374 *
375 */
376 uint64_t andnot_cardinality(const Roaring &r) const {
377 return roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
378 }
379
380 /**
381 * Computes the size of the symmetric difference (andnot) between two
382 * bitmaps.
383 *
384 */
385 uint64_t xor_cardinality(const Roaring &r) const {
386 return roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
387 }
388
389 /**
390 * Returns the number of integers that are smaller or equal to x.
391 */
392 uint64_t rank(uint32_t x) const { return roaring_bitmap_rank(&roaring, x); }
393
394 /**
395 * write a bitmap to a char buffer. This is meant to be compatible with
396 * the
397 * Java and Go versions. Returns how many bytes were written which should be
398 * getSizeInBytes().
399 *
400 * Setting the portable flag to false enable a custom format that
401 * can save space compared to the portable format (e.g., for very
402 * sparse bitmaps).
403 *
404 * Boost users can serialize bitmaps in this manner:
405 *
406 * BOOST_SERIALIZATION_SPLIT_FREE(Roaring)
407 * namespace boost {
408 * namespace serialization {
409 *
410 * template <class Archive>
411 * void save(Archive& ar, const Roaring& bitmask,
412 * const unsigned int version) {
413 * std::size_t expected_size_in_bytes = bitmask.getSizeInBytes();
414 * std::vector<char> buffer(expected_size_in_bytes);
415 * std::size_t size_in_bytes = bitmask.write(buffer.data());
416 *
417 * ar& size_in_bytes;
418 * ar& boost::serialization::make_binary_object(buffer.data(),
419 * size_in_bytes);
420 * }
421 * template <class Archive>
422 * void load(Archive& ar, Roaring& bitmask,
423 * const unsigned int version) {
424 * std::size_t size_in_bytes = 0;
425 * ar& size_in_bytes;
426 * std::vector<char> buffer(size_in_bytes);
427 * ar& boost::serialization::make_binary_object(buffer.data(),
428 * size_in_bytes);
429 * bitmask = Roaring::readSafe(buffer.data(), size_in_bytes);
430 *}
431 *} // namespace serialization
432 *} // namespace boost
433 */
434 size_t write(char *buf, bool portable = true) const {
435 if (portable)
436 return roaring_bitmap_portable_serialize(&roaring, buf);
437 else
438 return roaring_bitmap_serialize(&roaring, buf);
439 }
440
441 /**
442 * read a bitmap from a serialized version. This is meant to be compatible
443 * with the Java and Go versions.
444 *
445 * Setting the portable flag to false enable a custom format that
446 * can save space compared to the portable format (e.g., for very
447 * sparse bitmaps).
448 *
449 * This function is unsafe in the sense that if you provide bad data,
450 * many, many bytes could be read. See also readSafe.
451 */
452 static Roaring read(const char *buf, bool portable = true) {
453 roaring_bitmap_t * r = portable ? roaring_bitmap_portable_deserialize(buf) : roaring_bitmap_deserialize(buf);
454 if (r == NULL) {
455 throw std::runtime_error("failed alloc while reading");
456 }
457 return Roaring(r);
458 }
459 /**
460 * read a bitmap from a serialized version, reading no more than maxbytes bytes.
461 * This is meant to be compatible with the Java and Go versions.
462 *
463 */
464 static Roaring readSafe(const char *buf, size_t maxbytes) {
465 roaring_bitmap_t * r = roaring_bitmap_portable_deserialize_safe(buf,maxbytes);
466 if (r == NULL) {
467 throw std::runtime_error("failed alloc while reading");
468 }
469 return Roaring(r);
470 }
471 /**
472 * How many bytes are required to serialize this bitmap (meant to be
473 * compatible
474 * with Java and Go versions)
475 *
476 * Setting the portable flag to false enable a custom format that
477 * can save space compared to the portable format (e.g., for very
478 * sparse bitmaps).
479 */
480 size_t getSizeInBytes(bool portable = true) const {
481 if (portable)
482 return roaring_bitmap_portable_size_in_bytes(&roaring);
483 else
484 return roaring_bitmap_size_in_bytes(&roaring);
485 }
486
487 /**
488 * Computes the intersection between two bitmaps and returns new bitmap.
489 * The current bitmap and the provided bitmap are unchanged.
490 */
491 Roaring operator&(const Roaring &o) const {
492 roaring_bitmap_t *r = roaring_bitmap_and(&roaring, &o.roaring);
493 if (r == NULL) {
494 throw std::runtime_error("failed materalization in and");
495 }
496 return Roaring(r);
497 }
498
499 /**
500 * Computes the difference between two bitmaps and returns new bitmap.
501 * The current bitmap and the provided bitmap are unchanged.
502 */
503 Roaring operator-(const Roaring &o) const {
504 roaring_bitmap_t *r = roaring_bitmap_andnot(&roaring, &o.roaring);
505 if (r == NULL) {
506 throw std::runtime_error("failed materalization in andnot");
507 }
508 return Roaring(r);
509 }
510
511 /**
512 * Computes the union between two bitmaps and returns new bitmap.
513 * The current bitmap and the provided bitmap are unchanged.
514 */
515 Roaring operator|(const Roaring &o) const {
516 roaring_bitmap_t *r = roaring_bitmap_or(&roaring, &o.roaring);
517 if (r == NULL) {
518 throw std::runtime_error("failed materalization in or");
519 }
520 return Roaring(r);
521 }
522
523 /**
524 * Computes the symmetric union between two bitmaps and returns new bitmap.
525 * The current bitmap and the provided bitmap are unchanged.
526 */
527 Roaring operator^(const Roaring &o) const {
528 roaring_bitmap_t *r = roaring_bitmap_xor(&roaring, &o.roaring);
529 if (r == NULL) {
530 throw std::runtime_error("failed materalization in xor");
531 }
532 return Roaring(r);
533 }
534
535 /**
536 * Whether or not we apply copy and write.
537 */
538 void setCopyOnWrite(bool val) {
539 roaring_bitmap_set_copy_on_write(&roaring, val);
540 }
541
542 /**
543 * Print the content of the bitmap
544 */
545 void printf() const { roaring_bitmap_printf(&roaring); }
546
547 /**
548 * Print the content of the bitmap into a string
549 */
550 std::string toString() const {
551 struct iter_data {
552 std::string str;
553 char first_char = '{';
554 } outer_iter_data;
555 if (!isEmpty()) {
556 iterate(
557 [](uint32_t value, void *inner_iter_data) -> bool {
558 ((iter_data *)inner_iter_data)->str +=
559 ((iter_data *)inner_iter_data)->first_char;
560 ((iter_data *)inner_iter_data)->str +=
561 std::to_string(value);
562 ((iter_data *)inner_iter_data)->first_char = ',';
563 return true;
564 },
565 (void *)&outer_iter_data);
566 } else
567 outer_iter_data.str = '{';
568 outer_iter_data.str += '}';
569 return outer_iter_data.str;
570 }
571
572 /**
573 * Whether or not copy and write is active.
574 */
575 bool getCopyOnWrite() const {
576 return roaring_bitmap_get_copy_on_write(&roaring);
577 }
578
579 /**
580 * computes the logical or (union) between "n" bitmaps (referenced by a
581 * pointer).
582 */
583 static Roaring fastunion(size_t n, const Roaring **inputs) {
584 const roaring_bitmap_t **x =
585 (const roaring_bitmap_t **)malloc(n * sizeof(roaring_bitmap_t *));
586 if (x == NULL) {
587 throw std::runtime_error("failed memory alloc in fastunion");
588 }
589 for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring;
590
591 roaring_bitmap_t *c_ans = roaring_bitmap_or_many(n, x);
592 if (c_ans == NULL) {
593 free(x);
594 throw std::runtime_error("failed memory alloc in fastunion");
595 }
596 Roaring ans(c_ans);
597 free(x);
598 return ans;
599 }
600
601 typedef RoaringSetBitForwardIterator const_iterator;
602
603 /**
604 * Returns an iterator that can be used to access the position of the
605 * set bits. The running time complexity of a full scan is proportional to
606 * the
607 * number
608 * of set bits: be aware that if you have long strings of 1s, this can be
609 * very inefficient.
610 *
611 * It can be much faster to use the toArray method if you want to
612 * retrieve the set bits.
613 */
614 const_iterator begin() const;
615
616 /**
617 * A bogus iterator that can be used together with begin()
618 * for constructions such as for(auto i = b.begin();
619 * i!=b.end(); ++i) {}
620 */
621 const_iterator &end() const;
622
623 roaring_bitmap_t roaring;
624};
625
626/**
627 * Used to go through the set bits. Not optimally fast, but convenient.
628 */
629class RoaringSetBitForwardIterator final {
630 public:
631 typedef std::forward_iterator_tag iterator_category;
632 typedef uint32_t *pointer;
633 typedef uint32_t &reference_type;
634 typedef uint32_t value_type;
635 typedef int32_t difference_type;
636 typedef RoaringSetBitForwardIterator type_of_iterator;
637
638 /**
639 * Provides the location of the set bit.
640 */
641 value_type operator*() const { return i.current_value; }
642
643 bool operator<(const type_of_iterator &o) {
644 if (!i.has_value) return false;
645 if (!o.i.has_value) return true;
646 return i.current_value < *o;
647 }
648
649 bool operator<=(const type_of_iterator &o) {
650 if (!o.i.has_value) return true;
651 if (!i.has_value) return false;
652 return i.current_value <= *o;
653 }
654
655 bool operator>(const type_of_iterator &o) {
656 if (!o.i.has_value) return false;
657 if (!i.has_value) return true;
658 return i.current_value > *o;
659 }
660
661 bool operator>=(const type_of_iterator &o) {
662 if (!i.has_value) return true;
663 if (!o.i.has_value) return false;
664 return i.current_value >= *o;
665 }
666
667 /**
668 * Move the iterator to the first value >= val.
669 */
670 void equalorlarger(uint32_t val) {
671 roaring_move_uint32_iterator_equalorlarger(&i,val);
672 }
673
674 type_of_iterator &operator++() { // ++i, must returned inc. value
675 roaring_advance_uint32_iterator(&i);
676 return *this;
677 }
678
679 type_of_iterator operator++(int) { // i++, must return orig. value
680 RoaringSetBitForwardIterator orig(*this);
681 roaring_advance_uint32_iterator(&i);
682 return orig;
683 }
684
685 type_of_iterator& operator--() { // prefix --
686 roaring_previous_uint32_iterator(&i);
687 return *this;
688 }
689
690 type_of_iterator operator--(int) { // postfix --
691 RoaringSetBitForwardIterator orig(*this);
692 roaring_previous_uint32_iterator(&i);
693 return orig;
694 }
695
696 bool operator==(const RoaringSetBitForwardIterator &o) const {
697 return i.current_value == *o && i.has_value == o.i.has_value;
698 }
699
700 bool operator!=(const RoaringSetBitForwardIterator &o) const {
701 return i.current_value != *o || i.has_value != o.i.has_value;
702 }
703
704 RoaringSetBitForwardIterator(const Roaring &parent,
705 bool exhausted = false) {
706 if (exhausted) {
707 i.parent = &parent.roaring;
708 i.container_index = INT32_MAX;
709 i.has_value = false;
710 i.current_value = UINT32_MAX;
711 } else {
712 roaring_init_iterator(&parent.roaring, &i);
713 }
714 }
715
716 roaring_uint32_iterator_t i;
717};
718
719inline RoaringSetBitForwardIterator Roaring::begin() const {
720 return RoaringSetBitForwardIterator(*this);
721}
722
723inline RoaringSetBitForwardIterator &Roaring::end() const {
724 static RoaringSetBitForwardIterator e(*this, true);
725 return e;
726}
727
728#endif /* INCLUDE_ROARING_HH_ */
729