1#pragma once
2
3#include <algorithm>
4#include <IO/ReadHelpers.h>
5#include <IO/WriteHelpers.h>
6#include <boost/noncopyable.hpp>
7#include <Common/HashTable/SmallTable.h>
8#include <Common/PODArray.h>
9
10// Include this header last, because it is an auto-generated dump of questionable
11// garbage that breaks the build (e.g. it changes _POSIX_C_SOURCE).
12// TODO: find out what it is. On github, they have proper inteface headers like
13// this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h
14#pragma GCC diagnostic push
15#pragma GCC diagnostic warning "-Wold-style-cast"
16#include <roaring/roaring.h>
17#pragma GCC diagnostic pop
18
19namespace DB
20{
21/**
22 * For a small number of values - an array of fixed size "on the stack".
23 * For large, roaring_bitmap_t is allocated.
24 * For a description of the roaring_bitmap_t, see: https://github.com/RoaringBitmap/CRoaring
25 */
26template <typename T, UInt8 small_set_size>
27class RoaringBitmapWithSmallSet : private boost::noncopyable
28{
29private:
30 using Small = SmallSet<T, small_set_size>;
31 using ValueBuffer = std::vector<T>;
32 Small small;
33 roaring_bitmap_t * rb = nullptr;
34
35 void toLarge()
36 {
37 rb = roaring_bitmap_create();
38
39 for (const auto & x : small)
40 roaring_bitmap_add(rb, x.getValue());
41 }
42
43public:
44 bool isLarge() const { return rb != nullptr; }
45
46 bool isSmall() const { return rb == nullptr; }
47
48 ~RoaringBitmapWithSmallSet()
49 {
50 if (isLarge())
51 roaring_bitmap_free(rb);
52 }
53
54 void add(T value)
55 {
56 if (isSmall())
57 {
58 if (small.find(value) == small.end())
59 {
60 if (!small.full())
61 small.insert(value);
62 else
63 {
64 toLarge();
65 roaring_bitmap_add(rb, value);
66 }
67 }
68 }
69 else
70 roaring_bitmap_add(rb, value);
71 }
72
73 UInt64 size() const
74 {
75 return isSmall()
76 ? small.size()
77 : roaring_bitmap_get_cardinality(rb);
78 }
79
80 void merge(const RoaringBitmapWithSmallSet & r1)
81 {
82 if (r1.isLarge())
83 {
84 if (isSmall())
85 toLarge();
86
87 roaring_bitmap_or_inplace(rb, r1.rb);
88 }
89 else
90 {
91 for (const auto & x : r1.small)
92 add(x.getValue());
93 }
94 }
95
96 void read(DB::ReadBuffer & in)
97 {
98 bool is_large;
99 readBinary(is_large, in);
100
101 if (is_large)
102 {
103 std::string s;
104 readStringBinary(s,in);
105 rb = roaring_bitmap_portable_deserialize(s.c_str());
106 for (const auto & x : small) // merge from small
107 roaring_bitmap_add(rb, x.getValue());
108 }
109 else
110 small.read(in);
111 }
112
113 void write(DB::WriteBuffer & out) const
114 {
115 writeBinary(isLarge(), out);
116
117 if (isLarge())
118 {
119 uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(rb);
120 std::string s(expectedsize,0);
121 roaring_bitmap_portable_serialize(rb, const_cast<char*>(s.data()));
122 writeStringBinary(s,out);
123 }
124 else
125 small.write(out);
126 }
127
128
129 roaring_bitmap_t * getRb() const { return rb; }
130
131 Small & getSmall() const { return small; }
132
133 /**
134 * Get a new roaring_bitmap_t from elements of small
135 */
136 roaring_bitmap_t * getNewRbFromSmall() const
137 {
138 roaring_bitmap_t * smallRb = roaring_bitmap_create();
139 for (const auto & x : small)
140 roaring_bitmap_add(smallRb, x.getValue());
141 return smallRb;
142 }
143
144 /**
145 * Computes the intersection between two bitmaps
146 */
147 void rb_and(const RoaringBitmapWithSmallSet & r1)
148 {
149 ValueBuffer buffer;
150 if (isSmall() && r1.isSmall())
151 {
152 // intersect
153 for (const auto & x : small)
154 if (r1.small.find(x.getValue()) != r1.small.end())
155 buffer.push_back(x.getValue());
156
157 // Clear out the original values
158 small.clear();
159
160 for (const auto & value : buffer)
161 small.insert(value);
162
163 buffer.clear();
164 }
165 else if (isSmall() && r1.isLarge())
166 {
167 for (const auto & x : small)
168 if (roaring_bitmap_contains(r1.rb, x.getValue()))
169 buffer.push_back(x.getValue());
170
171 // Clear out the original values
172 small.clear();
173
174 for (const auto & value : buffer)
175 small.insert(value);
176
177 buffer.clear();
178 }
179 else
180 {
181 roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
182 roaring_bitmap_and_inplace(rb, rb1);
183 if (r1.isSmall())
184 roaring_bitmap_free(rb1);
185 }
186 }
187
188 /**
189 * Computes the union between two bitmaps.
190 */
191 void rb_or(const RoaringBitmapWithSmallSet & r1) { merge(r1); }
192
193 /**
194 * Computes the symmetric difference (xor) between two bitmaps.
195 */
196 void rb_xor(const RoaringBitmapWithSmallSet & r1)
197 {
198 if (isSmall())
199 toLarge();
200 roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
201 roaring_bitmap_xor_inplace(rb, rb1);
202 if (r1.isSmall())
203 roaring_bitmap_free(rb1);
204 }
205
206 /**
207 * Computes the difference (andnot) between two bitmaps
208 */
209 void rb_andnot(const RoaringBitmapWithSmallSet & r1)
210 {
211 ValueBuffer buffer;
212 if (isSmall() && r1.isSmall())
213 {
214 // subtract
215 for (const auto & x : small)
216 if (r1.small.find(x.getValue()) == r1.small.end())
217 buffer.push_back(x.getValue());
218
219 // Clear out the original values
220 small.clear();
221
222 for (const auto & value : buffer)
223 small.insert(value);
224
225 buffer.clear();
226 }
227 else if (isSmall() && r1.isLarge())
228 {
229 for (const auto & x : small)
230 if (!roaring_bitmap_contains(r1.rb, x.getValue()))
231 buffer.push_back(x.getValue());
232
233 // Clear out the original values
234 small.clear();
235
236 for (const auto & value : buffer)
237 small.insert(value);
238
239 buffer.clear();
240 }
241 else
242 {
243 roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
244 roaring_bitmap_andnot_inplace(rb, rb1);
245 if (r1.isSmall())
246 roaring_bitmap_free(rb1);
247 }
248 }
249
250 /**
251 * Computes the cardinality of the intersection between two bitmaps.
252 */
253 UInt64 rb_and_cardinality(const RoaringBitmapWithSmallSet & r1) const
254 {
255 UInt64 retSize = 0;
256 if (isSmall() && r1.isSmall())
257 {
258 for (const auto & x : small)
259 if (r1.small.find(x.getValue()) != r1.small.end())
260 ++retSize;
261 }
262 else if (isSmall() && r1.isLarge())
263 {
264 for (const auto & x : small)
265 if (roaring_bitmap_contains(r1.rb, x.getValue()))
266 ++retSize;
267 }
268 else
269 {
270 roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
271 retSize = roaring_bitmap_and_cardinality(rb, rb1);
272 if (r1.isSmall())
273 roaring_bitmap_free(rb1);
274 }
275 return retSize;
276 }
277
278 /**
279 * Computes the cardinality of the union between two bitmaps.
280 */
281 UInt64 rb_or_cardinality(const RoaringBitmapWithSmallSet & r1) const
282 {
283 UInt64 c1 = size();
284 UInt64 c2 = r1.size();
285 UInt64 inter = rb_and_cardinality(r1);
286 return c1 + c2 - inter;
287 }
288
289 /**
290 * Computes the cardinality of the symmetric difference (andnot) between two bitmaps.
291 */
292 UInt64 rb_xor_cardinality(const RoaringBitmapWithSmallSet & r1) const
293 {
294 UInt64 c1 = size();
295 UInt64 c2 = r1.size();
296 UInt64 inter = rb_and_cardinality(r1);
297 return c1 + c2 - 2 * inter;
298 }
299
300 /**
301 * Computes the cardinality of the difference (andnot) between two bitmaps.
302 */
303 UInt64 rb_andnot_cardinality(const RoaringBitmapWithSmallSet & r1) const
304 {
305 UInt64 c1 = size();
306 UInt64 inter = rb_and_cardinality(r1);
307 return c1 - inter;
308 }
309
310 /**
311 * Return 1 if the two bitmaps contain the same elements.
312 */
313 UInt8 rb_equals(const RoaringBitmapWithSmallSet & r1)
314 {
315 if (isSmall())
316 toLarge();
317 roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb();
318 UInt8 is_true = roaring_bitmap_equals(rb, rb1);
319 if (r1.isSmall())
320 roaring_bitmap_free(rb1);
321 return is_true;
322 }
323
324 /**
325 * Check whether two bitmaps intersect.
326 * Intersection with an empty set is always 0 (consistent with hasAny).
327 */
328 UInt8 rb_intersect(const RoaringBitmapWithSmallSet & r1) const
329 {
330 if (isSmall())
331 {
332 if (r1.isSmall())
333 {
334 for (const auto & x : r1.small)
335 if (small.find(x.getValue()) != small.end())
336 return 1;
337 }
338 else
339 {
340 for (const auto & x : small)
341 if (roaring_bitmap_contains(r1.rb, x.getValue()))
342 return 1;
343 }
344 }
345 else if (r1.isSmall())
346 {
347 for (const auto & x : r1.small)
348 if (roaring_bitmap_contains(rb, x.getValue()))
349 return 1;
350 }
351 else if (roaring_bitmap_intersect(rb, r1.rb))
352 return 1;
353
354 return 0;
355 }
356
357 /**
358 * Check whether the argument is the subset of this set.
359 * Empty set is a subset of any other set (consistent with hasAll).
360 */
361 UInt8 rb_is_subset(const RoaringBitmapWithSmallSet & r1) const
362 {
363 if (isSmall())
364 {
365 if (r1.isSmall())
366 {
367 for (const auto & x : r1.small)
368 if (small.find(x.getValue()) == small.end())
369 return 0;
370 }
371 else
372 {
373 UInt64 r1_size = r1.size();
374
375 if (r1_size > small.size())
376 return 0; // A bigger set can not be a subset of ours.
377
378 // This is a rare case with a small number of elements on
379 // both sides: r1 was promoted to large for some reason and
380 // it is still not larger than our small set.
381 // If r1 is our subset then our size must be equal to
382 // r1_size + number of not found elements, if this sum becomes
383 // greater then r1 is not a subset.
384 for (const auto & x : small)
385 if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size())
386 return 0;
387 }
388 }
389 else if (r1.isSmall())
390 {
391 for (const auto & x : r1.small)
392 if (!roaring_bitmap_contains(rb, x.getValue()))
393 return 0;
394 }
395 else if (!roaring_bitmap_is_subset(r1.rb, rb))
396 return 0;
397
398 return 1;
399 }
400
401 /**
402 * Check whether this bitmap contains the argument.
403 */
404 UInt8 rb_contains(const UInt32 x) const
405 {
406 return isSmall() ? small.find(x) != small.end() : roaring_bitmap_contains(rb, x);
407 }
408
409 /**
410 * Remove value
411 */
412 void rb_remove(UInt64 offsetid)
413 {
414 if (isSmall())
415 toLarge();
416 roaring_bitmap_remove(rb, offsetid);
417 }
418
419 /**
420 * compute (in place) the negation of the roaring bitmap within a specified
421 * interval: [range_start, range_end). The number of negated values is
422 * range_end - range_start.
423 * Areas outside the range are passed through unchanged.
424 */
425 void rb_flip(UInt64 offsetstart, UInt64 offsetend)
426 {
427 if (isSmall())
428 toLarge();
429 roaring_bitmap_flip_inplace(rb, offsetstart, offsetend);
430 }
431
432 /**
433 * returns the number of integers that are smaller or equal to offsetid.
434 */
435 UInt64 rb_rank(UInt64 offsetid)
436 {
437 if (isSmall())
438 toLarge();
439 return roaring_bitmap_rank(rb, offsetid);
440 }
441
442 /**
443 * Convert elements to integer array, return number of elements
444 */
445 template <typename Element>
446 UInt64 rb_to_array(PaddedPODArray<Element> & res_data) const
447 {
448 UInt64 count = 0;
449 if (isSmall())
450 {
451 for (const auto & x : small)
452 {
453 res_data.emplace_back(x.getValue());
454 count++;
455 }
456 }
457 else
458 {
459 roaring_uint32_iterator_t iterator;
460 roaring_init_iterator(rb, &iterator);
461 while (iterator.has_value)
462 {
463 res_data.emplace_back(iterator.current_value);
464 roaring_advance_uint32_iterator(&iterator);
465 count++;
466 }
467 }
468 return count;
469 }
470
471 /**
472 * Return new set with specified range (not include the range_end)
473 */
474 UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet & r1) const
475 {
476 UInt64 count = 0;
477 if (range_start >= range_end)
478 return count;
479 if (isSmall())
480 {
481 for (const auto & x : small)
482 {
483 T val = x.getValue();
484 if (UInt32(val) >= range_start && UInt32(val) < range_end)
485 {
486 r1.add(val);
487 ++count;
488 }
489 }
490 }
491 else
492 {
493 roaring_uint32_iterator_t iterator;
494 roaring_init_iterator(rb, &iterator);
495 roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
496 while (iterator.has_value && UInt32(iterator.current_value) < range_end)
497 {
498 r1.add(iterator.current_value);
499 roaring_advance_uint32_iterator(&iterator);
500 ++count;
501 }
502 }
503 return count;
504 }
505
506 /**
507 * Return new set of the smallest `limit` values in set which is no less than `range_start`.
508 */
509 UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet & r1) const
510 {
511 UInt64 count = 0;
512 if (isSmall())
513 {
514 std::vector<T> ans;
515 for (const auto & x : small)
516 {
517 T val = x.getValue();
518 if (UInt32(val) >= range_start)
519 {
520 ans.push_back(val);
521 }
522 }
523 sort(ans.begin(), ans.end());
524 if (limit > ans.size())
525 limit = ans.size();
526 for (size_t i = 0; i < limit; ++i)
527 r1.add(ans[i]);
528 count = UInt64(limit);
529 }
530 else
531 {
532 roaring_uint32_iterator_t iterator;
533 roaring_init_iterator(rb, &iterator);
534 roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
535 while (UInt32(count) < limit && iterator.has_value)
536 {
537 r1.add(iterator.current_value);
538 roaring_advance_uint32_iterator(&iterator);
539 ++count;
540 }
541 }
542 return count;
543 }
544
545 UInt64 rb_min() const
546 {
547 UInt64 min_val = UINT32_MAX;
548 if (isSmall())
549 {
550 for (const auto & x : small)
551 {
552 T val = x.getValue();
553 if (UInt64(val) < min_val)
554 {
555 min_val = UInt64(val);
556 }
557 }
558 }
559 else
560 {
561 min_val = UInt64(roaring_bitmap_minimum(rb));
562 }
563 return min_val;
564 }
565
566 UInt64 rb_max() const
567 {
568 UInt64 max_val = 0;
569 if (isSmall())
570 {
571 for (const auto & x : small)
572 {
573 T val = x.getValue();
574 if (UInt64(val) > max_val)
575 {
576 max_val = UInt64(val);
577 }
578 }
579 }
580 else
581 {
582 max_val = UInt64(roaring_bitmap_maximum(rb));
583 }
584 return max_val;
585 }
586
587 /**
588 * Replace value
589 */
590 void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
591 {
592 if (isSmall())
593 toLarge();
594 for (size_t i = 0; i < num; ++i)
595 {
596 if (from_vals[i] == to_vals[i])
597 continue;
598 bool changed = roaring_bitmap_remove_checked(rb, from_vals[i]);
599 if (changed)
600 roaring_bitmap_add(rb, to_vals[i]);
601 }
602 }
603
604private:
605 /// To read and write the DB Buffer directly, migrate code from CRoaring
606 void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args)
607 {
608 void * container = nullptr; // hold value of last container touched
609 uint8_t typecode = 0; // typecode of last container touched
610 uint32_t prev = 0; // previous valued inserted
611 size_t i = 0; // index of value
612 int containerindex = 0;
613 if (n_args == 0)
614 return;
615 uint32_t val;
616 readBinary(val, dbBuf);
617 container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
618 prev = val;
619 ++i;
620 for (; i < n_args; ++i)
621 {
622 readBinary(val, dbBuf);
623 if (((prev ^ val) >> 16) == 0)
624 { // no need to seek the container, it is at hand
625 // because we already have the container at hand, we can do the
626 // insertion
627 // automatically, bypassing the roaring_bitmap_add call
628 uint8_t newtypecode = typecode;
629 void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode);
630 // rare instance when we need to
631 if (container2 != container)
632 {
633 // change the container type
634 container_free(container, typecode);
635 ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode);
636 typecode = newtypecode;
637 container = container2;
638 }
639 }
640 else
641 {
642 container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
643 }
644 prev = val;
645 }
646 }
647
648 void db_ra_to_uint32_array(DB::WriteBuffer & dbBuf, roaring_array_t * ra) const
649 {
650 size_t ctr = 0;
651 for (Int32 i = 0; i < ra->size; ++i)
652 {
653 Int32 num_added = db_container_to_uint32_array(dbBuf, ra->containers[i], ra->typecodes[i], (static_cast<UInt32>(ra->keys[i])) << 16);
654 ctr += num_added;
655 }
656 }
657
658 UInt32 db_container_to_uint32_array(DB::WriteBuffer & dbBuf, const void * container, UInt8 typecode, UInt32 base) const
659 {
660 container = container_unwrap_shared(container, &typecode);
661 switch (typecode)
662 {
663 case BITSET_CONTAINER_TYPE_CODE:
664 return db_bitset_container_to_uint32_array(dbBuf, static_cast<const bitset_container_t *>(container), base);
665 case ARRAY_CONTAINER_TYPE_CODE:
666 return db_array_container_to_uint32_array(dbBuf, static_cast<const array_container_t *>(container), base);
667 case RUN_CONTAINER_TYPE_CODE:
668 return db_run_container_to_uint32_array(dbBuf, static_cast<const run_container_t *>(container), base);
669 }
670 return 0;
671 }
672
673 UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & dbBuf, const bitset_container_t * cont, UInt32 base) const
674 {
675 return static_cast<UInt32>(db_bitset_extract_setbits(dbBuf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base));
676 }
677
678 size_t db_bitset_extract_setbits(DB::WriteBuffer & dbBuf, UInt64 * bitset, size_t length, UInt32 base) const
679 {
680 UInt32 outpos = 0;
681 for (size_t i = 0; i < length; ++i)
682 {
683 UInt64 w = bitset[i];
684 while (w != 0)
685 {
686 UInt64 t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
687 UInt32 r = __builtin_ctzll(w); // on x64, should compile to TZCNT
688 UInt32 val = r + base;
689 writePODBinary(val, dbBuf);
690 outpos++;
691 w ^= t;
692 }
693 base += 64;
694 }
695 return outpos;
696 }
697
698 int db_array_container_to_uint32_array(DB::WriteBuffer & dbBuf, const array_container_t * cont, UInt32 base) const
699 {
700 UInt32 outpos = 0;
701 for (Int32 i = 0; i < cont->cardinality; ++i)
702 {
703 const UInt32 val = base + cont->array[i];
704 writePODBinary(val, dbBuf);
705 outpos++;
706 }
707 return outpos;
708 }
709
710 int db_run_container_to_uint32_array(DB::WriteBuffer & dbBuf, const run_container_t * cont, UInt32 base) const
711 {
712 UInt32 outpos = 0;
713 for (Int32 i = 0; i < cont->n_runs; ++i)
714 {
715 UInt32 run_start = base + cont->runs[i].value;
716 UInt16 le = cont->runs[i].length;
717 for (Int32 j = 0; j <= le; ++j)
718 {
719 UInt32 val = run_start + j;
720 writePODBinary(val, dbBuf);
721 outpos++;
722 }
723 }
724 return outpos;
725 }
726};
727
728template <typename T>
729struct AggregateFunctionGroupBitmapData
730{
731 bool doneFirst = false;
732 RoaringBitmapWithSmallSet<T, 32> rbs;
733 static const char * name() { return "groupBitmap"; }
734};
735
736
737}
738