1 | /* |
2 | A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many |
3 | 32-bit Roaring Bitmaps. |
4 | */ |
5 | #ifndef INCLUDE_ROARING_64_MAP_HH_ |
6 | #define INCLUDE_ROARING_64_MAP_HH_ |
7 | |
8 | #include <algorithm> |
9 | #include <cstdarg> |
10 | #include <cstdio> |
11 | #include <limits> |
12 | #include <map> |
13 | #include <new> |
14 | #include <numeric> |
15 | #include <stdexcept> |
16 | #include <string> |
17 | #include <utility> |
18 | |
19 | #include "roaring.hh" |
20 | |
21 | class Roaring64MapSetBitForwardIterator; |
22 | class Roaring64MapSetBitBiDirectionalIterator; |
23 | |
24 | class Roaring64Map { |
25 | public: |
26 | /** |
27 | * Create an empty bitmap |
28 | */ |
29 | Roaring64Map() = default; |
30 | |
31 | /** |
32 | * Construct a bitmap from a list of 32-bit integer values. |
33 | */ |
34 | Roaring64Map(size_t n, const uint32_t *data) { addMany(n, data); } |
35 | |
36 | /** |
37 | * Construct a bitmap from a list of 64-bit integer values. |
38 | */ |
39 | Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); } |
40 | |
41 | /** |
42 | * Construct a 64-bit map from a 32-bit one |
43 | */ |
44 | Roaring64Map(const Roaring &r) { emplaceOrInsert(0, r); } |
45 | |
46 | /** |
47 | * Construct a roaring object from the C struct. |
48 | * |
49 | * Passing a NULL point is unsafe. |
50 | */ |
51 | Roaring64Map(roaring_bitmap_t *s) { emplaceOrInsert(0, s); } |
52 | |
53 | /** |
54 | * Assignment operator. |
55 | */ |
56 | Roaring64Map &operator=(const Roaring64Map &r) { |
57 | roarings = r.roarings; |
58 | return *this; |
59 | } |
60 | |
61 | /** |
62 | * Construct a bitmap from a list of integer values. |
63 | */ |
64 | static Roaring64Map bitmapOf(size_t n...) { |
65 | Roaring64Map ans; |
66 | va_list vl; |
67 | va_start(vl, n); |
68 | for (size_t i = 0; i < n; i++) { |
69 | ans.add(va_arg(vl, uint64_t)); |
70 | } |
71 | va_end(vl); |
72 | return ans; |
73 | } |
74 | |
75 | /** |
76 | * Add value x |
77 | * |
78 | */ |
79 | void add(uint32_t x) { |
80 | roarings[0].add(x); |
81 | roarings[0].setCopyOnWrite(copyOnWrite); |
82 | } |
83 | void add(uint64_t x) { |
84 | roarings[highBytes(x)].add(lowBytes(x)); |
85 | roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); |
86 | } |
87 | |
88 | /** |
89 | * Add value x |
90 | * Returns true if a new value was added, false if the value was already existing. |
91 | */ |
92 | bool addChecked(uint32_t x) { |
93 | bool result = roarings[0].addChecked(x); |
94 | roarings[0].setCopyOnWrite(copyOnWrite); |
95 | return result; |
96 | } |
97 | bool addChecked(uint64_t x) { |
98 | bool result = roarings[highBytes(x)].addChecked(lowBytes(x)); |
99 | roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); |
100 | return result; |
101 | } |
102 | |
103 | /** |
104 | * Add value n_args from pointer vals |
105 | * |
106 | */ |
107 | void addMany(size_t n_args, const uint32_t *vals) { |
108 | for (size_t lcv = 0; lcv < n_args; lcv++) { |
109 | roarings[0].add(vals[lcv]); |
110 | roarings[0].setCopyOnWrite(copyOnWrite); |
111 | } |
112 | } |
113 | void addMany(size_t n_args, const uint64_t *vals) { |
114 | for (size_t lcv = 0; lcv < n_args; lcv++) { |
115 | roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); |
116 | roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); |
117 | } |
118 | } |
119 | |
120 | /** |
121 | * Remove value x |
122 | * |
123 | */ |
124 | void remove(uint32_t x) { roarings[0].remove(x); } |
125 | void remove(uint64_t x) { |
126 | auto roaring_iter = roarings.find(highBytes(x)); |
127 | if (roaring_iter != roarings.cend()) |
128 | roaring_iter->second.remove(lowBytes(x)); |
129 | } |
130 | |
131 | /** |
132 | * Remove value x |
133 | * Returns true if a new value was removed, false if the value was not existing. |
134 | */ |
135 | bool removeChecked(uint32_t x) { |
136 | return roarings[0].removeChecked(x); |
137 | } |
138 | bool removeChecked(uint64_t x) { |
139 | auto roaring_iter = roarings.find(highBytes(x)); |
140 | if (roaring_iter != roarings.cend()) |
141 | return roaring_iter->second.removeChecked(lowBytes(x)); |
142 | return false; |
143 | } |
144 | |
145 | /** |
146 | * Clear the bitmap |
147 | */ |
148 | void clear() { |
149 | roarings.clear(); |
150 | } |
151 | |
152 | /** |
153 | * Return the largest value (if not empty) |
154 | * |
155 | */ |
156 | uint64_t maximum() const { |
157 | for (auto roaring_iter = roarings.crbegin(); |
158 | roaring_iter != roarings.crend(); ++roaring_iter) { |
159 | if (!roaring_iter->second.isEmpty()) { |
160 | return uniteBytes(roaring_iter->first, |
161 | roaring_iter->second.maximum()); |
162 | } |
163 | } |
164 | // we put std::numeric_limits<>::max/min in parenthesis |
165 | // to avoid a clash with the Windows.h header under Windows |
166 | return (std::numeric_limits<uint64_t>::min)(); |
167 | } |
168 | |
169 | /** |
170 | * Return the smallest value (if not empty) |
171 | * |
172 | */ |
173 | uint64_t minimum() const { |
174 | for (auto roaring_iter = roarings.cbegin(); |
175 | roaring_iter != roarings.cend(); ++roaring_iter) { |
176 | if (!roaring_iter->second.isEmpty()) { |
177 | return uniteBytes(roaring_iter->first, |
178 | roaring_iter->second.minimum()); |
179 | } |
180 | } |
181 | // we put std::numeric_limits<>::max/min in parenthesis |
182 | // to avoid a clash with the Windows.h header under Windows |
183 | return (std::numeric_limits<uint64_t>::max)(); |
184 | } |
185 | |
186 | /** |
187 | * Check if value x is present |
188 | */ |
189 | bool contains(uint32_t x) const { |
190 | return roarings.count(0) == 0 ? false : roarings.at(0).contains(x); |
191 | } |
192 | bool contains(uint64_t x) const { |
193 | return roarings.count(highBytes(x)) == 0 |
194 | ? false |
195 | : roarings.at(highBytes(x)).contains(lowBytes(x)); |
196 | } |
197 | |
198 | /** |
199 | * Compute the intersection between the current bitmap and the provided |
200 | * bitmap, |
201 | * writing the result in the current bitmap. The provided bitmap is not |
202 | * modified. |
203 | */ |
204 | Roaring64Map &operator&=(const Roaring64Map &r) { |
205 | for (auto &map_entry : roarings) { |
206 | if (r.roarings.count(map_entry.first) == 1) |
207 | map_entry.second &= r.roarings.at(map_entry.first); |
208 | else |
209 | map_entry.second = Roaring(); |
210 | } |
211 | return *this; |
212 | } |
213 | |
214 | /** |
215 | * Compute the difference between the current bitmap and the provided |
216 | * bitmap, |
217 | * writing the result in the current bitmap. The provided bitmap is not |
218 | * modified. |
219 | */ |
220 | Roaring64Map &operator-=(const Roaring64Map &r) { |
221 | for (auto &map_entry : roarings) { |
222 | if (r.roarings.count(map_entry.first) == 1) |
223 | map_entry.second -= r.roarings.at(map_entry.first); |
224 | } |
225 | return *this; |
226 | } |
227 | |
228 | /** |
229 | * Compute the union between the current bitmap and the provided bitmap, |
230 | * writing the result in the current bitmap. The provided bitmap is not |
231 | * modified. |
232 | * |
233 | * See also the fastunion function to aggregate many bitmaps more quickly. |
234 | */ |
235 | Roaring64Map &operator|=(const Roaring64Map &r) { |
236 | for (const auto &map_entry : r.roarings) { |
237 | if (roarings.count(map_entry.first) == 0) { |
238 | roarings[map_entry.first] = map_entry.second; |
239 | roarings[map_entry.first].setCopyOnWrite(copyOnWrite); |
240 | } else |
241 | roarings[map_entry.first] |= map_entry.second; |
242 | } |
243 | return *this; |
244 | } |
245 | |
246 | /** |
247 | * Compute the symmetric union between the current bitmap and the provided |
248 | * bitmap, |
249 | * writing the result in the current bitmap. The provided bitmap is not |
250 | * modified. |
251 | */ |
252 | Roaring64Map &operator^=(const Roaring64Map &r) { |
253 | for (const auto &map_entry : r.roarings) { |
254 | if (roarings.count(map_entry.first) == 0) { |
255 | roarings[map_entry.first] = map_entry.second; |
256 | roarings[map_entry.first].setCopyOnWrite(copyOnWrite); |
257 | } else |
258 | roarings[map_entry.first] ^= map_entry.second; |
259 | } |
260 | return *this; |
261 | } |
262 | |
263 | /** |
264 | * Exchange the content of this bitmap with another. |
265 | */ |
266 | void swap(Roaring64Map &r) { roarings.swap(r.roarings); } |
267 | |
268 | /** |
269 | * Get the cardinality of the bitmap (number of elements). |
270 | * Throws std::length_error in the special case where the bitmap is full |
271 | * (cardinality() == 2^64). Check isFull() before calling to avoid |
272 | * exception. |
273 | */ |
274 | uint64_t cardinality() const { |
275 | if (isFull()) { |
276 | throw std::length_error( |
277 | "bitmap is full, cardinality is 2^64, " |
278 | "unable to represent in a 64-bit integer" ); |
279 | } |
280 | return std::accumulate( |
281 | roarings.cbegin(), roarings.cend(), (uint64_t)0, |
282 | [](uint64_t previous, |
283 | const std::pair<uint32_t, Roaring> &map_entry) { |
284 | return previous + map_entry.second.cardinality(); |
285 | }); |
286 | } |
287 | |
288 | /** |
289 | * Returns true if the bitmap is empty (cardinality is zero). |
290 | */ |
291 | bool isEmpty() const { |
292 | return std::all_of(roarings.cbegin(), roarings.cend(), |
293 | [](const std::pair<uint32_t, Roaring> &map_entry) { |
294 | return map_entry.second.isEmpty(); |
295 | }); |
296 | } |
297 | |
298 | /** |
299 | * Returns true if the bitmap is full (cardinality is max uint64_t + 1). |
300 | */ |
301 | bool isFull() const { |
302 | // only bother to check if map is fully saturated |
303 | // |
304 | // we put std::numeric_limits<>::max/min in parenthesis |
305 | // to avoid a clash with the Windows.h header under Windows |
306 | return roarings.size() == |
307 | ((size_t)(std::numeric_limits<uint32_t>::max)()) + 1 |
308 | ? std::all_of( |
309 | roarings.cbegin(), roarings.cend(), |
310 | [](const std::pair<uint32_t, Roaring> &roaring_map_entry) { |
311 | // roarings within map are saturated if cardinality |
312 | // is uint32_t max + 1 |
313 | return roaring_map_entry.second.cardinality() == |
314 | ((uint64_t) |
315 | (std::numeric_limits<uint32_t>::max)()) + |
316 | 1; |
317 | }) |
318 | : false; |
319 | } |
320 | |
321 | /** |
322 | * Returns true if the bitmap is subset of the other. |
323 | */ |
324 | bool isSubset(const Roaring64Map &r) const { |
325 | for (const auto &map_entry : roarings) { |
326 | auto roaring_iter = r.roarings.find(map_entry.first); |
327 | if (roaring_iter == roarings.cend()) |
328 | return false; |
329 | else if (!map_entry.second.isSubset(roaring_iter->second)) |
330 | return false; |
331 | } |
332 | return true; |
333 | } |
334 | |
335 | /** |
336 | * Returns true if the bitmap is strict subset of the other. |
337 | * Throws std::length_error in the special case where the bitmap is full |
338 | * (cardinality() == 2^64). Check isFull() before calling to avoid exception. |
339 | */ |
340 | bool isStrictSubset(const Roaring64Map &r) const { |
341 | return isSubset(r) && cardinality() != r.cardinality(); |
342 | } |
343 | |
344 | /** |
345 | * Convert the bitmap to an array. Write the output to "ans", |
346 | * caller is responsible to ensure that there is enough memory |
347 | * allocated |
348 | * (e.g., ans = new uint32[mybitmap.cardinality()];) |
349 | */ |
350 | void toUint64Array(uint64_t *ans) const { |
351 | // Annoyingly, VS 2017 marks std::accumulate() as [[nodiscard]] |
352 | (void)std::accumulate(roarings.cbegin(), roarings.cend(), ans, |
353 | [](uint64_t *previous, |
354 | const std::pair<uint32_t, Roaring> &map_entry) { |
355 | for (uint32_t low_bits : map_entry.second) |
356 | *previous++ = |
357 | uniteBytes(map_entry.first, low_bits); |
358 | return previous; |
359 | }); |
360 | } |
361 | |
362 | /** |
363 | * Return true if the two bitmaps contain the same elements. |
364 | */ |
365 | bool operator==(const Roaring64Map &r) const { |
366 | // we cannot use operator == on the map because either side may contain |
367 | // empty Roaring Bitmaps |
368 | auto lhs_iter = roarings.cbegin(); |
369 | auto rhs_iter = r.roarings.cbegin(); |
370 | do { |
371 | // if the left map has reached its end, ensure that the right map |
372 | // contains only empty Bitmaps |
373 | if (lhs_iter == roarings.cend()) { |
374 | while (rhs_iter != r.roarings.cend()) { |
375 | if (rhs_iter->second.isEmpty()) { |
376 | ++rhs_iter; |
377 | continue; |
378 | } |
379 | return false; |
380 | } |
381 | return true; |
382 | } |
383 | // if the left map has an empty bitmap, skip it |
384 | if (lhs_iter->second.isEmpty()) { |
385 | ++lhs_iter; |
386 | continue; |
387 | } |
388 | |
389 | do { |
390 | // if the right map has reached its end, ensure that the right |
391 | // map contains only empty Bitmaps |
392 | if (rhs_iter == r.roarings.cend()) { |
393 | while (lhs_iter != roarings.cend()) { |
394 | if (lhs_iter->second.isEmpty()) { |
395 | ++lhs_iter; |
396 | continue; |
397 | } |
398 | return false; |
399 | } |
400 | return true; |
401 | } |
402 | // if the right map has an empty bitmap, skip it |
403 | if (rhs_iter->second.isEmpty()) { |
404 | ++rhs_iter; |
405 | continue; |
406 | } |
407 | } while (false); |
408 | // if neither map has reached its end ensure elements are equal and |
409 | // move to the next element in both |
410 | } while (lhs_iter++->second == rhs_iter++->second); |
411 | return false; |
412 | } |
413 | |
414 | /** |
415 | * compute the negation of the roaring bitmap within a specified interval. |
416 | * areas outside the range are passed through unchanged. |
417 | */ |
418 | void flip(uint64_t range_start, uint64_t range_end) { |
419 | uint32_t start_high = highBytes(range_start); |
420 | uint32_t start_low = lowBytes(range_start); |
421 | uint32_t end_high = highBytes(range_end); |
422 | uint32_t end_low = lowBytes(range_end); |
423 | |
424 | if (start_high == end_high) { |
425 | roarings[start_high].flip(start_low, end_low); |
426 | return; |
427 | } |
428 | // we put std::numeric_limits<>::max/min in parenthesis |
429 | // to avoid a clash with the Windows.h header under Windows |
430 | roarings[start_high].flip(start_low, |
431 | (std::numeric_limits<uint32_t>::max)()); |
432 | roarings[start_high++].setCopyOnWrite(copyOnWrite); |
433 | |
434 | for (; start_high <= highBytes(range_end) - 1; ++start_high) { |
435 | roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(), |
436 | (std::numeric_limits<uint32_t>::max)()); |
437 | roarings[start_high].setCopyOnWrite(copyOnWrite); |
438 | } |
439 | |
440 | roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(), |
441 | end_low); |
442 | roarings[start_high].setCopyOnWrite(copyOnWrite); |
443 | } |
444 | |
445 | /** |
446 | * Remove run-length encoding even when it is more space efficient |
447 | * return whether a change was applied |
448 | */ |
449 | bool removeRunCompression() { |
450 | return std::accumulate( |
451 | roarings.begin(), roarings.end(), false, |
452 | [](bool previous, std::pair<const uint32_t, Roaring> &map_entry) { |
453 | return map_entry.second.removeRunCompression() && previous; |
454 | }); |
455 | } |
456 | |
457 | /** convert array and bitmap containers to run containers when it is more |
458 | * efficient; |
459 | * also convert from run containers when more space efficient. Returns |
460 | * true if the result has at least one run container. |
461 | * Additional savings might be possible by calling shrinkToFit(). |
462 | */ |
463 | bool runOptimize() { |
464 | return std::accumulate( |
465 | roarings.begin(), roarings.end(), false, |
466 | [](bool previous, std::pair<const uint32_t, Roaring> &map_entry) { |
467 | return map_entry.second.runOptimize() && previous; |
468 | }); |
469 | } |
470 | |
471 | /** |
472 | * If needed, reallocate memory to shrink the memory usage. Returns |
473 | * the number of bytes saved. |
474 | */ |
475 | size_t shrinkToFit() { |
476 | size_t savedBytes = 0; |
477 | auto iter = roarings.begin(); |
478 | while (iter != roarings.cend()) { |
479 | if (iter->second.isEmpty()) { |
480 | // empty Roarings are 84 bytes |
481 | savedBytes += 88; |
482 | roarings.erase(iter++); |
483 | } else { |
484 | savedBytes += iter->second.shrinkToFit(); |
485 | iter++; |
486 | } |
487 | } |
488 | return savedBytes; |
489 | } |
490 | |
491 | /** |
492 | * Iterate over the bitmap elements. The function iterator is called once |
493 | * for all the values with ptr (can be NULL) as the second parameter of each |
494 | * call. |
495 | * |
496 | * roaring_iterator is simply a pointer to a function that returns bool |
497 | * (true means that the iteration should continue while false means that it |
498 | * should stop), and takes (uint32_t,void*) as inputs. |
499 | */ |
500 | void iterate(roaring_iterator64 iterator, void *ptr) const { |
501 | std::for_each(roarings.begin(), roarings.cend(), |
502 | [=](const std::pair<uint32_t, Roaring> &map_entry) { |
503 | roaring_iterate64(&map_entry.second.roaring, iterator, |
504 | uint64_t(map_entry.first) << 32, |
505 | ptr); |
506 | }); |
507 | } |
508 | |
509 | /** |
510 | * If the size of the roaring bitmap is strictly greater than rank, then |
511 | this |
512 | function returns true and set element to the element of given rank. |
513 | Otherwise, it returns false. |
514 | */ |
515 | bool select(uint64_t rnk, uint64_t *element) const { |
516 | for (const auto &map_entry : roarings) { |
517 | uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality(); |
518 | if (rnk < sub_cardinality) { |
519 | *element = ((uint64_t)map_entry.first) << 32; |
520 | // assuming little endian |
521 | return map_entry.second.select((uint32_t)rnk, |
522 | ((uint32_t *)element)); |
523 | } |
524 | rnk -= sub_cardinality; |
525 | } |
526 | return false; |
527 | } |
528 | |
529 | /** |
530 | * Returns the number of integers that are smaller or equal to x. |
531 | */ |
532 | uint64_t rank(uint64_t x) const { |
533 | uint64_t result = 0; |
534 | auto roaring_destination = roarings.find(highBytes(x)); |
535 | if (roaring_destination != roarings.cend()) { |
536 | for (auto roaring_iter = roarings.cbegin(); |
537 | roaring_iter != roaring_destination; ++roaring_iter) { |
538 | result += roaring_iter->second.cardinality(); |
539 | } |
540 | result += roaring_destination->second.rank(lowBytes(x)); |
541 | return result; |
542 | } |
543 | roaring_destination = roarings.lower_bound(highBytes(x)); |
544 | for (auto roaring_iter = roarings.cbegin(); |
545 | roaring_iter != roaring_destination; ++roaring_iter) { |
546 | result += roaring_iter->second.cardinality(); |
547 | } |
548 | return result; |
549 | } |
550 | |
551 | /** |
552 | * write a bitmap to a char buffer. This is meant to be compatible with |
553 | * the |
554 | * Java and Go versions. Returns how many bytes were written which should be |
555 | * getSizeInBytes(). |
556 | * |
557 | * Setting the portable flag to false enable a custom format that |
558 | * can save space compared to the portable format (e.g., for very |
559 | * sparse bitmaps). |
560 | */ |
561 | size_t write(char *buf, bool portable = true) const { |
562 | const char *orig = buf; |
563 | // push map size |
564 | *((uint64_t *)buf) = roarings.size(); |
565 | buf += sizeof(uint64_t); |
566 | std::for_each( |
567 | roarings.cbegin(), roarings.cend(), |
568 | [&buf, portable](const std::pair<uint32_t, Roaring> &map_entry) { |
569 | // push map key |
570 | memcpy(buf, &map_entry.first, |
571 | sizeof(uint32_t)); // this is undefined: |
572 | // *((uint32_t*)buf) = |
573 | // map_entry.first; |
574 | buf += sizeof(uint32_t); |
575 | // push map value Roaring |
576 | buf += map_entry.second.write(buf, portable); |
577 | }); |
578 | return buf - orig; |
579 | } |
580 | |
581 | /** |
582 | * read a bitmap from a serialized version. This is meant to be compatible |
583 | * with |
584 | * the |
585 | * Java and Go versions. |
586 | * |
587 | * Setting the portable flag to false enable a custom format that |
588 | * can save space compared to the portable format (e.g., for very |
589 | * sparse bitmaps). |
590 | * |
591 | * This function is unsafe in the sense that if you provide bad data, |
592 | * many bytes could be read, possibly causing a buffer overflow. See also readSafe. |
593 | */ |
594 | static Roaring64Map read(const char *buf, bool portable = true) { |
595 | Roaring64Map result; |
596 | // get map size |
597 | uint64_t map_size = *((uint64_t *)buf); |
598 | buf += sizeof(uint64_t); |
599 | for (uint64_t lcv = 0; lcv < map_size; lcv++) { |
600 | // get map key |
601 | uint32_t key; |
602 | memcpy(&key, buf, sizeof(uint32_t)); // this is undefined: uint32_t |
603 | // key = *((uint32_t*)buf); |
604 | buf += sizeof(uint32_t); |
605 | // read map value Roaring |
606 | Roaring read = Roaring::read(buf, portable); |
607 | result.emplaceOrInsert(key, read); |
608 | // forward buffer past the last Roaring Bitmap |
609 | buf += read.getSizeInBytes(portable); |
610 | } |
611 | return result; |
612 | } |
613 | |
614 | /** |
615 | * read a bitmap from a serialized version, reading no more than maxbytes bytes. |
616 | * This is meant to be compatible with the Java and Go versions. |
617 | * |
618 | * Setting the portable flag to false enable a custom format that |
619 | * can save space compared to the portable format (e.g., for very |
620 | * sparse bitmaps). |
621 | */ |
622 | static Roaring64Map readSafe(const char *buf, size_t maxbytes) { |
623 | Roaring64Map result; |
624 | // get map size |
625 | uint64_t map_size = *((uint64_t *)buf); |
626 | buf += sizeof(uint64_t); |
627 | for (uint64_t lcv = 0; lcv < map_size; lcv++) { |
628 | // get map key |
629 | if(maxbytes < sizeof(uint32_t)) { |
630 | throw std::runtime_error("ran out of bytes" ); |
631 | } |
632 | uint32_t key; |
633 | memcpy(&key, buf, sizeof(uint32_t)); // this is undefined: uint32_t |
634 | // key = *((uint32_t*)buf); |
635 | buf += sizeof(uint32_t); |
636 | maxbytes -= sizeof(uint32_t); |
637 | // read map value Roaring |
638 | Roaring read = Roaring::readSafe(buf, maxbytes); |
639 | result.emplaceOrInsert(key, read); |
640 | // forward buffer past the last Roaring Bitmap |
641 | size_t tz = read.getSizeInBytes(true); |
642 | buf += tz; |
643 | maxbytes -= tz; |
644 | } |
645 | return result; |
646 | } |
647 | |
648 | /** |
649 | * How many bytes are required to serialize this bitmap (meant to be |
650 | * compatible |
651 | * with Java and Go versions) |
652 | * |
653 | * Setting the portable flag to false enable a custom format that |
654 | * can save space compared to the portable format (e.g., for very |
655 | * sparse bitmaps). |
656 | */ |
657 | size_t getSizeInBytes(bool portable = true) const { |
658 | // start with, respectively, map size and size of keys for each map |
659 | // entry |
660 | return std::accumulate( |
661 | roarings.cbegin(), roarings.cend(), |
662 | sizeof(uint64_t) + roarings.size() * sizeof(uint32_t), |
663 | [=](size_t previous, |
664 | const std::pair<uint32_t, Roaring> &map_entry) { |
665 | // add in bytes used by each Roaring |
666 | return previous + map_entry.second.getSizeInBytes(portable); |
667 | }); |
668 | } |
669 | |
670 | /** |
671 | * Computes the intersection between two bitmaps and returns new bitmap. |
672 | * The current bitmap and the provided bitmap are unchanged. |
673 | */ |
674 | Roaring64Map operator&(const Roaring64Map &o) const { |
675 | return Roaring64Map(*this) &= o; |
676 | } |
677 | |
678 | /** |
679 | * Computes the difference between two bitmaps and returns new bitmap. |
680 | * The current bitmap and the provided bitmap are unchanged. |
681 | */ |
682 | Roaring64Map operator-(const Roaring64Map &o) const { |
683 | return Roaring64Map(*this) -= o; |
684 | } |
685 | |
686 | /** |
687 | * Computes the union between two bitmaps and returns new bitmap. |
688 | * The current bitmap and the provided bitmap are unchanged. |
689 | */ |
690 | Roaring64Map operator|(const Roaring64Map &o) const { |
691 | return Roaring64Map(*this) |= o; |
692 | } |
693 | |
694 | /** |
695 | * Computes the symmetric union between two bitmaps and returns new bitmap. |
696 | * The current bitmap and the provided bitmap are unchanged. |
697 | */ |
698 | Roaring64Map operator^(const Roaring64Map &o) const { |
699 | return Roaring64Map(*this) ^= o; |
700 | } |
701 | |
702 | /** |
703 | * Whether or not we apply copy and write. |
704 | */ |
705 | void setCopyOnWrite(bool val) { |
706 | if (copyOnWrite == val) return; |
707 | copyOnWrite = val; |
708 | std::for_each(roarings.begin(), roarings.end(), |
709 | [=](std::pair<const uint32_t, Roaring> &map_entry) { |
710 | map_entry.second.setCopyOnWrite(val); |
711 | }); |
712 | } |
713 | |
714 | /** |
715 | * Print the content of the bitmap |
716 | */ |
717 | void printf() const { |
718 | if (!isEmpty()) { |
719 | auto map_iter = roarings.cbegin(); |
720 | while (map_iter->second.isEmpty()) ++map_iter; |
721 | struct iter_data { |
722 | uint32_t high_bits; |
723 | char first_char = '{'; |
724 | } outer_iter_data; |
725 | outer_iter_data.high_bits = roarings.begin()->first; |
726 | map_iter->second.iterate( |
727 | [](uint32_t low_bits, void *inner_iter_data) -> bool { |
728 | std::printf("%c%llu" , |
729 | ((iter_data *)inner_iter_data)->first_char, |
730 | (long long unsigned)uniteBytes( |
731 | ((iter_data *)inner_iter_data)->high_bits, |
732 | low_bits)); |
733 | ((iter_data *)inner_iter_data)->first_char = ','; |
734 | return true; |
735 | }, |
736 | (void *)&outer_iter_data); |
737 | std::for_each( |
738 | ++map_iter, roarings.cend(), |
739 | [](const std::pair<uint32_t, Roaring> &map_entry) { |
740 | map_entry.second.iterate( |
741 | [](uint32_t low_bits, void *high_bits) -> bool { |
742 | std::printf(",%llu" , |
743 | (long long unsigned)uniteBytes( |
744 | *(uint32_t *)high_bits, low_bits)); |
745 | return true; |
746 | }, |
747 | (void *)&map_entry.first); |
748 | }); |
749 | } else |
750 | std::printf("{" ); |
751 | std::printf("}\n" ); |
752 | } |
753 | |
754 | /** |
755 | * Print the content of the bitmap into a string |
756 | */ |
757 | std::string toString() const { |
758 | struct iter_data { |
759 | std::string str; |
760 | uint32_t high_bits; |
761 | char first_char = '{'; |
762 | } outer_iter_data; |
763 | if (!isEmpty()) { |
764 | auto map_iter = roarings.cbegin(); |
765 | while (map_iter->second.isEmpty()) ++map_iter; |
766 | outer_iter_data.high_bits = roarings.begin()->first; |
767 | map_iter->second.iterate( |
768 | [](uint32_t low_bits, void *inner_iter_data) -> bool { |
769 | ((iter_data *)inner_iter_data)->str += |
770 | ((iter_data *)inner_iter_data)->first_char; |
771 | ((iter_data *)inner_iter_data)->str += std::to_string( |
772 | uniteBytes(((iter_data *)inner_iter_data)->high_bits, |
773 | low_bits)); |
774 | ((iter_data *)inner_iter_data)->first_char = ','; |
775 | return true; |
776 | }, |
777 | (void *)&outer_iter_data); |
778 | std::for_each( |
779 | ++map_iter, roarings.cend(), |
780 | [&outer_iter_data]( |
781 | const std::pair<uint32_t, Roaring> &map_entry) { |
782 | outer_iter_data.high_bits = map_entry.first; |
783 | map_entry.second.iterate( |
784 | [](uint32_t low_bits, void *inner_iter_data) -> bool { |
785 | ((iter_data *)inner_iter_data)->str += |
786 | ((iter_data *)inner_iter_data)->first_char; |
787 | ((iter_data *)inner_iter_data)->str += |
788 | std::to_string(uniteBytes( |
789 | ((iter_data *)inner_iter_data)->high_bits, |
790 | low_bits)); |
791 | return true; |
792 | }, |
793 | (void *)&outer_iter_data); |
794 | }); |
795 | } else |
796 | outer_iter_data.str = '{'; |
797 | outer_iter_data.str += '}'; |
798 | return outer_iter_data.str; |
799 | } |
800 | |
801 | /** |
802 | * Whether or not copy and write is active. |
803 | */ |
804 | bool getCopyOnWrite() const { return copyOnWrite; } |
805 | |
806 | /** |
807 | * computes the logical or (union) between "n" bitmaps (referenced by a |
808 | * pointer). |
809 | */ |
810 | static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) { |
811 | Roaring64Map ans; |
812 | // not particularly fast |
813 | for (size_t lcv = 0; lcv < n; ++lcv) { |
814 | ans |= *(inputs[lcv]); |
815 | } |
816 | return ans; |
817 | } |
818 | |
819 | friend class Roaring64MapSetBitForwardIterator; |
820 | friend class Roaring64MapSetBitBiDirectionalIterator; |
821 | typedef Roaring64MapSetBitForwardIterator const_iterator; |
822 | typedef Roaring64MapSetBitBiDirectionalIterator const_bidirectional_iterator; |
823 | |
824 | /** |
825 | * Returns an iterator that can be used to access the position of the |
826 | * set bits. The running time complexity of a full scan is proportional to |
827 | * the |
828 | * number |
829 | * of set bits: be aware that if you have long strings of 1s, this can be |
830 | * very inefficient. |
831 | * |
832 | * It can be much faster to use the toArray method if you want to |
833 | * retrieve the set bits. |
834 | */ |
835 | const_iterator begin() const; |
836 | |
837 | /** |
838 | * A bogus iterator that can be used together with begin() |
839 | * for constructions such as for(auto i = b.begin(); |
840 | * i!=b.end(); ++i) {} |
841 | */ |
842 | const_iterator end() const; |
843 | |
844 | private: |
845 | std::map<uint32_t, Roaring> roarings; |
846 | bool copyOnWrite = false; |
847 | static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } |
848 | static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } |
849 | static uint64_t uniteBytes(const uint32_t highBytes, |
850 | const uint32_t lowBytes) { |
851 | return (uint64_t(highBytes) << 32) | uint64_t(lowBytes); |
852 | } |
853 | // this is needed to tolerate gcc's C++11 libstdc++ lacking emplace |
854 | // prior to version 4.8 |
855 | void emplaceOrInsert(const uint32_t key, const Roaring &value) { |
856 | #if defined(__GLIBCXX__) && __GLIBCXX__ < 20130322 |
857 | roarings.insert(std::make_pair(key, value)); |
858 | #else |
859 | roarings.emplace(std::make_pair(key, value)); |
860 | #endif |
861 | } |
862 | }; |
863 | |
864 | /** |
865 | * Used to go through the set bits. Not optimally fast, but convenient. |
866 | */ |
867 | class Roaring64MapSetBitForwardIterator { |
868 | public: |
869 | typedef std::forward_iterator_tag iterator_category; |
870 | typedef uint64_t *pointer; |
871 | typedef uint64_t &reference_type; |
872 | typedef uint64_t value_type; |
873 | typedef int64_t difference_type; |
874 | typedef Roaring64MapSetBitForwardIterator type_of_iterator; |
875 | |
876 | /** |
877 | * Provides the location of the set bit. |
878 | */ |
879 | value_type operator*() const { |
880 | return Roaring64Map::uniteBytes(map_iter->first, i.current_value); |
881 | } |
882 | |
883 | bool operator<(const type_of_iterator &o) { |
884 | if (map_iter == map_end) return false; |
885 | if (o.map_iter == o.map_end) return true; |
886 | return **this < *o; |
887 | } |
888 | |
889 | bool operator<=(const type_of_iterator &o) { |
890 | if (o.map_iter == o.map_end) return true; |
891 | if (map_iter == map_end) return false; |
892 | return **this <= *o; |
893 | } |
894 | |
895 | bool operator>(const type_of_iterator &o) { |
896 | if (o.map_iter == o.map_end) return false; |
897 | if (map_iter == map_end) return true; |
898 | return **this > *o; |
899 | } |
900 | |
901 | bool operator>=(const type_of_iterator &o) { |
902 | if (map_iter == map_end) return true; |
903 | if (o.map_iter == o.map_end) return false; |
904 | return **this >= *o; |
905 | } |
906 | |
907 | type_of_iterator &operator++() { // ++i, must returned inc. value |
908 | if (i.has_value == true) roaring_advance_uint32_iterator(&i); |
909 | while (!i.has_value) { |
910 | map_iter++; |
911 | if (map_iter == map_end) return *this; |
912 | roaring_init_iterator(&map_iter->second.roaring, &i); |
913 | } |
914 | return *this; |
915 | } |
916 | |
917 | type_of_iterator operator++(int) { // i++, must return orig. value |
918 | Roaring64MapSetBitForwardIterator orig(*this); |
919 | roaring_advance_uint32_iterator(&i); |
920 | while (!i.has_value) { |
921 | map_iter++; |
922 | if (map_iter == map_end) return orig; |
923 | roaring_init_iterator(&map_iter->second.roaring, &i); |
924 | } |
925 | return orig; |
926 | } |
927 | |
928 | bool move(const value_type& x) { |
929 | map_iter = p.lower_bound(Roaring64Map::highBytes(x)); |
930 | if (map_iter != p.cend()) { |
931 | roaring_init_iterator(&map_iter->second.roaring, &i); |
932 | if (map_iter->first == Roaring64Map::highBytes(x)) { |
933 | if (roaring_move_uint32_iterator_equalorlarger(&i, Roaring64Map::lowBytes(x))) |
934 | return true; |
935 | map_iter++; |
936 | if (map_iter == map_end) return false; |
937 | roaring_init_iterator(&map_iter->second.roaring, &i); |
938 | } |
939 | return true; |
940 | } |
941 | return false; |
942 | } |
943 | |
944 | bool operator==(const Roaring64MapSetBitForwardIterator &o) { |
945 | if (map_iter == map_end && o.map_iter == o.map_end) return true; |
946 | if (o.map_iter == o.map_end) return false; |
947 | return **this == *o; |
948 | } |
949 | |
950 | bool operator!=(const Roaring64MapSetBitForwardIterator &o) { |
951 | if (map_iter == map_end && o.map_iter == o.map_end) return false; |
952 | if (o.map_iter == o.map_end) return true; |
953 | return **this != *o; |
954 | } |
955 | |
956 | Roaring64MapSetBitForwardIterator &operator=(const Roaring64MapSetBitForwardIterator& r) { |
957 | map_iter = r.map_iter; |
958 | map_end = r.map_end; |
959 | i = r.i; |
960 | return *this; |
961 | } |
962 | |
963 | Roaring64MapSetBitForwardIterator(const Roaring64Map &parent, |
964 | bool exhausted = false) |
965 | : p(parent.roarings), map_end(parent.roarings.cend()) { |
966 | if (exhausted || parent.roarings.empty()) { |
967 | map_iter = parent.roarings.cend(); |
968 | } else { |
969 | map_iter = parent.roarings.cbegin(); |
970 | roaring_init_iterator(&map_iter->second.roaring, &i); |
971 | while (!i.has_value) { |
972 | map_iter++; |
973 | if (map_iter == map_end) return; |
974 | roaring_init_iterator(&map_iter->second.roaring, &i); |
975 | } |
976 | } |
977 | } |
978 | |
979 | protected: |
980 | const std::map<uint32_t, Roaring>& p; |
981 | std::map<uint32_t, Roaring>::const_iterator map_iter; |
982 | std::map<uint32_t, Roaring>::const_iterator map_end; |
983 | roaring_uint32_iterator_t i; |
984 | }; |
985 | |
986 | class Roaring64MapSetBitBiDirectionalIterator final :public Roaring64MapSetBitForwardIterator { |
987 | public: |
988 | Roaring64MapSetBitBiDirectionalIterator(const Roaring64Map &parent, |
989 | bool exhausted = false) |
990 | : Roaring64MapSetBitForwardIterator(parent, exhausted), map_begin(parent.roarings.cbegin()) {} |
991 | |
992 | Roaring64MapSetBitBiDirectionalIterator &operator=(const Roaring64MapSetBitForwardIterator& r) { |
993 | *(Roaring64MapSetBitForwardIterator*)this = r; |
994 | return *this; |
995 | } |
996 | |
997 | type_of_iterator& operator--() { // --i, must return dec.value |
998 | if (map_iter == map_end) { |
999 | --map_iter; |
1000 | roaring_init_iterator_last(&map_iter->second.roaring, &i); |
1001 | if (i.has_value) return *this; |
1002 | } |
1003 | |
1004 | roaring_previous_uint32_iterator(&i); |
1005 | while (!i.has_value) { |
1006 | if (map_iter == map_begin) return *this; |
1007 | map_iter--; |
1008 | roaring_init_iterator_last(&map_iter->second.roaring, &i); |
1009 | } |
1010 | return *this; |
1011 | } |
1012 | |
1013 | type_of_iterator operator--(int) { // i--, must return orig. value |
1014 | Roaring64MapSetBitBiDirectionalIterator orig(*this); |
1015 | if (map_iter == map_end) { |
1016 | --map_iter; |
1017 | roaring_init_iterator_last(&map_iter->second.roaring, &i); |
1018 | return orig; |
1019 | } |
1020 | |
1021 | roaring_previous_uint32_iterator(&i); |
1022 | while (!i.has_value) { |
1023 | if (map_iter == map_begin) return orig; |
1024 | map_iter--; |
1025 | roaring_init_iterator_last(&map_iter->second.roaring, &i); |
1026 | } |
1027 | return orig; |
1028 | } |
1029 | |
1030 | protected: |
1031 | std::map<uint32_t, Roaring>::const_iterator map_begin; |
1032 | }; |
1033 | |
1034 | inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const { |
1035 | return Roaring64MapSetBitForwardIterator(*this); |
1036 | } |
1037 | |
1038 | inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const { |
1039 | return Roaring64MapSetBitForwardIterator(*this, true); |
1040 | } |
1041 | |
1042 | #endif /* INCLUDE_ROARING_64_MAP_HH_ */ |
1043 | |