1#include <assert.h>
2#include <roaring/array_util.h>
3#include <roaring/roaring.h>
4#include <roaring/roaring_array.h>
5#include <roaring/bitset_util.h>
6#include <stdarg.h>
7#include <stdint.h>
8#include <stdio.h>
9#include <string.h>
10#include <inttypes.h>
11
12extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r,
13 uint32_t val);
14extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
15extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
16
17static inline bool is_cow(const roaring_bitmap_t *r) {
18 return r->high_low_container.flags & ROARING_FLAG_COW;
19}
20static inline bool is_frozen(const roaring_bitmap_t *r) {
21 return r->high_low_container.flags & ROARING_FLAG_FROZEN;
22}
23
24// this is like roaring_bitmap_add, but it populates pointer arguments in such a
25// way
26// that we can recover the container touched, which, in turn can be used to
27// accelerate some functions (when you repeatedly need to add to the same
28// container)
29static inline void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r,
30 uint32_t val,
31 uint8_t *typecode,
32 int *index) {
33 uint16_t hb = val >> 16;
34 const int i = ra_get_index(&r->high_low_container, hb);
35 if (i >= 0) {
36 ra_unshare_container_at_index(&r->high_low_container, i);
37 void *container =
38 ra_get_container_at_index(&r->high_low_container, i, typecode);
39 uint8_t newtypecode = *typecode;
40 void *container2 =
41 container_add(container, val & 0xFFFF, *typecode, &newtypecode);
42 *index = i;
43 if (container2 != container) {
44 container_free(container, *typecode);
45 ra_set_container_at_index(&r->high_low_container, i, container2,
46 newtypecode);
47 *typecode = newtypecode;
48 return container2;
49 } else {
50 return container;
51 }
52 } else {
53 array_container_t *newac = array_container_create();
54 void *container = container_add(newac, val & 0xFFFF,
55 ARRAY_CONTAINER_TYPE_CODE, typecode);
56 // we could just assume that it stays an array container
57 ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
58 container, *typecode);
59 *index = -i - 1;
60 return container;
61 }
62}
63
64roaring_bitmap_t *roaring_bitmap_create() {
65 roaring_bitmap_t *ans =
66 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
67 if (!ans) {
68 return NULL;
69 }
70 ra_init(&ans->high_low_container);
71 return ans;
72}
73
74roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
75 roaring_bitmap_t *ans =
76 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
77 if (!ans) {
78 return NULL;
79 }
80 bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
81 if (!is_ok) {
82 free(ans);
83 return NULL;
84 }
85 return ans;
86}
87
88void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
89 const uint32_t *vals) {
90 void *container = NULL; // hold value of last container touched
91 uint8_t typecode = 0; // typecode of last container touched
92 uint32_t prev = 0; // previous valued inserted
93 size_t i = 0; // index of value
94 int containerindex = 0;
95 if (n_args == 0) return;
96 uint32_t val;
97 memcpy(&val, vals + i, sizeof(val));
98 container =
99 containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
100 prev = val;
101 i++;
102 for (; i < n_args; i++) {
103 memcpy(&val, vals + i, sizeof(val));
104 if (((prev ^ val) >> 16) ==
105 0) { // no need to seek the container, it is at hand
106 // because we already have the container at hand, we can do the
107 // insertion
108 // automatically, bypassing the roaring_bitmap_add call
109 uint8_t newtypecode = typecode;
110 void *container2 =
111 container_add(container, val & 0xFFFF, typecode, &newtypecode);
112 if (container2 != container) { // rare instance when we need to
113 // change the container type
114 container_free(container, typecode);
115 ra_set_container_at_index(&r->high_low_container,
116 containerindex, container2,
117 newtypecode);
118 typecode = newtypecode;
119 container = container2;
120 }
121 } else {
122 container = containerptr_roaring_bitmap_add(r, val, &typecode,
123 &containerindex);
124 }
125 prev = val;
126 }
127}
128
129roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
130 roaring_bitmap_t *answer = roaring_bitmap_create();
131 roaring_bitmap_add_many(answer, n_args, vals);
132 return answer;
133}
134
135roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
136 // todo: could be greatly optimized but we do not expect this call to ever
137 // include long lists
138 roaring_bitmap_t *answer = roaring_bitmap_create();
139 va_list ap;
140 va_start(ap, n_args);
141 for (size_t i = 1; i <= n_args; i++) {
142 uint32_t val = va_arg(ap, uint32_t);
143 roaring_bitmap_add(answer, val);
144 }
145 va_end(ap);
146 return answer;
147}
148
149static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
150 return (a < b) ? a : b;
151}
152
153static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
154 return (a < b) ? a : b;
155}
156
157roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
158 uint32_t step) {
159 if(max >= UINT64_C(0x100000000)) {
160 max = UINT64_C(0x100000000);
161 }
162 if (step == 0) return NULL;
163 if (max <= min) return NULL;
164 roaring_bitmap_t *answer = roaring_bitmap_create();
165 if (step >= (1 << 16)) {
166 for (uint32_t value = (uint32_t)min; value < max; value += step) {
167 roaring_bitmap_add(answer, value);
168 }
169 return answer;
170 }
171 uint64_t min_tmp = min;
172 do {
173 uint32_t key = (uint32_t)min_tmp >> 16;
174 uint32_t container_min = min_tmp & 0xFFFF;
175 uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
176 uint8_t type;
177 void *container = container_from_range(&type, container_min,
178 container_max, (uint16_t)step);
179 ra_append(&answer->high_low_container, key, container, type);
180 uint32_t gap = container_max - container_min + step - 1;
181 min_tmp += gap - (gap % step);
182 } while (min_tmp < max);
183 // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
184 return answer;
185}
186
187void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
188 if (min > max) {
189 return;
190 }
191
192 uint32_t min_key = min >> 16;
193 uint32_t max_key = max >> 16;
194
195 int32_t num_required_containers = max_key - min_key + 1;
196 int32_t suffix_length = count_greater(ra->high_low_container.keys,
197 ra->high_low_container.size,
198 max_key);
199 int32_t prefix_length = count_less(ra->high_low_container.keys,
200 ra->high_low_container.size - suffix_length,
201 min_key);
202 int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length;
203
204 if (num_required_containers > common_length) {
205 ra_shift_tail(&ra->high_low_container, suffix_length,
206 num_required_containers - common_length);
207 }
208
209 int32_t src = prefix_length + common_length - 1;
210 int32_t dst = ra->high_low_container.size - suffix_length - 1;
211 for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0
212 uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
213 uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
214 void* new_container;
215 uint8_t new_type;
216
217 if (src >= 0 && ra->high_low_container.keys[src] == key) {
218 ra_unshare_container_at_index(&ra->high_low_container, src);
219 new_container = container_add_range(ra->high_low_container.containers[src],
220 ra->high_low_container.typecodes[src],
221 container_min, container_max, &new_type);
222 if (new_container != ra->high_low_container.containers[src]) {
223 container_free(ra->high_low_container.containers[src],
224 ra->high_low_container.typecodes[src]);
225 }
226 src--;
227 } else {
228 new_container = container_from_range(&new_type, container_min,
229 container_max+1, 1);
230 }
231 ra_replace_key_and_container_at_index(&ra->high_low_container, dst,
232 key, new_container, new_type);
233 dst--;
234 }
235}
236
237void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
238 if (min > max) {
239 return;
240 }
241
242 uint32_t min_key = min >> 16;
243 uint32_t max_key = max >> 16;
244
245 int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key);
246 int32_t dst = src;
247 while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) {
248 uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0;
249 uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff;
250 ra_unshare_container_at_index(&ra->high_low_container, src);
251 void *new_container;
252 uint8_t new_type;
253 new_container = container_remove_range(ra->high_low_container.containers[src],
254 ra->high_low_container.typecodes[src],
255 container_min, container_max,
256 &new_type);
257 if (new_container != ra->high_low_container.containers[src]) {
258 container_free(ra->high_low_container.containers[src],
259 ra->high_low_container.typecodes[src]);
260 }
261 if (new_container) {
262 ra_replace_key_and_container_at_index(&ra->high_low_container, dst,
263 ra->high_low_container.keys[src],
264 new_container, new_type);
265 dst++;
266 }
267 src++;
268 }
269 if (src > dst) {
270 ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src);
271 }
272}
273
274extern inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
275extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
276
277void roaring_bitmap_printf(const roaring_bitmap_t *ra) {
278 printf("{");
279 for (int i = 0; i < ra->high_low_container.size; ++i) {
280 container_printf_as_uint32_array(
281 ra->high_low_container.containers[i],
282 ra->high_low_container.typecodes[i],
283 ((uint32_t)ra->high_low_container.keys[i]) << 16);
284 if (i + 1 < ra->high_low_container.size) printf(",");
285 }
286 printf("}");
287}
288
289void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) {
290 printf("{");
291 for (int i = 0; i < ra->high_low_container.size; ++i) {
292 printf("%d: %s (%d)", ra->high_low_container.keys[i],
293 get_full_container_name(ra->high_low_container.containers[i],
294 ra->high_low_container.typecodes[i]),
295 container_get_cardinality(ra->high_low_container.containers[i],
296 ra->high_low_container.typecodes[i]));
297 if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) {
298 printf(
299 "(shared count = %" PRIu32 " )",
300 ((shared_container_t *)(ra->high_low_container.containers[i]))
301 ->counter);
302 }
303
304 if (i + 1 < ra->high_low_container.size) printf(", ");
305 }
306 printf("}");
307}
308
309typedef struct min_max_sum_s {
310 uint32_t min;
311 uint32_t max;
312 uint64_t sum;
313} min_max_sum_t;
314
315static bool min_max_sum_fnc(uint32_t value, void *param) {
316 min_max_sum_t *mms = (min_max_sum_t *)param;
317 if (value > mms->max) mms->max = value;
318 if (value < mms->min) mms->min = value;
319 mms->sum += value;
320 return true; // we always process all data points
321}
322
323/**
324* (For advanced users.)
325* Collect statistics about the bitmap
326*/
327void roaring_bitmap_statistics(const roaring_bitmap_t *ra,
328 roaring_statistics_t *stat) {
329 memset(stat, 0, sizeof(*stat));
330 stat->n_containers = ra->high_low_container.size;
331 stat->cardinality = roaring_bitmap_get_cardinality(ra);
332 min_max_sum_t mms;
333 mms.min = UINT32_C(0xFFFFFFFF);
334 mms.max = UINT32_C(0);
335 mms.sum = 0;
336 roaring_iterate(ra, &min_max_sum_fnc, &mms);
337 stat->min_value = mms.min;
338 stat->max_value = mms.max;
339 stat->sum_value = mms.sum;
340
341 for (int i = 0; i < ra->high_low_container.size; ++i) {
342 uint8_t truetype =
343 get_container_type(ra->high_low_container.containers[i],
344 ra->high_low_container.typecodes[i]);
345 uint32_t card =
346 container_get_cardinality(ra->high_low_container.containers[i],
347 ra->high_low_container.typecodes[i]);
348 uint32_t sbytes =
349 container_size_in_bytes(ra->high_low_container.containers[i],
350 ra->high_low_container.typecodes[i]);
351 switch (truetype) {
352 case BITSET_CONTAINER_TYPE_CODE:
353 stat->n_bitset_containers++;
354 stat->n_values_bitset_containers += card;
355 stat->n_bytes_bitset_containers += sbytes;
356 break;
357 case ARRAY_CONTAINER_TYPE_CODE:
358 stat->n_array_containers++;
359 stat->n_values_array_containers += card;
360 stat->n_bytes_array_containers += sbytes;
361 break;
362 case RUN_CONTAINER_TYPE_CODE:
363 stat->n_run_containers++;
364 stat->n_values_run_containers += card;
365 stat->n_bytes_run_containers += sbytes;
366 break;
367 default:
368 assert(false);
369 __builtin_unreachable();
370 }
371 }
372}
373
374roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
375 roaring_bitmap_t *ans =
376 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
377 if (!ans) {
378 return NULL;
379 }
380 bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container,
381 is_cow(r));
382 if (!is_ok) {
383 free(ans);
384 return NULL;
385 }
386 roaring_bitmap_set_copy_on_write(ans, is_cow(r));
387 return ans;
388}
389
390bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
391 const roaring_bitmap_t *src) {
392 return ra_overwrite(&src->high_low_container, &dest->high_low_container,
393 is_cow(src));
394}
395
396void roaring_bitmap_free(const roaring_bitmap_t *r) {
397 if (!is_frozen(r)) {
398 ra_clear((roaring_array_t*)&r->high_low_container);
399 }
400 free((roaring_bitmap_t*)r);
401}
402
403void roaring_bitmap_clear(roaring_bitmap_t *r) {
404 ra_reset(&r->high_low_container);
405}
406
407void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
408 const uint16_t hb = val >> 16;
409 const int i = ra_get_index(&r->high_low_container, hb);
410 uint8_t typecode;
411 if (i >= 0) {
412 ra_unshare_container_at_index(&r->high_low_container, i);
413 void *container =
414 ra_get_container_at_index(&r->high_low_container, i, &typecode);
415 uint8_t newtypecode = typecode;
416 void *container2 =
417 container_add(container, val & 0xFFFF, typecode, &newtypecode);
418 if (container2 != container) {
419 container_free(container, typecode);
420 ra_set_container_at_index(&r->high_low_container, i, container2,
421 newtypecode);
422 }
423 } else {
424 array_container_t *newac = array_container_create();
425 void *container = container_add(newac, val & 0xFFFF,
426 ARRAY_CONTAINER_TYPE_CODE, &typecode);
427 // we could just assume that it stays an array container
428 ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
429 container, typecode);
430 }
431}
432
433bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
434 const uint16_t hb = val >> 16;
435 const int i = ra_get_index(&r->high_low_container, hb);
436 uint8_t typecode;
437 bool result = false;
438 if (i >= 0) {
439 ra_unshare_container_at_index(&r->high_low_container, i);
440 void *container =
441 ra_get_container_at_index(&r->high_low_container, i, &typecode);
442
443 const int oldCardinality =
444 container_get_cardinality(container, typecode);
445
446 uint8_t newtypecode = typecode;
447 void *container2 =
448 container_add(container, val & 0xFFFF, typecode, &newtypecode);
449 if (container2 != container) {
450 container_free(container, typecode);
451 ra_set_container_at_index(&r->high_low_container, i, container2,
452 newtypecode);
453 result = true;
454 } else {
455 const int newCardinality =
456 container_get_cardinality(container, newtypecode);
457
458 result = oldCardinality != newCardinality;
459 }
460 } else {
461 array_container_t *newac = array_container_create();
462 void *container = container_add(newac, val & 0xFFFF,
463 ARRAY_CONTAINER_TYPE_CODE, &typecode);
464 // we could just assume that it stays an array container
465 ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
466 container, typecode);
467 result = true;
468 }
469
470 return result;
471}
472
473void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
474 const uint16_t hb = val >> 16;
475 const int i = ra_get_index(&r->high_low_container, hb);
476 uint8_t typecode;
477 if (i >= 0) {
478 ra_unshare_container_at_index(&r->high_low_container, i);
479 void *container =
480 ra_get_container_at_index(&r->high_low_container, i, &typecode);
481 uint8_t newtypecode = typecode;
482 void *container2 =
483 container_remove(container, val & 0xFFFF, typecode, &newtypecode);
484 if (container2 != container) {
485 container_free(container, typecode);
486 ra_set_container_at_index(&r->high_low_container, i, container2,
487 newtypecode);
488 }
489 if (container_get_cardinality(container2, newtypecode) != 0) {
490 ra_set_container_at_index(&r->high_low_container, i, container2,
491 newtypecode);
492 } else {
493 ra_remove_at_index_and_free(&r->high_low_container, i);
494 }
495 }
496}
497
498bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
499 const uint16_t hb = val >> 16;
500 const int i = ra_get_index(&r->high_low_container, hb);
501 uint8_t typecode;
502 bool result = false;
503 if (i >= 0) {
504 ra_unshare_container_at_index(&r->high_low_container, i);
505 void *container =
506 ra_get_container_at_index(&r->high_low_container, i, &typecode);
507
508 const int oldCardinality =
509 container_get_cardinality(container, typecode);
510
511 uint8_t newtypecode = typecode;
512 void *container2 =
513 container_remove(container, val & 0xFFFF, typecode, &newtypecode);
514 if (container2 != container) {
515 container_free(container, typecode);
516 ra_set_container_at_index(&r->high_low_container, i, container2,
517 newtypecode);
518 }
519
520 const int newCardinality =
521 container_get_cardinality(container2, newtypecode);
522
523 if (newCardinality != 0) {
524 ra_set_container_at_index(&r->high_low_container, i, container2,
525 newtypecode);
526 } else {
527 ra_remove_at_index_and_free(&r->high_low_container, i);
528 }
529
530 result = oldCardinality != newCardinality;
531 }
532 return result;
533}
534
535void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
536 const uint32_t *vals) {
537 if (n_args == 0 || r->high_low_container.size == 0) {
538 return;
539 }
540 int32_t pos = -1; // position of the container used in the previous iteration
541 for (size_t i = 0; i < n_args; i++) {
542 uint16_t key = (uint16_t)(vals[i] >> 16);
543 if (pos < 0 || key != r->high_low_container.keys[pos]) {
544 pos = ra_get_index(&r->high_low_container, key);
545 }
546 if (pos >= 0) {
547 uint8_t new_typecode;
548 void *new_container;
549 new_container = container_remove(r->high_low_container.containers[pos],
550 vals[i] & 0xffff,
551 r->high_low_container.typecodes[pos],
552 &new_typecode);
553 if (new_container != r->high_low_container.containers[pos]) {
554 container_free(r->high_low_container.containers[pos],
555 r->high_low_container.typecodes[pos]);
556 ra_replace_key_and_container_at_index(&r->high_low_container,
557 pos, key, new_container,
558 new_typecode);
559 }
560 if (!container_nonzero_cardinality(new_container, new_typecode)) {
561 container_free(new_container, new_typecode);
562 ra_remove_at_index(&r->high_low_container, pos);
563 pos = -1;
564 }
565 }
566 }
567}
568
569// there should be some SIMD optimizations possible here
570roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
571 const roaring_bitmap_t *x2) {
572 uint8_t container_result_type = 0;
573 const int length1 = x1->high_low_container.size,
574 length2 = x2->high_low_container.size;
575 uint32_t neededcap = length1 > length2 ? length2 : length1;
576 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
577 roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
578
579 int pos1 = 0, pos2 = 0;
580
581 while (pos1 < length1 && pos2 < length2) {
582 const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
583 const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
584
585 if (s1 == s2) {
586 uint8_t container_type_1, container_type_2;
587 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
588 &container_type_1);
589 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
590 &container_type_2);
591 void *c = container_and(c1, container_type_1, c2, container_type_2,
592 &container_result_type);
593 if (container_nonzero_cardinality(c, container_result_type)) {
594 ra_append(&answer->high_low_container, s1, c,
595 container_result_type);
596 } else {
597 container_free(
598 c, container_result_type); // otherwise:memory leak!
599 }
600 ++pos1;
601 ++pos2;
602 } else if (s1 < s2) { // s1 < s2
603 pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
604 } else { // s1 > s2
605 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
606 }
607 }
608 return answer;
609}
610
611/**
612 * Compute the union of 'number' bitmaps.
613 */
614roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
615 const roaring_bitmap_t **x) {
616 if (number == 0) {
617 return roaring_bitmap_create();
618 }
619 if (number == 1) {
620 return roaring_bitmap_copy(x[0]);
621 }
622 roaring_bitmap_t *answer =
623 roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
624 for (size_t i = 2; i < number; i++) {
625 roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
626 }
627 roaring_bitmap_repair_after_lazy(answer);
628 return answer;
629}
630
631/**
632 * Compute the xor of 'number' bitmaps.
633 */
634roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
635 const roaring_bitmap_t **x) {
636 if (number == 0) {
637 return roaring_bitmap_create();
638 }
639 if (number == 1) {
640 return roaring_bitmap_copy(x[0]);
641 }
642 roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
643 for (size_t i = 2; i < number; i++) {
644 roaring_bitmap_lazy_xor_inplace(answer, x[i]);
645 }
646 roaring_bitmap_repair_after_lazy(answer);
647 return answer;
648}
649
650// inplace and (modifies its first argument).
651void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
652 const roaring_bitmap_t *x2) {
653 if (x1 == x2) return;
654 int pos1 = 0, pos2 = 0, intersection_size = 0;
655 const int length1 = ra_get_size(&x1->high_low_container);
656 const int length2 = ra_get_size(&x2->high_low_container);
657
658 // any skipped-over or newly emptied containers in x1
659 // have to be freed.
660 while (pos1 < length1 && pos2 < length2) {
661 const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
662 const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
663
664 if (s1 == s2) {
665 uint8_t typecode1, typecode2, typecode_result;
666 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
667 &typecode1);
668 c1 = get_writable_copy_if_shared(c1, &typecode1);
669 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
670 &typecode2);
671 void *c =
672 container_iand(c1, typecode1, c2, typecode2, &typecode_result);
673 if (c != c1) { // in this instance a new container was created, and
674 // we need to free the old one
675 container_free(c1, typecode1);
676 }
677 if (container_nonzero_cardinality(c, typecode_result)) {
678 ra_replace_key_and_container_at_index(&x1->high_low_container,
679 intersection_size, s1, c,
680 typecode_result);
681 intersection_size++;
682 } else {
683 container_free(c, typecode_result);
684 }
685 ++pos1;
686 ++pos2;
687 } else if (s1 < s2) {
688 pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
689 } else { // s1 > s2
690 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
691 }
692 }
693
694 // if we ended early because x2 ran out, then all remaining in x1 should be
695 // freed
696 while (pos1 < length1) {
697 container_free(x1->high_low_container.containers[pos1],
698 x1->high_low_container.typecodes[pos1]);
699 ++pos1;
700 }
701
702 // all containers after this have either been copied or freed
703 ra_downsize(&x1->high_low_container, intersection_size);
704}
705
706roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
707 const roaring_bitmap_t *x2) {
708 uint8_t container_result_type = 0;
709 const int length1 = x1->high_low_container.size,
710 length2 = x2->high_low_container.size;
711 if (0 == length1) {
712 return roaring_bitmap_copy(x2);
713 }
714 if (0 == length2) {
715 return roaring_bitmap_copy(x1);
716 }
717 roaring_bitmap_t *answer =
718 roaring_bitmap_create_with_capacity(length1 + length2);
719 roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
720 int pos1 = 0, pos2 = 0;
721 uint8_t container_type_1, container_type_2;
722 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
723 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
724 while (true) {
725 if (s1 == s2) {
726 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
727 &container_type_1);
728 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
729 &container_type_2);
730 void *c = container_or(c1, container_type_1, c2, container_type_2,
731 &container_result_type);
732 // since we assume that the initial containers are non-empty, the
733 // result here
734 // can only be non-empty
735 ra_append(&answer->high_low_container, s1, c,
736 container_result_type);
737 ++pos1;
738 ++pos2;
739 if (pos1 == length1) break;
740 if (pos2 == length2) break;
741 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
742 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
743
744 } else if (s1 < s2) { // s1 < s2
745 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
746 &container_type_1);
747 // c1 = container_clone(c1, container_type_1);
748 c1 =
749 get_copy_of_container(c1, &container_type_1, is_cow(x1));
750 if (is_cow(x1)) {
751 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
752 container_type_1);
753 }
754 ra_append(&answer->high_low_container, s1, c1, container_type_1);
755 pos1++;
756 if (pos1 == length1) break;
757 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
758
759 } else { // s1 > s2
760 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
761 &container_type_2);
762 // c2 = container_clone(c2, container_type_2);
763 c2 =
764 get_copy_of_container(c2, &container_type_2, is_cow(x2));
765 if (is_cow(x2)) {
766 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
767 container_type_2);
768 }
769 ra_append(&answer->high_low_container, s2, c2, container_type_2);
770 pos2++;
771 if (pos2 == length2) break;
772 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
773 }
774 }
775 if (pos1 == length1) {
776 ra_append_copy_range(&answer->high_low_container,
777 &x2->high_low_container, pos2, length2,
778 is_cow(x2));
779 } else if (pos2 == length2) {
780 ra_append_copy_range(&answer->high_low_container,
781 &x1->high_low_container, pos1, length1,
782 is_cow(x1));
783 }
784 return answer;
785}
786
787// inplace or (modifies its first argument).
788void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
789 const roaring_bitmap_t *x2) {
790 uint8_t container_result_type = 0;
791 int length1 = x1->high_low_container.size;
792 const int length2 = x2->high_low_container.size;
793
794 if (0 == length2) return;
795
796 if (0 == length1) {
797 roaring_bitmap_overwrite(x1, x2);
798 return;
799 }
800 int pos1 = 0, pos2 = 0;
801 uint8_t container_type_1, container_type_2;
802 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
803 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
804 while (true) {
805 if (s1 == s2) {
806 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
807 &container_type_1);
808 if (!container_is_full(c1, container_type_1)) {
809 c1 = get_writable_copy_if_shared(c1, &container_type_1);
810
811 void *c2 = ra_get_container_at_index(&x2->high_low_container,
812 pos2, &container_type_2);
813 void *c =
814 container_ior(c1, container_type_1, c2, container_type_2,
815 &container_result_type);
816 if (c !=
817 c1) { // in this instance a new container was created, and
818 // we need to free the old one
819 container_free(c1, container_type_1);
820 }
821
822 ra_set_container_at_index(&x1->high_low_container, pos1, c,
823 container_result_type);
824 }
825 ++pos1;
826 ++pos2;
827 if (pos1 == length1) break;
828 if (pos2 == length2) break;
829 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
830 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
831
832 } else if (s1 < s2) { // s1 < s2
833 pos1++;
834 if (pos1 == length1) break;
835 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
836
837 } else { // s1 > s2
838 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
839 &container_type_2);
840 c2 =
841 get_copy_of_container(c2, &container_type_2, is_cow(x2));
842 if (is_cow(x2)) {
843 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
844 container_type_2);
845 }
846
847 // void *c2_clone = container_clone(c2, container_type_2);
848 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
849 container_type_2);
850 pos1++;
851 length1++;
852 pos2++;
853 if (pos2 == length2) break;
854 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
855 }
856 }
857 if (pos1 == length1) {
858 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
859 pos2, length2, is_cow(x2));
860 }
861}
862
863roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
864 const roaring_bitmap_t *x2) {
865 uint8_t container_result_type = 0;
866 const int length1 = x1->high_low_container.size,
867 length2 = x2->high_low_container.size;
868 if (0 == length1) {
869 return roaring_bitmap_copy(x2);
870 }
871 if (0 == length2) {
872 return roaring_bitmap_copy(x1);
873 }
874 roaring_bitmap_t *answer =
875 roaring_bitmap_create_with_capacity(length1 + length2);
876 roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
877 int pos1 = 0, pos2 = 0;
878 uint8_t container_type_1, container_type_2;
879 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
880 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
881 while (true) {
882 if (s1 == s2) {
883 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
884 &container_type_1);
885 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
886 &container_type_2);
887 void *c = container_xor(c1, container_type_1, c2, container_type_2,
888 &container_result_type);
889
890 if (container_nonzero_cardinality(c, container_result_type)) {
891 ra_append(&answer->high_low_container, s1, c,
892 container_result_type);
893 } else {
894 container_free(c, container_result_type);
895 }
896 ++pos1;
897 ++pos2;
898 if (pos1 == length1) break;
899 if (pos2 == length2) break;
900 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
901 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
902
903 } else if (s1 < s2) { // s1 < s2
904 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
905 &container_type_1);
906 c1 =
907 get_copy_of_container(c1, &container_type_1, is_cow(x1));
908 if (is_cow(x1)) {
909 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
910 container_type_1);
911 }
912 ra_append(&answer->high_low_container, s1, c1, container_type_1);
913 pos1++;
914 if (pos1 == length1) break;
915 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
916
917 } else { // s1 > s2
918 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
919 &container_type_2);
920 c2 =
921 get_copy_of_container(c2, &container_type_2, is_cow(x2));
922 if (is_cow(x2)) {
923 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
924 container_type_2);
925 }
926 ra_append(&answer->high_low_container, s2, c2, container_type_2);
927 pos2++;
928 if (pos2 == length2) break;
929 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
930 }
931 }
932 if (pos1 == length1) {
933 ra_append_copy_range(&answer->high_low_container,
934 &x2->high_low_container, pos2, length2,
935 is_cow(x2));
936 } else if (pos2 == length2) {
937 ra_append_copy_range(&answer->high_low_container,
938 &x1->high_low_container, pos1, length1,
939 is_cow(x1));
940 }
941 return answer;
942}
943
944// inplace xor (modifies its first argument).
945
946void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
947 const roaring_bitmap_t *x2) {
948 assert(x1 != x2);
949 uint8_t container_result_type = 0;
950 int length1 = x1->high_low_container.size;
951 const int length2 = x2->high_low_container.size;
952
953 if (0 == length2) return;
954
955 if (0 == length1) {
956 roaring_bitmap_overwrite(x1, x2);
957 return;
958 }
959
960 // XOR can have new containers inserted from x2, but can also
961 // lose containers when x1 and x2 are nonempty and identical.
962
963 int pos1 = 0, pos2 = 0;
964 uint8_t container_type_1, container_type_2;
965 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
966 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
967 while (true) {
968 if (s1 == s2) {
969 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
970 &container_type_1);
971 c1 = get_writable_copy_if_shared(c1, &container_type_1);
972
973 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
974 &container_type_2);
975 void *c = container_ixor(c1, container_type_1, c2, container_type_2,
976 &container_result_type);
977
978 if (container_nonzero_cardinality(c, container_result_type)) {
979 ra_set_container_at_index(&x1->high_low_container, pos1, c,
980 container_result_type);
981 ++pos1;
982 } else {
983 container_free(c, container_result_type);
984 ra_remove_at_index(&x1->high_low_container, pos1);
985 --length1;
986 }
987
988 ++pos2;
989 if (pos1 == length1) break;
990 if (pos2 == length2) break;
991 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
992 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
993
994 } else if (s1 < s2) { // s1 < s2
995 pos1++;
996 if (pos1 == length1) break;
997 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
998
999 } else { // s1 > s2
1000 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
1001 &container_type_2);
1002 c2 =
1003 get_copy_of_container(c2, &container_type_2, is_cow(x2));
1004 if (is_cow(x2)) {
1005 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
1006 container_type_2);
1007 }
1008
1009 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
1010 container_type_2);
1011 pos1++;
1012 length1++;
1013 pos2++;
1014 if (pos2 == length2) break;
1015 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
1016 }
1017 }
1018 if (pos1 == length1) {
1019 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
1020 pos2, length2, is_cow(x2));
1021 }
1022}
1023
1024roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
1025 const roaring_bitmap_t *x2) {
1026 uint8_t container_result_type = 0;
1027 const int length1 = x1->high_low_container.size,
1028 length2 = x2->high_low_container.size;
1029 if (0 == length1) {
1030 roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
1031 roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) && is_cow(x2));
1032 return empty_bitmap;
1033 }
1034 if (0 == length2) {
1035 return roaring_bitmap_copy(x1);
1036 }
1037 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
1038 roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
1039
1040 int pos1 = 0, pos2 = 0;
1041 uint8_t container_type_1, container_type_2;
1042 uint16_t s1 = 0;
1043 uint16_t s2 = 0;
1044 while (true) {
1045 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
1046 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
1047
1048 if (s1 == s2) {
1049 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
1050 &container_type_1);
1051 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
1052 &container_type_2);
1053 void *c =
1054 container_andnot(c1, container_type_1, c2, container_type_2,
1055 &container_result_type);
1056
1057 if (container_nonzero_cardinality(c, container_result_type)) {
1058 ra_append(&answer->high_low_container, s1, c,
1059 container_result_type);
1060 } else {
1061 container_free(c, container_result_type);
1062 }
1063 ++pos1;
1064 ++pos2;
1065 if (pos1 == length1) break;
1066 if (pos2 == length2) break;
1067 } else if (s1 < s2) { // s1 < s2
1068 const int next_pos1 =
1069 ra_advance_until(&x1->high_low_container, s2, pos1);
1070 ra_append_copy_range(&answer->high_low_container,
1071 &x1->high_low_container, pos1, next_pos1,
1072 is_cow(x1));
1073 // TODO : perhaps some of the copy_on_write should be based on
1074 // answer rather than x1 (more stringent?). Many similar cases
1075 pos1 = next_pos1;
1076 if (pos1 == length1) break;
1077 } else { // s1 > s2
1078 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
1079 if (pos2 == length2) break;
1080 }
1081 }
1082 if (pos2 == length2) {
1083 ra_append_copy_range(&answer->high_low_container,
1084 &x1->high_low_container, pos1, length1,
1085 is_cow(x1));
1086 }
1087 return answer;
1088}
1089
1090// inplace andnot (modifies its first argument).
1091
1092void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
1093 const roaring_bitmap_t *x2) {
1094 assert(x1 != x2);
1095
1096 uint8_t container_result_type = 0;
1097 int length1 = x1->high_low_container.size;
1098 const int length2 = x2->high_low_container.size;
1099 int intersection_size = 0;
1100
1101 if (0 == length2) return;
1102
1103 if (0 == length1) {
1104 roaring_bitmap_clear(x1);
1105 return;
1106 }
1107
1108 int pos1 = 0, pos2 = 0;
1109 uint8_t container_type_1, container_type_2;
1110 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
1111 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
1112 while (true) {
1113 if (s1 == s2) {
1114 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
1115 &container_type_1);
1116 c1 = get_writable_copy_if_shared(c1, &container_type_1);
1117
1118 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
1119 &container_type_2);
1120 void *c =
1121 container_iandnot(c1, container_type_1, c2, container_type_2,
1122 &container_result_type);
1123
1124 if (container_nonzero_cardinality(c, container_result_type)) {
1125 ra_replace_key_and_container_at_index(&x1->high_low_container,
1126 intersection_size++, s1,
1127 c, container_result_type);
1128 } else {
1129 container_free(c, container_result_type);
1130 }
1131
1132 ++pos1;
1133 ++pos2;
1134 if (pos1 == length1) break;
1135 if (pos2 == length2) break;
1136 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
1137 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
1138
1139 } else if (s1 < s2) { // s1 < s2
1140 if (pos1 != intersection_size) {
1141 void *c1 = ra_get_container_at_index(&x1->high_low_container,
1142 pos1, &container_type_1);
1143
1144 ra_replace_key_and_container_at_index(&x1->high_low_container,
1145 intersection_size, s1, c1,
1146 container_type_1);
1147 }
1148 intersection_size++;
1149 pos1++;
1150 if (pos1 == length1) break;
1151 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
1152
1153 } else { // s1 > s2
1154 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
1155 if (pos2 == length2) break;
1156 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
1157 }
1158 }
1159
1160 if (pos1 < length1) {
1161 // all containers between intersection_size and
1162 // pos1 are junk. However, they have either been moved
1163 // (thus still referenced) or involved in an iandnot
1164 // that will clean up all containers that could not be reused.
1165 // Thus we should not free the junk containers between
1166 // intersection_size and pos1.
1167 if (pos1 > intersection_size) {
1168 // left slide of remaining items
1169 ra_copy_range(&x1->high_low_container, pos1, length1,
1170 intersection_size);
1171 }
1172 // else current placement is fine
1173 intersection_size += (length1 - pos1);
1174 }
1175 ra_downsize(&x1->high_low_container, intersection_size);
1176}
1177
1178uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) {
1179 uint64_t card = 0;
1180 for (int i = 0; i < ra->high_low_container.size; ++i)
1181 card += container_get_cardinality(ra->high_low_container.containers[i],
1182 ra->high_low_container.typecodes[i]);
1183 return card;
1184}
1185
1186uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra,
1187 uint64_t range_start,
1188 uint64_t range_end) {
1189 if (range_end > UINT32_MAX) {
1190 range_end = UINT32_MAX + UINT64_C(1);
1191 }
1192 if (range_start >= range_end) {
1193 return 0;
1194 }
1195 range_end--; // make range_end inclusive
1196 // now we have: 0 <= range_start <= range_end <= UINT32_MAX
1197
1198 uint16_t minhb = range_start >> 16;
1199 uint16_t maxhb = range_end >> 16;
1200
1201 uint64_t card = 0;
1202
1203 int i = ra_get_index(&ra->high_low_container, minhb);
1204 if (i >= 0) {
1205 if (minhb == maxhb) {
1206 card += container_rank(ra->high_low_container.containers[i],
1207 ra->high_low_container.typecodes[i],
1208 range_end & 0xffff);
1209 } else {
1210 card += container_get_cardinality(ra->high_low_container.containers[i],
1211 ra->high_low_container.typecodes[i]);
1212 }
1213 if ((range_start & 0xffff) != 0) {
1214 card -= container_rank(ra->high_low_container.containers[i],
1215 ra->high_low_container.typecodes[i],
1216 (range_start & 0xffff) - 1);
1217 }
1218 i++;
1219 } else {
1220 i = -i - 1;
1221 }
1222
1223 for (; i < ra->high_low_container.size; i++) {
1224 uint16_t key = ra->high_low_container.keys[i];
1225 if (key < maxhb) {
1226 card += container_get_cardinality(ra->high_low_container.containers[i],
1227 ra->high_low_container.typecodes[i]);
1228 } else if (key == maxhb) {
1229 card += container_rank(ra->high_low_container.containers[i],
1230 ra->high_low_container.typecodes[i],
1231 range_end & 0xffff);
1232 break;
1233 } else {
1234 break;
1235 }
1236 }
1237
1238 return card;
1239}
1240
1241
1242bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) {
1243 return ra->high_low_container.size == 0;
1244}
1245
1246void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) {
1247 ra_to_uint32_array(&ra->high_low_container, ans);
1248}
1249
1250bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) {
1251 return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans);
1252}
1253
1254/** convert array and bitmap containers to run containers when it is more
1255 * efficient;
1256 * also convert from run containers when more space efficient. Returns
1257 * true if the result has at least one run container.
1258*/
1259bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
1260 bool answer = false;
1261 for (int i = 0; i < r->high_low_container.size; i++) {
1262 uint8_t typecode_original, typecode_after;
1263 ra_unshare_container_at_index(
1264 &r->high_low_container, i); // TODO: this introduces extra cloning!
1265 void *c = ra_get_container_at_index(&r->high_low_container, i,
1266 &typecode_original);
1267 void *c1 = convert_run_optimize(c, typecode_original, &typecode_after);
1268 if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true;
1269 ra_set_container_at_index(&r->high_low_container, i, c1,
1270 typecode_after);
1271 }
1272 return answer;
1273}
1274
1275size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
1276 size_t answer = 0;
1277 for (int i = 0; i < r->high_low_container.size; i++) {
1278 uint8_t typecode_original;
1279 void *c = ra_get_container_at_index(&r->high_low_container, i,
1280 &typecode_original);
1281 answer += container_shrink_to_fit(c, typecode_original);
1282 }
1283 answer += ra_shrink_to_fit(&r->high_low_container);
1284 return answer;
1285}
1286
1287/**
1288 * Remove run-length encoding even when it is more space efficient
1289 * return whether a change was applied
1290 */
1291bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
1292 bool answer = false;
1293 for (int i = 0; i < r->high_low_container.size; i++) {
1294 uint8_t typecode_original, typecode_after;
1295 void *c = ra_get_container_at_index(&r->high_low_container, i,
1296 &typecode_original);
1297 if (get_container_type(c, typecode_original) ==
1298 RUN_CONTAINER_TYPE_CODE) {
1299 answer = true;
1300 if (typecode_original == SHARED_CONTAINER_TYPE_CODE) {
1301 run_container_t *truec =
1302 (run_container_t *)((shared_container_t *)c)->container;
1303 int32_t card = run_container_cardinality(truec);
1304 void *c1 = convert_to_bitset_or_array_container(
1305 truec, card, &typecode_after);
1306 shared_container_free((shared_container_t *)c);
1307 ra_set_container_at_index(&r->high_low_container, i, c1,
1308 typecode_after);
1309
1310 } else {
1311 int32_t card = run_container_cardinality((run_container_t *)c);
1312 void *c1 = convert_to_bitset_or_array_container(
1313 (run_container_t *)c, card, &typecode_after);
1314 ra_set_container_at_index(&r->high_low_container, i, c1,
1315 typecode_after);
1316 }
1317 }
1318 }
1319 return answer;
1320}
1321
1322size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) {
1323 size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
1324 uint64_t cardinality = roaring_bitmap_get_cardinality(ra);
1325 uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
1326 if (portablesize < sizeasarray) {
1327 buf[0] = SERIALIZATION_CONTAINER;
1328 return roaring_bitmap_portable_serialize(ra, buf + 1) + 1;
1329 } else {
1330 buf[0] = SERIALIZATION_ARRAY_UINT32;
1331 memcpy(buf + 1, &cardinality, sizeof(uint32_t));
1332 roaring_bitmap_to_uint32_array(
1333 ra, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
1334 return 1 + (size_t)sizeasarray;
1335 }
1336}
1337
1338size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) {
1339 size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
1340 uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) +
1341 sizeof(uint32_t);
1342 return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
1343}
1344
1345size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) {
1346 return ra_portable_size_in_bytes(&ra->high_low_container);
1347}
1348
1349
1350roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
1351 roaring_bitmap_t *ans =
1352 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
1353 if (ans == NULL) {
1354 return NULL;
1355 }
1356 size_t bytesread;
1357 bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
1358 if(is_ok) assert(bytesread <= maxbytes);
1359 roaring_bitmap_set_copy_on_write(ans, false);
1360 if (!is_ok) {
1361 free(ans);
1362 return NULL;
1363 }
1364 return ans;
1365}
1366
1367roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
1368 return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
1369}
1370
1371
1372size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
1373 return ra_portable_deserialize_size(buf, maxbytes);
1374}
1375
1376
1377size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra,
1378 char *buf) {
1379 return ra_portable_serialize(&ra->high_low_container, buf);
1380}
1381
1382roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
1383 const char *bufaschar = (const char *)buf;
1384 if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) {
1385 /* This looks like a compressed set of uint32_t elements */
1386 uint32_t card;
1387 memcpy(&card, bufaschar + 1, sizeof(uint32_t));
1388 const uint32_t *elems =
1389 (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
1390
1391 return roaring_bitmap_of_ptr(card, elems);
1392 } else if (bufaschar[0] == SERIALIZATION_CONTAINER) {
1393 return roaring_bitmap_portable_deserialize(bufaschar + 1);
1394 } else
1395 return (NULL);
1396}
1397
1398bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator,
1399 void *ptr) {
1400 for (int i = 0; i < ra->high_low_container.size; ++i)
1401 if (!container_iterate(ra->high_low_container.containers[i],
1402 ra->high_low_container.typecodes[i],
1403 ((uint32_t)ra->high_low_container.keys[i]) << 16,
1404 iterator, ptr)) {
1405 return false;
1406 }
1407 return true;
1408}
1409
1410bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator,
1411 uint64_t high_bits, void *ptr) {
1412 for (int i = 0; i < ra->high_low_container.size; ++i)
1413 if (!container_iterate64(
1414 ra->high_low_container.containers[i],
1415 ra->high_low_container.typecodes[i],
1416 ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator,
1417 high_bits, ptr)) {
1418 return false;
1419 }
1420 return true;
1421}
1422
1423/****
1424* begin roaring_uint32_iterator_t
1425*****/
1426
1427// Partially initializes the roaring iterator when it begins looking at
1428// a new container.
1429static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
1430 newit->in_container_index = 0;
1431 newit->run_index = 0;
1432 newit->current_value = 0;
1433 if (newit->container_index >= newit->parent->high_low_container.size ||
1434 newit->container_index < 0) {
1435 newit->current_value = UINT32_MAX;
1436 return (newit->has_value = false);
1437 }
1438 // assume not empty
1439 newit->has_value = true;
1440 // we precompute container, typecode and highbits so that successive
1441 // iterators do not have to grab them from odd memory locations
1442 // and have to worry about the (easily predicted) container_unwrap_shared
1443 // call.
1444 newit->container =
1445 newit->parent->high_low_container.containers[newit->container_index];
1446 newit->typecode =
1447 newit->parent->high_low_container.typecodes[newit->container_index];
1448 newit->highbits =
1449 ((uint32_t)
1450 newit->parent->high_low_container.keys[newit->container_index])
1451 << 16;
1452 newit->container =
1453 container_unwrap_shared(newit->container, &(newit->typecode));
1454 return newit->has_value;
1455}
1456
1457static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
1458 if (!iter_new_container_partial_init(newit))
1459 return newit->has_value;
1460
1461 uint32_t wordindex;
1462 uint64_t word; // used for bitsets
1463 switch (newit->typecode) {
1464 case BITSET_CONTAINER_TYPE_CODE:
1465 wordindex = 0;
1466 while ((word = ((const bitset_container_t *)(newit->container))
1467 ->array[wordindex]) == 0)
1468 wordindex++; // advance
1469 // here "word" is non-zero
1470 newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
1471 newit->current_value = newit->highbits | newit->in_container_index;
1472 break;
1473 case ARRAY_CONTAINER_TYPE_CODE:
1474 newit->current_value =
1475 newit->highbits |
1476 ((const array_container_t *)(newit->container))->array[0];
1477 break;
1478 case RUN_CONTAINER_TYPE_CODE:
1479 newit->current_value =
1480 newit->highbits |
1481 (((const run_container_t *)(newit->container))->runs[0].value);
1482 break;
1483 default:
1484 // if this ever happens, bug!
1485 assert(false);
1486 } // switch (typecode)
1487 return true;
1488}
1489
1490static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
1491 if (!iter_new_container_partial_init(newit))
1492 return newit->has_value;
1493
1494 switch(newit->typecode) {
1495 case BITSET_CONTAINER_TYPE_CODE: {
1496 uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
1497 uint64_t word;
1498 const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
1499 while ((word = bitset_container->array[wordindex]) == 0)
1500 --wordindex;
1501
1502 int num_leading_zeros = __builtin_clzll(word);
1503 newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
1504 newit->current_value = newit->highbits | newit->in_container_index;
1505 break;
1506 }
1507 case ARRAY_CONTAINER_TYPE_CODE: {
1508 const array_container_t* array_container = (const array_container_t*)newit->container;
1509 newit->in_container_index = array_container->cardinality - 1;
1510 newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
1511 break;
1512 }
1513 case RUN_CONTAINER_TYPE_CODE: {
1514 const run_container_t* run_container = (const run_container_t*)newit->container;
1515 newit->run_index = run_container->n_runs - 1;
1516 const rle16_t* last_run = &run_container->runs[newit->run_index];
1517 newit->current_value = newit->highbits | (last_run->value + last_run->length);
1518 break;
1519 }
1520 default:
1521 // if this ever happens, bug!
1522 assert(false);
1523 }
1524 return true;
1525}
1526
1527// prerequesite: the value should be in range of the container
1528static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
1529 // Don't have to check return value because of prerequisite
1530 iter_new_container_partial_init(newit);
1531 uint16_t lb = val & 0xFFFF;
1532
1533 switch (newit->typecode) {
1534 case BITSET_CONTAINER_TYPE_CODE:
1535 newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb);
1536 newit->current_value = newit->highbits | newit->in_container_index;
1537 break;
1538 case ARRAY_CONTAINER_TYPE_CODE:
1539 newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb);
1540 newit->current_value =
1541 newit->highbits |
1542 ((const array_container_t *)(newit->container))->array[newit->in_container_index];
1543 break;
1544 case RUN_CONTAINER_TYPE_CODE:
1545 newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb);
1546 if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) {
1547 newit->current_value = val;
1548 } else {
1549 newit->current_value =
1550 newit->highbits |
1551 (((const run_container_t *)(newit->container))->runs[newit->run_index].value);
1552 }
1553 break;
1554 default:
1555 // if this ever happens, bug!
1556 assert(false);
1557 } // switch (typecode)
1558 return true;
1559}
1560
1561void roaring_init_iterator(const roaring_bitmap_t *ra,
1562 roaring_uint32_iterator_t *newit) {
1563 newit->parent = ra;
1564 newit->container_index = 0;
1565 newit->has_value = loadfirstvalue(newit);
1566}
1567
1568void roaring_init_iterator_last(const roaring_bitmap_t *ra,
1569 roaring_uint32_iterator_t *newit) {
1570 newit->parent = ra;
1571 newit->container_index = newit->parent->high_low_container.size - 1;
1572 newit->has_value = loadlastvalue(newit);
1573}
1574
1575roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) {
1576 roaring_uint32_iterator_t *newit =
1577 (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t));
1578 if (newit == NULL) return NULL;
1579 roaring_init_iterator(ra, newit);
1580 return newit;
1581}
1582
1583roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
1584 const roaring_uint32_iterator_t *it) {
1585 roaring_uint32_iterator_t *newit =
1586 (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t));
1587 memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
1588 return newit;
1589}
1590
1591bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
1592 uint16_t hb = val >> 16;
1593 const int i = ra_get_index(& it->parent->high_low_container, hb);
1594 if (i >= 0) {
1595 uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]);
1596 uint16_t lb = val & 0xFFFF;
1597 if(lowvalue < lb ) {
1598 it->container_index = i+1; // will have to load first value of next container
1599 } else {// the value is necessarily within the range of the container
1600 it->container_index = i;
1601 it->has_value = loadfirstvalue_largeorequal(it, val);
1602 return it->has_value;
1603 }
1604 } else {
1605 // there is no matching, so we are going for the next container
1606 it->container_index = -i-1;
1607 }
1608 it->has_value = loadfirstvalue(it);
1609 return it->has_value;
1610}
1611
1612
1613bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
1614 if (it->container_index >= it->parent->high_low_container.size) {
1615 return (it->has_value = false);
1616 }
1617 if (it->container_index < 0) {
1618 it->container_index = 0;
1619 return (it->has_value = loadfirstvalue(it));
1620 }
1621
1622 uint32_t wordindex; // used for bitsets
1623 uint64_t word; // used for bitsets
1624 switch (it->typecode) {
1625 case BITSET_CONTAINER_TYPE_CODE:
1626 it->in_container_index++;
1627 wordindex = it->in_container_index / 64;
1628 if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
1629 word = ((const bitset_container_t *)(it->container))
1630 ->array[wordindex] &
1631 (UINT64_MAX << (it->in_container_index % 64));
1632 // next part could be optimized/simplified
1633 while ((word == 0) &&
1634 (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
1635 wordindex++;
1636 word = ((const bitset_container_t *)(it->container))
1637 ->array[wordindex];
1638 }
1639 if (word != 0) {
1640 it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
1641 it->current_value = it->highbits | it->in_container_index;
1642 return (it->has_value = true);
1643 }
1644 break;
1645 case ARRAY_CONTAINER_TYPE_CODE:
1646 it->in_container_index++;
1647 if (it->in_container_index <
1648 ((const array_container_t *)(it->container))->cardinality) {
1649 it->current_value = it->highbits |
1650 ((const array_container_t *)(it->container))
1651 ->array[it->in_container_index];
1652 return (it->has_value = true);
1653 }
1654 break;
1655 case RUN_CONTAINER_TYPE_CODE: {
1656 if(it->current_value == UINT32_MAX) {
1657 return (it->has_value = false); // without this, we risk an overflow to zero
1658 }
1659
1660 const run_container_t* run_container = (const run_container_t*)it->container;
1661 if (++it->current_value <= (it->highbits | (run_container->runs[it->run_index].value +
1662 run_container->runs[it->run_index].length))) {
1663 return (it->has_value = true);
1664 }
1665
1666 if (++it->run_index < run_container->n_runs) {
1667 // Assume the run has a value
1668 it->current_value = it->highbits | run_container->runs[it->run_index].value;
1669 return (it->has_value = true);
1670 }
1671 break;
1672 }
1673 default:
1674 // if this ever happens, bug!
1675 assert(false);
1676 } // switch (typecode)
1677 // moving to next container
1678 it->container_index++;
1679 return (it->has_value = loadfirstvalue(it));
1680}
1681
1682bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
1683 if (it->container_index < 0) {
1684 return (it->has_value = false);
1685 }
1686 if (it->container_index >= it->parent->high_low_container.size) {
1687 it->container_index = it->parent->high_low_container.size - 1;
1688 return (it->has_value = loadlastvalue(it));
1689 }
1690
1691 switch (it->typecode) {
1692 case BITSET_CONTAINER_TYPE_CODE: {
1693 if (--it->in_container_index < 0)
1694 break;
1695
1696 const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
1697 int32_t wordindex = it->in_container_index / 64;
1698 uint64_t word = bitset_container->array[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));
1699
1700 while (word == 0 && --wordindex >= 0) {
1701 word = bitset_container->array[wordindex];
1702 }
1703 if (word == 0)
1704 break;
1705
1706 int num_leading_zeros = __builtin_clzll(word);
1707 it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
1708 it->current_value = it->highbits | it->in_container_index;
1709 return (it->has_value = true);
1710 }
1711 case ARRAY_CONTAINER_TYPE_CODE: {
1712 if (--it->in_container_index < 0)
1713 break;
1714
1715 const array_container_t* array_container = (const array_container_t*)it->container;
1716 it->current_value = it->highbits | array_container->array[it->in_container_index];
1717 return (it->has_value = true);
1718 }
1719 case RUN_CONTAINER_TYPE_CODE: {
1720 if(it->current_value == 0)
1721 return (it->has_value = false);
1722
1723 const run_container_t* run_container = (const run_container_t*)it->container;
1724 if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
1725 return (it->has_value = true);
1726 }
1727
1728 if (--it->run_index < 0)
1729 break;
1730
1731 it->current_value = it->highbits | (run_container->runs[it->run_index].value +
1732 run_container->runs[it->run_index].length);
1733 return (it->has_value = true);
1734 }
1735 default:
1736 // if this ever happens, bug!
1737 assert(false);
1738 } // switch (typecode)
1739
1740 // moving to previous container
1741 it->container_index--;
1742 return (it->has_value = loadlastvalue(it));
1743}
1744
1745uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
1746 uint32_t ret = 0;
1747 uint32_t num_values;
1748 uint32_t wordindex; // used for bitsets
1749 uint64_t word; // used for bitsets
1750 const array_container_t* acont; //TODO remove
1751 const run_container_t* rcont; //TODO remove
1752 const bitset_container_t* bcont; //TODO remove
1753
1754 while (it->has_value && ret < count) {
1755 switch (it->typecode) {
1756 case BITSET_CONTAINER_TYPE_CODE:
1757 bcont = (const bitset_container_t*)(it->container);
1758 wordindex = it->in_container_index / 64;
1759 word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
1760 do {
1761 while (word != 0 && ret < count) {
1762 buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
1763 word = word & (word - 1);
1764 buf++;
1765 ret++;
1766 }
1767 while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
1768 wordindex++;
1769 word = bcont->array[wordindex];
1770 }
1771 } while (word != 0 && ret < count);
1772 it->has_value = (word != 0);
1773 if (it->has_value) {
1774 it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
1775 it->current_value = it->highbits | it->in_container_index;
1776 }
1777 break;
1778 case ARRAY_CONTAINER_TYPE_CODE:
1779 acont = (const array_container_t *)(it->container);
1780 num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
1781 for (uint32_t i = 0; i < num_values; i++) {
1782 buf[i] = it->highbits | acont->array[it->in_container_index + i];
1783 }
1784 buf += num_values;
1785 ret += num_values;
1786 it->in_container_index += num_values;
1787 it->has_value = (it->in_container_index < acont->cardinality);
1788 if (it->has_value) {
1789 it->current_value = it->highbits | acont->array[it->in_container_index];
1790 }
1791 break;
1792 case RUN_CONTAINER_TYPE_CODE:
1793 rcont = (const run_container_t*)(it->container);
1794 //"in_run_index" name is misleading, read it as "max_value_in_current_run"
1795 do {
1796 uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
1797 num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret);
1798 for (uint32_t i = 0; i < num_values; i++) {
1799 buf[i] = it->current_value + i;
1800 }
1801 it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
1802 buf += num_values;
1803 ret += num_values;
1804
1805 if (it->current_value > largest_run_value || it->current_value == 0) {
1806 it->run_index++;
1807 if (it->run_index < rcont->n_runs) {
1808 it->current_value = it->highbits | rcont->runs[it->run_index].value;
1809 } else {
1810 it->has_value = false;
1811 }
1812 }
1813 } while ((ret < count) && it->has_value);
1814 break;
1815 default:
1816 assert(false);
1817 }
1818 if (it->has_value) {
1819 assert(ret == count);
1820 return ret;
1821 }
1822 it->container_index++;
1823 it->has_value = loadfirstvalue(it);
1824 }
1825 return ret;
1826}
1827
1828
1829
1830void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); }
1831
1832/****
1833* end of roaring_uint32_iterator_t
1834*****/
1835
1836bool roaring_bitmap_equals(const roaring_bitmap_t *ra1,
1837 const roaring_bitmap_t *ra2) {
1838 if (ra1->high_low_container.size != ra2->high_low_container.size) {
1839 return false;
1840 }
1841 for (int i = 0; i < ra1->high_low_container.size; ++i) {
1842 if (ra1->high_low_container.keys[i] !=
1843 ra2->high_low_container.keys[i]) {
1844 return false;
1845 }
1846 }
1847 for (int i = 0; i < ra1->high_low_container.size; ++i) {
1848 bool areequal = container_equals(ra1->high_low_container.containers[i],
1849 ra1->high_low_container.typecodes[i],
1850 ra2->high_low_container.containers[i],
1851 ra2->high_low_container.typecodes[i]);
1852 if (!areequal) {
1853 return false;
1854 }
1855 }
1856 return true;
1857}
1858
1859bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1,
1860 const roaring_bitmap_t *ra2) {
1861 const int length1 = ra1->high_low_container.size,
1862 length2 = ra2->high_low_container.size;
1863
1864 int pos1 = 0, pos2 = 0;
1865
1866 while (pos1 < length1 && pos2 < length2) {
1867 const uint16_t s1 = ra_get_key_at_index(&ra1->high_low_container, pos1);
1868 const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2);
1869
1870 if (s1 == s2) {
1871 uint8_t container_type_1, container_type_2;
1872 void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1,
1873 &container_type_1);
1874 void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2,
1875 &container_type_2);
1876 bool subset =
1877 container_is_subset(c1, container_type_1, c2, container_type_2);
1878 if (!subset) return false;
1879 ++pos1;
1880 ++pos2;
1881 } else if (s1 < s2) { // s1 < s2
1882 return false;
1883 } else { // s1 > s2
1884 pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2);
1885 }
1886 }
1887 if (pos1 == length1)
1888 return true;
1889 else
1890 return false;
1891}
1892
1893static void insert_flipped_container(roaring_array_t *ans_arr,
1894 const roaring_array_t *x1_arr, uint16_t hb,
1895 uint16_t lb_start, uint16_t lb_end) {
1896 const int i = ra_get_index(x1_arr, hb);
1897 const int j = ra_get_index(ans_arr, hb);
1898 uint8_t ctype_in, ctype_out;
1899 void *flipped_container = NULL;
1900 if (i >= 0) {
1901 void *container_to_flip =
1902 ra_get_container_at_index(x1_arr, i, &ctype_in);
1903 flipped_container =
1904 container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
1905 (uint32_t)(lb_end + 1), &ctype_out);
1906
1907 if (container_get_cardinality(flipped_container, ctype_out))
1908 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
1909 ctype_out);
1910 else {
1911 container_free(flipped_container, ctype_out);
1912 }
1913 } else {
1914 flipped_container = container_range_of_ones(
1915 (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
1916 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
1917 ctype_out);
1918 }
1919}
1920
1921static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
1922 uint16_t lb_start, uint16_t lb_end) {
1923 const int i = ra_get_index(x1_arr, hb);
1924 uint8_t ctype_in, ctype_out;
1925 void *flipped_container = NULL;
1926 if (i >= 0) {
1927 void *container_to_flip =
1928 ra_get_container_at_index(x1_arr, i, &ctype_in);
1929 flipped_container = container_inot_range(
1930 container_to_flip, ctype_in, (uint32_t)lb_start,
1931 (uint32_t)(lb_end + 1), &ctype_out);
1932 // if a new container was created, the old one was already freed
1933 if (container_get_cardinality(flipped_container, ctype_out)) {
1934 ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
1935 } else {
1936 container_free(flipped_container, ctype_out);
1937 ra_remove_at_index(x1_arr, i);
1938 }
1939
1940 } else {
1941 flipped_container = container_range_of_ones(
1942 (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
1943 ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
1944 ctype_out);
1945 }
1946}
1947
1948static void insert_fully_flipped_container(roaring_array_t *ans_arr,
1949 const roaring_array_t *x1_arr,
1950 uint16_t hb) {
1951 const int i = ra_get_index(x1_arr, hb);
1952 const int j = ra_get_index(ans_arr, hb);
1953 uint8_t ctype_in, ctype_out;
1954 void *flipped_container = NULL;
1955 if (i >= 0) {
1956 void *container_to_flip =
1957 ra_get_container_at_index(x1_arr, i, &ctype_in);
1958 flipped_container =
1959 container_not(container_to_flip, ctype_in, &ctype_out);
1960 if (container_get_cardinality(flipped_container, ctype_out))
1961 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
1962 ctype_out);
1963 else {
1964 container_free(flipped_container, ctype_out);
1965 }
1966 } else {
1967 flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
1968 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
1969 ctype_out);
1970 }
1971}
1972
1973static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
1974 const int i = ra_get_index(x1_arr, hb);
1975 uint8_t ctype_in, ctype_out;
1976 void *flipped_container = NULL;
1977 if (i >= 0) {
1978 void *container_to_flip =
1979 ra_get_container_at_index(x1_arr, i, &ctype_in);
1980 flipped_container =
1981 container_inot(container_to_flip, ctype_in, &ctype_out);
1982
1983 if (container_get_cardinality(flipped_container, ctype_out)) {
1984 ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
1985 } else {
1986 container_free(flipped_container, ctype_out);
1987 ra_remove_at_index(x1_arr, i);
1988 }
1989
1990 } else {
1991 flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
1992 ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
1993 ctype_out);
1994 }
1995}
1996
1997roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
1998 uint64_t range_start,
1999 uint64_t range_end) {
2000 if (range_start >= range_end) {
2001 return roaring_bitmap_copy(x1);
2002 }
2003 if(range_end >= UINT64_C(0x100000000)) {
2004 range_end = UINT64_C(0x100000000);
2005 }
2006
2007 roaring_bitmap_t *ans = roaring_bitmap_create();
2008 roaring_bitmap_set_copy_on_write(ans, is_cow(x1));
2009
2010 uint16_t hb_start = (uint16_t)(range_start >> 16);
2011 const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF;
2012 uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
2013 const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF;
2014
2015 ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
2016 hb_start, is_cow(x1));
2017 if (hb_start == hb_end) {
2018 insert_flipped_container(&ans->high_low_container,
2019 &x1->high_low_container, hb_start, lb_start,
2020 lb_end);
2021 } else {
2022 // start and end containers are distinct
2023 if (lb_start > 0) {
2024 // handle first (partial) container
2025 insert_flipped_container(&ans->high_low_container,
2026 &x1->high_low_container, hb_start,
2027 lb_start, 0xFFFF);
2028 ++hb_start; // for the full containers. Can't wrap.
2029 }
2030
2031 if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
2032
2033 for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
2034 insert_fully_flipped_container(&ans->high_low_container,
2035 &x1->high_low_container, hb);
2036 }
2037
2038 // handle a partial final container
2039 if (lb_end != 0xFFFF) {
2040 insert_flipped_container(&ans->high_low_container,
2041 &x1->high_low_container, hb_end + 1, 0,
2042 lb_end);
2043 ++hb_end;
2044 }
2045 }
2046 ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
2047 hb_end, is_cow(x1));
2048 return ans;
2049}
2050
2051void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
2052 uint64_t range_end) {
2053 if (range_start >= range_end) {
2054 return; // empty range
2055 }
2056 if(range_end >= UINT64_C(0x100000000)) {
2057 range_end = UINT64_C(0x100000000);
2058 }
2059
2060 uint16_t hb_start = (uint16_t)(range_start >> 16);
2061 const uint16_t lb_start = (uint16_t)range_start;
2062 uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
2063 const uint16_t lb_end = (uint16_t)(range_end - 1);
2064
2065 if (hb_start == hb_end) {
2066 inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
2067 lb_end);
2068 } else {
2069 // start and end containers are distinct
2070 if (lb_start > 0) {
2071 // handle first (partial) container
2072 inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
2073 0xFFFF);
2074 ++hb_start; // for the full containers. Can't wrap.
2075 }
2076
2077 if (lb_end != 0xFFFF) --hb_end;
2078
2079 for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
2080 inplace_fully_flip_container(&x1->high_low_container, hb);
2081 }
2082 // handle a partial final container
2083 if (lb_end != 0xFFFF) {
2084 inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
2085 lb_end);
2086 ++hb_end;
2087 }
2088 }
2089}
2090
2091roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
2092 const roaring_bitmap_t *x2,
2093 const bool bitsetconversion) {
2094 uint8_t container_result_type = 0;
2095 const int length1 = x1->high_low_container.size,
2096 length2 = x2->high_low_container.size;
2097 if (0 == length1) {
2098 return roaring_bitmap_copy(x2);
2099 }
2100 if (0 == length2) {
2101 return roaring_bitmap_copy(x1);
2102 }
2103 roaring_bitmap_t *answer =
2104 roaring_bitmap_create_with_capacity(length1 + length2);
2105 roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
2106 int pos1 = 0, pos2 = 0;
2107 uint8_t container_type_1, container_type_2;
2108 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2109 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2110 while (true) {
2111 if (s1 == s2) {
2112 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2113 &container_type_1);
2114 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2115 &container_type_2);
2116 void *c;
2117 if (bitsetconversion && (get_container_type(c1, container_type_1) !=
2118 BITSET_CONTAINER_TYPE_CODE) &&
2119 (get_container_type(c2, container_type_2) !=
2120 BITSET_CONTAINER_TYPE_CODE)) {
2121 void *newc1 =
2122 container_mutable_unwrap_shared(c1, &container_type_1);
2123 newc1 = container_to_bitset(newc1, container_type_1);
2124 container_type_1 = BITSET_CONTAINER_TYPE_CODE;
2125 c = container_lazy_ior(newc1, container_type_1, c2,
2126 container_type_2,
2127 &container_result_type);
2128 if (c != newc1) { // should not happen
2129 container_free(newc1, container_type_1);
2130 }
2131 } else {
2132 c = container_lazy_or(c1, container_type_1, c2,
2133 container_type_2, &container_result_type);
2134 }
2135 // since we assume that the initial containers are non-empty,
2136 // the
2137 // result here
2138 // can only be non-empty
2139 ra_append(&answer->high_low_container, s1, c,
2140 container_result_type);
2141 ++pos1;
2142 ++pos2;
2143 if (pos1 == length1) break;
2144 if (pos2 == length2) break;
2145 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2146 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2147
2148 } else if (s1 < s2) { // s1 < s2
2149 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2150 &container_type_1);
2151 c1 =
2152 get_copy_of_container(c1, &container_type_1, is_cow(x1));
2153 if (is_cow(x1)) {
2154 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
2155 container_type_1);
2156 }
2157 ra_append(&answer->high_low_container, s1, c1, container_type_1);
2158 pos1++;
2159 if (pos1 == length1) break;
2160 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2161
2162 } else { // s1 > s2
2163 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2164 &container_type_2);
2165 c2 =
2166 get_copy_of_container(c2, &container_type_2, is_cow(x2));
2167 if (is_cow(x2)) {
2168 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
2169 container_type_2);
2170 }
2171 ra_append(&answer->high_low_container, s2, c2, container_type_2);
2172 pos2++;
2173 if (pos2 == length2) break;
2174 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2175 }
2176 }
2177 if (pos1 == length1) {
2178 ra_append_copy_range(&answer->high_low_container,
2179 &x2->high_low_container, pos2, length2,
2180 is_cow(x2));
2181 } else if (pos2 == length2) {
2182 ra_append_copy_range(&answer->high_low_container,
2183 &x1->high_low_container, pos1, length1,
2184 is_cow(x1));
2185 }
2186 return answer;
2187}
2188
2189void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
2190 const roaring_bitmap_t *x2,
2191 const bool bitsetconversion) {
2192 uint8_t container_result_type = 0;
2193 int length1 = x1->high_low_container.size;
2194 const int length2 = x2->high_low_container.size;
2195
2196 if (0 == length2) return;
2197
2198 if (0 == length1) {
2199 roaring_bitmap_overwrite(x1, x2);
2200 return;
2201 }
2202 int pos1 = 0, pos2 = 0;
2203 uint8_t container_type_1, container_type_2;
2204 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2205 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2206 while (true) {
2207 if (s1 == s2) {
2208 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2209 &container_type_1);
2210 if (!container_is_full(c1, container_type_1)) {
2211 if ((bitsetconversion == false) ||
2212 (get_container_type(c1, container_type_1) ==
2213 BITSET_CONTAINER_TYPE_CODE)) {
2214 c1 = get_writable_copy_if_shared(c1, &container_type_1);
2215 } else {
2216 // convert to bitset
2217 void *oldc1 = c1;
2218 uint8_t oldt1 = container_type_1;
2219 c1 = container_mutable_unwrap_shared(c1, &container_type_1);
2220 c1 = container_to_bitset(c1, container_type_1);
2221 container_free(oldc1, oldt1);
2222 container_type_1 = BITSET_CONTAINER_TYPE_CODE;
2223 }
2224
2225 void *c2 = ra_get_container_at_index(&x2->high_low_container,
2226 pos2, &container_type_2);
2227 void *c = container_lazy_ior(c1, container_type_1, c2,
2228 container_type_2,
2229 &container_result_type);
2230 if (c !=
2231 c1) { // in this instance a new container was created, and
2232 // we need to free the old one
2233 container_free(c1, container_type_1);
2234 }
2235
2236 ra_set_container_at_index(&x1->high_low_container, pos1, c,
2237 container_result_type);
2238 }
2239 ++pos1;
2240 ++pos2;
2241 if (pos1 == length1) break;
2242 if (pos2 == length2) break;
2243 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2244 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2245
2246 } else if (s1 < s2) { // s1 < s2
2247 pos1++;
2248 if (pos1 == length1) break;
2249 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2250
2251 } else { // s1 > s2
2252 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2253 &container_type_2);
2254 // void *c2_clone = container_clone(c2, container_type_2);
2255 c2 =
2256 get_copy_of_container(c2, &container_type_2, is_cow(x2));
2257 if (is_cow(x2)) {
2258 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
2259 container_type_2);
2260 }
2261 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
2262 container_type_2);
2263 pos1++;
2264 length1++;
2265 pos2++;
2266 if (pos2 == length2) break;
2267 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2268 }
2269 }
2270 if (pos1 == length1) {
2271 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
2272 pos2, length2, is_cow(x2));
2273 }
2274}
2275
2276roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
2277 const roaring_bitmap_t *x2) {
2278 uint8_t container_result_type = 0;
2279 const int length1 = x1->high_low_container.size,
2280 length2 = x2->high_low_container.size;
2281 if (0 == length1) {
2282 return roaring_bitmap_copy(x2);
2283 }
2284 if (0 == length2) {
2285 return roaring_bitmap_copy(x1);
2286 }
2287 roaring_bitmap_t *answer =
2288 roaring_bitmap_create_with_capacity(length1 + length2);
2289 roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
2290 int pos1 = 0, pos2 = 0;
2291 uint8_t container_type_1, container_type_2;
2292 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2293 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2294 while (true) {
2295 if (s1 == s2) {
2296 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2297 &container_type_1);
2298 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2299 &container_type_2);
2300 void *c =
2301 container_lazy_xor(c1, container_type_1, c2, container_type_2,
2302 &container_result_type);
2303
2304 if (container_nonzero_cardinality(c, container_result_type)) {
2305 ra_append(&answer->high_low_container, s1, c,
2306 container_result_type);
2307 } else {
2308 container_free(c, container_result_type);
2309 }
2310
2311 ++pos1;
2312 ++pos2;
2313 if (pos1 == length1) break;
2314 if (pos2 == length2) break;
2315 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2316 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2317
2318 } else if (s1 < s2) { // s1 < s2
2319 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2320 &container_type_1);
2321 c1 =
2322 get_copy_of_container(c1, &container_type_1, is_cow(x1));
2323 if (is_cow(x1)) {
2324 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
2325 container_type_1);
2326 }
2327 ra_append(&answer->high_low_container, s1, c1, container_type_1);
2328 pos1++;
2329 if (pos1 == length1) break;
2330 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2331
2332 } else { // s1 > s2
2333 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2334 &container_type_2);
2335 c2 =
2336 get_copy_of_container(c2, &container_type_2, is_cow(x2));
2337 if (is_cow(x2)) {
2338 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
2339 container_type_2);
2340 }
2341 ra_append(&answer->high_low_container, s2, c2, container_type_2);
2342 pos2++;
2343 if (pos2 == length2) break;
2344 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2345 }
2346 }
2347 if (pos1 == length1) {
2348 ra_append_copy_range(&answer->high_low_container,
2349 &x2->high_low_container, pos2, length2,
2350 is_cow(x2));
2351 } else if (pos2 == length2) {
2352 ra_append_copy_range(&answer->high_low_container,
2353 &x1->high_low_container, pos1, length1,
2354 is_cow(x1));
2355 }
2356 return answer;
2357}
2358
2359void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
2360 const roaring_bitmap_t *x2) {
2361 assert(x1 != x2);
2362 uint8_t container_result_type = 0;
2363 int length1 = x1->high_low_container.size;
2364 const int length2 = x2->high_low_container.size;
2365
2366 if (0 == length2) return;
2367
2368 if (0 == length1) {
2369 roaring_bitmap_overwrite(x1, x2);
2370 return;
2371 }
2372 int pos1 = 0, pos2 = 0;
2373 uint8_t container_type_1, container_type_2;
2374 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2375 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2376 while (true) {
2377 if (s1 == s2) {
2378 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2379 &container_type_1);
2380 c1 = get_writable_copy_if_shared(c1, &container_type_1);
2381 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2382 &container_type_2);
2383 void *c =
2384 container_lazy_ixor(c1, container_type_1, c2, container_type_2,
2385 &container_result_type);
2386 if (container_nonzero_cardinality(c, container_result_type)) {
2387 ra_set_container_at_index(&x1->high_low_container, pos1, c,
2388 container_result_type);
2389 ++pos1;
2390 } else {
2391 container_free(c, container_result_type);
2392 ra_remove_at_index(&x1->high_low_container, pos1);
2393 --length1;
2394 }
2395 ++pos2;
2396 if (pos1 == length1) break;
2397 if (pos2 == length2) break;
2398 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2399 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2400
2401 } else if (s1 < s2) { // s1 < s2
2402 pos1++;
2403 if (pos1 == length1) break;
2404 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2405
2406 } else { // s1 > s2
2407 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2408 &container_type_2);
2409 // void *c2_clone = container_clone(c2, container_type_2);
2410 c2 =
2411 get_copy_of_container(c2, &container_type_2, is_cow(x2));
2412 if (is_cow(x2)) {
2413 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
2414 container_type_2);
2415 }
2416 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
2417 container_type_2);
2418 pos1++;
2419 length1++;
2420 pos2++;
2421 if (pos2 == length2) break;
2422 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2423 }
2424 }
2425 if (pos1 == length1) {
2426 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
2427 pos2, length2, is_cow(x2));
2428 }
2429}
2430
2431void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) {
2432 for (int i = 0; i < ra->high_low_container.size; ++i) {
2433 const uint8_t original_typecode = ra->high_low_container.typecodes[i];
2434 void *container = ra->high_low_container.containers[i];
2435 uint8_t new_typecode = original_typecode;
2436 void *newcontainer =
2437 container_repair_after_lazy(container, &new_typecode);
2438 ra->high_low_container.containers[i] = newcontainer;
2439 ra->high_low_container.typecodes[i] = new_typecode;
2440 }
2441}
2442
2443
2444
2445/**
2446* roaring_bitmap_rank returns the number of integers that are smaller or equal
2447* to x.
2448*/
2449uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
2450 uint64_t size = 0;
2451 uint32_t xhigh = x >> 16;
2452 for (int i = 0; i < bm->high_low_container.size; i++) {
2453 uint32_t key = bm->high_low_container.keys[i];
2454 if (xhigh > key) {
2455 size +=
2456 container_get_cardinality(bm->high_low_container.containers[i],
2457 bm->high_low_container.typecodes[i]);
2458 } else if (xhigh == key) {
2459 return size + container_rank(bm->high_low_container.containers[i],
2460 bm->high_low_container.typecodes[i],
2461 x & 0xFFFF);
2462 } else {
2463 return size;
2464 }
2465 }
2466 return size;
2467}
2468
2469/**
2470* roaring_bitmap_smallest returns the smallest value in the set.
2471* Returns UINT32_MAX if the set is empty.
2472*/
2473uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
2474 if (bm->high_low_container.size > 0) {
2475 void *container = bm->high_low_container.containers[0];
2476 uint8_t typecode = bm->high_low_container.typecodes[0];
2477 uint32_t key = bm->high_low_container.keys[0];
2478 uint32_t lowvalue = container_minimum(container, typecode);
2479 return lowvalue | (key << 16);
2480 }
2481 return UINT32_MAX;
2482}
2483
2484/**
2485* roaring_bitmap_smallest returns the greatest value in the set.
2486* Returns 0 if the set is empty.
2487*/
2488uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
2489 if (bm->high_low_container.size > 0) {
2490 void *container =
2491 bm->high_low_container.containers[bm->high_low_container.size - 1];
2492 uint8_t typecode =
2493 bm->high_low_container.typecodes[bm->high_low_container.size - 1];
2494 uint32_t key =
2495 bm->high_low_container.keys[bm->high_low_container.size - 1];
2496 uint32_t lowvalue = container_maximum(container, typecode);
2497 return lowvalue | (key << 16);
2498 }
2499 return 0;
2500}
2501
2502bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
2503 uint32_t *element) {
2504 void *container;
2505 uint8_t typecode;
2506 uint16_t key;
2507 uint32_t start_rank = 0;
2508 int i = 0;
2509 bool valid = false;
2510 while (!valid && i < bm->high_low_container.size) {
2511 container = bm->high_low_container.containers[i];
2512 typecode = bm->high_low_container.typecodes[i];
2513 valid =
2514 container_select(container, typecode, &start_rank, rank, element);
2515 i++;
2516 }
2517
2518 if (valid) {
2519 key = bm->high_low_container.keys[i - 1];
2520 *element |= (key << 16);
2521 return true;
2522 } else
2523 return false;
2524}
2525
2526bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
2527 const roaring_bitmap_t *x2) {
2528 const int length1 = x1->high_low_container.size,
2529 length2 = x2->high_low_container.size;
2530 uint64_t answer = 0;
2531 int pos1 = 0, pos2 = 0;
2532
2533 while (pos1 < length1 && pos2 < length2) {
2534 const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1);
2535 const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2);
2536
2537 if (s1 == s2) {
2538 uint8_t container_type_1, container_type_2;
2539 void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1,
2540 &container_type_1);
2541 void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2,
2542 &container_type_2);
2543 if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true;
2544 ++pos1;
2545 ++pos2;
2546 } else if (s1 < s2) { // s1 < s2
2547 pos1 = ra_advance_until(& x1->high_low_container, s2, pos1);
2548 } else { // s1 > s2
2549 pos2 = ra_advance_until(& x2->high_low_container, s1, pos2);
2550 }
2551 }
2552 return answer;
2553}
2554
2555
2556uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
2557 const roaring_bitmap_t *x2) {
2558 const int length1 = x1->high_low_container.size,
2559 length2 = x2->high_low_container.size;
2560 uint64_t answer = 0;
2561 int pos1 = 0, pos2 = 0;
2562
2563 while (pos1 < length1 && pos2 < length2) {
2564 const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
2565 const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
2566
2567 if (s1 == s2) {
2568 uint8_t container_type_1, container_type_2;
2569 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
2570 &container_type_1);
2571 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
2572 &container_type_2);
2573 answer += container_and_cardinality(c1, container_type_1, c2,
2574 container_type_2);
2575 ++pos1;
2576 ++pos2;
2577 } else if (s1 < s2) { // s1 < s2
2578 pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
2579 } else { // s1 > s2
2580 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
2581 }
2582 }
2583 return answer;
2584}
2585
2586double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
2587 const roaring_bitmap_t *x2) {
2588 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
2589 const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
2590 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
2591 return (double)inter / (double)(c1 + c2 - inter);
2592}
2593
2594uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
2595 const roaring_bitmap_t *x2) {
2596 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
2597 const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
2598 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
2599 return c1 + c2 - inter;
2600}
2601
2602uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
2603 const roaring_bitmap_t *x2) {
2604 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
2605 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
2606 return c1 - inter;
2607}
2608
2609uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
2610 const roaring_bitmap_t *x2) {
2611 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
2612 const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
2613 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
2614 return c1 + c2 - 2 * inter;
2615}
2616
2617
2618/**
2619 * Check whether a range of values from range_start (included) to range_end (excluded) is present
2620 */
2621bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
2622 if(range_end >= UINT64_C(0x100000000)) {
2623 range_end = UINT64_C(0x100000000);
2624 }
2625 if (range_start >= range_end) return true; // empty range are always contained!
2626 if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
2627 uint16_t hb_rs = (uint16_t)(range_start >> 16);
2628 uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
2629 const int32_t span = hb_re - hb_rs;
2630 const int32_t hlc_sz = ra_get_size(&r->high_low_container);
2631 if (hlc_sz < span + 1) {
2632 return false;
2633 }
2634 int32_t is = ra_get_index(&r->high_low_container, hb_rs);
2635 int32_t ie = ra_get_index(&r->high_low_container, hb_re);
2636 ie = (ie < 0 ? -ie - 1 : ie);
2637 if ((is < 0) || ((ie - is) != span)) {
2638 return false;
2639 }
2640 const uint32_t lb_rs = range_start & 0xFFFF;
2641 const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
2642 uint8_t typecode;
2643 void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode);
2644 if (hb_rs == hb_re) {
2645 return container_contains_range(container, lb_rs, lb_re, typecode);
2646 }
2647 if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) {
2648 return false;
2649 }
2650 assert(ie < hlc_sz); // would indicate an algorithmic bug
2651 container = ra_get_container_at_index(&r->high_low_container, ie, &typecode);
2652 if (!container_contains_range(container, 0, lb_re, typecode)) {
2653 return false;
2654 }
2655 for (int32_t i = is + 1; i < ie; ++i) {
2656 container = ra_get_container_at_index(&r->high_low_container, i, &typecode);
2657 if (!container_is_full(container, typecode) ) {
2658 return false;
2659 }
2660 }
2661 return true;
2662}
2663
2664
2665bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1,
2666 const roaring_bitmap_t *ra2) {
2667 return (roaring_bitmap_get_cardinality(ra2) >
2668 roaring_bitmap_get_cardinality(ra1) &&
2669 roaring_bitmap_is_subset(ra1, ra2));
2670}
2671
2672
2673/*
2674 * FROZEN SERIALIZATION FORMAT DESCRIPTION
2675 *
2676 * -- (beginning must be aligned by 32 bytes) --
2677 * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
2678 * <run_data> rle16_t[total number of rle elements in all run containers]
2679 * <array_data> uint16_t[total number of array elements in all array containers]
2680 * <keys> uint16_t[num_containers]
2681 * <counts> uint16_t[num_containers]
2682 * <typecodes> uint8_t[num_containers]
2683 * <header> uint32_t
2684 *
2685 * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
2686 * and the number of containers (17 bits).
2687 *
2688 * <counts> stores number of elements for every container.
2689 * Its meaning depends on container type.
2690 * For array and bitset containers, this value is the container cardinality minus one.
2691 * For run container, it is the number of rle_t elements (n_runs).
2692 *
2693 * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
2694 * all containers of respective type.
2695 *
2696 * <*_data> and <keys> are kept close together because they are not accessed
2697 * during deserilization. This may reduce IO in case of large mmaped bitmaps.
2698 * All members have their native alignments during deserilization except <header>,
2699 * which is not guaranteed to be aligned by 4 bytes.
2700 */
2701
2702size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
2703 const roaring_array_t *ra = &rb->high_low_container;
2704 size_t num_bytes = 0;
2705 for (int32_t i = 0; i < ra->size; i++) {
2706 switch (ra->typecodes[i]) {
2707 case BITSET_CONTAINER_TYPE_CODE: {
2708 num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
2709 break;
2710 }
2711 case RUN_CONTAINER_TYPE_CODE: {
2712 const run_container_t *run =
2713 (const run_container_t *) ra->containers[i];
2714 num_bytes += run->n_runs * sizeof(rle16_t);
2715 break;
2716 }
2717 case ARRAY_CONTAINER_TYPE_CODE: {
2718 const array_container_t *array =
2719 (const array_container_t *) ra->containers[i];
2720 num_bytes += array->cardinality * sizeof(uint16_t);
2721 break;
2722 }
2723 default:
2724 __builtin_unreachable();
2725 }
2726 }
2727 num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
2728 num_bytes += 4; // header
2729 return num_bytes;
2730}
2731
2732inline static void *arena_alloc(char **arena, size_t num_bytes) {
2733 char *res = *arena;
2734 *arena += num_bytes;
2735 return res;
2736}
2737
2738void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
2739 /*
2740 * Note: we do not require user to supply spicificly aligned buffer.
2741 * Thus we have to use memcpy() everywhere.
2742 */
2743
2744 const roaring_array_t *ra = &rb->high_low_container;
2745
2746 size_t bitset_zone_size = 0;
2747 size_t run_zone_size = 0;
2748 size_t array_zone_size = 0;
2749 for (int32_t i = 0; i < ra->size; i++) {
2750 switch (ra->typecodes[i]) {
2751 case BITSET_CONTAINER_TYPE_CODE: {
2752 bitset_zone_size +=
2753 BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
2754 break;
2755 }
2756 case RUN_CONTAINER_TYPE_CODE: {
2757 const run_container_t *run =
2758 (const run_container_t *) ra->containers[i];
2759 run_zone_size += run->n_runs * sizeof(rle16_t);
2760 break;
2761 }
2762 case ARRAY_CONTAINER_TYPE_CODE: {
2763 const array_container_t *array =
2764 (const array_container_t *) ra->containers[i];
2765 array_zone_size += array->cardinality * sizeof(uint16_t);
2766 break;
2767 }
2768 default:
2769 __builtin_unreachable();
2770 }
2771 }
2772
2773 uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
2774 rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
2775 uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
2776 uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
2777 uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
2778 uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
2779 uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);
2780
2781 for (int32_t i = 0; i < ra->size; i++) {
2782 uint16_t count;
2783 switch (ra->typecodes[i]) {
2784 case BITSET_CONTAINER_TYPE_CODE: {
2785 const bitset_container_t *bitset =
2786 (const bitset_container_t *) ra->containers[i];
2787 memcpy(bitset_zone, bitset->array,
2788 BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
2789 bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
2790 if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) {
2791 count = bitset->cardinality - 1;
2792 } else {
2793 count = bitset_container_compute_cardinality(bitset) - 1;
2794 }
2795 break;
2796 }
2797 case RUN_CONTAINER_TYPE_CODE: {
2798 const run_container_t *run =
2799 (const run_container_t *) ra->containers[i];
2800 size_t num_bytes = run->n_runs * sizeof(rle16_t);
2801 memcpy(run_zone, run->runs, num_bytes);
2802 run_zone += run->n_runs;
2803 count = run->n_runs;
2804 break;
2805 }
2806 case ARRAY_CONTAINER_TYPE_CODE: {
2807 const array_container_t *array =
2808 (const array_container_t *) ra->containers[i];
2809 size_t num_bytes = array->cardinality * sizeof(uint16_t);
2810 memcpy(array_zone, array->array, num_bytes);
2811 array_zone += array->cardinality;
2812 count = array->cardinality - 1;
2813 break;
2814 }
2815 default:
2816 __builtin_unreachable();
2817 }
2818 memcpy(&count_zone[i], &count, 2);
2819 }
2820 memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t));
2821 memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t));
2822 uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
2823 memcpy(header_zone, &header, 4);
2824}
2825
2826const roaring_bitmap_t *
2827roaring_bitmap_frozen_view(const char *buf, size_t length) {
2828 if ((uintptr_t)buf % 32 != 0) {
2829 return NULL;
2830 }
2831
2832 // cookie and num_containers
2833 if (length < 4) {
2834 return NULL;
2835 }
2836 uint32_t header;
2837 memcpy(&header, buf + length - 4, 4); // header may be misaligned
2838 if ((header & 0x7FFF) != FROZEN_COOKIE) {
2839 return NULL;
2840 }
2841 int32_t num_containers = (header >> 15);
2842
2843 // typecodes, counts and keys
2844 if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
2845 return NULL;
2846 }
2847 uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
2848 uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
2849 uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);
2850
2851 // {bitset,array,run}_zone
2852 int32_t num_bitset_containers = 0;
2853 int32_t num_run_containers = 0;
2854 int32_t num_array_containers = 0;
2855 size_t bitset_zone_size = 0;
2856 size_t run_zone_size = 0;
2857 size_t array_zone_size = 0;
2858 for (int32_t i = 0; i < num_containers; i++) {
2859 switch (typecodes[i]) {
2860 case BITSET_CONTAINER_TYPE_CODE:
2861 num_bitset_containers++;
2862 bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
2863 break;
2864 case RUN_CONTAINER_TYPE_CODE:
2865 num_run_containers++;
2866 run_zone_size += counts[i] * sizeof(rle16_t);
2867 break;
2868 case ARRAY_CONTAINER_TYPE_CODE:
2869 num_array_containers++;
2870 array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
2871 break;
2872 default:
2873 return NULL;
2874 }
2875 }
2876 if (length != bitset_zone_size + run_zone_size + array_zone_size +
2877 5 * num_containers + 4) {
2878 return NULL;
2879 }
2880 uint64_t *bitset_zone = (uint64_t*) (buf);
2881 rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
2882 uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
2883
2884 size_t alloc_size = 0;
2885 alloc_size += sizeof(roaring_bitmap_t);
2886 alloc_size += num_containers * sizeof(void *);
2887 alloc_size += num_bitset_containers * sizeof(bitset_container_t);
2888 alloc_size += num_run_containers * sizeof(run_container_t);
2889 alloc_size += num_array_containers * sizeof(array_container_t);
2890
2891 char *arena = (char *)malloc(alloc_size);
2892 if (arena == NULL) {
2893 return NULL;
2894 }
2895
2896 roaring_bitmap_t *rb = (roaring_bitmap_t *)
2897 arena_alloc(&arena, sizeof(roaring_bitmap_t));
2898 rb->high_low_container.flags = ROARING_FLAG_FROZEN;
2899 rb->high_low_container.allocation_size = num_containers;
2900 rb->high_low_container.size = num_containers;
2901 rb->high_low_container.keys = (uint16_t *)keys;
2902 rb->high_low_container.typecodes = (uint8_t *)typecodes;
2903 rb->high_low_container.containers =
2904 (void **)arena_alloc(&arena, sizeof(void*) * num_containers);
2905 for (int32_t i = 0; i < num_containers; i++) {
2906 switch (typecodes[i]) {
2907 case BITSET_CONTAINER_TYPE_CODE: {
2908 bitset_container_t *bitset = (bitset_container_t *)
2909 arena_alloc(&arena, sizeof(bitset_container_t));
2910 bitset->array = bitset_zone;
2911 bitset->cardinality = counts[i] + UINT32_C(1);
2912 rb->high_low_container.containers[i] = bitset;
2913 bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
2914 break;
2915 }
2916 case RUN_CONTAINER_TYPE_CODE: {
2917 run_container_t *run = (run_container_t *)
2918 arena_alloc(&arena, sizeof(run_container_t));
2919 run->capacity = counts[i];
2920 run->n_runs = counts[i];
2921 run->runs = run_zone;
2922 rb->high_low_container.containers[i] = run;
2923 run_zone += run->n_runs;
2924 break;
2925 }
2926 case ARRAY_CONTAINER_TYPE_CODE: {
2927 array_container_t *array = (array_container_t *)
2928 arena_alloc(&arena, sizeof(array_container_t));
2929 array->capacity = counts[i] + UINT32_C(1);
2930 array->cardinality = counts[i] + UINT32_C(1);
2931 array->array = array_zone;
2932 rb->high_low_container.containers[i] = array;
2933 array_zone += counts[i] + UINT32_C(1);
2934 break;
2935 }
2936 default:
2937 free(arena);
2938 return NULL;
2939 }
2940 }
2941
2942 return rb;
2943}
2944