1 | #include <assert.h> |
2 | #include <roaring/array_util.h> |
3 | #include <roaring/roaring.h> |
4 | #include <roaring/roaring_array.h> |
5 | #include <roaring/bitset_util.h> |
6 | #include <stdarg.h> |
7 | #include <stdint.h> |
8 | #include <stdio.h> |
9 | #include <string.h> |
10 | #include <inttypes.h> |
11 | |
12 | extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, |
13 | uint32_t val); |
14 | extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r); |
15 | extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow); |
16 | |
17 | static inline bool is_cow(const roaring_bitmap_t *r) { |
18 | return r->high_low_container.flags & ROARING_FLAG_COW; |
19 | } |
20 | static inline bool is_frozen(const roaring_bitmap_t *r) { |
21 | return r->high_low_container.flags & ROARING_FLAG_FROZEN; |
22 | } |
23 | |
24 | // this is like roaring_bitmap_add, but it populates pointer arguments in such a |
25 | // way |
26 | // that we can recover the container touched, which, in turn can be used to |
27 | // accelerate some functions (when you repeatedly need to add to the same |
28 | // container) |
29 | static inline void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, |
30 | uint32_t val, |
31 | uint8_t *typecode, |
32 | int *index) { |
33 | uint16_t hb = val >> 16; |
34 | const int i = ra_get_index(&r->high_low_container, hb); |
35 | if (i >= 0) { |
36 | ra_unshare_container_at_index(&r->high_low_container, i); |
37 | void *container = |
38 | ra_get_container_at_index(&r->high_low_container, i, typecode); |
39 | uint8_t newtypecode = *typecode; |
40 | void *container2 = |
41 | container_add(container, val & 0xFFFF, *typecode, &newtypecode); |
42 | *index = i; |
43 | if (container2 != container) { |
44 | container_free(container, *typecode); |
45 | ra_set_container_at_index(&r->high_low_container, i, container2, |
46 | newtypecode); |
47 | *typecode = newtypecode; |
48 | return container2; |
49 | } else { |
50 | return container; |
51 | } |
52 | } else { |
53 | array_container_t *newac = array_container_create(); |
54 | void *container = container_add(newac, val & 0xFFFF, |
55 | ARRAY_CONTAINER_TYPE_CODE, typecode); |
56 | // we could just assume that it stays an array container |
57 | ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, |
58 | container, *typecode); |
59 | *index = -i - 1; |
60 | return container; |
61 | } |
62 | } |
63 | |
64 | roaring_bitmap_t *roaring_bitmap_create() { |
65 | roaring_bitmap_t *ans = |
66 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
67 | if (!ans) { |
68 | return NULL; |
69 | } |
70 | ra_init(&ans->high_low_container); |
71 | return ans; |
72 | } |
73 | |
74 | roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { |
75 | roaring_bitmap_t *ans = |
76 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
77 | if (!ans) { |
78 | return NULL; |
79 | } |
80 | bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); |
81 | if (!is_ok) { |
82 | free(ans); |
83 | return NULL; |
84 | } |
85 | return ans; |
86 | } |
87 | |
88 | void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, |
89 | const uint32_t *vals) { |
90 | void *container = NULL; // hold value of last container touched |
91 | uint8_t typecode = 0; // typecode of last container touched |
92 | uint32_t prev = 0; // previous valued inserted |
93 | size_t i = 0; // index of value |
94 | int containerindex = 0; |
95 | if (n_args == 0) return; |
96 | uint32_t val; |
97 | memcpy(&val, vals + i, sizeof(val)); |
98 | container = |
99 | containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); |
100 | prev = val; |
101 | i++; |
102 | for (; i < n_args; i++) { |
103 | memcpy(&val, vals + i, sizeof(val)); |
104 | if (((prev ^ val) >> 16) == |
105 | 0) { // no need to seek the container, it is at hand |
106 | // because we already have the container at hand, we can do the |
107 | // insertion |
108 | // automatically, bypassing the roaring_bitmap_add call |
109 | uint8_t newtypecode = typecode; |
110 | void *container2 = |
111 | container_add(container, val & 0xFFFF, typecode, &newtypecode); |
112 | if (container2 != container) { // rare instance when we need to |
113 | // change the container type |
114 | container_free(container, typecode); |
115 | ra_set_container_at_index(&r->high_low_container, |
116 | containerindex, container2, |
117 | newtypecode); |
118 | typecode = newtypecode; |
119 | container = container2; |
120 | } |
121 | } else { |
122 | container = containerptr_roaring_bitmap_add(r, val, &typecode, |
123 | &containerindex); |
124 | } |
125 | prev = val; |
126 | } |
127 | } |
128 | |
129 | roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { |
130 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
131 | roaring_bitmap_add_many(answer, n_args, vals); |
132 | return answer; |
133 | } |
134 | |
135 | roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { |
136 | // todo: could be greatly optimized but we do not expect this call to ever |
137 | // include long lists |
138 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
139 | va_list ap; |
140 | va_start(ap, n_args); |
141 | for (size_t i = 1; i <= n_args; i++) { |
142 | uint32_t val = va_arg(ap, uint32_t); |
143 | roaring_bitmap_add(answer, val); |
144 | } |
145 | va_end(ap); |
146 | return answer; |
147 | } |
148 | |
149 | static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { |
150 | return (a < b) ? a : b; |
151 | } |
152 | |
153 | static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { |
154 | return (a < b) ? a : b; |
155 | } |
156 | |
157 | roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, |
158 | uint32_t step) { |
159 | if(max >= UINT64_C(0x100000000)) { |
160 | max = UINT64_C(0x100000000); |
161 | } |
162 | if (step == 0) return NULL; |
163 | if (max <= min) return NULL; |
164 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
165 | if (step >= (1 << 16)) { |
166 | for (uint32_t value = (uint32_t)min; value < max; value += step) { |
167 | roaring_bitmap_add(answer, value); |
168 | } |
169 | return answer; |
170 | } |
171 | uint64_t min_tmp = min; |
172 | do { |
173 | uint32_t key = (uint32_t)min_tmp >> 16; |
174 | uint32_t container_min = min_tmp & 0xFFFF; |
175 | uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); |
176 | uint8_t type; |
177 | void *container = container_from_range(&type, container_min, |
178 | container_max, (uint16_t)step); |
179 | ra_append(&answer->high_low_container, key, container, type); |
180 | uint32_t gap = container_max - container_min + step - 1; |
181 | min_tmp += gap - (gap % step); |
182 | } while (min_tmp < max); |
183 | // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step |
184 | return answer; |
185 | } |
186 | |
187 | void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { |
188 | if (min > max) { |
189 | return; |
190 | } |
191 | |
192 | uint32_t min_key = min >> 16; |
193 | uint32_t max_key = max >> 16; |
194 | |
195 | int32_t num_required_containers = max_key - min_key + 1; |
196 | int32_t suffix_length = count_greater(ra->high_low_container.keys, |
197 | ra->high_low_container.size, |
198 | max_key); |
199 | int32_t prefix_length = count_less(ra->high_low_container.keys, |
200 | ra->high_low_container.size - suffix_length, |
201 | min_key); |
202 | int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length; |
203 | |
204 | if (num_required_containers > common_length) { |
205 | ra_shift_tail(&ra->high_low_container, suffix_length, |
206 | num_required_containers - common_length); |
207 | } |
208 | |
209 | int32_t src = prefix_length + common_length - 1; |
210 | int32_t dst = ra->high_low_container.size - suffix_length - 1; |
211 | for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 |
212 | uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; |
213 | uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; |
214 | void* new_container; |
215 | uint8_t new_type; |
216 | |
217 | if (src >= 0 && ra->high_low_container.keys[src] == key) { |
218 | ra_unshare_container_at_index(&ra->high_low_container, src); |
219 | new_container = container_add_range(ra->high_low_container.containers[src], |
220 | ra->high_low_container.typecodes[src], |
221 | container_min, container_max, &new_type); |
222 | if (new_container != ra->high_low_container.containers[src]) { |
223 | container_free(ra->high_low_container.containers[src], |
224 | ra->high_low_container.typecodes[src]); |
225 | } |
226 | src--; |
227 | } else { |
228 | new_container = container_from_range(&new_type, container_min, |
229 | container_max+1, 1); |
230 | } |
231 | ra_replace_key_and_container_at_index(&ra->high_low_container, dst, |
232 | key, new_container, new_type); |
233 | dst--; |
234 | } |
235 | } |
236 | |
237 | void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { |
238 | if (min > max) { |
239 | return; |
240 | } |
241 | |
242 | uint32_t min_key = min >> 16; |
243 | uint32_t max_key = max >> 16; |
244 | |
245 | int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key); |
246 | int32_t dst = src; |
247 | while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) { |
248 | uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0; |
249 | uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff; |
250 | ra_unshare_container_at_index(&ra->high_low_container, src); |
251 | void *new_container; |
252 | uint8_t new_type; |
253 | new_container = container_remove_range(ra->high_low_container.containers[src], |
254 | ra->high_low_container.typecodes[src], |
255 | container_min, container_max, |
256 | &new_type); |
257 | if (new_container != ra->high_low_container.containers[src]) { |
258 | container_free(ra->high_low_container.containers[src], |
259 | ra->high_low_container.typecodes[src]); |
260 | } |
261 | if (new_container) { |
262 | ra_replace_key_and_container_at_index(&ra->high_low_container, dst, |
263 | ra->high_low_container.keys[src], |
264 | new_container, new_type); |
265 | dst++; |
266 | } |
267 | src++; |
268 | } |
269 | if (src > dst) { |
270 | ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src); |
271 | } |
272 | } |
273 | |
274 | extern inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); |
275 | extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); |
276 | |
277 | void roaring_bitmap_printf(const roaring_bitmap_t *ra) { |
278 | printf("{" ); |
279 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
280 | container_printf_as_uint32_array( |
281 | ra->high_low_container.containers[i], |
282 | ra->high_low_container.typecodes[i], |
283 | ((uint32_t)ra->high_low_container.keys[i]) << 16); |
284 | if (i + 1 < ra->high_low_container.size) printf("," ); |
285 | } |
286 | printf("}" ); |
287 | } |
288 | |
289 | void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { |
290 | printf("{" ); |
291 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
292 | printf("%d: %s (%d)" , ra->high_low_container.keys[i], |
293 | get_full_container_name(ra->high_low_container.containers[i], |
294 | ra->high_low_container.typecodes[i]), |
295 | container_get_cardinality(ra->high_low_container.containers[i], |
296 | ra->high_low_container.typecodes[i])); |
297 | if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { |
298 | printf( |
299 | "(shared count = %" PRIu32 " )" , |
300 | ((shared_container_t *)(ra->high_low_container.containers[i])) |
301 | ->counter); |
302 | } |
303 | |
304 | if (i + 1 < ra->high_low_container.size) printf(", " ); |
305 | } |
306 | printf("}" ); |
307 | } |
308 | |
309 | typedef struct min_max_sum_s { |
310 | uint32_t min; |
311 | uint32_t max; |
312 | uint64_t sum; |
313 | } min_max_sum_t; |
314 | |
315 | static bool min_max_sum_fnc(uint32_t value, void *param) { |
316 | min_max_sum_t *mms = (min_max_sum_t *)param; |
317 | if (value > mms->max) mms->max = value; |
318 | if (value < mms->min) mms->min = value; |
319 | mms->sum += value; |
320 | return true; // we always process all data points |
321 | } |
322 | |
323 | /** |
324 | * (For advanced users.) |
325 | * Collect statistics about the bitmap |
326 | */ |
327 | void roaring_bitmap_statistics(const roaring_bitmap_t *ra, |
328 | roaring_statistics_t *stat) { |
329 | memset(stat, 0, sizeof(*stat)); |
330 | stat->n_containers = ra->high_low_container.size; |
331 | stat->cardinality = roaring_bitmap_get_cardinality(ra); |
332 | min_max_sum_t mms; |
333 | mms.min = UINT32_C(0xFFFFFFFF); |
334 | mms.max = UINT32_C(0); |
335 | mms.sum = 0; |
336 | roaring_iterate(ra, &min_max_sum_fnc, &mms); |
337 | stat->min_value = mms.min; |
338 | stat->max_value = mms.max; |
339 | stat->sum_value = mms.sum; |
340 | |
341 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
342 | uint8_t truetype = |
343 | get_container_type(ra->high_low_container.containers[i], |
344 | ra->high_low_container.typecodes[i]); |
345 | uint32_t card = |
346 | container_get_cardinality(ra->high_low_container.containers[i], |
347 | ra->high_low_container.typecodes[i]); |
348 | uint32_t sbytes = |
349 | container_size_in_bytes(ra->high_low_container.containers[i], |
350 | ra->high_low_container.typecodes[i]); |
351 | switch (truetype) { |
352 | case BITSET_CONTAINER_TYPE_CODE: |
353 | stat->n_bitset_containers++; |
354 | stat->n_values_bitset_containers += card; |
355 | stat->n_bytes_bitset_containers += sbytes; |
356 | break; |
357 | case ARRAY_CONTAINER_TYPE_CODE: |
358 | stat->n_array_containers++; |
359 | stat->n_values_array_containers += card; |
360 | stat->n_bytes_array_containers += sbytes; |
361 | break; |
362 | case RUN_CONTAINER_TYPE_CODE: |
363 | stat->n_run_containers++; |
364 | stat->n_values_run_containers += card; |
365 | stat->n_bytes_run_containers += sbytes; |
366 | break; |
367 | default: |
368 | assert(false); |
369 | __builtin_unreachable(); |
370 | } |
371 | } |
372 | } |
373 | |
374 | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { |
375 | roaring_bitmap_t *ans = |
376 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
377 | if (!ans) { |
378 | return NULL; |
379 | } |
380 | bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container, |
381 | is_cow(r)); |
382 | if (!is_ok) { |
383 | free(ans); |
384 | return NULL; |
385 | } |
386 | roaring_bitmap_set_copy_on_write(ans, is_cow(r)); |
387 | return ans; |
388 | } |
389 | |
390 | bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, |
391 | const roaring_bitmap_t *src) { |
392 | return ra_overwrite(&src->high_low_container, &dest->high_low_container, |
393 | is_cow(src)); |
394 | } |
395 | |
396 | void roaring_bitmap_free(const roaring_bitmap_t *r) { |
397 | if (!is_frozen(r)) { |
398 | ra_clear((roaring_array_t*)&r->high_low_container); |
399 | } |
400 | free((roaring_bitmap_t*)r); |
401 | } |
402 | |
403 | void roaring_bitmap_clear(roaring_bitmap_t *r) { |
404 | ra_reset(&r->high_low_container); |
405 | } |
406 | |
407 | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { |
408 | const uint16_t hb = val >> 16; |
409 | const int i = ra_get_index(&r->high_low_container, hb); |
410 | uint8_t typecode; |
411 | if (i >= 0) { |
412 | ra_unshare_container_at_index(&r->high_low_container, i); |
413 | void *container = |
414 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
415 | uint8_t newtypecode = typecode; |
416 | void *container2 = |
417 | container_add(container, val & 0xFFFF, typecode, &newtypecode); |
418 | if (container2 != container) { |
419 | container_free(container, typecode); |
420 | ra_set_container_at_index(&r->high_low_container, i, container2, |
421 | newtypecode); |
422 | } |
423 | } else { |
424 | array_container_t *newac = array_container_create(); |
425 | void *container = container_add(newac, val & 0xFFFF, |
426 | ARRAY_CONTAINER_TYPE_CODE, &typecode); |
427 | // we could just assume that it stays an array container |
428 | ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, |
429 | container, typecode); |
430 | } |
431 | } |
432 | |
433 | bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { |
434 | const uint16_t hb = val >> 16; |
435 | const int i = ra_get_index(&r->high_low_container, hb); |
436 | uint8_t typecode; |
437 | bool result = false; |
438 | if (i >= 0) { |
439 | ra_unshare_container_at_index(&r->high_low_container, i); |
440 | void *container = |
441 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
442 | |
443 | const int oldCardinality = |
444 | container_get_cardinality(container, typecode); |
445 | |
446 | uint8_t newtypecode = typecode; |
447 | void *container2 = |
448 | container_add(container, val & 0xFFFF, typecode, &newtypecode); |
449 | if (container2 != container) { |
450 | container_free(container, typecode); |
451 | ra_set_container_at_index(&r->high_low_container, i, container2, |
452 | newtypecode); |
453 | result = true; |
454 | } else { |
455 | const int newCardinality = |
456 | container_get_cardinality(container, newtypecode); |
457 | |
458 | result = oldCardinality != newCardinality; |
459 | } |
460 | } else { |
461 | array_container_t *newac = array_container_create(); |
462 | void *container = container_add(newac, val & 0xFFFF, |
463 | ARRAY_CONTAINER_TYPE_CODE, &typecode); |
464 | // we could just assume that it stays an array container |
465 | ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, |
466 | container, typecode); |
467 | result = true; |
468 | } |
469 | |
470 | return result; |
471 | } |
472 | |
473 | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { |
474 | const uint16_t hb = val >> 16; |
475 | const int i = ra_get_index(&r->high_low_container, hb); |
476 | uint8_t typecode; |
477 | if (i >= 0) { |
478 | ra_unshare_container_at_index(&r->high_low_container, i); |
479 | void *container = |
480 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
481 | uint8_t newtypecode = typecode; |
482 | void *container2 = |
483 | container_remove(container, val & 0xFFFF, typecode, &newtypecode); |
484 | if (container2 != container) { |
485 | container_free(container, typecode); |
486 | ra_set_container_at_index(&r->high_low_container, i, container2, |
487 | newtypecode); |
488 | } |
489 | if (container_get_cardinality(container2, newtypecode) != 0) { |
490 | ra_set_container_at_index(&r->high_low_container, i, container2, |
491 | newtypecode); |
492 | } else { |
493 | ra_remove_at_index_and_free(&r->high_low_container, i); |
494 | } |
495 | } |
496 | } |
497 | |
498 | bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { |
499 | const uint16_t hb = val >> 16; |
500 | const int i = ra_get_index(&r->high_low_container, hb); |
501 | uint8_t typecode; |
502 | bool result = false; |
503 | if (i >= 0) { |
504 | ra_unshare_container_at_index(&r->high_low_container, i); |
505 | void *container = |
506 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
507 | |
508 | const int oldCardinality = |
509 | container_get_cardinality(container, typecode); |
510 | |
511 | uint8_t newtypecode = typecode; |
512 | void *container2 = |
513 | container_remove(container, val & 0xFFFF, typecode, &newtypecode); |
514 | if (container2 != container) { |
515 | container_free(container, typecode); |
516 | ra_set_container_at_index(&r->high_low_container, i, container2, |
517 | newtypecode); |
518 | } |
519 | |
520 | const int newCardinality = |
521 | container_get_cardinality(container2, newtypecode); |
522 | |
523 | if (newCardinality != 0) { |
524 | ra_set_container_at_index(&r->high_low_container, i, container2, |
525 | newtypecode); |
526 | } else { |
527 | ra_remove_at_index_and_free(&r->high_low_container, i); |
528 | } |
529 | |
530 | result = oldCardinality != newCardinality; |
531 | } |
532 | return result; |
533 | } |
534 | |
535 | void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, |
536 | const uint32_t *vals) { |
537 | if (n_args == 0 || r->high_low_container.size == 0) { |
538 | return; |
539 | } |
540 | int32_t pos = -1; // position of the container used in the previous iteration |
541 | for (size_t i = 0; i < n_args; i++) { |
542 | uint16_t key = (uint16_t)(vals[i] >> 16); |
543 | if (pos < 0 || key != r->high_low_container.keys[pos]) { |
544 | pos = ra_get_index(&r->high_low_container, key); |
545 | } |
546 | if (pos >= 0) { |
547 | uint8_t new_typecode; |
548 | void *new_container; |
549 | new_container = container_remove(r->high_low_container.containers[pos], |
550 | vals[i] & 0xffff, |
551 | r->high_low_container.typecodes[pos], |
552 | &new_typecode); |
553 | if (new_container != r->high_low_container.containers[pos]) { |
554 | container_free(r->high_low_container.containers[pos], |
555 | r->high_low_container.typecodes[pos]); |
556 | ra_replace_key_and_container_at_index(&r->high_low_container, |
557 | pos, key, new_container, |
558 | new_typecode); |
559 | } |
560 | if (!container_nonzero_cardinality(new_container, new_typecode)) { |
561 | container_free(new_container, new_typecode); |
562 | ra_remove_at_index(&r->high_low_container, pos); |
563 | pos = -1; |
564 | } |
565 | } |
566 | } |
567 | } |
568 | |
569 | // there should be some SIMD optimizations possible here |
570 | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, |
571 | const roaring_bitmap_t *x2) { |
572 | uint8_t container_result_type = 0; |
573 | const int length1 = x1->high_low_container.size, |
574 | length2 = x2->high_low_container.size; |
575 | uint32_t neededcap = length1 > length2 ? length2 : length1; |
576 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); |
577 | roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); |
578 | |
579 | int pos1 = 0, pos2 = 0; |
580 | |
581 | while (pos1 < length1 && pos2 < length2) { |
582 | const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
583 | const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
584 | |
585 | if (s1 == s2) { |
586 | uint8_t container_type_1, container_type_2; |
587 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
588 | &container_type_1); |
589 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
590 | &container_type_2); |
591 | void *c = container_and(c1, container_type_1, c2, container_type_2, |
592 | &container_result_type); |
593 | if (container_nonzero_cardinality(c, container_result_type)) { |
594 | ra_append(&answer->high_low_container, s1, c, |
595 | container_result_type); |
596 | } else { |
597 | container_free( |
598 | c, container_result_type); // otherwise:memory leak! |
599 | } |
600 | ++pos1; |
601 | ++pos2; |
602 | } else if (s1 < s2) { // s1 < s2 |
603 | pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); |
604 | } else { // s1 > s2 |
605 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
606 | } |
607 | } |
608 | return answer; |
609 | } |
610 | |
611 | /** |
612 | * Compute the union of 'number' bitmaps. |
613 | */ |
614 | roaring_bitmap_t *roaring_bitmap_or_many(size_t number, |
615 | const roaring_bitmap_t **x) { |
616 | if (number == 0) { |
617 | return roaring_bitmap_create(); |
618 | } |
619 | if (number == 1) { |
620 | return roaring_bitmap_copy(x[0]); |
621 | } |
622 | roaring_bitmap_t *answer = |
623 | roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); |
624 | for (size_t i = 2; i < number; i++) { |
625 | roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); |
626 | } |
627 | roaring_bitmap_repair_after_lazy(answer); |
628 | return answer; |
629 | } |
630 | |
631 | /** |
632 | * Compute the xor of 'number' bitmaps. |
633 | */ |
634 | roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, |
635 | const roaring_bitmap_t **x) { |
636 | if (number == 0) { |
637 | return roaring_bitmap_create(); |
638 | } |
639 | if (number == 1) { |
640 | return roaring_bitmap_copy(x[0]); |
641 | } |
642 | roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); |
643 | for (size_t i = 2; i < number; i++) { |
644 | roaring_bitmap_lazy_xor_inplace(answer, x[i]); |
645 | } |
646 | roaring_bitmap_repair_after_lazy(answer); |
647 | return answer; |
648 | } |
649 | |
650 | // inplace and (modifies its first argument). |
651 | void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, |
652 | const roaring_bitmap_t *x2) { |
653 | if (x1 == x2) return; |
654 | int pos1 = 0, pos2 = 0, intersection_size = 0; |
655 | const int length1 = ra_get_size(&x1->high_low_container); |
656 | const int length2 = ra_get_size(&x2->high_low_container); |
657 | |
658 | // any skipped-over or newly emptied containers in x1 |
659 | // have to be freed. |
660 | while (pos1 < length1 && pos2 < length2) { |
661 | const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
662 | const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
663 | |
664 | if (s1 == s2) { |
665 | uint8_t typecode1, typecode2, typecode_result; |
666 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
667 | &typecode1); |
668 | c1 = get_writable_copy_if_shared(c1, &typecode1); |
669 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
670 | &typecode2); |
671 | void *c = |
672 | container_iand(c1, typecode1, c2, typecode2, &typecode_result); |
673 | if (c != c1) { // in this instance a new container was created, and |
674 | // we need to free the old one |
675 | container_free(c1, typecode1); |
676 | } |
677 | if (container_nonzero_cardinality(c, typecode_result)) { |
678 | ra_replace_key_and_container_at_index(&x1->high_low_container, |
679 | intersection_size, s1, c, |
680 | typecode_result); |
681 | intersection_size++; |
682 | } else { |
683 | container_free(c, typecode_result); |
684 | } |
685 | ++pos1; |
686 | ++pos2; |
687 | } else if (s1 < s2) { |
688 | pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); |
689 | } else { // s1 > s2 |
690 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
691 | } |
692 | } |
693 | |
694 | // if we ended early because x2 ran out, then all remaining in x1 should be |
695 | // freed |
696 | while (pos1 < length1) { |
697 | container_free(x1->high_low_container.containers[pos1], |
698 | x1->high_low_container.typecodes[pos1]); |
699 | ++pos1; |
700 | } |
701 | |
702 | // all containers after this have either been copied or freed |
703 | ra_downsize(&x1->high_low_container, intersection_size); |
704 | } |
705 | |
706 | roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, |
707 | const roaring_bitmap_t *x2) { |
708 | uint8_t container_result_type = 0; |
709 | const int length1 = x1->high_low_container.size, |
710 | length2 = x2->high_low_container.size; |
711 | if (0 == length1) { |
712 | return roaring_bitmap_copy(x2); |
713 | } |
714 | if (0 == length2) { |
715 | return roaring_bitmap_copy(x1); |
716 | } |
717 | roaring_bitmap_t *answer = |
718 | roaring_bitmap_create_with_capacity(length1 + length2); |
719 | roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); |
720 | int pos1 = 0, pos2 = 0; |
721 | uint8_t container_type_1, container_type_2; |
722 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
723 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
724 | while (true) { |
725 | if (s1 == s2) { |
726 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
727 | &container_type_1); |
728 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
729 | &container_type_2); |
730 | void *c = container_or(c1, container_type_1, c2, container_type_2, |
731 | &container_result_type); |
732 | // since we assume that the initial containers are non-empty, the |
733 | // result here |
734 | // can only be non-empty |
735 | ra_append(&answer->high_low_container, s1, c, |
736 | container_result_type); |
737 | ++pos1; |
738 | ++pos2; |
739 | if (pos1 == length1) break; |
740 | if (pos2 == length2) break; |
741 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
742 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
743 | |
744 | } else if (s1 < s2) { // s1 < s2 |
745 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
746 | &container_type_1); |
747 | // c1 = container_clone(c1, container_type_1); |
748 | c1 = |
749 | get_copy_of_container(c1, &container_type_1, is_cow(x1)); |
750 | if (is_cow(x1)) { |
751 | ra_set_container_at_index(&x1->high_low_container, pos1, c1, |
752 | container_type_1); |
753 | } |
754 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
755 | pos1++; |
756 | if (pos1 == length1) break; |
757 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
758 | |
759 | } else { // s1 > s2 |
760 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
761 | &container_type_2); |
762 | // c2 = container_clone(c2, container_type_2); |
763 | c2 = |
764 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
765 | if (is_cow(x2)) { |
766 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
767 | container_type_2); |
768 | } |
769 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
770 | pos2++; |
771 | if (pos2 == length2) break; |
772 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
773 | } |
774 | } |
775 | if (pos1 == length1) { |
776 | ra_append_copy_range(&answer->high_low_container, |
777 | &x2->high_low_container, pos2, length2, |
778 | is_cow(x2)); |
779 | } else if (pos2 == length2) { |
780 | ra_append_copy_range(&answer->high_low_container, |
781 | &x1->high_low_container, pos1, length1, |
782 | is_cow(x1)); |
783 | } |
784 | return answer; |
785 | } |
786 | |
787 | // inplace or (modifies its first argument). |
788 | void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, |
789 | const roaring_bitmap_t *x2) { |
790 | uint8_t container_result_type = 0; |
791 | int length1 = x1->high_low_container.size; |
792 | const int length2 = x2->high_low_container.size; |
793 | |
794 | if (0 == length2) return; |
795 | |
796 | if (0 == length1) { |
797 | roaring_bitmap_overwrite(x1, x2); |
798 | return; |
799 | } |
800 | int pos1 = 0, pos2 = 0; |
801 | uint8_t container_type_1, container_type_2; |
802 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
803 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
804 | while (true) { |
805 | if (s1 == s2) { |
806 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
807 | &container_type_1); |
808 | if (!container_is_full(c1, container_type_1)) { |
809 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
810 | |
811 | void *c2 = ra_get_container_at_index(&x2->high_low_container, |
812 | pos2, &container_type_2); |
813 | void *c = |
814 | container_ior(c1, container_type_1, c2, container_type_2, |
815 | &container_result_type); |
816 | if (c != |
817 | c1) { // in this instance a new container was created, and |
818 | // we need to free the old one |
819 | container_free(c1, container_type_1); |
820 | } |
821 | |
822 | ra_set_container_at_index(&x1->high_low_container, pos1, c, |
823 | container_result_type); |
824 | } |
825 | ++pos1; |
826 | ++pos2; |
827 | if (pos1 == length1) break; |
828 | if (pos2 == length2) break; |
829 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
830 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
831 | |
832 | } else if (s1 < s2) { // s1 < s2 |
833 | pos1++; |
834 | if (pos1 == length1) break; |
835 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
836 | |
837 | } else { // s1 > s2 |
838 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
839 | &container_type_2); |
840 | c2 = |
841 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
842 | if (is_cow(x2)) { |
843 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
844 | container_type_2); |
845 | } |
846 | |
847 | // void *c2_clone = container_clone(c2, container_type_2); |
848 | ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, |
849 | container_type_2); |
850 | pos1++; |
851 | length1++; |
852 | pos2++; |
853 | if (pos2 == length2) break; |
854 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
855 | } |
856 | } |
857 | if (pos1 == length1) { |
858 | ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, |
859 | pos2, length2, is_cow(x2)); |
860 | } |
861 | } |
862 | |
863 | roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, |
864 | const roaring_bitmap_t *x2) { |
865 | uint8_t container_result_type = 0; |
866 | const int length1 = x1->high_low_container.size, |
867 | length2 = x2->high_low_container.size; |
868 | if (0 == length1) { |
869 | return roaring_bitmap_copy(x2); |
870 | } |
871 | if (0 == length2) { |
872 | return roaring_bitmap_copy(x1); |
873 | } |
874 | roaring_bitmap_t *answer = |
875 | roaring_bitmap_create_with_capacity(length1 + length2); |
876 | roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); |
877 | int pos1 = 0, pos2 = 0; |
878 | uint8_t container_type_1, container_type_2; |
879 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
880 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
881 | while (true) { |
882 | if (s1 == s2) { |
883 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
884 | &container_type_1); |
885 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
886 | &container_type_2); |
887 | void *c = container_xor(c1, container_type_1, c2, container_type_2, |
888 | &container_result_type); |
889 | |
890 | if (container_nonzero_cardinality(c, container_result_type)) { |
891 | ra_append(&answer->high_low_container, s1, c, |
892 | container_result_type); |
893 | } else { |
894 | container_free(c, container_result_type); |
895 | } |
896 | ++pos1; |
897 | ++pos2; |
898 | if (pos1 == length1) break; |
899 | if (pos2 == length2) break; |
900 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
901 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
902 | |
903 | } else if (s1 < s2) { // s1 < s2 |
904 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
905 | &container_type_1); |
906 | c1 = |
907 | get_copy_of_container(c1, &container_type_1, is_cow(x1)); |
908 | if (is_cow(x1)) { |
909 | ra_set_container_at_index(&x1->high_low_container, pos1, c1, |
910 | container_type_1); |
911 | } |
912 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
913 | pos1++; |
914 | if (pos1 == length1) break; |
915 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
916 | |
917 | } else { // s1 > s2 |
918 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
919 | &container_type_2); |
920 | c2 = |
921 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
922 | if (is_cow(x2)) { |
923 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
924 | container_type_2); |
925 | } |
926 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
927 | pos2++; |
928 | if (pos2 == length2) break; |
929 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
930 | } |
931 | } |
932 | if (pos1 == length1) { |
933 | ra_append_copy_range(&answer->high_low_container, |
934 | &x2->high_low_container, pos2, length2, |
935 | is_cow(x2)); |
936 | } else if (pos2 == length2) { |
937 | ra_append_copy_range(&answer->high_low_container, |
938 | &x1->high_low_container, pos1, length1, |
939 | is_cow(x1)); |
940 | } |
941 | return answer; |
942 | } |
943 | |
944 | // inplace xor (modifies its first argument). |
945 | |
946 | void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, |
947 | const roaring_bitmap_t *x2) { |
948 | assert(x1 != x2); |
949 | uint8_t container_result_type = 0; |
950 | int length1 = x1->high_low_container.size; |
951 | const int length2 = x2->high_low_container.size; |
952 | |
953 | if (0 == length2) return; |
954 | |
955 | if (0 == length1) { |
956 | roaring_bitmap_overwrite(x1, x2); |
957 | return; |
958 | } |
959 | |
960 | // XOR can have new containers inserted from x2, but can also |
961 | // lose containers when x1 and x2 are nonempty and identical. |
962 | |
963 | int pos1 = 0, pos2 = 0; |
964 | uint8_t container_type_1, container_type_2; |
965 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
966 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
967 | while (true) { |
968 | if (s1 == s2) { |
969 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
970 | &container_type_1); |
971 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
972 | |
973 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
974 | &container_type_2); |
975 | void *c = container_ixor(c1, container_type_1, c2, container_type_2, |
976 | &container_result_type); |
977 | |
978 | if (container_nonzero_cardinality(c, container_result_type)) { |
979 | ra_set_container_at_index(&x1->high_low_container, pos1, c, |
980 | container_result_type); |
981 | ++pos1; |
982 | } else { |
983 | container_free(c, container_result_type); |
984 | ra_remove_at_index(&x1->high_low_container, pos1); |
985 | --length1; |
986 | } |
987 | |
988 | ++pos2; |
989 | if (pos1 == length1) break; |
990 | if (pos2 == length2) break; |
991 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
992 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
993 | |
994 | } else if (s1 < s2) { // s1 < s2 |
995 | pos1++; |
996 | if (pos1 == length1) break; |
997 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
998 | |
999 | } else { // s1 > s2 |
1000 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
1001 | &container_type_2); |
1002 | c2 = |
1003 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
1004 | if (is_cow(x2)) { |
1005 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
1006 | container_type_2); |
1007 | } |
1008 | |
1009 | ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, |
1010 | container_type_2); |
1011 | pos1++; |
1012 | length1++; |
1013 | pos2++; |
1014 | if (pos2 == length2) break; |
1015 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
1016 | } |
1017 | } |
1018 | if (pos1 == length1) { |
1019 | ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, |
1020 | pos2, length2, is_cow(x2)); |
1021 | } |
1022 | } |
1023 | |
1024 | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, |
1025 | const roaring_bitmap_t *x2) { |
1026 | uint8_t container_result_type = 0; |
1027 | const int length1 = x1->high_low_container.size, |
1028 | length2 = x2->high_low_container.size; |
1029 | if (0 == length1) { |
1030 | roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); |
1031 | roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) && is_cow(x2)); |
1032 | return empty_bitmap; |
1033 | } |
1034 | if (0 == length2) { |
1035 | return roaring_bitmap_copy(x1); |
1036 | } |
1037 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); |
1038 | roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); |
1039 | |
1040 | int pos1 = 0, pos2 = 0; |
1041 | uint8_t container_type_1, container_type_2; |
1042 | uint16_t s1 = 0; |
1043 | uint16_t s2 = 0; |
1044 | while (true) { |
1045 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
1046 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
1047 | |
1048 | if (s1 == s2) { |
1049 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
1050 | &container_type_1); |
1051 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
1052 | &container_type_2); |
1053 | void *c = |
1054 | container_andnot(c1, container_type_1, c2, container_type_2, |
1055 | &container_result_type); |
1056 | |
1057 | if (container_nonzero_cardinality(c, container_result_type)) { |
1058 | ra_append(&answer->high_low_container, s1, c, |
1059 | container_result_type); |
1060 | } else { |
1061 | container_free(c, container_result_type); |
1062 | } |
1063 | ++pos1; |
1064 | ++pos2; |
1065 | if (pos1 == length1) break; |
1066 | if (pos2 == length2) break; |
1067 | } else if (s1 < s2) { // s1 < s2 |
1068 | const int next_pos1 = |
1069 | ra_advance_until(&x1->high_low_container, s2, pos1); |
1070 | ra_append_copy_range(&answer->high_low_container, |
1071 | &x1->high_low_container, pos1, next_pos1, |
1072 | is_cow(x1)); |
1073 | // TODO : perhaps some of the copy_on_write should be based on |
1074 | // answer rather than x1 (more stringent?). Many similar cases |
1075 | pos1 = next_pos1; |
1076 | if (pos1 == length1) break; |
1077 | } else { // s1 > s2 |
1078 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
1079 | if (pos2 == length2) break; |
1080 | } |
1081 | } |
1082 | if (pos2 == length2) { |
1083 | ra_append_copy_range(&answer->high_low_container, |
1084 | &x1->high_low_container, pos1, length1, |
1085 | is_cow(x1)); |
1086 | } |
1087 | return answer; |
1088 | } |
1089 | |
1090 | // inplace andnot (modifies its first argument). |
1091 | |
1092 | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, |
1093 | const roaring_bitmap_t *x2) { |
1094 | assert(x1 != x2); |
1095 | |
1096 | uint8_t container_result_type = 0; |
1097 | int length1 = x1->high_low_container.size; |
1098 | const int length2 = x2->high_low_container.size; |
1099 | int intersection_size = 0; |
1100 | |
1101 | if (0 == length2) return; |
1102 | |
1103 | if (0 == length1) { |
1104 | roaring_bitmap_clear(x1); |
1105 | return; |
1106 | } |
1107 | |
1108 | int pos1 = 0, pos2 = 0; |
1109 | uint8_t container_type_1, container_type_2; |
1110 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
1111 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
1112 | while (true) { |
1113 | if (s1 == s2) { |
1114 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
1115 | &container_type_1); |
1116 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
1117 | |
1118 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
1119 | &container_type_2); |
1120 | void *c = |
1121 | container_iandnot(c1, container_type_1, c2, container_type_2, |
1122 | &container_result_type); |
1123 | |
1124 | if (container_nonzero_cardinality(c, container_result_type)) { |
1125 | ra_replace_key_and_container_at_index(&x1->high_low_container, |
1126 | intersection_size++, s1, |
1127 | c, container_result_type); |
1128 | } else { |
1129 | container_free(c, container_result_type); |
1130 | } |
1131 | |
1132 | ++pos1; |
1133 | ++pos2; |
1134 | if (pos1 == length1) break; |
1135 | if (pos2 == length2) break; |
1136 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
1137 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
1138 | |
1139 | } else if (s1 < s2) { // s1 < s2 |
1140 | if (pos1 != intersection_size) { |
1141 | void *c1 = ra_get_container_at_index(&x1->high_low_container, |
1142 | pos1, &container_type_1); |
1143 | |
1144 | ra_replace_key_and_container_at_index(&x1->high_low_container, |
1145 | intersection_size, s1, c1, |
1146 | container_type_1); |
1147 | } |
1148 | intersection_size++; |
1149 | pos1++; |
1150 | if (pos1 == length1) break; |
1151 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
1152 | |
1153 | } else { // s1 > s2 |
1154 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
1155 | if (pos2 == length2) break; |
1156 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
1157 | } |
1158 | } |
1159 | |
1160 | if (pos1 < length1) { |
1161 | // all containers between intersection_size and |
1162 | // pos1 are junk. However, they have either been moved |
1163 | // (thus still referenced) or involved in an iandnot |
1164 | // that will clean up all containers that could not be reused. |
1165 | // Thus we should not free the junk containers between |
1166 | // intersection_size and pos1. |
1167 | if (pos1 > intersection_size) { |
1168 | // left slide of remaining items |
1169 | ra_copy_range(&x1->high_low_container, pos1, length1, |
1170 | intersection_size); |
1171 | } |
1172 | // else current placement is fine |
1173 | intersection_size += (length1 - pos1); |
1174 | } |
1175 | ra_downsize(&x1->high_low_container, intersection_size); |
1176 | } |
1177 | |
1178 | uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) { |
1179 | uint64_t card = 0; |
1180 | for (int i = 0; i < ra->high_low_container.size; ++i) |
1181 | card += container_get_cardinality(ra->high_low_container.containers[i], |
1182 | ra->high_low_container.typecodes[i]); |
1183 | return card; |
1184 | } |
1185 | |
1186 | uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, |
1187 | uint64_t range_start, |
1188 | uint64_t range_end) { |
1189 | if (range_end > UINT32_MAX) { |
1190 | range_end = UINT32_MAX + UINT64_C(1); |
1191 | } |
1192 | if (range_start >= range_end) { |
1193 | return 0; |
1194 | } |
1195 | range_end--; // make range_end inclusive |
1196 | // now we have: 0 <= range_start <= range_end <= UINT32_MAX |
1197 | |
1198 | uint16_t minhb = range_start >> 16; |
1199 | uint16_t maxhb = range_end >> 16; |
1200 | |
1201 | uint64_t card = 0; |
1202 | |
1203 | int i = ra_get_index(&ra->high_low_container, minhb); |
1204 | if (i >= 0) { |
1205 | if (minhb == maxhb) { |
1206 | card += container_rank(ra->high_low_container.containers[i], |
1207 | ra->high_low_container.typecodes[i], |
1208 | range_end & 0xffff); |
1209 | } else { |
1210 | card += container_get_cardinality(ra->high_low_container.containers[i], |
1211 | ra->high_low_container.typecodes[i]); |
1212 | } |
1213 | if ((range_start & 0xffff) != 0) { |
1214 | card -= container_rank(ra->high_low_container.containers[i], |
1215 | ra->high_low_container.typecodes[i], |
1216 | (range_start & 0xffff) - 1); |
1217 | } |
1218 | i++; |
1219 | } else { |
1220 | i = -i - 1; |
1221 | } |
1222 | |
1223 | for (; i < ra->high_low_container.size; i++) { |
1224 | uint16_t key = ra->high_low_container.keys[i]; |
1225 | if (key < maxhb) { |
1226 | card += container_get_cardinality(ra->high_low_container.containers[i], |
1227 | ra->high_low_container.typecodes[i]); |
1228 | } else if (key == maxhb) { |
1229 | card += container_rank(ra->high_low_container.containers[i], |
1230 | ra->high_low_container.typecodes[i], |
1231 | range_end & 0xffff); |
1232 | break; |
1233 | } else { |
1234 | break; |
1235 | } |
1236 | } |
1237 | |
1238 | return card; |
1239 | } |
1240 | |
1241 | |
1242 | bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) { |
1243 | return ra->high_low_container.size == 0; |
1244 | } |
1245 | |
1246 | void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) { |
1247 | ra_to_uint32_array(&ra->high_low_container, ans); |
1248 | } |
1249 | |
1250 | bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) { |
1251 | return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans); |
1252 | } |
1253 | |
1254 | /** convert array and bitmap containers to run containers when it is more |
1255 | * efficient; |
1256 | * also convert from run containers when more space efficient. Returns |
1257 | * true if the result has at least one run container. |
1258 | */ |
1259 | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { |
1260 | bool answer = false; |
1261 | for (int i = 0; i < r->high_low_container.size; i++) { |
1262 | uint8_t typecode_original, typecode_after; |
1263 | ra_unshare_container_at_index( |
1264 | &r->high_low_container, i); // TODO: this introduces extra cloning! |
1265 | void *c = ra_get_container_at_index(&r->high_low_container, i, |
1266 | &typecode_original); |
1267 | void *c1 = convert_run_optimize(c, typecode_original, &typecode_after); |
1268 | if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true; |
1269 | ra_set_container_at_index(&r->high_low_container, i, c1, |
1270 | typecode_after); |
1271 | } |
1272 | return answer; |
1273 | } |
1274 | |
1275 | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { |
1276 | size_t answer = 0; |
1277 | for (int i = 0; i < r->high_low_container.size; i++) { |
1278 | uint8_t typecode_original; |
1279 | void *c = ra_get_container_at_index(&r->high_low_container, i, |
1280 | &typecode_original); |
1281 | answer += container_shrink_to_fit(c, typecode_original); |
1282 | } |
1283 | answer += ra_shrink_to_fit(&r->high_low_container); |
1284 | return answer; |
1285 | } |
1286 | |
1287 | /** |
1288 | * Remove run-length encoding even when it is more space efficient |
1289 | * return whether a change was applied |
1290 | */ |
1291 | bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { |
1292 | bool answer = false; |
1293 | for (int i = 0; i < r->high_low_container.size; i++) { |
1294 | uint8_t typecode_original, typecode_after; |
1295 | void *c = ra_get_container_at_index(&r->high_low_container, i, |
1296 | &typecode_original); |
1297 | if (get_container_type(c, typecode_original) == |
1298 | RUN_CONTAINER_TYPE_CODE) { |
1299 | answer = true; |
1300 | if (typecode_original == SHARED_CONTAINER_TYPE_CODE) { |
1301 | run_container_t *truec = |
1302 | (run_container_t *)((shared_container_t *)c)->container; |
1303 | int32_t card = run_container_cardinality(truec); |
1304 | void *c1 = convert_to_bitset_or_array_container( |
1305 | truec, card, &typecode_after); |
1306 | shared_container_free((shared_container_t *)c); |
1307 | ra_set_container_at_index(&r->high_low_container, i, c1, |
1308 | typecode_after); |
1309 | |
1310 | } else { |
1311 | int32_t card = run_container_cardinality((run_container_t *)c); |
1312 | void *c1 = convert_to_bitset_or_array_container( |
1313 | (run_container_t *)c, card, &typecode_after); |
1314 | ra_set_container_at_index(&r->high_low_container, i, c1, |
1315 | typecode_after); |
1316 | } |
1317 | } |
1318 | } |
1319 | return answer; |
1320 | } |
1321 | |
1322 | size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) { |
1323 | size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); |
1324 | uint64_t cardinality = roaring_bitmap_get_cardinality(ra); |
1325 | uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); |
1326 | if (portablesize < sizeasarray) { |
1327 | buf[0] = SERIALIZATION_CONTAINER; |
1328 | return roaring_bitmap_portable_serialize(ra, buf + 1) + 1; |
1329 | } else { |
1330 | buf[0] = SERIALIZATION_ARRAY_UINT32; |
1331 | memcpy(buf + 1, &cardinality, sizeof(uint32_t)); |
1332 | roaring_bitmap_to_uint32_array( |
1333 | ra, (uint32_t *)(buf + 1 + sizeof(uint32_t))); |
1334 | return 1 + (size_t)sizeasarray; |
1335 | } |
1336 | } |
1337 | |
1338 | size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) { |
1339 | size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); |
1340 | uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) + |
1341 | sizeof(uint32_t); |
1342 | return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; |
1343 | } |
1344 | |
1345 | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) { |
1346 | return ra_portable_size_in_bytes(&ra->high_low_container); |
1347 | } |
1348 | |
1349 | |
1350 | roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { |
1351 | roaring_bitmap_t *ans = |
1352 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
1353 | if (ans == NULL) { |
1354 | return NULL; |
1355 | } |
1356 | size_t bytesread; |
1357 | bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); |
1358 | if(is_ok) assert(bytesread <= maxbytes); |
1359 | roaring_bitmap_set_copy_on_write(ans, false); |
1360 | if (!is_ok) { |
1361 | free(ans); |
1362 | return NULL; |
1363 | } |
1364 | return ans; |
1365 | } |
1366 | |
1367 | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { |
1368 | return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); |
1369 | } |
1370 | |
1371 | |
1372 | size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { |
1373 | return ra_portable_deserialize_size(buf, maxbytes); |
1374 | } |
1375 | |
1376 | |
1377 | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, |
1378 | char *buf) { |
1379 | return ra_portable_serialize(&ra->high_low_container, buf); |
1380 | } |
1381 | |
1382 | roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { |
1383 | const char *bufaschar = (const char *)buf; |
1384 | if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { |
1385 | /* This looks like a compressed set of uint32_t elements */ |
1386 | uint32_t card; |
1387 | memcpy(&card, bufaschar + 1, sizeof(uint32_t)); |
1388 | const uint32_t *elems = |
1389 | (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); |
1390 | |
1391 | return roaring_bitmap_of_ptr(card, elems); |
1392 | } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { |
1393 | return roaring_bitmap_portable_deserialize(bufaschar + 1); |
1394 | } else |
1395 | return (NULL); |
1396 | } |
1397 | |
1398 | bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, |
1399 | void *ptr) { |
1400 | for (int i = 0; i < ra->high_low_container.size; ++i) |
1401 | if (!container_iterate(ra->high_low_container.containers[i], |
1402 | ra->high_low_container.typecodes[i], |
1403 | ((uint32_t)ra->high_low_container.keys[i]) << 16, |
1404 | iterator, ptr)) { |
1405 | return false; |
1406 | } |
1407 | return true; |
1408 | } |
1409 | |
1410 | bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, |
1411 | uint64_t high_bits, void *ptr) { |
1412 | for (int i = 0; i < ra->high_low_container.size; ++i) |
1413 | if (!container_iterate64( |
1414 | ra->high_low_container.containers[i], |
1415 | ra->high_low_container.typecodes[i], |
1416 | ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator, |
1417 | high_bits, ptr)) { |
1418 | return false; |
1419 | } |
1420 | return true; |
1421 | } |
1422 | |
1423 | /**** |
1424 | * begin roaring_uint32_iterator_t |
1425 | *****/ |
1426 | |
1427 | // Partially initializes the roaring iterator when it begins looking at |
1428 | // a new container. |
1429 | static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { |
1430 | newit->in_container_index = 0; |
1431 | newit->run_index = 0; |
1432 | newit->current_value = 0; |
1433 | if (newit->container_index >= newit->parent->high_low_container.size || |
1434 | newit->container_index < 0) { |
1435 | newit->current_value = UINT32_MAX; |
1436 | return (newit->has_value = false); |
1437 | } |
1438 | // assume not empty |
1439 | newit->has_value = true; |
1440 | // we precompute container, typecode and highbits so that successive |
1441 | // iterators do not have to grab them from odd memory locations |
1442 | // and have to worry about the (easily predicted) container_unwrap_shared |
1443 | // call. |
1444 | newit->container = |
1445 | newit->parent->high_low_container.containers[newit->container_index]; |
1446 | newit->typecode = |
1447 | newit->parent->high_low_container.typecodes[newit->container_index]; |
1448 | newit->highbits = |
1449 | ((uint32_t) |
1450 | newit->parent->high_low_container.keys[newit->container_index]) |
1451 | << 16; |
1452 | newit->container = |
1453 | container_unwrap_shared(newit->container, &(newit->typecode)); |
1454 | return newit->has_value; |
1455 | } |
1456 | |
1457 | static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { |
1458 | if (!iter_new_container_partial_init(newit)) |
1459 | return newit->has_value; |
1460 | |
1461 | uint32_t wordindex; |
1462 | uint64_t word; // used for bitsets |
1463 | switch (newit->typecode) { |
1464 | case BITSET_CONTAINER_TYPE_CODE: |
1465 | wordindex = 0; |
1466 | while ((word = ((const bitset_container_t *)(newit->container)) |
1467 | ->array[wordindex]) == 0) |
1468 | wordindex++; // advance |
1469 | // here "word" is non-zero |
1470 | newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
1471 | newit->current_value = newit->highbits | newit->in_container_index; |
1472 | break; |
1473 | case ARRAY_CONTAINER_TYPE_CODE: |
1474 | newit->current_value = |
1475 | newit->highbits | |
1476 | ((const array_container_t *)(newit->container))->array[0]; |
1477 | break; |
1478 | case RUN_CONTAINER_TYPE_CODE: |
1479 | newit->current_value = |
1480 | newit->highbits | |
1481 | (((const run_container_t *)(newit->container))->runs[0].value); |
1482 | break; |
1483 | default: |
1484 | // if this ever happens, bug! |
1485 | assert(false); |
1486 | } // switch (typecode) |
1487 | return true; |
1488 | } |
1489 | |
1490 | static bool loadlastvalue(roaring_uint32_iterator_t* newit) { |
1491 | if (!iter_new_container_partial_init(newit)) |
1492 | return newit->has_value; |
1493 | |
1494 | switch(newit->typecode) { |
1495 | case BITSET_CONTAINER_TYPE_CODE: { |
1496 | uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; |
1497 | uint64_t word; |
1498 | const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; |
1499 | while ((word = bitset_container->array[wordindex]) == 0) |
1500 | --wordindex; |
1501 | |
1502 | int num_leading_zeros = __builtin_clzll(word); |
1503 | newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); |
1504 | newit->current_value = newit->highbits | newit->in_container_index; |
1505 | break; |
1506 | } |
1507 | case ARRAY_CONTAINER_TYPE_CODE: { |
1508 | const array_container_t* array_container = (const array_container_t*)newit->container; |
1509 | newit->in_container_index = array_container->cardinality - 1; |
1510 | newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; |
1511 | break; |
1512 | } |
1513 | case RUN_CONTAINER_TYPE_CODE: { |
1514 | const run_container_t* run_container = (const run_container_t*)newit->container; |
1515 | newit->run_index = run_container->n_runs - 1; |
1516 | const rle16_t* last_run = &run_container->runs[newit->run_index]; |
1517 | newit->current_value = newit->highbits | (last_run->value + last_run->length); |
1518 | break; |
1519 | } |
1520 | default: |
1521 | // if this ever happens, bug! |
1522 | assert(false); |
1523 | } |
1524 | return true; |
1525 | } |
1526 | |
1527 | // prerequesite: the value should be in range of the container |
1528 | static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { |
1529 | // Don't have to check return value because of prerequisite |
1530 | iter_new_container_partial_init(newit); |
1531 | uint16_t lb = val & 0xFFFF; |
1532 | |
1533 | switch (newit->typecode) { |
1534 | case BITSET_CONTAINER_TYPE_CODE: |
1535 | newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb); |
1536 | newit->current_value = newit->highbits | newit->in_container_index; |
1537 | break; |
1538 | case ARRAY_CONTAINER_TYPE_CODE: |
1539 | newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb); |
1540 | newit->current_value = |
1541 | newit->highbits | |
1542 | ((const array_container_t *)(newit->container))->array[newit->in_container_index]; |
1543 | break; |
1544 | case RUN_CONTAINER_TYPE_CODE: |
1545 | newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb); |
1546 | if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) { |
1547 | newit->current_value = val; |
1548 | } else { |
1549 | newit->current_value = |
1550 | newit->highbits | |
1551 | (((const run_container_t *)(newit->container))->runs[newit->run_index].value); |
1552 | } |
1553 | break; |
1554 | default: |
1555 | // if this ever happens, bug! |
1556 | assert(false); |
1557 | } // switch (typecode) |
1558 | return true; |
1559 | } |
1560 | |
1561 | void roaring_init_iterator(const roaring_bitmap_t *ra, |
1562 | roaring_uint32_iterator_t *newit) { |
1563 | newit->parent = ra; |
1564 | newit->container_index = 0; |
1565 | newit->has_value = loadfirstvalue(newit); |
1566 | } |
1567 | |
1568 | void roaring_init_iterator_last(const roaring_bitmap_t *ra, |
1569 | roaring_uint32_iterator_t *newit) { |
1570 | newit->parent = ra; |
1571 | newit->container_index = newit->parent->high_low_container.size - 1; |
1572 | newit->has_value = loadlastvalue(newit); |
1573 | } |
1574 | |
1575 | roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { |
1576 | roaring_uint32_iterator_t *newit = |
1577 | (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); |
1578 | if (newit == NULL) return NULL; |
1579 | roaring_init_iterator(ra, newit); |
1580 | return newit; |
1581 | } |
1582 | |
1583 | roaring_uint32_iterator_t *roaring_copy_uint32_iterator( |
1584 | const roaring_uint32_iterator_t *it) { |
1585 | roaring_uint32_iterator_t *newit = |
1586 | (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); |
1587 | memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); |
1588 | return newit; |
1589 | } |
1590 | |
1591 | bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { |
1592 | uint16_t hb = val >> 16; |
1593 | const int i = ra_get_index(& it->parent->high_low_container, hb); |
1594 | if (i >= 0) { |
1595 | uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); |
1596 | uint16_t lb = val & 0xFFFF; |
1597 | if(lowvalue < lb ) { |
1598 | it->container_index = i+1; // will have to load first value of next container |
1599 | } else {// the value is necessarily within the range of the container |
1600 | it->container_index = i; |
1601 | it->has_value = loadfirstvalue_largeorequal(it, val); |
1602 | return it->has_value; |
1603 | } |
1604 | } else { |
1605 | // there is no matching, so we are going for the next container |
1606 | it->container_index = -i-1; |
1607 | } |
1608 | it->has_value = loadfirstvalue(it); |
1609 | return it->has_value; |
1610 | } |
1611 | |
1612 | |
1613 | bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { |
1614 | if (it->container_index >= it->parent->high_low_container.size) { |
1615 | return (it->has_value = false); |
1616 | } |
1617 | if (it->container_index < 0) { |
1618 | it->container_index = 0; |
1619 | return (it->has_value = loadfirstvalue(it)); |
1620 | } |
1621 | |
1622 | uint32_t wordindex; // used for bitsets |
1623 | uint64_t word; // used for bitsets |
1624 | switch (it->typecode) { |
1625 | case BITSET_CONTAINER_TYPE_CODE: |
1626 | it->in_container_index++; |
1627 | wordindex = it->in_container_index / 64; |
1628 | if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; |
1629 | word = ((const bitset_container_t *)(it->container)) |
1630 | ->array[wordindex] & |
1631 | (UINT64_MAX << (it->in_container_index % 64)); |
1632 | // next part could be optimized/simplified |
1633 | while ((word == 0) && |
1634 | (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { |
1635 | wordindex++; |
1636 | word = ((const bitset_container_t *)(it->container)) |
1637 | ->array[wordindex]; |
1638 | } |
1639 | if (word != 0) { |
1640 | it->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
1641 | it->current_value = it->highbits | it->in_container_index; |
1642 | return (it->has_value = true); |
1643 | } |
1644 | break; |
1645 | case ARRAY_CONTAINER_TYPE_CODE: |
1646 | it->in_container_index++; |
1647 | if (it->in_container_index < |
1648 | ((const array_container_t *)(it->container))->cardinality) { |
1649 | it->current_value = it->highbits | |
1650 | ((const array_container_t *)(it->container)) |
1651 | ->array[it->in_container_index]; |
1652 | return (it->has_value = true); |
1653 | } |
1654 | break; |
1655 | case RUN_CONTAINER_TYPE_CODE: { |
1656 | if(it->current_value == UINT32_MAX) { |
1657 | return (it->has_value = false); // without this, we risk an overflow to zero |
1658 | } |
1659 | |
1660 | const run_container_t* run_container = (const run_container_t*)it->container; |
1661 | if (++it->current_value <= (it->highbits | (run_container->runs[it->run_index].value + |
1662 | run_container->runs[it->run_index].length))) { |
1663 | return (it->has_value = true); |
1664 | } |
1665 | |
1666 | if (++it->run_index < run_container->n_runs) { |
1667 | // Assume the run has a value |
1668 | it->current_value = it->highbits | run_container->runs[it->run_index].value; |
1669 | return (it->has_value = true); |
1670 | } |
1671 | break; |
1672 | } |
1673 | default: |
1674 | // if this ever happens, bug! |
1675 | assert(false); |
1676 | } // switch (typecode) |
1677 | // moving to next container |
1678 | it->container_index++; |
1679 | return (it->has_value = loadfirstvalue(it)); |
1680 | } |
1681 | |
1682 | bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { |
1683 | if (it->container_index < 0) { |
1684 | return (it->has_value = false); |
1685 | } |
1686 | if (it->container_index >= it->parent->high_low_container.size) { |
1687 | it->container_index = it->parent->high_low_container.size - 1; |
1688 | return (it->has_value = loadlastvalue(it)); |
1689 | } |
1690 | |
1691 | switch (it->typecode) { |
1692 | case BITSET_CONTAINER_TYPE_CODE: { |
1693 | if (--it->in_container_index < 0) |
1694 | break; |
1695 | |
1696 | const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; |
1697 | int32_t wordindex = it->in_container_index / 64; |
1698 | uint64_t word = bitset_container->array[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); |
1699 | |
1700 | while (word == 0 && --wordindex >= 0) { |
1701 | word = bitset_container->array[wordindex]; |
1702 | } |
1703 | if (word == 0) |
1704 | break; |
1705 | |
1706 | int num_leading_zeros = __builtin_clzll(word); |
1707 | it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); |
1708 | it->current_value = it->highbits | it->in_container_index; |
1709 | return (it->has_value = true); |
1710 | } |
1711 | case ARRAY_CONTAINER_TYPE_CODE: { |
1712 | if (--it->in_container_index < 0) |
1713 | break; |
1714 | |
1715 | const array_container_t* array_container = (const array_container_t*)it->container; |
1716 | it->current_value = it->highbits | array_container->array[it->in_container_index]; |
1717 | return (it->has_value = true); |
1718 | } |
1719 | case RUN_CONTAINER_TYPE_CODE: { |
1720 | if(it->current_value == 0) |
1721 | return (it->has_value = false); |
1722 | |
1723 | const run_container_t* run_container = (const run_container_t*)it->container; |
1724 | if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { |
1725 | return (it->has_value = true); |
1726 | } |
1727 | |
1728 | if (--it->run_index < 0) |
1729 | break; |
1730 | |
1731 | it->current_value = it->highbits | (run_container->runs[it->run_index].value + |
1732 | run_container->runs[it->run_index].length); |
1733 | return (it->has_value = true); |
1734 | } |
1735 | default: |
1736 | // if this ever happens, bug! |
1737 | assert(false); |
1738 | } // switch (typecode) |
1739 | |
1740 | // moving to previous container |
1741 | it->container_index--; |
1742 | return (it->has_value = loadlastvalue(it)); |
1743 | } |
1744 | |
1745 | uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { |
1746 | uint32_t ret = 0; |
1747 | uint32_t num_values; |
1748 | uint32_t wordindex; // used for bitsets |
1749 | uint64_t word; // used for bitsets |
1750 | const array_container_t* acont; //TODO remove |
1751 | const run_container_t* rcont; //TODO remove |
1752 | const bitset_container_t* bcont; //TODO remove |
1753 | |
1754 | while (it->has_value && ret < count) { |
1755 | switch (it->typecode) { |
1756 | case BITSET_CONTAINER_TYPE_CODE: |
1757 | bcont = (const bitset_container_t*)(it->container); |
1758 | wordindex = it->in_container_index / 64; |
1759 | word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); |
1760 | do { |
1761 | while (word != 0 && ret < count) { |
1762 | buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); |
1763 | word = word & (word - 1); |
1764 | buf++; |
1765 | ret++; |
1766 | } |
1767 | while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { |
1768 | wordindex++; |
1769 | word = bcont->array[wordindex]; |
1770 | } |
1771 | } while (word != 0 && ret < count); |
1772 | it->has_value = (word != 0); |
1773 | if (it->has_value) { |
1774 | it->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
1775 | it->current_value = it->highbits | it->in_container_index; |
1776 | } |
1777 | break; |
1778 | case ARRAY_CONTAINER_TYPE_CODE: |
1779 | acont = (const array_container_t *)(it->container); |
1780 | num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); |
1781 | for (uint32_t i = 0; i < num_values; i++) { |
1782 | buf[i] = it->highbits | acont->array[it->in_container_index + i]; |
1783 | } |
1784 | buf += num_values; |
1785 | ret += num_values; |
1786 | it->in_container_index += num_values; |
1787 | it->has_value = (it->in_container_index < acont->cardinality); |
1788 | if (it->has_value) { |
1789 | it->current_value = it->highbits | acont->array[it->in_container_index]; |
1790 | } |
1791 | break; |
1792 | case RUN_CONTAINER_TYPE_CODE: |
1793 | rcont = (const run_container_t*)(it->container); |
1794 | //"in_run_index" name is misleading, read it as "max_value_in_current_run" |
1795 | do { |
1796 | uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); |
1797 | num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); |
1798 | for (uint32_t i = 0; i < num_values; i++) { |
1799 | buf[i] = it->current_value + i; |
1800 | } |
1801 | it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 |
1802 | buf += num_values; |
1803 | ret += num_values; |
1804 | |
1805 | if (it->current_value > largest_run_value || it->current_value == 0) { |
1806 | it->run_index++; |
1807 | if (it->run_index < rcont->n_runs) { |
1808 | it->current_value = it->highbits | rcont->runs[it->run_index].value; |
1809 | } else { |
1810 | it->has_value = false; |
1811 | } |
1812 | } |
1813 | } while ((ret < count) && it->has_value); |
1814 | break; |
1815 | default: |
1816 | assert(false); |
1817 | } |
1818 | if (it->has_value) { |
1819 | assert(ret == count); |
1820 | return ret; |
1821 | } |
1822 | it->container_index++; |
1823 | it->has_value = loadfirstvalue(it); |
1824 | } |
1825 | return ret; |
1826 | } |
1827 | |
1828 | |
1829 | |
1830 | void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); } |
1831 | |
1832 | /**** |
1833 | * end of roaring_uint32_iterator_t |
1834 | *****/ |
1835 | |
1836 | bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, |
1837 | const roaring_bitmap_t *ra2) { |
1838 | if (ra1->high_low_container.size != ra2->high_low_container.size) { |
1839 | return false; |
1840 | } |
1841 | for (int i = 0; i < ra1->high_low_container.size; ++i) { |
1842 | if (ra1->high_low_container.keys[i] != |
1843 | ra2->high_low_container.keys[i]) { |
1844 | return false; |
1845 | } |
1846 | } |
1847 | for (int i = 0; i < ra1->high_low_container.size; ++i) { |
1848 | bool areequal = container_equals(ra1->high_low_container.containers[i], |
1849 | ra1->high_low_container.typecodes[i], |
1850 | ra2->high_low_container.containers[i], |
1851 | ra2->high_low_container.typecodes[i]); |
1852 | if (!areequal) { |
1853 | return false; |
1854 | } |
1855 | } |
1856 | return true; |
1857 | } |
1858 | |
1859 | bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, |
1860 | const roaring_bitmap_t *ra2) { |
1861 | const int length1 = ra1->high_low_container.size, |
1862 | length2 = ra2->high_low_container.size; |
1863 | |
1864 | int pos1 = 0, pos2 = 0; |
1865 | |
1866 | while (pos1 < length1 && pos2 < length2) { |
1867 | const uint16_t s1 = ra_get_key_at_index(&ra1->high_low_container, pos1); |
1868 | const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2); |
1869 | |
1870 | if (s1 == s2) { |
1871 | uint8_t container_type_1, container_type_2; |
1872 | void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1, |
1873 | &container_type_1); |
1874 | void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2, |
1875 | &container_type_2); |
1876 | bool subset = |
1877 | container_is_subset(c1, container_type_1, c2, container_type_2); |
1878 | if (!subset) return false; |
1879 | ++pos1; |
1880 | ++pos2; |
1881 | } else if (s1 < s2) { // s1 < s2 |
1882 | return false; |
1883 | } else { // s1 > s2 |
1884 | pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2); |
1885 | } |
1886 | } |
1887 | if (pos1 == length1) |
1888 | return true; |
1889 | else |
1890 | return false; |
1891 | } |
1892 | |
1893 | static void insert_flipped_container(roaring_array_t *ans_arr, |
1894 | const roaring_array_t *x1_arr, uint16_t hb, |
1895 | uint16_t lb_start, uint16_t lb_end) { |
1896 | const int i = ra_get_index(x1_arr, hb); |
1897 | const int j = ra_get_index(ans_arr, hb); |
1898 | uint8_t ctype_in, ctype_out; |
1899 | void *flipped_container = NULL; |
1900 | if (i >= 0) { |
1901 | void *container_to_flip = |
1902 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
1903 | flipped_container = |
1904 | container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, |
1905 | (uint32_t)(lb_end + 1), &ctype_out); |
1906 | |
1907 | if (container_get_cardinality(flipped_container, ctype_out)) |
1908 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
1909 | ctype_out); |
1910 | else { |
1911 | container_free(flipped_container, ctype_out); |
1912 | } |
1913 | } else { |
1914 | flipped_container = container_range_of_ones( |
1915 | (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); |
1916 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
1917 | ctype_out); |
1918 | } |
1919 | } |
1920 | |
1921 | static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, |
1922 | uint16_t lb_start, uint16_t lb_end) { |
1923 | const int i = ra_get_index(x1_arr, hb); |
1924 | uint8_t ctype_in, ctype_out; |
1925 | void *flipped_container = NULL; |
1926 | if (i >= 0) { |
1927 | void *container_to_flip = |
1928 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
1929 | flipped_container = container_inot_range( |
1930 | container_to_flip, ctype_in, (uint32_t)lb_start, |
1931 | (uint32_t)(lb_end + 1), &ctype_out); |
1932 | // if a new container was created, the old one was already freed |
1933 | if (container_get_cardinality(flipped_container, ctype_out)) { |
1934 | ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); |
1935 | } else { |
1936 | container_free(flipped_container, ctype_out); |
1937 | ra_remove_at_index(x1_arr, i); |
1938 | } |
1939 | |
1940 | } else { |
1941 | flipped_container = container_range_of_ones( |
1942 | (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); |
1943 | ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, |
1944 | ctype_out); |
1945 | } |
1946 | } |
1947 | |
1948 | static void insert_fully_flipped_container(roaring_array_t *ans_arr, |
1949 | const roaring_array_t *x1_arr, |
1950 | uint16_t hb) { |
1951 | const int i = ra_get_index(x1_arr, hb); |
1952 | const int j = ra_get_index(ans_arr, hb); |
1953 | uint8_t ctype_in, ctype_out; |
1954 | void *flipped_container = NULL; |
1955 | if (i >= 0) { |
1956 | void *container_to_flip = |
1957 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
1958 | flipped_container = |
1959 | container_not(container_to_flip, ctype_in, &ctype_out); |
1960 | if (container_get_cardinality(flipped_container, ctype_out)) |
1961 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
1962 | ctype_out); |
1963 | else { |
1964 | container_free(flipped_container, ctype_out); |
1965 | } |
1966 | } else { |
1967 | flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); |
1968 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
1969 | ctype_out); |
1970 | } |
1971 | } |
1972 | |
1973 | static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { |
1974 | const int i = ra_get_index(x1_arr, hb); |
1975 | uint8_t ctype_in, ctype_out; |
1976 | void *flipped_container = NULL; |
1977 | if (i >= 0) { |
1978 | void *container_to_flip = |
1979 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
1980 | flipped_container = |
1981 | container_inot(container_to_flip, ctype_in, &ctype_out); |
1982 | |
1983 | if (container_get_cardinality(flipped_container, ctype_out)) { |
1984 | ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); |
1985 | } else { |
1986 | container_free(flipped_container, ctype_out); |
1987 | ra_remove_at_index(x1_arr, i); |
1988 | } |
1989 | |
1990 | } else { |
1991 | flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); |
1992 | ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, |
1993 | ctype_out); |
1994 | } |
1995 | } |
1996 | |
1997 | roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, |
1998 | uint64_t range_start, |
1999 | uint64_t range_end) { |
2000 | if (range_start >= range_end) { |
2001 | return roaring_bitmap_copy(x1); |
2002 | } |
2003 | if(range_end >= UINT64_C(0x100000000)) { |
2004 | range_end = UINT64_C(0x100000000); |
2005 | } |
2006 | |
2007 | roaring_bitmap_t *ans = roaring_bitmap_create(); |
2008 | roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); |
2009 | |
2010 | uint16_t hb_start = (uint16_t)(range_start >> 16); |
2011 | const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; |
2012 | uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); |
2013 | const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; |
2014 | |
2015 | ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, |
2016 | hb_start, is_cow(x1)); |
2017 | if (hb_start == hb_end) { |
2018 | insert_flipped_container(&ans->high_low_container, |
2019 | &x1->high_low_container, hb_start, lb_start, |
2020 | lb_end); |
2021 | } else { |
2022 | // start and end containers are distinct |
2023 | if (lb_start > 0) { |
2024 | // handle first (partial) container |
2025 | insert_flipped_container(&ans->high_low_container, |
2026 | &x1->high_low_container, hb_start, |
2027 | lb_start, 0xFFFF); |
2028 | ++hb_start; // for the full containers. Can't wrap. |
2029 | } |
2030 | |
2031 | if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block |
2032 | |
2033 | for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { |
2034 | insert_fully_flipped_container(&ans->high_low_container, |
2035 | &x1->high_low_container, hb); |
2036 | } |
2037 | |
2038 | // handle a partial final container |
2039 | if (lb_end != 0xFFFF) { |
2040 | insert_flipped_container(&ans->high_low_container, |
2041 | &x1->high_low_container, hb_end + 1, 0, |
2042 | lb_end); |
2043 | ++hb_end; |
2044 | } |
2045 | } |
2046 | ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, |
2047 | hb_end, is_cow(x1)); |
2048 | return ans; |
2049 | } |
2050 | |
2051 | void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, |
2052 | uint64_t range_end) { |
2053 | if (range_start >= range_end) { |
2054 | return; // empty range |
2055 | } |
2056 | if(range_end >= UINT64_C(0x100000000)) { |
2057 | range_end = UINT64_C(0x100000000); |
2058 | } |
2059 | |
2060 | uint16_t hb_start = (uint16_t)(range_start >> 16); |
2061 | const uint16_t lb_start = (uint16_t)range_start; |
2062 | uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); |
2063 | const uint16_t lb_end = (uint16_t)(range_end - 1); |
2064 | |
2065 | if (hb_start == hb_end) { |
2066 | inplace_flip_container(&x1->high_low_container, hb_start, lb_start, |
2067 | lb_end); |
2068 | } else { |
2069 | // start and end containers are distinct |
2070 | if (lb_start > 0) { |
2071 | // handle first (partial) container |
2072 | inplace_flip_container(&x1->high_low_container, hb_start, lb_start, |
2073 | 0xFFFF); |
2074 | ++hb_start; // for the full containers. Can't wrap. |
2075 | } |
2076 | |
2077 | if (lb_end != 0xFFFF) --hb_end; |
2078 | |
2079 | for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { |
2080 | inplace_fully_flip_container(&x1->high_low_container, hb); |
2081 | } |
2082 | // handle a partial final container |
2083 | if (lb_end != 0xFFFF) { |
2084 | inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, |
2085 | lb_end); |
2086 | ++hb_end; |
2087 | } |
2088 | } |
2089 | } |
2090 | |
2091 | roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, |
2092 | const roaring_bitmap_t *x2, |
2093 | const bool bitsetconversion) { |
2094 | uint8_t container_result_type = 0; |
2095 | const int length1 = x1->high_low_container.size, |
2096 | length2 = x2->high_low_container.size; |
2097 | if (0 == length1) { |
2098 | return roaring_bitmap_copy(x2); |
2099 | } |
2100 | if (0 == length2) { |
2101 | return roaring_bitmap_copy(x1); |
2102 | } |
2103 | roaring_bitmap_t *answer = |
2104 | roaring_bitmap_create_with_capacity(length1 + length2); |
2105 | roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); |
2106 | int pos1 = 0, pos2 = 0; |
2107 | uint8_t container_type_1, container_type_2; |
2108 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2109 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2110 | while (true) { |
2111 | if (s1 == s2) { |
2112 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2113 | &container_type_1); |
2114 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2115 | &container_type_2); |
2116 | void *c; |
2117 | if (bitsetconversion && (get_container_type(c1, container_type_1) != |
2118 | BITSET_CONTAINER_TYPE_CODE) && |
2119 | (get_container_type(c2, container_type_2) != |
2120 | BITSET_CONTAINER_TYPE_CODE)) { |
2121 | void *newc1 = |
2122 | container_mutable_unwrap_shared(c1, &container_type_1); |
2123 | newc1 = container_to_bitset(newc1, container_type_1); |
2124 | container_type_1 = BITSET_CONTAINER_TYPE_CODE; |
2125 | c = container_lazy_ior(newc1, container_type_1, c2, |
2126 | container_type_2, |
2127 | &container_result_type); |
2128 | if (c != newc1) { // should not happen |
2129 | container_free(newc1, container_type_1); |
2130 | } |
2131 | } else { |
2132 | c = container_lazy_or(c1, container_type_1, c2, |
2133 | container_type_2, &container_result_type); |
2134 | } |
2135 | // since we assume that the initial containers are non-empty, |
2136 | // the |
2137 | // result here |
2138 | // can only be non-empty |
2139 | ra_append(&answer->high_low_container, s1, c, |
2140 | container_result_type); |
2141 | ++pos1; |
2142 | ++pos2; |
2143 | if (pos1 == length1) break; |
2144 | if (pos2 == length2) break; |
2145 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2146 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2147 | |
2148 | } else if (s1 < s2) { // s1 < s2 |
2149 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2150 | &container_type_1); |
2151 | c1 = |
2152 | get_copy_of_container(c1, &container_type_1, is_cow(x1)); |
2153 | if (is_cow(x1)) { |
2154 | ra_set_container_at_index(&x1->high_low_container, pos1, c1, |
2155 | container_type_1); |
2156 | } |
2157 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
2158 | pos1++; |
2159 | if (pos1 == length1) break; |
2160 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2161 | |
2162 | } else { // s1 > s2 |
2163 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2164 | &container_type_2); |
2165 | c2 = |
2166 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
2167 | if (is_cow(x2)) { |
2168 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
2169 | container_type_2); |
2170 | } |
2171 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
2172 | pos2++; |
2173 | if (pos2 == length2) break; |
2174 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2175 | } |
2176 | } |
2177 | if (pos1 == length1) { |
2178 | ra_append_copy_range(&answer->high_low_container, |
2179 | &x2->high_low_container, pos2, length2, |
2180 | is_cow(x2)); |
2181 | } else if (pos2 == length2) { |
2182 | ra_append_copy_range(&answer->high_low_container, |
2183 | &x1->high_low_container, pos1, length1, |
2184 | is_cow(x1)); |
2185 | } |
2186 | return answer; |
2187 | } |
2188 | |
2189 | void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, |
2190 | const roaring_bitmap_t *x2, |
2191 | const bool bitsetconversion) { |
2192 | uint8_t container_result_type = 0; |
2193 | int length1 = x1->high_low_container.size; |
2194 | const int length2 = x2->high_low_container.size; |
2195 | |
2196 | if (0 == length2) return; |
2197 | |
2198 | if (0 == length1) { |
2199 | roaring_bitmap_overwrite(x1, x2); |
2200 | return; |
2201 | } |
2202 | int pos1 = 0, pos2 = 0; |
2203 | uint8_t container_type_1, container_type_2; |
2204 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2205 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2206 | while (true) { |
2207 | if (s1 == s2) { |
2208 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2209 | &container_type_1); |
2210 | if (!container_is_full(c1, container_type_1)) { |
2211 | if ((bitsetconversion == false) || |
2212 | (get_container_type(c1, container_type_1) == |
2213 | BITSET_CONTAINER_TYPE_CODE)) { |
2214 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
2215 | } else { |
2216 | // convert to bitset |
2217 | void *oldc1 = c1; |
2218 | uint8_t oldt1 = container_type_1; |
2219 | c1 = container_mutable_unwrap_shared(c1, &container_type_1); |
2220 | c1 = container_to_bitset(c1, container_type_1); |
2221 | container_free(oldc1, oldt1); |
2222 | container_type_1 = BITSET_CONTAINER_TYPE_CODE; |
2223 | } |
2224 | |
2225 | void *c2 = ra_get_container_at_index(&x2->high_low_container, |
2226 | pos2, &container_type_2); |
2227 | void *c = container_lazy_ior(c1, container_type_1, c2, |
2228 | container_type_2, |
2229 | &container_result_type); |
2230 | if (c != |
2231 | c1) { // in this instance a new container was created, and |
2232 | // we need to free the old one |
2233 | container_free(c1, container_type_1); |
2234 | } |
2235 | |
2236 | ra_set_container_at_index(&x1->high_low_container, pos1, c, |
2237 | container_result_type); |
2238 | } |
2239 | ++pos1; |
2240 | ++pos2; |
2241 | if (pos1 == length1) break; |
2242 | if (pos2 == length2) break; |
2243 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2244 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2245 | |
2246 | } else if (s1 < s2) { // s1 < s2 |
2247 | pos1++; |
2248 | if (pos1 == length1) break; |
2249 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2250 | |
2251 | } else { // s1 > s2 |
2252 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2253 | &container_type_2); |
2254 | // void *c2_clone = container_clone(c2, container_type_2); |
2255 | c2 = |
2256 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
2257 | if (is_cow(x2)) { |
2258 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
2259 | container_type_2); |
2260 | } |
2261 | ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, |
2262 | container_type_2); |
2263 | pos1++; |
2264 | length1++; |
2265 | pos2++; |
2266 | if (pos2 == length2) break; |
2267 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2268 | } |
2269 | } |
2270 | if (pos1 == length1) { |
2271 | ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, |
2272 | pos2, length2, is_cow(x2)); |
2273 | } |
2274 | } |
2275 | |
2276 | roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, |
2277 | const roaring_bitmap_t *x2) { |
2278 | uint8_t container_result_type = 0; |
2279 | const int length1 = x1->high_low_container.size, |
2280 | length2 = x2->high_low_container.size; |
2281 | if (0 == length1) { |
2282 | return roaring_bitmap_copy(x2); |
2283 | } |
2284 | if (0 == length2) { |
2285 | return roaring_bitmap_copy(x1); |
2286 | } |
2287 | roaring_bitmap_t *answer = |
2288 | roaring_bitmap_create_with_capacity(length1 + length2); |
2289 | roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); |
2290 | int pos1 = 0, pos2 = 0; |
2291 | uint8_t container_type_1, container_type_2; |
2292 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2293 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2294 | while (true) { |
2295 | if (s1 == s2) { |
2296 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2297 | &container_type_1); |
2298 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2299 | &container_type_2); |
2300 | void *c = |
2301 | container_lazy_xor(c1, container_type_1, c2, container_type_2, |
2302 | &container_result_type); |
2303 | |
2304 | if (container_nonzero_cardinality(c, container_result_type)) { |
2305 | ra_append(&answer->high_low_container, s1, c, |
2306 | container_result_type); |
2307 | } else { |
2308 | container_free(c, container_result_type); |
2309 | } |
2310 | |
2311 | ++pos1; |
2312 | ++pos2; |
2313 | if (pos1 == length1) break; |
2314 | if (pos2 == length2) break; |
2315 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2316 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2317 | |
2318 | } else if (s1 < s2) { // s1 < s2 |
2319 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2320 | &container_type_1); |
2321 | c1 = |
2322 | get_copy_of_container(c1, &container_type_1, is_cow(x1)); |
2323 | if (is_cow(x1)) { |
2324 | ra_set_container_at_index(&x1->high_low_container, pos1, c1, |
2325 | container_type_1); |
2326 | } |
2327 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
2328 | pos1++; |
2329 | if (pos1 == length1) break; |
2330 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2331 | |
2332 | } else { // s1 > s2 |
2333 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2334 | &container_type_2); |
2335 | c2 = |
2336 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
2337 | if (is_cow(x2)) { |
2338 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
2339 | container_type_2); |
2340 | } |
2341 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
2342 | pos2++; |
2343 | if (pos2 == length2) break; |
2344 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2345 | } |
2346 | } |
2347 | if (pos1 == length1) { |
2348 | ra_append_copy_range(&answer->high_low_container, |
2349 | &x2->high_low_container, pos2, length2, |
2350 | is_cow(x2)); |
2351 | } else if (pos2 == length2) { |
2352 | ra_append_copy_range(&answer->high_low_container, |
2353 | &x1->high_low_container, pos1, length1, |
2354 | is_cow(x1)); |
2355 | } |
2356 | return answer; |
2357 | } |
2358 | |
2359 | void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, |
2360 | const roaring_bitmap_t *x2) { |
2361 | assert(x1 != x2); |
2362 | uint8_t container_result_type = 0; |
2363 | int length1 = x1->high_low_container.size; |
2364 | const int length2 = x2->high_low_container.size; |
2365 | |
2366 | if (0 == length2) return; |
2367 | |
2368 | if (0 == length1) { |
2369 | roaring_bitmap_overwrite(x1, x2); |
2370 | return; |
2371 | } |
2372 | int pos1 = 0, pos2 = 0; |
2373 | uint8_t container_type_1, container_type_2; |
2374 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2375 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2376 | while (true) { |
2377 | if (s1 == s2) { |
2378 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2379 | &container_type_1); |
2380 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
2381 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2382 | &container_type_2); |
2383 | void *c = |
2384 | container_lazy_ixor(c1, container_type_1, c2, container_type_2, |
2385 | &container_result_type); |
2386 | if (container_nonzero_cardinality(c, container_result_type)) { |
2387 | ra_set_container_at_index(&x1->high_low_container, pos1, c, |
2388 | container_result_type); |
2389 | ++pos1; |
2390 | } else { |
2391 | container_free(c, container_result_type); |
2392 | ra_remove_at_index(&x1->high_low_container, pos1); |
2393 | --length1; |
2394 | } |
2395 | ++pos2; |
2396 | if (pos1 == length1) break; |
2397 | if (pos2 == length2) break; |
2398 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2399 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2400 | |
2401 | } else if (s1 < s2) { // s1 < s2 |
2402 | pos1++; |
2403 | if (pos1 == length1) break; |
2404 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2405 | |
2406 | } else { // s1 > s2 |
2407 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2408 | &container_type_2); |
2409 | // void *c2_clone = container_clone(c2, container_type_2); |
2410 | c2 = |
2411 | get_copy_of_container(c2, &container_type_2, is_cow(x2)); |
2412 | if (is_cow(x2)) { |
2413 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
2414 | container_type_2); |
2415 | } |
2416 | ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, |
2417 | container_type_2); |
2418 | pos1++; |
2419 | length1++; |
2420 | pos2++; |
2421 | if (pos2 == length2) break; |
2422 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2423 | } |
2424 | } |
2425 | if (pos1 == length1) { |
2426 | ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, |
2427 | pos2, length2, is_cow(x2)); |
2428 | } |
2429 | } |
2430 | |
2431 | void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { |
2432 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
2433 | const uint8_t original_typecode = ra->high_low_container.typecodes[i]; |
2434 | void *container = ra->high_low_container.containers[i]; |
2435 | uint8_t new_typecode = original_typecode; |
2436 | void *newcontainer = |
2437 | container_repair_after_lazy(container, &new_typecode); |
2438 | ra->high_low_container.containers[i] = newcontainer; |
2439 | ra->high_low_container.typecodes[i] = new_typecode; |
2440 | } |
2441 | } |
2442 | |
2443 | |
2444 | |
2445 | /** |
2446 | * roaring_bitmap_rank returns the number of integers that are smaller or equal |
2447 | * to x. |
2448 | */ |
2449 | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { |
2450 | uint64_t size = 0; |
2451 | uint32_t xhigh = x >> 16; |
2452 | for (int i = 0; i < bm->high_low_container.size; i++) { |
2453 | uint32_t key = bm->high_low_container.keys[i]; |
2454 | if (xhigh > key) { |
2455 | size += |
2456 | container_get_cardinality(bm->high_low_container.containers[i], |
2457 | bm->high_low_container.typecodes[i]); |
2458 | } else if (xhigh == key) { |
2459 | return size + container_rank(bm->high_low_container.containers[i], |
2460 | bm->high_low_container.typecodes[i], |
2461 | x & 0xFFFF); |
2462 | } else { |
2463 | return size; |
2464 | } |
2465 | } |
2466 | return size; |
2467 | } |
2468 | |
2469 | /** |
2470 | * roaring_bitmap_smallest returns the smallest value in the set. |
2471 | * Returns UINT32_MAX if the set is empty. |
2472 | */ |
2473 | uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { |
2474 | if (bm->high_low_container.size > 0) { |
2475 | void *container = bm->high_low_container.containers[0]; |
2476 | uint8_t typecode = bm->high_low_container.typecodes[0]; |
2477 | uint32_t key = bm->high_low_container.keys[0]; |
2478 | uint32_t lowvalue = container_minimum(container, typecode); |
2479 | return lowvalue | (key << 16); |
2480 | } |
2481 | return UINT32_MAX; |
2482 | } |
2483 | |
2484 | /** |
2485 | * roaring_bitmap_smallest returns the greatest value in the set. |
2486 | * Returns 0 if the set is empty. |
2487 | */ |
2488 | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { |
2489 | if (bm->high_low_container.size > 0) { |
2490 | void *container = |
2491 | bm->high_low_container.containers[bm->high_low_container.size - 1]; |
2492 | uint8_t typecode = |
2493 | bm->high_low_container.typecodes[bm->high_low_container.size - 1]; |
2494 | uint32_t key = |
2495 | bm->high_low_container.keys[bm->high_low_container.size - 1]; |
2496 | uint32_t lowvalue = container_maximum(container, typecode); |
2497 | return lowvalue | (key << 16); |
2498 | } |
2499 | return 0; |
2500 | } |
2501 | |
2502 | bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, |
2503 | uint32_t *element) { |
2504 | void *container; |
2505 | uint8_t typecode; |
2506 | uint16_t key; |
2507 | uint32_t start_rank = 0; |
2508 | int i = 0; |
2509 | bool valid = false; |
2510 | while (!valid && i < bm->high_low_container.size) { |
2511 | container = bm->high_low_container.containers[i]; |
2512 | typecode = bm->high_low_container.typecodes[i]; |
2513 | valid = |
2514 | container_select(container, typecode, &start_rank, rank, element); |
2515 | i++; |
2516 | } |
2517 | |
2518 | if (valid) { |
2519 | key = bm->high_low_container.keys[i - 1]; |
2520 | *element |= (key << 16); |
2521 | return true; |
2522 | } else |
2523 | return false; |
2524 | } |
2525 | |
2526 | bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, |
2527 | const roaring_bitmap_t *x2) { |
2528 | const int length1 = x1->high_low_container.size, |
2529 | length2 = x2->high_low_container.size; |
2530 | uint64_t answer = 0; |
2531 | int pos1 = 0, pos2 = 0; |
2532 | |
2533 | while (pos1 < length1 && pos2 < length2) { |
2534 | const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); |
2535 | const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); |
2536 | |
2537 | if (s1 == s2) { |
2538 | uint8_t container_type_1, container_type_2; |
2539 | void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1, |
2540 | &container_type_1); |
2541 | void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2, |
2542 | &container_type_2); |
2543 | if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true; |
2544 | ++pos1; |
2545 | ++pos2; |
2546 | } else if (s1 < s2) { // s1 < s2 |
2547 | pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); |
2548 | } else { // s1 > s2 |
2549 | pos2 = ra_advance_until(& x2->high_low_container, s1, pos2); |
2550 | } |
2551 | } |
2552 | return answer; |
2553 | } |
2554 | |
2555 | |
2556 | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, |
2557 | const roaring_bitmap_t *x2) { |
2558 | const int length1 = x1->high_low_container.size, |
2559 | length2 = x2->high_low_container.size; |
2560 | uint64_t answer = 0; |
2561 | int pos1 = 0, pos2 = 0; |
2562 | |
2563 | while (pos1 < length1 && pos2 < length2) { |
2564 | const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
2565 | const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
2566 | |
2567 | if (s1 == s2) { |
2568 | uint8_t container_type_1, container_type_2; |
2569 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
2570 | &container_type_1); |
2571 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
2572 | &container_type_2); |
2573 | answer += container_and_cardinality(c1, container_type_1, c2, |
2574 | container_type_2); |
2575 | ++pos1; |
2576 | ++pos2; |
2577 | } else if (s1 < s2) { // s1 < s2 |
2578 | pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); |
2579 | } else { // s1 > s2 |
2580 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
2581 | } |
2582 | } |
2583 | return answer; |
2584 | } |
2585 | |
2586 | double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, |
2587 | const roaring_bitmap_t *x2) { |
2588 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
2589 | const uint64_t c2 = roaring_bitmap_get_cardinality(x2); |
2590 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
2591 | return (double)inter / (double)(c1 + c2 - inter); |
2592 | } |
2593 | |
2594 | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, |
2595 | const roaring_bitmap_t *x2) { |
2596 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
2597 | const uint64_t c2 = roaring_bitmap_get_cardinality(x2); |
2598 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
2599 | return c1 + c2 - inter; |
2600 | } |
2601 | |
2602 | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, |
2603 | const roaring_bitmap_t *x2) { |
2604 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
2605 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
2606 | return c1 - inter; |
2607 | } |
2608 | |
2609 | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, |
2610 | const roaring_bitmap_t *x2) { |
2611 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
2612 | const uint64_t c2 = roaring_bitmap_get_cardinality(x2); |
2613 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
2614 | return c1 + c2 - 2 * inter; |
2615 | } |
2616 | |
2617 | |
2618 | /** |
2619 | * Check whether a range of values from range_start (included) to range_end (excluded) is present |
2620 | */ |
2621 | bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { |
2622 | if(range_end >= UINT64_C(0x100000000)) { |
2623 | range_end = UINT64_C(0x100000000); |
2624 | } |
2625 | if (range_start >= range_end) return true; // empty range are always contained! |
2626 | if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); |
2627 | uint16_t hb_rs = (uint16_t)(range_start >> 16); |
2628 | uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); |
2629 | const int32_t span = hb_re - hb_rs; |
2630 | const int32_t hlc_sz = ra_get_size(&r->high_low_container); |
2631 | if (hlc_sz < span + 1) { |
2632 | return false; |
2633 | } |
2634 | int32_t is = ra_get_index(&r->high_low_container, hb_rs); |
2635 | int32_t ie = ra_get_index(&r->high_low_container, hb_re); |
2636 | ie = (ie < 0 ? -ie - 1 : ie); |
2637 | if ((is < 0) || ((ie - is) != span)) { |
2638 | return false; |
2639 | } |
2640 | const uint32_t lb_rs = range_start & 0xFFFF; |
2641 | const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; |
2642 | uint8_t typecode; |
2643 | void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode); |
2644 | if (hb_rs == hb_re) { |
2645 | return container_contains_range(container, lb_rs, lb_re, typecode); |
2646 | } |
2647 | if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) { |
2648 | return false; |
2649 | } |
2650 | assert(ie < hlc_sz); // would indicate an algorithmic bug |
2651 | container = ra_get_container_at_index(&r->high_low_container, ie, &typecode); |
2652 | if (!container_contains_range(container, 0, lb_re, typecode)) { |
2653 | return false; |
2654 | } |
2655 | for (int32_t i = is + 1; i < ie; ++i) { |
2656 | container = ra_get_container_at_index(&r->high_low_container, i, &typecode); |
2657 | if (!container_is_full(container, typecode) ) { |
2658 | return false; |
2659 | } |
2660 | } |
2661 | return true; |
2662 | } |
2663 | |
2664 | |
2665 | bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, |
2666 | const roaring_bitmap_t *ra2) { |
2667 | return (roaring_bitmap_get_cardinality(ra2) > |
2668 | roaring_bitmap_get_cardinality(ra1) && |
2669 | roaring_bitmap_is_subset(ra1, ra2)); |
2670 | } |
2671 | |
2672 | |
2673 | /* |
2674 | * FROZEN SERIALIZATION FORMAT DESCRIPTION |
2675 | * |
2676 | * -- (beginning must be aligned by 32 bytes) -- |
2677 | * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers] |
2678 | * <run_data> rle16_t[total number of rle elements in all run containers] |
2679 | * <array_data> uint16_t[total number of array elements in all array containers] |
2680 | * <keys> uint16_t[num_containers] |
2681 | * <counts> uint16_t[num_containers] |
2682 | * <typecodes> uint8_t[num_containers] |
2683 | * <header> uint32_t |
2684 | * |
2685 | * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) |
2686 | * and the number of containers (17 bits). |
2687 | * |
2688 | * <counts> stores number of elements for every container. |
2689 | * Its meaning depends on container type. |
2690 | * For array and bitset containers, this value is the container cardinality minus one. |
2691 | * For run container, it is the number of rle_t elements (n_runs). |
2692 | * |
2693 | * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of |
2694 | * all containers of respective type. |
2695 | * |
2696 | * <*_data> and <keys> are kept close together because they are not accessed |
2697 | * during deserilization. This may reduce IO in case of large mmaped bitmaps. |
2698 | * All members have their native alignments during deserilization except <header>, |
2699 | * which is not guaranteed to be aligned by 4 bytes. |
2700 | */ |
2701 | |
2702 | size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { |
2703 | const roaring_array_t *ra = &rb->high_low_container; |
2704 | size_t num_bytes = 0; |
2705 | for (int32_t i = 0; i < ra->size; i++) { |
2706 | switch (ra->typecodes[i]) { |
2707 | case BITSET_CONTAINER_TYPE_CODE: { |
2708 | num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
2709 | break; |
2710 | } |
2711 | case RUN_CONTAINER_TYPE_CODE: { |
2712 | const run_container_t *run = |
2713 | (const run_container_t *) ra->containers[i]; |
2714 | num_bytes += run->n_runs * sizeof(rle16_t); |
2715 | break; |
2716 | } |
2717 | case ARRAY_CONTAINER_TYPE_CODE: { |
2718 | const array_container_t *array = |
2719 | (const array_container_t *) ra->containers[i]; |
2720 | num_bytes += array->cardinality * sizeof(uint16_t); |
2721 | break; |
2722 | } |
2723 | default: |
2724 | __builtin_unreachable(); |
2725 | } |
2726 | } |
2727 | num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes |
2728 | num_bytes += 4; // header |
2729 | return num_bytes; |
2730 | } |
2731 | |
2732 | inline static void *arena_alloc(char **arena, size_t num_bytes) { |
2733 | char *res = *arena; |
2734 | *arena += num_bytes; |
2735 | return res; |
2736 | } |
2737 | |
2738 | void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { |
2739 | /* |
2740 | * Note: we do not require user to supply spicificly aligned buffer. |
2741 | * Thus we have to use memcpy() everywhere. |
2742 | */ |
2743 | |
2744 | const roaring_array_t *ra = &rb->high_low_container; |
2745 | |
2746 | size_t bitset_zone_size = 0; |
2747 | size_t run_zone_size = 0; |
2748 | size_t array_zone_size = 0; |
2749 | for (int32_t i = 0; i < ra->size; i++) { |
2750 | switch (ra->typecodes[i]) { |
2751 | case BITSET_CONTAINER_TYPE_CODE: { |
2752 | bitset_zone_size += |
2753 | BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
2754 | break; |
2755 | } |
2756 | case RUN_CONTAINER_TYPE_CODE: { |
2757 | const run_container_t *run = |
2758 | (const run_container_t *) ra->containers[i]; |
2759 | run_zone_size += run->n_runs * sizeof(rle16_t); |
2760 | break; |
2761 | } |
2762 | case ARRAY_CONTAINER_TYPE_CODE: { |
2763 | const array_container_t *array = |
2764 | (const array_container_t *) ra->containers[i]; |
2765 | array_zone_size += array->cardinality * sizeof(uint16_t); |
2766 | break; |
2767 | } |
2768 | default: |
2769 | __builtin_unreachable(); |
2770 | } |
2771 | } |
2772 | |
2773 | uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); |
2774 | rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); |
2775 | uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); |
2776 | uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); |
2777 | uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); |
2778 | uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); |
2779 | uint32_t * = (uint32_t *)arena_alloc(&buf, 4); |
2780 | |
2781 | for (int32_t i = 0; i < ra->size; i++) { |
2782 | uint16_t count; |
2783 | switch (ra->typecodes[i]) { |
2784 | case BITSET_CONTAINER_TYPE_CODE: { |
2785 | const bitset_container_t *bitset = |
2786 | (const bitset_container_t *) ra->containers[i]; |
2787 | memcpy(bitset_zone, bitset->array, |
2788 | BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); |
2789 | bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; |
2790 | if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
2791 | count = bitset->cardinality - 1; |
2792 | } else { |
2793 | count = bitset_container_compute_cardinality(bitset) - 1; |
2794 | } |
2795 | break; |
2796 | } |
2797 | case RUN_CONTAINER_TYPE_CODE: { |
2798 | const run_container_t *run = |
2799 | (const run_container_t *) ra->containers[i]; |
2800 | size_t num_bytes = run->n_runs * sizeof(rle16_t); |
2801 | memcpy(run_zone, run->runs, num_bytes); |
2802 | run_zone += run->n_runs; |
2803 | count = run->n_runs; |
2804 | break; |
2805 | } |
2806 | case ARRAY_CONTAINER_TYPE_CODE: { |
2807 | const array_container_t *array = |
2808 | (const array_container_t *) ra->containers[i]; |
2809 | size_t num_bytes = array->cardinality * sizeof(uint16_t); |
2810 | memcpy(array_zone, array->array, num_bytes); |
2811 | array_zone += array->cardinality; |
2812 | count = array->cardinality - 1; |
2813 | break; |
2814 | } |
2815 | default: |
2816 | __builtin_unreachable(); |
2817 | } |
2818 | memcpy(&count_zone[i], &count, 2); |
2819 | } |
2820 | memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); |
2821 | memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); |
2822 | uint32_t = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; |
2823 | memcpy(header_zone, &header, 4); |
2824 | } |
2825 | |
2826 | const roaring_bitmap_t * |
2827 | roaring_bitmap_frozen_view(const char *buf, size_t length) { |
2828 | if ((uintptr_t)buf % 32 != 0) { |
2829 | return NULL; |
2830 | } |
2831 | |
2832 | // cookie and num_containers |
2833 | if (length < 4) { |
2834 | return NULL; |
2835 | } |
2836 | uint32_t ; |
2837 | memcpy(&header, buf + length - 4, 4); // header may be misaligned |
2838 | if ((header & 0x7FFF) != FROZEN_COOKIE) { |
2839 | return NULL; |
2840 | } |
2841 | int32_t num_containers = (header >> 15); |
2842 | |
2843 | // typecodes, counts and keys |
2844 | if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { |
2845 | return NULL; |
2846 | } |
2847 | uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); |
2848 | uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); |
2849 | uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); |
2850 | |
2851 | // {bitset,array,run}_zone |
2852 | int32_t num_bitset_containers = 0; |
2853 | int32_t num_run_containers = 0; |
2854 | int32_t num_array_containers = 0; |
2855 | size_t bitset_zone_size = 0; |
2856 | size_t run_zone_size = 0; |
2857 | size_t array_zone_size = 0; |
2858 | for (int32_t i = 0; i < num_containers; i++) { |
2859 | switch (typecodes[i]) { |
2860 | case BITSET_CONTAINER_TYPE_CODE: |
2861 | num_bitset_containers++; |
2862 | bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
2863 | break; |
2864 | case RUN_CONTAINER_TYPE_CODE: |
2865 | num_run_containers++; |
2866 | run_zone_size += counts[i] * sizeof(rle16_t); |
2867 | break; |
2868 | case ARRAY_CONTAINER_TYPE_CODE: |
2869 | num_array_containers++; |
2870 | array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); |
2871 | break; |
2872 | default: |
2873 | return NULL; |
2874 | } |
2875 | } |
2876 | if (length != bitset_zone_size + run_zone_size + array_zone_size + |
2877 | 5 * num_containers + 4) { |
2878 | return NULL; |
2879 | } |
2880 | uint64_t *bitset_zone = (uint64_t*) (buf); |
2881 | rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); |
2882 | uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); |
2883 | |
2884 | size_t alloc_size = 0; |
2885 | alloc_size += sizeof(roaring_bitmap_t); |
2886 | alloc_size += num_containers * sizeof(void *); |
2887 | alloc_size += num_bitset_containers * sizeof(bitset_container_t); |
2888 | alloc_size += num_run_containers * sizeof(run_container_t); |
2889 | alloc_size += num_array_containers * sizeof(array_container_t); |
2890 | |
2891 | char *arena = (char *)malloc(alloc_size); |
2892 | if (arena == NULL) { |
2893 | return NULL; |
2894 | } |
2895 | |
2896 | roaring_bitmap_t *rb = (roaring_bitmap_t *) |
2897 | arena_alloc(&arena, sizeof(roaring_bitmap_t)); |
2898 | rb->high_low_container.flags = ROARING_FLAG_FROZEN; |
2899 | rb->high_low_container.allocation_size = num_containers; |
2900 | rb->high_low_container.size = num_containers; |
2901 | rb->high_low_container.keys = (uint16_t *)keys; |
2902 | rb->high_low_container.typecodes = (uint8_t *)typecodes; |
2903 | rb->high_low_container.containers = |
2904 | (void **)arena_alloc(&arena, sizeof(void*) * num_containers); |
2905 | for (int32_t i = 0; i < num_containers; i++) { |
2906 | switch (typecodes[i]) { |
2907 | case BITSET_CONTAINER_TYPE_CODE: { |
2908 | bitset_container_t *bitset = (bitset_container_t *) |
2909 | arena_alloc(&arena, sizeof(bitset_container_t)); |
2910 | bitset->array = bitset_zone; |
2911 | bitset->cardinality = counts[i] + UINT32_C(1); |
2912 | rb->high_low_container.containers[i] = bitset; |
2913 | bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; |
2914 | break; |
2915 | } |
2916 | case RUN_CONTAINER_TYPE_CODE: { |
2917 | run_container_t *run = (run_container_t *) |
2918 | arena_alloc(&arena, sizeof(run_container_t)); |
2919 | run->capacity = counts[i]; |
2920 | run->n_runs = counts[i]; |
2921 | run->runs = run_zone; |
2922 | rb->high_low_container.containers[i] = run; |
2923 | run_zone += run->n_runs; |
2924 | break; |
2925 | } |
2926 | case ARRAY_CONTAINER_TYPE_CODE: { |
2927 | array_container_t *array = (array_container_t *) |
2928 | arena_alloc(&arena, sizeof(array_container_t)); |
2929 | array->capacity = counts[i] + UINT32_C(1); |
2930 | array->cardinality = counts[i] + UINT32_C(1); |
2931 | array->array = array_zone; |
2932 | rb->high_low_container.containers[i] = array; |
2933 | array_zone += counts[i] + UINT32_C(1); |
2934 | break; |
2935 | } |
2936 | default: |
2937 | free(arena); |
2938 | return NULL; |
2939 | } |
2940 | } |
2941 | |
2942 | return rb; |
2943 | } |
2944 | |