1#include <assert.h>
2#include <stdbool.h>
3#include <stdio.h>
4#include <stdlib.h>
5#include <string.h>
6#include <inttypes.h>
7
8#include <roaring/containers/bitset.h>
9#include <roaring/containers/containers.h>
10#include <roaring/roaring_array.h>
11
12// Convention: [0,ra->size) all elements are initialized
13// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing
14
15extern inline int32_t ra_get_size(const roaring_array_t *ra);
16extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
17extern inline void *ra_get_container_at_index(const roaring_array_t *ra,
18 uint16_t i, uint8_t *typecode);
19extern inline void ra_unshare_container_at_index(roaring_array_t *ra,
20 uint16_t i);
21extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra,
22 int32_t i,
23 uint16_t key, void *c,
24 uint8_t typecode);
25extern inline void ra_set_container_at_index(const roaring_array_t *ra,
26 int32_t i, void *c,
27 uint8_t typecode);
28
29static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
30 // because we combine the allocations, it is not possible to use realloc
31 /*ra->keys =
32 (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity);
33ra->containers =
34 (void **)realloc(ra->containers, sizeof(void *) * new_capacity);
35ra->typecodes =
36 (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity);
37if (!ra->keys || !ra->containers || !ra->typecodes) {
38 free(ra->keys);
39 free(ra->containers);
40 free(ra->typecodes);
41 return false;
42}*/
43
44 if ( new_capacity == 0 ) {
45 free(ra->containers);
46 ra->containers = NULL;
47 ra->keys = NULL;
48 ra->typecodes = NULL;
49 ra->allocation_size = 0;
50 return true;
51 }
52 const size_t memoryneeded =
53 new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
54 void *bigalloc = malloc(memoryneeded);
55 if (!bigalloc) return false;
56 void *oldbigalloc = ra->containers;
57 void **newcontainers = (void **)bigalloc;
58 uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity);
59 uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
60 assert((char *)(newtypecodes + new_capacity) ==
61 (char *)bigalloc + memoryneeded);
62 if(ra->size > 0) {
63 memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size);
64 memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
65 memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
66 }
67 ra->containers = newcontainers;
68 ra->keys = newkeys;
69 ra->typecodes = newtypecodes;
70 ra->allocation_size = new_capacity;
71 free(oldbigalloc);
72 return true;
73}
74
75bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
76 if (!new_ra) return false;
77 ra_init(new_ra);
78
79 if (cap > INT32_MAX) { return false; }
80
81 if(cap > 0) {
82 void *bigalloc =
83 malloc(cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)));
84 if( bigalloc == NULL ) return false;
85 new_ra->containers = (void **)bigalloc;
86 new_ra->keys = (uint16_t *)(new_ra->containers + cap);
87 new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
88 // Narrowing is safe because of above check
89 new_ra->allocation_size = (int32_t)cap;
90 }
91 return true;
92}
93
94int ra_shrink_to_fit(roaring_array_t *ra) {
95 int savings = (ra->allocation_size - ra->size) *
96 (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
97 if (!realloc_array(ra, ra->size)) {
98 return 0;
99 }
100 ra->allocation_size = ra->size;
101 return savings;
102}
103
104void ra_init(roaring_array_t *new_ra) {
105 if (!new_ra) { return; }
106 new_ra->keys = NULL;
107 new_ra->containers = NULL;
108 new_ra->typecodes = NULL;
109
110 new_ra->allocation_size = 0;
111 new_ra->size = 0;
112 new_ra->flags = 0;
113}
114
115bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
116 bool copy_on_write) {
117 if (!ra_init_with_capacity(dest, source->size)) return false;
118 dest->size = source->size;
119 dest->allocation_size = source->size;
120 if(dest->size > 0) {
121 memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
122 }
123 // we go through the containers, turning them into shared containers...
124 if (copy_on_write) {
125 for (int32_t i = 0; i < dest->size; ++i) {
126 source->containers[i] = get_copy_of_container(
127 source->containers[i], &source->typecodes[i], copy_on_write);
128 }
129 // we do a shallow copy to the other bitmap
130 if(dest->size > 0) {
131 memcpy(dest->containers, source->containers,
132 dest->size * sizeof(void *));
133 memcpy(dest->typecodes, source->typecodes,
134 dest->size * sizeof(uint8_t));
135 }
136 } else {
137 if(dest->size > 0) {
138 memcpy(dest->typecodes, source->typecodes,
139 dest->size * sizeof(uint8_t));
140 }
141 for (int32_t i = 0; i < dest->size; i++) {
142 dest->containers[i] =
143 container_clone(source->containers[i], source->typecodes[i]);
144 if (dest->containers[i] == NULL) {
145 for (int32_t j = 0; j < i; j++) {
146 container_free(dest->containers[j], dest->typecodes[j]);
147 }
148 ra_clear_without_containers(dest);
149 return false;
150 }
151 }
152 }
153 return true;
154}
155
156bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
157 bool copy_on_write) {
158 ra_clear_containers(dest); // we are going to overwrite them
159 if (dest->allocation_size < source->size) {
160 if (!realloc_array(dest, source->size)) {
161 return false;
162 }
163 }
164 dest->size = source->size;
165 memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
166 // we go through the containers, turning them into shared containers...
167 if (copy_on_write) {
168 for (int32_t i = 0; i < dest->size; ++i) {
169 source->containers[i] = get_copy_of_container(
170 source->containers[i], &source->typecodes[i], copy_on_write);
171 }
172 // we do a shallow copy to the other bitmap
173 memcpy(dest->containers, source->containers,
174 dest->size * sizeof(void *));
175 memcpy(dest->typecodes, source->typecodes,
176 dest->size * sizeof(uint8_t));
177 } else {
178 memcpy(dest->typecodes, source->typecodes,
179 dest->size * sizeof(uint8_t));
180 for (int32_t i = 0; i < dest->size; i++) {
181 dest->containers[i] =
182 container_clone(source->containers[i], source->typecodes[i]);
183 if (dest->containers[i] == NULL) {
184 for (int32_t j = 0; j < i; j++) {
185 container_free(dest->containers[j], dest->typecodes[j]);
186 }
187 ra_clear_without_containers(dest);
188 return false;
189 }
190 }
191 }
192 return true;
193}
194
195void ra_clear_containers(roaring_array_t *ra) {
196 for (int32_t i = 0; i < ra->size; ++i) {
197 container_free(ra->containers[i], ra->typecodes[i]);
198 }
199}
200
201void ra_reset(roaring_array_t *ra) {
202 ra_clear_containers(ra);
203 ra->size = 0;
204 ra_shrink_to_fit(ra);
205}
206
207void ra_clear_without_containers(roaring_array_t *ra) {
208 free(ra->containers); // keys and typecodes are allocated with containers
209 ra->size = 0;
210 ra->allocation_size = 0;
211 ra->containers = NULL;
212 ra->keys = NULL;
213 ra->typecodes = NULL;
214}
215
216void ra_clear(roaring_array_t *ra) {
217 ra_clear_containers(ra);
218 ra_clear_without_containers(ra);
219}
220
221bool extend_array(roaring_array_t *ra, int32_t k) {
222 int32_t desired_size = ra->size + k;
223 assert(desired_size <= MAX_CONTAINERS);
224 if (desired_size > ra->allocation_size) {
225 int32_t new_capacity =
226 (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4;
227 if (new_capacity > MAX_CONTAINERS) {
228 new_capacity = MAX_CONTAINERS;
229 }
230
231 return realloc_array(ra, new_capacity);
232 }
233 return true;
234}
235
236void ra_append(roaring_array_t *ra, uint16_t key, void *container,
237 uint8_t typecode) {
238 extend_array(ra, 1);
239 const int32_t pos = ra->size;
240
241 ra->keys[pos] = key;
242 ra->containers[pos] = container;
243 ra->typecodes[pos] = typecode;
244 ra->size++;
245}
246
247void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
248 uint16_t index, bool copy_on_write) {
249 extend_array(ra, 1);
250 const int32_t pos = ra->size;
251
252 // old contents is junk not needing freeing
253 ra->keys[pos] = sa->keys[index];
254 // the shared container will be in two bitmaps
255 if (copy_on_write) {
256 sa->containers[index] = get_copy_of_container(
257 sa->containers[index], &sa->typecodes[index], copy_on_write);
258 ra->containers[pos] = sa->containers[index];
259 ra->typecodes[pos] = sa->typecodes[index];
260 } else {
261 ra->containers[pos] =
262 container_clone(sa->containers[index], sa->typecodes[index]);
263 ra->typecodes[pos] = sa->typecodes[index];
264 }
265 ra->size++;
266}
267
268void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
269 uint16_t stopping_key, bool copy_on_write) {
270 for (int32_t i = 0; i < sa->size; ++i) {
271 if (sa->keys[i] >= stopping_key) break;
272 ra_append_copy(ra, sa, i, copy_on_write);
273 }
274}
275
276void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
277 int32_t start_index, int32_t end_index,
278 bool copy_on_write) {
279 extend_array(ra, end_index - start_index);
280 for (int32_t i = start_index; i < end_index; ++i) {
281 const int32_t pos = ra->size;
282 ra->keys[pos] = sa->keys[i];
283 if (copy_on_write) {
284 sa->containers[i] = get_copy_of_container(
285 sa->containers[i], &sa->typecodes[i], copy_on_write);
286 ra->containers[pos] = sa->containers[i];
287 ra->typecodes[pos] = sa->typecodes[i];
288 } else {
289 ra->containers[pos] =
290 container_clone(sa->containers[i], sa->typecodes[i]);
291 ra->typecodes[pos] = sa->typecodes[i];
292 }
293 ra->size++;
294 }
295}
296
297void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
298 uint16_t before_start, bool copy_on_write) {
299 int start_location = ra_get_index(sa, before_start);
300 if (start_location >= 0)
301 ++start_location;
302 else
303 start_location = -start_location - 1;
304 ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write);
305}
306
307void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
308 int32_t start_index, int32_t end_index) {
309 extend_array(ra, end_index - start_index);
310
311 for (int32_t i = start_index; i < end_index; ++i) {
312 const int32_t pos = ra->size;
313
314 ra->keys[pos] = sa->keys[i];
315 ra->containers[pos] = sa->containers[i];
316 ra->typecodes[pos] = sa->typecodes[i];
317 ra->size++;
318 }
319}
320
321void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
322 int32_t start_index, int32_t end_index,
323 bool copy_on_write) {
324 extend_array(ra, end_index - start_index);
325
326 for (int32_t i = start_index; i < end_index; ++i) {
327 const int32_t pos = ra->size;
328 ra->keys[pos] = sa->keys[i];
329 if (copy_on_write) {
330 sa->containers[i] = get_copy_of_container(
331 sa->containers[i], &sa->typecodes[i], copy_on_write);
332 ra->containers[pos] = sa->containers[i];
333 ra->typecodes[pos] = sa->typecodes[i];
334 } else {
335 ra->containers[pos] =
336 container_clone(sa->containers[i], sa->typecodes[i]);
337 ra->typecodes[pos] = sa->typecodes[i];
338 }
339 ra->size++;
340 }
341}
342
343void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) {
344 int i = binarySearch(ra->keys, (int32_t)ra->size, x);
345 if (i < 0) return NULL;
346 *typecode = ra->typecodes[i];
347 return ra->containers[i];
348}
349
350extern inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i,
351 uint8_t *typecode);
352
353void *ra_get_writable_container(roaring_array_t *ra, uint16_t x,
354 uint8_t *typecode) {
355 int i = binarySearch(ra->keys, (int32_t)ra->size, x);
356 if (i < 0) return NULL;
357 *typecode = ra->typecodes[i];
358 return get_writable_copy_if_shared(ra->containers[i], typecode);
359}
360
361void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i,
362 uint8_t *typecode) {
363 assert(i < ra->size);
364 *typecode = ra->typecodes[i];
365 return get_writable_copy_if_shared(ra->containers[i], typecode);
366}
367
368uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
369 return ra->keys[i];
370}
371
372extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
373
374extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
375 int32_t pos);
376
377// everything skipped over is freed
378int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
379 while (pos < ra->size && ra->keys[pos] < x) {
380 container_free(ra->containers[pos], ra->typecodes[pos]);
381 ++pos;
382 }
383 return pos;
384}
385
386void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key,
387 void *container, uint8_t typecode) {
388 extend_array(ra, 1);
389 // May be an optimization opportunity with DIY memmove
390 memmove(&(ra->keys[i + 1]), &(ra->keys[i]),
391 sizeof(uint16_t) * (ra->size - i));
392 memmove(&(ra->containers[i + 1]), &(ra->containers[i]),
393 sizeof(void *) * (ra->size - i));
394 memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]),
395 sizeof(uint8_t) * (ra->size - i));
396 ra->keys[i] = key;
397 ra->containers[i] = container;
398 ra->typecodes[i] = typecode;
399 ra->size++;
400}
401
402// note: Java routine set things to 0, enabling GC.
403// Java called it "resize" but it was always used to downsize.
404// Allowing upsize would break the conventions about
405// valid containers below ra->size.
406
407void ra_downsize(roaring_array_t *ra, int32_t new_length) {
408 assert(new_length <= ra->size);
409 ra->size = new_length;
410}
411
412void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
413 memmove(&(ra->containers[i]), &(ra->containers[i + 1]),
414 sizeof(void *) * (ra->size - i - 1));
415 memmove(&(ra->keys[i]), &(ra->keys[i + 1]),
416 sizeof(uint16_t) * (ra->size - i - 1));
417 memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]),
418 sizeof(uint8_t) * (ra->size - i - 1));
419 ra->size--;
420}
421
422void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
423 container_free(ra->containers[i], ra->typecodes[i]);
424 ra_remove_at_index(ra, i);
425}
426
427// used in inplace andNot only, to slide left the containers from
428// the mutated RoaringBitmap that are after the largest container of
429// the argument RoaringBitmap. In use it should be followed by a call to
430// downsize.
431//
432void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
433 uint32_t new_begin) {
434 assert(begin <= end);
435 assert(new_begin < begin);
436
437 const int range = end - begin;
438
439 // We ensure to previously have freed overwritten containers
440 // that are not copied elsewhere
441
442 memmove(&(ra->containers[new_begin]), &(ra->containers[begin]),
443 sizeof(void *) * range);
444 memmove(&(ra->keys[new_begin]), &(ra->keys[begin]),
445 sizeof(uint16_t) * range);
446 memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]),
447 sizeof(uint8_t) * range);
448}
449
450void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
451 if (distance > 0) {
452 extend_array(ra, distance);
453 }
454 int32_t srcpos = ra->size - count;
455 int32_t dstpos = srcpos + distance;
456 memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]),
457 sizeof(uint16_t) * count);
458 memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]),
459 sizeof(void *) * count);
460 memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]),
461 sizeof(uint8_t) * count);
462 ra->size += distance;
463}
464
465
466size_t ra_size_in_bytes(roaring_array_t *ra) {
467 size_t cardinality = 0;
468 size_t tot_len =
469 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) +
470 ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
471 for (int32_t i = 0; i < ra->size; i++) {
472 tot_len +=
473 (container_serialization_len(ra->containers[i], ra->typecodes[i]) +
474 sizeof(uint16_t));
475 cardinality +=
476 container_get_cardinality(ra->containers[i], ra->typecodes[i]);
477 }
478
479 if ((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) {
480 return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t);
481 }
482 return tot_len;
483}
484
485void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
486 size_t ctr = 0;
487 for (int32_t i = 0; i < ra->size; ++i) {
488 int num_added = container_to_uint32_array(
489 ans + ctr, ra->containers[i], ra->typecodes[i],
490 ((uint32_t)ra->keys[i]) << 16);
491 ctr += num_added;
492 }
493}
494
495bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
496 size_t ctr = 0;
497 size_t dtr = 0;
498
499 size_t t_limit = 0;
500
501 bool first = false;
502 size_t first_skip = 0;
503
504 uint32_t *t_ans = NULL;
505 size_t cur_len = 0;
506
507 for (int i = 0; i < ra->size; ++i) {
508
509 const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]);
510 switch (ra->typecodes[i]) {
511 case BITSET_CONTAINER_TYPE_CODE:
512 t_limit = ((const bitset_container_t *)container)->cardinality;
513 break;
514 case ARRAY_CONTAINER_TYPE_CODE:
515 t_limit = ((const array_container_t *)container)->cardinality;
516 break;
517 case RUN_CONTAINER_TYPE_CODE:
518 t_limit = run_container_cardinality((const run_container_t *)container);
519 break;
520 }
521 if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
522 if (!first){
523 //first_skip = t_limit - (ctr + t_limit - offset);
524 first_skip = offset - ctr;
525 first = true;
526 t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit));
527 if(t_ans == NULL) {
528 return false;
529 }
530 memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ;
531 cur_len = first_skip + limit;
532 }
533 if (dtr + t_limit > cur_len){
534 uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit));
535 if(append_ans == NULL) {
536 if(t_ans != NULL) free(t_ans);
537 return false;
538 }
539 memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit));
540 cur_len = cur_len + t_limit;
541 memcpy(append_ans, t_ans, dtr * sizeof(uint32_t));
542 free(t_ans);
543 t_ans = append_ans;
544 }
545 switch (ra->typecodes[i]) {
546 case BITSET_CONTAINER_TYPE_CODE:
547 container_to_uint32_array(
548 t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i],
549 ((uint32_t)ra->keys[i]) << 16);
550 break;
551 case ARRAY_CONTAINER_TYPE_CODE:
552 container_to_uint32_array(
553 t_ans + dtr, (const array_container_t *)container, ra->typecodes[i],
554 ((uint32_t)ra->keys[i]) << 16);
555 break;
556 case RUN_CONTAINER_TYPE_CODE:
557 container_to_uint32_array(
558 t_ans + dtr, (const run_container_t *)container, ra->typecodes[i],
559 ((uint32_t)ra->keys[i]) << 16);
560 break;
561 }
562 dtr += t_limit;
563 }
564 ctr += t_limit;
565 if (dtr-first_skip >= limit) break;
566 }
567 if(t_ans != NULL) {
568 memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t));
569 free(t_ans);
570 }
571 return true;
572}
573
574bool ra_has_run_container(const roaring_array_t *ra) {
575 for (int32_t k = 0; k < ra->size; ++k) {
576 if (get_container_type(ra->containers[k], ra->typecodes[k]) ==
577 RUN_CONTAINER_TYPE_CODE)
578 return true;
579 }
580 return false;
581}
582
583uint32_t ra_portable_header_size(const roaring_array_t *ra) {
584 if (ra_has_run_container(ra)) {
585 if (ra->size <
586 NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets
587 return 4 + (ra->size + 7) / 8 + 4 * ra->size;
588 }
589 return 4 + (ra->size + 7) / 8 +
590 8 * ra->size; // - 4 because we pack the size with the cookie
591 } else {
592 return 4 + 4 + 8 * ra->size;
593 }
594}
595
596size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
597 size_t count = ra_portable_header_size(ra);
598
599 for (int32_t k = 0; k < ra->size; ++k) {
600 count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
601 }
602 return count;
603}
604
605size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
606 char *initbuf = buf;
607 uint32_t startOffset = 0;
608 bool hasrun = ra_has_run_container(ra);
609 if (hasrun) {
610 uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16);
611 memcpy(buf, &cookie, sizeof(cookie));
612 buf += sizeof(cookie);
613 uint32_t s = (ra->size + 7) / 8;
614 uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1);
615 assert(bitmapOfRunContainers != NULL); // todo: handle
616 for (int32_t i = 0; i < ra->size; ++i) {
617 if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
618 RUN_CONTAINER_TYPE_CODE) {
619 bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
620 }
621 }
622 memcpy(buf, bitmapOfRunContainers, s);
623 buf += s;
624 free(bitmapOfRunContainers);
625 if (ra->size < NO_OFFSET_THRESHOLD) {
626 startOffset = 4 + 4 * ra->size + s;
627 } else {
628 startOffset = 4 + 8 * ra->size + s;
629 }
630 } else { // backwards compatibility
631 uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
632
633 memcpy(buf, &cookie, sizeof(cookie));
634 buf += sizeof(cookie);
635 memcpy(buf, &ra->size, sizeof(ra->size));
636 buf += sizeof(ra->size);
637
638 startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
639 }
640 for (int32_t k = 0; k < ra->size; ++k) {
641 memcpy(buf, &ra->keys[k], sizeof(ra->keys[k]));
642 buf += sizeof(ra->keys[k]);
643 // get_cardinality returns a value in [1,1<<16], subtracting one
644 // we get [0,1<<16 - 1] which fits in 16 bits
645 uint16_t card = (uint16_t)(
646 container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1);
647 memcpy(buf, &card, sizeof(card));
648 buf += sizeof(card);
649 }
650 if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
651 // writing the containers offsets
652 for (int32_t k = 0; k < ra->size; k++) {
653 memcpy(buf, &startOffset, sizeof(startOffset));
654 buf += sizeof(startOffset);
655 startOffset =
656 startOffset +
657 container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
658 }
659 }
660 for (int32_t k = 0; k < ra->size; ++k) {
661 buf += container_write(ra->containers[k], ra->typecodes[k], buf);
662 }
663 return buf - initbuf;
664}
665
666// Quickly checks whether there is a serialized bitmap at the pointer,
667// not exceeding size "maxbytes" in bytes. This function does not allocate
668// memory dynamically.
669//
670// This function returns 0 if and only if no valid bitmap is found.
671// Otherwise, it returns how many bytes are occupied.
672//
673size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
674 size_t bytestotal = sizeof(int32_t);// for cookie
675 if(bytestotal > maxbytes) return 0;
676 uint32_t cookie;
677 memcpy(&cookie, buf, sizeof(int32_t));
678 buf += sizeof(uint32_t);
679 if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
680 cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
681 return 0;
682 }
683 int32_t size;
684
685 if ((cookie & 0xFFFF) == SERIAL_COOKIE)
686 size = (cookie >> 16) + 1;
687 else {
688 bytestotal += sizeof(int32_t);
689 if(bytestotal > maxbytes) return 0;
690 memcpy(&size, buf, sizeof(int32_t));
691 buf += sizeof(uint32_t);
692 }
693 if (size > (1<<16)) {
694 return 0; // logically impossible
695 }
696 char *bitmapOfRunContainers = NULL;
697 bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
698 if (hasrun) {
699 int32_t s = (size + 7) / 8;
700 bytestotal += s;
701 if(bytestotal > maxbytes) return 0;
702 bitmapOfRunContainers = (char *)buf;
703 buf += s;
704 }
705 bytestotal += size * 2 * sizeof(uint16_t);
706 if(bytestotal > maxbytes) return 0;
707 uint16_t *keyscards = (uint16_t *)buf;
708 buf += size * 2 * sizeof(uint16_t);
709 if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
710 // skipping the offsets
711 bytestotal += size * 4;
712 if(bytestotal > maxbytes) return 0;
713 buf += size * 4;
714 }
715 // Reading the containers
716 for (int32_t k = 0; k < size; ++k) {
717 uint16_t tmp;
718 memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
719 uint32_t thiscard = tmp + 1;
720 bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
721 bool isrun = false;
722 if(hasrun) {
723 if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
724 isbitmap = false;
725 isrun = true;
726 }
727 }
728 if (isbitmap) {
729 size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
730 bytestotal += containersize;
731 if(bytestotal > maxbytes) return 0;
732 buf += containersize;
733 } else if (isrun) {
734 bytestotal += sizeof(uint16_t);
735 if(bytestotal > maxbytes) return 0;
736 uint16_t n_runs;
737 memcpy(&n_runs, buf, sizeof(uint16_t));
738 buf += sizeof(uint16_t);
739 size_t containersize = n_runs * sizeof(rle16_t);
740 bytestotal += containersize;
741 if(bytestotal > maxbytes) return 0;
742 buf += containersize;
743 } else {
744 size_t containersize = thiscard * sizeof(uint16_t);
745 bytestotal += containersize;
746 if(bytestotal > maxbytes) return 0;
747 buf += containersize;
748 }
749 }
750 return bytestotal;
751}
752
753
754// this function populates answer from the content of buf (reading up to maxbytes bytes).
755// The function returns false if a properly serialized bitmap cannot be found.
756// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
757bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
758 *readbytes = sizeof(int32_t);// for cookie
759 if(*readbytes > maxbytes) {
760 fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
761 return false;
762 }
763 uint32_t cookie;
764 memcpy(&cookie, buf, sizeof(int32_t));
765 buf += sizeof(uint32_t);
766 if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
767 cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
768 fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
769 cookie);
770 return false;
771 }
772 int32_t size;
773
774 if ((cookie & 0xFFFF) == SERIAL_COOKIE)
775 size = (cookie >> 16) + 1;
776 else {
777 *readbytes += sizeof(int32_t);
778 if(*readbytes > maxbytes) {
779 fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
780 return false;
781 }
782 memcpy(&size, buf, sizeof(int32_t));
783 buf += sizeof(uint32_t);
784 }
785 if (size > (1<<16)) {
786 fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
787 size);
788 return false; // logically impossible
789 }
790 const char *bitmapOfRunContainers = NULL;
791 bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
792 if (hasrun) {
793 int32_t s = (size + 7) / 8;
794 *readbytes += s;
795 if(*readbytes > maxbytes) {// data is corrupted?
796 fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
797 return false;
798 }
799 bitmapOfRunContainers = buf;
800 buf += s;
801 }
802 uint16_t *keyscards = (uint16_t *)buf;
803
804 *readbytes += size * 2 * sizeof(uint16_t);
805 if(*readbytes > maxbytes) {
806 fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
807 return false;
808 }
809 buf += size * 2 * sizeof(uint16_t);
810
811 bool is_ok = ra_init_with_capacity(answer, size);
812 if (!is_ok) {
813 fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
814 return false;
815 }
816
817 for (int32_t k = 0; k < size; ++k) {
818 uint16_t tmp;
819 memcpy(&tmp, keyscards + 2*k, sizeof(tmp));
820 answer->keys[k] = tmp;
821 }
822 if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
823 *readbytes += size * 4;
824 if(*readbytes > maxbytes) {// data is corrupted?
825 fprintf(stderr, "Ran out of bytes while reading offsets.\n");
826 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
827 return false;
828 }
829
830 // skipping the offsets
831 buf += size * 4;
832 }
833 // Reading the containers
834 for (int32_t k = 0; k < size; ++k) {
835 uint16_t tmp;
836 memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
837 uint32_t thiscard = tmp + 1;
838 bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
839 bool isrun = false;
840 if(hasrun) {
841 if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
842 isbitmap = false;
843 isrun = true;
844 }
845 }
846 if (isbitmap) {
847 // we check that the read is allowed
848 size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
849 *readbytes += containersize;
850 if(*readbytes > maxbytes) {
851 fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
852 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
853 return false;
854 }
855 // it is now safe to read
856 bitset_container_t *c = bitset_container_create();
857 if(c == NULL) {// memory allocation failure
858 fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
859 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
860 return false;
861 }
862 answer->size++;
863 buf += bitset_container_read(thiscard, c, buf);
864 answer->containers[k] = c;
865 answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE;
866 } else if (isrun) {
867 // we check that the read is allowed
868 *readbytes += sizeof(uint16_t);
869 if(*readbytes > maxbytes) {
870 fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
871 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
872 return false;
873 }
874 uint16_t n_runs;
875 memcpy(&n_runs, buf, sizeof(uint16_t));
876 size_t containersize = n_runs * sizeof(rle16_t);
877 *readbytes += containersize;
878 if(*readbytes > maxbytes) {// data is corrupted?
879 fprintf(stderr, "Running out of bytes while reading a run container.\n");
880 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
881 return false;
882 }
883 // it is now safe to read
884
885 run_container_t *c = run_container_create();
886 if(c == NULL) {// memory allocation failure
887 fprintf(stderr, "Failed to allocate memory for a run container.\n");
888 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
889 return false;
890 }
891 answer->size++;
892 buf += run_container_read(thiscard, c, buf);
893 answer->containers[k] = c;
894 answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE;
895 } else {
896 // we check that the read is allowed
897 size_t containersize = thiscard * sizeof(uint16_t);
898 *readbytes += containersize;
899 if(*readbytes > maxbytes) {// data is corrupted?
900 fprintf(stderr, "Running out of bytes while reading an array container.\n");
901 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
902 return false;
903 }
904 // it is now safe to read
905 array_container_t *c =
906 array_container_create_given_capacity(thiscard);
907 if(c == NULL) {// memory allocation failure
908 fprintf(stderr, "Failed to allocate memory for an array container.\n");
909 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
910 return false;
911 }
912 answer->size++;
913 buf += array_container_read(thiscard, c, buf);
914 answer->containers[k] = c;
915 answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE;
916 }
917 }
918 return true;
919}
920