1 | #include <assert.h> |
2 | #include <stdbool.h> |
3 | #include <stdio.h> |
4 | #include <stdlib.h> |
5 | #include <string.h> |
6 | #include <inttypes.h> |
7 | |
8 | #include <roaring/containers/bitset.h> |
9 | #include <roaring/containers/containers.h> |
10 | #include <roaring/roaring_array.h> |
11 | |
12 | // Convention: [0,ra->size) all elements are initialized |
13 | // [ra->size, ra->allocation_size) is junk and contains nothing needing freeing |
14 | |
15 | extern inline int32_t ra_get_size(const roaring_array_t *ra); |
16 | extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); |
17 | extern inline void *ra_get_container_at_index(const roaring_array_t *ra, |
18 | uint16_t i, uint8_t *typecode); |
19 | extern inline void ra_unshare_container_at_index(roaring_array_t *ra, |
20 | uint16_t i); |
21 | extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, |
22 | int32_t i, |
23 | uint16_t key, void *c, |
24 | uint8_t typecode); |
25 | extern inline void ra_set_container_at_index(const roaring_array_t *ra, |
26 | int32_t i, void *c, |
27 | uint8_t typecode); |
28 | |
29 | static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { |
30 | // because we combine the allocations, it is not possible to use realloc |
31 | /*ra->keys = |
32 | (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); |
33 | ra->containers = |
34 | (void **)realloc(ra->containers, sizeof(void *) * new_capacity); |
35 | ra->typecodes = |
36 | (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); |
37 | if (!ra->keys || !ra->containers || !ra->typecodes) { |
38 | free(ra->keys); |
39 | free(ra->containers); |
40 | free(ra->typecodes); |
41 | return false; |
42 | }*/ |
43 | |
44 | if ( new_capacity == 0 ) { |
45 | free(ra->containers); |
46 | ra->containers = NULL; |
47 | ra->keys = NULL; |
48 | ra->typecodes = NULL; |
49 | ra->allocation_size = 0; |
50 | return true; |
51 | } |
52 | const size_t memoryneeded = |
53 | new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
54 | void *bigalloc = malloc(memoryneeded); |
55 | if (!bigalloc) return false; |
56 | void *oldbigalloc = ra->containers; |
57 | void **newcontainers = (void **)bigalloc; |
58 | uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); |
59 | uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); |
60 | assert((char *)(newtypecodes + new_capacity) == |
61 | (char *)bigalloc + memoryneeded); |
62 | if(ra->size > 0) { |
63 | memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size); |
64 | memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); |
65 | memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); |
66 | } |
67 | ra->containers = newcontainers; |
68 | ra->keys = newkeys; |
69 | ra->typecodes = newtypecodes; |
70 | ra->allocation_size = new_capacity; |
71 | free(oldbigalloc); |
72 | return true; |
73 | } |
74 | |
75 | bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { |
76 | if (!new_ra) return false; |
77 | ra_init(new_ra); |
78 | |
79 | if (cap > INT32_MAX) { return false; } |
80 | |
81 | if(cap > 0) { |
82 | void *bigalloc = |
83 | malloc(cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t))); |
84 | if( bigalloc == NULL ) return false; |
85 | new_ra->containers = (void **)bigalloc; |
86 | new_ra->keys = (uint16_t *)(new_ra->containers + cap); |
87 | new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); |
88 | // Narrowing is safe because of above check |
89 | new_ra->allocation_size = (int32_t)cap; |
90 | } |
91 | return true; |
92 | } |
93 | |
94 | int ra_shrink_to_fit(roaring_array_t *ra) { |
95 | int savings = (ra->allocation_size - ra->size) * |
96 | (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
97 | if (!realloc_array(ra, ra->size)) { |
98 | return 0; |
99 | } |
100 | ra->allocation_size = ra->size; |
101 | return savings; |
102 | } |
103 | |
104 | void ra_init(roaring_array_t *new_ra) { |
105 | if (!new_ra) { return; } |
106 | new_ra->keys = NULL; |
107 | new_ra->containers = NULL; |
108 | new_ra->typecodes = NULL; |
109 | |
110 | new_ra->allocation_size = 0; |
111 | new_ra->size = 0; |
112 | new_ra->flags = 0; |
113 | } |
114 | |
115 | bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, |
116 | bool copy_on_write) { |
117 | if (!ra_init_with_capacity(dest, source->size)) return false; |
118 | dest->size = source->size; |
119 | dest->allocation_size = source->size; |
120 | if(dest->size > 0) { |
121 | memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); |
122 | } |
123 | // we go through the containers, turning them into shared containers... |
124 | if (copy_on_write) { |
125 | for (int32_t i = 0; i < dest->size; ++i) { |
126 | source->containers[i] = get_copy_of_container( |
127 | source->containers[i], &source->typecodes[i], copy_on_write); |
128 | } |
129 | // we do a shallow copy to the other bitmap |
130 | if(dest->size > 0) { |
131 | memcpy(dest->containers, source->containers, |
132 | dest->size * sizeof(void *)); |
133 | memcpy(dest->typecodes, source->typecodes, |
134 | dest->size * sizeof(uint8_t)); |
135 | } |
136 | } else { |
137 | if(dest->size > 0) { |
138 | memcpy(dest->typecodes, source->typecodes, |
139 | dest->size * sizeof(uint8_t)); |
140 | } |
141 | for (int32_t i = 0; i < dest->size; i++) { |
142 | dest->containers[i] = |
143 | container_clone(source->containers[i], source->typecodes[i]); |
144 | if (dest->containers[i] == NULL) { |
145 | for (int32_t j = 0; j < i; j++) { |
146 | container_free(dest->containers[j], dest->typecodes[j]); |
147 | } |
148 | ra_clear_without_containers(dest); |
149 | return false; |
150 | } |
151 | } |
152 | } |
153 | return true; |
154 | } |
155 | |
156 | bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, |
157 | bool copy_on_write) { |
158 | ra_clear_containers(dest); // we are going to overwrite them |
159 | if (dest->allocation_size < source->size) { |
160 | if (!realloc_array(dest, source->size)) { |
161 | return false; |
162 | } |
163 | } |
164 | dest->size = source->size; |
165 | memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); |
166 | // we go through the containers, turning them into shared containers... |
167 | if (copy_on_write) { |
168 | for (int32_t i = 0; i < dest->size; ++i) { |
169 | source->containers[i] = get_copy_of_container( |
170 | source->containers[i], &source->typecodes[i], copy_on_write); |
171 | } |
172 | // we do a shallow copy to the other bitmap |
173 | memcpy(dest->containers, source->containers, |
174 | dest->size * sizeof(void *)); |
175 | memcpy(dest->typecodes, source->typecodes, |
176 | dest->size * sizeof(uint8_t)); |
177 | } else { |
178 | memcpy(dest->typecodes, source->typecodes, |
179 | dest->size * sizeof(uint8_t)); |
180 | for (int32_t i = 0; i < dest->size; i++) { |
181 | dest->containers[i] = |
182 | container_clone(source->containers[i], source->typecodes[i]); |
183 | if (dest->containers[i] == NULL) { |
184 | for (int32_t j = 0; j < i; j++) { |
185 | container_free(dest->containers[j], dest->typecodes[j]); |
186 | } |
187 | ra_clear_without_containers(dest); |
188 | return false; |
189 | } |
190 | } |
191 | } |
192 | return true; |
193 | } |
194 | |
195 | void ra_clear_containers(roaring_array_t *ra) { |
196 | for (int32_t i = 0; i < ra->size; ++i) { |
197 | container_free(ra->containers[i], ra->typecodes[i]); |
198 | } |
199 | } |
200 | |
201 | void ra_reset(roaring_array_t *ra) { |
202 | ra_clear_containers(ra); |
203 | ra->size = 0; |
204 | ra_shrink_to_fit(ra); |
205 | } |
206 | |
207 | void ra_clear_without_containers(roaring_array_t *ra) { |
208 | free(ra->containers); // keys and typecodes are allocated with containers |
209 | ra->size = 0; |
210 | ra->allocation_size = 0; |
211 | ra->containers = NULL; |
212 | ra->keys = NULL; |
213 | ra->typecodes = NULL; |
214 | } |
215 | |
216 | void ra_clear(roaring_array_t *ra) { |
217 | ra_clear_containers(ra); |
218 | ra_clear_without_containers(ra); |
219 | } |
220 | |
221 | bool extend_array(roaring_array_t *ra, int32_t k) { |
222 | int32_t desired_size = ra->size + k; |
223 | assert(desired_size <= MAX_CONTAINERS); |
224 | if (desired_size > ra->allocation_size) { |
225 | int32_t new_capacity = |
226 | (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; |
227 | if (new_capacity > MAX_CONTAINERS) { |
228 | new_capacity = MAX_CONTAINERS; |
229 | } |
230 | |
231 | return realloc_array(ra, new_capacity); |
232 | } |
233 | return true; |
234 | } |
235 | |
236 | void ra_append(roaring_array_t *ra, uint16_t key, void *container, |
237 | uint8_t typecode) { |
238 | extend_array(ra, 1); |
239 | const int32_t pos = ra->size; |
240 | |
241 | ra->keys[pos] = key; |
242 | ra->containers[pos] = container; |
243 | ra->typecodes[pos] = typecode; |
244 | ra->size++; |
245 | } |
246 | |
247 | void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, |
248 | uint16_t index, bool copy_on_write) { |
249 | extend_array(ra, 1); |
250 | const int32_t pos = ra->size; |
251 | |
252 | // old contents is junk not needing freeing |
253 | ra->keys[pos] = sa->keys[index]; |
254 | // the shared container will be in two bitmaps |
255 | if (copy_on_write) { |
256 | sa->containers[index] = get_copy_of_container( |
257 | sa->containers[index], &sa->typecodes[index], copy_on_write); |
258 | ra->containers[pos] = sa->containers[index]; |
259 | ra->typecodes[pos] = sa->typecodes[index]; |
260 | } else { |
261 | ra->containers[pos] = |
262 | container_clone(sa->containers[index], sa->typecodes[index]); |
263 | ra->typecodes[pos] = sa->typecodes[index]; |
264 | } |
265 | ra->size++; |
266 | } |
267 | |
268 | void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, |
269 | uint16_t stopping_key, bool copy_on_write) { |
270 | for (int32_t i = 0; i < sa->size; ++i) { |
271 | if (sa->keys[i] >= stopping_key) break; |
272 | ra_append_copy(ra, sa, i, copy_on_write); |
273 | } |
274 | } |
275 | |
276 | void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, |
277 | int32_t start_index, int32_t end_index, |
278 | bool copy_on_write) { |
279 | extend_array(ra, end_index - start_index); |
280 | for (int32_t i = start_index; i < end_index; ++i) { |
281 | const int32_t pos = ra->size; |
282 | ra->keys[pos] = sa->keys[i]; |
283 | if (copy_on_write) { |
284 | sa->containers[i] = get_copy_of_container( |
285 | sa->containers[i], &sa->typecodes[i], copy_on_write); |
286 | ra->containers[pos] = sa->containers[i]; |
287 | ra->typecodes[pos] = sa->typecodes[i]; |
288 | } else { |
289 | ra->containers[pos] = |
290 | container_clone(sa->containers[i], sa->typecodes[i]); |
291 | ra->typecodes[pos] = sa->typecodes[i]; |
292 | } |
293 | ra->size++; |
294 | } |
295 | } |
296 | |
297 | void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, |
298 | uint16_t before_start, bool copy_on_write) { |
299 | int start_location = ra_get_index(sa, before_start); |
300 | if (start_location >= 0) |
301 | ++start_location; |
302 | else |
303 | start_location = -start_location - 1; |
304 | ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); |
305 | } |
306 | |
307 | void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, |
308 | int32_t start_index, int32_t end_index) { |
309 | extend_array(ra, end_index - start_index); |
310 | |
311 | for (int32_t i = start_index; i < end_index; ++i) { |
312 | const int32_t pos = ra->size; |
313 | |
314 | ra->keys[pos] = sa->keys[i]; |
315 | ra->containers[pos] = sa->containers[i]; |
316 | ra->typecodes[pos] = sa->typecodes[i]; |
317 | ra->size++; |
318 | } |
319 | } |
320 | |
321 | void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, |
322 | int32_t start_index, int32_t end_index, |
323 | bool copy_on_write) { |
324 | extend_array(ra, end_index - start_index); |
325 | |
326 | for (int32_t i = start_index; i < end_index; ++i) { |
327 | const int32_t pos = ra->size; |
328 | ra->keys[pos] = sa->keys[i]; |
329 | if (copy_on_write) { |
330 | sa->containers[i] = get_copy_of_container( |
331 | sa->containers[i], &sa->typecodes[i], copy_on_write); |
332 | ra->containers[pos] = sa->containers[i]; |
333 | ra->typecodes[pos] = sa->typecodes[i]; |
334 | } else { |
335 | ra->containers[pos] = |
336 | container_clone(sa->containers[i], sa->typecodes[i]); |
337 | ra->typecodes[pos] = sa->typecodes[i]; |
338 | } |
339 | ra->size++; |
340 | } |
341 | } |
342 | |
343 | void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) { |
344 | int i = binarySearch(ra->keys, (int32_t)ra->size, x); |
345 | if (i < 0) return NULL; |
346 | *typecode = ra->typecodes[i]; |
347 | return ra->containers[i]; |
348 | } |
349 | |
350 | extern inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, |
351 | uint8_t *typecode); |
352 | |
353 | void *ra_get_writable_container(roaring_array_t *ra, uint16_t x, |
354 | uint8_t *typecode) { |
355 | int i = binarySearch(ra->keys, (int32_t)ra->size, x); |
356 | if (i < 0) return NULL; |
357 | *typecode = ra->typecodes[i]; |
358 | return get_writable_copy_if_shared(ra->containers[i], typecode); |
359 | } |
360 | |
361 | void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i, |
362 | uint8_t *typecode) { |
363 | assert(i < ra->size); |
364 | *typecode = ra->typecodes[i]; |
365 | return get_writable_copy_if_shared(ra->containers[i], typecode); |
366 | } |
367 | |
368 | uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { |
369 | return ra->keys[i]; |
370 | } |
371 | |
372 | extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); |
373 | |
374 | extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, |
375 | int32_t pos); |
376 | |
377 | // everything skipped over is freed |
378 | int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { |
379 | while (pos < ra->size && ra->keys[pos] < x) { |
380 | container_free(ra->containers[pos], ra->typecodes[pos]); |
381 | ++pos; |
382 | } |
383 | return pos; |
384 | } |
385 | |
386 | void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, |
387 | void *container, uint8_t typecode) { |
388 | extend_array(ra, 1); |
389 | // May be an optimization opportunity with DIY memmove |
390 | memmove(&(ra->keys[i + 1]), &(ra->keys[i]), |
391 | sizeof(uint16_t) * (ra->size - i)); |
392 | memmove(&(ra->containers[i + 1]), &(ra->containers[i]), |
393 | sizeof(void *) * (ra->size - i)); |
394 | memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), |
395 | sizeof(uint8_t) * (ra->size - i)); |
396 | ra->keys[i] = key; |
397 | ra->containers[i] = container; |
398 | ra->typecodes[i] = typecode; |
399 | ra->size++; |
400 | } |
401 | |
402 | // note: Java routine set things to 0, enabling GC. |
403 | // Java called it "resize" but it was always used to downsize. |
404 | // Allowing upsize would break the conventions about |
405 | // valid containers below ra->size. |
406 | |
407 | void ra_downsize(roaring_array_t *ra, int32_t new_length) { |
408 | assert(new_length <= ra->size); |
409 | ra->size = new_length; |
410 | } |
411 | |
412 | void ra_remove_at_index(roaring_array_t *ra, int32_t i) { |
413 | memmove(&(ra->containers[i]), &(ra->containers[i + 1]), |
414 | sizeof(void *) * (ra->size - i - 1)); |
415 | memmove(&(ra->keys[i]), &(ra->keys[i + 1]), |
416 | sizeof(uint16_t) * (ra->size - i - 1)); |
417 | memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), |
418 | sizeof(uint8_t) * (ra->size - i - 1)); |
419 | ra->size--; |
420 | } |
421 | |
422 | void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { |
423 | container_free(ra->containers[i], ra->typecodes[i]); |
424 | ra_remove_at_index(ra, i); |
425 | } |
426 | |
427 | // used in inplace andNot only, to slide left the containers from |
428 | // the mutated RoaringBitmap that are after the largest container of |
429 | // the argument RoaringBitmap. In use it should be followed by a call to |
430 | // downsize. |
431 | // |
432 | void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, |
433 | uint32_t new_begin) { |
434 | assert(begin <= end); |
435 | assert(new_begin < begin); |
436 | |
437 | const int range = end - begin; |
438 | |
439 | // We ensure to previously have freed overwritten containers |
440 | // that are not copied elsewhere |
441 | |
442 | memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), |
443 | sizeof(void *) * range); |
444 | memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), |
445 | sizeof(uint16_t) * range); |
446 | memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), |
447 | sizeof(uint8_t) * range); |
448 | } |
449 | |
450 | void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { |
451 | if (distance > 0) { |
452 | extend_array(ra, distance); |
453 | } |
454 | int32_t srcpos = ra->size - count; |
455 | int32_t dstpos = srcpos + distance; |
456 | memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), |
457 | sizeof(uint16_t) * count); |
458 | memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), |
459 | sizeof(void *) * count); |
460 | memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), |
461 | sizeof(uint8_t) * count); |
462 | ra->size += distance; |
463 | } |
464 | |
465 | |
466 | size_t ra_size_in_bytes(roaring_array_t *ra) { |
467 | size_t cardinality = 0; |
468 | size_t tot_len = |
469 | 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) + |
470 | ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
471 | for (int32_t i = 0; i < ra->size; i++) { |
472 | tot_len += |
473 | (container_serialization_len(ra->containers[i], ra->typecodes[i]) + |
474 | sizeof(uint16_t)); |
475 | cardinality += |
476 | container_get_cardinality(ra->containers[i], ra->typecodes[i]); |
477 | } |
478 | |
479 | if ((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) { |
480 | return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t); |
481 | } |
482 | return tot_len; |
483 | } |
484 | |
485 | void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { |
486 | size_t ctr = 0; |
487 | for (int32_t i = 0; i < ra->size; ++i) { |
488 | int num_added = container_to_uint32_array( |
489 | ans + ctr, ra->containers[i], ra->typecodes[i], |
490 | ((uint32_t)ra->keys[i]) << 16); |
491 | ctr += num_added; |
492 | } |
493 | } |
494 | |
495 | bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { |
496 | size_t ctr = 0; |
497 | size_t dtr = 0; |
498 | |
499 | size_t t_limit = 0; |
500 | |
501 | bool first = false; |
502 | size_t first_skip = 0; |
503 | |
504 | uint32_t *t_ans = NULL; |
505 | size_t cur_len = 0; |
506 | |
507 | for (int i = 0; i < ra->size; ++i) { |
508 | |
509 | const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); |
510 | switch (ra->typecodes[i]) { |
511 | case BITSET_CONTAINER_TYPE_CODE: |
512 | t_limit = ((const bitset_container_t *)container)->cardinality; |
513 | break; |
514 | case ARRAY_CONTAINER_TYPE_CODE: |
515 | t_limit = ((const array_container_t *)container)->cardinality; |
516 | break; |
517 | case RUN_CONTAINER_TYPE_CODE: |
518 | t_limit = run_container_cardinality((const run_container_t *)container); |
519 | break; |
520 | } |
521 | if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ |
522 | if (!first){ |
523 | //first_skip = t_limit - (ctr + t_limit - offset); |
524 | first_skip = offset - ctr; |
525 | first = true; |
526 | t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit)); |
527 | if(t_ans == NULL) { |
528 | return false; |
529 | } |
530 | memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; |
531 | cur_len = first_skip + limit; |
532 | } |
533 | if (dtr + t_limit > cur_len){ |
534 | uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit)); |
535 | if(append_ans == NULL) { |
536 | if(t_ans != NULL) free(t_ans); |
537 | return false; |
538 | } |
539 | memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); |
540 | cur_len = cur_len + t_limit; |
541 | memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); |
542 | free(t_ans); |
543 | t_ans = append_ans; |
544 | } |
545 | switch (ra->typecodes[i]) { |
546 | case BITSET_CONTAINER_TYPE_CODE: |
547 | container_to_uint32_array( |
548 | t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i], |
549 | ((uint32_t)ra->keys[i]) << 16); |
550 | break; |
551 | case ARRAY_CONTAINER_TYPE_CODE: |
552 | container_to_uint32_array( |
553 | t_ans + dtr, (const array_container_t *)container, ra->typecodes[i], |
554 | ((uint32_t)ra->keys[i]) << 16); |
555 | break; |
556 | case RUN_CONTAINER_TYPE_CODE: |
557 | container_to_uint32_array( |
558 | t_ans + dtr, (const run_container_t *)container, ra->typecodes[i], |
559 | ((uint32_t)ra->keys[i]) << 16); |
560 | break; |
561 | } |
562 | dtr += t_limit; |
563 | } |
564 | ctr += t_limit; |
565 | if (dtr-first_skip >= limit) break; |
566 | } |
567 | if(t_ans != NULL) { |
568 | memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); |
569 | free(t_ans); |
570 | } |
571 | return true; |
572 | } |
573 | |
574 | bool ra_has_run_container(const roaring_array_t *ra) { |
575 | for (int32_t k = 0; k < ra->size; ++k) { |
576 | if (get_container_type(ra->containers[k], ra->typecodes[k]) == |
577 | RUN_CONTAINER_TYPE_CODE) |
578 | return true; |
579 | } |
580 | return false; |
581 | } |
582 | |
583 | uint32_t (const roaring_array_t *ra) { |
584 | if (ra_has_run_container(ra)) { |
585 | if (ra->size < |
586 | NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets |
587 | return 4 + (ra->size + 7) / 8 + 4 * ra->size; |
588 | } |
589 | return 4 + (ra->size + 7) / 8 + |
590 | 8 * ra->size; // - 4 because we pack the size with the cookie |
591 | } else { |
592 | return 4 + 4 + 8 * ra->size; |
593 | } |
594 | } |
595 | |
596 | size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { |
597 | size_t count = ra_portable_header_size(ra); |
598 | |
599 | for (int32_t k = 0; k < ra->size; ++k) { |
600 | count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); |
601 | } |
602 | return count; |
603 | } |
604 | |
605 | size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { |
606 | char *initbuf = buf; |
607 | uint32_t startOffset = 0; |
608 | bool hasrun = ra_has_run_container(ra); |
609 | if (hasrun) { |
610 | uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); |
611 | memcpy(buf, &cookie, sizeof(cookie)); |
612 | buf += sizeof(cookie); |
613 | uint32_t s = (ra->size + 7) / 8; |
614 | uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1); |
615 | assert(bitmapOfRunContainers != NULL); // todo: handle |
616 | for (int32_t i = 0; i < ra->size; ++i) { |
617 | if (get_container_type(ra->containers[i], ra->typecodes[i]) == |
618 | RUN_CONTAINER_TYPE_CODE) { |
619 | bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); |
620 | } |
621 | } |
622 | memcpy(buf, bitmapOfRunContainers, s); |
623 | buf += s; |
624 | free(bitmapOfRunContainers); |
625 | if (ra->size < NO_OFFSET_THRESHOLD) { |
626 | startOffset = 4 + 4 * ra->size + s; |
627 | } else { |
628 | startOffset = 4 + 8 * ra->size + s; |
629 | } |
630 | } else { // backwards compatibility |
631 | uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; |
632 | |
633 | memcpy(buf, &cookie, sizeof(cookie)); |
634 | buf += sizeof(cookie); |
635 | memcpy(buf, &ra->size, sizeof(ra->size)); |
636 | buf += sizeof(ra->size); |
637 | |
638 | startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; |
639 | } |
640 | for (int32_t k = 0; k < ra->size; ++k) { |
641 | memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); |
642 | buf += sizeof(ra->keys[k]); |
643 | // get_cardinality returns a value in [1,1<<16], subtracting one |
644 | // we get [0,1<<16 - 1] which fits in 16 bits |
645 | uint16_t card = (uint16_t)( |
646 | container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); |
647 | memcpy(buf, &card, sizeof(card)); |
648 | buf += sizeof(card); |
649 | } |
650 | if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { |
651 | // writing the containers offsets |
652 | for (int32_t k = 0; k < ra->size; k++) { |
653 | memcpy(buf, &startOffset, sizeof(startOffset)); |
654 | buf += sizeof(startOffset); |
655 | startOffset = |
656 | startOffset + |
657 | container_size_in_bytes(ra->containers[k], ra->typecodes[k]); |
658 | } |
659 | } |
660 | for (int32_t k = 0; k < ra->size; ++k) { |
661 | buf += container_write(ra->containers[k], ra->typecodes[k], buf); |
662 | } |
663 | return buf - initbuf; |
664 | } |
665 | |
666 | // Quickly checks whether there is a serialized bitmap at the pointer, |
667 | // not exceeding size "maxbytes" in bytes. This function does not allocate |
668 | // memory dynamically. |
669 | // |
670 | // This function returns 0 if and only if no valid bitmap is found. |
671 | // Otherwise, it returns how many bytes are occupied. |
672 | // |
673 | size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { |
674 | size_t bytestotal = sizeof(int32_t);// for cookie |
675 | if(bytestotal > maxbytes) return 0; |
676 | uint32_t cookie; |
677 | memcpy(&cookie, buf, sizeof(int32_t)); |
678 | buf += sizeof(uint32_t); |
679 | if ((cookie & 0xFFFF) != SERIAL_COOKIE && |
680 | cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { |
681 | return 0; |
682 | } |
683 | int32_t size; |
684 | |
685 | if ((cookie & 0xFFFF) == SERIAL_COOKIE) |
686 | size = (cookie >> 16) + 1; |
687 | else { |
688 | bytestotal += sizeof(int32_t); |
689 | if(bytestotal > maxbytes) return 0; |
690 | memcpy(&size, buf, sizeof(int32_t)); |
691 | buf += sizeof(uint32_t); |
692 | } |
693 | if (size > (1<<16)) { |
694 | return 0; // logically impossible |
695 | } |
696 | char *bitmapOfRunContainers = NULL; |
697 | bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; |
698 | if (hasrun) { |
699 | int32_t s = (size + 7) / 8; |
700 | bytestotal += s; |
701 | if(bytestotal > maxbytes) return 0; |
702 | bitmapOfRunContainers = (char *)buf; |
703 | buf += s; |
704 | } |
705 | bytestotal += size * 2 * sizeof(uint16_t); |
706 | if(bytestotal > maxbytes) return 0; |
707 | uint16_t *keyscards = (uint16_t *)buf; |
708 | buf += size * 2 * sizeof(uint16_t); |
709 | if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { |
710 | // skipping the offsets |
711 | bytestotal += size * 4; |
712 | if(bytestotal > maxbytes) return 0; |
713 | buf += size * 4; |
714 | } |
715 | // Reading the containers |
716 | for (int32_t k = 0; k < size; ++k) { |
717 | uint16_t tmp; |
718 | memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); |
719 | uint32_t thiscard = tmp + 1; |
720 | bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); |
721 | bool isrun = false; |
722 | if(hasrun) { |
723 | if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { |
724 | isbitmap = false; |
725 | isrun = true; |
726 | } |
727 | } |
728 | if (isbitmap) { |
729 | size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
730 | bytestotal += containersize; |
731 | if(bytestotal > maxbytes) return 0; |
732 | buf += containersize; |
733 | } else if (isrun) { |
734 | bytestotal += sizeof(uint16_t); |
735 | if(bytestotal > maxbytes) return 0; |
736 | uint16_t n_runs; |
737 | memcpy(&n_runs, buf, sizeof(uint16_t)); |
738 | buf += sizeof(uint16_t); |
739 | size_t containersize = n_runs * sizeof(rle16_t); |
740 | bytestotal += containersize; |
741 | if(bytestotal > maxbytes) return 0; |
742 | buf += containersize; |
743 | } else { |
744 | size_t containersize = thiscard * sizeof(uint16_t); |
745 | bytestotal += containersize; |
746 | if(bytestotal > maxbytes) return 0; |
747 | buf += containersize; |
748 | } |
749 | } |
750 | return bytestotal; |
751 | } |
752 | |
753 | |
754 | // this function populates answer from the content of buf (reading up to maxbytes bytes). |
755 | // The function returns false if a properly serialized bitmap cannot be found. |
756 | // if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. |
757 | bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { |
758 | *readbytes = sizeof(int32_t);// for cookie |
759 | if(*readbytes > maxbytes) { |
760 | fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n" ); |
761 | return false; |
762 | } |
763 | uint32_t cookie; |
764 | memcpy(&cookie, buf, sizeof(int32_t)); |
765 | buf += sizeof(uint32_t); |
766 | if ((cookie & 0xFFFF) != SERIAL_COOKIE && |
767 | cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { |
768 | fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n" , |
769 | cookie); |
770 | return false; |
771 | } |
772 | int32_t size; |
773 | |
774 | if ((cookie & 0xFFFF) == SERIAL_COOKIE) |
775 | size = (cookie >> 16) + 1; |
776 | else { |
777 | *readbytes += sizeof(int32_t); |
778 | if(*readbytes > maxbytes) { |
779 | fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n" ); |
780 | return false; |
781 | } |
782 | memcpy(&size, buf, sizeof(int32_t)); |
783 | buf += sizeof(uint32_t); |
784 | } |
785 | if (size > (1<<16)) { |
786 | fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n" , |
787 | size); |
788 | return false; // logically impossible |
789 | } |
790 | const char *bitmapOfRunContainers = NULL; |
791 | bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; |
792 | if (hasrun) { |
793 | int32_t s = (size + 7) / 8; |
794 | *readbytes += s; |
795 | if(*readbytes > maxbytes) {// data is corrupted? |
796 | fprintf(stderr, "Ran out of bytes while reading run bitmap.\n" ); |
797 | return false; |
798 | } |
799 | bitmapOfRunContainers = buf; |
800 | buf += s; |
801 | } |
802 | uint16_t *keyscards = (uint16_t *)buf; |
803 | |
804 | *readbytes += size * 2 * sizeof(uint16_t); |
805 | if(*readbytes > maxbytes) { |
806 | fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n" ); |
807 | return false; |
808 | } |
809 | buf += size * 2 * sizeof(uint16_t); |
810 | |
811 | bool is_ok = ra_init_with_capacity(answer, size); |
812 | if (!is_ok) { |
813 | fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n" ); |
814 | return false; |
815 | } |
816 | |
817 | for (int32_t k = 0; k < size; ++k) { |
818 | uint16_t tmp; |
819 | memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); |
820 | answer->keys[k] = tmp; |
821 | } |
822 | if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { |
823 | *readbytes += size * 4; |
824 | if(*readbytes > maxbytes) {// data is corrupted? |
825 | fprintf(stderr, "Ran out of bytes while reading offsets.\n" ); |
826 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
827 | return false; |
828 | } |
829 | |
830 | // skipping the offsets |
831 | buf += size * 4; |
832 | } |
833 | // Reading the containers |
834 | for (int32_t k = 0; k < size; ++k) { |
835 | uint16_t tmp; |
836 | memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); |
837 | uint32_t thiscard = tmp + 1; |
838 | bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); |
839 | bool isrun = false; |
840 | if(hasrun) { |
841 | if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { |
842 | isbitmap = false; |
843 | isrun = true; |
844 | } |
845 | } |
846 | if (isbitmap) { |
847 | // we check that the read is allowed |
848 | size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
849 | *readbytes += containersize; |
850 | if(*readbytes > maxbytes) { |
851 | fprintf(stderr, "Running out of bytes while reading a bitset container.\n" ); |
852 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
853 | return false; |
854 | } |
855 | // it is now safe to read |
856 | bitset_container_t *c = bitset_container_create(); |
857 | if(c == NULL) {// memory allocation failure |
858 | fprintf(stderr, "Failed to allocate memory for a bitset container.\n" ); |
859 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
860 | return false; |
861 | } |
862 | answer->size++; |
863 | buf += bitset_container_read(thiscard, c, buf); |
864 | answer->containers[k] = c; |
865 | answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; |
866 | } else if (isrun) { |
867 | // we check that the read is allowed |
868 | *readbytes += sizeof(uint16_t); |
869 | if(*readbytes > maxbytes) { |
870 | fprintf(stderr, "Running out of bytes while reading a run container (header).\n" ); |
871 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
872 | return false; |
873 | } |
874 | uint16_t n_runs; |
875 | memcpy(&n_runs, buf, sizeof(uint16_t)); |
876 | size_t containersize = n_runs * sizeof(rle16_t); |
877 | *readbytes += containersize; |
878 | if(*readbytes > maxbytes) {// data is corrupted? |
879 | fprintf(stderr, "Running out of bytes while reading a run container.\n" ); |
880 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
881 | return false; |
882 | } |
883 | // it is now safe to read |
884 | |
885 | run_container_t *c = run_container_create(); |
886 | if(c == NULL) {// memory allocation failure |
887 | fprintf(stderr, "Failed to allocate memory for a run container.\n" ); |
888 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
889 | return false; |
890 | } |
891 | answer->size++; |
892 | buf += run_container_read(thiscard, c, buf); |
893 | answer->containers[k] = c; |
894 | answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; |
895 | } else { |
896 | // we check that the read is allowed |
897 | size_t containersize = thiscard * sizeof(uint16_t); |
898 | *readbytes += containersize; |
899 | if(*readbytes > maxbytes) {// data is corrupted? |
900 | fprintf(stderr, "Running out of bytes while reading an array container.\n" ); |
901 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
902 | return false; |
903 | } |
904 | // it is now safe to read |
905 | array_container_t *c = |
906 | array_container_create_given_capacity(thiscard); |
907 | if(c == NULL) {// memory allocation failure |
908 | fprintf(stderr, "Failed to allocate memory for an array container.\n" ); |
909 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
910 | return false; |
911 | } |
912 | answer->size++; |
913 | buf += array_container_read(thiscard, c, buf); |
914 | answer->containers[k] = c; |
915 | answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; |
916 | } |
917 | } |
918 | return true; |
919 | } |
920 | |