1#include <stdio.h>
2#include <stdlib.h>
3
4#include <roaring/containers/run.h>
5#include <roaring/portability.h>
6
7extern inline uint16_t run_container_minimum(const run_container_t *run);
8extern inline uint16_t run_container_maximum(const run_container_t *run);
9extern inline int32_t interleavedBinarySearch(const rle16_t *array,
10 int32_t lenarray, uint16_t ikey);
11extern inline bool run_container_contains(const run_container_t *run,
12 uint16_t pos);
13extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
14extern inline bool run_container_is_full(const run_container_t *run);
15extern inline bool run_container_nonzero_cardinality(const run_container_t *r);
16extern inline void run_container_clear(run_container_t *run);
17extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
18extern inline run_container_t *run_container_create_range(uint32_t start,
19 uint32_t stop);
20
/* Add `pos' to `run'. Returns true if `pos' was not already present.
 * The run array is kept sorted, non-overlapping and fused: adjacent
 * runs are merged so the representation stays canonical. */
bool run_container_add(run_container_t *run, uint16_t pos) {
    int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
    if (index >= 0) return false;  // already there
    index = -index - 2;  // points to preceding value, possibly -1
    if (index >= 0) {  // possible match
        // offset of pos relative to the start of the preceding run
        int32_t offset = pos - run->runs[index].value;
        int32_t le = run->runs[index].length;
        if (offset <= le) return false;  // already there
        if (offset == le + 1) {
            // pos immediately follows the preceding run;
            // we may need to fuse
            if (index + 1 < run->n_runs) {
                if (run->runs[index + 1].value == pos + 1) {
                    // indeed fusion is needed: extending the preceding run
                    // by one makes it touch the next run, so merge the two
                    run->runs[index].length = run->runs[index + 1].value +
                                              run->runs[index + 1].length -
                                              run->runs[index].value;
                    recoverRoomAtIndex(run, (uint16_t)(index + 1));
                    return true;
                }
            }
            run->runs[index].length++;  // simple one-step extension
            return true;
        }
        if (index + 1 < run->n_runs) {
            // we may need to fuse with the following run
            if (run->runs[index + 1].value == pos + 1) {
                // indeed fusion is needed: pos extends the next run downward
                run->runs[index + 1].value = pos;
                run->runs[index + 1].length = run->runs[index + 1].length + 1;
                return true;
            }
        }
    }
    if (index == -1) {
        // pos precedes every run; we may need to extend the first run
        if (0 < run->n_runs) {
            if (run->runs[0].value == pos + 1) {
                run->runs[0].length++;
                run->runs[0].value--;
                return true;
            }
        }
    }
    // no adjacent run to fuse with: insert a fresh single-value run
    makeRoomAtIndex(run, (uint16_t)(index + 1));
    run->runs[index + 1].value = pos;
    run->runs[index + 1].length = 0;  // encoded length 0 means one value
    return true;
}
69
70/* Create a new run container. Return NULL in case of failure. */
71run_container_t *run_container_create_given_capacity(int32_t size) {
72 run_container_t *run;
73 /* Allocate the run container itself. */
74 if ((run = (run_container_t *)malloc(sizeof(run_container_t))) == NULL) {
75 return NULL;
76 }
77 if (size <= 0 ) { // we don't want to rely on malloc(0)
78 run->runs = NULL;
79 } else if ((run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size)) == NULL) {
80 free(run);
81 return NULL;
82 }
83 run->capacity = size;
84 run->n_runs = 0;
85 return run;
86}
87
88int run_container_shrink_to_fit(run_container_t *src) {
89 if (src->n_runs == src->capacity) return 0; // nothing to do
90 int savings = src->capacity - src->n_runs;
91 src->capacity = src->n_runs;
92 rle16_t *oldruns = src->runs;
93 src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t));
94 if (src->runs == NULL) free(oldruns); // should never happen?
95 return savings;
96}
/* Create a new run container with the default initial capacity.
 * Return NULL in case of failure. */
run_container_t *run_container_create(void) {
    return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
}
101
102run_container_t *run_container_clone(const run_container_t *src) {
103 run_container_t *run = run_container_create_given_capacity(src->capacity);
104 if (run == NULL) return NULL;
105 run->capacity = src->capacity;
106 run->n_runs = src->n_runs;
107 memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
108 return run;
109}
110
111/* Free memory. */
112void run_container_free(run_container_t *run) {
113 if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise
114 free(run->runs);
115 run->runs = NULL; // pedantic
116 }
117 free(run);
118}
119
120void run_container_grow(run_container_t *run, int32_t min, bool copy) {
121 int32_t newCapacity =
122 (run->capacity == 0)
123 ? RUN_DEFAULT_INIT_SIZE
124 : run->capacity < 64 ? run->capacity * 2
125 : run->capacity < 1024 ? run->capacity * 3 / 2
126 : run->capacity * 5 / 4;
127 if (newCapacity < min) newCapacity = min;
128 run->capacity = newCapacity;
129 assert(run->capacity >= min);
130 if (copy) {
131 rle16_t *oldruns = run->runs;
132 run->runs =
133 (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t));
134 if (run->runs == NULL) free(oldruns);
135 } else {
136 // Jon Strabala reports that some tools complain otherwise
137 if (run->runs != NULL) {
138 free(run->runs);
139 }
140 run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t));
141 }
142 // handle the case where realloc fails
143 if (run->runs == NULL) {
144 fprintf(stderr, "could not allocate memory\n");
145 }
146 assert(run->runs != NULL);
147}
148
149/* copy one container into another */
150void run_container_copy(const run_container_t *src, run_container_t *dst) {
151 const int32_t n_runs = src->n_runs;
152 if (src->n_runs > dst->capacity) {
153 run_container_grow(dst, n_runs, false);
154 }
155 dst->n_runs = n_runs;
156 memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs);
157}
158
159/* Compute the union of `src_1' and `src_2' and write the result to `dst'
160 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
161void run_container_union(const run_container_t *src_1,
162 const run_container_t *src_2, run_container_t *dst) {
163 // TODO: this could be a lot more efficient
164
165 // we start out with inexpensive checks
166 const bool if1 = run_container_is_full(src_1);
167 const bool if2 = run_container_is_full(src_2);
168 if (if1 || if2) {
169 if (if1) {
170 run_container_copy(src_1, dst);
171 return;
172 }
173 if (if2) {
174 run_container_copy(src_2, dst);
175 return;
176 }
177 }
178 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
179 if (dst->capacity < neededcapacity)
180 run_container_grow(dst, neededcapacity, false);
181 dst->n_runs = 0;
182 int32_t rlepos = 0;
183 int32_t xrlepos = 0;
184
185 rle16_t previousrle;
186 if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
187 previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
188 rlepos++;
189 } else {
190 previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
191 xrlepos++;
192 }
193
194 while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
195 rle16_t newrl;
196 if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
197 newrl = src_1->runs[rlepos];
198 rlepos++;
199 } else {
200 newrl = src_2->runs[xrlepos];
201 xrlepos++;
202 }
203 run_container_append(dst, newrl, &previousrle);
204 }
205 while (xrlepos < src_2->n_runs) {
206 run_container_append(dst, src_2->runs[xrlepos], &previousrle);
207 xrlepos++;
208 }
209 while (rlepos < src_1->n_runs) {
210 run_container_append(dst, src_1->runs[rlepos], &previousrle);
211 rlepos++;
212 }
213}
214
215/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
216 */
217void run_container_union_inplace(run_container_t *src_1,
218 const run_container_t *src_2) {
219 // TODO: this could be a lot more efficient
220
221 // we start out with inexpensive checks
222 const bool if1 = run_container_is_full(src_1);
223 const bool if2 = run_container_is_full(src_2);
224 if (if1 || if2) {
225 if (if1) {
226 return;
227 }
228 if (if2) {
229 run_container_copy(src_2, src_1);
230 return;
231 }
232 }
233 // we move the data to the end of the current array
234 const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
235 const int32_t neededcapacity = maxoutput + src_1->n_runs;
236 if (src_1->capacity < neededcapacity)
237 run_container_grow(src_1, neededcapacity, true);
238 memmove(src_1->runs + maxoutput, src_1->runs,
239 src_1->n_runs * sizeof(rle16_t));
240 rle16_t *inputsrc1 = src_1->runs + maxoutput;
241 const int32_t input1nruns = src_1->n_runs;
242 src_1->n_runs = 0;
243 int32_t rlepos = 0;
244 int32_t xrlepos = 0;
245
246 rle16_t previousrle;
247 if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
248 previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
249 rlepos++;
250 } else {
251 previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
252 xrlepos++;
253 }
254 while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
255 rle16_t newrl;
256 if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
257 newrl = inputsrc1[rlepos];
258 rlepos++;
259 } else {
260 newrl = src_2->runs[xrlepos];
261 xrlepos++;
262 }
263 run_container_append(src_1, newrl, &previousrle);
264 }
265 while (xrlepos < src_2->n_runs) {
266 run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
267 xrlepos++;
268 }
269 while (rlepos < input1nruns) {
270 run_container_append(src_1, inputsrc1[rlepos], &previousrle);
271 rlepos++;
272 }
273}
274
275/* Compute the symmetric difference of `src_1' and `src_2' and write the result
276 * to `dst'
277 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
278void run_container_xor(const run_container_t *src_1,
279 const run_container_t *src_2, run_container_t *dst) {
280 // don't bother to convert xor with full range into negation
281 // since negation is implemented similarly
282
283 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
284 if (dst->capacity < neededcapacity)
285 run_container_grow(dst, neededcapacity, false);
286
287 int32_t pos1 = 0;
288 int32_t pos2 = 0;
289 dst->n_runs = 0;
290
291 while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
292 if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
293 run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
294 src_1->runs[pos1].length);
295 pos1++;
296 } else {
297 run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
298 src_2->runs[pos2].length);
299 pos2++;
300 }
301 }
302 while (pos1 < src_1->n_runs) {
303 run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
304 src_1->runs[pos1].length);
305 pos1++;
306 }
307
308 while (pos2 < src_2->n_runs) {
309 run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
310 src_2->runs[pos2].length);
311 pos2++;
312 }
313}
314
315/* Compute the intersection of src_1 and src_2 and write the result to
316 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
317void run_container_intersection(const run_container_t *src_1,
318 const run_container_t *src_2,
319 run_container_t *dst) {
320 const bool if1 = run_container_is_full(src_1);
321 const bool if2 = run_container_is_full(src_2);
322 if (if1 || if2) {
323 if (if1) {
324 run_container_copy(src_2, dst);
325 return;
326 }
327 if (if2) {
328 run_container_copy(src_1, dst);
329 return;
330 }
331 }
332 // TODO: this could be a lot more efficient, could use SIMD optimizations
333 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
334 if (dst->capacity < neededcapacity)
335 run_container_grow(dst, neededcapacity, false);
336 dst->n_runs = 0;
337 int32_t rlepos = 0;
338 int32_t xrlepos = 0;
339 int32_t start = src_1->runs[rlepos].value;
340 int32_t end = start + src_1->runs[rlepos].length + 1;
341 int32_t xstart = src_2->runs[xrlepos].value;
342 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
343 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
344 if (end <= xstart) {
345 ++rlepos;
346 if (rlepos < src_1->n_runs) {
347 start = src_1->runs[rlepos].value;
348 end = start + src_1->runs[rlepos].length + 1;
349 }
350 } else if (xend <= start) {
351 ++xrlepos;
352 if (xrlepos < src_2->n_runs) {
353 xstart = src_2->runs[xrlepos].value;
354 xend = xstart + src_2->runs[xrlepos].length + 1;
355 }
356 } else { // they overlap
357 const int32_t lateststart = start > xstart ? start : xstart;
358 int32_t earliestend;
359 if (end == xend) { // improbable
360 earliestend = end;
361 rlepos++;
362 xrlepos++;
363 if (rlepos < src_1->n_runs) {
364 start = src_1->runs[rlepos].value;
365 end = start + src_1->runs[rlepos].length + 1;
366 }
367 if (xrlepos < src_2->n_runs) {
368 xstart = src_2->runs[xrlepos].value;
369 xend = xstart + src_2->runs[xrlepos].length + 1;
370 }
371 } else if (end < xend) {
372 earliestend = end;
373 rlepos++;
374 if (rlepos < src_1->n_runs) {
375 start = src_1->runs[rlepos].value;
376 end = start + src_1->runs[rlepos].length + 1;
377 }
378
379 } else { // end > xend
380 earliestend = xend;
381 xrlepos++;
382 if (xrlepos < src_2->n_runs) {
383 xstart = src_2->runs[xrlepos].value;
384 xend = xstart + src_2->runs[xrlepos].length + 1;
385 }
386 }
387 dst->runs[dst->n_runs].value = (uint16_t)lateststart;
388 dst->runs[dst->n_runs].length =
389 (uint16_t)(earliestend - lateststart - 1);
390 dst->n_runs++;
391 }
392 }
393}
394
395/* Compute the size of the intersection of src_1 and src_2 . */
396int run_container_intersection_cardinality(const run_container_t *src_1,
397 const run_container_t *src_2) {
398 const bool if1 = run_container_is_full(src_1);
399 const bool if2 = run_container_is_full(src_2);
400 if (if1 || if2) {
401 if (if1) {
402 return run_container_cardinality(src_2);
403 }
404 if (if2) {
405 return run_container_cardinality(src_1);
406 }
407 }
408 int answer = 0;
409 int32_t rlepos = 0;
410 int32_t xrlepos = 0;
411 int32_t start = src_1->runs[rlepos].value;
412 int32_t end = start + src_1->runs[rlepos].length + 1;
413 int32_t xstart = src_2->runs[xrlepos].value;
414 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
415 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
416 if (end <= xstart) {
417 ++rlepos;
418 if (rlepos < src_1->n_runs) {
419 start = src_1->runs[rlepos].value;
420 end = start + src_1->runs[rlepos].length + 1;
421 }
422 } else if (xend <= start) {
423 ++xrlepos;
424 if (xrlepos < src_2->n_runs) {
425 xstart = src_2->runs[xrlepos].value;
426 xend = xstart + src_2->runs[xrlepos].length + 1;
427 }
428 } else { // they overlap
429 const int32_t lateststart = start > xstart ? start : xstart;
430 int32_t earliestend;
431 if (end == xend) { // improbable
432 earliestend = end;
433 rlepos++;
434 xrlepos++;
435 if (rlepos < src_1->n_runs) {
436 start = src_1->runs[rlepos].value;
437 end = start + src_1->runs[rlepos].length + 1;
438 }
439 if (xrlepos < src_2->n_runs) {
440 xstart = src_2->runs[xrlepos].value;
441 xend = xstart + src_2->runs[xrlepos].length + 1;
442 }
443 } else if (end < xend) {
444 earliestend = end;
445 rlepos++;
446 if (rlepos < src_1->n_runs) {
447 start = src_1->runs[rlepos].value;
448 end = start + src_1->runs[rlepos].length + 1;
449 }
450
451 } else { // end > xend
452 earliestend = xend;
453 xrlepos++;
454 if (xrlepos < src_2->n_runs) {
455 xstart = src_2->runs[xrlepos].value;
456 xend = xstart + src_2->runs[xrlepos].length + 1;
457 }
458 }
459 answer += earliestend - lateststart;
460 }
461 }
462 return answer;
463}
464
465bool run_container_intersect(const run_container_t *src_1,
466 const run_container_t *src_2) {
467 const bool if1 = run_container_is_full(src_1);
468 const bool if2 = run_container_is_full(src_2);
469 if (if1 || if2) {
470 if (if1) {
471 return !run_container_empty(src_2);
472 }
473 if (if2) {
474 return !run_container_empty(src_1);
475 }
476 }
477 int32_t rlepos = 0;
478 int32_t xrlepos = 0;
479 int32_t start = src_1->runs[rlepos].value;
480 int32_t end = start + src_1->runs[rlepos].length + 1;
481 int32_t xstart = src_2->runs[xrlepos].value;
482 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
483 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
484 if (end <= xstart) {
485 ++rlepos;
486 if (rlepos < src_1->n_runs) {
487 start = src_1->runs[rlepos].value;
488 end = start + src_1->runs[rlepos].length + 1;
489 }
490 } else if (xend <= start) {
491 ++xrlepos;
492 if (xrlepos < src_2->n_runs) {
493 xstart = src_2->runs[xrlepos].value;
494 xend = xstart + src_2->runs[xrlepos].length + 1;
495 }
496 } else { // they overlap
497 return true;
498 }
499 }
500 return false;
501}
502
503
504/* Compute the difference of src_1 and src_2 and write the result to
505 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
506void run_container_andnot(const run_container_t *src_1,
507 const run_container_t *src_2, run_container_t *dst) {
508 // following Java implementation as of June 2016
509
510 if (dst->capacity < src_1->n_runs + src_2->n_runs)
511 run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
512
513 dst->n_runs = 0;
514
515 int rlepos1 = 0;
516 int rlepos2 = 0;
517 int32_t start = src_1->runs[rlepos1].value;
518 int32_t end = start + src_1->runs[rlepos1].length + 1;
519 int32_t start2 = src_2->runs[rlepos2].value;
520 int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
521
522 while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
523 if (end <= start2) {
524 // output the first run
525 dst->runs[dst->n_runs++] =
526 (rle16_t){.value = (uint16_t)start,
527 .length = (uint16_t)(end - start - 1)};
528 rlepos1++;
529 if (rlepos1 < src_1->n_runs) {
530 start = src_1->runs[rlepos1].value;
531 end = start + src_1->runs[rlepos1].length + 1;
532 }
533 } else if (end2 <= start) {
534 // exit the second run
535 rlepos2++;
536 if (rlepos2 < src_2->n_runs) {
537 start2 = src_2->runs[rlepos2].value;
538 end2 = start2 + src_2->runs[rlepos2].length + 1;
539 }
540 } else {
541 if (start < start2) {
542 dst->runs[dst->n_runs++] =
543 (rle16_t){.value = (uint16_t)start,
544 .length = (uint16_t)(start2 - start - 1)};
545 }
546 if (end2 < end) {
547 start = end2;
548 } else {
549 rlepos1++;
550 if (rlepos1 < src_1->n_runs) {
551 start = src_1->runs[rlepos1].value;
552 end = start + src_1->runs[rlepos1].length + 1;
553 }
554 }
555 }
556 }
557 if (rlepos1 < src_1->n_runs) {
558 dst->runs[dst->n_runs++] = (rle16_t){
559 .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)};
560 rlepos1++;
561 if (rlepos1 < src_1->n_runs) {
562 memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
563 sizeof(rle16_t) * (src_1->n_runs - rlepos1));
564 dst->n_runs += src_1->n_runs - rlepos1;
565 }
566 }
567}
568
569int run_container_to_uint32_array(void *vout, const run_container_t *cont,
570 uint32_t base) {
571 int outpos = 0;
572 uint32_t *out = (uint32_t *)vout;
573 for (int i = 0; i < cont->n_runs; ++i) {
574 uint32_t run_start = base + cont->runs[i].value;
575 uint16_t le = cont->runs[i].length;
576 for (int j = 0; j <= le; ++j) {
577 uint32_t val = run_start + j;
578 memcpy(out + outpos, &val,
579 sizeof(uint32_t)); // should be compiled as a MOV on x64
580 outpos++;
581 }
582 }
583 return outpos;
584}
585
586/*
587 * Print this container using printf (useful for debugging).
588 */
589void run_container_printf(const run_container_t *cont) {
590 for (int i = 0; i < cont->n_runs; ++i) {
591 uint16_t run_start = cont->runs[i].value;
592 uint16_t le = cont->runs[i].length;
593 printf("[%d,%d]", run_start, run_start + le);
594 }
595}
596
597/*
598 * Print this container using printf as a comma-separated list of 32-bit
599 * integers starting at base.
600 */
601void run_container_printf_as_uint32_array(const run_container_t *cont,
602 uint32_t base) {
603 if (cont->n_runs == 0) return;
604 {
605 uint32_t run_start = base + cont->runs[0].value;
606 uint16_t le = cont->runs[0].length;
607 printf("%u", run_start);
608 for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j);
609 }
610 for (int32_t i = 1; i < cont->n_runs; ++i) {
611 uint32_t run_start = base + cont->runs[i].value;
612 uint16_t le = cont->runs[i].length;
613 for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j);
614 }
615}
616
617int32_t run_container_serialize(const run_container_t *container, char *buf) {
618 int32_t l, off;
619
620 memcpy(buf, &container->n_runs, off = sizeof(container->n_runs));
621 memcpy(&buf[off], &container->capacity, sizeof(container->capacity));
622 off += sizeof(container->capacity);
623
624 l = sizeof(rle16_t) * container->n_runs;
625 memcpy(&buf[off], container->runs, l);
626 return (off + l);
627}
628
629int32_t run_container_write(const run_container_t *container, char *buf) {
630 memcpy(buf, &container->n_runs, sizeof(uint16_t));
631 memcpy(buf + sizeof(uint16_t), container->runs,
632 container->n_runs * sizeof(rle16_t));
633 return run_container_size_in_bytes(container);
634}
635
636int32_t run_container_read(int32_t cardinality, run_container_t *container,
637 const char *buf) {
638 (void)cardinality;
639 memcpy(&container->n_runs, buf, sizeof(uint16_t));
640 if (container->n_runs > container->capacity)
641 run_container_grow(container, container->n_runs, false);
642 if(container->n_runs > 0) {
643 memcpy(container->runs, buf + sizeof(uint16_t),
644 container->n_runs * sizeof(rle16_t));
645 }
646 return run_container_size_in_bytes(container);
647}
648
649uint32_t run_container_serialization_len(const run_container_t *container) {
650 return (sizeof(container->n_runs) + sizeof(container->capacity) +
651 sizeof(rle16_t) * container->n_runs);
652}
653
/* Reconstruct a run container from bytes produced by
 * run_container_serialize.  Returns NULL on malformed input or
 * allocation failure.  Layout: 4-byte n_runs, 4-byte capacity, then
 * n_runs rle16_t entries. */
void *run_container_deserialize(const char *buf, size_t buf_len) {
    run_container_t *ptr;

    if (buf_len < 8 /* n_runs + capacity */)
        return (NULL);
    else
        buf_len -= 8;  // buf_len now counts only the run payload bytes

    if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) {
        size_t len;
        int32_t off;

        memcpy(&ptr->n_runs, buf, off = 4);
        memcpy(&ptr->capacity, &buf[off], 4);
        off += 4;

        len = sizeof(rle16_t) * ptr->n_runs;

        // the remaining bytes must account for exactly n_runs entries
        if (len != buf_len) {
            free(ptr);
            return (NULL);
        }

        if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) {
            free(ptr);
            return (NULL);
        }

        memcpy(ptr->runs, &buf[off], len);

        /* Check if returned values are monotonically increasing */
        // NOTE(review): this validates run start values only; it does not
        // verify that consecutive runs are non-overlapping.
        for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) {
            if (ptr->runs[i].value < j) {
                free(ptr->runs);
                free(ptr);
                return (NULL);
            } else
                j = ptr->runs[i].value;
        }
    }

    return (ptr);
}
697
698bool run_container_iterate(const run_container_t *cont, uint32_t base,
699 roaring_iterator iterator, void *ptr) {
700 for (int i = 0; i < cont->n_runs; ++i) {
701 uint32_t run_start = base + cont->runs[i].value;
702 uint16_t le = cont->runs[i].length;
703
704 for (int j = 0; j <= le; ++j)
705 if (!iterator(run_start + j, ptr)) return false;
706 }
707 return true;
708}
709
710bool run_container_iterate64(const run_container_t *cont, uint32_t base,
711 roaring_iterator64 iterator, uint64_t high_bits,
712 void *ptr) {
713 for (int i = 0; i < cont->n_runs; ++i) {
714 uint32_t run_start = base + cont->runs[i].value;
715 uint16_t le = cont->runs[i].length;
716
717 for (int j = 0; j <= le; ++j)
718 if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
719 return false;
720 }
721 return true;
722}
723
724bool run_container_is_subset(const run_container_t *container1,
725 const run_container_t *container2) {
726 int i1 = 0, i2 = 0;
727 while (i1 < container1->n_runs && i2 < container2->n_runs) {
728 int start1 = container1->runs[i1].value;
729 int stop1 = start1 + container1->runs[i1].length;
730 int start2 = container2->runs[i2].value;
731 int stop2 = start2 + container2->runs[i2].length;
732 if (start1 < start2) {
733 return false;
734 } else { // start1 >= start2
735 if (stop1 < stop2) {
736 i1++;
737 } else if (stop1 == stop2) {
738 i1++;
739 i2++;
740 } else { // stop1 > stop2
741 i2++;
742 }
743 }
744 }
745 if (i1 == container1->n_runs) {
746 return true;
747 } else {
748 return false;
749 }
750}
751
// TODO: write smart_append_exclusive version to match the overloaded 1 param
// Java version (or is it even used?)

// follows the Java implementation closely
// length is the rle-value. Ie, run [10,12) uses a length value 1.
// Appends the run [start, start+length] with XOR semantics: any overlap
// with the container's last run is cancelled rather than merged.
// Assumes runs are appended in non-decreasing order of `start'.
void run_container_smart_append_exclusive(run_container_t *src,
                                          const uint16_t start,
                                          const uint16_t length) {
    int old_end;
    rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
    rle16_t *appended_last_run = src->runs + src->n_runs;

    if (!src->n_runs ||
        (start > (old_end = last_run->value + last_run->length + 1))) {
        // empty container, or disjoint from the last run: plain append
        *appended_last_run = (rle16_t){.value = start, .length = length};
        src->n_runs++;
        return;
    }
    if (old_end == start) {
        // contiguous with the last run: we merge
        last_run->length += (length + 1);
        return;
    }
    // exclusive (one-past-the-end) bound of the appended run
    int new_end = start + length + 1;

    if (start == last_run->value) {
        // both runs start at the same value: wipe out previous
        if (new_end < old_end) {
            // tail of the old run survives the cancellation
            *last_run = (rle16_t){.value = (uint16_t)new_end,
                                  .length = (uint16_t)(old_end - new_end - 1)};
            return;
        } else if (new_end > old_end) {
            // tail of the new run survives the cancellation
            *last_run = (rle16_t){.value = (uint16_t)old_end,
                                  .length = (uint16_t)(new_end - old_end - 1)};
            return;
        } else {
            // exact cancellation: drop the last run entirely
            src->n_runs--;
            return;
        }
    }
    // partial overlap: truncate the last run to end just before `start'
    last_run->length = start - last_run->value - 1;
    if (new_end < old_end) {
        // tail of the old run survives past the cancelled region
        *appended_last_run =
            (rle16_t){.value = (uint16_t)new_end,
                      .length = (uint16_t)(old_end - new_end - 1)};
        src->n_runs++;
    } else if (new_end > old_end) {
        // tail of the new run survives past the cancelled region
        *appended_last_run =
            (rle16_t){.value = (uint16_t)old_end,
                      .length = (uint16_t)(new_end - old_end - 1)};
        src->n_runs++;
    }
    // new_end == old_end: the overlapping parts cancel exactly; nothing
    // is appended beyond the truncated last run
}
805
806bool run_container_select(const run_container_t *container,
807 uint32_t *start_rank, uint32_t rank,
808 uint32_t *element) {
809 for (int i = 0; i < container->n_runs; i++) {
810 uint16_t length = container->runs[i].length;
811 if (rank <= *start_rank + length) {
812 uint16_t value = container->runs[i].value;
813 *element = value + rank - (*start_rank);
814 return true;
815 } else
816 *start_rank += length + 1;
817 }
818 return false;
819}
820
821int run_container_rank(const run_container_t *container, uint16_t x) {
822 int sum = 0;
823 uint32_t x32 = x;
824 for (int i = 0; i < container->n_runs; i++) {
825 uint32_t startpoint = container->runs[i].value;
826 uint32_t length = container->runs[i].length;
827 uint32_t endpoint = length + startpoint;
828 if (x <= endpoint) {
829 if (x < startpoint) break;
830 return sum + (x32 - startpoint) + 1;
831 } else {
832 sum += length + 1;
833 }
834 }
835 return sum;
836}
837