1/*
2 * array.c
3 *
4 */
5
6#include <assert.h>
7#include <roaring/containers/array.h>
8#include <stdio.h>
9#include <stdlib.h>
10
11extern inline uint16_t array_container_minimum(const array_container_t *arr);
12extern inline uint16_t array_container_maximum(const array_container_t *arr);
13extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x);
14
15extern inline int array_container_rank(const array_container_t *arr,
16 uint16_t x);
17extern inline bool array_container_contains(const array_container_t *arr,
18 uint16_t pos);
19extern inline int array_container_cardinality(const array_container_t *array);
20extern inline bool array_container_nonzero_cardinality(const array_container_t *array);
21extern inline void array_container_clear(array_container_t *array);
22extern inline int32_t array_container_serialized_size_in_bytes(int32_t card);
23extern inline bool array_container_empty(const array_container_t *array);
24extern inline bool array_container_full(const array_container_t *array);
25
26/* Create a new array with capacity size. Return NULL in case of failure. */
27array_container_t *array_container_create_given_capacity(int32_t size) {
28 array_container_t *container;
29
30 if ((container = (array_container_t *)malloc(sizeof(array_container_t))) ==
31 NULL) {
32 return NULL;
33 }
34
35 if( size <= 0 ) { // we don't want to rely on malloc(0)
36 container->array = NULL;
37 } else if ((container->array = (uint16_t *)malloc(sizeof(uint16_t) * size)) ==
38 NULL) {
39 free(container);
40 return NULL;
41 }
42
43 container->capacity = size;
44 container->cardinality = 0;
45
46 return container;
47}
48
49/* Create a new array. Return NULL in case of failure. */
50array_container_t *array_container_create() {
51 return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
52}
53
54/* Create a new array containing all values in [min,max). */
55array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
56 array_container_t * answer = array_container_create_given_capacity(max - min + 1);
57 if(answer == NULL) return answer;
58 answer->cardinality = 0;
59 for(uint32_t k = min; k < max; k++) {
60 answer->array[answer->cardinality++] = k;
61 }
62 return answer;
63}
64
65/* Duplicate container */
66array_container_t *array_container_clone(const array_container_t *src) {
67 array_container_t *newcontainer =
68 array_container_create_given_capacity(src->capacity);
69 if (newcontainer == NULL) return NULL;
70
71 newcontainer->cardinality = src->cardinality;
72
73 memcpy(newcontainer->array, src->array,
74 src->cardinality * sizeof(uint16_t));
75
76 return newcontainer;
77}
78
79int array_container_shrink_to_fit(array_container_t *src) {
80 if (src->cardinality == src->capacity) return 0; // nothing to do
81 int savings = src->capacity - src->cardinality;
82 src->capacity = src->cardinality;
83 if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs
84 free(src->array);
85 src->array = NULL;
86 } else {
87 uint16_t *oldarray = src->array;
88 src->array =
89 (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t));
90 if (src->array == NULL) free(oldarray); // should never happen?
91 }
92 return savings;
93}
94
95/* Free memory. */
96void array_container_free(array_container_t *arr) {
97 if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise
98 free(arr->array);
99 arr->array = NULL; // pedantic
100 }
101 free(arr);
102}
103
104static inline int32_t grow_capacity(int32_t capacity) {
105 return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE
106 : capacity < 64 ? capacity * 2
107 : capacity < 1024 ? capacity * 3 / 2
108 : capacity * 5 / 4;
109}
110
111static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
112 return ((val < min) ? min : (val > max) ? max : val);
113}
114
115void array_container_grow(array_container_t *container, int32_t min,
116 bool preserve) {
117
118 int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
119 int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max);
120
121 container->capacity = new_capacity;
122 uint16_t *array = container->array;
123
124 if (preserve) {
125 container->array =
126 (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t));
127 if (container->array == NULL) free(array);
128 } else {
129 // Jon Strabala reports that some tools complain otherwise
130 if (array != NULL) {
131 free(array);
132 }
133 container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t));
134 }
135
136 // handle the case where realloc fails
137 if (container->array == NULL) {
138 fprintf(stderr, "could not allocate memory\n");
139 }
140 assert(container->array != NULL);
141}
142
143/* Copy one container into another. We assume that they are distinct. */
144void array_container_copy(const array_container_t *src,
145 array_container_t *dst) {
146 const int32_t cardinality = src->cardinality;
147 if (cardinality > dst->capacity) {
148 array_container_grow(dst, cardinality, false);
149 }
150
151 dst->cardinality = cardinality;
152 memcpy(dst->array, src->array, cardinality * sizeof(uint16_t));
153}
154
155void array_container_add_from_range(array_container_t *arr, uint32_t min,
156 uint32_t max, uint16_t step) {
157 for (uint32_t value = min; value < max; value += step) {
158 array_container_append(arr, value);
159 }
160}
161
162/* Computes the union of array1 and array2 and write the result to arrayout.
163 * It is assumed that arrayout is distinct from both array1 and array2.
164 */
165void array_container_union(const array_container_t *array_1,
166 const array_container_t *array_2,
167 array_container_t *out) {
168 const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
169 const int32_t max_cardinality = card_1 + card_2;
170
171 if (out->capacity < max_cardinality) {
172 array_container_grow(out, max_cardinality, false);
173 }
174 out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1,
175 array_2->array, card_2, out->array);
176
177}
178
179/* Computes the difference of array1 and array2 and write the result
180 * to array out.
181 * Array out does not need to be distinct from array_1
182 */
183void array_container_andnot(const array_container_t *array_1,
184 const array_container_t *array_2,
185 array_container_t *out) {
186 if (out->capacity < array_1->cardinality)
187 array_container_grow(out, array_1->cardinality, false);
188#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
189 if((out != array_1) && (out != array_2)) {
190 out->cardinality =
191 difference_vector16(array_1->array, array_1->cardinality,
192 array_2->array, array_2->cardinality, out->array);
193 } else {
194 out->cardinality =
195 difference_uint16(array_1->array, array_1->cardinality, array_2->array,
196 array_2->cardinality, out->array);
197 }
198#else
199 out->cardinality =
200 difference_uint16(array_1->array, array_1->cardinality, array_2->array,
201 array_2->cardinality, out->array);
202#endif
203}
204
205/* Computes the symmetric difference of array1 and array2 and write the
206 * result
207 * to arrayout.
208 * It is assumed that arrayout is distinct from both array1 and array2.
209 */
210void array_container_xor(const array_container_t *array_1,
211 const array_container_t *array_2,
212 array_container_t *out) {
213 const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
214 const int32_t max_cardinality = card_1 + card_2;
215 if (out->capacity < max_cardinality) {
216 array_container_grow(out, max_cardinality, false);
217 }
218
219#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
220 out->cardinality =
221 xor_vector16(array_1->array, array_1->cardinality, array_2->array,
222 array_2->cardinality, out->array);
223#else
224 out->cardinality =
225 xor_uint16(array_1->array, array_1->cardinality, array_2->array,
226 array_2->cardinality, out->array);
227#endif
228}
229
230static inline int32_t minimum_int32(int32_t a, int32_t b) {
231 return (a < b) ? a : b;
232}
233
234/* computes the intersection of array1 and array2 and write the result to
235 * arrayout.
236 * It is assumed that arrayout is distinct from both array1 and array2.
237 * */
238void array_container_intersection(const array_container_t *array1,
239 const array_container_t *array2,
240 array_container_t *out) {
241 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
242 min_card = minimum_int32(card_1, card_2);
243 const int threshold = 64; // subject to tuning
244#ifdef USEAVX
245 if (out->capacity < min_card) {
246 array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
247 false);
248 }
249#else
250 if (out->capacity < min_card) {
251 array_container_grow(out, min_card, false);
252 }
253#endif
254
255 if (card_1 * threshold < card_2) {
256 out->cardinality = intersect_skewed_uint16(
257 array1->array, card_1, array2->array, card_2, out->array);
258 } else if (card_2 * threshold < card_1) {
259 out->cardinality = intersect_skewed_uint16(
260 array2->array, card_2, array1->array, card_1, out->array);
261 } else {
262#ifdef USEAVX
263 out->cardinality = intersect_vector16(
264 array1->array, card_1, array2->array, card_2, out->array);
265#else
266 out->cardinality = intersect_uint16(array1->array, card_1,
267 array2->array, card_2, out->array);
268#endif
269 }
270}
271
272/* computes the size of the intersection of array1 and array2
273 * */
274int array_container_intersection_cardinality(const array_container_t *array1,
275 const array_container_t *array2) {
276 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
277 const int threshold = 64; // subject to tuning
278 if (card_1 * threshold < card_2) {
279 return intersect_skewed_uint16_cardinality(array1->array, card_1,
280 array2->array, card_2);
281 } else if (card_2 * threshold < card_1) {
282 return intersect_skewed_uint16_cardinality(array2->array, card_2,
283 array1->array, card_1);
284 } else {
285#ifdef USEAVX
286 return intersect_vector16_cardinality(array1->array, card_1,
287 array2->array, card_2);
288#else
289 return intersect_uint16_cardinality(array1->array, card_1,
290 array2->array, card_2);
291#endif
292 }
293}
294
295bool array_container_intersect(const array_container_t *array1,
296 const array_container_t *array2) {
297 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
298 const int threshold = 64; // subject to tuning
299 if (card_1 * threshold < card_2) {
300 return intersect_skewed_uint16_nonempty(
301 array1->array, card_1, array2->array, card_2);
302 } else if (card_2 * threshold < card_1) {
303 return intersect_skewed_uint16_nonempty(
304 array2->array, card_2, array1->array, card_1);
305 } else {
306 // we do not bother vectorizing
307 return intersect_uint16_nonempty(array1->array, card_1,
308 array2->array, card_2);
309 }
310}
311
312/* computes the intersection of array1 and array2 and write the result to
313 * array1.
314 * */
315void array_container_intersection_inplace(array_container_t *src_1,
316 const array_container_t *src_2) {
317 // todo: can any of this be vectorized?
318 int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
319 const int threshold = 64; // subject to tuning
320 if (card_1 * threshold < card_2) {
321 src_1->cardinality = intersect_skewed_uint16(
322 src_1->array, card_1, src_2->array, card_2, src_1->array);
323 } else if (card_2 * threshold < card_1) {
324 src_1->cardinality = intersect_skewed_uint16(
325 src_2->array, card_2, src_1->array, card_1, src_1->array);
326 } else {
327 src_1->cardinality = intersect_uint16(
328 src_1->array, card_1, src_2->array, card_2, src_1->array);
329 }
330}
331
332int array_container_to_uint32_array(void *vout, const array_container_t *cont,
333 uint32_t base) {
334 int outpos = 0;
335 uint32_t *out = (uint32_t *)vout;
336 for (int i = 0; i < cont->cardinality; ++i) {
337 const uint32_t val = base + cont->array[i];
338 memcpy(out + outpos, &val,
339 sizeof(uint32_t)); // should be compiled as a MOV on x64
340 outpos++;
341 }
342 return outpos;
343}
344
345void array_container_printf(const array_container_t *v) {
346 if (v->cardinality == 0) {
347 printf("{}");
348 return;
349 }
350 printf("{");
351 printf("%d", v->array[0]);
352 for (int i = 1; i < v->cardinality; ++i) {
353 printf(",%d", v->array[i]);
354 }
355 printf("}");
356}
357
358void array_container_printf_as_uint32_array(const array_container_t *v,
359 uint32_t base) {
360 if (v->cardinality == 0) {
361 return;
362 }
363 printf("%u", v->array[0] + base);
364 for (int i = 1; i < v->cardinality; ++i) {
365 printf(",%u", v->array[i] + base);
366 }
367}
368
369/* Compute the number of runs */
370int32_t array_container_number_of_runs(const array_container_t *a) {
371 // Can SIMD work here?
372 int32_t nr_runs = 0;
373 int32_t prev = -2;
374 for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) {
375 if (*p != prev + 1) nr_runs++;
376 prev = *p;
377 }
378 return nr_runs;
379}
380
381int32_t array_container_serialize(const array_container_t *container, char *buf) {
382 int32_t l, off;
383 uint16_t cardinality = (uint16_t)container->cardinality;
384
385 memcpy(buf, &cardinality, off = sizeof(cardinality));
386 l = sizeof(uint16_t) * container->cardinality;
387 if (l) memcpy(&buf[off], container->array, l);
388
389 return (off + l);
390}
391
392/**
393 * Writes the underlying array to buf, outputs how many bytes were written.
394 * The number of bytes written should be
395 * array_container_size_in_bytes(container).
396 *
397 */
398int32_t array_container_write(const array_container_t *container, char *buf) {
399 memcpy(buf, container->array, container->cardinality * sizeof(uint16_t));
400 return array_container_size_in_bytes(container);
401}
402
403bool array_container_is_subset(const array_container_t *container1,
404 const array_container_t *container2) {
405 if (container1->cardinality > container2->cardinality) {
406 return false;
407 }
408 int i1 = 0, i2 = 0;
409 while (i1 < container1->cardinality && i2 < container2->cardinality) {
410 if (container1->array[i1] == container2->array[i2]) {
411 i1++;
412 i2++;
413 } else if (container1->array[i1] > container2->array[i2]) {
414 i2++;
415 } else { // container1->array[i1] < container2->array[i2]
416 return false;
417 }
418 }
419 if (i1 == container1->cardinality) {
420 return true;
421 } else {
422 return false;
423 }
424}
425
426int32_t array_container_read(int32_t cardinality, array_container_t *container,
427 const char *buf) {
428 if (container->capacity < cardinality) {
429 array_container_grow(container, cardinality, false);
430 }
431 container->cardinality = cardinality;
432 memcpy(container->array, buf, container->cardinality * sizeof(uint16_t));
433
434 return array_container_size_in_bytes(container);
435}
436
437uint32_t array_container_serialization_len(const array_container_t *container) {
438 return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ +
439 (sizeof(uint16_t) * container->cardinality));
440}
441
442void *array_container_deserialize(const char *buf, size_t buf_len) {
443 array_container_t *ptr;
444
445 if (buf_len < 2) /* capacity converted to 16 bit */
446 return (NULL);
447 else
448 buf_len -= 2;
449
450 if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) !=
451 NULL) {
452 size_t len;
453 int32_t off;
454 uint16_t cardinality;
455
456 memcpy(&cardinality, buf, off = sizeof(cardinality));
457
458 ptr->capacity = ptr->cardinality = (uint32_t)cardinality;
459 len = sizeof(uint16_t) * ptr->cardinality;
460
461 if (len != buf_len) {
462 free(ptr);
463 return (NULL);
464 }
465
466 if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) *
467 ptr->capacity)) == NULL) {
468 free(ptr);
469 return (NULL);
470 }
471
472 if (len) memcpy(ptr->array, &buf[off], len);
473
474 /* Check if returned values are monotonically increasing */
475 for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) {
476 if (ptr->array[i] < j) {
477 free(ptr->array);
478 free(ptr);
479 return (NULL);
480 } else
481 j = ptr->array[i];
482 }
483 }
484
485 return (ptr);
486}
487
488bool array_container_iterate(const array_container_t *cont, uint32_t base,
489 roaring_iterator iterator, void *ptr) {
490 for (int i = 0; i < cont->cardinality; i++)
491 if (!iterator(cont->array[i] + base, ptr)) return false;
492 return true;
493}
494
495bool array_container_iterate64(const array_container_t *cont, uint32_t base,
496 roaring_iterator64 iterator, uint64_t high_bits,
497 void *ptr) {
498 for (int i = 0; i < cont->cardinality; i++)
499 if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr))
500 return false;
501 return true;
502}
503