1/*
2 * array.h
3 *
4 */
5
6#ifndef INCLUDE_CONTAINERS_ARRAY_H_
7#define INCLUDE_CONTAINERS_ARRAY_H_
8
9#include <string.h>
10
11#include <roaring/array_util.h>
12#include <roaring/containers/perfparameters.h>
13#include <roaring/portability.h>
14#include <roaring/roaring_types.h>
15
16/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */
17enum { DEFAULT_MAX_SIZE = 4096 };
18
19/* struct array_container - sparse representation of a bitmap
20 *
21 * @cardinality: number of indices in `array` (and the bitmap)
22 * @capacity: allocated size of `array`
23 * @array: sorted list of integers
24 */
25struct array_container_s {
26 int32_t cardinality;
27 int32_t capacity;
28 uint16_t *array;
29};
30
31typedef struct array_container_s array_container_t;
32
33/* Create a new array with default. Return NULL in case of failure. See also
34 * array_container_create_given_capacity. */
35array_container_t *array_container_create(void);
36
37/* Create a new array with a specified capacity size. Return NULL in case of
38 * failure. */
39array_container_t *array_container_create_given_capacity(int32_t size);
40
41/* Create a new array containing all values in [min,max). */
42array_container_t * array_container_create_range(uint32_t min, uint32_t max);
43
44/*
45 * Shrink the capacity to the actual size, return the number of bytes saved.
46 */
47int array_container_shrink_to_fit(array_container_t *src);
48
49/* Free memory owned by `array'. */
50void array_container_free(array_container_t *array);
51
52/* Duplicate container */
53array_container_t *array_container_clone(const array_container_t *src);
54
55int32_t array_container_serialize(const array_container_t *container,
56 char *buf) WARN_UNUSED;
57
58uint32_t array_container_serialization_len(const array_container_t *container);
59
60void *array_container_deserialize(const char *buf, size_t buf_len);
61
62/* Get the cardinality of `array'. */
63static inline int array_container_cardinality(const array_container_t *array) {
64 return array->cardinality;
65}
66
67static inline bool array_container_nonzero_cardinality(
68 const array_container_t *array) {
69 return array->cardinality > 0;
70}
71
72/* Copy one container into another. We assume that they are distinct. */
73void array_container_copy(const array_container_t *src, array_container_t *dst);
74
75/* Add all the values in [min,max) (included) at a distance k*step from min.
76 The container must have a size less or equal to DEFAULT_MAX_SIZE after this
77 addition. */
78void array_container_add_from_range(array_container_t *arr, uint32_t min,
79 uint32_t max, uint16_t step);
80
81/* Set the cardinality to zero (does not release memory). */
82static inline void array_container_clear(array_container_t *array) {
83 array->cardinality = 0;
84}
85
86static inline bool array_container_empty(const array_container_t *array) {
87 return array->cardinality == 0;
88}
89
90/* check whether the cardinality is equal to the capacity (this does not mean
91* that it contains 1<<16 elements) */
92static inline bool array_container_full(const array_container_t *array) {
93 return array->cardinality == array->capacity;
94}
95
96
97/* Compute the union of `src_1' and `src_2' and write the result to `dst'
98 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
99void array_container_union(const array_container_t *src_1,
100 const array_container_t *src_2,
101 array_container_t *dst);
102
103/* symmetric difference, see array_container_union */
104void array_container_xor(const array_container_t *array_1,
105 const array_container_t *array_2,
106 array_container_t *out);
107
108/* Computes the intersection of src_1 and src_2 and write the result to
109 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
110void array_container_intersection(const array_container_t *src_1,
111 const array_container_t *src_2,
112 array_container_t *dst);
113
114/* Check whether src_1 and src_2 intersect. */
115bool array_container_intersect(const array_container_t *src_1,
116 const array_container_t *src_2);
117
118
119/* computers the size of the intersection between two arrays.
120 */
121int array_container_intersection_cardinality(const array_container_t *src_1,
122 const array_container_t *src_2);
123
124/* computes the intersection of array1 and array2 and write the result to
125 * array1.
126 * */
127void array_container_intersection_inplace(array_container_t *src_1,
128 const array_container_t *src_2);
129
130/*
131 * Write out the 16-bit integers contained in this container as a list of 32-bit
132 * integers using base
133 * as the starting value (it might be expected that base has zeros in its 16
134 * least significant bits).
135 * The function returns the number of values written.
136 * The caller is responsible for allocating enough memory in out.
137 */
138int array_container_to_uint32_array(void *vout, const array_container_t *cont,
139 uint32_t base);
140
141/* Compute the number of runs */
142int32_t array_container_number_of_runs(const array_container_t *a);
143
144/*
145 * Print this container using printf (useful for debugging).
146 */
147void array_container_printf(const array_container_t *v);
148
149/*
150 * Print this container using printf as a comma-separated list of 32-bit
151 * integers starting at base.
152 */
153void array_container_printf_as_uint32_array(const array_container_t *v,
154 uint32_t base);
155
156/**
157 * Return the serialized size in bytes of a container having cardinality "card".
158 */
159static inline int32_t array_container_serialized_size_in_bytes(int32_t card) {
160 return card * 2 + 2;
161}
162
163/**
164 * Increase capacity to at least min.
165 * Whether the existing data needs to be copied over depends on the "preserve"
166 * parameter. If preserve is false, then the new content will be uninitialized,
167 * otherwise the old content is copied.
168 */
169void array_container_grow(array_container_t *container, int32_t min,
170 bool preserve);
171
172bool array_container_iterate(const array_container_t *cont, uint32_t base,
173 roaring_iterator iterator, void *ptr);
174bool array_container_iterate64(const array_container_t *cont, uint32_t base,
175 roaring_iterator64 iterator, uint64_t high_bits,
176 void *ptr);
177
178/**
179 * Writes the underlying array to buf, outputs how many bytes were written.
180 * This is meant to be byte-by-byte compatible with the Java and Go versions of
181 * Roaring.
182 * The number of bytes written should be
183 * array_container_size_in_bytes(container).
184 *
185 */
186int32_t array_container_write(const array_container_t *container, char *buf);
187/**
188 * Reads the instance from buf, outputs how many bytes were read.
189 * This is meant to be byte-by-byte compatible with the Java and Go versions of
190 * Roaring.
191 * The number of bytes read should be array_container_size_in_bytes(container).
192 * You need to provide the (known) cardinality.
193 */
194int32_t array_container_read(int32_t cardinality, array_container_t *container,
195 const char *buf);
196
197/**
198 * Return the serialized size in bytes of a container (see
199 * bitset_container_write)
200 * This is meant to be compatible with the Java and Go versions of Roaring and
201 * assumes
202 * that the cardinality of the container is already known.
203 *
204 */
205static inline int32_t array_container_size_in_bytes(
206 const array_container_t *container) {
207 return container->cardinality * sizeof(uint16_t);
208}
209
210/**
211 * Return true if the two arrays have the same content.
212 */
213static inline bool array_container_equals(
214 const array_container_t *container1,
215 const array_container_t *container2) {
216
217 if (container1->cardinality != container2->cardinality) {
218 return false;
219 }
220 return memequals(container1->array, container2->array, container1->cardinality*2);
221}
222
223/**
224 * Return true if container1 is a subset of container2.
225 */
226bool array_container_is_subset(const array_container_t *container1,
227 const array_container_t *container2);
228
229/**
230 * If the element of given rank is in this container, supposing that the first
231 * element has rank start_rank, then the function returns true and sets element
232 * accordingly.
233 * Otherwise, it returns false and update start_rank.
234 */
235static inline bool array_container_select(const array_container_t *container,
236 uint32_t *start_rank, uint32_t rank,
237 uint32_t *element) {
238 int card = array_container_cardinality(container);
239 if (*start_rank + card <= rank) {
240 *start_rank += card;
241 return false;
242 } else {
243 *element = container->array[rank - *start_rank];
244 return true;
245 }
246}
247
248/* Computes the difference of array1 and array2 and write the result
249 * to array out.
250 * Array out does not need to be distinct from array_1
251 */
252void array_container_andnot(const array_container_t *array_1,
253 const array_container_t *array_2,
254 array_container_t *out);
255
256/* Append x to the set. Assumes that the value is larger than any preceding
257 * values. */
258static inline void array_container_append(array_container_t *arr,
259 uint16_t pos) {
260 const int32_t capacity = arr->capacity;
261
262 if (array_container_full(arr)) {
263 array_container_grow(arr, capacity + 1, true);
264 }
265
266 arr->array[arr->cardinality++] = pos;
267}
268
269/**
270 * Add value to the set if final cardinality doesn't exceed max_cardinality.
271 * Return code:
272 * 1 -- value was added
273 * 0 -- value was already present
274 * -1 -- value was not added because cardinality would exceed max_cardinality
275 */
276static inline int array_container_try_add(array_container_t *arr, uint16_t value,
277 int32_t max_cardinality) {
278 const int32_t cardinality = arr->cardinality;
279
280 // best case, we can append.
281 if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) &&
282 cardinality < max_cardinality) {
283 array_container_append(arr, value);
284 return 1;
285 }
286
287 const int32_t loc = binarySearch(arr->array, cardinality, value);
288
289 if (loc >= 0) {
290 return 0;
291 } else if (cardinality < max_cardinality) {
292 if (array_container_full(arr)) {
293 array_container_grow(arr, arr->capacity + 1, true);
294 }
295 const int32_t insert_idx = -loc - 1;
296 memmove(arr->array + insert_idx + 1, arr->array + insert_idx,
297 (cardinality - insert_idx) * sizeof(uint16_t));
298 arr->array[insert_idx] = value;
299 arr->cardinality++;
300 return 1;
301 } else {
302 return -1;
303 }
304}
305
306/* Add value to the set. Returns true if x was not already present. */
307static inline bool array_container_add(array_container_t *arr, uint16_t value) {
308 return array_container_try_add(arr, value, INT32_MAX) == 1;
309}
310
311/* Remove x from the set. Returns true if x was present. */
312static inline bool array_container_remove(array_container_t *arr,
313 uint16_t pos) {
314 const int32_t idx = binarySearch(arr->array, arr->cardinality, pos);
315 const bool is_present = idx >= 0;
316 if (is_present) {
317 memmove(arr->array + idx, arr->array + idx + 1,
318 (arr->cardinality - idx - 1) * sizeof(uint16_t));
319 arr->cardinality--;
320 }
321
322 return is_present;
323}
324
325/* Check whether x is present. */
326inline bool array_container_contains(const array_container_t *arr,
327 uint16_t pos) {
328 // return binarySearch(arr->array, arr->cardinality, pos) >= 0;
329 // binary search with fallback to linear search for short ranges
330 int32_t low = 0;
331 const uint16_t * carr = (const uint16_t *) arr->array;
332 int32_t high = arr->cardinality - 1;
333 // while (high - low >= 0) {
334 while(high >= low + 16) {
335 int32_t middleIndex = (low + high)>>1;
336 uint16_t middleValue = carr[middleIndex];
337 if (middleValue < pos) {
338 low = middleIndex + 1;
339 } else if (middleValue > pos) {
340 high = middleIndex - 1;
341 } else {
342 return true;
343 }
344 }
345
346 for (int i=low; i <= high; i++) {
347 uint16_t v = carr[i];
348 if (v == pos) {
349 return true;
350 }
351 if ( v > pos ) return false;
352 }
353 return false;
354
355}
356
357//* Check whether a range of values from range_start (included) to range_end (excluded) is present. */
358static inline bool array_container_contains_range(const array_container_t *arr,
359 uint32_t range_start, uint32_t range_end) {
360
361 const uint16_t rs_included = range_start;
362 const uint16_t re_included = range_end - 1;
363
364 const uint16_t *carr = (const uint16_t *) arr->array;
365
366 const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included);
367 const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included);
368
369 return (start < arr->cardinality) && (end < arr->cardinality)
370 && (((uint16_t)(end - start)) == re_included - rs_included)
371 && (carr[start] == rs_included) && (carr[end] == re_included);
372}
373
374/* Returns the smallest value (assumes not empty) */
375inline uint16_t array_container_minimum(const array_container_t *arr) {
376 if (arr->cardinality == 0) return 0;
377 return arr->array[0];
378}
379
380/* Returns the largest value (assumes not empty) */
381inline uint16_t array_container_maximum(const array_container_t *arr) {
382 if (arr->cardinality == 0) return 0;
383 return arr->array[arr->cardinality - 1];
384}
385
386/* Returns the number of values equal or smaller than x */
387inline int array_container_rank(const array_container_t *arr, uint16_t x) {
388 const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
389 const bool is_present = idx >= 0;
390 if (is_present) {
391 return idx + 1;
392 } else {
393 return -idx - 1;
394 }
395}
396
397/* Returns the index of the first value equal or smaller than x, or -1 */
398inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
399 const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
400 const bool is_present = idx >= 0;
401 if (is_present) {
402 return idx;
403 } else {
404 int32_t candidate = - idx - 1;
405 if(candidate < arr->cardinality) return candidate;
406 return -1;
407 }
408}
409
410/*
411 * Adds all values in range [min,max] using hint:
412 * nvals_less is the number of array values less than $min
413 * nvals_greater is the number of array values greater than $max
414 */
415static inline void array_container_add_range_nvals(array_container_t *array,
416 uint32_t min, uint32_t max,
417 int32_t nvals_less,
418 int32_t nvals_greater) {
419 int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
420 if (union_cardinality > array->capacity) {
421 array_container_grow(array, union_cardinality, true);
422 }
423 memmove(&(array->array[union_cardinality - nvals_greater]),
424 &(array->array[array->cardinality - nvals_greater]),
425 nvals_greater * sizeof(uint16_t));
426 for (uint32_t i = 0; i <= max - min; i++) {
427 array->array[nvals_less + i] = min + i;
428 }
429 array->cardinality = union_cardinality;
430}
431
432/**
433 * Adds all values in range [min,max].
434 */
435static inline void array_container_add_range(array_container_t *array,
436 uint32_t min, uint32_t max) {
437 int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
438 int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
439 array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
440}
441
442/*
443 * Removes all elements array[pos] .. array[pos+count-1]
444 */
445static inline void array_container_remove_range(array_container_t *array,
446 uint32_t pos, uint32_t count) {
447 if (count != 0) {
448 memmove(&(array->array[pos]), &(array->array[pos+count]),
449 (array->cardinality - pos - count) * sizeof(uint16_t));
450 array->cardinality -= count;
451 }
452}
453
454#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */
455