1 | /* |
2 | * array.h |
3 | * |
4 | */ |
5 | |
6 | #ifndef INCLUDE_CONTAINERS_ARRAY_H_ |
7 | #define INCLUDE_CONTAINERS_ARRAY_H_ |
8 | |
9 | #include <string.h> |
10 | |
11 | #include <roaring/array_util.h> |
12 | #include <roaring/containers/perfparameters.h> |
13 | #include <roaring/portability.h> |
14 | #include <roaring/roaring_types.h> |
15 | |
16 | /* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */ |
17 | enum { DEFAULT_MAX_SIZE = 4096 }; |
18 | |
19 | /* struct array_container - sparse representation of a bitmap |
20 | * |
21 | * @cardinality: number of indices in `array` (and the bitmap) |
22 | * @capacity: allocated size of `array` |
23 | * @array: sorted list of integers |
24 | */ |
25 | struct array_container_s { |
26 | int32_t cardinality; |
27 | int32_t capacity; |
28 | uint16_t *array; |
29 | }; |
30 | |
31 | typedef struct array_container_s array_container_t; |
32 | |
33 | /* Create a new array with default. Return NULL in case of failure. See also |
34 | * array_container_create_given_capacity. */ |
35 | array_container_t *array_container_create(void); |
36 | |
37 | /* Create a new array with a specified capacity size. Return NULL in case of |
38 | * failure. */ |
39 | array_container_t *array_container_create_given_capacity(int32_t size); |
40 | |
41 | /* Create a new array containing all values in [min,max). */ |
42 | array_container_t * array_container_create_range(uint32_t min, uint32_t max); |
43 | |
44 | /* |
45 | * Shrink the capacity to the actual size, return the number of bytes saved. |
46 | */ |
47 | int array_container_shrink_to_fit(array_container_t *src); |
48 | |
49 | /* Free memory owned by `array'. */ |
50 | void array_container_free(array_container_t *array); |
51 | |
52 | /* Duplicate container */ |
53 | array_container_t *array_container_clone(const array_container_t *src); |
54 | |
55 | int32_t array_container_serialize(const array_container_t *container, |
56 | char *buf) WARN_UNUSED; |
57 | |
58 | uint32_t array_container_serialization_len(const array_container_t *container); |
59 | |
60 | void *array_container_deserialize(const char *buf, size_t buf_len); |
61 | |
62 | /* Get the cardinality of `array'. */ |
63 | static inline int array_container_cardinality(const array_container_t *array) { |
64 | return array->cardinality; |
65 | } |
66 | |
67 | static inline bool array_container_nonzero_cardinality( |
68 | const array_container_t *array) { |
69 | return array->cardinality > 0; |
70 | } |
71 | |
72 | /* Copy one container into another. We assume that they are distinct. */ |
73 | void array_container_copy(const array_container_t *src, array_container_t *dst); |
74 | |
75 | /* Add all the values in [min,max) (included) at a distance k*step from min. |
76 | The container must have a size less or equal to DEFAULT_MAX_SIZE after this |
77 | addition. */ |
78 | void array_container_add_from_range(array_container_t *arr, uint32_t min, |
79 | uint32_t max, uint16_t step); |
80 | |
81 | /* Set the cardinality to zero (does not release memory). */ |
82 | static inline void array_container_clear(array_container_t *array) { |
83 | array->cardinality = 0; |
84 | } |
85 | |
86 | static inline bool array_container_empty(const array_container_t *array) { |
87 | return array->cardinality == 0; |
88 | } |
89 | |
90 | /* check whether the cardinality is equal to the capacity (this does not mean |
91 | * that it contains 1<<16 elements) */ |
92 | static inline bool array_container_full(const array_container_t *array) { |
93 | return array->cardinality == array->capacity; |
94 | } |
95 | |
96 | |
97 | /* Compute the union of `src_1' and `src_2' and write the result to `dst' |
98 | * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ |
99 | void array_container_union(const array_container_t *src_1, |
100 | const array_container_t *src_2, |
101 | array_container_t *dst); |
102 | |
103 | /* symmetric difference, see array_container_union */ |
104 | void array_container_xor(const array_container_t *array_1, |
105 | const array_container_t *array_2, |
106 | array_container_t *out); |
107 | |
108 | /* Computes the intersection of src_1 and src_2 and write the result to |
109 | * dst. It is assumed that dst is distinct from both src_1 and src_2. */ |
110 | void array_container_intersection(const array_container_t *src_1, |
111 | const array_container_t *src_2, |
112 | array_container_t *dst); |
113 | |
114 | /* Check whether src_1 and src_2 intersect. */ |
115 | bool array_container_intersect(const array_container_t *src_1, |
116 | const array_container_t *src_2); |
117 | |
118 | |
119 | /* computers the size of the intersection between two arrays. |
120 | */ |
121 | int array_container_intersection_cardinality(const array_container_t *src_1, |
122 | const array_container_t *src_2); |
123 | |
124 | /* computes the intersection of array1 and array2 and write the result to |
125 | * array1. |
126 | * */ |
127 | void array_container_intersection_inplace(array_container_t *src_1, |
128 | const array_container_t *src_2); |
129 | |
130 | /* |
131 | * Write out the 16-bit integers contained in this container as a list of 32-bit |
132 | * integers using base |
133 | * as the starting value (it might be expected that base has zeros in its 16 |
134 | * least significant bits). |
135 | * The function returns the number of values written. |
136 | * The caller is responsible for allocating enough memory in out. |
137 | */ |
138 | int array_container_to_uint32_array(void *vout, const array_container_t *cont, |
139 | uint32_t base); |
140 | |
141 | /* Compute the number of runs */ |
142 | int32_t array_container_number_of_runs(const array_container_t *a); |
143 | |
144 | /* |
145 | * Print this container using printf (useful for debugging). |
146 | */ |
147 | void array_container_printf(const array_container_t *v); |
148 | |
149 | /* |
150 | * Print this container using printf as a comma-separated list of 32-bit |
151 | * integers starting at base. |
152 | */ |
153 | void array_container_printf_as_uint32_array(const array_container_t *v, |
154 | uint32_t base); |
155 | |
156 | /** |
157 | * Return the serialized size in bytes of a container having cardinality "card". |
158 | */ |
159 | static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { |
160 | return card * 2 + 2; |
161 | } |
162 | |
163 | /** |
164 | * Increase capacity to at least min. |
165 | * Whether the existing data needs to be copied over depends on the "preserve" |
166 | * parameter. If preserve is false, then the new content will be uninitialized, |
167 | * otherwise the old content is copied. |
168 | */ |
169 | void array_container_grow(array_container_t *container, int32_t min, |
170 | bool preserve); |
171 | |
172 | bool array_container_iterate(const array_container_t *cont, uint32_t base, |
173 | roaring_iterator iterator, void *ptr); |
174 | bool array_container_iterate64(const array_container_t *cont, uint32_t base, |
175 | roaring_iterator64 iterator, uint64_t high_bits, |
176 | void *ptr); |
177 | |
178 | /** |
179 | * Writes the underlying array to buf, outputs how many bytes were written. |
180 | * This is meant to be byte-by-byte compatible with the Java and Go versions of |
181 | * Roaring. |
182 | * The number of bytes written should be |
183 | * array_container_size_in_bytes(container). |
184 | * |
185 | */ |
186 | int32_t array_container_write(const array_container_t *container, char *buf); |
187 | /** |
188 | * Reads the instance from buf, outputs how many bytes were read. |
189 | * This is meant to be byte-by-byte compatible with the Java and Go versions of |
190 | * Roaring. |
191 | * The number of bytes read should be array_container_size_in_bytes(container). |
192 | * You need to provide the (known) cardinality. |
193 | */ |
194 | int32_t array_container_read(int32_t cardinality, array_container_t *container, |
195 | const char *buf); |
196 | |
197 | /** |
198 | * Return the serialized size in bytes of a container (see |
199 | * bitset_container_write) |
200 | * This is meant to be compatible with the Java and Go versions of Roaring and |
201 | * assumes |
202 | * that the cardinality of the container is already known. |
203 | * |
204 | */ |
205 | static inline int32_t array_container_size_in_bytes( |
206 | const array_container_t *container) { |
207 | return container->cardinality * sizeof(uint16_t); |
208 | } |
209 | |
210 | /** |
211 | * Return true if the two arrays have the same content. |
212 | */ |
213 | static inline bool array_container_equals( |
214 | const array_container_t *container1, |
215 | const array_container_t *container2) { |
216 | |
217 | if (container1->cardinality != container2->cardinality) { |
218 | return false; |
219 | } |
220 | return memequals(container1->array, container2->array, container1->cardinality*2); |
221 | } |
222 | |
223 | /** |
224 | * Return true if container1 is a subset of container2. |
225 | */ |
226 | bool array_container_is_subset(const array_container_t *container1, |
227 | const array_container_t *container2); |
228 | |
229 | /** |
230 | * If the element of given rank is in this container, supposing that the first |
231 | * element has rank start_rank, then the function returns true and sets element |
232 | * accordingly. |
233 | * Otherwise, it returns false and update start_rank. |
234 | */ |
235 | static inline bool array_container_select(const array_container_t *container, |
236 | uint32_t *start_rank, uint32_t rank, |
237 | uint32_t *element) { |
238 | int card = array_container_cardinality(container); |
239 | if (*start_rank + card <= rank) { |
240 | *start_rank += card; |
241 | return false; |
242 | } else { |
243 | *element = container->array[rank - *start_rank]; |
244 | return true; |
245 | } |
246 | } |
247 | |
248 | /* Computes the difference of array1 and array2 and write the result |
249 | * to array out. |
250 | * Array out does not need to be distinct from array_1 |
251 | */ |
252 | void array_container_andnot(const array_container_t *array_1, |
253 | const array_container_t *array_2, |
254 | array_container_t *out); |
255 | |
256 | /* Append x to the set. Assumes that the value is larger than any preceding |
257 | * values. */ |
258 | static inline void array_container_append(array_container_t *arr, |
259 | uint16_t pos) { |
260 | const int32_t capacity = arr->capacity; |
261 | |
262 | if (array_container_full(arr)) { |
263 | array_container_grow(arr, capacity + 1, true); |
264 | } |
265 | |
266 | arr->array[arr->cardinality++] = pos; |
267 | } |
268 | |
269 | /** |
270 | * Add value to the set if final cardinality doesn't exceed max_cardinality. |
271 | * Return code: |
272 | * 1 -- value was added |
273 | * 0 -- value was already present |
274 | * -1 -- value was not added because cardinality would exceed max_cardinality |
275 | */ |
276 | static inline int array_container_try_add(array_container_t *arr, uint16_t value, |
277 | int32_t max_cardinality) { |
278 | const int32_t cardinality = arr->cardinality; |
279 | |
280 | // best case, we can append. |
281 | if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) && |
282 | cardinality < max_cardinality) { |
283 | array_container_append(arr, value); |
284 | return 1; |
285 | } |
286 | |
287 | const int32_t loc = binarySearch(arr->array, cardinality, value); |
288 | |
289 | if (loc >= 0) { |
290 | return 0; |
291 | } else if (cardinality < max_cardinality) { |
292 | if (array_container_full(arr)) { |
293 | array_container_grow(arr, arr->capacity + 1, true); |
294 | } |
295 | const int32_t insert_idx = -loc - 1; |
296 | memmove(arr->array + insert_idx + 1, arr->array + insert_idx, |
297 | (cardinality - insert_idx) * sizeof(uint16_t)); |
298 | arr->array[insert_idx] = value; |
299 | arr->cardinality++; |
300 | return 1; |
301 | } else { |
302 | return -1; |
303 | } |
304 | } |
305 | |
306 | /* Add value to the set. Returns true if x was not already present. */ |
307 | static inline bool array_container_add(array_container_t *arr, uint16_t value) { |
308 | return array_container_try_add(arr, value, INT32_MAX) == 1; |
309 | } |
310 | |
311 | /* Remove x from the set. Returns true if x was present. */ |
312 | static inline bool array_container_remove(array_container_t *arr, |
313 | uint16_t pos) { |
314 | const int32_t idx = binarySearch(arr->array, arr->cardinality, pos); |
315 | const bool is_present = idx >= 0; |
316 | if (is_present) { |
317 | memmove(arr->array + idx, arr->array + idx + 1, |
318 | (arr->cardinality - idx - 1) * sizeof(uint16_t)); |
319 | arr->cardinality--; |
320 | } |
321 | |
322 | return is_present; |
323 | } |
324 | |
325 | /* Check whether x is present. */ |
326 | inline bool array_container_contains(const array_container_t *arr, |
327 | uint16_t pos) { |
328 | // return binarySearch(arr->array, arr->cardinality, pos) >= 0; |
329 | // binary search with fallback to linear search for short ranges |
330 | int32_t low = 0; |
331 | const uint16_t * carr = (const uint16_t *) arr->array; |
332 | int32_t high = arr->cardinality - 1; |
333 | // while (high - low >= 0) { |
334 | while(high >= low + 16) { |
335 | int32_t middleIndex = (low + high)>>1; |
336 | uint16_t middleValue = carr[middleIndex]; |
337 | if (middleValue < pos) { |
338 | low = middleIndex + 1; |
339 | } else if (middleValue > pos) { |
340 | high = middleIndex - 1; |
341 | } else { |
342 | return true; |
343 | } |
344 | } |
345 | |
346 | for (int i=low; i <= high; i++) { |
347 | uint16_t v = carr[i]; |
348 | if (v == pos) { |
349 | return true; |
350 | } |
351 | if ( v > pos ) return false; |
352 | } |
353 | return false; |
354 | |
355 | } |
356 | |
357 | //* Check whether a range of values from range_start (included) to range_end (excluded) is present. */ |
358 | static inline bool array_container_contains_range(const array_container_t *arr, |
359 | uint32_t range_start, uint32_t range_end) { |
360 | |
361 | const uint16_t rs_included = range_start; |
362 | const uint16_t re_included = range_end - 1; |
363 | |
364 | const uint16_t *carr = (const uint16_t *) arr->array; |
365 | |
366 | const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); |
367 | const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); |
368 | |
369 | return (start < arr->cardinality) && (end < arr->cardinality) |
370 | && (((uint16_t)(end - start)) == re_included - rs_included) |
371 | && (carr[start] == rs_included) && (carr[end] == re_included); |
372 | } |
373 | |
374 | /* Returns the smallest value (assumes not empty) */ |
375 | inline uint16_t array_container_minimum(const array_container_t *arr) { |
376 | if (arr->cardinality == 0) return 0; |
377 | return arr->array[0]; |
378 | } |
379 | |
380 | /* Returns the largest value (assumes not empty) */ |
381 | inline uint16_t array_container_maximum(const array_container_t *arr) { |
382 | if (arr->cardinality == 0) return 0; |
383 | return arr->array[arr->cardinality - 1]; |
384 | } |
385 | |
386 | /* Returns the number of values equal or smaller than x */ |
387 | inline int array_container_rank(const array_container_t *arr, uint16_t x) { |
388 | const int32_t idx = binarySearch(arr->array, arr->cardinality, x); |
389 | const bool is_present = idx >= 0; |
390 | if (is_present) { |
391 | return idx + 1; |
392 | } else { |
393 | return -idx - 1; |
394 | } |
395 | } |
396 | |
397 | /* Returns the index of the first value equal or smaller than x, or -1 */ |
398 | inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { |
399 | const int32_t idx = binarySearch(arr->array, arr->cardinality, x); |
400 | const bool is_present = idx >= 0; |
401 | if (is_present) { |
402 | return idx; |
403 | } else { |
404 | int32_t candidate = - idx - 1; |
405 | if(candidate < arr->cardinality) return candidate; |
406 | return -1; |
407 | } |
408 | } |
409 | |
410 | /* |
411 | * Adds all values in range [min,max] using hint: |
412 | * nvals_less is the number of array values less than $min |
413 | * nvals_greater is the number of array values greater than $max |
414 | */ |
415 | static inline void array_container_add_range_nvals(array_container_t *array, |
416 | uint32_t min, uint32_t max, |
417 | int32_t nvals_less, |
418 | int32_t nvals_greater) { |
419 | int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; |
420 | if (union_cardinality > array->capacity) { |
421 | array_container_grow(array, union_cardinality, true); |
422 | } |
423 | memmove(&(array->array[union_cardinality - nvals_greater]), |
424 | &(array->array[array->cardinality - nvals_greater]), |
425 | nvals_greater * sizeof(uint16_t)); |
426 | for (uint32_t i = 0; i <= max - min; i++) { |
427 | array->array[nvals_less + i] = min + i; |
428 | } |
429 | array->cardinality = union_cardinality; |
430 | } |
431 | |
432 | /** |
433 | * Adds all values in range [min,max]. |
434 | */ |
435 | static inline void array_container_add_range(array_container_t *array, |
436 | uint32_t min, uint32_t max) { |
437 | int32_t nvals_greater = count_greater(array->array, array->cardinality, max); |
438 | int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); |
439 | array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); |
440 | } |
441 | |
442 | /* |
443 | * Removes all elements array[pos] .. array[pos+count-1] |
444 | */ |
445 | static inline void array_container_remove_range(array_container_t *array, |
446 | uint32_t pos, uint32_t count) { |
447 | if (count != 0) { |
448 | memmove(&(array->array[pos]), &(array->array[pos+count]), |
449 | (array->cardinality - pos - count) * sizeof(uint16_t)); |
450 | array->cardinality -= count; |
451 | } |
452 | } |
453 | |
454 | #endif /* INCLUDE_CONTAINERS_ARRAY_H_ */ |
455 | |