/*
 * mixed_negation.c
 *
 */

#include <assert.h>
#include <string.h>

#include <roaring/array_util.h>
#include <roaring/bitset_util.h>
#include <roaring/containers/containers.h>
#include <roaring/containers/convert.h>
#include <roaring/containers/mixed_negation.h>
#include <roaring/containers/run.h>

// TODO: make simplified and optimized negation code across
// the full range.

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result to *dst.
 * The complement of a sufficiently sparse set will always be dense,
 * and hence a bitmap. We assume that dst is pre-allocated and a valid
 * bitset container. There can be no in-place version.
 */
void array_container_negation(const array_container_t *src,
                              bitset_container_t *dst) {
    uint64_t card = UINT64_C(1 << 16);
    bitset_container_set_all(dst);

    dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array,
                                                  (uint64_t)src->cardinality);
}
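
/* Illustrative usage sketch, kept as a comment (not part of the library).
 * The helpers array_container_create, array_container_add and
 * bitset_container_create are assumed from the rest of this codebase;
 * `arr` and `bs` are hypothetical names:
 *
 *   array_container_t *arr = array_container_create();
 *   bitset_container_t *bs = bitset_container_create();
 *   array_container_add(arr, 7);        // arr now holds {7}
 *   array_container_negation(arr, bs);  // bs holds [0, 65536) minus {7}
 *   assert(bs->cardinality == (1 << 16) - 1);
 */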

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result to *dst.
 * A true return value indicates a bitset result,
 * otherwise the result is an array container.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
bool bitset_container_negation(const bitset_container_t *src, void **dst) {
    return bitset_container_negation_range(src, 0, (1 << 16), dst);
}

/* inplace version */
/*
 * Same as bitset_container_negation except that if the output is to
 * be a bitset_container_t, then src is modified and no allocation is made.
 * If the output is to be an array_container_t, then the caller is
 * responsible for freeing the container.
 * In all cases, the result is in *dst.
 */
bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) {
    return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
}
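
/* Caller-side sketch of the ownership contract (hypothetical caller code;
 * `bs` is assumed to be a bitset_container_t *). On a false return, src has
 * already been freed and the caller owns a newly allocated array container:
 *
 *   void *result;
 *   if (bitset_container_negation_inplace(bs, &result)) {
 *       // result == bs: negated in place, no allocation happened
 *   } else {
 *       // bs has been freed; result is a fresh array_container_t *
 *       array_container_free((array_container_t *)result);
 *   }
 */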

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result to *dst.
 * Return values are the *_TYPECODES as defined in containers.h.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
int run_container_negation(const run_container_t *src, void **dst) {
    return run_container_negation_range(src, 0, (1 << 16), dst);
}

/*
 * Same as run_container_negation except that if the output is to
 * be a run_container_t and has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_inplace(run_container_t *src, void **dst) {
    return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
}

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst. Returns true if the result is a bitset container
 * and false for an array container. *dst is not preallocated.
 */
bool array_container_negation_range(const array_container_t *src,
                                    const int range_start, const int range_end,
                                    void **dst) {
    /* close port of the Java implementation */
    if (range_start >= range_end) {
        *dst = array_container_clone(src);
        return false;
    }

    // binarySearch follows the Java convention and returns
    // -(insertion point) - 1 when the key is absent.
    // index of the first value >= range_start:
    int32_t start_index =
        binarySearch(src->array, src->cardinality, (uint16_t)range_start);
    if (start_index < 0) start_index = -start_index - 1;

    // index of the last value <= range_end - 1:
    int32_t last_index =
        binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
    if (last_index < 0) last_index = -last_index - 2;

    const int32_t current_values_in_range = last_index - start_index + 1;
    const int32_t span_to_be_flipped = range_end - range_start;
    const int32_t new_values_in_range =
        span_to_be_flipped - current_values_in_range;
    const int32_t cardinality_change =
        new_values_in_range - current_values_in_range;
    const int32_t new_cardinality = src->cardinality + cardinality_change;

    if (new_cardinality > DEFAULT_MAX_SIZE) {
        bitset_container_t *temp = bitset_container_from_array(src);
        bitset_flip_range(temp->array, (uint32_t)range_start,
                          (uint32_t)range_end);
        temp->cardinality = new_cardinality;
        *dst = temp;
        return true;
    }

    array_container_t *arr =
        array_container_create_given_capacity(new_cardinality);
    *dst = (void *)arr;
    if (new_cardinality == 0) {
        arr->cardinality = new_cardinality;
        return false;  // we are done.
    }
    // copy stuff before the active area
    memcpy(arr->array, src->array, start_index * sizeof(uint16_t));

    // work on the range
    int32_t out_pos = start_index, in_pos = start_index;
    int32_t val_in_range = range_start;
    for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
        if ((uint16_t)val_in_range != src->array[in_pos]) {
            arr->array[out_pos++] = (uint16_t)val_in_range;
        } else {
            ++in_pos;
        }
    }
    for (; val_in_range < range_end; ++val_in_range)
        arr->array[out_pos++] = (uint16_t)val_in_range;

    // content after the active range
    memcpy(arr->array + out_pos, src->array + (last_index + 1),
           (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
    arr->cardinality = new_cardinality;
    return false;
}
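
/* Worked example of the cardinality arithmetic above (hypothetical values):
 * for src = {2, 4, 6} and the range [3, 7), binarySearch yields
 * start_index = 1 (first value >= 3) and last_index = 2 (last value <= 6),
 * so current_values_in_range = 2 and span_to_be_flipped = 4. Hence
 * new_values_in_range = 4 - 2 = 2, cardinality_change = 0 and
 * new_cardinality = 3; the array path then produces {2, 3, 5}. */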

/* Even when the result would fit, it is unclear how to make an
 * in-place version without inefficient copying.
 */
bool array_container_negation_range_inplace(array_container_t *src,
                                            const int range_start,
                                            const int range_end, void **dst) {
    bool ans =
        array_container_negation_range(src, range_start, range_end, dst);
    // TODO: try a real in-place version
    array_container_free(src);
    return ans;
}

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst. A true return value indicates a bitset result,
 * otherwise the result is an array container.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
bool bitset_container_negation_range(const bitset_container_t *src,
                                     const int range_start, const int range_end,
                                     void **dst) {
    // TODO: maybe consider a density-based estimate and sometimes build the
    // result directly as an array, with conversion back to a bitset if
    // wrong. Or determine the actual result cardinality, then go directly
    // for the known final container.

    // keep computation using bitsets as long as possible.
    bitset_container_t *t = bitset_container_clone(src);
    bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end);
    t->cardinality = bitset_container_compute_cardinality(t);

    if (t->cardinality > DEFAULT_MAX_SIZE) {
        *dst = t;
        return true;
    } else {
        *dst = array_container_from_bitset(t);
        bitset_container_free(t);
        return false;
    }
}
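
/* Caller-side sketch (hypothetical names): the boolean return doubles as a
 * type tag, so *dst must be cast accordingly before use or disposal:
 *
 *   void *out;
 *   if (bitset_container_negation_range(bs, 100, 200, &out)) {
 *       // out is a bitset_container_t *
 *       bitset_container_free((bitset_container_t *)out);
 *   } else {
 *       // out is an array_container_t *
 *       array_container_free((array_container_t *)out);
 *   }
 */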

/* inplace version */
/*
 * Same as bitset_container_negation_range except that if the output is to
 * be a bitset_container_t, then src is modified and no allocation is made.
 * If the output is to be an array_container_t, then the caller is
 * responsible for freeing the container.
 * In all cases, the result is in *dst.
 */
bool bitset_container_negation_range_inplace(bitset_container_t *src,
                                             const int range_start,
                                             const int range_end, void **dst) {
    bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end);
    src->cardinality = bitset_container_compute_cardinality(src);
    if (src->cardinality > DEFAULT_MAX_SIZE) {
        *dst = src;
        return true;
    }
    *dst = array_container_from_bitset(src);
    bitset_container_free(src);
    return false;
}

/* Negation across a range of the container.
 * Compute the negation of src and write the result to *dst.
 * Return values are the *_TYPECODES as defined in containers.h.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
int run_container_negation_range(const run_container_t *src,
                                 const int range_start, const int range_end,
                                 void **dst) {
    uint8_t return_typecode;

    // follows the Java implementation
    if (range_end <= range_start) {
        *dst = run_container_clone(src);
        return RUN_CONTAINER_TYPE_CODE;
    }

    run_container_t *ans =
        run_container_create_given_capacity(src->n_runs + 1);
    int k = 0;
    // copy the runs that start before the range
    for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
        ans->runs[k] = src->runs[k];
        ans->n_runs++;
    }

    // append the flipped range, then merge in the remaining runs exclusively
    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < src->n_runs; ++k) {
        run_container_smart_append_exclusive(ans, src->runs[k].value,
                                             src->runs[k].length);
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}
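
/* Trace of the exclusive-append logic (hypothetical input): for src runs
 * {[1,3], [10,2]} (i.e. values 1..4 and 10..12) and the range [0, 16),
 * no run starts before 0, so the run [0,15] is appended first and each src
 * run is then merged exclusively (XOR) against it, leaving runs
 * {[0,0], [5,4], [13,2]}, i.e. {0, 5..9, 13..15}, the complement of src
 * within [0, 16). */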

/*
 * Same as run_container_negation_range except that if the output is to
 * be a run_container_t and has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_range_inplace(run_container_t *src,
                                         const int range_start,
                                         const int range_end, void **dst) {
    uint8_t return_typecode;

    if (range_end <= range_start) {
        *dst = src;
        return RUN_CONTAINER_TYPE_CODE;
    }

    // TODO: efficient special case when range is 0 to 65535 inclusive

    if (src->capacity == src->n_runs) {
        // no excess room; more checking to see if the result can fit
        bool last_val_before_range = false;
        bool first_val_in_range = false;
        bool last_val_in_range = false;
        bool first_val_past_range = false;

        if (range_start > 0)
            last_val_before_range =
                run_container_contains(src, (uint16_t)(range_start - 1));
        first_val_in_range = run_container_contains(src, (uint16_t)range_start);

        if (last_val_before_range == first_val_in_range) {
            last_val_in_range =
                run_container_contains(src, (uint16_t)(range_end - 1));
            if (range_end != 0x10000)
                first_val_past_range =
                    run_container_contains(src, (uint16_t)range_end);

            if (last_val_in_range ==
                first_val_past_range) {  // no space for in-place
                int ans = run_container_negation_range(src, range_start,
                                                       range_end, dst);
                run_container_free(src);
                return ans;
            }
        }
    }
    // all other cases: result will fit

    run_container_t *ans = src;
    int my_nbr_runs = src->n_runs;

    ans->n_runs = 0;
    int k = 0;
    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
        // ans->runs[k] = src->runs[k]; (would be a self-copy)
        ans->n_runs++;
    }

    // as with the Java implementation, use locals to give ourselves a
    // buffer of depth 1
    rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0};
    rle16_t next = buffered;
    if (k < my_nbr_runs) buffered = src->runs[k];

    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < my_nbr_runs; ++k) {
        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];

        run_container_smart_append_exclusive(ans, buffered.value,
                                             buffered.length);
        buffered = next;
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}
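
/* Sketch of why the capacity check above suffices (hypothetical input):
 * flipping [range_start, range_end) changes the number of runs by at most
 * one, and the four boundary membership tests detect the growing case.
 * For src runs {[4,1]} = {4,5} with capacity 1 and range [0, 16),
 * first_val_in_range, last_val_in_range and first_val_past_range are all
 * false, so both parity tests come out equal and the out-of-place path is
 * taken, correctly so, since the result {0..3, 6..15} needs two runs. */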