1/*
2 * mixed_andnot.c. More methods since operation is not symmetric,
3 * except no "wide" andnot , so no lazy options motivated.
4 */
5
6#include <assert.h>
7#include <string.h>
8
9#include <roaring/array_util.h>
10#include <roaring/bitset_util.h>
11#include <roaring/containers/containers.h>
12#include <roaring/containers/convert.h>
13#include <roaring/containers/mixed_andnot.h>
14#include <roaring/containers/perfparameters.h>
15
16/* Compute the andnot of src_1 and src_2 and write the result to
17 * dst, a valid array container that could be the same as dst.*/
18void array_bitset_container_andnot(const array_container_t *src_1,
19 const bitset_container_t *src_2,
20 array_container_t *dst) {
21 // follows Java implementation as of June 2016
22 if (dst->capacity < src_1->cardinality) {
23 array_container_grow(dst, src_1->cardinality, false);
24 }
25 int32_t newcard = 0;
26 const int32_t origcard = src_1->cardinality;
27 for (int i = 0; i < origcard; ++i) {
28 uint16_t key = src_1->array[i];
29 dst->array[newcard] = key;
30 newcard += 1 - bitset_container_contains(src_2, key);
31 }
32 dst->cardinality = newcard;
33}
34
35/* Compute the andnot of src_1 and src_2 and write the result to
36 * src_1 */
37
38void array_bitset_container_iandnot(array_container_t *src_1,
39 const bitset_container_t *src_2) {
40 array_bitset_container_andnot(src_1, src_2, src_1);
41}
42
43/* Compute the andnot of src_1 and src_2 and write the result to
44 * dst, which does not initially have a valid container.
45 * Return true for a bitset result; false for array
46 */
47
48bool bitset_array_container_andnot(const bitset_container_t *src_1,
49 const array_container_t *src_2, void **dst) {
50 // Java did this directly, but we have option of asm or avx
51 bitset_container_t *result = bitset_container_create();
52 bitset_container_copy(src_1, result);
53 result->cardinality =
54 (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality,
55 src_2->array, (uint64_t)src_2->cardinality);
56
57 // do required type conversions.
58 if (result->cardinality <= DEFAULT_MAX_SIZE) {
59 *dst = array_container_from_bitset(result);
60 bitset_container_free(result);
61 return false;
62 }
63 *dst = result;
64 return true;
65}
66
67/* Compute the andnot of src_1 and src_2 and write the result to
68 * dst (which has no container initially). It will modify src_1
69 * to be dst if the result is a bitset. Otherwise, it will
70 * free src_1 and dst will be a new array container. In both
71 * cases, the caller is responsible for deallocating dst.
72 * Returns true iff dst is a bitset */
73
74bool bitset_array_container_iandnot(bitset_container_t *src_1,
75 const array_container_t *src_2,
76 void **dst) {
77 *dst = src_1;
78 src_1->cardinality =
79 (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality,
80 src_2->array, (uint64_t)src_2->cardinality);
81
82 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
83 *dst = array_container_from_bitset(src_1);
84 bitset_container_free(src_1);
85 return false; // not bitset
86 } else
87 return true;
88}
89
90/* Compute the andnot of src_1 and src_2 and write the result to
91 * dst. Result may be either a bitset or an array container
92 * (returns "result is bitset"). dst does not initially have
93 * any container, but becomes either a bitset container (return
94 * result true) or an array container.
95 */
96
97bool run_bitset_container_andnot(const run_container_t *src_1,
98 const bitset_container_t *src_2, void **dst) {
99 // follows the Java implementation as of June 2016
100 int card = run_container_cardinality(src_1);
101 if (card <= DEFAULT_MAX_SIZE) {
102 // must be an array
103 array_container_t *answer = array_container_create_given_capacity(card);
104 answer->cardinality = 0;
105 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
106 rle16_t rle = src_1->runs[rlepos];
107 for (int run_value = rle.value; run_value <= rle.value + rle.length;
108 ++run_value) {
109 if (!bitset_container_get(src_2, (uint16_t)run_value)) {
110 answer->array[answer->cardinality++] = (uint16_t)run_value;
111 }
112 }
113 }
114 *dst = answer;
115 return false;
116 } else { // we guess it will be a bitset, though have to check guess when
117 // done
118 bitset_container_t *answer = bitset_container_clone(src_2);
119
120 uint32_t last_pos = 0;
121 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
122 rle16_t rle = src_1->runs[rlepos];
123
124 uint32_t start = rle.value;
125 uint32_t end = start + rle.length + 1;
126 bitset_reset_range(answer->array, last_pos, start);
127 bitset_flip_range(answer->array, start, end);
128 last_pos = end;
129 }
130 bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16));
131
132 answer->cardinality = bitset_container_compute_cardinality(answer);
133
134 if (answer->cardinality <= DEFAULT_MAX_SIZE) {
135 *dst = array_container_from_bitset(answer);
136 bitset_container_free(answer);
137 return false; // not bitset
138 }
139 *dst = answer;
140 return true; // bitset
141 }
142}
143
144/* Compute the andnot of src_1 and src_2 and write the result to
145 * dst. Result may be either a bitset or an array container
146 * (returns "result is bitset"). dst does not initially have
147 * any container, but becomes either a bitset container (return
148 * result true) or an array container.
149 */
150
151bool run_bitset_container_iandnot(run_container_t *src_1,
152 const bitset_container_t *src_2, void **dst) {
153 // dummy implementation
154 bool ans = run_bitset_container_andnot(src_1, src_2, dst);
155 run_container_free(src_1);
156 return ans;
157}
158
159/* Compute the andnot of src_1 and src_2 and write the result to
160 * dst. Result may be either a bitset or an array container
161 * (returns "result is bitset"). dst does not initially have
162 * any container, but becomes either a bitset container (return
163 * result true) or an array container.
164 */
165
166bool bitset_run_container_andnot(const bitset_container_t *src_1,
167 const run_container_t *src_2, void **dst) {
168 // follows Java implementation
169 bitset_container_t *result = bitset_container_create();
170
171 bitset_container_copy(src_1, result);
172 for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
173 rle16_t rle = src_2->runs[rlepos];
174 bitset_reset_range(result->array, rle.value,
175 rle.value + rle.length + UINT32_C(1));
176 }
177 result->cardinality = bitset_container_compute_cardinality(result);
178
179 if (result->cardinality <= DEFAULT_MAX_SIZE) {
180 *dst = array_container_from_bitset(result);
181 bitset_container_free(result);
182 return false; // not bitset
183 }
184 *dst = result;
185 return true; // bitset
186}
187
188/* Compute the andnot of src_1 and src_2 and write the result to
189 * dst (which has no container initially). It will modify src_1
190 * to be dst if the result is a bitset. Otherwise, it will
191 * free src_1 and dst will be a new array container. In both
192 * cases, the caller is responsible for deallocating dst.
193 * Returns true iff dst is a bitset */
194
195bool bitset_run_container_iandnot(bitset_container_t *src_1,
196 const run_container_t *src_2, void **dst) {
197 *dst = src_1;
198
199 for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
200 rle16_t rle = src_2->runs[rlepos];
201 bitset_reset_range(src_1->array, rle.value,
202 rle.value + rle.length + UINT32_C(1));
203 }
204 src_1->cardinality = bitset_container_compute_cardinality(src_1);
205
206 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
207 *dst = array_container_from_bitset(src_1);
208 bitset_container_free(src_1);
209 return false; // not bitset
210 } else
211 return true;
212}
213
214/* helper. a_out must be a valid array container with adequate capacity.
215 * Returns the cardinality of the output container. Partly Based on Java
216 * implementation Util.unsignedDifference.
217 *
218 * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
219 * to avoid advanceUntil?
220 */
221
222static int run_array_array_subtract(const run_container_t *r,
223 const array_container_t *a_in,
224 array_container_t *a_out) {
225 int out_card = 0;
226 int32_t in_array_pos =
227 -1; // since advanceUntil always assumes we start the search AFTER this
228
229 for (int rlepos = 0; rlepos < r->n_runs; rlepos++) {
230 int32_t start = r->runs[rlepos].value;
231 int32_t end = start + r->runs[rlepos].length + 1;
232
233 in_array_pos = advanceUntil(a_in->array, in_array_pos,
234 a_in->cardinality, (uint16_t)start);
235
236 if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
237 for (int32_t i = start; i < end; ++i)
238 a_out->array[out_card++] = (uint16_t)i;
239 } else {
240 uint16_t next_nonincluded = a_in->array[in_array_pos];
241 if (next_nonincluded >= end) {
242 // another case when run goes unaltered
243 for (int32_t i = start; i < end; ++i)
244 a_out->array[out_card++] = (uint16_t)i;
245 in_array_pos--; // ensure we see this item again if necessary
246 } else {
247 for (int32_t i = start; i < end; ++i)
248 if (i != next_nonincluded)
249 a_out->array[out_card++] = (uint16_t)i;
250 else // 0 should ensure we don't match
251 next_nonincluded =
252 (in_array_pos + 1 >= a_in->cardinality)
253 ? 0
254 : a_in->array[++in_array_pos];
255 in_array_pos--; // see again
256 }
257 }
258 }
259 return out_card;
260}
261
262/* dst does not indicate a valid container initially. Eventually it
263 * can become any type of container.
264 */
265
266int run_array_container_andnot(const run_container_t *src_1,
267 const array_container_t *src_2, void **dst) {
268 // follows the Java impl as of June 2016
269
270 int card = run_container_cardinality(src_1);
271 const int arbitrary_threshold = 32;
272
273 if (card <= arbitrary_threshold) {
274 if (src_2->cardinality == 0) {
275 *dst = run_container_clone(src_1);
276 return RUN_CONTAINER_TYPE_CODE;
277 }
278 // Java's "lazyandNot.toEfficientContainer" thing
279 run_container_t *answer = run_container_create_given_capacity(
280 card + array_container_cardinality(src_2));
281
282 int rlepos = 0;
283 int xrlepos = 0; // "x" is src_2
284 rle16_t rle = src_1->runs[rlepos];
285 int32_t start = rle.value;
286 int32_t end = start + rle.length + 1;
287 int32_t xstart = src_2->array[xrlepos];
288
289 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
290 if (end <= xstart) {
291 // output the first run
292 answer->runs[answer->n_runs++] =
293 (rle16_t){.value = (uint16_t)start,
294 .length = (uint16_t)(end - start - 1)};
295 rlepos++;
296 if (rlepos < src_1->n_runs) {
297 start = src_1->runs[rlepos].value;
298 end = start + src_1->runs[rlepos].length + 1;
299 }
300 } else if (xstart + 1 <= start) {
301 // exit the second run
302 xrlepos++;
303 if (xrlepos < src_2->cardinality) {
304 xstart = src_2->array[xrlepos];
305 }
306 } else {
307 if (start < xstart) {
308 answer->runs[answer->n_runs++] =
309 (rle16_t){.value = (uint16_t)start,
310 .length = (uint16_t)(xstart - start - 1)};
311 }
312 if (xstart + 1 < end) {
313 start = xstart + 1;
314 } else {
315 rlepos++;
316 if (rlepos < src_1->n_runs) {
317 start = src_1->runs[rlepos].value;
318 end = start + src_1->runs[rlepos].length + 1;
319 }
320 }
321 }
322 }
323 if (rlepos < src_1->n_runs) {
324 answer->runs[answer->n_runs++] =
325 (rle16_t){.value = (uint16_t)start,
326 .length = (uint16_t)(end - start - 1)};
327 rlepos++;
328 if (rlepos < src_1->n_runs) {
329 memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
330 (src_1->n_runs - rlepos) * sizeof(rle16_t));
331 answer->n_runs += (src_1->n_runs - rlepos);
332 }
333 }
334 uint8_t return_type;
335 *dst = convert_run_to_efficient_container(answer, &return_type);
336 if (answer != *dst) run_container_free(answer);
337 return return_type;
338 }
339 // else it's a bitmap or array
340
341 if (card <= DEFAULT_MAX_SIZE) {
342 array_container_t *ac = array_container_create_given_capacity(card);
343 // nb Java code used a generic iterator-based merge to compute
344 // difference
345 ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
346 *dst = ac;
347 return ARRAY_CONTAINER_TYPE_CODE;
348 }
349 bitset_container_t *ans = bitset_container_from_run(src_1);
350 bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
351 return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE
352 : ARRAY_CONTAINER_TYPE_CODE);
353}
354
355/* Compute the andnot of src_1 and src_2 and write the result to
356 * dst (which has no container initially). It will modify src_1
357 * to be dst if the result is a bitset. Otherwise, it will
358 * free src_1 and dst will be a new array container. In both
359 * cases, the caller is responsible for deallocating dst.
360 * Returns true iff dst is a bitset */
361
362int run_array_container_iandnot(run_container_t *src_1,
363 const array_container_t *src_2, void **dst) {
364 // dummy implementation same as June 2016 Java
365 int ans = run_array_container_andnot(src_1, src_2, dst);
366 run_container_free(src_1);
367 return ans;
368}
369
370/* dst must be a valid array container, allowed to be src_1 */
371
372void array_run_container_andnot(const array_container_t *src_1,
373 const run_container_t *src_2,
374 array_container_t *dst) {
375 // basically following Java impl as of June 2016
376 if (src_1->cardinality > dst->capacity) {
377 array_container_grow(dst, src_1->cardinality, false);
378 }
379
380 if (src_2->n_runs == 0) {
381 memmove(dst->array, src_1->array,
382 sizeof(uint16_t) * src_1->cardinality);
383 dst->cardinality = src_1->cardinality;
384 return;
385 }
386 int32_t run_start = src_2->runs[0].value;
387 int32_t run_end = run_start + src_2->runs[0].length;
388 int which_run = 0;
389
390 uint16_t val = 0;
391 int dest_card = 0;
392 for (int i = 0; i < src_1->cardinality; ++i) {
393 val = src_1->array[i];
394 if (val < run_start)
395 dst->array[dest_card++] = val;
396 else if (val <= run_end) {
397 ; // omitted item
398 } else {
399 do {
400 if (which_run + 1 < src_2->n_runs) {
401 ++which_run;
402 run_start = src_2->runs[which_run].value;
403 run_end = run_start + src_2->runs[which_run].length;
404
405 } else
406 run_start = run_end = (1 << 16) + 1;
407 } while (val > run_end);
408 --i;
409 }
410 }
411 dst->cardinality = dest_card;
412}
413
414/* dst does not indicate a valid container initially. Eventually it
415 * can become any kind of container.
416 */
417
418void array_run_container_iandnot(array_container_t *src_1,
419 const run_container_t *src_2) {
420 array_run_container_andnot(src_1, src_2, src_1);
421}
422
423/* dst does not indicate a valid container initially. Eventually it
424 * can become any kind of container.
425 */
426
427int run_run_container_andnot(const run_container_t *src_1,
428 const run_container_t *src_2, void **dst) {
429 run_container_t *ans = run_container_create();
430 run_container_andnot(src_1, src_2, ans);
431 uint8_t typecode_after;
432 *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
433 return typecode_after;
434}
435
436/* Compute the andnot of src_1 and src_2 and write the result to
437 * dst (which has no container initially). It will modify src_1
438 * to be dst if the result is a bitset. Otherwise, it will
439 * free src_1 and dst will be a new array container. In both
440 * cases, the caller is responsible for deallocating dst.
441 * Returns true iff dst is a bitset */
442
443int run_run_container_iandnot(run_container_t *src_1,
444 const run_container_t *src_2, void **dst) {
445 // following Java impl as of June 2016 (dummy)
446 int ans = run_run_container_andnot(src_1, src_2, dst);
447 run_container_free(src_1);
448 return ans;
449}
450
451/*
452 * dst is a valid array container and may be the same as src_1
453 */
454
455void array_array_container_andnot(const array_container_t *src_1,
456 const array_container_t *src_2,
457 array_container_t *dst) {
458 array_container_andnot(src_1, src_2, dst);
459}
460
461/* inplace array-array andnot will always be able to reuse the space of
462 * src_1 */
463void array_array_container_iandnot(array_container_t *src_1,
464 const array_container_t *src_2) {
465 array_container_andnot(src_1, src_2, src_1);
466}
467
468/* Compute the andnot of src_1 and src_2 and write the result to
469 * dst (which has no container initially). Return value is
470 * "dst is a bitset"
471 */
472
473bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
474 const bitset_container_t *src_2,
475 void **dst) {
476 bitset_container_t *ans = bitset_container_create();
477 int card = bitset_container_andnot(src_1, src_2, ans);
478 if (card <= DEFAULT_MAX_SIZE) {
479 *dst = array_container_from_bitset(ans);
480 bitset_container_free(ans);
481 return false; // not bitset
482 } else {
483 *dst = ans;
484 return true;
485 }
486}
487
488/* Compute the andnot of src_1 and src_2 and write the result to
489 * dst (which has no container initially). It will modify src_1
490 * to be dst if the result is a bitset. Otherwise, it will
491 * free src_1 and dst will be a new array container. In both
492 * cases, the caller is responsible for deallocating dst.
493 * Returns true iff dst is a bitset */
494
495bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
496 const bitset_container_t *src_2,
497 void **dst) {
498 int card = bitset_container_andnot(src_1, src_2, src_1);
499 if (card <= DEFAULT_MAX_SIZE) {
500 *dst = array_container_from_bitset(src_1);
501 bitset_container_free(src_1);
502 return false; // not bitset
503 } else {
504 *dst = src_1;
505 return true;
506 }
507}
508