mixed_andnot.c source code [CRoaring/src/containers/mixed_andnot.c]

1	/*
2	* mixed_andnot.c. More methods since operation is not symmetric,
3	* except no "wide" andnot , so no lazy options motivated.
4	*/
5
6	#include <assert.h>
7	#include <string.h>
8
9	#include <roaring/array_util.h>
10	#include <roaring/bitset_util.h>
11	#include <roaring/containers/containers.h>
12	#include <roaring/containers/convert.h>
13	#include <roaring/containers/mixed_andnot.h>
14	#include <roaring/containers/perfparameters.h>
15
16	/ Compute the andnot of src_1 and src_2 and write the result to*
17	* dst, a valid array container that could be the same as dst.*/
18	void array_bitset_container_andnot(const array_container_t *src_1,
19	const bitset_container_t *src_2,
20	array_container_t *dst) {
21	// follows Java implementation as of June 2016
22	if (dst->capacity < src_1->cardinality) {
23	array_container_grow(dst, src_1->cardinality, false);
24	}
25	int32_t newcard = `0`;
26	const int32_t origcard = src_1->cardinality;
27	for (int i = `0`; i < origcard; ++i) {
28	uint16_t key = src_1->array[i];
29	dst->array[newcard] = key;
30	newcard += `1` - bitset_container_contains(src_2, key);
31	}
32	dst->cardinality = newcard;
33	}
34
35	/ Compute the andnot of src_1 and src_2 and write the result to*
36	* src_1 */
37
38	void array_bitset_container_iandnot(array_container_t *src_1,
39	const bitset_container_t *src_2) {
40	array_bitset_container_andnot(src_1, src_2, src_1);
41	}
42
43	/ Compute the andnot of src_1 and src_2 and write the result to*
44	* dst, which does not initially have a valid container.
45	* Return true for a bitset result; false for array
46	*/
47
48	bool bitset_array_container_andnot(const bitset_container_t *src_1,
49	const array_container_t src_2, void* **dst) {
50	// Java did this directly, but we have option of asm or avx
51	bitset_container_t *result = bitset_container_create();
52	bitset_container_copy(src_1, result);
53	result->cardinality =
54	(int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality,
55	src_2->array, (uint64_t)src_2->cardinality);
56
57	// do required type conversions.
58	if (result->cardinality <= DEFAULT_MAX_SIZE) {
59	*dst = array_container_from_bitset(result);
60	bitset_container_free(result);
61	return false;
62	}
63	*dst = result;
64	return true;
65	}
66
67	/ Compute the andnot of src_1 and src_2 and write the result to*
68	* dst (which has no container initially). It will modify src_1
69	* to be dst if the result is a bitset. Otherwise, it will
70	* free src_1 and dst will be a new array container. In both
71	* cases, the caller is responsible for deallocating dst.
72	* Returns true iff dst is a bitset */
73
74	bool bitset_array_container_iandnot(bitset_container_t *src_1,
75	const array_container_t *src_2,
76	void **dst) {
77	*dst = src_1;
78	src_1->cardinality =
79	(int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality,
80	src_2->array, (uint64_t)src_2->cardinality);
81
82	if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
83	*dst = array_container_from_bitset(src_1);
84	bitset_container_free(src_1);
85	return false; // not bitset
86	} else
87	return true;
88	}
89
90	/ Compute the andnot of src_1 and src_2 and write the result to*
91	* dst. Result may be either a bitset or an array container
92	* (returns "result is bitset"). dst does not initially have
93	* any container, but becomes either a bitset container (return
94	* result true) or an array container.
95	*/
96
97	bool run_bitset_container_andnot(const run_container_t *src_1,
98	const bitset_container_t src_2, void* **dst) {
99	// follows the Java implementation as of June 2016
100	int card = run_container_cardinality(src_1);
101	if (card <= DEFAULT_MAX_SIZE) {
102	// must be an array
103	array_container_t *answer = array_container_create_given_capacity(card);
104	answer->cardinality = `0`;
105	for (int32_t rlepos = `0`; rlepos < src_1->n_runs; ++rlepos) {
106	rle16_t rle = src_1->runs[rlepos];
107	for (int run_value = rle.value; run_value <= rle.value + rle.length;
108	++run_value) {
109	if (!bitset_container_get(src_2, (uint16_t)run_value)) {
110	answer->array[answer->cardinality++] = (uint16_t)run_value;
111	}
112	}
113	}
114	*dst = answer;
115	return false;
116	} else { // we guess it will be a bitset, though have to check guess when
117	// done
118	bitset_container_t *answer = bitset_container_clone(src_2);
119
120	uint32_t last_pos = `0`;
121	for (int32_t rlepos = `0`; rlepos < src_1->n_runs; ++rlepos) {
122	rle16_t rle = src_1->runs[rlepos];
123
124	uint32_t start = rle.value;
125	uint32_t end = start + rle.length + `1`;
126	bitset_reset_range(answer->array, last_pos, start);
127	bitset_flip_range(answer->array, start, end);
128	last_pos = end;
129	}
130	bitset_reset_range(answer->array, last_pos, (uint32_t)(`1` << `16`));
131
132	answer->cardinality = bitset_container_compute_cardinality(answer);
133
134	if (answer->cardinality <= DEFAULT_MAX_SIZE) {
135	*dst = array_container_from_bitset(answer);
136	bitset_container_free(answer);
137	return false; // not bitset
138	}
139	*dst = answer;
140	return true; // bitset
141	}
142	}
143
144	/ Compute the andnot of src_1 and src_2 and write the result to*
145	* dst. Result may be either a bitset or an array container
146	* (returns "result is bitset"). dst does not initially have
147	* any container, but becomes either a bitset container (return
148	* result true) or an array container.
149	*/
150
151	bool run_bitset_container_iandnot(run_container_t *src_1,
152	const bitset_container_t src_2, void* **dst) {
153	// dummy implementation
154	bool ans = run_bitset_container_andnot(src_1, src_2, dst);
155	run_container_free(src_1);
156	return ans;
157	}
158
159	/ Compute the andnot of src_1 and src_2 and write the result to*
160	* dst. Result may be either a bitset or an array container
161	* (returns "result is bitset"). dst does not initially have
162	* any container, but becomes either a bitset container (return
163	* result true) or an array container.
164	*/
165
166	bool bitset_run_container_andnot(const bitset_container_t *src_1,
167	const run_container_t src_2, void* **dst) {
168	// follows Java implementation
169	bitset_container_t *result = bitset_container_create();
170
171	bitset_container_copy(src_1, result);
172	for (int32_t rlepos = `0`; rlepos < src_2->n_runs; ++rlepos) {
173	rle16_t rle = src_2->runs[rlepos];
174	bitset_reset_range(result->array, rle.value,
175	rle.value + rle.length + UINT32_C(`1`));
176	}
177	result->cardinality = bitset_container_compute_cardinality(result);
178
179	if (result->cardinality <= DEFAULT_MAX_SIZE) {
180	*dst = array_container_from_bitset(result);
181	bitset_container_free(result);
182	return false; // not bitset
183	}
184	*dst = result;
185	return true; // bitset
186	}
187
188	/ Compute the andnot of src_1 and src_2 and write the result to*
189	* dst (which has no container initially). It will modify src_1
190	* to be dst if the result is a bitset. Otherwise, it will
191	* free src_1 and dst will be a new array container. In both
192	* cases, the caller is responsible for deallocating dst.
193	* Returns true iff dst is a bitset */
194
195	bool bitset_run_container_iandnot(bitset_container_t *src_1,
196	const run_container_t src_2, void* **dst) {
197	*dst = src_1;
198
199	for (int32_t rlepos = `0`; rlepos < src_2->n_runs; ++rlepos) {
200	rle16_t rle = src_2->runs[rlepos];
201	bitset_reset_range(src_1->array, rle.value,
202	rle.value + rle.length + UINT32_C(`1`));
203	}
204	src_1->cardinality = bitset_container_compute_cardinality(src_1);
205
206	if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
207	*dst = array_container_from_bitset(src_1);
208	bitset_container_free(src_1);
209	return false; // not bitset
210	} else
211	return true;
212	}
213
214	/ helper. a_out must be a valid array container with adequate capacity.*
215	* Returns the cardinality of the output container. Partly Based on Java
216	* implementation Util.unsignedDifference.
217	*
218	* TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
219	* to avoid advanceUntil?
220	*/
221
222	static int run_array_array_subtract(const run_container_t *r,
223	const array_container_t *a_in,
224	array_container_t *a_out) {
225	int out_card = `0`;
226	int32_t in_array_pos =
227	-`1`; // since advanceUntil always assumes we start the search AFTER this
228
229	for (int rlepos = `0`; rlepos < r->n_runs; rlepos++) {
230	int32_t start = r->runs[rlepos].value;
231	int32_t end = start + r->runs[rlepos].length + `1`;
232
233	in_array_pos = advanceUntil(a_in->array, in_array_pos,
234	a_in->cardinality, (uint16_t)start);
235
236	if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
237	for (int32_t i = start; i < end; ++i)
238	a_out->array[out_card++] = (uint16_t)i;
239	} else {
240	uint16_t next_nonincluded = a_in->array[in_array_pos];
241	if (next_nonincluded >= end) {
242	// another case when run goes unaltered
243	for (int32_t i = start; i < end; ++i)
244	a_out->array[out_card++] = (uint16_t)i;
245	in_array_pos--; // ensure we see this item again if necessary
246	} else {
247	for (int32_t i = start; i < end; ++i)
248	if (i != next_nonincluded)
249	a_out->array[out_card++] = (uint16_t)i;
250	else // 0 should ensure we don't match
251	next_nonincluded =
252	(in_array_pos + `1` >= a_in->cardinality)
253	? `0`
254	: a_in->array[++in_array_pos];
255	in_array_pos--; // see again
256	}
257	}
258	}
259	return out_card;
260	}
261
262	/ dst does not indicate a valid container initially. Eventually it*
263	* can become any type of container.
264	*/
265
266	int run_array_container_andnot(const run_container_t *src_1,
267	const array_container_t src_2, void* **dst) {
268	// follows the Java impl as of June 2016
269
270	int card = run_container_cardinality(src_1);
271	const int arbitrary_threshold = `32`;
272
273	if (card <= arbitrary_threshold) {
274	if (src_2->cardinality == `0`) {
275	*dst = run_container_clone(src_1);
276	return RUN_CONTAINER_TYPE_CODE;
277	}
278	// Java's "lazyandNot.toEfficientContainer" thing
279	run_container_t *answer = run_container_create_given_capacity(
280	card + array_container_cardinality(src_2));
281
282	int rlepos = `0`;
283	int xrlepos = `0`; // "x" is src_2
284	rle16_t rle = src_1->runs[rlepos];
285	int32_t start = rle.value;
286	int32_t end = start + rle.length + `1`;
287	int32_t xstart = src_2->array[xrlepos];
288
289	while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
290	if (end <= xstart) {
291	// output the first run
292	answer->runs[answer->n_runs++] =
293	(rle16_t){.value = (uint16_t)start,
294	.length = (uint16_t)(end - start - `1`)};
295	rlepos++;
296	if (rlepos < src_1->n_runs) {
297	start = src_1->runs[rlepos].value;
298	end = start + src_1->runs[rlepos].length + `1`;
299	}
300	} else if (xstart + `1` <= start) {
301	// exit the second run
302	xrlepos++;
303	if (xrlepos < src_2->cardinality) {
304	xstart = src_2->array[xrlepos];
305	}
306	} else {
307	if (start < xstart) {
308	answer->runs[answer->n_runs++] =
309	(rle16_t){.value = (uint16_t)start,
310	.length = (uint16_t)(xstart - start - `1`)};
311	}
312	if (xstart + `1` < end) {
313	start = xstart + `1`;
314	} else {
315	rlepos++;
316	if (rlepos < src_1->n_runs) {
317	start = src_1->runs[rlepos].value;
318	end = start + src_1->runs[rlepos].length + `1`;
319	}
320	}
321	}
322	}
323	if (rlepos < src_1->n_runs) {
324	answer->runs[answer->n_runs++] =
325	(rle16_t){.value = (uint16_t)start,
326	.length = (uint16_t)(end - start - `1`)};
327	rlepos++;
328	if (rlepos < src_1->n_runs) {
329	memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
330	(src_1->n_runs - rlepos) * sizeof(rle16_t));
331	answer->n_runs += (src_1->n_runs - rlepos);
332	}
333	}
334	uint8_t return_type;
335	*dst = convert_run_to_efficient_container(answer, &return_type);
336	if (answer != *dst) run_container_free(answer);
337	return return_type;
338	}
339	// else it's a bitmap or array
340
341	if (card <= DEFAULT_MAX_SIZE) {
342	array_container_t *ac = array_container_create_given_capacity(card);
343	// nb Java code used a generic iterator-based merge to compute
344	// difference
345	ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
346	*dst = ac;
347	return ARRAY_CONTAINER_TYPE_CODE;
348	}
349	bitset_container_t *ans = bitset_container_from_run(src_1);
350	bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
351	return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE
352	: ARRAY_CONTAINER_TYPE_CODE);
353	}
354
355	/ Compute the andnot of src_1 and src_2 and write the result to*
356	* dst (which has no container initially). It will modify src_1
357	* to be dst if the result is a bitset. Otherwise, it will
358	* free src_1 and dst will be a new array container. In both
359	* cases, the caller is responsible for deallocating dst.
360	* Returns true iff dst is a bitset */
361
362	int run_array_container_iandnot(run_container_t *src_1,
363	const array_container_t src_2, void* **dst) {
364	// dummy implementation same as June 2016 Java
365	int ans = run_array_container_andnot(src_1, src_2, dst);
366	run_container_free(src_1);
367	return ans;
368	}
369
370	/ dst must be a valid array container, allowed to be src_1 /
371
372	void array_run_container_andnot(const array_container_t *src_1,
373	const run_container_t *src_2,
374	array_container_t *dst) {
375	// basically following Java impl as of June 2016
376	if (src_1->cardinality > dst->capacity) {
377	array_container_grow(dst, src_1->cardinality, false);
378	}
379
380	if (src_2->n_runs == `0`) {
381	memmove(dst->array, src_1->array,
382	sizeof(uint16_t) * src_1->cardinality);
383	dst->cardinality = src_1->cardinality;
384	return;
385	}
386	int32_t run_start = src_2->runs[`0`].value;
387	int32_t run_end = run_start + src_2->runs[`0`].length;
388	int which_run = `0`;
389
390	uint16_t val = `0`;
391	int dest_card = `0`;
392	for (int i = `0`; i < src_1->cardinality; ++i) {
393	val = src_1->array[i];
394	if (val < run_start)
395	dst->array[dest_card++] = val;
396	else if (val <= run_end) {
397	; // omitted item
398	} else {
399	do {
400	if (which_run + `1` < src_2->n_runs) {
401	++which_run;
402	run_start = src_2->runs[which_run].value;
403	run_end = run_start + src_2->runs[which_run].length;
404
405	} else
406	run_start = run_end = (`1` << `16`) + `1`;
407	} while (val > run_end);
408	--i;
409	}
410	}
411	dst->cardinality = dest_card;
412	}
413
414	/ dst does not indicate a valid container initially. Eventually it*
415	* can become any kind of container.
416	*/
417
418	void array_run_container_iandnot(array_container_t *src_1,
419	const run_container_t *src_2) {
420	array_run_container_andnot(src_1, src_2, src_1);
421	}
422
423	/ dst does not indicate a valid container initially. Eventually it*
424	* can become any kind of container.
425	*/
426
427	int run_run_container_andnot(const run_container_t *src_1,
428	const run_container_t src_2, void* **dst) {
429	run_container_t *ans = run_container_create();
430	run_container_andnot(src_1, src_2, ans);
431	uint8_t typecode_after;
432	*dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
433	return typecode_after;
434	}
435
436	/ Compute the andnot of src_1 and src_2 and write the result to*
437	* dst (which has no container initially). It will modify src_1
438	* to be dst if the result is a bitset. Otherwise, it will
439	* free src_1 and dst will be a new array container. In both
440	* cases, the caller is responsible for deallocating dst.
441	* Returns true iff dst is a bitset */
442
443	int run_run_container_iandnot(run_container_t *src_1,
444	const run_container_t src_2, void* **dst) {
445	// following Java impl as of June 2016 (dummy)
446	int ans = run_run_container_andnot(src_1, src_2, dst);
447	run_container_free(src_1);
448	return ans;
449	}
450
451	/*
452	* dst is a valid array container and may be the same as src_1
453	*/
454
455	void array_array_container_andnot(const array_container_t *src_1,
456	const array_container_t *src_2,
457	array_container_t *dst) {
458	array_container_andnot(src_1, src_2, dst);
459	}
460
461	/ inplace array-array andnot will always be able to reuse the space of*
462	* src_1 */
463	void array_array_container_iandnot(array_container_t *src_1,
464	const array_container_t *src_2) {
465	array_container_andnot(src_1, src_2, src_1);
466	}
467
468	/ Compute the andnot of src_1 and src_2 and write the result to*
469	* dst (which has no container initially). Return value is
470	* "dst is a bitset"
471	*/
472
473	bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
474	const bitset_container_t *src_2,
475	void **dst) {
476	bitset_container_t *ans = bitset_container_create();
477	int card = bitset_container_andnot(src_1, src_2, ans);
478	if (card <= DEFAULT_MAX_SIZE) {
479	*dst = array_container_from_bitset(ans);
480	bitset_container_free(ans);
481	return false; // not bitset
482	} else {
483	*dst = ans;
484	return true;
485	}
486	}
487
488	/ Compute the andnot of src_1 and src_2 and write the result to*
489	* dst (which has no container initially). It will modify src_1
490	* to be dst if the result is a bitset. Otherwise, it will
491	* free src_1 and dst will be a new array container. In both
492	* cases, the caller is responsible for deallocating dst.
493	* Returns true iff dst is a bitset */
494
495	bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
496	const bitset_container_t *src_2,
497	void **dst) {
498	int card = bitset_container_andnot(src_1, src_2, src_1);
499	if (card <= DEFAULT_MAX_SIZE) {
500	*dst = array_container_from_bitset(src_1);
501	bitset_container_free(src_1);
502	return false; // not bitset
503	} else {
504	*dst = src_1;
505	return true;
506	}
507	}
508

Browse the source code of CRoaring/src/containers/mixed_andnot.c