/*
 * mixed_negation.c
 *
 * Negation (complement) of array, bitset and run containers, over the
 * full 16-bit range or over a subrange.
 */

#include <assert.h>
#include <string.h>

#include <roaring/array_util.h>
#include <roaring/bitset_util.h>
#include <roaring/containers/containers.h>
#include <roaring/containers/convert.h>
#include <roaring/containers/mixed_negation.h>
#include <roaring/containers/run.h>

// TODO: write simplified and optimized negation code across the full range.

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result
 * to *dst. The complement of a
 * sufficiently sparse set will always be dense and hence a bitmap.
 * We assume that dst is pre-allocated and is a valid bitset container.
 * There can be no in-place version.
 */
void array_container_negation(const array_container_t *src,
                              bitset_container_t *dst) {
    uint64_t card = UINT64_C(1) << 16;
    bitset_container_set_all(dst);

    dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array,
                                                  (uint64_t)src->cardinality);
}
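
/*
 * Illustrative usage sketch for array_container_negation (not part of the
 * library; the guard macro below is hypothetical and never defined by the
 * build). It shows the calling convention documented above: dst must already
 * be a valid, allocated bitset container, and the result has cardinality
 * 65536 minus the cardinality of src.
 */
#ifdef ROARING_MIXED_NEGATION_USAGE_EXAMPLES
static void example_array_container_negation(void) {
    array_container_t *arr = array_container_create();
    for (uint16_t i = 0; i < 300; i += 3) array_container_add(arr, i);

    bitset_container_t *bs = bitset_container_create();  // pre-allocated dst
    array_container_negation(arr, bs);
    assert(bs->cardinality == (1 << 16) - arr->cardinality);

    array_container_free(arr);
    bitset_container_free(bs);
}
#endif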

/* Negation across the entire range of the container
 * Compute the negation of src and write the result
 * to *dst. A true return value indicates a bitset result,
 * otherwise the result is an array container.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
bool bitset_container_negation(const bitset_container_t *src, void **dst) {
    return bitset_container_negation_range(src, 0, (1 << 16), dst);
}

/* inplace version */
/*
 * Same as bitset_container_negation except that if the output is to
 * be a bitset_container_t, then src is modified and no allocation is made.
 * If the output is to be an array_container_t, then the caller is
 * responsible for freeing the container.
 * In all cases, the result is in *dst.
 */
bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) {
    return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
}
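
/*
 * Illustrative usage sketch for bitset_container_negation (not part of the
 * library; the guard macro is hypothetical). The boolean return value tells
 * the caller whether *dst is a bitset or an array container; src remains
 * owned by the caller. For the _inplace variant, src is consumed (either
 * reused as *dst or freed), so only *dst should be released afterwards.
 */
#ifdef ROARING_MIXED_NEGATION_USAGE_EXAMPLES
static void example_bitset_container_negation(void) {
    bitset_container_t *bs = bitset_container_create();
    for (uint32_t i = 0; i < (1 << 16); i += 2)
        bitset_container_set(bs, (uint16_t)i);

    void *result = NULL;
    if (bitset_container_negation(bs, &result)) {
        // dense complement: result is a bitset container
        bitset_container_free((bitset_container_t *)result);
    } else {
        // sparse complement: result is an array container
        array_container_free((array_container_t *)result);
    }
    bitset_container_free(bs);  // the non-inplace form leaves src intact
}
#endif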

/* Negation across the entire range of the container.
 * Compute the negation of src and write the result
 * to *dst. Return values are the *_TYPECODES as defined in containers.h.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
int run_container_negation(const run_container_t *src, void **dst) {
    return run_container_negation_range(src, 0, (1 << 16), dst);
}

/*
 * Same as run_container_negation except that if the output is to
 * be a run_container_t and it has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_inplace(run_container_t *src, void **dst) {
    return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
}
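
/*
 * Illustrative usage sketch for run_container_negation (not part of the
 * library; the guard macro is hypothetical). The returned typecode indicates
 * which container type *dst points to, and therefore which free function the
 * caller must use.
 */
#ifdef ROARING_MIXED_NEGATION_USAGE_EXAMPLES
static void example_run_container_negation(void) {
    run_container_t *run = run_container_create();
    run_container_add(run, 10);
    run_container_add(run, 11);
    run_container_add(run, 1000);

    void *result = NULL;
    int typecode = run_container_negation(run, &result);
    switch (typecode) {
        case RUN_CONTAINER_TYPE_CODE:
            run_container_free((run_container_t *)result);
            break;
        case ARRAY_CONTAINER_TYPE_CODE:
            array_container_free((array_container_t *)result);
            break;
        case BITSET_CONTAINER_TYPE_CODE:
            bitset_container_free((bitset_container_t *)result);
            break;
    }
    run_container_free(run);  // the non-inplace form leaves src intact
}
#endif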

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst. Returns true if the result is a bitset container
 * and false for an array container. *dst is not preallocated.
 */
bool array_container_negation_range(const array_container_t *src,
                                    const int range_start, const int range_end,
                                    void **dst) {
    /* close port of the Java implementation */
    if (range_start >= range_end) {
        *dst = array_container_clone(src);
        return false;
    }

    int32_t start_index =
        binarySearch(src->array, src->cardinality, (uint16_t)range_start);
    if (start_index < 0) start_index = -start_index - 1;

    int32_t last_index =
        binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
    if (last_index < 0) last_index = -last_index - 2;

    const int32_t current_values_in_range = last_index - start_index + 1;
    const int32_t span_to_be_flipped = range_end - range_start;
    const int32_t new_values_in_range =
        span_to_be_flipped - current_values_in_range;
    const int32_t cardinality_change =
        new_values_in_range - current_values_in_range;
    const int32_t new_cardinality = src->cardinality + cardinality_change;
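    // Worked example: with src = {2, 5, 6} and the range [4, 8),
    // start_index = 1 (value 5) and last_index = 2 (value 6), so 2 of the 4
    // values in the span are present; the flip yields 4 - 2 = 2 new values
    // ({4, 7}), the cardinality change is 0 and the result is {2, 4, 7}.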

    if (new_cardinality > DEFAULT_MAX_SIZE) {
        bitset_container_t *temp = bitset_container_from_array(src);
        bitset_flip_range(temp->array, (uint32_t)range_start,
                          (uint32_t)range_end);
        temp->cardinality = new_cardinality;
        *dst = temp;
        return true;
    }

    array_container_t *arr =
        array_container_create_given_capacity(new_cardinality);
    *dst = (void *)arr;
    if (new_cardinality == 0) {
        arr->cardinality = new_cardinality;
        return false;  // we are done.
    }
    // copy stuff before the active area
    memcpy(arr->array, src->array, start_index * sizeof(uint16_t));

    // work on the range
    int32_t out_pos = start_index, in_pos = start_index;
    int32_t val_in_range = range_start;
    for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
        if ((uint16_t)val_in_range != src->array[in_pos]) {
            arr->array[out_pos++] = (uint16_t)val_in_range;
        } else {
            ++in_pos;
        }
    }
    for (; val_in_range < range_end; ++val_in_range)
        arr->array[out_pos++] = (uint16_t)val_in_range;

    // content after the active range
    memcpy(arr->array + out_pos, src->array + (last_index + 1),
           (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
    arr->cardinality = new_cardinality;
    return false;
}

/* Even when the result would fit, it is unclear how to make an
 * inplace version without inefficient copying.
 */

bool array_container_negation_range_inplace(array_container_t *src,
                                            const int range_start,
                                            const int range_end, void **dst) {
    bool ans = array_container_negation_range(src, range_start, range_end, dst);
    // TODO : try a real inplace version
    array_container_free(src);
    return ans;
}

/* Negation across a range of the container
 * Compute the negation of src and write the result
 * to *dst. A true return value indicates a bitset result,
 * otherwise the result is an array container.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
bool bitset_container_negation_range(const bitset_container_t *src,
                                     const int range_start, const int range_end,
                                     void **dst) {
    // TODO maybe consider density-based estimate
    // and sometimes build result directly as array, with
    // conversion back to bitset if wrong. Or determine
    // actual result cardinality, then go directly for the known final cont.

    // keep computation using bitsets as long as possible.
    bitset_container_t *t = bitset_container_clone(src);
    bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end);
    t->cardinality = bitset_container_compute_cardinality(t);

    if (t->cardinality > DEFAULT_MAX_SIZE) {
        *dst = t;
        return true;
    } else {
        *dst = array_container_from_bitset(t);
        bitset_container_free(t);
        return false;
    }
}

/* inplace version */
/*
 * Same as bitset_container_negation_range except that if the output is to
 * be a bitset_container_t, then src is modified and no allocation is made.
 * If the output is to be an array_container_t, then the caller is
 * responsible for freeing the container.
 * In all cases, the result is in *dst.
 */
bool bitset_container_negation_range_inplace(bitset_container_t *src,
                                             const int range_start,
                                             const int range_end, void **dst) {
    bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end);
    src->cardinality = bitset_container_compute_cardinality(src);
    if (src->cardinality > DEFAULT_MAX_SIZE) {
        *dst = src;
        return true;
    }
    *dst = array_container_from_bitset(src);
    bitset_container_free(src);
    return false;
}

/* Negation across a range of the container.
 * Compute the negation of src and write the result
 * to *dst. Return values are the *_TYPECODES as defined in containers.h.
 * We assume that dst is not pre-allocated. In
 * case of failure, *dst will be NULL.
 */
int run_container_negation_range(const run_container_t *src,
                                 const int range_start, const int range_end,
                                 void **dst) {
    uint8_t return_typecode;

    // follows the Java implementation
    if (range_end <= range_start) {
        *dst = run_container_clone(src);
        return RUN_CONTAINER_TYPE_CODE;
    }

    run_container_t *ans = run_container_create_given_capacity(
        src->n_runs + 1);  // flipping a range adds at most one run
    int k = 0;
    for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
        ans->runs[k] = src->runs[k];
        ans->n_runs++;
    }

    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
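    // Notes: rle16_t lengths are inclusive (a run {v, len} covers v..v+len),
    // hence the "- 1" above; and if a run copied in the loop above extends
    // past range_start, the exclusive (XOR) append resolves that overlap.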

    for (; k < src->n_runs; ++k) {
        run_container_smart_append_exclusive(ans, src->runs[k].value,
                                             src->runs[k].length);
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}

/*
 * Same as run_container_negation_range except that if the output is to
 * be a run_container_t and it has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_range_inplace(run_container_t *src,
                                         const int range_start,
                                         const int range_end, void **dst) {
    uint8_t return_typecode;

    if (range_end <= range_start) {
        *dst = src;
        return RUN_CONTAINER_TYPE_CODE;
    }

    // TODO: efficient special case when range is 0 to 65535 inclusive

    if (src->capacity == src->n_runs) {
        // no excess room; check whether the result can still fit in place
        bool last_val_before_range = false;
        bool first_val_in_range = false;
        bool last_val_in_range = false;
        bool first_val_past_range = false;

        if (range_start > 0)
            last_val_before_range =
                run_container_contains(src, (uint16_t)(range_start - 1));
        first_val_in_range = run_container_contains(src, (uint16_t)range_start);

        if (last_val_before_range == first_val_in_range) {
            last_val_in_range =
                run_container_contains(src, (uint16_t)(range_end - 1));
            if (range_end != 0x10000)
                first_val_past_range =
                    run_container_contains(src, (uint16_t)range_end);

            if (last_val_in_range ==
                first_val_past_range) {  // no space for inplace
                int ans = run_container_negation_range(src, range_start,
                                                       range_end, dst);
                run_container_free(src);
                return ans;
            }
        }
    }
    // all other cases: result will fit

    run_container_t *ans = src;
    int my_nbr_runs = src->n_runs;

    ans->n_runs = 0;
    int k = 0;
    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
        // ans->runs[k] = src->runs[k]; (would be self-copy)
        ans->n_runs++;
    }

    // as in the Java implementation, buffer one run ahead so that writing
    // into ans (which aliases src) cannot clobber runs we have not yet read
    rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0};
    rle16_t next = buffered;
    if (k < my_nbr_runs) buffered = src->runs[k];

    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < my_nbr_runs; ++k) {
        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];

        run_container_smart_append_exclusive(ans, buffered.value,
                                             buffered.length);
        buffered = next;
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}
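
/*
 * Illustrative usage sketch for run_container_negation_range_inplace (not
 * part of the library; the guard macro is hypothetical). Whichever path is
 * taken, src is consumed: it is either reused as *dst or freed internally,
 * so the caller must only release *dst, according to the returned typecode.
 */
#ifdef ROARING_MIXED_NEGATION_USAGE_EXAMPLES
static void example_run_container_negation_range_inplace(void) {
    run_container_t *run = run_container_create();
    for (uint32_t i = 100; i < 200; i++) run_container_add(run, (uint16_t)i);

    void *result = NULL;
    int typecode = run_container_negation_range_inplace(run, 0, 1 << 16, &result);
    // do not touch `run` past this point; only `result` is valid
    if (typecode == RUN_CONTAINER_TYPE_CODE) {
        run_container_free((run_container_t *)result);
    } else if (typecode == ARRAY_CONTAINER_TYPE_CODE) {
        array_container_free((array_container_t *)result);
    } else {
        bitset_container_free((bitset_container_t *)result);
    }
}
#endif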