1 | /* |
2 | * mixed_xor.c |
3 | */ |
4 | |
5 | #include <assert.h> |
6 | #include <string.h> |
7 | |
8 | #include <roaring/bitset_util.h> |
9 | #include <roaring/containers/containers.h> |
10 | #include <roaring/containers/convert.h> |
11 | #include <roaring/containers/mixed_xor.h> |
12 | #include <roaring/containers/perfparameters.h> |
13 | |
14 | /* Compute the xor of src_1 and src_2 and write the result to |
15 | * dst (which has no container initially). |
16 | * Result is true iff dst is a bitset */ |
17 | bool array_bitset_container_xor(const array_container_t *src_1, |
18 | const bitset_container_t *src_2, void **dst) { |
19 | bitset_container_t *result = bitset_container_create(); |
20 | bitset_container_copy(src_2, result); |
21 | result->cardinality = (int32_t)bitset_flip_list_withcard( |
22 | result->array, result->cardinality, src_1->array, src_1->cardinality); |
23 | |
24 | // do required type conversions. |
25 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
26 | *dst = array_container_from_bitset(result); |
27 | bitset_container_free(result); |
28 | return false; // not bitset |
29 | } |
30 | *dst = result; |
31 | return true; // bitset |
32 | } |
33 | |
34 | /* Compute the xor of src_1 and src_2 and write the result to |
35 | * dst. It is allowed for src_2 to be dst. This version does not |
36 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). |
37 | */ |
38 | |
39 | void array_bitset_container_lazy_xor(const array_container_t *src_1, |
40 | const bitset_container_t *src_2, |
41 | bitset_container_t *dst) { |
42 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
43 | bitset_flip_list(dst->array, src_1->array, src_1->cardinality); |
44 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
45 | } |
46 | |
47 | /* Compute the xor of src_1 and src_2 and write the result to |
48 | * dst. Result may be either a bitset or an array container |
49 | * (returns "result is bitset"). dst does not initially have |
50 | * any container, but becomes either a bitset container (return |
51 | * result true) or an array container. |
52 | */ |
53 | |
54 | bool run_bitset_container_xor(const run_container_t *src_1, |
55 | const bitset_container_t *src_2, void **dst) { |
56 | bitset_container_t *result = bitset_container_create(); |
57 | |
58 | bitset_container_copy(src_2, result); |
59 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
60 | rle16_t rle = src_1->runs[rlepos]; |
61 | bitset_flip_range(result->array, rle.value, |
62 | rle.value + rle.length + UINT32_C(1)); |
63 | } |
64 | result->cardinality = bitset_container_compute_cardinality(result); |
65 | |
66 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
67 | *dst = array_container_from_bitset(result); |
68 | bitset_container_free(result); |
69 | return false; // not bitset |
70 | } |
71 | *dst = result; |
72 | return true; // bitset |
73 | } |
74 | |
75 | /* lazy xor. Dst is initialized and may be equal to src_2. |
76 | * Result is left as a bitset container, even if actual |
77 | * cardinality would dictate an array container. |
78 | */ |
79 | |
80 | void run_bitset_container_lazy_xor(const run_container_t *src_1, |
81 | const bitset_container_t *src_2, |
82 | bitset_container_t *dst) { |
83 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
84 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
85 | rle16_t rle = src_1->runs[rlepos]; |
86 | bitset_flip_range(dst->array, rle.value, |
87 | rle.value + rle.length + UINT32_C(1)); |
88 | } |
89 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
90 | } |
91 | |
92 | /* dst does not indicate a valid container initially. Eventually it |
93 | * can become any kind of container. |
94 | */ |
95 | |
96 | int array_run_container_xor(const array_container_t *src_1, |
97 | const run_container_t *src_2, void **dst) { |
98 | // semi following Java XOR implementation as of May 2016 |
99 | // the C OR implementation works quite differently and can return a run |
100 | // container |
101 | // TODO could optimize for full run containers. |
102 | |
103 | // use of lazy following Java impl. |
104 | const int arbitrary_threshold = 32; |
105 | if (src_1->cardinality < arbitrary_threshold) { |
106 | run_container_t *ans = run_container_create(); |
107 | array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. |
108 | uint8_t typecode_after; |
109 | *dst = |
110 | convert_run_to_efficient_container_and_free(ans, &typecode_after); |
111 | return typecode_after; |
112 | } |
113 | |
114 | int card = run_container_cardinality(src_2); |
115 | if (card <= DEFAULT_MAX_SIZE) { |
116 | // Java implementation works with the array, xoring the run elements via |
117 | // iterator |
118 | array_container_t *temp = array_container_from_run(src_2); |
119 | bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); |
120 | array_container_free(temp); |
121 | return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE |
122 | : ARRAY_CONTAINER_TYPE_CODE; |
123 | |
124 | } else { // guess that it will end up as a bitset |
125 | bitset_container_t *result = bitset_container_from_run(src_2); |
126 | bool is_bitset = bitset_array_container_ixor(result, src_1, dst); |
127 | // any necessary type conversion has been done by the ixor |
128 | int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE |
129 | : ARRAY_CONTAINER_TYPE_CODE); |
130 | return retval; |
131 | } |
132 | } |
133 | |
134 | /* Dst is a valid run container. (Can it be src_2? Let's say not.) |
135 | * Leaves result as run container, even if other options are |
136 | * smaller. |
137 | */ |
138 | |
139 | void array_run_container_lazy_xor(const array_container_t *src_1, |
140 | const run_container_t *src_2, |
141 | run_container_t *dst) { |
142 | run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); |
143 | int32_t rlepos = 0; |
144 | int32_t arraypos = 0; |
145 | dst->n_runs = 0; |
146 | |
147 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
148 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
149 | run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, |
150 | src_2->runs[rlepos].length); |
151 | rlepos++; |
152 | } else { |
153 | run_container_smart_append_exclusive(dst, src_1->array[arraypos], |
154 | 0); |
155 | arraypos++; |
156 | } |
157 | } |
158 | while (arraypos < src_1->cardinality) { |
159 | run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); |
160 | arraypos++; |
161 | } |
162 | while (rlepos < src_2->n_runs) { |
163 | run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, |
164 | src_2->runs[rlepos].length); |
165 | rlepos++; |
166 | } |
167 | } |
168 | |
169 | /* dst does not indicate a valid container initially. Eventually it |
170 | * can become any kind of container. |
171 | */ |
172 | |
173 | int run_run_container_xor(const run_container_t *src_1, |
174 | const run_container_t *src_2, void **dst) { |
175 | run_container_t *ans = run_container_create(); |
176 | run_container_xor(src_1, src_2, ans); |
177 | uint8_t typecode_after; |
178 | *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); |
179 | return typecode_after; |
180 | } |
181 | |
182 | /* |
183 | * Java implementation (as of May 2016) for array_run, run_run |
184 | * and bitset_run don't do anything different for inplace. |
185 | * Could adopt the mixed_union.c approach instead (ie, using |
186 | * smart_append_exclusive) |
187 | * |
188 | */ |
189 | |
190 | bool array_array_container_xor(const array_container_t *src_1, |
191 | const array_container_t *src_2, void **dst) { |
192 | int totalCardinality = |
193 | src_1->cardinality + src_2->cardinality; // upper bound |
194 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
195 | *dst = array_container_create_given_capacity(totalCardinality); |
196 | array_container_xor(src_1, src_2, (array_container_t *)*dst); |
197 | return false; // not a bitset |
198 | } |
199 | *dst = bitset_container_from_array(src_1); |
200 | bool returnval = true; // expect a bitset |
201 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
202 | ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( |
203 | ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality); |
204 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
205 | // need to convert! |
206 | *dst = array_container_from_bitset(ourbitset); |
207 | bitset_container_free(ourbitset); |
208 | returnval = false; // not going to be a bitset |
209 | } |
210 | |
211 | return returnval; |
212 | } |
213 | |
214 | bool array_array_container_lazy_xor(const array_container_t *src_1, |
215 | const array_container_t *src_2, |
216 | void **dst) { |
217 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
218 | // upper bound, but probably poor estimate for xor |
219 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
220 | *dst = array_container_create_given_capacity(totalCardinality); |
221 | if (*dst != NULL) |
222 | array_container_xor(src_1, src_2, (array_container_t *)*dst); |
223 | return false; // not a bitset |
224 | } |
225 | *dst = bitset_container_from_array(src_1); |
226 | bool returnval = true; // expect a bitset (maybe, for XOR??) |
227 | if (*dst != NULL) { |
228 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
229 | bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality); |
230 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
231 | } |
232 | return returnval; |
233 | } |
234 | |
235 | /* Compute the xor of src_1 and src_2 and write the result to |
236 | * dst (which has no container initially). Return value is |
237 | * "dst is a bitset" |
238 | */ |
239 | |
240 | bool bitset_bitset_container_xor(const bitset_container_t *src_1, |
241 | const bitset_container_t *src_2, void **dst) { |
242 | bitset_container_t *ans = bitset_container_create(); |
243 | int card = bitset_container_xor(src_1, src_2, ans); |
244 | if (card <= DEFAULT_MAX_SIZE) { |
245 | *dst = array_container_from_bitset(ans); |
246 | bitset_container_free(ans); |
247 | return false; // not bitset |
248 | } else { |
249 | *dst = ans; |
250 | return true; |
251 | } |
252 | } |
253 | |
254 | /* Compute the xor of src_1 and src_2 and write the result to |
255 | * dst (which has no container initially). It will modify src_1 |
256 | * to be dst if the result is a bitset. Otherwise, it will |
257 | * free src_1 and dst will be a new array container. In both |
258 | * cases, the caller is responsible for deallocating dst. |
259 | * Returns true iff dst is a bitset */ |
260 | |
261 | bool bitset_array_container_ixor(bitset_container_t *src_1, |
262 | const array_container_t *src_2, void **dst) { |
263 | *dst = src_1; |
264 | src_1->cardinality = (uint32_t)bitset_flip_list_withcard( |
265 | src_1->array, src_1->cardinality, src_2->array, src_2->cardinality); |
266 | |
267 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
268 | *dst = array_container_from_bitset(src_1); |
269 | bitset_container_free(src_1); |
270 | return false; // not bitset |
271 | } else |
272 | return true; |
273 | } |
274 | |
275 | /* a bunch of in-place, some of which may not *really* be inplace. |
276 | * TODO: write actual inplace routine if efficiency warrants it |
277 | * Anything inplace with a bitset is a good candidate |
278 | */ |
279 | |
280 | bool bitset_bitset_container_ixor(bitset_container_t *src_1, |
281 | const bitset_container_t *src_2, void **dst) { |
282 | bool ans = bitset_bitset_container_xor(src_1, src_2, dst); |
283 | bitset_container_free(src_1); |
284 | return ans; |
285 | } |
286 | |
287 | bool array_bitset_container_ixor(array_container_t *src_1, |
288 | const bitset_container_t *src_2, void **dst) { |
289 | bool ans = array_bitset_container_xor(src_1, src_2, dst); |
290 | array_container_free(src_1); |
291 | return ans; |
292 | } |
293 | |
294 | /* Compute the xor of src_1 and src_2 and write the result to |
295 | * dst. Result may be either a bitset or an array container |
296 | * (returns "result is bitset"). dst does not initially have |
297 | * any container, but becomes either a bitset container (return |
298 | * result true) or an array container. |
299 | */ |
300 | |
301 | bool run_bitset_container_ixor(run_container_t *src_1, |
302 | const bitset_container_t *src_2, void **dst) { |
303 | bool ans = run_bitset_container_xor(src_1, src_2, dst); |
304 | run_container_free(src_1); |
305 | return ans; |
306 | } |
307 | |
308 | bool bitset_run_container_ixor(bitset_container_t *src_1, |
309 | const run_container_t *src_2, void **dst) { |
310 | bool ans = run_bitset_container_xor(src_2, src_1, dst); |
311 | bitset_container_free(src_1); |
312 | return ans; |
313 | } |
314 | |
315 | /* dst does not indicate a valid container initially. Eventually it |
316 | * can become any kind of container. |
317 | */ |
318 | |
319 | int array_run_container_ixor(array_container_t *src_1, |
320 | const run_container_t *src_2, void **dst) { |
321 | int ans = array_run_container_xor(src_1, src_2, dst); |
322 | array_container_free(src_1); |
323 | return ans; |
324 | } |
325 | |
326 | int run_array_container_ixor(run_container_t *src_1, |
327 | const array_container_t *src_2, void **dst) { |
328 | int ans = array_run_container_xor(src_2, src_1, dst); |
329 | run_container_free(src_1); |
330 | return ans; |
331 | } |
332 | |
333 | bool array_array_container_ixor(array_container_t *src_1, |
334 | const array_container_t *src_2, void **dst) { |
335 | bool ans = array_array_container_xor(src_1, src_2, dst); |
336 | array_container_free(src_1); |
337 | return ans; |
338 | } |
339 | |
340 | int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, |
341 | void **dst) { |
342 | int ans = run_run_container_xor(src_1, src_2, dst); |
343 | run_container_free(src_1); |
344 | return ans; |
345 | } |
346 | |