| 1 | /* |
| 2 | * mixed_xor.c |
| 3 | */ |
| 4 | |
| 5 | #include <assert.h> |
| 6 | #include <string.h> |
| 7 | |
| 8 | #include <roaring/bitset_util.h> |
| 9 | #include <roaring/containers/containers.h> |
| 10 | #include <roaring/containers/convert.h> |
| 11 | #include <roaring/containers/mixed_xor.h> |
| 12 | #include <roaring/containers/perfparameters.h> |
| 13 | |
| 14 | /* Compute the xor of src_1 and src_2 and write the result to |
| 15 | * dst (which has no container initially). |
| 16 | * Result is true iff dst is a bitset */ |
| 17 | bool array_bitset_container_xor(const array_container_t *src_1, |
| 18 | const bitset_container_t *src_2, void **dst) { |
| 19 | bitset_container_t *result = bitset_container_create(); |
| 20 | bitset_container_copy(src_2, result); |
| 21 | result->cardinality = (int32_t)bitset_flip_list_withcard( |
| 22 | result->array, result->cardinality, src_1->array, src_1->cardinality); |
| 23 | |
| 24 | // do required type conversions. |
| 25 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
| 26 | *dst = array_container_from_bitset(result); |
| 27 | bitset_container_free(result); |
| 28 | return false; // not bitset |
| 29 | } |
| 30 | *dst = result; |
| 31 | return true; // bitset |
| 32 | } |
| 33 | |
| 34 | /* Compute the xor of src_1 and src_2 and write the result to |
| 35 | * dst. It is allowed for src_2 to be dst. This version does not |
| 36 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). |
| 37 | */ |
| 38 | |
| 39 | void array_bitset_container_lazy_xor(const array_container_t *src_1, |
| 40 | const bitset_container_t *src_2, |
| 41 | bitset_container_t *dst) { |
| 42 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
| 43 | bitset_flip_list(dst->array, src_1->array, src_1->cardinality); |
| 44 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 45 | } |
| 46 | |
| 47 | /* Compute the xor of src_1 and src_2 and write the result to |
| 48 | * dst. Result may be either a bitset or an array container |
| 49 | * (returns "result is bitset"). dst does not initially have |
| 50 | * any container, but becomes either a bitset container (return |
| 51 | * result true) or an array container. |
| 52 | */ |
| 53 | |
| 54 | bool run_bitset_container_xor(const run_container_t *src_1, |
| 55 | const bitset_container_t *src_2, void **dst) { |
| 56 | bitset_container_t *result = bitset_container_create(); |
| 57 | |
| 58 | bitset_container_copy(src_2, result); |
| 59 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
| 60 | rle16_t rle = src_1->runs[rlepos]; |
| 61 | bitset_flip_range(result->array, rle.value, |
| 62 | rle.value + rle.length + UINT32_C(1)); |
| 63 | } |
| 64 | result->cardinality = bitset_container_compute_cardinality(result); |
| 65 | |
| 66 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
| 67 | *dst = array_container_from_bitset(result); |
| 68 | bitset_container_free(result); |
| 69 | return false; // not bitset |
| 70 | } |
| 71 | *dst = result; |
| 72 | return true; // bitset |
| 73 | } |
| 74 | |
| 75 | /* lazy xor. Dst is initialized and may be equal to src_2. |
| 76 | * Result is left as a bitset container, even if actual |
| 77 | * cardinality would dictate an array container. |
| 78 | */ |
| 79 | |
| 80 | void run_bitset_container_lazy_xor(const run_container_t *src_1, |
| 81 | const bitset_container_t *src_2, |
| 82 | bitset_container_t *dst) { |
| 83 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
| 84 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
| 85 | rle16_t rle = src_1->runs[rlepos]; |
| 86 | bitset_flip_range(dst->array, rle.value, |
| 87 | rle.value + rle.length + UINT32_C(1)); |
| 88 | } |
| 89 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 90 | } |
| 91 | |
| 92 | /* dst does not indicate a valid container initially. Eventually it |
| 93 | * can become any kind of container. |
| 94 | */ |
| 95 | |
| 96 | int array_run_container_xor(const array_container_t *src_1, |
| 97 | const run_container_t *src_2, void **dst) { |
| 98 | // semi following Java XOR implementation as of May 2016 |
| 99 | // the C OR implementation works quite differently and can return a run |
| 100 | // container |
| 101 | // TODO could optimize for full run containers. |
| 102 | |
| 103 | // use of lazy following Java impl. |
| 104 | const int arbitrary_threshold = 32; |
| 105 | if (src_1->cardinality < arbitrary_threshold) { |
| 106 | run_container_t *ans = run_container_create(); |
| 107 | array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. |
| 108 | uint8_t typecode_after; |
| 109 | *dst = |
| 110 | convert_run_to_efficient_container_and_free(ans, &typecode_after); |
| 111 | return typecode_after; |
| 112 | } |
| 113 | |
| 114 | int card = run_container_cardinality(src_2); |
| 115 | if (card <= DEFAULT_MAX_SIZE) { |
| 116 | // Java implementation works with the array, xoring the run elements via |
| 117 | // iterator |
| 118 | array_container_t *temp = array_container_from_run(src_2); |
| 119 | bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); |
| 120 | array_container_free(temp); |
| 121 | return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE |
| 122 | : ARRAY_CONTAINER_TYPE_CODE; |
| 123 | |
| 124 | } else { // guess that it will end up as a bitset |
| 125 | bitset_container_t *result = bitset_container_from_run(src_2); |
| 126 | bool is_bitset = bitset_array_container_ixor(result, src_1, dst); |
| 127 | // any necessary type conversion has been done by the ixor |
| 128 | int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE |
| 129 | : ARRAY_CONTAINER_TYPE_CODE); |
| 130 | return retval; |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | /* Dst is a valid run container. (Can it be src_2? Let's say not.) |
| 135 | * Leaves result as run container, even if other options are |
| 136 | * smaller. |
| 137 | */ |
| 138 | |
| 139 | void array_run_container_lazy_xor(const array_container_t *src_1, |
| 140 | const run_container_t *src_2, |
| 141 | run_container_t *dst) { |
| 142 | run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); |
| 143 | int32_t rlepos = 0; |
| 144 | int32_t arraypos = 0; |
| 145 | dst->n_runs = 0; |
| 146 | |
| 147 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
| 148 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
| 149 | run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, |
| 150 | src_2->runs[rlepos].length); |
| 151 | rlepos++; |
| 152 | } else { |
| 153 | run_container_smart_append_exclusive(dst, src_1->array[arraypos], |
| 154 | 0); |
| 155 | arraypos++; |
| 156 | } |
| 157 | } |
| 158 | while (arraypos < src_1->cardinality) { |
| 159 | run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); |
| 160 | arraypos++; |
| 161 | } |
| 162 | while (rlepos < src_2->n_runs) { |
| 163 | run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, |
| 164 | src_2->runs[rlepos].length); |
| 165 | rlepos++; |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | /* dst does not indicate a valid container initially. Eventually it |
| 170 | * can become any kind of container. |
| 171 | */ |
| 172 | |
| 173 | int run_run_container_xor(const run_container_t *src_1, |
| 174 | const run_container_t *src_2, void **dst) { |
| 175 | run_container_t *ans = run_container_create(); |
| 176 | run_container_xor(src_1, src_2, ans); |
| 177 | uint8_t typecode_after; |
| 178 | *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); |
| 179 | return typecode_after; |
| 180 | } |
| 181 | |
| 182 | /* |
| 183 | * Java implementation (as of May 2016) for array_run, run_run |
| 184 | * and bitset_run don't do anything different for inplace. |
| 185 | * Could adopt the mixed_union.c approach instead (ie, using |
| 186 | * smart_append_exclusive) |
| 187 | * |
| 188 | */ |
| 189 | |
| 190 | bool array_array_container_xor(const array_container_t *src_1, |
| 191 | const array_container_t *src_2, void **dst) { |
| 192 | int totalCardinality = |
| 193 | src_1->cardinality + src_2->cardinality; // upper bound |
| 194 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
| 195 | *dst = array_container_create_given_capacity(totalCardinality); |
| 196 | array_container_xor(src_1, src_2, (array_container_t *)*dst); |
| 197 | return false; // not a bitset |
| 198 | } |
| 199 | *dst = bitset_container_from_array(src_1); |
| 200 | bool returnval = true; // expect a bitset |
| 201 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
| 202 | ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( |
| 203 | ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality); |
| 204 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
| 205 | // need to convert! |
| 206 | *dst = array_container_from_bitset(ourbitset); |
| 207 | bitset_container_free(ourbitset); |
| 208 | returnval = false; // not going to be a bitset |
| 209 | } |
| 210 | |
| 211 | return returnval; |
| 212 | } |
| 213 | |
| 214 | bool array_array_container_lazy_xor(const array_container_t *src_1, |
| 215 | const array_container_t *src_2, |
| 216 | void **dst) { |
| 217 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
| 218 | // upper bound, but probably poor estimate for xor |
| 219 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
| 220 | *dst = array_container_create_given_capacity(totalCardinality); |
| 221 | if (*dst != NULL) |
| 222 | array_container_xor(src_1, src_2, (array_container_t *)*dst); |
| 223 | return false; // not a bitset |
| 224 | } |
| 225 | *dst = bitset_container_from_array(src_1); |
| 226 | bool returnval = true; // expect a bitset (maybe, for XOR??) |
| 227 | if (*dst != NULL) { |
| 228 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
| 229 | bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality); |
| 230 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 231 | } |
| 232 | return returnval; |
| 233 | } |
| 234 | |
| 235 | /* Compute the xor of src_1 and src_2 and write the result to |
| 236 | * dst (which has no container initially). Return value is |
| 237 | * "dst is a bitset" |
| 238 | */ |
| 239 | |
| 240 | bool bitset_bitset_container_xor(const bitset_container_t *src_1, |
| 241 | const bitset_container_t *src_2, void **dst) { |
| 242 | bitset_container_t *ans = bitset_container_create(); |
| 243 | int card = bitset_container_xor(src_1, src_2, ans); |
| 244 | if (card <= DEFAULT_MAX_SIZE) { |
| 245 | *dst = array_container_from_bitset(ans); |
| 246 | bitset_container_free(ans); |
| 247 | return false; // not bitset |
| 248 | } else { |
| 249 | *dst = ans; |
| 250 | return true; |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | /* Compute the xor of src_1 and src_2 and write the result to |
| 255 | * dst (which has no container initially). It will modify src_1 |
| 256 | * to be dst if the result is a bitset. Otherwise, it will |
| 257 | * free src_1 and dst will be a new array container. In both |
| 258 | * cases, the caller is responsible for deallocating dst. |
| 259 | * Returns true iff dst is a bitset */ |
| 260 | |
| 261 | bool bitset_array_container_ixor(bitset_container_t *src_1, |
| 262 | const array_container_t *src_2, void **dst) { |
| 263 | *dst = src_1; |
| 264 | src_1->cardinality = (uint32_t)bitset_flip_list_withcard( |
| 265 | src_1->array, src_1->cardinality, src_2->array, src_2->cardinality); |
| 266 | |
| 267 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
| 268 | *dst = array_container_from_bitset(src_1); |
| 269 | bitset_container_free(src_1); |
| 270 | return false; // not bitset |
| 271 | } else |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | /* a bunch of in-place, some of which may not *really* be inplace. |
| 276 | * TODO: write actual inplace routine if efficiency warrants it |
| 277 | * Anything inplace with a bitset is a good candidate |
| 278 | */ |
| 279 | |
| 280 | bool bitset_bitset_container_ixor(bitset_container_t *src_1, |
| 281 | const bitset_container_t *src_2, void **dst) { |
| 282 | bool ans = bitset_bitset_container_xor(src_1, src_2, dst); |
| 283 | bitset_container_free(src_1); |
| 284 | return ans; |
| 285 | } |
| 286 | |
| 287 | bool array_bitset_container_ixor(array_container_t *src_1, |
| 288 | const bitset_container_t *src_2, void **dst) { |
| 289 | bool ans = array_bitset_container_xor(src_1, src_2, dst); |
| 290 | array_container_free(src_1); |
| 291 | return ans; |
| 292 | } |
| 293 | |
| 294 | /* Compute the xor of src_1 and src_2 and write the result to |
| 295 | * dst. Result may be either a bitset or an array container |
| 296 | * (returns "result is bitset"). dst does not initially have |
| 297 | * any container, but becomes either a bitset container (return |
| 298 | * result true) or an array container. |
| 299 | */ |
| 300 | |
| 301 | bool run_bitset_container_ixor(run_container_t *src_1, |
| 302 | const bitset_container_t *src_2, void **dst) { |
| 303 | bool ans = run_bitset_container_xor(src_1, src_2, dst); |
| 304 | run_container_free(src_1); |
| 305 | return ans; |
| 306 | } |
| 307 | |
| 308 | bool bitset_run_container_ixor(bitset_container_t *src_1, |
| 309 | const run_container_t *src_2, void **dst) { |
| 310 | bool ans = run_bitset_container_xor(src_2, src_1, dst); |
| 311 | bitset_container_free(src_1); |
| 312 | return ans; |
| 313 | } |
| 314 | |
| 315 | /* dst does not indicate a valid container initially. Eventually it |
| 316 | * can become any kind of container. |
| 317 | */ |
| 318 | |
| 319 | int array_run_container_ixor(array_container_t *src_1, |
| 320 | const run_container_t *src_2, void **dst) { |
| 321 | int ans = array_run_container_xor(src_1, src_2, dst); |
| 322 | array_container_free(src_1); |
| 323 | return ans; |
| 324 | } |
| 325 | |
| 326 | int run_array_container_ixor(run_container_t *src_1, |
| 327 | const array_container_t *src_2, void **dst) { |
| 328 | int ans = array_run_container_xor(src_2, src_1, dst); |
| 329 | run_container_free(src_1); |
| 330 | return ans; |
| 331 | } |
| 332 | |
| 333 | bool array_array_container_ixor(array_container_t *src_1, |
| 334 | const array_container_t *src_2, void **dst) { |
| 335 | bool ans = array_array_container_xor(src_1, src_2, dst); |
| 336 | array_container_free(src_1); |
| 337 | return ans; |
| 338 | } |
| 339 | |
| 340 | int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, |
| 341 | void **dst) { |
| 342 | int ans = run_run_container_xor(src_1, src_2, dst); |
| 343 | run_container_free(src_1); |
| 344 | return ans; |
| 345 | } |
| 346 | |