| 1 | /* |
| 2 | * mixed_union.c |
| 3 | * |
| 4 | */ |
| 5 | |
| 6 | #include <assert.h> |
| 7 | #include <string.h> |
| 8 | |
| 9 | #include <roaring/bitset_util.h> |
| 10 | #include <roaring/containers/convert.h> |
| 11 | #include <roaring/containers/mixed_union.h> |
| 12 | #include <roaring/containers/perfparameters.h> |
| 13 | |
| 14 | /* Compute the union of src_1 and src_2 and write the result to |
| 15 | * dst. */ |
| 16 | void array_bitset_container_union(const array_container_t *src_1, |
| 17 | const bitset_container_t *src_2, |
| 18 | bitset_container_t *dst) { |
| 19 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
| 20 | dst->cardinality = (int32_t)bitset_set_list_withcard( |
| 21 | dst->array, dst->cardinality, src_1->array, src_1->cardinality); |
| 22 | } |
| 23 | |
| 24 | /* Compute the union of src_1 and src_2 and write the result to |
| 25 | * dst. It is allowed for src_2 to be dst. This version does not |
| 26 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ |
| 27 | void array_bitset_container_lazy_union(const array_container_t *src_1, |
| 28 | const bitset_container_t *src_2, |
| 29 | bitset_container_t *dst) { |
| 30 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
| 31 | bitset_set_list(dst->array, src_1->array, src_1->cardinality); |
| 32 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 33 | } |
| 34 | |
| 35 | void run_bitset_container_union(const run_container_t *src_1, |
| 36 | const bitset_container_t *src_2, |
| 37 | bitset_container_t *dst) { |
| 38 | assert(!run_container_is_full(src_1)); // catch this case upstream |
| 39 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
| 40 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
| 41 | rle16_t rle = src_1->runs[rlepos]; |
| 42 | bitset_set_lenrange(dst->array, rle.value, rle.length); |
| 43 | } |
| 44 | dst->cardinality = bitset_container_compute_cardinality(dst); |
| 45 | } |
| 46 | |
| 47 | void run_bitset_container_lazy_union(const run_container_t *src_1, |
| 48 | const bitset_container_t *src_2, |
| 49 | bitset_container_t *dst) { |
| 50 | assert(!run_container_is_full(src_1)); // catch this case upstream |
| 51 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
| 52 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
| 53 | rle16_t rle = src_1->runs[rlepos]; |
| 54 | bitset_set_lenrange(dst->array, rle.value, rle.length); |
| 55 | } |
| 56 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 57 | } |
| 58 | |
| 59 | // why do we leave the result as a run container?? |
| 60 | void array_run_container_union(const array_container_t *src_1, |
| 61 | const run_container_t *src_2, |
| 62 | run_container_t *dst) { |
| 63 | if (run_container_is_full(src_2)) { |
| 64 | run_container_copy(src_2, dst); |
| 65 | return; |
| 66 | } |
| 67 | // TODO: see whether the "2*" is spurious |
| 68 | run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); |
| 69 | int32_t rlepos = 0; |
| 70 | int32_t arraypos = 0; |
| 71 | rle16_t previousrle; |
| 72 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
| 73 | previousrle = run_container_append_first(dst, src_2->runs[rlepos]); |
| 74 | rlepos++; |
| 75 | } else { |
| 76 | previousrle = |
| 77 | run_container_append_value_first(dst, src_1->array[arraypos]); |
| 78 | arraypos++; |
| 79 | } |
| 80 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
| 81 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
| 82 | run_container_append(dst, src_2->runs[rlepos], &previousrle); |
| 83 | rlepos++; |
| 84 | } else { |
| 85 | run_container_append_value(dst, src_1->array[arraypos], |
| 86 | &previousrle); |
| 87 | arraypos++; |
| 88 | } |
| 89 | } |
| 90 | if (arraypos < src_1->cardinality) { |
| 91 | while (arraypos < src_1->cardinality) { |
| 92 | run_container_append_value(dst, src_1->array[arraypos], |
| 93 | &previousrle); |
| 94 | arraypos++; |
| 95 | } |
| 96 | } else { |
| 97 | while (rlepos < src_2->n_runs) { |
| 98 | run_container_append(dst, src_2->runs[rlepos], &previousrle); |
| 99 | rlepos++; |
| 100 | } |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | void array_run_container_inplace_union(const array_container_t *src_1, |
| 105 | run_container_t *src_2) { |
| 106 | if (run_container_is_full(src_2)) { |
| 107 | return; |
| 108 | } |
| 109 | const int32_t maxoutput = src_1->cardinality + src_2->n_runs; |
| 110 | const int32_t neededcapacity = maxoutput + src_2->n_runs; |
| 111 | if (src_2->capacity < neededcapacity) |
| 112 | run_container_grow(src_2, neededcapacity, true); |
| 113 | memmove(src_2->runs + maxoutput, src_2->runs, |
| 114 | src_2->n_runs * sizeof(rle16_t)); |
| 115 | rle16_t *inputsrc2 = src_2->runs + maxoutput; |
| 116 | int32_t rlepos = 0; |
| 117 | int32_t arraypos = 0; |
| 118 | int src2nruns = src_2->n_runs; |
| 119 | src_2->n_runs = 0; |
| 120 | |
| 121 | rle16_t previousrle; |
| 122 | |
| 123 | if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { |
| 124 | previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); |
| 125 | rlepos++; |
| 126 | } else { |
| 127 | previousrle = |
| 128 | run_container_append_value_first(src_2, src_1->array[arraypos]); |
| 129 | arraypos++; |
| 130 | } |
| 131 | |
| 132 | while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { |
| 133 | if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { |
| 134 | run_container_append(src_2, inputsrc2[rlepos], &previousrle); |
| 135 | rlepos++; |
| 136 | } else { |
| 137 | run_container_append_value(src_2, src_1->array[arraypos], |
| 138 | &previousrle); |
| 139 | arraypos++; |
| 140 | } |
| 141 | } |
| 142 | if (arraypos < src_1->cardinality) { |
| 143 | while (arraypos < src_1->cardinality) { |
| 144 | run_container_append_value(src_2, src_1->array[arraypos], |
| 145 | &previousrle); |
| 146 | arraypos++; |
| 147 | } |
| 148 | } else { |
| 149 | while (rlepos < src2nruns) { |
| 150 | run_container_append(src_2, inputsrc2[rlepos], &previousrle); |
| 151 | rlepos++; |
| 152 | } |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | bool array_array_container_union(const array_container_t *src_1, |
| 157 | const array_container_t *src_2, void **dst) { |
| 158 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
| 159 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
| 160 | *dst = array_container_create_given_capacity(totalCardinality); |
| 161 | if (*dst != NULL) { |
| 162 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
| 163 | } else { |
| 164 | return true; // otherwise failure won't be caught |
| 165 | } |
| 166 | return false; // not a bitset |
| 167 | } |
| 168 | *dst = bitset_container_create(); |
| 169 | bool returnval = true; // expect a bitset |
| 170 | if (*dst != NULL) { |
| 171 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
| 172 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
| 173 | ourbitset->cardinality = (int32_t)bitset_set_list_withcard( |
| 174 | ourbitset->array, src_1->cardinality, src_2->array, |
| 175 | src_2->cardinality); |
| 176 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
| 177 | // need to convert! |
| 178 | *dst = array_container_from_bitset(ourbitset); |
| 179 | bitset_container_free(ourbitset); |
| 180 | returnval = false; // not going to be a bitset |
| 181 | } |
| 182 | } |
| 183 | return returnval; |
| 184 | } |
| 185 | |
| 186 | bool array_array_container_inplace_union(array_container_t *src_1, |
| 187 | const array_container_t *src_2, void **dst) { |
| 188 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
| 189 | *dst = NULL; |
| 190 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
| 191 | if(src_1->capacity < totalCardinality) { |
| 192 | *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous |
| 193 | if (*dst != NULL) { |
| 194 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
| 195 | } else { |
| 196 | return true; // otherwise failure won't be caught |
| 197 | } |
| 198 | return false; // not a bitset |
| 199 | } else { |
| 200 | memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); |
| 201 | src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, |
| 202 | src_2->array, src_2->cardinality, src_1->array); |
| 203 | return false; // not a bitset |
| 204 | } |
| 205 | } |
| 206 | *dst = bitset_container_create(); |
| 207 | bool returnval = true; // expect a bitset |
| 208 | if (*dst != NULL) { |
| 209 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
| 210 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
| 211 | ourbitset->cardinality = (int32_t)bitset_set_list_withcard( |
| 212 | ourbitset->array, src_1->cardinality, src_2->array, |
| 213 | src_2->cardinality); |
| 214 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
| 215 | // need to convert! |
| 216 | if(src_1->capacity < ourbitset->cardinality) { |
| 217 | array_container_grow(src_1, ourbitset->cardinality, false); |
| 218 | } |
| 219 | |
| 220 | bitset_extract_setbits_uint16(ourbitset->array, BITSET_CONTAINER_SIZE_IN_WORDS, |
| 221 | src_1->array, 0); |
| 222 | src_1->cardinality = ourbitset->cardinality; |
| 223 | *dst = src_1; |
| 224 | bitset_container_free(ourbitset); |
| 225 | returnval = false; // not going to be a bitset |
| 226 | } |
| 227 | } |
| 228 | return returnval; |
| 229 | } |
| 230 | |
| 231 | |
| 232 | bool array_array_container_lazy_union(const array_container_t *src_1, |
| 233 | const array_container_t *src_2, |
| 234 | void **dst) { |
| 235 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
| 236 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
| 237 | *dst = array_container_create_given_capacity(totalCardinality); |
| 238 | if (*dst != NULL) { |
| 239 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
| 240 | } else { |
| 241 | return true; // otherwise failure won't be caught |
| 242 | } |
| 243 | return false; // not a bitset |
| 244 | } |
| 245 | *dst = bitset_container_create(); |
| 246 | bool returnval = true; // expect a bitset |
| 247 | if (*dst != NULL) { |
| 248 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
| 249 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
| 250 | bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); |
| 251 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 252 | } |
| 253 | return returnval; |
| 254 | } |
| 255 | |
| 256 | |
| 257 | bool array_array_container_lazy_inplace_union(array_container_t *src_1, |
| 258 | const array_container_t *src_2, |
| 259 | void **dst) { |
| 260 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
| 261 | *dst = NULL; |
| 262 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
| 263 | if(src_1->capacity < totalCardinality) { |
| 264 | *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous |
| 265 | if (*dst != NULL) { |
| 266 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
| 267 | } else { |
| 268 | return true; // otherwise failure won't be caught |
| 269 | } |
| 270 | return false; // not a bitset |
| 271 | } else { |
| 272 | memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); |
| 273 | src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, |
| 274 | src_2->array, src_2->cardinality, src_1->array); |
| 275 | return false; // not a bitset |
| 276 | } |
| 277 | } |
| 278 | *dst = bitset_container_create(); |
| 279 | bool returnval = true; // expect a bitset |
| 280 | if (*dst != NULL) { |
| 281 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
| 282 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
| 283 | bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); |
| 284 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
| 285 | } |
| 286 | return returnval; |
| 287 | } |
| 288 | |