1#include <stdio.h>
2
3#include <roaring/bitset_util.h>
4#include <roaring/containers/containers.h>
5#include <roaring/containers/convert.h>
6#include <roaring/containers/perfparameters.h>
7
8// file contains grubby stuff that must know impl. details of all container
9// types.
10bitset_container_t *bitset_container_from_array(const array_container_t *a) {
11 bitset_container_t *ans = bitset_container_create();
12 int limit = array_container_cardinality(a);
13 for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]);
14 return ans;
15}
16
17bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
18 int card = run_container_cardinality(arr);
19 bitset_container_t *answer = bitset_container_create();
20 for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
21 rle16_t vl = arr->runs[rlepos];
22 bitset_set_lenrange(answer->array, vl.value, vl.length);
23 }
24 answer->cardinality = card;
25 return answer;
26}
27
28array_container_t *array_container_from_run(const run_container_t *arr) {
29 array_container_t *answer =
30 array_container_create_given_capacity(run_container_cardinality(arr));
31 answer->cardinality = 0;
32 for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
33 int run_start = arr->runs[rlepos].value;
34 int run_end = run_start + arr->runs[rlepos].length;
35
36 for (int run_value = run_start; run_value <= run_end; ++run_value) {
37 answer->array[answer->cardinality++] = (uint16_t)run_value;
38 }
39 }
40 return answer;
41}
42
43array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
44 array_container_t *result =
45 array_container_create_given_capacity(bits->cardinality);
46 result->cardinality = bits->cardinality;
47 // sse version ends up being slower here
48 // (bitset_extract_setbits_sse_uint16)
49 // because of the sparsity of the data
50 bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS,
51 result->array, 0);
52 return result;
53}
54
55/* assumes that container has adequate space. Run from [s,e] (inclusive) */
56static void add_run(run_container_t *r, int s, int e) {
57 r->runs[r->n_runs].value = s;
58 r->runs[r->n_runs].length = e - s;
59 r->n_runs++;
60}
61
62run_container_t *run_container_from_array(const array_container_t *c) {
63 int32_t n_runs = array_container_number_of_runs(c);
64 run_container_t *answer = run_container_create_given_capacity(n_runs);
65 int prev = -2;
66 int run_start = -1;
67 int32_t card = c->cardinality;
68 if (card == 0) return answer;
69 for (int i = 0; i < card; ++i) {
70 const uint16_t cur_val = c->array[i];
71 if (cur_val != prev + 1) {
72 // new run starts; flush old one, if any
73 if (run_start != -1) add_run(answer, run_start, prev);
74 run_start = cur_val;
75 }
76 prev = c->array[i];
77 }
78 // now prev is the last seen value
79 add_run(answer, run_start, prev);
80 // assert(run_container_cardinality(answer) == c->cardinality);
81 return answer;
82}
83
84/**
85 * Convert the runcontainer to either a Bitmap or an Array Container, depending
86 * on the cardinality. Frees the container.
87 * Allocates and returns new container, which caller is responsible for freeing
88 */
89
90void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card,
91 uint8_t *resulttype) {
92 if (card <= DEFAULT_MAX_SIZE) {
93 array_container_t *answer = array_container_create_given_capacity(card);
94 answer->cardinality = 0;
95 for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) {
96 uint16_t run_start = r->runs[rlepos].value;
97 uint16_t run_end = run_start + r->runs[rlepos].length;
98 for (uint16_t run_value = run_start; run_value <= run_end;
99 ++run_value) {
100 answer->array[answer->cardinality++] = run_value;
101 }
102 }
103 assert(card == answer->cardinality);
104 *resulttype = ARRAY_CONTAINER_TYPE_CODE;
105 run_container_free(r);
106 return answer;
107 }
108 bitset_container_t *answer = bitset_container_create();
109 for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) {
110 uint16_t run_start = r->runs[rlepos].value;
111 bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length);
112 }
113 answer->cardinality = card;
114 *resulttype = BITSET_CONTAINER_TYPE_CODE;
115 run_container_free(r);
116 return answer;
117}
118
119/* Converts a run container to either an array or a bitset, IF it saves space.
120 */
121/* If a conversion occurs, the caller is responsible to free the original
122 * container and
123 * he becomes responsible to free the new one. */
124void *convert_run_to_efficient_container(run_container_t *c,
125 uint8_t *typecode_after) {
126 int32_t size_as_run_container =
127 run_container_serialized_size_in_bytes(c->n_runs);
128
129 int32_t size_as_bitset_container =
130 bitset_container_serialized_size_in_bytes();
131 int32_t card = run_container_cardinality(c);
132 int32_t size_as_array_container =
133 array_container_serialized_size_in_bytes(card);
134
135 int32_t min_size_non_run =
136 size_as_bitset_container < size_as_array_container
137 ? size_as_bitset_container
138 : size_as_array_container;
139 if (size_as_run_container <= min_size_non_run) { // no conversion
140 *typecode_after = RUN_CONTAINER_TYPE_CODE;
141 return c;
142 }
143 if (card <= DEFAULT_MAX_SIZE) {
144 // to array
145 array_container_t *answer = array_container_create_given_capacity(card);
146 answer->cardinality = 0;
147 for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
148 int run_start = c->runs[rlepos].value;
149 int run_end = run_start + c->runs[rlepos].length;
150
151 for (int run_value = run_start; run_value <= run_end; ++run_value) {
152 answer->array[answer->cardinality++] = (uint16_t)run_value;
153 }
154 }
155 *typecode_after = ARRAY_CONTAINER_TYPE_CODE;
156 return answer;
157 }
158
159 // else to bitset
160 bitset_container_t *answer = bitset_container_create();
161
162 for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
163 int start = c->runs[rlepos].value;
164 int end = start + c->runs[rlepos].length;
165 bitset_set_range(answer->array, start, end + 1);
166 }
167 answer->cardinality = card;
168 *typecode_after = BITSET_CONTAINER_TYPE_CODE;
169 return answer;
170}
171
172// like convert_run_to_efficient_container but frees the old result if needed
173void *convert_run_to_efficient_container_and_free(run_container_t *c,
174 uint8_t *typecode_after) {
175 void *answer = convert_run_to_efficient_container(c, typecode_after);
176 if (answer != c) run_container_free(c);
177 return answer;
178}
179
180/* once converted, the original container is disposed here, rather than
181 in roaring_array
182*/
183
184// TODO: split into run- array- and bitset- subfunctions for sanity;
185// a few function calls won't really matter.
186
187void *convert_run_optimize(void *c, uint8_t typecode_original,
188 uint8_t *typecode_after) {
189 if (typecode_original == RUN_CONTAINER_TYPE_CODE) {
190 void *newc = convert_run_to_efficient_container((run_container_t *)c,
191 typecode_after);
192 if (newc != c) {
193 container_free(c, typecode_original);
194 }
195 return newc;
196 } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) {
197 // it might need to be converted to a run container.
198 array_container_t *c_qua_array = (array_container_t *)c;
199 int32_t n_runs = array_container_number_of_runs(c_qua_array);
200 int32_t size_as_run_container =
201 run_container_serialized_size_in_bytes(n_runs);
202 int32_t card = array_container_cardinality(c_qua_array);
203 int32_t size_as_array_container =
204 array_container_serialized_size_in_bytes(card);
205
206 if (size_as_run_container >= size_as_array_container) {
207 *typecode_after = ARRAY_CONTAINER_TYPE_CODE;
208 return c;
209 }
210 // else convert array to run container
211 run_container_t *answer = run_container_create_given_capacity(n_runs);
212 int prev = -2;
213 int run_start = -1;
214
215 assert(card > 0);
216 for (int i = 0; i < card; ++i) {
217 uint16_t cur_val = c_qua_array->array[i];
218 if (cur_val != prev + 1) {
219 // new run starts; flush old one, if any
220 if (run_start != -1) add_run(answer, run_start, prev);
221 run_start = cur_val;
222 }
223 prev = c_qua_array->array[i];
224 }
225 assert(run_start >= 0);
226 // now prev is the last seen value
227 add_run(answer, run_start, prev);
228 *typecode_after = RUN_CONTAINER_TYPE_CODE;
229 array_container_free(c_qua_array);
230 return answer;
231 } else if (typecode_original ==
232 BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset
233 // does bitset need conversion to run?
234 bitset_container_t *c_qua_bitset = (bitset_container_t *)c;
235 int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
236 int32_t size_as_run_container =
237 run_container_serialized_size_in_bytes(n_runs);
238 int32_t size_as_bitset_container =
239 bitset_container_serialized_size_in_bytes();
240
241 if (size_as_bitset_container <= size_as_run_container) {
242 // no conversion needed.
243 *typecode_after = BITSET_CONTAINER_TYPE_CODE;
244 return c;
245 }
246 // bitset to runcontainer (ported from Java RunContainer(
247 // BitmapContainer bc, int nbrRuns))
248 assert(n_runs > 0); // no empty bitmaps
249 run_container_t *answer = run_container_create_given_capacity(n_runs);
250
251 int long_ctr = 0;
252 uint64_t cur_word = c_qua_bitset->array[0];
253 int run_count = 0;
254 while (true) {
255 while (cur_word == UINT64_C(0) &&
256 long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
257 cur_word = c_qua_bitset->array[++long_ctr];
258
259 if (cur_word == UINT64_C(0)) {
260 bitset_container_free(c_qua_bitset);
261 *typecode_after = RUN_CONTAINER_TYPE_CODE;
262 return answer;
263 }
264
265 int local_run_start = __builtin_ctzll(cur_word);
266 int run_start = local_run_start + 64 * long_ctr;
267 uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
268
269 int run_end = 0;
270 while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
271 long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
272 cur_word_with_1s = c_qua_bitset->array[++long_ctr];
273
274 if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
275 run_end = 64 + long_ctr * 64; // exclusive, I guess
276 add_run(answer, run_start, run_end - 1);
277 bitset_container_free(c_qua_bitset);
278 *typecode_after = RUN_CONTAINER_TYPE_CODE;
279 return answer;
280 }
281 int local_run_end = __builtin_ctzll(~cur_word_with_1s);
282 run_end = local_run_end + long_ctr * 64;
283 add_run(answer, run_start, run_end - 1);
284 run_count++;
285 cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
286 }
287 return answer;
288 } else {
289 assert(false);
290 __builtin_unreachable();
291 return NULL;
292 }
293}
294
295bitset_container_t *bitset_container_from_run_range(const run_container_t *run,
296 uint32_t min, uint32_t max) {
297 bitset_container_t *bitset = bitset_container_create();
298 int32_t union_cardinality = 0;
299 for (int32_t i = 0; i < run->n_runs; ++i) {
300 uint32_t rle_min = run->runs[i].value;
301 uint32_t rle_max = rle_min + run->runs[i].length;
302 bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min);
303 union_cardinality += run->runs[i].length + 1;
304 }
305 union_cardinality += max - min + 1;
306 union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min);
307 bitset_set_lenrange(bitset->array, min, max - min);
308 bitset->cardinality = union_cardinality;
309 return bitset;
310}
311