/*
 * Copyright (c) 2015-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Multibit: fast bitset structure, main runtime.
 *
 * *Structure*
 *
 * For sizes <= MMB_FLAT_MAX_BITS, a flat bit vector is used, stored as N
 * 64-bit blocks followed by one "runt block".
 *
 * For larger sizes, we use a sequence of blocks forming a tree. Each bit in an
 * internal block indicates whether its child block contains valid data. Every
 * level except the last is complete. The last level is just a basic bit
 * vector. A worked example of this layout follows this comment.
 *
 * -----------------------------------------------------------------------------
 * WARNING:
 *
 * mmbit code assumes that it is legal to load the 8 bytes immediately
 * preceding the end of the mmbit. This means that for small mmbits (< 8
 * bytes), data may be read from before the base pointer. It is the user's
 * responsibility to ensure that this is possible.
 * -----------------------------------------------------------------------------
 */
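
/*
 * Worked example of the tree layout (illustrative only; the exact per-level
 * offsets come from mmbit_root_offset_from_level in multibit.c):
 *
 * A multibit of 10000 bits is too large for the flat model, so it is stored
 * as a three-level tree of 64-bit blocks:
 *
 *   level 0: one summary block; each bit covers a subtree of 4096 keys
 *            (only the first ceil(10000/4096) = 3 bits can ever be set)
 *   level 1: 64 blocks (the level is allocated complete); each bit covers
 *            64 keys, and only the first ceil(10000/64) = 157 bits are used
 *   level 2: the leaf bit vector, one bit per key
 *
 * A bit in levels 0 and 1 merely indicates that the child block below it
 * contains valid data.
 */
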
#ifndef MULTIBIT_H
#define MULTIBIT_H

#include "config.h"
#include "ue2common.h"
#include "bitutils.h"
#include "partial_store.h"
#include "unaligned.h"
#include "multibit_internal.h"

#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

#define MMB_ONE (1ULL)
#define MMB_ALL_ONES (0xffffffffffffffffULL)

/** \brief Number of bits in a block. */
#define MMB_KEY_BITS (sizeof(MMB_TYPE) * 8)

#define MMB_KEY_MASK (MMB_KEY_BITS - 1)

// Key structure defines
#define MMB_KEY_SHIFT 6

/** \brief Max size of a flat multibit. */
#define MMB_FLAT_MAX_BITS 256

// Utility functions and data
// see multibit.c for contents
extern const u8 mmbit_keyshift_lut[32];
extern const u8 mmbit_maxlevel_from_keyshift_lut[32];
extern const u8 mmbit_maxlevel_direct_lut[32];
extern const u32 mmbit_root_offset_from_level[7];
extern const u64a mmbit_zero_to_lut[65];

static really_inline
MMB_TYPE mmb_load(const u8 * bits) {
    return unaligned_load_u64a(bits);
}

static really_inline
void mmb_store(u8 *bits, MMB_TYPE val) {
    unaligned_store_u64a(bits, val);
}

static really_inline
void mmb_store_partial(u8 *bits, MMB_TYPE val, u32 block_bits) {
    assert(block_bits <= MMB_KEY_BITS);
    partial_store_u64a(bits, val, ROUNDUP_N(block_bits, 8U) / 8U);
}

static really_inline
MMB_TYPE mmb_single_bit(u32 bit) {
    assert(bit < MMB_KEY_BITS);
    return MMB_ONE << bit;
}

static really_inline
MMB_TYPE mmb_mask_zero_to(u32 bit) {
    assert(bit <= MMB_KEY_BITS);
#ifdef ARCH_32_BIT
    return mmbit_zero_to_lut[bit];
#else
    if (bit == MMB_KEY_BITS) {
        return MMB_ALL_ONES;
    } else {
        return mmb_single_bit(bit) - MMB_ONE;
    }
#endif
}

/** \brief Returns a mask of set bits up to position \a bit. Does not handle
 * the case where bit == MMB_KEY_BITS. */
static really_inline
MMB_TYPE mmb_mask_zero_to_nocheck(u32 bit) {
    assert(bit < MMB_KEY_BITS);
#ifdef ARCH_32_BIT
    return mmbit_zero_to_lut[bit];
#else
    return mmb_single_bit(bit) - MMB_ONE;
#endif
}
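
/*
 * A few illustrative values for the mask helpers above (assuming a 64-bit
 * MMB_TYPE):
 *
 *   mmb_single_bit(3)           == 0x0000000000000008ULL
 *   mmb_mask_zero_to(3)         == 0x0000000000000007ULL  // bits 0..2
 *   mmb_mask_zero_to(64)        == MMB_ALL_ONES
 *   mmb_mask_zero_to_nocheck(3) == 0x0000000000000007ULL  // caller must keep bit < 64
 */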

static really_inline
u32 mmb_test(MMB_TYPE val, u32 bit) {
    assert(bit < MMB_KEY_BITS);
    return (val >> bit) & MMB_ONE;
}

static really_inline
void mmb_set(MMB_TYPE * val, u32 bit) {
    assert(bit < MMB_KEY_BITS);
    *val |= mmb_single_bit(bit);
}

static really_inline
void mmb_clear(MMB_TYPE * val, u32 bit) {
    assert(bit < MMB_KEY_BITS);
    *val &= ~mmb_single_bit(bit);
}

static really_inline
u32 mmb_ctz(MMB_TYPE val) {
    return ctz64(val);
}

static really_inline
u32 mmb_popcount(MMB_TYPE val) {
    return popcount64(val);
}

#ifndef MMMB_DEBUG
#define MDEBUG_PRINTF(x, ...) do { } while(0)
#else
#define MDEBUG_PRINTF DEBUG_PRINTF
#endif

// Switch the following define on to trace writes to multibit.
//#define MMB_TRACE_WRITES
#ifdef MMB_TRACE_WRITES
#define MMB_TRACE(format, ...) \
    printf("mmb [%u bits @ %p] " format, total_bits, bits, ##__VA_ARGS__)
#else
#define MMB_TRACE(format, ...) \
    do { \
    } while (0)
#endif

static really_inline
u32 mmbit_keyshift(u32 total_bits) {
    assert(total_bits > 1);
    u32 n = clz32(total_bits - 1); // subtract one as we're rounding down
    return mmbit_keyshift_lut[n];
}

static really_inline
u32 mmbit_maxlevel(u32 total_bits) {
    assert(total_bits > 1);
    u32 n = clz32(total_bits - 1); // subtract one as we're rounding down
    u32 max_level = mmbit_maxlevel_direct_lut[n];
    assert(max_level <= MMB_MAX_LEVEL);
    return max_level;
}

static really_inline
u32 mmbit_maxlevel_from_keyshift(u32 ks) {
    assert(ks <= 30);
    assert(ks % MMB_KEY_SHIFT == 0);

    u32 max_level = mmbit_maxlevel_from_keyshift_lut[ks];
    assert(max_level <= MMB_MAX_LEVEL);
    return max_level;
}

/** \brief get our keyshift for the current level */
static really_inline
u32 mmbit_get_ks(u32 max_level, u32 level) {
    assert(max_level <= MMB_MAX_LEVEL);
    assert(level <= max_level);
    return (max_level - level) * MMB_KEY_SHIFT;
}

/** \brief get our key value for the current level */
static really_inline
u32 mmbit_get_key_val(u32 max_level, u32 level, u32 key) {
    return (key >> mmbit_get_ks(max_level, level)) & MMB_KEY_MASK;
}
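
/*
 * Illustrative decomposition, assuming a tree with max_level == 2 (i.e. a
 * multibit of more than 4096 and at most 262144 bits): key 150000 breaks into
 * the per-level values
 *
 *   mmbit_get_key_val(2, 0, 150000) == 36   // 150000 >> 12
 *   mmbit_get_key_val(2, 1, 150000) == 39   // (150000 >> 6) & 63
 *   mmbit_get_key_val(2, 2, 150000) == 48   // 150000 & 63
 *
 * since 36 * 4096 + 39 * 64 + 48 == 150000.
 */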

/** \brief get the level root for the current level */
static really_inline
u8 *mmbit_get_level_root(u8 *bits, u32 level) {
    assert(level < ARRAY_LENGTH(mmbit_root_offset_from_level));
    return bits + mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
}

/** \brief get the level root for the current level as const */
static really_inline
const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) {
    assert(level < ARRAY_LENGTH(mmbit_root_offset_from_level));
    return bits + mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
}

/** \brief get the block for this key on the current level as a u8 ptr */
static really_inline
u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
    u8 *level_root = mmbit_get_level_root(bits, level);
    u32 ks = mmbit_get_ks(max_level, level);
    return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
}

/** \brief get the block for this key on the current level as a const u8 ptr */
static really_inline
const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level,
                                    u32 key) {
    const u8 *level_root = mmbit_get_level_root_const(bits, level);
    u32 ks = mmbit_get_ks(max_level, level);
    return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
}

/** \brief get the _byte_ for this key on the current level as a u8 ptr */
static really_inline
u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
    u8 *level_root = mmbit_get_level_root(bits, level);
    u32 ks = mmbit_get_ks(max_level, level);
    return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3));
}

/** \brief get our key value for the current level */
static really_inline
u32 mmbit_get_key_val_byte(u32 max_level, u32 level, u32 key) {
    return (key >> (mmbit_get_ks(max_level, level))) & 0x7;
}

/** \brief Load a flat bitvector block corresponding to N bits. */
static really_inline
MMB_TYPE mmbit_get_flat_block(const u8 *bits, u32 n_bits) {
    assert(n_bits <= MMB_KEY_BITS);
    u32 n_bytes = ROUNDUP_N(n_bits, 8) / 8;
    switch (n_bytes) {
    case 1:
        return *bits;
    case 2:
        return unaligned_load_u16(bits);
    case 3:
    case 4: {
        u32 rv;
        assert(n_bytes <= sizeof(rv));
        memcpy(&rv, bits + n_bytes - sizeof(rv), sizeof(rv));
        rv >>= (sizeof(rv) - n_bytes) * 8; /* need to shift to get things in
                                            * the right position and remove
                                            * junk */
        assert(rv == partial_load_u32(bits, n_bytes));
        return rv;
    }
    default: {
        u64a rv;
        assert(n_bytes <= sizeof(rv));
        memcpy(&rv, bits + n_bytes - sizeof(rv), sizeof(rv));
        rv >>= (sizeof(rv) - n_bytes) * 8; /* need to shift to get things in
                                            * the right position and remove
                                            * junk */
        assert(rv == partial_load_u64a(bits, n_bytes));
        return rv;
    }
    }
}

/** \brief True if this multibit is small enough to use a flat model */
static really_inline
u32 mmbit_is_flat_model(u32 total_bits) {
    return total_bits <= MMB_FLAT_MAX_BITS;
}

static really_inline
u32 mmbit_flat_size(u32 total_bits) {
    assert(mmbit_is_flat_model(total_bits));
    return ROUNDUP_N(total_bits, 8) / 8;
}

static really_inline
u32 mmbit_flat_select_byte(u32 key, UNUSED u32 total_bits) {
    return key / 8;
}

/** \brief returns the dense index of the bit in the given mask. */
static really_inline
u32 mmbit_mask_index(u32 bit, MMB_TYPE mask) {
    assert(bit < MMB_KEY_BITS);
    assert(mmb_test(mask, bit));

    mask &= mmb_mask_zero_to(bit);
    if (mask == 0ULL) {
        return 0; // Common case.
    }
    return mmb_popcount(mask);
}
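
/*
 * For example, with mask == 0x16 (bits 1, 2 and 4 set), mmbit_mask_index()
 * returns the rank of the queried bit among the set bits:
 *
 *   mmbit_mask_index(1, 0x16) == 0
 *   mmbit_mask_index(2, 0x16) == 1
 *   mmbit_mask_index(4, 0x16) == 2
 */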

/** \brief Clear all bits. */
static really_inline
void mmbit_clear(u8 *bits, u32 total_bits) {
    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
    MMB_TRACE("CLEAR\n");
    if (!total_bits) {
        return;
    }
    if (mmbit_is_flat_model(total_bits)) {
        memset(bits, 0, mmbit_flat_size(total_bits));
        return;
    }
    mmb_store(bits, 0);
}

/** \brief Specialisation of \ref mmbit_set for flat models. */
static really_inline
char mmbit_set_flat(u8 *bits, u32 total_bits, u32 key) {
    bits += mmbit_flat_select_byte(key, total_bits);
    u8 mask = 1U << (key % 8);
    char was_set = !!(*bits & mask);
    *bits |= mask;
    return was_set;
}

static really_inline
char mmbit_set_big(u8 *bits, u32 total_bits, u32 key) {
    const u32 max_level = mmbit_maxlevel(total_bits);
    u32 level = 0;
    do {
        u8 *byte_ptr = mmbit_get_byte_ptr(bits, max_level, level, key);
        u8 keymask = 1U << mmbit_get_key_val_byte(max_level, level, key);
        u8 byte = *byte_ptr;
        if (likely(!(byte & keymask))) {
            *byte_ptr = byte | keymask;
            while (level++ != max_level) {
                u8 *block_ptr_1 = mmbit_get_block_ptr(bits, max_level, level, key);
                MMB_TYPE keymask_1 = mmb_single_bit(mmbit_get_key_val(max_level, level, key));
                mmb_store(block_ptr_1, keymask_1);
            }
            return 0;
        }
    } while (level++ != max_level);
    return 1;
}

/** Internal version of \ref mmbit_set without MMB_TRACE, so it can be used by
 * \ref mmbit_sparse_iter_dump. */
static really_inline
char mmbit_set_i(u8 *bits, u32 total_bits, u32 key) {
    assert(key < total_bits);
    if (mmbit_is_flat_model(total_bits)) {
        return mmbit_set_flat(bits, total_bits, key);
    } else {
        return mmbit_set_big(bits, total_bits, key);
    }
}

static really_inline
char mmbit_isset(const u8 *bits, u32 total_bits, u32 key);

/** \brief Sets the given key in the multibit. Returns 0 if the key was NOT
 * already set, 1 otherwise. */
static really_inline
char mmbit_set(u8 *bits, u32 total_bits, u32 key) {
    MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
    char status = mmbit_set_i(bits, total_bits, key);
    MMB_TRACE("SET %u (prev status: %d)\n", key, (int)status);
    assert(mmbit_isset(bits, total_bits, key));
    return status;
}

/** \brief Specialisation of \ref mmbit_isset for flat models. */
static really_inline
char mmbit_isset_flat(const u8 *bits, u32 total_bits, u32 key) {
    bits += mmbit_flat_select_byte(key, total_bits);
    return !!(*bits & (1U << (key % 8U)));
}

static really_inline
char mmbit_isset_big(const u8 *bits, u32 total_bits, u32 key) {
    const u32 max_level = mmbit_maxlevel(total_bits);
    u32 level = 0;
    do {
        const u8 *block_ptr = mmbit_get_block_ptr_const(bits, max_level, level, key);
        MMB_TYPE block = mmb_load(block_ptr);
        if (!mmb_test(block, mmbit_get_key_val(max_level, level, key))) {
            return 0;
        }
    } while (level++ != max_level);
    return 1;
}

/** \brief Returns whether the given key is set. */
static really_inline
char mmbit_isset(const u8 *bits, u32 total_bits, u32 key) {
    MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
    assert(key < total_bits);
    if (mmbit_is_flat_model(total_bits)) {
        return mmbit_isset_flat(bits, total_bits, key);
    } else {
        return mmbit_isset_big(bits, total_bits, key);
    }
}

/** \brief Specialisation of \ref mmbit_unset for flat models. */
static really_inline
void mmbit_unset_flat(u8 *bits, u32 total_bits, u32 key) {
    bits += mmbit_flat_select_byte(key, total_bits);
    *bits &= ~(1U << (key % 8U));
}

// TODO:
// build two versions of this - unset_dangerous that doesn't clear the summary
// block and a regular unset that actually clears ALL the way up the levels if
// possible - might make a utility function for the clear
static really_inline
void mmbit_unset_big(u8 *bits, u32 total_bits, u32 key) {
    /* This function is lazy as it does not clear the summary block
     * entry if the child becomes empty. This is not a correctness problem as the
     * summary block entries are used to mean that their children are valid
     * rather than that they have a set child. */
    const u32 max_level = mmbit_maxlevel(total_bits);
    u32 level = 0;
    do {
        u8 *block_ptr = mmbit_get_block_ptr(bits, max_level, level, key);
        u32 key_val = mmbit_get_key_val(max_level, level, key);
        MMB_TYPE block = mmb_load(block_ptr);
        if (!mmb_test(block, key_val)) {
            return;
        }
        if (level == max_level) {
            mmb_clear(&block, key_val);
            mmb_store(block_ptr, block);
        }
    } while (level++ != max_level);
}

/** \brief Switch off a given key. */
static really_inline
void mmbit_unset(u8 *bits, u32 total_bits, u32 key) {
    MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
    assert(key < total_bits);
    MMB_TRACE("UNSET %u (prev status: %d)\n", key,
              (int)mmbit_isset(bits, total_bits, key));

    if (mmbit_is_flat_model(total_bits)) {
        mmbit_unset_flat(bits, total_bits, key);
    } else {
        mmbit_unset_big(bits, total_bits, key);
    }
}
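
/*
 * Minimal usage sketch for the single-key operations (assuming "bits" points
 * at a correctly sized multibit buffer; the sizing helpers live outside this
 * header):
 *
 *   mmbit_clear(bits, 1000);                 // no keys set
 *   char prev = mmbit_set(bits, 1000, 42);   // prev == 0: key was not set
 *   assert(mmbit_isset(bits, 1000, 42));
 *   mmbit_unset(bits, 1000, 42);             // switch the key back off
 */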

/** \brief Specialisation of \ref mmbit_iterate for flat models. */
static really_inline
u32 mmbit_iterate_flat(const u8 *bits, u32 total_bits, u32 it_in) {
    // Short cut for single-block cases.
    if (total_bits <= MMB_KEY_BITS) {
        MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
        if (it_in != MMB_INVALID) {
            it_in++;
            assert(it_in < total_bits);
            block &= ~mmb_mask_zero_to(it_in);
        }
        if (block) {
            return mmb_ctz(block);
        }
        return MMB_INVALID;
    }

    const u32 last_block = total_bits / MMB_KEY_BITS;
    u32 start; // starting block index

    if (it_in != MMB_INVALID) {
        it_in++;
        assert(it_in < total_bits);

        start = (ROUNDUP_N(it_in, MMB_KEY_BITS) / MMB_KEY_BITS) - 1;
        u32 start_key = start * MMB_KEY_BITS;
        u32 block_size = MIN(MMB_KEY_BITS, total_bits - start_key);
        MMB_TYPE block =
            mmbit_get_flat_block(bits + (start * sizeof(MMB_TYPE)), block_size);
        block &= ~mmb_mask_zero_to(it_in - start_key);

        if (block) {
            return start_key + mmb_ctz(block);
        } else if (start_key + MMB_KEY_BITS >= total_bits) {
            return MMB_INVALID; // That was the final block.
        }
        start++;
    } else {
        start = 0;
    }

    // Remaining full-sized blocks.
    for (; start < last_block; start++) {
        MMB_TYPE block = mmb_load(bits + (start * sizeof(MMB_TYPE)));
        if (block) {
            return (start * MMB_KEY_BITS) + mmb_ctz(block);
        }
    }

    // We may have a final, smaller than full-sized, block to deal with at the
    // end.
    if (total_bits % MMB_KEY_BITS) {
        u32 start_key = start * MMB_KEY_BITS;
        u32 block_size = MIN(MMB_KEY_BITS, total_bits - start_key);
        MMB_TYPE block =
            mmbit_get_flat_block(bits + (start * sizeof(MMB_TYPE)), block_size);
        if (block) {
            return start_key + mmb_ctz(block);
        }
    }

    return MMB_INVALID;
}

static really_inline
u32 mmbit_iterate_big(const u8 * bits, u32 total_bits, u32 it_in) {
    const u32 max_level = mmbit_maxlevel(total_bits);
    u32 level = 0;
    u32 key = 0;
    u32 key_rem = 0;

    if (it_in != MMB_INVALID) {
        // We're continuing a previous iteration, so we need to go
        // to max_level so we can pick up where we left off.
        // NOTE: assumes that we're valid down the whole tree
        key = it_in >> MMB_KEY_SHIFT;
        key_rem = (it_in & MMB_KEY_MASK) + 1;
        level = max_level;
    }
    while (1) {
        if (key_rem < MMB_KEY_BITS) {
            const u8 *block_ptr = mmbit_get_level_root_const(bits, level) +
                                  key * sizeof(MMB_TYPE);
            MMB_TYPE block
                = mmb_load(block_ptr) & ~mmb_mask_zero_to_nocheck(key_rem);
            if (block) {
                key = (key << MMB_KEY_SHIFT) + mmb_ctz(block);
                if (level++ == max_level) {
                    break;
                }
                key_rem = 0;
                continue; // jump the rootwards step if we found a 'tree' non-zero bit
            }
        }
        // rootwards step (block is zero or key_rem == MMB_KEY_BITS)
        if (level-- == 0) {
            return MMB_INVALID; // if we don't find anything and we're at the top level, we're done
        }
        key_rem = (key & MMB_KEY_MASK) + 1;
        key >>= MMB_KEY_SHIFT;
    }
    assert(key < total_bits);
    assert(mmbit_isset(bits, total_bits, key));
    return key;
}

/** \brief Unbounded iterator. Returns the index of the next set bit after \a
 * it_in, or MMB_INVALID.
 *
 * Note: assumes that if you pass in a value of it_in other than MMB_INVALID,
 * that bit must be on (assumes all its summary blocks are set).
 */
static really_inline
u32 mmbit_iterate(const u8 *bits, u32 total_bits, u32 it_in) {
    MDEBUG_PRINTF("%p total_bits %u it_in %u\n", bits, total_bits, it_in);
    assert(it_in < total_bits || it_in == MMB_INVALID);
    if (!total_bits) {
        return MMB_INVALID;
    }
    if (it_in == total_bits - 1) {
        return MMB_INVALID; // it_in is the last key.
    }

    u32 key;
    if (mmbit_is_flat_model(total_bits)) {
        key = mmbit_iterate_flat(bits, total_bits, it_in);
    } else {
        key = mmbit_iterate_big(bits, total_bits, it_in);
    }
    assert(key == MMB_INVALID || mmbit_isset(bits, total_bits, key));
    return key;
}
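
/*
 * Typical iteration pattern (a sketch, assuming "bits" and "total_bits"
 * describe a valid multibit): start from MMB_INVALID and feed each returned
 * key back in until MMB_INVALID comes back.
 *
 *   for (u32 key = mmbit_iterate(bits, total_bits, MMB_INVALID);
 *        key != MMB_INVALID; key = mmbit_iterate(bits, total_bits, key)) {
 *       // ... process set key ...
 *   }
 */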

/** \brief Specialisation of \ref mmbit_any and \ref mmbit_any_precise for flat
 * models. */
static really_inline
char mmbit_any_flat(const u8 *bits, u32 total_bits) {
    if (total_bits <= MMB_KEY_BITS) {
        return !!mmbit_get_flat_block(bits, total_bits);
    }

    const u8 *end = bits + mmbit_flat_size(total_bits);
    for (const u8 *last = end - sizeof(MMB_TYPE); bits < last;
         bits += sizeof(MMB_TYPE)) {
        if (mmb_load(bits)) {
            return 1;
        }
    }

    // Overlapping load at the end.
    return !!mmb_load(end - sizeof(MMB_TYPE));
}

/** \brief True if any keys are (or might be) on in the given multibit.
 *
 * NOTE: mmbit_any is sloppy (may return true when only summary bits are set).
 * Use \ref mmbit_any_precise if you need/want a correct answer.
 */
static really_inline
char mmbit_any(const u8 *bits, u32 total_bits) {
    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
    if (!total_bits) {
        return 0;
    }
    if (mmbit_is_flat_model(total_bits)) {
        return mmbit_any_flat(bits, total_bits);
    }
    return !!mmb_load(bits);
}

/** \brief True if there are any keys on. Guaranteed precise. */
static really_inline
char mmbit_any_precise(const u8 *bits, u32 total_bits) {
    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
    if (!total_bits) {
        return 0;
    }
    if (mmbit_is_flat_model(total_bits)) {
        return mmbit_any_flat(bits, total_bits);
    }

    return mmbit_iterate_big(bits, total_bits, MMB_INVALID) != MMB_INVALID;
}

static really_inline
char mmbit_all_flat(const u8 *bits, u32 total_bits) {
    while (total_bits > MMB_KEY_BITS) {
        if (mmb_load(bits) != MMB_ALL_ONES) {
            return 0;
        }
        bits += sizeof(MMB_TYPE);
        total_bits -= MMB_KEY_BITS;
    }
    while (total_bits > 8) {
        if (*bits != 0xff) {
            return 0;
        }
        bits++;
        total_bits -= 8;
    }
    u8 mask = (u8)mmb_mask_zero_to_nocheck(total_bits);
    return (*bits & mask) == mask;
}

static really_inline
char mmbit_all_big(const u8 *bits, u32 total_bits) {
    u32 ks = mmbit_keyshift(total_bits);

    u32 level = 0;
    for (;;) {
        // Number of bits we expect to see switched on at this level.
        u32 level_bits;
        if (ks != 0) {
            u32 next_level_width = MMB_KEY_BITS << (ks - MMB_KEY_SHIFT);
            level_bits = ROUNDUP_N(total_bits, next_level_width) >> ks;
        } else {
            level_bits = total_bits;
        }

        const u8 *block_ptr = mmbit_get_level_root_const(bits, level);

        // All full-size blocks should be all-ones.
        while (level_bits >= MMB_KEY_BITS) {
            MMB_TYPE block = mmb_load(block_ptr);
            if (block != MMB_ALL_ONES) {
                return 0;
            }
            block_ptr += sizeof(MMB_TYPE);
            level_bits -= MMB_KEY_BITS;
        }

        // If we have bits remaining, we have a runt block on the end.
        if (level_bits > 0) {
            MMB_TYPE block = mmb_load(block_ptr);
            MMB_TYPE mask = mmb_mask_zero_to_nocheck(level_bits);
            if ((block & mask) != mask) {
                return 0;
            }
        }

        if (ks == 0) {
            break;
        }

        ks -= MMB_KEY_SHIFT;
        level++;
    }

    return 1;
}

/** \brief True if all keys are on. Guaranteed precise. */
static really_inline
char mmbit_all(const u8 *bits, u32 total_bits) {
    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);

    if (mmbit_is_flat_model(total_bits)) {
        return mmbit_all_flat(bits, total_bits);
    }
    return mmbit_all_big(bits, total_bits);
}

static really_inline
MMB_TYPE get_flat_masks(u32 base, u32 it_start, u32 it_end) {
    if (it_end <= base) {
        return 0;
    }
    u32 udiff = it_end - base;
    MMB_TYPE mask = udiff < 64 ? mmb_mask_zero_to_nocheck(udiff) : MMB_ALL_ONES;
    if (it_start >= base) {
        u32 ldiff = it_start - base;
        MMB_TYPE lmask = ldiff < 64 ? ~mmb_mask_zero_to_nocheck(ldiff) : 0;
        mask &= lmask;
    }
    return mask;
}
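
/*
 * Worked example: for the flat block whose first key is 64, an iteration
 * range of [70, 100) produces
 *
 *   get_flat_masks(64, 70, 100)
 *       == (mmb_mask_zero_to_nocheck(36) & ~mmb_mask_zero_to_nocheck(6))
 *
 * i.e. bits 6..35 of the block, which correspond to keys 70..99.
 */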

/** \brief Specialisation of \ref mmbit_iterate_bounded for flat models. */
static really_inline
u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin,
                               u32 end) {
    // Short cut for single-block cases.
    if (total_bits <= MMB_KEY_BITS) {
        MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
        block &= get_flat_masks(0, begin, end);
        if (block) {
            return mmb_ctz(block);
        }
        return MMB_INVALID;
    }

    const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);

    // Iterate over full-sized blocks.
    for (u32 i = ROUNDDOWN_N(begin, MMB_KEY_BITS), e = MIN(end, last_block);
         i < e; i += MMB_KEY_BITS) {
        const u8 *block_ptr = bits + i / 8;
        MMB_TYPE block = mmb_load(block_ptr);
        block &= get_flat_masks(i, begin, end);
        if (block) {
            return i + mmb_ctz(block);
        }
    }

    // Final block, which is less than full-sized.
    if (end > last_block) {
        const u8 *block_ptr = bits + last_block / 8;
        u32 num_bits = total_bits - last_block;
        MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
        block &= get_flat_masks(last_block, begin, end);
        if (block) {
            return last_block + mmb_ctz(block);
        }
    }

    return MMB_INVALID;
}

static really_inline
MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max,
                         u64a block_base) {
    const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT;
    u64a lshift = (block_min - block_base) >> level_shift;
    u64a ushift = (block_max - block_base) >> level_shift;
    MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0;
    MMB_TYPE umask =
        ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES;
    return lmask & umask;
}

static really_inline
u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) {
    u64a key = 0;
    u32 ks = mmbit_keyshift(total_bits);
    const u32 max_level = mmbit_maxlevel_from_keyshift(ks);
    u32 level = 0;
    --it_end; // make end-limit inclusive
    for (;;) {
        assert(level <= max_level);

        u64a block_width = MMB_KEY_BITS << ks;
        u64a block_base = key * block_width;
        u64a block_min = MAX(it_start, block_base);
        u64a block_max = MIN(it_end, block_base + block_width - 1);
        const u8 *block_ptr =
            mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE);
        MMB_TYPE block = mmb_load(block_ptr);
        block &= get_lowhi_masks(level, max_level, block_min, block_max, block_base);
        if (block) {
            // Found a bit, go down a level
            key = (key << MMB_KEY_SHIFT) + mmb_ctz(block);
            if (level++ == max_level) {
                return key;
            }
            ks -= MMB_KEY_SHIFT;
        } else {
            // No bit found, go up a level
            // we know that this block didn't have any answers, so we can push
            // our start iterator forward.
            u64a next_start = block_base + block_width;
            if (next_start > it_end) {
                break;
            }
            if (level-- == 0) {
                break;
            }
            it_start = next_start;
            key >>= MMB_KEY_SHIFT;
            ks += MMB_KEY_SHIFT;
        }
    }
    return MMB_INVALID;
}

/** \brief Bounded iterator. Returns the index of the first set bit between
 * it_start (inclusive) and it_end (exclusive) or MMB_INVALID if no bits are
 * set in that range.
 */
static really_inline
u32 mmbit_iterate_bounded(const u8 *bits, u32 total_bits, u32 it_start,
                          u32 it_end) {
    MDEBUG_PRINTF("%p total_bits %u it_start %u it_end %u\n", bits, total_bits,
                  it_start, it_end);
    assert(it_start <= it_end);
    assert(it_end <= total_bits);
    if (!total_bits || it_end == it_start) {
        return MMB_INVALID;
    }
    assert(it_start < total_bits);
    u32 key;
    if (mmbit_is_flat_model(total_bits)) {
        key = mmbit_iterate_bounded_flat(bits, total_bits, it_start, it_end);
    } else {
        key = mmbit_iterate_bounded_big(bits, total_bits, it_start, it_end);
    }
    assert(key == MMB_INVALID || mmbit_isset(bits, total_bits, key));
    return key;
}
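
/*
 * Sketch of counting the set keys in [begin, end), in the same style as the
 * loop in mmbit_unset_range_big() below:
 *
 *   u32 count = 0;
 *   for (u32 i = begin; i < end;) {
 *       i = mmbit_iterate_bounded(bits, total_bits, i, end);
 *       if (i == MMB_INVALID) {
 *           break;
 *       }
 *       count++;
 *       i++;
 *   }
 */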

/** \brief Specialisation of \ref mmbit_unset_range for flat models. */
static really_inline
void mmbit_unset_range_flat(u8 *bits, u32 total_bits, u32 begin, u32 end) {
    const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);

    // Iterate over full-sized blocks.
    for (u32 i = ROUNDDOWN_N(begin, MMB_KEY_BITS), e = MIN(end, last_block);
         i < e; i += MMB_KEY_BITS) {
        u8 *block_ptr = bits + i / 8;
        MMB_TYPE block = mmb_load(block_ptr);
        MMB_TYPE mask = get_flat_masks(i, begin, end);
        mmb_store(block_ptr, block & ~mask);
    }

    // Final block, which is less than full-sized.
    if (end > last_block) {
        u8 *block_ptr = bits + last_block / 8;
        u32 num_bits = total_bits - last_block;
        MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
        MMB_TYPE mask = get_flat_masks(last_block, begin, end);
        mmb_store_partial(block_ptr, block & ~mask, num_bits);
    }
}

static really_inline
void mmbit_unset_range_big(u8 *bits, const u32 total_bits, u32 begin,
                           u32 end) {
    // TODO: combine iterator and unset operation; completely replace this
    u32 i = begin;
    for (;;) {
        i = mmbit_iterate_bounded(bits, total_bits, i, end);
        if (i == MMB_INVALID) {
            break;
        }
        mmbit_unset_big(bits, total_bits, i);
        if (++i == end) {
            break;
        }
    }
}

/** \brief Unset a whole range of bits. Ensures that all bits between \a begin
 * (inclusive) and \a end (exclusive) are switched off. */
static really_inline
void mmbit_unset_range(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
    MDEBUG_PRINTF("%p total_bits %u begin %u end %u\n", bits, total_bits, begin,
                  end);
    assert(begin <= end);
    assert(end <= total_bits);
    if (mmbit_is_flat_model(total_bits)) {
        mmbit_unset_range_flat(bits, total_bits, begin, end);
    } else {
        mmbit_unset_range_big(bits, total_bits, begin, end);
    }
    // No bits are on in [begin, end) once we're done.
    assert(MMB_INVALID == mmbit_iterate_bounded(bits, total_bits, begin, end));
}

/** \brief Specialisation of \ref mmbit_init_range for flat models. */
static really_inline
void mmbit_init_range_flat(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
    const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);

    // Iterate over full-sized blocks.
    for (u32 i = 0; i < last_block; i += MMB_KEY_BITS) {
        mmb_store(bits + i / 8, get_flat_masks(i, begin, end));
    }

    // Final block, which is less than full-sized.
    if (total_bits % MMB_KEY_BITS) {
        u32 num_bits = total_bits - last_block;
        MMB_TYPE block = get_flat_masks(last_block, begin, end);
        mmb_store_partial(bits + last_block / 8, block, num_bits);
    }
}

static really_inline
void mmbit_init_range_big(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
    u32 ks = mmbit_keyshift(total_bits);
    u32 level = 0;

    for (;;) {
        u8 *block = mmbit_get_level_root(bits, level);
        u32 k1 = begin >> ks, k2 = end >> ks;

        // Summary blocks need to account for the runt block on the end.
        if ((k2 << ks) != end) {
            k2++;
        }

        // Partial block to deal with beginning.
        block += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
        if (k1 % MMB_KEY_BITS) {
            u32 idx = k1 / MMB_KEY_BITS;
            u32 block_end = (idx + 1) * MMB_KEY_BITS;

            // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge cases
            // here (see the branch in mmb_mask_zero_to).
            MMB_TYPE mask = MMB_ALL_ONES << (k1 % MMB_KEY_BITS);

            if (k2 < block_end) {
                assert(k2 % MMB_KEY_BITS);
                mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
                mmb_store(block, mask);
                goto next_level;
            } else {
                mmb_store(block, mask);
                k1 = block_end;
                block += sizeof(MMB_TYPE);
            }
        }

        // Write blocks filled with ones until we get to the last block.
        for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
            mmb_store(block, MMB_ALL_ONES);
            block += sizeof(MMB_TYPE);
        }

        // Final block.
        if (likely(k1 < k2)) {
            // Again, if k2 was at a block boundary, it would have been handled
            // by the previous loop, so we know k2 % MMB_KEY_BITS != 0 and can
            // avoid the branch in mmb_mask_zero_to here.
            assert(k2 % MMB_KEY_BITS);
            MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
            mmb_store(block, mask);
        }

    next_level:
        if (ks == 0) {
            break; // Last level is done, finished.
        }

        ks -= MMB_KEY_SHIFT;
        level++;
    }
}

/** \brief Initialises the multibit so that only the given range of bits is
 * set.
 *
 * Ensures that all bits between \a begin (inclusive) and \a end (exclusive)
 * are switched on.
 */
static really_inline
void mmbit_init_range(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
    MDEBUG_PRINTF("%p total_bits %u begin %u end %u\n", bits, total_bits, begin,
                  end);
    assert(begin <= end);
    assert(end <= total_bits);

    if (!total_bits) {
        return;
    }

    // Short cut for cases where we're not actually setting any bits; just
    // clear the multibit.
    if (begin == end) {
        mmbit_clear(bits, total_bits);
        return;
    }

    if (mmbit_is_flat_model(total_bits)) {
        mmbit_init_range_flat(bits, total_bits, begin, end);
    } else {
        mmbit_init_range_big(bits, total_bits, begin, end);
    }

    assert(begin == end ||
           mmbit_iterate(bits, total_bits, MMB_INVALID) == begin);
    assert(!end || begin == end ||
           mmbit_iterate(bits, total_bits, end - 1) == MMB_INVALID);
}
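
/*
 * For instance (a sketch): mmbit_init_range(bits, 1000, 0, 1000) switches on
 * every key, mmbit_init_range(bits, 1000, 10, 20) leaves exactly keys 10..19
 * on, and mmbit_init_range(bits, 1000, 0, 0) is equivalent to
 * mmbit_clear(bits, 1000).
 */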

/** \brief Determine the number of \ref mmbit_sparse_state elements required.
 */
static really_inline
u32 mmbit_sparse_iter_state_size(u32 total_bits) {
    if (mmbit_is_flat_model(total_bits)) {
        return 2;
    }
    u32 levels = mmbit_maxlevel(total_bits);
    return levels + 1;
}

#ifdef DUMP_SUPPORT
// Dump function, defined in multibit.c.
void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it, u32 total_bits);
#endif

/** Internal: common loop used by mmbit_sparse_iter_{begin,next}_big. Returns
 * matching next key given starting state, or MMB_INVALID. */
static really_inline
u32 mmbit_sparse_iter_exec(const u8 *bits, u32 key, u32 *idx, u32 level,
                           const u32 max_level, struct mmbit_sparse_state *s,
                           const struct mmbit_sparse_iter *it_root,
                           const struct mmbit_sparse_iter *it) {
    for (;;) {
        MMB_TYPE block = s[level].mask;
        if (block) {
            u32 bit = mmb_ctz(block);
            key = (key << MMB_KEY_SHIFT) + bit;
            u32 bit_idx = mmbit_mask_index(bit, it->mask);
            if (level++ == max_level) {
                // we've found a key
                *idx = it->val + bit_idx;
                return key;
            } else {
                // iterator record is the start of the level (current it->val)
                // plus N, where N is the dense index of the bit in the current
                // level's itmask
                u32 iter_key = it->val + bit_idx;
                it = it_root + iter_key;
                MMB_TYPE nextblock =
                    mmb_load(mmbit_get_level_root_const(bits, level) +
                             key * sizeof(MMB_TYPE));
                s[level].mask = nextblock & it->mask;
                s[level].itkey = iter_key;
            }
        } else {
            // No bits set in this block
            if (level-- == 0) {
                break; // no key available
            }
            key >>= MMB_KEY_SHIFT;
            // Update state mask and iterator
            s[level].mask &= (s[level].mask - 1);
            it = it_root + s[level].itkey;
        }
    }
    return MMB_INVALID;
}

static really_inline
u32 mmbit_sparse_iter_begin_big(const u8 *bits, u32 total_bits, u32 *idx,
                                const struct mmbit_sparse_iter *it_root,
                                struct mmbit_sparse_state *s) {
    const struct mmbit_sparse_iter *it = it_root;
    u32 key = 0;
    MMB_TYPE block = mmb_load(bits) & it->mask;
    if (!block) {
        return MMB_INVALID;
    }

    // Load first block into top level state.
    const u32 max_level = mmbit_maxlevel(total_bits);
    s[0].mask = block;
    s[0].itkey = 0;
    return mmbit_sparse_iter_exec(bits, key, idx, 0, max_level,
                                  s, it_root, it);
}

/** \brief Specialisation of \ref mmbit_sparse_iter_begin for flat models. */
static really_inline
u32 mmbit_sparse_iter_begin_flat(const u8 *bits, u32 total_bits, u32 *idx,
                                 const struct mmbit_sparse_iter *it_root,
                                 struct mmbit_sparse_state *s) {
    // Small cases have everything in the root iterator mask.
    if (total_bits <= MMB_KEY_BITS) {
        MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
        block &= it_root->mask;
        if (!block) {
            return MMB_INVALID;
        }

        s->mask = block;
        u32 key = mmb_ctz(block);
        *idx = mmbit_mask_index(key, it_root->mask);
        return key;
    }

    // Otherwise, the root iterator mask tells us which blocks (which we lay out
    // linearly in the flat model) could contain keys.
    assert(mmbit_maxlevel(total_bits) == 1); // Should only be two levels
    MMB_TYPE root = it_root->mask;
    for (; root; root &= (root - 1)) {
        u32 bit = mmb_ctz(root);
        u32 bit_idx = mmbit_mask_index(bit, it_root->mask);
        u32 iter_key = it_root->val + bit_idx;
        const struct mmbit_sparse_iter *it = it_root + iter_key;
        u32 block_key_min = bit * MMB_KEY_BITS;
        u32 block_key_max = block_key_min + MMB_KEY_BITS;
        MMB_TYPE block;
        if (block_key_max > total_bits) {
            block_key_max = total_bits;
            block = mmbit_get_flat_block(bits + (bit * sizeof(MMB_TYPE)),
                                         block_key_max - block_key_min);
        } else {
            block = mmb_load(bits + (bit * sizeof(MMB_TYPE)));
        }

        block &= it->mask;
        if (block) {
            s[0].mask = root;
            s[1].mask = block;
            s[1].itkey = iter_key;
            u32 key = mmb_ctz(block);
            *idx = it->val + mmbit_mask_index(key, it->mask);
            return key + block_key_min;
        }
    }

    return MMB_INVALID;
}

/** \brief Sparse iterator, find first key.
 *
 * Returns the first of the bits specified by the iterator \a it_root that is
 * on, and initialises the state \a s. If none of the bits specified by the
 * iterator are on, returns MMB_INVALID.
 */
static really_inline
u32 mmbit_sparse_iter_begin(const u8 *bits, u32 total_bits, u32 *idx,
                            const struct mmbit_sparse_iter *it_root,
                            struct mmbit_sparse_state *s) {
    assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));

    // Our state _may_ be on the stack
#ifndef _WIN32
    assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
    assert(ISALIGNED_N(s, 4));
#endif

    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
    // iterator should have _something_ at the root level
    assert(it_root->mask != 0);
    u32 key;
    if (mmbit_is_flat_model(total_bits)) {
        key = mmbit_sparse_iter_begin_flat(bits, total_bits, idx, it_root, s);
    } else {
        key = mmbit_sparse_iter_begin_big(bits, total_bits, idx, it_root, s);
    }
    if (key != MMB_INVALID) {
        assert(key < total_bits);
        assert(mmbit_isset(bits, total_bits, key));
    }
    return key;
}

static really_inline
u32 mmbit_sparse_iter_next_big(const u8 *bits, u32 total_bits, u32 last_key,
                               u32 *idx,
                               const struct mmbit_sparse_iter *it_root,
                               struct mmbit_sparse_state *s) {
    const u32 max_level = mmbit_maxlevel(total_bits);
    u32 key = last_key >> MMB_KEY_SHIFT;
    s[max_level].mask &= (s[max_level].mask - 1);
    const struct mmbit_sparse_iter *it = it_root + s[max_level].itkey;
    return mmbit_sparse_iter_exec(bits, key, idx, max_level, max_level, s,
                                  it_root, it);
}

/** \brief Specialisation of \ref mmbit_sparse_iter_next for flat models. */
static really_inline
u32 mmbit_sparse_iter_next_flat(const u8 *bits, const u32 total_bits, u32 *idx,
                                const struct mmbit_sparse_iter *it_root,
                                struct mmbit_sparse_state *s) {
    if (total_bits <= MMB_KEY_BITS) {
        // All of our data is already in the s->mask, so we just need to scrape
        // off the next match.
        s->mask &= (s->mask - 1);
        if (s->mask) {
            u32 key = mmb_ctz(s->mask);
            *idx = mmbit_mask_index(key, it_root->mask);
            return key;
        }
    } else {
        assert(s[0].mask);

        s[1].mask &= (s[1].mask - 1); // Remove previous key from iter state.
        u32 bit = mmb_ctz(s[0].mask); // Flat block currently being accessed.

        for (;;) {
            if (s[1].mask) {
                u32 key = mmb_ctz(s[1].mask);
                const struct mmbit_sparse_iter *it = it_root + s[1].itkey;
                *idx = it->val + mmbit_mask_index(key, it->mask);
                key += (bit * MMB_KEY_BITS);
                return key;
            }

            // Otherwise, we have no keys left in this block. Consult the root
            // mask and find the next one.

            s[0].mask &= s[0].mask - 1;
            if (!s[0].mask) {
                break;
            }

            bit = mmb_ctz(s[0].mask);
            u32 bit_idx = mmbit_mask_index(bit, it_root->mask);
            u32 iter_key = it_root->val + bit_idx;
            const struct mmbit_sparse_iter *it = it_root + iter_key;
            u32 block_key_min = bit * MMB_KEY_BITS;
            u32 block_key_max = block_key_min + MMB_KEY_BITS;
            MMB_TYPE block;
            if (block_key_max > total_bits) {
                block_key_max = total_bits;
                block = mmbit_get_flat_block(bits + (bit * sizeof(MMB_TYPE)),
                                             block_key_max - block_key_min);
            } else {
                block = mmb_load(bits + (bit * sizeof(MMB_TYPE)));
            }

            s[1].mask = block & it->mask;
            s[1].itkey = iter_key;
        }
    }

    return MMB_INVALID;
}

/** \brief Sparse iterator, find next key.
 *
 * Takes in a sparse iterator tree structure \a it_root and a state array, and
 * finds the next on bit (from the set of bits specified in the iterator).
 *
 * NOTE: The sparse iterator stores copies of the multibit blocks in its state,
 * so it is not necessarily safe to set or unset bits in the multibit while
 * iterating: the changes you make may or may not be taken into account
 * by the iterator.
 */
static really_inline
u32 mmbit_sparse_iter_next(const u8 *bits, u32 total_bits, u32 last_key,
                           u32 *idx, const struct mmbit_sparse_iter *it_root,
                           struct mmbit_sparse_state *s) {
    assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));

    // Our state _may_ be on the stack
#ifndef _WIN32
    assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
    assert(ISALIGNED_N(s, 4));
#endif

    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
    MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key);
    UNUSED u32 last_idx = *idx; // for assertion at the end
    // our iterator should have _something_ at the root level
    assert(it_root->mask != 0);
    assert(last_key < total_bits);

    u32 key;
    if (mmbit_is_flat_model(total_bits)) {
        key = mmbit_sparse_iter_next_flat(bits, total_bits, idx, it_root, s);
    } else {
        key = mmbit_sparse_iter_next_big(bits, total_bits, last_key, idx,
                                         it_root, s);
    }
    if (key != MMB_INVALID) {
        MDEBUG_PRINTF("END NEXT: key=%u, idx=%u\n", key, *idx);
        assert(key < total_bits);
        assert(key > last_key);
        assert(mmbit_isset(bits, total_bits, key));
        assert(*idx > last_idx);
    } else {
        MDEBUG_PRINTF("END NEXT: no more keys\n");
    }
    return key;
}
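
/*
 * Sketch of a full sparse-iteration loop. The iterator structure "it" is
 * assumed to have been built ahead of time (see mmbit_sparse_iter in
 * multibit_internal.h); the state array must have at least
 * mmbit_sparse_iter_state_size(total_bits) elements.
 *
 *   struct mmbit_sparse_state state[MAX_SPARSE_ITER_STATES];
 *   u32 idx = 0;
 *   for (u32 key = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, state);
 *        key != MMB_INVALID;
 *        key = mmbit_sparse_iter_next(bits, total_bits, key, &idx, it, state)) {
 *       // key is a set bit that is also in the iterator; idx is its dense
 *       // index within the iterator's key set
 *   }
 */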

/** \brief Specialisation of \ref mmbit_sparse_iter_unset for flat models. */
static really_inline
void mmbit_sparse_iter_unset_flat(u8 *bits, u32 total_bits,
                                  const struct mmbit_sparse_iter *it_root) {
    if (total_bits <= MMB_KEY_BITS) {
        // Everything is in the root mask: we can just mask those bits off.
        MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
        block &= ~it_root->mask;
        mmb_store_partial(bits, block, total_bits);
        return;
    }

    // Larger case, we have two iterator levels to worry about.
    u32 bit_idx = 0;
    for (MMB_TYPE root = it_root->mask; root; root &= (root - 1), bit_idx++) {
        u32 bit = mmb_ctz(root);
        u32 block_key_min = bit * MMB_KEY_BITS;
        u32 block_key_max = block_key_min + MMB_KEY_BITS;
        u8 *block_ptr = bits + (bit * sizeof(MMB_TYPE));
        u32 iter_key = it_root->val + bit_idx;
        const struct mmbit_sparse_iter *it = it_root + iter_key;
        if (block_key_max <= total_bits) {
            // Full-sized block.
            MMB_TYPE block = mmb_load(block_ptr);
            block &= ~it->mask;
            mmb_store(block_ptr, block);
        } else {
            // Runt (final) block.
            u32 num_bits = total_bits - block_key_min;
            MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
            block &= ~it->mask;
            mmb_store_partial(block_ptr, block, num_bits);
            break; // We know this is the last block.
        }
    }
}

static really_inline
void mmbit_sparse_iter_unset_big(u8 *bits, u32 total_bits,
                                 const struct mmbit_sparse_iter *it_root,
                                 struct mmbit_sparse_state *s) {
    const struct mmbit_sparse_iter *it = it_root;
    MMB_TYPE block = mmb_load(bits) & it->mask;
    if (!block) {
        return;
    }

    u32 key = 0;
    const u32 max_level = mmbit_maxlevel(total_bits);
    u32 level = 0;

    // Load first block into top level state
    s[level].mask = block;
    s[level].itkey = 0;
    for (;;) {
        block = s[level].mask;
        if (block) {
            if (level == max_level) {
                // bottom level block: we want to mask out the bits specified
                // by the iterator mask and then go back up a level.
                u8 *block_ptr =
                    mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
                MMB_TYPE real_block = mmb_load(block_ptr);
                real_block &= ~(it->mask);
                mmb_store(block_ptr, real_block);
                goto uplevel; // still cheap and nasty
            } else {
                u32 bit = mmb_ctz(block);
                key = (key << MMB_KEY_SHIFT) + bit;
                level++;

                // iterator record is the start of the level (current it->val)
                // plus N, where N is the dense index of the bit in the current
                // level's itmask
                u32 iter_key = it->val + mmbit_mask_index(bit, it->mask);
                it = it_root + iter_key;
                MMB_TYPE nextblock =
                    mmb_load(mmbit_get_level_root_const(bits, level) +
                             key * sizeof(MMB_TYPE));
                s[level].mask = nextblock & it->mask;
                s[level].itkey = iter_key;
            }
        } else {
uplevel:
            // No bits set in this block
            if (level == 0) {
                return; // we are done
            }
            u8 *block_ptr =
                mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
            MMB_TYPE real_block = mmb_load(block_ptr);
            key >>= MMB_KEY_SHIFT;
            level--;

            if (real_block == 0) {
                // If we've zeroed our block For Real (unmasked by iterator),
                // we can clear the parent bit that led us to it, so that
                // we don't go down this particular garden path again later.
                u32 bit = mmb_ctz(s[level].mask);
                u8 *parent_ptr =
                    mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
                MMB_TYPE parent_block = mmb_load(parent_ptr);
                mmb_clear(&parent_block, bit);
                mmb_store(parent_ptr, parent_block);
            }

            // Update state mask and iterator
            s[level].mask &= (s[level].mask - 1);
            it = it_root + s[level].itkey;
        }
    }
}

/** \brief Sparse iterator, unset all bits.
 *
 * Takes in a sparse iterator tree structure and switches off any entries found
 * therein.
 */
static really_inline
void mmbit_sparse_iter_unset(u8 *bits, u32 total_bits,
                             const struct mmbit_sparse_iter *it,
                             struct mmbit_sparse_state *s) {
    assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter)));

    // Our state _may_ be on the stack
#ifndef _WIN32
    assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
    assert(ISALIGNED_N(s, 4));
#endif

    MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);

#ifdef MMB_TRACE_WRITES
    MMB_TRACE("ITER-UNSET iter=[");
    mmbit_sparse_iter_dump(it, total_bits);
    printf("] actually on=[");
    struct mmbit_sparse_state tmp[MAX_SPARSE_ITER_STATES];
    u32 idx = 0;
    u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, tmp);
    for (; i != MMB_INVALID;
         i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, tmp)) {
        printf(" %u", i);
    }
    printf("]\n");
#endif

    if (mmbit_is_flat_model(total_bits)) {
        mmbit_sparse_iter_unset_flat(bits, total_bits, it);
    } else {
        mmbit_sparse_iter_unset_big(bits, total_bits, it, s);
    }
}

#ifdef __cplusplus
} // extern "C"
#endif

#endif // MULTIBIT_H