// Copyright (c) 2018 Kenton Varda and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "table.h"
#include "debug.h"
#include <stdlib.h>

namespace kj {
namespace _ {
static inline uint lg(uint value) {
  // Compute floor(log2(value)).
  //
  // Undefined for value = 0.
#if _MSC_VER
  unsigned long i;
  auto found = _BitScanReverse(&i, value);
  KJ_DASSERT(found);  // !found means value = 0
  return i;
#else
  return sizeof(uint) * 8 - 1 - __builtin_clz(value);
#endif
}
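
// Illustrative values (not exercised anywhere; just to make the contract concrete):
//   lg(1) == 0, lg(2) == 1, lg(3) == 1, lg(8) == 3, lg(1000) == 9
// since 2^9 = 512 <= 1000 < 1024 = 2^10.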

void throwDuplicateTableRow() {
  KJ_FAIL_REQUIRE("inserted row already exists in table");
}

void logHashTableInconsistency() {
  KJ_LOG(ERROR,
      "HashIndex detected hash table inconsistency. This can happen if you create a kj::Table "
      "with a hash index and you modify the rows in the table post-indexing in a way that would "
      "change their hash. This is a serious bug which will lead to undefined behavior."
      "\nstack: ", kj::getStackTrace());
}

// List of primes where each element is roughly double the previous. Obtained from:
//   http://planetmath.org/goodhashtableprimes
// Primes < 53 were added to ensure that small tables don't allocate excessive memory.
static const size_t PRIMES[] = {
           1,  // 2^ 0 = 1
           3,  // 2^ 1 = 2
           5,  // 2^ 2 = 4
          11,  // 2^ 3 = 8
          23,  // 2^ 4 = 16
          53,  // 2^ 5 = 32
          97,  // 2^ 6 = 64
         193,  // 2^ 7 = 128
         389,  // 2^ 8 = 256
         769,  // 2^ 9 = 512
        1543,  // 2^10 = 1024
        3079,  // 2^11 = 2048
        6151,  // 2^12 = 4096
       12289,  // 2^13 = 8192
       24593,  // 2^14 = 16384
       49157,  // 2^15 = 32768
       98317,  // 2^16 = 65536
      196613,  // 2^17 = 131072
      393241,  // 2^18 = 262144
      786433,  // 2^19 = 524288
     1572869,  // 2^20 = 1048576
     3145739,  // 2^21 = 2097152
     6291469,  // 2^22 = 4194304
    12582917,  // 2^23 = 8388608
    25165843,  // 2^24 = 16777216
    50331653,  // 2^25 = 33554432
   100663319,  // 2^26 = 67108864
   201326611,  // 2^27 = 134217728
   402653189,  // 2^28 = 268435456
   805306457,  // 2^29 = 536870912
  1610612741,  // 2^30 = 1073741824
};

uint chooseBucket(uint hash, uint count) {
  // Integer modulus is really, really slow. It turns out that the compiler can generate much
  // faster code if the denominator is a constant. Since we have a fixed set of possible
  // denominators, a big old switch() statement is a win.

  // TODO(perf): Consider using power-of-two bucket sizes. We can safely do so as long as we demand
  //   high-quality hash functions -- kj::hashCode() needs good diffusion even for integers, can't
  //   just be a cast. Also be sure to implement Robin Hood hashing to avoid extremely bad negative
  //   lookup time when elements have sequential hashes (otherwise, it could be necessary to scan
  //   the entire list to determine that an element isn't present).

  switch (count) {
#define HANDLE(i) case i##u: return hash % i##u
    HANDLE(         1);
    HANDLE(         3);
    HANDLE(         5);
    HANDLE(        11);
    HANDLE(        23);
    HANDLE(        53);
    HANDLE(        97);
    HANDLE(       193);
    HANDLE(       389);
    HANDLE(       769);
    HANDLE(      1543);
    HANDLE(      3079);
    HANDLE(      6151);
    HANDLE(     12289);
    HANDLE(     24593);
    HANDLE(     49157);
    HANDLE(     98317);
    HANDLE(    196613);
    HANDLE(    393241);
    HANDLE(    786433);
    HANDLE(   1572869);
    HANDLE(   3145739);
    HANDLE(   6291469);
    HANDLE(  12582917);
    HANDLE(  25165843);
    HANDLE(  50331653);
    HANDLE( 100663319);
    HANDLE( 201326611);
    HANDLE( 402653189);
    HANDLE( 805306457);
    HANDLE(1610612741);
#undef HANDLE
    default: return hash % count;
  }
}
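
// An illustration of the codegen win (not a guarantee, but typical of GCC/Clang/MSVC): with a
// constant denominator the compiler can strength-reduce `hash % 53u` into a multiply by a
// precomputed "magic" reciprocal plus shifts, avoiding the hardware division instruction that
// the `default` case above is stuck with.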

size_t chooseHashTableSize(uint size) {
  if (size == 0) return 0;

  // Add 1 to compensate for lg() rounding down, then look up the best prime bucket size for
  // that target size.
  return PRIMES[lg(size) + 1];
}
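
// Worked example (illustrative only): for size == 10, lg(10) == 3, so this returns
// PRIMES[4] == 23 -- i.e. a table sized for 10 elements starts out well under half full.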

kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize) {
  // Rehash the whole table.

  KJ_REQUIRE(targetSize < (1 << 30), "hash table has reached maximum size");

  size_t size = chooseHashTableSize(targetSize);

  if (size < oldBuckets.size()) {
    size = oldBuckets.size();
  }

  auto newBuckets = kj::heapArray<HashBucket>(size);
  memset(newBuckets.begin(), 0, sizeof(HashBucket) * size);

  for (auto& oldBucket: oldBuckets) {
    if (oldBucket.isOccupied()) {
      for (uint i = oldBucket.hash % newBuckets.size();; i = probeHash(newBuckets, i)) {
        auto& newBucket = newBuckets[i];
        if (newBucket.isEmpty()) {
          newBucket = oldBucket;
          break;
        }
      }
    }
  }

  return newBuckets;
}
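
// Collision handling, sketched (assuming probeHash() in table.h advances linearly to the next
// bucket, wrapping at the end of the array): if two occupied old buckets carry hashes that are
// both congruent to 7 mod newBuckets.size(), the first lands in bucket 7 and the second probes
// 7, finds it taken, and settles in bucket 8 (or the next empty slot after it).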

// =======================================================================================
// BTree

#if _WIN32
#define aligned_free _aligned_free
#else
#define aligned_free ::free
#endif

BTreeImpl::BTreeImpl()
    : tree(const_cast<NodeUnion*>(&EMPTY_NODE)),
      treeCapacity(1),
      height(0),
      freelistHead(1),
      freelistSize(0),
      beginLeaf(0),
      endLeaf(0) {}
BTreeImpl::~BTreeImpl() noexcept(false) {
  if (tree != &EMPTY_NODE) {
    aligned_free(tree);
  }
}

const BTreeImpl::NodeUnion BTreeImpl::EMPTY_NODE = {{{0, {0}}}};

void BTreeImpl::verify(size_t size, FunctionParam<bool(uint, uint)> f) {
  KJ_ASSERT(verifyNode(size, f, 0, height, nullptr) == size);
}
size_t BTreeImpl::verifyNode(size_t size, FunctionParam<bool(uint, uint)>& f,
                             uint pos, uint height, MaybeUint maxRow) {
  if (height > 0) {
    auto& parent = tree[pos].parent;

    auto n = parent.keyCount();
    size_t total = 0;
    for (auto i: kj::zeroTo(n)) {
      KJ_ASSERT(*parent.keys[i] < size);
      total += verifyNode(size, f, parent.children[i], height - 1, parent.keys[i]);
      KJ_ASSERT(i + 1 == n || f(*parent.keys[i], *parent.keys[i + 1]));
    }
    total += verifyNode(size, f, parent.children[n], height - 1, maxRow);
    KJ_ASSERT(maxRow == nullptr || f(*parent.keys[n-1], *maxRow));
    return total;
  } else {
    auto& leaf = tree[pos].leaf;
    auto n = leaf.size();
    for (auto i: kj::zeroTo(n)) {
      KJ_ASSERT(*leaf.rows[i] < size);
      if (i + 1 < n) {
        KJ_ASSERT(f(*leaf.rows[i], *leaf.rows[i + 1]));
      } else {
        KJ_ASSERT(maxRow == nullptr || leaf.rows[n-1] == maxRow);
      }
    }
    return n;
  }
}

void BTreeImpl::logInconsistency() const {
  KJ_LOG(ERROR,
      "BTreeIndex detected tree state inconsistency. This can happen if you create a kj::Table "
      "with a b-tree index and you modify the rows in the table post-indexing in a way that would "
      "change their ordering. This is a serious bug which will lead to undefined behavior."
      "\nstack: ", kj::getStackTrace());
}

void BTreeImpl::reserve(size_t size) {
  KJ_REQUIRE(size < (1u << 31), "b-tree has reached maximum size");

  // Calculate the worst-case number of leaves to cover the size, given that a leaf is always at
  // least half-full. (Note that it's correct for this calculation to round down, not up: The
  // remainder will necessarily be distributed among the non-full leaves, rather than creating a
  // new leaf, because if it went into a new leaf, that leaf would be less than half-full.)
  uint leaves = size / (Leaf::NROWS / 2);

  // Calculate the worst-case number of parents to cover the leaves, given that a parent is always
  // at least half-full. Since the parents form a tree with branching factor B, the size of the
  // tree is N/B + N/B^2 + N/B^3 + N/B^4 + ... = N / (B - 1). Math.
  constexpr uint branchingFactor = Parent::NCHILDREN / 2;
  uint parents = leaves / (branchingFactor - 1);

  // Height is log-base-branching-factor of leaves, plus 1 for the root node.
  uint height = lg(leaves | 1) / lg(branchingFactor) + 1;

  size_t newSize = leaves +
      parents + 1 +  // + 1 for the root
      height + 2;    // minimum freelist size needed by insert()

  if (treeCapacity < newSize) {
    growTree(newSize);
  }
}
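
// Worked example (assuming Leaf::NROWS == 14 and Parent::NCHILDREN == 8, the values in table.h
// at the time of writing): reserve(1000) yields leaves = 1000 / 7 = 142, branchingFactor = 4,
// parents = 142 / 3 = 47, height = lg(142) / lg(4) + 1 = 7 / 2 + 1 = 4, so
// newSize = 142 + (47 + 1) + (4 + 2) = 196 nodes.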

void BTreeImpl::clear() {
  if (tree != &EMPTY_NODE) {
    azero(tree, treeCapacity);
    height = 0;
    freelistHead = 1;
    freelistSize = treeCapacity;
    beginLeaf = 0;
    endLeaf = 0;
  }
}

void BTreeImpl::growTree(uint minCapacity) {
  uint newCapacity = kj::max(kj::max(minCapacity, treeCapacity * 2), 4);
  freelistSize += newCapacity - treeCapacity;

  // Allocate some aligned memory! In theory this should be as simple as calling the C11 standard
  // aligned_alloc() function. Unfortunately, many platforms don't implement it. Luckily, there
  // are usually alternatives.

#if __APPLE__ || __BIONIC__
  // OSX and Android lack aligned_alloc(), but have posix_memalign(). Fine.
  void* allocPtr;
  int error = posix_memalign(&allocPtr,
      sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion));
  if (error != 0) {
    KJ_FAIL_SYSCALL("posix_memalign", error);
  }
  NodeUnion* newTree = reinterpret_cast<NodeUnion*>(allocPtr);
#elif _WIN32
  // Windows lacks aligned_alloc() but has its own _aligned_malloc() (which requires freeing using
  // _aligned_free()).
  // WATCH OUT: The argument order for _aligned_malloc() is opposite of aligned_alloc()!
  NodeUnion* newTree = reinterpret_cast<NodeUnion*>(
      _aligned_malloc(newCapacity * sizeof(BTreeImpl::NodeUnion), sizeof(BTreeImpl::NodeUnion)));
  KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity);
#else
  // Let's use the C11 standard.
  NodeUnion* newTree = reinterpret_cast<NodeUnion*>(
      aligned_alloc(sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion)));
  KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity);
#endif

  acopy(newTree, tree, treeCapacity);
  azero(newTree + treeCapacity, newCapacity - treeCapacity);
  if (tree != &EMPTY_NODE) aligned_free(tree);
  tree = newTree;
  treeCapacity = newCapacity;
}

BTreeImpl::Iterator BTreeImpl::search(const SearchKey& searchKey) const {
  // Find the "first" row number (in sorted order) for which searchKey.isAfter(rowNumber) returns
  // false.

  uint pos = 0;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    auto& parent = tree[pos].parent;
    pos = parent.children[searchKey.search(parent)];
  }

  auto& leaf = tree[pos].leaf;
  return { tree, &leaf, searchKey.search(leaf) };
}

template <typename T>
struct BTreeImpl::AllocResult {
  uint index;
  T& node;
};

template <typename T>
inline BTreeImpl::AllocResult<T> BTreeImpl::alloc() {
  // Allocate a new item from the freelist. Guaranteed to be zero'd except for the first member.
  uint i = freelistHead;
  NodeUnion* ptr = &tree[i];
  freelistHead = i + 1 + ptr->freelist.nextOffset;
  --freelistSize;
  return { i, *ptr };
}

inline void BTreeImpl::free(uint pos) {
  // Add the given node to the freelist.

  // HACK: This is typically called on a node immediately after copying its contents away, but the
  // pointer used to copy it away may be a different pointer pointing to a different union member
  // which the compiler may not recognize as aliasing with this object. Just to be extra-safe,
  // insert a compiler barrier.
  compilerBarrier();

  auto& node = tree[pos];
  node.freelist.nextOffset = freelistHead - pos - 1;
  azero(node.freelist.zero, kj::size(node.freelist.zero));
  freelistHead = pos;
  ++freelistSize;
}
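
// The freelist encoding, worked through (illustrative only): a freshly zeroed node has
// nextOffset == 0, so from node i the list implicitly continues at node i + 1 -- which is why a
// newly grown, all-zero region needs no explicit initialization. If freelistHead == 5 and node 2
// is freed, free(2) stores nextOffset = 5 - 2 - 1 = 2 and sets freelistHead = 2; the next
// alloc() then returns node 2 and restores freelistHead to 2 + 1 + 2 == 5.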

BTreeImpl::Iterator BTreeImpl::insert(const SearchKey& searchKey) {
  // Like search() but ensures that there is room in the leaf node to insert a new row.

  // If we split the root node it will generate two new nodes. If we split any other node in the
  // path it will generate one new node. `height` doesn't count leaf nodes, but we can equivalently
  // think of it as not counting the root node, so in the worst case we may allocate height + 2
  // new nodes.
  //
  // (Also note that if the tree is currently empty, then `tree` points to a dummy root node in
  // read-only memory. We definitely need to allocate a real tree node array in this case, and
  // we'll start out allocating space for four nodes, which will be all we need up to 28 rows.)
  if (freelistSize < height + 2) {
    if (height > 0 && !tree[0].parent.isFull() && freelistSize >= height) {
      // Slight optimization: The root node is not full, so we're definitely not going to split it.
      // That means that the maximum allocations we might do is equal to `height`, not
      // `height + 2`, and we have that much space, so no need to grow yet.
      //
      // This optimization is particularly important for small trees, e.g. when treeCapacity is 4
      // and the tree so far consists of a root and two children, we definitely don't need to grow
      // the tree yet.
    } else {
      growTree();

      if (freelistHead == 0) {
        // We have no root yet. Allocate one.
        KJ_ASSERT(alloc<Parent>().index == 0);
      }
    }
  }

  uint pos = 0;

  // Track grandparent node and child index within grandparent.
  Parent* parent = nullptr;
  uint indexInParent = 0;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    Parent& node = insertHelper(searchKey, tree[pos].parent, parent, indexInParent, pos);

    parent = &node;
    indexInParent = searchKey.search(node);
    pos = node.children[indexInParent];
  }

  Leaf& leaf = insertHelper(searchKey, tree[pos].leaf, parent, indexInParent, pos);

  // Fun fact: Unlike erase(), there's no need to climb back up the tree modifying keys, because
  // either the newly-inserted node will not be the last in the leaf (and thus parent keys aren't
  // modified), or the leaf is the last leaf in the tree (and thus there's no parent key to
  // modify).

  return { tree, &leaf, searchKey.search(leaf) };
}

template <typename Node>
Node& BTreeImpl::insertHelper(const SearchKey& searchKey,
    Node& node, Parent* parent, uint indexInParent, uint pos) {
  if (node.isFull()) {
    // This node is full. Need to split.
    if (parent == nullptr) {
      // This is the root node. We need to split into two nodes and create a new root.
      auto n1 = alloc<Node>();
      auto n2 = alloc<Node>();

      uint pivot = split(n2.node, n2.index, node, pos);
      move(n1.node, n1.index, node);

      // Rewrite root to have the two children.
      tree[0].parent.initRoot(pivot, n1.index, n2.index);

      // Increased height.
      ++height;

      // Decide which new branch has our search key.
      if (searchKey.isAfter(pivot)) {
        // the right one
        return n2.node;
      } else {
        // the left one
        return n1.node;
      }
    } else {
      // This is a non-root parent node. We need to split it into two and insert the new node
      // into the grandparent.
      auto n = alloc<Node>();
      uint pivot = split(n.node, n.index, node, pos);

      // Insert new child into grandparent.
      parent->insertAfter(indexInParent, pivot, n.index);

      // Decide which new branch has our search key.
      if (searchKey.isAfter(pivot)) {
        // the new one, which is right of the original
        return n.node;
      } else {
        // the original one, which is left of the new one
        return node;
      }
    }
  } else {
    // No split needed.
    return node;
  }
}

void BTreeImpl::erase(uint row, const SearchKey& searchKey) {
  // Erase the given row number from the tree. searchKey locates the row: searchKey.isAfter()
  // returns false for the given row and all rows after it.

  uint pos = 0;

  // Track grandparent node and child index within grandparent.
  Parent* parent = nullptr;
  uint indexInParent = 0;

  MaybeUint* fixup = nullptr;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    Parent& node = eraseHelper(tree[pos].parent, parent, indexInParent, pos, fixup);

    parent = &node;
    indexInParent = searchKey.search(node);
    pos = node.children[indexInParent];

    if (indexInParent < kj::size(node.keys) && node.keys[indexInParent] == row) {
      // Oh look, the row is a key in this node! We'll need to come back and fix this up later.
      // Note that any particular row can only appear as *one* key value anywhere in the tree, so
      // we only need one fixup pointer, which is nice.
      MaybeUint* newFixup = &node.keys[indexInParent];
      if (fixup == newFixup) {
        // The fixup pointer was already set while processing a parent node, and then a merge or
        // rotate caused it to be moved, but the fixup pointer was updated... so it's already set
        // to point at the slot we wanted it to point to, so nothing to see here.
      } else {
        KJ_DASSERT(fixup == nullptr);
        fixup = newFixup;
      }
    }
  }

  Leaf& leaf = eraseHelper(tree[pos].leaf, parent, indexInParent, pos, fixup);

  uint r = searchKey.search(leaf);
  if (leaf.rows[r] == row) {
    leaf.erase(r);

    if (fixup != nullptr) {
      // There's a key in a parent node that needs fixup. This is only possible if the removed
      // row is the last in its leaf.
      KJ_DASSERT(leaf.rows[r] == nullptr);
      KJ_DASSERT(r > 0);  // non-root nodes must be at least half full so this can't be item 0
      KJ_DASSERT(*fixup == row);
      *fixup = leaf.rows[r - 1];
    }
  } else {
    logInconsistency();
  }
}

template <typename Node>
Node& BTreeImpl::eraseHelper(
    Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup) {
  if (parent != nullptr && !node.isMostlyFull()) {
    // This is not the root, but it's only half-full. Rebalance.
    KJ_DASSERT(node.isHalfFull());

    if (indexInParent > 0) {
      // There's a sibling to the left.
      uint sibPos = parent->children[indexInParent - 1];
      Node& sib = tree[sibPos];
      if (sib.isMostlyFull()) {
        // Left sibling is more than half full. Steal one member.
        rotateRight(sib, node, *parent, indexInParent - 1);
        return node;
      } else {
        // Left sibling is half full, too. Merge.
        KJ_ASSERT(sib.isHalfFull());
        merge(sib, sibPos, *parent->keys[indexInParent - 1], node);
        parent->eraseAfter(indexInParent - 1);
        free(pos);
        if (fixup == &parent->keys[indexInParent]) --fixup;

        if (parent->keys[0] == nullptr) {
          // Oh hah, the parent has no keys left. It must be the root. We can eliminate it.
          KJ_DASSERT(parent == &tree->parent);
          compilerBarrier();  // don't reorder any writes to parent below here
          move(tree[0], 0, sib);
          free(sibPos);
          --height;
          return tree[0];
        } else {
          return sib;
        }
      }
    } else if (indexInParent < Parent::NKEYS && parent->keys[indexInParent] != nullptr) {
      // There's a sibling to the right.
      uint sibPos = parent->children[indexInParent + 1];
      Node& sib = tree[sibPos];
      if (sib.isMostlyFull()) {
        // Right sibling is more than half full. Steal one member.
        rotateLeft(node, sib, *parent, indexInParent, fixup);
        return node;
      } else {
        // Right sibling is half full, too. Merge.
        KJ_ASSERT(sib.isHalfFull());
        merge(node, pos, *parent->keys[indexInParent], sib);
        parent->eraseAfter(indexInParent);
        free(sibPos);
        if (fixup == &parent->keys[indexInParent]) fixup = nullptr;

        if (parent->keys[0] == nullptr) {
          // Oh hah, the parent has no keys left. It must be the root. We can eliminate it.
          KJ_DASSERT(parent == &tree->parent);
          compilerBarrier();  // don't reorder any writes to parent below here
          move(tree[0], 0, node);
          free(pos);
          --height;
          return tree[0];
        } else {
          return node;
        }
      }
    } else {
      KJ_FAIL_ASSERT("inconsistent b-tree");
    }
  }

  return node;
}

void BTreeImpl::renumber(uint oldRow, uint newRow, const SearchKey& searchKey) {
  // Renumber the given row from oldRow to newRow. searchKey locates oldRow: searchKey.isAfter()
  // returns false for oldRow and all rows after it. (It is never called on newRow.)

  uint pos = 0;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    auto& node = tree[pos].parent;
    uint indexInParent = searchKey.search(node);
    pos = node.children[indexInParent];
    if (node.keys[indexInParent] == oldRow) {
      node.keys[indexInParent] = newRow;
    }
    KJ_DASSERT(pos != 0);
  }

  auto& leaf = tree[pos].leaf;
  uint r = searchKey.search(leaf);
  if (leaf.rows[r] == oldRow) {
    leaf.rows[r] = newRow;
  } else {
    logInconsistency();
  }
}

uint BTreeImpl::split(Parent& dst, uint dstPos, Parent& src, uint srcPos) {
  constexpr size_t mid = Parent::NKEYS / 2;
  uint pivot = *src.keys[mid];
  acopy(dst.keys, src.keys + mid + 1, Parent::NKEYS - mid - 1);
  azero(src.keys + mid, Parent::NKEYS - mid);
  acopy(dst.children, src.children + mid + 1, Parent::NCHILDREN - mid - 1);
  azero(src.children + mid + 1, Parent::NCHILDREN - mid - 1);
  return pivot;
}
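
// Worked example (assuming Parent::NKEYS == 7 and Parent::NCHILDREN == 8, per table.h at the
// time of writing): mid == 3, so for keys [k0 .. k6], k3 becomes the pivot (returned to the
// caller for insertion into the grandparent), src keeps keys [k0 k1 k2] with children c0..c3,
// and dst receives keys [k4 k5 k6] with children c4..c7. Note the pivot ends up in neither
// child -- contrast with the leaf split below.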

uint BTreeImpl::split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos) {
  constexpr size_t mid = Leaf::NROWS / 2;
  uint pivot = *src.rows[mid - 1];
  acopy(dst.rows, src.rows + mid, Leaf::NROWS - mid);
  azero(src.rows + mid, Leaf::NROWS - mid);

  if (src.next == 0) {
    endLeaf = dstPos;
  } else {
    tree[src.next].leaf.prev = dstPos;
  }
  dst.next = src.next;
  dst.prev = srcPos;
  src.next = dstPos;

  return pivot;
}
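
// Worked example (again assuming Leaf::NROWS == 14): mid == 7, so src keeps rows[0..6], dst
// receives rows[7..13], and the pivot is *src.rows[6] -- the last row remaining in src. Unlike
// the parent split above, the pivot row stays in the left node; the parent key merely records
// the boundary. The pointer fixups then splice dst into the doubly-linked leaf list directly
// after src.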

void BTreeImpl::merge(Parent& dst, uint dstPos, uint pivot, Parent& src) {
  // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants
  // guarantee that the node can't be more than half-empty, or we would have merged it sooner.
  // (The root can be more than half-empty, but it is never merged with anything.)
  KJ_DASSERT(src.isHalfFull());
  KJ_DASSERT(dst.isHalfFull());

  constexpr size_t mid = Parent::NKEYS / 2;
  dst.keys[mid] = pivot;
  acopy(dst.keys + mid + 1, src.keys, mid);
  acopy(dst.children + mid + 1, src.children, mid + 1);
}

void BTreeImpl::merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src) {
  // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants
  // guarantee that the node can't be more than half-empty, or we would have merged it sooner.
  // (The root can be more than half-empty, but it is never merged with anything.)
  KJ_DASSERT(src.isHalfFull());
  KJ_DASSERT(dst.isHalfFull());

  constexpr size_t mid = Leaf::NROWS / 2;
  dst.rows[mid] = pivot;
  acopy(dst.rows + mid, src.rows, mid);

  dst.next = src.next;
  if (dst.next == 0) {
    endLeaf = dstPos;
  } else {
    tree[dst.next].leaf.prev = dstPos;
  }
}

void BTreeImpl::move(Parent& dst, uint dstPos, Parent& src) {
  dst = src;
}

void BTreeImpl::move(Leaf& dst, uint dstPos, Leaf& src) {
  dst = src;
  if (src.next == 0) {
    endLeaf = dstPos;
  } else {
    tree[src.next].leaf.prev = dstPos;
  }
  if (src.prev == 0) {
    beginLeaf = dstPos;
  } else {
    tree[src.prev].leaf.next = dstPos;
  }
}

void BTreeImpl::rotateLeft(
    Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) {
  // Steal one item from the right node and move it to the left node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(left.isHalfFull());
  KJ_DASSERT(right.isMostlyFull());

  constexpr size_t mid = Parent::NKEYS / 2;
  left.keys[mid] = parent.keys[indexInParent];
  if (fixup == &parent.keys[indexInParent]) fixup = &left.keys[mid];
  parent.keys[indexInParent] = right.keys[0];
  left.children[mid + 1] = right.children[0];
  amove(right.keys, right.keys + 1, Parent::NKEYS - 1);
  right.keys[Parent::NKEYS - 1] = nullptr;
  amove(right.children, right.children + 1, Parent::NCHILDREN - 1);
  right.children[Parent::NCHILDREN - 1] = 0;
}

void BTreeImpl::rotateLeft(
    Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) {
  // Steal one item from the right node and move it to the left node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(left.isHalfFull());
  KJ_DASSERT(right.isMostlyFull());

  constexpr size_t mid = Leaf::NROWS / 2;
  parent.keys[indexInParent] = left.rows[mid] = right.rows[0];
  if (fixup == &parent.keys[indexInParent]) fixup = nullptr;
  amove(right.rows, right.rows + 1, Leaf::NROWS - 1);
  right.rows[Leaf::NROWS - 1] = nullptr;
}

void BTreeImpl::rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent) {
  // Steal one item from the left node and move it to the right node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(right.isHalfFull());
  KJ_DASSERT(left.isMostlyFull());

  constexpr size_t mid = Parent::NKEYS / 2;
  amove(right.keys + 1, right.keys, mid);
  amove(right.children + 1, right.children, mid + 1);

  uint back = left.keyCount() - 1;

  right.keys[0] = parent.keys[indexInParent];
  parent.keys[indexInParent] = left.keys[back];
  right.children[0] = left.children[back + 1];
  left.keys[back] = nullptr;
  left.children[back + 1] = 0;
}

void BTreeImpl::rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent) {
  // Steal one item from the left node and move it to the right node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(right.isHalfFull());
  KJ_DASSERT(left.isMostlyFull());

  constexpr size_t mid = Leaf::NROWS / 2;
  amove(right.rows + 1, right.rows, mid);

  uint back = left.size() - 1;

  right.rows[0] = left.rows[back];
  parent.keys[indexInParent] = left.rows[back - 1];
  left.rows[back] = nullptr;
}

void BTreeImpl::Parent::initRoot(uint key, uint leftChild, uint rightChild) {
  // HACK: This is typically called on the root node immediately after copying its contents away,
  // but the pointer used to copy it away may be a different pointer pointing to a different
  // union member which the compiler may not recognize as aliasing with this object. Just to
  // be extra-safe, insert a compiler barrier.
  compilerBarrier();

  keys[0] = key;
  children[0] = leftChild;
  children[1] = rightChild;
  azero(keys + 1, Parent::NKEYS - 1);
  azero(children + 2, Parent::NCHILDREN - 2);
}

void BTreeImpl::Parent::insertAfter(uint i, uint splitKey, uint child) {
  KJ_IREQUIRE(children[Parent::NCHILDREN - 1] == 0);  // check not full

  amove(keys + i + 1, keys + i, Parent::NKEYS - (i + 1));
  keys[i] = splitKey;

  amove(children + i + 2, children + i + 1, Parent::NCHILDREN - (i + 2));
  children[i + 1] = child;
}

void BTreeImpl::Parent::eraseAfter(uint i) {
  amove(keys + i, keys + i + 1, Parent::NKEYS - (i + 1));
  keys[Parent::NKEYS - 1] = nullptr;
  amove(children + i + 1, children + i + 2, Parent::NCHILDREN - (i + 2));
  children[Parent::NCHILDREN - 1] = 0;
}

} // namespace _

// =======================================================================================
// Insertion order

const InsertionOrderIndex::Link InsertionOrderIndex::EMPTY_LINK = { 0, 0 };

InsertionOrderIndex::InsertionOrderIndex(): capacity(0), links(const_cast<Link*>(&EMPTY_LINK)) {}
InsertionOrderIndex::~InsertionOrderIndex() noexcept(false) {
  if (links != &EMPTY_LINK) delete[] links;
}

void InsertionOrderIndex::reserve(size_t size) {
  KJ_ASSERT(size < (1u << 31), "Table too big for InsertionOrderIndex");

  if (size > capacity) {
    // Need to grow.
    // Note that `size` and `capacity` do not include the special link[0].

    // Round up to the next power of 2.
    size_t allocation = 1u << (_::lg(size) + 1);
    KJ_DASSERT(allocation > size);
    KJ_DASSERT(allocation <= size * 2);

    // Round first allocation up to 8.
    allocation = kj::max(allocation, 8);

    Link* newLinks = new Link[allocation];
#ifdef KJ_DEBUG
    // To catch bugs, fill unused links with 0xff.
    memset(newLinks, 0xff, allocation * sizeof(Link));
#endif
    _::acopy(newLinks, links, capacity + 1);
    if (links != &EMPTY_LINK) delete[] links;
    links = newLinks;
    capacity = allocation - 1;
  }
}
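
// Illustrative example: reserve(5) computes lg(5) == 2, so allocation == 1u << 3 == 8 (which
// also satisfies the minimum of 8); newLinks then holds 8 Links and capacity becomes 7, since
// one slot is permanently reserved for the sentinel links[0].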

void InsertionOrderIndex::clear() {
  links[0] = Link { 0, 0 };

#ifdef KJ_DEBUG
  // To catch bugs, fill unused links with 0xff.
  memset(links + 1, 0xff, capacity * sizeof(Link));
#endif
}

kj::Maybe<size_t> InsertionOrderIndex::insertImpl(size_t pos) {
  if (pos >= capacity) {
    reserve(pos + 1);
  }

  links[pos + 1].prev = links[0].prev;
  links[pos + 1].next = 0;
  links[links[0].prev].next = pos + 1;
  links[0].prev = pos + 1;

  return nullptr;
}
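
// Worked example (illustrative only): links[0] is the sentinel of a circular doubly-linked
// list, so calling insertImpl(0) then insertImpl(1) on an empty index produces
//   links[0] = { prev: 2, next: 1 }, links[1] = { prev: 0, next: 2 }, links[2] = { prev: 1, next: 0 }
// (field names used loosely), i.e. iteration follows .next through rows 0 and 1 in insertion
// order and wraps back to the sentinel.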

void InsertionOrderIndex::eraseImpl(size_t pos) {
  Link& link = links[pos + 1];
  links[link.next].prev = link.prev;
  links[link.prev].next = link.next;

#ifdef KJ_DEBUG
  memset(&link, 0xff, sizeof(Link));
#endif
}

void InsertionOrderIndex::moveImpl(size_t oldPos, size_t newPos) {
  Link& link = links[oldPos + 1];
  Link& newLink = links[newPos + 1];

  newLink = link;

  KJ_DASSERT(links[link.next].prev == oldPos + 1);
  KJ_DASSERT(links[link.prev].next == oldPos + 1);
  links[link.next].prev = newPos + 1;
  links[link.prev].next = newPos + 1;

#ifdef KJ_DEBUG
  memset(&link, 0xff, sizeof(Link));
#endif
}

} // namespace kj