1 | // Copyright (c) 2018 Kenton Varda and contributors |
2 | // Licensed under the MIT License: |
3 | // |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | // of this software and associated documentation files (the "Software"), to deal |
6 | // in the Software without restriction, including without limitation the rights |
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | // copies of the Software, and to permit persons to whom the Software is |
9 | // furnished to do so, subject to the following conditions: |
10 | // |
11 | // The above copyright notice and this permission notice shall be included in |
12 | // all copies or substantial portions of the Software. |
13 | // |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
20 | // THE SOFTWARE. |
21 | |
22 | #include "table.h" |
23 | #include "debug.h" |
24 | #include <stdlib.h> |
25 | |
26 | namespace kj { |
27 | namespace _ { |
28 | |
29 | static inline uint lg(uint value) { |
30 | // Compute floor(log2(value)). |
31 | // |
32 | // Undefined for value = 0. |
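  //
  // E.g. lg(1) = 0, lg(2) = lg(3) = 1, lg(4) = 2, lg(1024) = 10.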
#if _MSC_VER && !defined(__clang__)
34 | unsigned long i; |
35 | auto found = _BitScanReverse(&i, value); |
36 | KJ_DASSERT(found); // !found means value = 0 |
37 | return i; |
38 | #else |
39 | return sizeof(uint) * 8 - 1 - __builtin_clz(value); |
40 | #endif |
41 | } |
42 | |
43 | void throwDuplicateTableRow() { |
44 | KJ_FAIL_REQUIRE("inserted row already exists in table" ); |
45 | } |
46 | |
47 | void logHashTableInconsistency() { |
48 | KJ_LOG(ERROR, |
49 | "HashIndex detected hash table inconsistency. This can happen if you create a kj::Table " |
50 | "with a hash index and you modify the rows in the table post-indexing in a way that would " |
51 | "change their hash. This is a serious bug which will lead to undefined behavior." |
52 | "\nstack: " , kj::getStackTrace()); |
53 | } |
54 | |
55 | // List of primes where each element is roughly double the previous. Obtained |
56 | // from: |
57 | // http://planetmath.org/goodhashtableprimes |
58 | // Primes < 53 were added to ensure that small tables don't allocate excessive memory. |
59 | static const size_t PRIMES[] = { |
60 | 1, // 2^ 0 = 1 |
61 | 3, // 2^ 1 = 2 |
62 | 5, // 2^ 2 = 4 |
63 | 11, // 2^ 3 = 8 |
64 | 23, // 2^ 4 = 16 |
65 | 53, // 2^ 5 = 32 |
66 | 97, // 2^ 6 = 64 |
67 | 193, // 2^ 7 = 128 |
68 | 389, // 2^ 8 = 256 |
69 | 769, // 2^ 9 = 512 |
70 | 1543, // 2^10 = 1024 |
71 | 3079, // 2^11 = 2048 |
72 | 6151, // 2^12 = 4096 |
73 | 12289, // 2^13 = 8192 |
74 | 24593, // 2^14 = 16384 |
75 | 49157, // 2^15 = 32768 |
76 | 98317, // 2^16 = 65536 |
77 | 196613, // 2^17 = 131072 |
78 | 393241, // 2^18 = 262144 |
79 | 786433, // 2^19 = 524288 |
80 | 1572869, // 2^20 = 1048576 |
81 | 3145739, // 2^21 = 2097152 |
82 | 6291469, // 2^22 = 4194304 |
83 | 12582917, // 2^23 = 8388608 |
84 | 25165843, // 2^24 = 16777216 |
85 | 50331653, // 2^25 = 33554432 |
86 | 100663319, // 2^26 = 67108864 |
87 | 201326611, // 2^27 = 134217728 |
88 | 402653189, // 2^28 = 268435456 |
89 | 805306457, // 2^29 = 536870912 |
90 | 1610612741, // 2^30 = 1073741824 |
91 | }; |
92 | |
93 | uint chooseBucket(uint hash, uint count) { |
94 | // Integer modulus is really, really slow. It turns out that the compiler can generate much |
95 | // faster code if the denominator is a constant. Since we have a fixed set of possible |
96 | // denominators, a big old switch() statement is a win. |
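  // (For a constant denominator like 97, the compiler can typically replace the division with a
  // multiply-by-reciprocal and a shift, avoiding a slow hardware divide entirely.)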
97 | |
98 | // TODO(perf): Consider using power-of-two bucket sizes. We can safely do so as long as we demand |
99 | // high-quality hash functions -- kj::hashCode() needs good diffusion even for integers, can't |
100 | // just be a cast. Also be sure to implement Robin Hood hashing to avoid extremely bad negative |
101 | // lookup time when elements have sequential hashes (otherwise, it could be necessary to scan |
102 | // the entire list to determine that an element isn't present). |
103 | |
104 | switch (count) { |
105 | #define HANDLE(i) case i##u: return hash % i##u |
106 | HANDLE( 1); |
107 | HANDLE( 3); |
108 | HANDLE( 5); |
109 | HANDLE( 11); |
110 | HANDLE( 23); |
111 | HANDLE( 53); |
112 | HANDLE( 97); |
113 | HANDLE( 193); |
114 | HANDLE( 389); |
115 | HANDLE( 769); |
116 | HANDLE( 1543); |
117 | HANDLE( 3079); |
118 | HANDLE( 6151); |
119 | HANDLE( 12289); |
120 | HANDLE( 24593); |
121 | HANDLE( 49157); |
122 | HANDLE( 98317); |
123 | HANDLE( 196613); |
124 | HANDLE( 393241); |
125 | HANDLE( 786433); |
126 | HANDLE( 1572869); |
127 | HANDLE( 3145739); |
128 | HANDLE( 6291469); |
129 | HANDLE( 12582917); |
130 | HANDLE( 25165843); |
131 | HANDLE( 50331653); |
132 | HANDLE( 100663319); |
133 | HANDLE( 201326611); |
134 | HANDLE( 402653189); |
135 | HANDLE( 805306457); |
136 | HANDLE(1610612741); |
137 | #undef HANDLE |
138 | default: return hash % count; |
139 | } |
140 | } |
141 | |
142 | size_t chooseHashTableSize(uint size) { |
143 | if (size == 0) return 0; |
144 | |
  // lg() rounds down, so add 1 to round the requested size up to the next power of two, then look
  // up the best prime bucket size for that target size.
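  // E.g. size = 100: lg(100) = 6, so we return PRIMES[7] = 193, for a load factor of about 0.52.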
147 | return PRIMES[lg(size) + 1]; |
148 | } |
149 | |
150 | kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize) { |
151 | // Rehash the whole table. |
152 | |
  KJ_REQUIRE(targetSize < (1 << 30), "hash table has reached maximum size");
154 | |
155 | size_t size = chooseHashTableSize(targetSize); |
156 | |
157 | if (size < oldBuckets.size()) { |
158 | size = oldBuckets.size(); |
159 | } |
160 | |
161 | auto newBuckets = kj::heapArray<HashBucket>(size); |
162 | memset(newBuckets.begin(), 0, sizeof(HashBucket) * size); |
163 | |
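  // Re-insert every occupied bucket into the new array, using probeHash() to step to the next
  // candidate bucket whenever a collision occurs.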
164 | for (auto& oldBucket: oldBuckets) { |
165 | if (oldBucket.isOccupied()) { |
166 | for (uint i = oldBucket.hash % newBuckets.size();; i = probeHash(newBuckets, i)) { |
167 | auto& newBucket = newBuckets[i]; |
168 | if (newBucket.isEmpty()) { |
169 | newBucket = oldBucket; |
170 | break; |
171 | } |
172 | } |
173 | } |
174 | } |
175 | |
176 | return newBuckets; |
177 | } |
178 | |
179 | // ======================================================================================= |
180 | // BTree |
181 | |
182 | #if _WIN32 |
183 | #define aligned_free _aligned_free |
184 | #else |
185 | #define aligned_free ::free |
186 | #endif |
187 | |
188 | BTreeImpl::BTreeImpl() |
189 | : tree(const_cast<NodeUnion*>(&EMPTY_NODE)), |
190 | treeCapacity(1), |
191 | height(0), |
192 | freelistHead(1), |
193 | freelistSize(0), |
194 | beginLeaf(0), |
195 | endLeaf(0) {} |
196 | BTreeImpl::~BTreeImpl() noexcept(false) { |
197 | if (tree != &EMPTY_NODE) { |
198 | aligned_free(tree); |
199 | } |
200 | } |
201 | |
202 | const BTreeImpl::NodeUnion BTreeImpl::EMPTY_NODE = {{{0, {0}}}}; |
203 | |
204 | void BTreeImpl::verify(size_t size, FunctionParam<bool(uint, uint)> f) { |
205 | KJ_ASSERT(verifyNode(size, f, 0, height, nullptr) == size); |
206 | } |
207 | size_t BTreeImpl::verifyNode(size_t size, FunctionParam<bool(uint, uint)>& f, |
208 | uint pos, uint height, MaybeUint maxRow) { |
209 | if (height > 0) { |
210 | auto& parent = tree[pos].parent; |
211 | |
212 | auto n = parent.keyCount(); |
213 | size_t total = 0; |
214 | for (auto i: kj::zeroTo(n)) { |
215 | KJ_ASSERT(*parent.keys[i] < size); |
216 | total += verifyNode(size, f, parent.children[i], height - 1, parent.keys[i]); |
217 | KJ_ASSERT(i + 1 == n || f(*parent.keys[i], *parent.keys[i + 1])); |
218 | } |
219 | total += verifyNode(size, f, parent.children[n], height - 1, maxRow); |
220 | KJ_ASSERT(maxRow == nullptr || f(*parent.keys[n-1], *maxRow)); |
221 | return total; |
222 | } else { |
223 | auto& leaf = tree[pos].leaf; |
224 | auto n = leaf.size(); |
225 | for (auto i: kj::zeroTo(n)) { |
226 | KJ_ASSERT(*leaf.rows[i] < size); |
227 | if (i + 1 < n) { |
228 | KJ_ASSERT(f(*leaf.rows[i], *leaf.rows[i + 1])); |
229 | } else { |
230 | KJ_ASSERT(maxRow == nullptr || leaf.rows[n-1] == maxRow); |
231 | } |
232 | } |
233 | return n; |
234 | } |
235 | } |
236 | |
237 | void BTreeImpl::logInconsistency() const { |
238 | KJ_LOG(ERROR, |
239 | "BTreeIndex detected tree state inconsistency. This can happen if you create a kj::Table " |
240 | "with a b-tree index and you modify the rows in the table post-indexing in a way that would " |
241 | "change their ordering. This is a serious bug which will lead to undefined behavior." |
242 | "\nstack: " , kj::getStackTrace()); |
243 | } |
244 | |
245 | void BTreeImpl::reserve(size_t size) { |
  KJ_REQUIRE(size < (1u << 31), "b-tree has reached maximum size");
247 | |
248 | // Calculate the worst-case number of leaves to cover the size, given that a leaf is always at |
249 | // least half-full. (Note that it's correct for this calculation to round down, not up: The |
250 | // remainder will necessarily be distributed among the non-full leaves, rather than creating a |
251 | // new leaf, because if it went into a new leaf, that leaf would be less than half-full.) |
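  // (E.g. if Leaf::NROWS is 14, then covering 100 rows requires at most 100 / 7 = 14 leaves.)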
252 | uint leaves = size / (Leaf::NROWS / 2); |
253 | |
254 | // Calculate the worst-case number of parents to cover the leaves, given that a parent is always |
255 | // at least half-full. Since the parents form a tree with branching factor B, the size of the |
256 | // tree is N/B + N/B^2 + N/B^3 + N/B^4 + ... = N / (B - 1). Math. |
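  // (The middle step: this is a geometric series summing to (N/B) / (1 - 1/B) = N / (B - 1).)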
257 | constexpr uint branchingFactor = Parent::NCHILDREN / 2; |
258 | uint parents = leaves / (branchingFactor - 1); |
259 | |
260 | // Height is log-base-branching-factor of leaves, plus 1 for the root node. |
261 | uint height = lg(leaves | 1) / lg(branchingFactor) + 1; |
262 | |
263 | size_t newSize = leaves + |
264 | parents + 1 + // + 1 for the root |
265 | height + 2; // minimum freelist size needed by insert() |
266 | |
267 | if (treeCapacity < newSize) { |
268 | growTree(newSize); |
269 | } |
270 | } |
271 | |
272 | void BTreeImpl::clear() { |
273 | if (tree != &EMPTY_NODE) { |
274 | azero(tree, treeCapacity); |
275 | height = 0; |
276 | freelistHead = 1; |
277 | freelistSize = treeCapacity; |
278 | beginLeaf = 0; |
279 | endLeaf = 0; |
280 | } |
281 | } |
282 | |
283 | void BTreeImpl::growTree(uint minCapacity) { |
284 | uint newCapacity = kj::max(kj::max(minCapacity, treeCapacity * 2), 4); |
285 | freelistSize += newCapacity - treeCapacity; |
286 | |
287 | // Allocate some aligned memory! In theory this should be as simple as calling the C11 standard |
288 | // aligned_alloc() function. Unfortunately, many platforms don't implement it. Luckily, there |
289 | // are usually alternatives. |
290 | |
291 | #if __APPLE__ || __BIONIC__ |
292 | // OSX and Android lack aligned_alloc(), but have posix_memalign(). Fine. |
293 | void* allocPtr; |
294 | int error = posix_memalign(&allocPtr, |
295 | sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion)); |
296 | if (error != 0) { |
297 | KJ_FAIL_SYSCALL("posix_memalign" , error); |
298 | } |
299 | NodeUnion* newTree = reinterpret_cast<NodeUnion*>(allocPtr); |
300 | #elif _WIN32 |
301 | // Windows lacks aligned_alloc() but has its own _aligned_malloc() (which requires freeing using |
302 | // _aligned_free()). |
303 | // WATCH OUT: The argument order for _aligned_malloc() is opposite of aligned_alloc()! |
304 | NodeUnion* newTree = reinterpret_cast<NodeUnion*>( |
305 | _aligned_malloc(newCapacity * sizeof(BTreeImpl::NodeUnion), sizeof(BTreeImpl::NodeUnion))); |
  KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity);
307 | #else |
308 | // Let's use the C11 standard. |
309 | NodeUnion* newTree = reinterpret_cast<NodeUnion*>( |
310 | aligned_alloc(sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion))); |
  KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity);
312 | #endif |
313 | |
314 | acopy(newTree, tree, treeCapacity); |
315 | azero(newTree + treeCapacity, newCapacity - treeCapacity); |
316 | if (tree != &EMPTY_NODE) aligned_free(tree); |
317 | tree = newTree; |
318 | treeCapacity = newCapacity; |
319 | } |
320 | |
321 | BTreeImpl::Iterator BTreeImpl::search(const SearchKey& searchKey) const { |
322 | // Find the "first" row number (in sorted order) for which searchKey.isAfter(rowNumber) returns |
323 | // false. |
324 | |
325 | uint pos = 0; |
326 | |
327 | for (auto i KJ_UNUSED: zeroTo(height)) { |
328 | auto& parent = tree[pos].parent; |
329 | pos = parent.children[searchKey.search(parent)]; |
330 | } |
331 | |
332 | auto& leaf = tree[pos].leaf; |
333 | return { tree, &leaf, searchKey.search(leaf) }; |
334 | } |
335 | |
336 | template <typename T> |
337 | struct BTreeImpl::AllocResult { |
338 | uint index; |
339 | T& node; |
340 | }; |
341 | |
342 | template <typename T> |
343 | inline BTreeImpl::AllocResult<T> BTreeImpl::alloc() { |
344 | // Allocate a new item from the freelist. Guaranteed to be zero'd except for the first member. |
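  // Note that a node whose memory is all-zero has freelist.nextOffset == 0, which means "the next
  // free node is the one immediately after this one". Hence freshly-zeroed space at the end of
  // the array is implicitly part of the freelist, so growTree() and clear() only need to zero it.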
345 | uint i = freelistHead; |
346 | NodeUnion* ptr = &tree[i]; |
347 | freelistHead = i + 1 + ptr->freelist.nextOffset; |
348 | --freelistSize; |
349 | return { i, *ptr }; |
350 | } |
351 | |
352 | inline void BTreeImpl::free(uint pos) { |
353 | // Add the given node to the freelist. |
354 | |
355 | // HACK: This is typically called on a node immediately after copying its contents away, but the |
356 | // pointer used to copy it away may be a different pointer pointing to a different union member |
  // which the compiler may not recognize as aliasing with this object. Just to be extra-safe,
358 | // insert a compiler barrier. |
359 | compilerBarrier(); |
360 | |
361 | auto& node = tree[pos]; |
362 | node.freelist.nextOffset = freelistHead - pos - 1; |
363 | azero(node.freelist.zero, kj::size(node.freelist.zero)); |
364 | freelistHead = pos; |
365 | ++freelistSize; |
366 | } |
367 | |
368 | BTreeImpl::Iterator BTreeImpl::insert(const SearchKey& searchKey) { |
369 | // Like search() but ensures that there is room in the leaf node to insert a new row. |
370 | |
371 | // If we split the root node it will generate two new nodes. If we split any other node in the |
372 | // path it will generate one new node. `height` doesn't count leaf nodes, but we can equivalently |
373 | // think of it as not counting the root node, so in the worst case we may allocate height + 2 |
374 | // new nodes. |
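  //
  // (E.g. with height = 2, the path is root -> parent -> leaf: splitting the leaf and the parent
  // costs one new node each, and splitting the root costs two, for a worst case of 4 = height + 2.)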
375 | // |
376 | // (Also note that if the tree is currently empty, then `tree` points to a dummy root node in |
377 | // read-only memory. We definitely need to allocate a real tree node array in this case, and |
378 | // we'll start out allocating space for four nodes, which will be all we need up to 28 rows.) |
379 | if (freelistSize < height + 2) { |
380 | if (height > 0 && !tree[0].parent.isFull() && freelistSize >= height) { |
381 | // Slight optimization: The root node is not full, so we're definitely not going to split it. |
382 | // That means that the maximum allocations we might do is equal to `height`, not |
383 | // `height + 2`, and we have that much space, so no need to grow yet. |
384 | // |
385 | // This optimization is particularly important for small trees, e.g. when treeCapacity is 4 |
386 | // and the tree so far consists of a root and two children, we definitely don't need to grow |
387 | // the tree yet. |
388 | } else { |
389 | growTree(); |
390 | |
391 | if (freelistHead == 0) { |
392 | // We have no root yet. Allocate one. |
393 | KJ_ASSERT(alloc<Parent>().index == 0); |
394 | } |
395 | } |
396 | } |
397 | |
398 | uint pos = 0; |
399 | |
400 | // Track grandparent node and child index within grandparent. |
401 | Parent* parent = nullptr; |
402 | uint indexInParent = 0; |
403 | |
404 | for (auto i KJ_UNUSED: zeroTo(height)) { |
405 | Parent& node = insertHelper(searchKey, tree[pos].parent, parent, indexInParent, pos); |
406 | |
407 | parent = &node; |
408 | indexInParent = searchKey.search(node); |
409 | pos = node.children[indexInParent]; |
410 | } |
411 | |
412 | Leaf& leaf = insertHelper(searchKey, tree[pos].leaf, parent, indexInParent, pos); |
413 | |
414 | // Fun fact: Unlike erase(), there's no need to climb back up the tree modifying keys, because |
415 | // either the newly-inserted node will not be the last in the leaf (and thus parent keys aren't |
416 | // modified), or the leaf is the last leaf in the tree (and thus there's no parent key to |
417 | // modify). |
418 | |
419 | return { tree, &leaf, searchKey.search(leaf) }; |
420 | } |
421 | |
422 | template <typename Node> |
423 | Node& BTreeImpl::insertHelper(const SearchKey& searchKey, |
424 | Node& node, Parent* parent, uint indexInParent, uint pos) { |
425 | if (node.isFull()) { |
426 | // This node is full. Need to split. |
427 | if (parent == nullptr) { |
428 | // This is the root node. We need to split into two nodes and create a new root. |
429 | auto n1 = alloc<Node>(); |
430 | auto n2 = alloc<Node>(); |
431 | |
432 | uint pivot = split(n2.node, n2.index, node, pos); |
433 | move(n1.node, n1.index, node); |
434 | |
435 | // Rewrite root to have the two children. |
436 | tree[0].parent.initRoot(pivot, n1.index, n2.index); |
437 | |
438 | // Increased height. |
439 | ++height; |
440 | |
441 | // Decide which new branch has our search key. |
442 | if (searchKey.isAfter(pivot)) { |
443 | // the right one |
444 | return n2.node; |
445 | } else { |
446 | // the left one |
447 | return n1.node; |
448 | } |
449 | } else { |
450 | // This is a non-root parent node. We need to split it into two and insert the new node |
451 | // into the grandparent. |
452 | auto n = alloc<Node>(); |
453 | uint pivot = split(n.node, n.index, node, pos); |
454 | |
455 | // Insert new child into grandparent. |
456 | parent->insertAfter(indexInParent, pivot, n.index); |
457 | |
458 | // Decide which new branch has our search key. |
459 | if (searchKey.isAfter(pivot)) { |
460 | // the new one, which is right of the original |
461 | return n.node; |
462 | } else { |
463 | // the original one, which is left of the new one |
464 | return node; |
465 | } |
466 | } |
467 | } else { |
468 | // No split needed. |
469 | return node; |
470 | } |
471 | } |
472 | |
473 | void BTreeImpl::erase(uint row, const SearchKey& searchKey) { |
  // Erase the given row number from the tree. searchKey.isAfter() returns true for all rows
  // before the given row, and false for the given row and all rows after it.
476 | |
477 | uint pos = 0; |
478 | |
479 | // Track grandparent node and child index within grandparent. |
480 | Parent* parent = nullptr; |
481 | uint indexInParent = 0; |
482 | |
483 | MaybeUint* fixup = nullptr; |
484 | |
485 | for (auto i KJ_UNUSED: zeroTo(height)) { |
486 | Parent& node = eraseHelper(tree[pos].parent, parent, indexInParent, pos, fixup); |
487 | |
488 | parent = &node; |
489 | indexInParent = searchKey.search(node); |
490 | pos = node.children[indexInParent]; |
491 | |
492 | if (indexInParent < kj::size(node.keys) && node.keys[indexInParent] == row) { |
493 | // Oh look, the row is a key in this node! We'll need to come back and fix this up later. |
494 | // Note that any particular row can only appear as *one* key value anywhere in the tree, so |
495 | // we only need one fixup pointer, which is nice. |
496 | MaybeUint* newFixup = &node.keys[indexInParent]; |
497 | if (fixup == newFixup) { |
498 | // The fixup pointer was already set while processing a parent node, and then a merge or |
499 | // rotate caused it to be moved, but the fixup pointer was updated... so it's already set |
500 | // to point at the slot we wanted it to point to, so nothing to see here. |
501 | } else { |
502 | KJ_DASSERT(fixup == nullptr); |
503 | fixup = newFixup; |
504 | } |
505 | } |
506 | } |
507 | |
508 | Leaf& leaf = eraseHelper(tree[pos].leaf, parent, indexInParent, pos, fixup); |
509 | |
510 | uint r = searchKey.search(leaf); |
511 | if (leaf.rows[r] == row) { |
512 | leaf.erase(r); |
513 | |
514 | if (fixup != nullptr) { |
515 | // There's a key in a parent node that needs fixup. This is only possible if the removed |
516 | // node is the last in its leaf. |
517 | KJ_DASSERT(leaf.rows[r] == nullptr); |
518 | KJ_DASSERT(r > 0); // non-root nodes must be at least half full so this can't be item 0 |
519 | KJ_DASSERT(*fixup == row); |
520 | *fixup = leaf.rows[r - 1]; |
521 | } |
522 | } else { |
523 | logInconsistency(); |
524 | } |
525 | } |
526 | |
527 | template <typename Node> |
528 | Node& BTreeImpl::eraseHelper( |
529 | Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup) { |
530 | if (parent != nullptr && !node.isMostlyFull()) { |
531 | // This is not the root, but it's only half-full. Rebalance. |
532 | KJ_DASSERT(node.isHalfFull()); |
533 | |
534 | if (indexInParent > 0) { |
535 | // There's a sibling to the left. |
536 | uint sibPos = parent->children[indexInParent - 1]; |
537 | Node& sib = tree[sibPos]; |
538 | if (sib.isMostlyFull()) { |
539 | // Left sibling is more than half full. Steal one member. |
540 | rotateRight(sib, node, *parent, indexInParent - 1); |
541 | return node; |
542 | } else { |
543 | // Left sibling is half full, too. Merge. |
544 | KJ_ASSERT(sib.isHalfFull()); |
545 | merge(sib, sibPos, *parent->keys[indexInParent - 1], node); |
546 | parent->eraseAfter(indexInParent - 1); |
547 | free(pos); |
548 | if (fixup == &parent->keys[indexInParent]) --fixup; |
549 | |
550 | if (parent->keys[0] == nullptr) { |
551 | // Oh hah, the parent has no keys left. It must be the root. We can eliminate it. |
552 | KJ_DASSERT(parent == &tree->parent); |
553 | compilerBarrier(); // don't reorder any writes to parent below here |
554 | move(tree[0], 0, sib); |
555 | free(sibPos); |
556 | --height; |
557 | return tree[0]; |
558 | } else { |
559 | return sib; |
560 | } |
561 | } |
562 | } else if (indexInParent < Parent::NKEYS && parent->keys[indexInParent] != nullptr) { |
563 | // There's a sibling to the right. |
564 | uint sibPos = parent->children[indexInParent + 1]; |
565 | Node& sib = tree[sibPos]; |
566 | if (sib.isMostlyFull()) { |
567 | // Right sibling is more than half full. Steal one member. |
568 | rotateLeft(node, sib, *parent, indexInParent, fixup); |
569 | return node; |
570 | } else { |
571 | // Right sibling is half full, too. Merge. |
572 | KJ_ASSERT(sib.isHalfFull()); |
573 | merge(node, pos, *parent->keys[indexInParent], sib); |
574 | parent->eraseAfter(indexInParent); |
575 | free(sibPos); |
576 | if (fixup == &parent->keys[indexInParent]) fixup = nullptr; |
577 | |
578 | if (parent->keys[0] == nullptr) { |
579 | // Oh hah, the parent has no keys left. It must be the root. We can eliminate it. |
580 | KJ_DASSERT(parent == &tree->parent); |
581 | compilerBarrier(); // don't reorder any writes to parent below here |
582 | move(tree[0], 0, node); |
583 | free(pos); |
584 | --height; |
585 | return tree[0]; |
586 | } else { |
587 | return node; |
588 | } |
589 | } |
590 | } else { |
591 | KJ_FAIL_ASSERT("inconsistent b-tree" ); |
592 | } |
593 | } |
594 | |
595 | return node; |
596 | } |
597 | |
598 | void BTreeImpl::renumber(uint oldRow, uint newRow, const SearchKey& searchKey) { |
  // Renumber the given row from oldRow to newRow. searchKey.isAfter() returns true for all rows
  // before oldRow, and false for oldRow and all rows after it. (It will not be called on newRow.)
601 | |
602 | uint pos = 0; |
603 | |
604 | for (auto i KJ_UNUSED: zeroTo(height)) { |
605 | auto& node = tree[pos].parent; |
606 | uint indexInParent = searchKey.search(node); |
607 | pos = node.children[indexInParent]; |
608 | if (node.keys[indexInParent] == oldRow) { |
609 | node.keys[indexInParent] = newRow; |
610 | } |
611 | KJ_DASSERT(pos != 0); |
612 | } |
613 | |
614 | auto& leaf = tree[pos].leaf; |
615 | uint r = searchKey.search(leaf); |
616 | if (leaf.rows[r] == oldRow) { |
617 | leaf.rows[r] = newRow; |
618 | } else { |
619 | logInconsistency(); |
620 | } |
621 | } |
622 | |
623 | uint BTreeImpl::split(Parent& dst, uint dstPos, Parent& src, uint srcPos) { |
624 | constexpr size_t mid = Parent::NKEYS / 2; |
625 | uint pivot = *src.keys[mid]; |
626 | acopy(dst.keys, src.keys + mid + 1, Parent::NKEYS - mid - 1); |
627 | azero(src.keys + mid, Parent::NKEYS - mid); |
628 | acopy(dst.children, src.children + mid + 1, Parent::NCHILDREN - mid - 1); |
629 | azero(src.children + mid + 1, Parent::NCHILDREN - mid - 1); |
630 | return pivot; |
631 | } |
632 | |
633 | uint BTreeImpl::split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos) { |
634 | constexpr size_t mid = Leaf::NROWS / 2; |
635 | uint pivot = *src.rows[mid - 1]; |
636 | acopy(dst.rows, src.rows + mid, Leaf::NROWS - mid); |
637 | azero(src.rows + mid, Leaf::NROWS - mid); |
638 | |
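  // Splice dst into the doubly-linked list of leaves, immediately after src.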
639 | if (src.next == 0) { |
640 | endLeaf = dstPos; |
641 | } else { |
642 | tree[src.next].leaf.prev = dstPos; |
643 | } |
644 | dst.next = src.next; |
645 | dst.prev = srcPos; |
646 | src.next = dstPos; |
647 | |
648 | return pivot; |
649 | } |
650 | |
651 | void BTreeImpl::merge(Parent& dst, uint dstPos, uint pivot, Parent& src) { |
652 | // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants |
653 | // guarantee that the node can't be more than half-empty, or we would have merged it sooner. |
654 | // (The root can be more than half-empty, but it is never merged with anything.) |
655 | KJ_DASSERT(src.isHalfFull()); |
656 | KJ_DASSERT(dst.isHalfFull()); |
657 | |
658 | constexpr size_t mid = Parent::NKEYS/2; |
659 | dst.keys[mid] = pivot; |
660 | acopy(dst.keys + mid + 1, src.keys, mid); |
661 | acopy(dst.children + mid + 1, src.children, mid + 1); |
662 | } |
663 | |
664 | void BTreeImpl::merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src) { |
665 | // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants |
666 | // guarantee that the node can't be more than half-empty, or we would have merged it sooner. |
667 | // (The root can be more than half-empty, but it is never merged with anything.) |
668 | KJ_DASSERT(src.isHalfFull()); |
669 | KJ_DASSERT(dst.isHalfFull()); |
670 | |
671 | constexpr size_t mid = Leaf::NROWS/2; |
672 | dst.rows[mid] = pivot; |
673 | acopy(dst.rows + mid, src.rows, mid); |
674 | |
675 | dst.next = src.next; |
676 | if (dst.next == 0) { |
677 | endLeaf = dstPos; |
678 | } else { |
679 | tree[dst.next].leaf.prev = dstPos; |
680 | } |
681 | } |
682 | |
683 | void BTreeImpl::move(Parent& dst, uint dstPos, Parent& src) { |
684 | dst = src; |
685 | } |
686 | |
687 | void BTreeImpl::move(Leaf& dst, uint dstPos, Leaf& src) { |
688 | dst = src; |
689 | if (src.next == 0) { |
690 | endLeaf = dstPos; |
691 | } else { |
692 | tree[src.next].leaf.prev = dstPos; |
693 | } |
694 | if (src.prev == 0) { |
695 | beginLeaf = dstPos; |
696 | } else { |
697 | tree[src.prev].leaf.next = dstPos; |
698 | } |
699 | } |
700 | |
701 | void BTreeImpl::rotateLeft( |
702 | Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) { |
703 | // Steal one item from the right node and move it to the left node. |
704 | |
705 | // Like merge(), this is only called on an exactly-half-empty node. |
706 | KJ_DASSERT(left.isHalfFull()); |
707 | KJ_DASSERT(right.isMostlyFull()); |
708 | |
709 | constexpr size_t mid = Parent::NKEYS/2; |
710 | left.keys[mid] = parent.keys[indexInParent]; |
711 | if (fixup == &parent.keys[indexInParent]) fixup = &left.keys[mid]; |
712 | parent.keys[indexInParent] = right.keys[0]; |
713 | left.children[mid + 1] = right.children[0]; |
714 | amove(right.keys, right.keys + 1, Parent::NKEYS - 1); |
715 | right.keys[Parent::NKEYS - 1] = nullptr; |
716 | amove(right.children, right.children + 1, Parent::NCHILDREN - 1); |
717 | right.children[Parent::NCHILDREN - 1] = 0; |
718 | } |
719 | |
720 | void BTreeImpl::rotateLeft( |
721 | Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) { |
722 | // Steal one item from the right node and move it to the left node. |
723 | |
724 | // Like merge(), this is only called on an exactly-half-empty node. |
725 | KJ_DASSERT(left.isHalfFull()); |
726 | KJ_DASSERT(right.isMostlyFull()); |
727 | |
728 | constexpr size_t mid = Leaf::NROWS/2; |
729 | parent.keys[indexInParent] = left.rows[mid] = right.rows[0]; |
730 | if (fixup == &parent.keys[indexInParent]) fixup = nullptr; |
731 | amove(right.rows, right.rows + 1, Leaf::NROWS - 1); |
732 | right.rows[Leaf::NROWS - 1] = nullptr; |
733 | } |
734 | |
735 | void BTreeImpl::rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent) { |
736 | // Steal one item from the left node and move it to the right node. |
737 | |
738 | // Like merge(), this is only called on an exactly-half-empty node. |
739 | KJ_DASSERT(right.isHalfFull()); |
740 | KJ_DASSERT(left.isMostlyFull()); |
741 | |
742 | constexpr size_t mid = Parent::NKEYS/2; |
743 | amove(right.keys + 1, right.keys, mid); |
744 | amove(right.children + 1, right.children, mid + 1); |
745 | |
746 | uint back = left.keyCount() - 1; |
747 | |
748 | right.keys[0] = parent.keys[indexInParent]; |
749 | parent.keys[indexInParent] = left.keys[back]; |
750 | right.children[0] = left.children[back + 1]; |
751 | left.keys[back] = nullptr; |
752 | left.children[back + 1] = 0; |
753 | } |
754 | |
755 | void BTreeImpl::rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent) { |
756 | // Steal one item from the left node and move it to the right node. |
757 | |
  // Like merge(), this is only called on an exactly-half-empty node.
759 | KJ_DASSERT(right.isHalfFull()); |
760 | KJ_DASSERT(left.isMostlyFull()); |
761 | |
762 | constexpr size_t mid = Leaf::NROWS/2; |
763 | amove(right.rows + 1, right.rows, mid); |
764 | |
765 | uint back = left.size() - 1; |
766 | |
767 | right.rows[0] = left.rows[back]; |
768 | parent.keys[indexInParent] = left.rows[back - 1]; |
769 | left.rows[back] = nullptr; |
770 | } |
771 | |
772 | void BTreeImpl::Parent::initRoot(uint key, uint leftChild, uint rightChild) { |
773 | // HACK: This is typically called on the root node immediately after copying its contents away, |
774 | // but the pointer used to copy it away may be a different pointer pointing to a different |
  // union member which the compiler may not recognize as aliasing with this object. Just to
776 | // be extra-safe, insert a compiler barrier. |
777 | compilerBarrier(); |
778 | |
779 | keys[0] = key; |
780 | children[0] = leftChild; |
781 | children[1] = rightChild; |
782 | azero(keys + 1, Parent::NKEYS - 1); |
783 | azero(children + 2, Parent::NCHILDREN - 2); |
784 | } |
785 | |
786 | void BTreeImpl::Parent::insertAfter(uint i, uint splitKey, uint child) { |
787 | KJ_IREQUIRE(children[Parent::NCHILDREN - 1] == 0); // check not full |
788 | |
789 | amove(keys + i + 1, keys + i, Parent::NKEYS - (i + 1)); |
790 | keys[i] = splitKey; |
791 | |
792 | amove(children + i + 2, children + i + 1, Parent::NCHILDREN - (i + 2)); |
793 | children[i + 1] = child; |
794 | } |
795 | |
796 | void BTreeImpl::Parent::eraseAfter(uint i) { |
797 | amove(keys + i, keys + i + 1, Parent::NKEYS - (i + 1)); |
798 | keys[Parent::NKEYS - 1] = nullptr; |
799 | amove(children + i + 1, children + i + 2, Parent::NCHILDREN - (i + 2)); |
800 | children[Parent::NCHILDREN - 1] = 0; |
801 | } |
802 | |
803 | } // namespace _ |
804 | |
805 | // ======================================================================================= |
806 | // Insertion order |
807 | |
808 | const InsertionOrderIndex::Link InsertionOrderIndex::EMPTY_LINK = { 0, 0 }; |
809 | |
810 | InsertionOrderIndex::InsertionOrderIndex(): capacity(0), links(const_cast<Link*>(&EMPTY_LINK)) {} |
811 | InsertionOrderIndex::~InsertionOrderIndex() noexcept(false) { |
812 | if (links != &EMPTY_LINK) delete[] links; |
813 | } |
814 | |
815 | void InsertionOrderIndex::reserve(size_t size) { |
  KJ_ASSERT(size < (1u << 31), "Table too big for InsertionOrderIndex");
817 | |
818 | if (size > capacity) { |
819 | // Need to grow. |
820 | // Note that `size` and `capacity` do not include the special link[0]. |
821 | |
822 | // Round up to the next power of 2. |
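    // (E.g. size = 5: lg(5) = 2, so allocation = 1 << 3 = 8.)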
823 | size_t allocation = 1u << (_::lg(size) + 1); |
824 | KJ_DASSERT(allocation > size); |
825 | KJ_DASSERT(allocation <= size * 2); |
826 | |
827 | // Round first allocation up to 8. |
828 | allocation = kj::max(allocation, 8); |
829 | |
830 | Link* newLinks = new Link[allocation]; |
831 | #ifdef KJ_DEBUG |
832 | // To catch bugs, fill unused links with 0xff. |
833 | memset(newLinks, 0xff, allocation * sizeof(Link)); |
834 | #endif |
835 | _::acopy(newLinks, links, capacity + 1); |
836 | if (links != &EMPTY_LINK) delete[] links; |
837 | links = newLinks; |
838 | capacity = allocation - 1; |
839 | } |
840 | } |
841 | |
842 | void InsertionOrderIndex::clear() { |
843 | links[0] = Link { 0, 0 }; |
844 | |
845 | #ifdef KJ_DEBUG |
846 | // To catch bugs, fill unused links with 0xff. |
847 | memset(links + 1, 0xff, capacity * sizeof(Link)); |
848 | #endif |
849 | } |
850 | |
851 | kj::Maybe<size_t> InsertionOrderIndex::insertImpl(size_t pos) { |
852 | if (pos >= capacity) { |
853 | reserve(pos + 1); |
854 | } |
855 | |
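  // links[0] is a sentinel heading a circular doubly-linked list, so links[0].prev is the current
  // tail. Append the new entry (stored at index pos + 1, since the sentinel occupies slot 0) at
  // the end of the list.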
856 | links[pos + 1].prev = links[0].prev; |
857 | links[pos + 1].next = 0; |
858 | links[links[0].prev].next = pos + 1; |
859 | links[0].prev = pos + 1; |
860 | |
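  // InsertionOrderIndex never treats a row as a duplicate, so insertion always succeeds.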
861 | return nullptr; |
862 | } |
863 | |
864 | void InsertionOrderIndex::eraseImpl(size_t pos) { |
865 | Link& link = links[pos + 1]; |
866 | links[link.next].prev = link.prev; |
867 | links[link.prev].next = link.next; |
868 | |
869 | #ifdef KJ_DEBUG |
870 | memset(&link, 0xff, sizeof(Link)); |
871 | #endif |
872 | } |
873 | |
874 | void InsertionOrderIndex::moveImpl(size_t oldPos, size_t newPos) { |
875 | Link& link = links[oldPos + 1]; |
876 | Link& newLink = links[newPos + 1]; |
877 | |
878 | newLink = link; |
879 | |
880 | KJ_DASSERT(links[link.next].prev == oldPos + 1); |
881 | KJ_DASSERT(links[link.prev].next == oldPos + 1); |
882 | links[link.next].prev = newPos + 1; |
883 | links[link.prev].next = newPos + 1; |
884 | |
885 | #ifdef KJ_DEBUG |
886 | memset(&link, 0xff, sizeof(Link)); |
887 | #endif |
888 | } |
889 | |
890 | } // namespace kj |
891 | |