// Copyright (c) 2018 Kenton Varda and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "table.h"
#include "debug.h"
#include <stdlib.h>

namespace kj {
namespace _ {

static inline uint lg(uint value) {
  // Compute floor(log2(value)).
  //
  // Undefined for value = 0.
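  //
  // For reference: lg(1) == 0, lg(2) == lg(3) == 1, lg(4) == 2, lg(1024) == 10.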
#if _MSC_VER
  unsigned long i;
  auto found = _BitScanReverse(&i, value);
  KJ_DASSERT(found);  // !found means value = 0
  return i;
#else
  return sizeof(uint) * 8 - 1 - __builtin_clz(value);
#endif
}

void throwDuplicateTableRow() {
  KJ_FAIL_REQUIRE("inserted row already exists in table");
}

void logHashTableInconsistency() {
  KJ_LOG(ERROR,
      "HashIndex detected hash table inconsistency. This can happen if you create a kj::Table "
      "with a hash index and you modify the rows in the table post-indexing in a way that would "
      "change their hash. This is a serious bug which will lead to undefined behavior."
      "\nstack: ", kj::getStackTrace());
}

// List of primes where each element is roughly double the previous. Obtained
// from: http://planetmath.org/goodhashtableprimes
// Primes < 53 were added to ensure that small tables don't allocate excessive memory.
static const size_t PRIMES[] = {
           1,  // 2^ 0 = 1
           3,  // 2^ 1 = 2
           5,  // 2^ 2 = 4
          11,  // 2^ 3 = 8
          23,  // 2^ 4 = 16
          53,  // 2^ 5 = 32
          97,  // 2^ 6 = 64
         193,  // 2^ 7 = 128
         389,  // 2^ 8 = 256
         769,  // 2^ 9 = 512
        1543,  // 2^10 = 1024
        3079,  // 2^11 = 2048
        6151,  // 2^12 = 4096
       12289,  // 2^13 = 8192
       24593,  // 2^14 = 16384
       49157,  // 2^15 = 32768
       98317,  // 2^16 = 65536
      196613,  // 2^17 = 131072
      393241,  // 2^18 = 262144
      786433,  // 2^19 = 524288
     1572869,  // 2^20 = 1048576
     3145739,  // 2^21 = 2097152
     6291469,  // 2^22 = 4194304
    12582917,  // 2^23 = 8388608
    25165843,  // 2^24 = 16777216
    50331653,  // 2^25 = 33554432
   100663319,  // 2^26 = 67108864
   201326611,  // 2^27 = 134217728
   402653189,  // 2^28 = 268435456
   805306457,  // 2^29 = 536870912
  1610612741,  // 2^30 = 1073741824
};

uint chooseBucket(uint hash, uint count) {
  // Integer modulus is really, really slow. It turns out that the compiler can generate much
  // faster code if the denominator is a constant. Since we have a fixed set of possible
  // denominators, a big old switch() statement is a win.

  // TODO(perf): Consider using power-of-two bucket sizes. We can safely do so as long as we demand
  // high-quality hash functions -- kj::hashCode() needs good diffusion even for integers, can't
  // just be a cast. Also be sure to implement Robin Hood hashing to avoid extremely bad negative
  // lookup time when elements have sequential hashes (otherwise, it could be necessary to scan
  // the entire list to determine that an element isn't present).
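
  // To illustrate the constant-denominator win (a sketch of one published technique, Lemire's
  // "fastmod", not necessarily what any given compiler emits): for 32-bit n and a constant
  // divisor d, n % d can be computed with multiplies instead of a division instruction:
  //
  //   uint64_t M = ~0ull / d + 1;  // precompute once per divisor
  //   uint32_t mod = (uint32_t)(((unsigned __int128)(M * n) * d) >> 64);
  //
  // (unsigned __int128 is a GCC/Clang extension; MSVC would need _umul128 instead.)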

  switch (count) {
#define HANDLE(i) case i##u: return hash % i##u
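    // For example, HANDLE(53) expands to `case 53u: return hash % 53u`, handing the compiler a
    // constant denominator in every case.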
    HANDLE(         1);
    HANDLE(         3);
    HANDLE(         5);
    HANDLE(        11);
    HANDLE(        23);
    HANDLE(        53);
    HANDLE(        97);
    HANDLE(       193);
    HANDLE(       389);
    HANDLE(       769);
    HANDLE(      1543);
    HANDLE(      3079);
    HANDLE(      6151);
    HANDLE(     12289);
    HANDLE(     24593);
    HANDLE(     49157);
    HANDLE(     98317);
    HANDLE(    196613);
    HANDLE(    393241);
    HANDLE(    786433);
    HANDLE(   1572869);
    HANDLE(   3145739);
    HANDLE(   6291469);
    HANDLE(  12582917);
    HANDLE(  25165843);
    HANDLE(  50331653);
    HANDLE( 100663319);
    HANDLE( 201326611);
    HANDLE( 402653189);
    HANDLE( 805306457);
    HANDLE(1610612741);
#undef HANDLE
    default: return hash % count;
  }
}

size_t chooseHashTableSize(uint size) {
  if (size == 0) return 0;

  // Add 1 to compensate for lg() rounding down, then look up the best prime bucket size for
  // that target size.
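  //
  // E.g. (worked example): chooseHashTableSize(40) computes lg(40) == 5 and returns
  // PRIMES[6] == 97, comfortably above the requested size.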
  return PRIMES[lg(size) + 1];
}

kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize) {
  // Rehash the whole table.
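  //
  // Each occupied bucket from the old table is re-inserted into the new array by open
  // addressing: start at hash % size and step via probeHash() until an empty slot is found.
  // Buckets for which isOccupied() is false (empty slots, and any erased-marker buckets that
  // isEmpty() would also reject) are simply not carried over.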

  KJ_REQUIRE(targetSize < (1 << 30), "hash table has reached maximum size");

  size_t size = chooseHashTableSize(targetSize);

  if (size < oldBuckets.size()) {
    size = oldBuckets.size();
  }

  auto newBuckets = kj::heapArray<HashBucket>(size);
  memset(newBuckets.begin(), 0, sizeof(HashBucket) * size);

  for (auto& oldBucket: oldBuckets) {
    if (oldBucket.isOccupied()) {
      for (uint i = oldBucket.hash % newBuckets.size();; i = probeHash(newBuckets, i)) {
        auto& newBucket = newBuckets[i];
        if (newBucket.isEmpty()) {
          newBucket = oldBucket;
          break;
        }
      }
    }
  }

  return newBuckets;
}

// =======================================================================================
// BTree

#if _WIN32
#define aligned_free _aligned_free
#else
#define aligned_free ::free
#endif

BTreeImpl::BTreeImpl()
    : tree(const_cast<NodeUnion*>(&EMPTY_NODE)),
      treeCapacity(1),
      height(0),
      freelistHead(1),
      freelistSize(0),
      beginLeaf(0),
      endLeaf(0) {}
BTreeImpl::~BTreeImpl() noexcept(false) {
  if (tree != &EMPTY_NODE) {
    aligned_free(tree);
  }
}

const BTreeImpl::NodeUnion BTreeImpl::EMPTY_NODE = {{{0, {0}}}};

void BTreeImpl::verify(size_t size, FunctionParam<bool(uint, uint)> f) {
  KJ_ASSERT(verifyNode(size, f, 0, height, nullptr) == size);
}
size_t BTreeImpl::verifyNode(size_t size, FunctionParam<bool(uint, uint)>& f,
                             uint pos, uint height, MaybeUint maxRow) {
  if (height > 0) {
    auto& parent = tree[pos].parent;

    auto n = parent.keyCount();
    size_t total = 0;
    for (auto i: kj::zeroTo(n)) {
      KJ_ASSERT(*parent.keys[i] < size);
      total += verifyNode(size, f, parent.children[i], height - 1, parent.keys[i]);
      KJ_ASSERT(i + 1 == n || f(*parent.keys[i], *parent.keys[i + 1]));
    }
    total += verifyNode(size, f, parent.children[n], height - 1, maxRow);
    KJ_ASSERT(maxRow == nullptr || f(*parent.keys[n-1], *maxRow));
    return total;
  } else {
    auto& leaf = tree[pos].leaf;
    auto n = leaf.size();
    for (auto i: kj::zeroTo(n)) {
      KJ_ASSERT(*leaf.rows[i] < size);
      if (i + 1 < n) {
        KJ_ASSERT(f(*leaf.rows[i], *leaf.rows[i + 1]));
      } else {
        KJ_ASSERT(maxRow == nullptr || leaf.rows[n-1] == maxRow);
      }
    }
    return n;
  }
}

void BTreeImpl::logInconsistency() const {
  KJ_LOG(ERROR,
      "BTreeIndex detected tree state inconsistency. This can happen if you create a kj::Table "
      "with a b-tree index and you modify the rows in the table post-indexing in a way that would "
      "change their ordering. This is a serious bug which will lead to undefined behavior."
      "\nstack: ", kj::getStackTrace());
}

void BTreeImpl::reserve(size_t size) {
  KJ_REQUIRE(size < (1u << 31), "b-tree has reached maximum size");

  // Calculate the worst-case number of leaves to cover the size, given that a leaf is always at
  // least half-full. (Note that it's correct for this calculation to round down, not up: The
  // remainder will necessarily be distributed among the non-full leaves, rather than creating a
  // new leaf, because if it went into a new leaf, that leaf would be less than half-full.)
  uint leaves = size / (Leaf::NROWS / 2);

  // Calculate the worst-case number of parents to cover the leaves, given that a parent is always
  // at least half-full. Since the parents form a tree with branching factor B, the size of the
  // tree is N/B + N/B^2 + N/B^3 + N/B^4 + ... = N / (B - 1). Math.
  constexpr uint branchingFactor = Parent::NCHILDREN / 2;
  uint parents = leaves / (branchingFactor - 1);

  // Height is log-base-branching-factor of leaves, plus 1 for the root node.
  uint height = lg(leaves | 1) / lg(branchingFactor) + 1;
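
  // Worked example (illustrative only; supposes Leaf::NROWS == 14 and Parent::NCHILDREN == 8):
  // reserve(1000) gives leaves = 1000 / 7 = 142, branchingFactor = 4, parents = 142 / 3 = 47,
  // height = lg(142) / lg(4) + 1 = 7 / 2 + 1 = 4, so newSize below is 142 + 47 + 1 + 4 + 2 = 196.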

  size_t newSize = leaves +
      parents + 1 +  // + 1 for the root
      height + 2;    // minimum freelist size needed by insert()

  if (treeCapacity < newSize) {
    growTree(newSize);
  }
}

void BTreeImpl::clear() {
  if (tree != &EMPTY_NODE) {
    azero(tree, treeCapacity);
    height = 0;
    freelistHead = 1;
    freelistSize = treeCapacity;
    beginLeaf = 0;
    endLeaf = 0;
  }
}

void BTreeImpl::growTree(uint minCapacity) {
  uint newCapacity = kj::max(kj::max(minCapacity, treeCapacity * 2), 4);
  freelistSize += newCapacity - treeCapacity;

  // Allocate some aligned memory! In theory this should be as simple as calling the C11 standard
  // aligned_alloc() function. Unfortunately, many platforms don't implement it. Luckily, there
  // are usually alternatives.

#if __APPLE__ || __BIONIC__
  // OSX and Android lack aligned_alloc(), but have posix_memalign(). Fine.
  void* allocPtr;
  int error = posix_memalign(&allocPtr,
      sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion));
  if (error != 0) {
    KJ_FAIL_SYSCALL("posix_memalign", error);
  }
  NodeUnion* newTree = reinterpret_cast<NodeUnion*>(allocPtr);
#elif _WIN32
  // Windows lacks aligned_alloc() but has its own _aligned_malloc() (which requires freeing using
  // _aligned_free()).
  // WATCH OUT: The argument order for _aligned_malloc() is opposite of aligned_alloc()!
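  // For reference, the two signatures:
  //   void* aligned_alloc(size_t alignment, size_t size);    // C11
  //   void* _aligned_malloc(size_t size, size_t alignment);  // MSVC CRT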
  NodeUnion* newTree = reinterpret_cast<NodeUnion*>(
      _aligned_malloc(newCapacity * sizeof(BTreeImpl::NodeUnion), sizeof(BTreeImpl::NodeUnion)));
  KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity);
#else
  // Let's use the C11 standard.
  NodeUnion* newTree = reinterpret_cast<NodeUnion*>(
      aligned_alloc(sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion)));
  KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity);
#endif

  acopy(newTree, tree, treeCapacity);
  azero(newTree + treeCapacity, newCapacity - treeCapacity);
  if (tree != &EMPTY_NODE) aligned_free(tree);
  tree = newTree;
  treeCapacity = newCapacity;
}

BTreeImpl::Iterator BTreeImpl::search(const SearchKey& searchKey) const {
  // Find the "first" row number (in sorted order) for which searchKey.isAfter(rowNumber) returns
  // false.

  uint pos = 0;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    auto& parent = tree[pos].parent;
    pos = parent.children[searchKey.search(parent)];
  }

  auto& leaf = tree[pos].leaf;
  return { tree, &leaf, searchKey.search(leaf) };
}

template <typename T>
struct BTreeImpl::AllocResult {
  uint index;
  T& node;
};

template <typename T>
inline BTreeImpl::AllocResult<T> BTreeImpl::alloc() {
  // Allocate a new item from the freelist. Guaranteed to be zero'd except for the first member.
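  //
  // Note that freshly zeroed nodes have freelist.nextOffset == 0, so a head at index i
  // implicitly chains to i + 1; explicit offsets only appear when free() returns nodes out of
  // order. E.g. right after growTree(), successive alloc() calls simply hand out consecutive
  // indices.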
  uint i = freelistHead;
  NodeUnion* ptr = &tree[i];
  freelistHead = i + 1 + ptr->freelist.nextOffset;
  --freelistSize;
  return { i, *ptr };
}

inline void BTreeImpl::free(uint pos) {
  // Add the given node to the freelist.

  // HACK: This is typically called on a node immediately after copying its contents away, but the
  // pointer used to copy it away may be a different pointer pointing to a different union member
  // which the compiler may not recognize as aliasing with this object. Just to be extra-safe,
  // insert a compiler barrier.
  compilerBarrier();

  auto& node = tree[pos];
  node.freelist.nextOffset = freelistHead - pos - 1;
  azero(node.freelist.zero, kj::size(node.freelist.zero));
  freelistHead = pos;
  ++freelistSize;
}

BTreeImpl::Iterator BTreeImpl::insert(const SearchKey& searchKey) {
  // Like search() but ensures that there is room in the leaf node to insert a new row.

  // If we split the root node it will generate two new nodes. If we split any other node in the
  // path it will generate one new node. `height` doesn't count leaf nodes, but we can equivalently
  // think of it as not counting the root node, so in the worst case we may allocate height + 2
  // new nodes.
  //
  // (Also note that if the tree is currently empty, then `tree` points to a dummy root node in
  // read-only memory. We definitely need to allocate a real tree node array in this case, and
  // we'll start out allocating space for four nodes, which will be all we need up to 28 rows.)
  if (freelistSize < height + 2) {
    if (height > 0 && !tree[0].parent.isFull() && freelistSize >= height) {
      // Slight optimization: The root node is not full, so we're definitely not going to split it.
      // That means that the maximum allocations we might do is equal to `height`, not
      // `height + 2`, and we have that much space, so no need to grow yet.
      //
      // This optimization is particularly important for small trees, e.g. when treeCapacity is 4
      // and the tree so far consists of a root and two children, we definitely don't need to grow
      // the tree yet.
    } else {
      growTree();

      if (freelistHead == 0) {
        // We have no root yet. Allocate one.
        KJ_ASSERT(alloc<Parent>().index == 0);
      }
    }
  }

  uint pos = 0;

  // Track grandparent node and child index within grandparent.
  Parent* parent = nullptr;
  uint indexInParent = 0;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    Parent& node = insertHelper(searchKey, tree[pos].parent, parent, indexInParent, pos);

    parent = &node;
    indexInParent = searchKey.search(node);
    pos = node.children[indexInParent];
  }

  Leaf& leaf = insertHelper(searchKey, tree[pos].leaf, parent, indexInParent, pos);

  // Fun fact: Unlike erase(), there's no need to climb back up the tree modifying keys, because
  // either the newly-inserted node will not be the last in the leaf (and thus parent keys aren't
  // modified), or the leaf is the last leaf in the tree (and thus there's no parent key to
  // modify).

  return { tree, &leaf, searchKey.search(leaf) };
}

template <typename Node>
Node& BTreeImpl::insertHelper(const SearchKey& searchKey,
    Node& node, Parent* parent, uint indexInParent, uint pos) {
  if (node.isFull()) {
    // This node is full. Need to split.
    if (parent == nullptr) {
      // This is the root node. We need to split into two nodes and create a new root.
      auto n1 = alloc<Node>();
      auto n2 = alloc<Node>();

      uint pivot = split(n2.node, n2.index, node, pos);
      move(n1.node, n1.index, node);

      // Rewrite root to have the two children.
      tree[0].parent.initRoot(pivot, n1.index, n2.index);

      // Increased height.
      ++height;

      // Decide which new branch has our search key.
      if (searchKey.isAfter(pivot)) {
        // the right one
        return n2.node;
      } else {
        // the left one
        return n1.node;
      }
    } else {
      // This is a non-root parent node. We need to split it into two and insert the new node
      // into the grandparent.
      auto n = alloc<Node>();
      uint pivot = split(n.node, n.index, node, pos);

      // Insert new child into grandparent.
      parent->insertAfter(indexInParent, pivot, n.index);

      // Decide which new branch has our search key.
      if (searchKey.isAfter(pivot)) {
        // the new one, which is right of the original
        return n.node;
      } else {
        // the original one, which is left of the new one
        return node;
      }
    }
  } else {
    // No split needed.
    return node;
  }
}

void BTreeImpl::erase(uint row, const SearchKey& searchKey) {
  // Erase the given row number from the tree. searchKey must locate the given row, i.e.
  // searchKey.isAfter() returns false for this row and all rows after it.

  uint pos = 0;

  // Track grandparent node and child index within grandparent.
  Parent* parent = nullptr;
  uint indexInParent = 0;

  MaybeUint* fixup = nullptr;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    Parent& node = eraseHelper(tree[pos].parent, parent, indexInParent, pos, fixup);

    parent = &node;
    indexInParent = searchKey.search(node);
    pos = node.children[indexInParent];

    if (indexInParent < kj::size(node.keys) && node.keys[indexInParent] == row) {
      // Oh look, the row is a key in this node! We'll need to come back and fix this up later.
      // Note that any particular row can only appear as *one* key value anywhere in the tree, so
      // we only need one fixup pointer, which is nice.
      MaybeUint* newFixup = &node.keys[indexInParent];
      if (fixup == newFixup) {
        // The fixup pointer was already set while processing a parent node, and then a merge or
        // rotate caused it to be moved, but the fixup pointer was updated... so it's already set
        // to point at the slot we wanted it to point to, so nothing to see here.
      } else {
        KJ_DASSERT(fixup == nullptr);
        fixup = newFixup;
      }
    }
  }

  Leaf& leaf = eraseHelper(tree[pos].leaf, parent, indexInParent, pos, fixup);

  uint r = searchKey.search(leaf);
  if (leaf.rows[r] == row) {
    leaf.erase(r);

    if (fixup != nullptr) {
      // There's a key in a parent node that needs fixup. This is only possible if the removed
      // node is the last in its leaf.
      KJ_DASSERT(leaf.rows[r] == nullptr);
      KJ_DASSERT(r > 0);  // non-root nodes must be at least half full so this can't be item 0
      KJ_DASSERT(*fixup == row);
      *fixup = leaf.rows[r - 1];
    }
  } else {
    logInconsistency();
  }
}

template <typename Node>
Node& BTreeImpl::eraseHelper(
    Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup) {
  if (parent != nullptr && !node.isMostlyFull()) {
    // This is not the root, but it's only half-full. Rebalance.
    KJ_DASSERT(node.isHalfFull());

    if (indexInParent > 0) {
      // There's a sibling to the left.
      uint sibPos = parent->children[indexInParent - 1];
      Node& sib = tree[sibPos];
      if (sib.isMostlyFull()) {
        // Left sibling is more than half full. Steal one member.
        rotateRight(sib, node, *parent, indexInParent - 1);
        return node;
      } else {
        // Left sibling is half full, too. Merge.
        KJ_ASSERT(sib.isHalfFull());
        merge(sib, sibPos, *parent->keys[indexInParent - 1], node);
        parent->eraseAfter(indexInParent - 1);
        free(pos);
        if (fixup == &parent->keys[indexInParent]) --fixup;

        if (parent->keys[0] == nullptr) {
          // Oh hah, the parent has no keys left. It must be the root. We can eliminate it.
          KJ_DASSERT(parent == &tree->parent);
          compilerBarrier();  // don't reorder any writes to parent below here
          move(tree[0], 0, sib);
          free(sibPos);
          --height;
          return tree[0];
        } else {
          return sib;
        }
      }
    } else if (indexInParent < Parent::NKEYS && parent->keys[indexInParent] != nullptr) {
      // There's a sibling to the right.
      uint sibPos = parent->children[indexInParent + 1];
      Node& sib = tree[sibPos];
      if (sib.isMostlyFull()) {
        // Right sibling is more than half full. Steal one member.
        rotateLeft(node, sib, *parent, indexInParent, fixup);
        return node;
      } else {
        // Right sibling is half full, too. Merge.
        KJ_ASSERT(sib.isHalfFull());
        merge(node, pos, *parent->keys[indexInParent], sib);
        parent->eraseAfter(indexInParent);
        free(sibPos);
        if (fixup == &parent->keys[indexInParent]) fixup = nullptr;

        if (parent->keys[0] == nullptr) {
          // Oh hah, the parent has no keys left. It must be the root. We can eliminate it.
          KJ_DASSERT(parent == &tree->parent);
          compilerBarrier();  // don't reorder any writes to parent below here
          move(tree[0], 0, node);
          free(pos);
          --height;
          return tree[0];
        } else {
          return node;
        }
      }
    } else {
      KJ_FAIL_ASSERT("inconsistent b-tree");
    }
  }

  return node;
}

void BTreeImpl::renumber(uint oldRow, uint newRow, const SearchKey& searchKey) {
  // Renumber the given row from oldRow to newRow. searchKey must locate oldRow, i.e.
  // searchKey.isAfter() returns false for oldRow and all rows after it. (It is never evaluated
  // against newRow.)

  uint pos = 0;

  for (auto i KJ_UNUSED: zeroTo(height)) {
    auto& node = tree[pos].parent;
    uint indexInParent = searchKey.search(node);
    pos = node.children[indexInParent];
    if (node.keys[indexInParent] == oldRow) {
      node.keys[indexInParent] = newRow;
    }
    KJ_DASSERT(pos != 0);
  }

  auto& leaf = tree[pos].leaf;
  uint r = searchKey.search(leaf);
  if (leaf.rows[r] == oldRow) {
    leaf.rows[r] = newRow;
  } else {
    logInconsistency();
  }
}

uint BTreeImpl::split(Parent& dst, uint dstPos, Parent& src, uint srcPos) {
  constexpr size_t mid = Parent::NKEYS / 2;
  uint pivot = *src.keys[mid];
  acopy(dst.keys, src.keys + mid + 1, Parent::NKEYS - mid - 1);
  azero(src.keys + mid, Parent::NKEYS - mid);
  acopy(dst.children, src.children + mid + 1, Parent::NCHILDREN - mid - 1);
  azero(src.children + mid + 1, Parent::NCHILDREN - mid - 1);
  return pivot;
}
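
// Note the asymmetry between the parent split above and the leaf split below: a parent's pivot
// key moves up into the new grandparent entry and is dropped from both halves, whereas a leaf's
// pivot row must stay behind as the last row of `src`, because every row has to remain present
// in exactly one leaf.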

uint BTreeImpl::split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos) {
  constexpr size_t mid = Leaf::NROWS / 2;
  uint pivot = *src.rows[mid - 1];
  acopy(dst.rows, src.rows + mid, Leaf::NROWS - mid);
  azero(src.rows + mid, Leaf::NROWS - mid);

  if (src.next == 0) {
    endLeaf = dstPos;
  } else {
    tree[src.next].leaf.prev = dstPos;
  }
  dst.next = src.next;
  dst.prev = srcPos;
  src.next = dstPos;

  return pivot;
}

void BTreeImpl::merge(Parent& dst, uint dstPos, uint pivot, Parent& src) {
  // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants
  // guarantee that the node can't be more than half-empty, or we would have merged it sooner.
  // (The root can be more than half-empty, but it is never merged with anything.)
  KJ_DASSERT(src.isHalfFull());
  KJ_DASSERT(dst.isHalfFull());

  constexpr size_t mid = Parent::NKEYS/2;
  dst.keys[mid] = pivot;
  acopy(dst.keys + mid + 1, src.keys, mid);
  acopy(dst.children + mid + 1, src.children, mid + 1);
}

void BTreeImpl::merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src) {
  // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants
  // guarantee that the node can't be more than half-empty, or we would have merged it sooner.
  // (The root can be more than half-empty, but it is never merged with anything.)
  KJ_DASSERT(src.isHalfFull());
  KJ_DASSERT(dst.isHalfFull());

  constexpr size_t mid = Leaf::NROWS/2;
  dst.rows[mid] = pivot;
  acopy(dst.rows + mid, src.rows, mid);

  dst.next = src.next;
  if (dst.next == 0) {
    endLeaf = dstPos;
  } else {
    tree[dst.next].leaf.prev = dstPos;
  }
}

void BTreeImpl::move(Parent& dst, uint dstPos, Parent& src) {
  dst = src;
}

void BTreeImpl::move(Leaf& dst, uint dstPos, Leaf& src) {
  dst = src;
  if (src.next == 0) {
    endLeaf = dstPos;
  } else {
    tree[src.next].leaf.prev = dstPos;
  }
  if (src.prev == 0) {
    beginLeaf = dstPos;
  } else {
    tree[src.prev].leaf.next = dstPos;
  }
}

void BTreeImpl::rotateLeft(
    Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) {
  // Steal one item from the right node and move it to the left node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(left.isHalfFull());
  KJ_DASSERT(right.isMostlyFull());

  constexpr size_t mid = Parent::NKEYS/2;
  left.keys[mid] = parent.keys[indexInParent];
  if (fixup == &parent.keys[indexInParent]) fixup = &left.keys[mid];
  parent.keys[indexInParent] = right.keys[0];
  left.children[mid + 1] = right.children[0];
  amove(right.keys, right.keys + 1, Parent::NKEYS - 1);
  right.keys[Parent::NKEYS - 1] = nullptr;
  amove(right.children, right.children + 1, Parent::NCHILDREN - 1);
  right.children[Parent::NCHILDREN - 1] = 0;
}

void BTreeImpl::rotateLeft(
    Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) {
  // Steal one item from the right node and move it to the left node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(left.isHalfFull());
  KJ_DASSERT(right.isMostlyFull());

  constexpr size_t mid = Leaf::NROWS/2;
  parent.keys[indexInParent] = left.rows[mid] = right.rows[0];
  if (fixup == &parent.keys[indexInParent]) fixup = nullptr;
  amove(right.rows, right.rows + 1, Leaf::NROWS - 1);
  right.rows[Leaf::NROWS - 1] = nullptr;
}

void BTreeImpl::rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent) {
  // Steal one item from the left node and move it to the right node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(right.isHalfFull());
  KJ_DASSERT(left.isMostlyFull());

  constexpr size_t mid = Parent::NKEYS/2;
  amove(right.keys + 1, right.keys, mid);
  amove(right.children + 1, right.children, mid + 1);

  uint back = left.keyCount() - 1;

  right.keys[0] = parent.keys[indexInParent];
  parent.keys[indexInParent] = left.keys[back];
  right.children[0] = left.children[back + 1];
  left.keys[back] = nullptr;
  left.children[back + 1] = 0;
}

void BTreeImpl::rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent) {
  // Steal one item from the left node and move it to the right node.

  // Like merge(), this is only called on an exactly-half-empty node.
  KJ_DASSERT(right.isHalfFull());
  KJ_DASSERT(left.isMostlyFull());

  constexpr size_t mid = Leaf::NROWS/2;
  amove(right.rows + 1, right.rows, mid);

  uint back = left.size() - 1;

  right.rows[0] = left.rows[back];
  parent.keys[indexInParent] = left.rows[back - 1];
  left.rows[back] = nullptr;
}

void BTreeImpl::Parent::initRoot(uint key, uint leftChild, uint rightChild) {
  // HACK: This is typically called on the root node immediately after copying its contents away,
  // but the pointer used to copy it away may be a different pointer pointing to a different
  // union member which the compiler may not recognize as aliasing with this object. Just to
  // be extra-safe, insert a compiler barrier.
  compilerBarrier();

  keys[0] = key;
  children[0] = leftChild;
  children[1] = rightChild;
  azero(keys + 1, Parent::NKEYS - 1);
  azero(children + 2, Parent::NCHILDREN - 2);
}

void BTreeImpl::Parent::insertAfter(uint i, uint splitKey, uint child) {
  KJ_IREQUIRE(children[Parent::NCHILDREN - 1] == 0);  // check not full

  amove(keys + i + 1, keys + i, Parent::NKEYS - (i + 1));
  keys[i] = splitKey;

  amove(children + i + 2, children + i + 1, Parent::NCHILDREN - (i + 2));
  children[i + 1] = child;
}

void BTreeImpl::Parent::eraseAfter(uint i) {
  amove(keys + i, keys + i + 1, Parent::NKEYS - (i + 1));
  keys[Parent::NKEYS - 1] = nullptr;
  amove(children + i + 1, children + i + 2, Parent::NCHILDREN - (i + 2));
  children[Parent::NCHILDREN - 1] = 0;
}

}  // namespace _

// =======================================================================================
// Insertion order

const InsertionOrderIndex::Link InsertionOrderIndex::EMPTY_LINK = { 0, 0 };

InsertionOrderIndex::InsertionOrderIndex(): capacity(0), links(const_cast<Link*>(&EMPTY_LINK)) {}
InsertionOrderIndex::~InsertionOrderIndex() noexcept(false) {
  if (links != &EMPTY_LINK) delete[] links;
}

void InsertionOrderIndex::reserve(size_t size) {
  KJ_ASSERT(size < (1u << 31), "Table too big for InsertionOrderIndex");

  if (size > capacity) {
    // Need to grow.
    // Note that `size` and `capacity` do not include the special link[0].

    // Round up to the next power of 2.
    size_t allocation = 1u << (_::lg(size) + 1);
    KJ_DASSERT(allocation > size);
    KJ_DASSERT(allocation <= size * 2);

    // Round first allocation up to 8.
    allocation = kj::max(allocation, 8);
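
    // E.g. reserve(5): lg(5) == 2, so allocation == 8, and capacity below becomes 7 (the extra
    // slot holds the sentinel link[0]).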

    Link* newLinks = new Link[allocation];
#ifdef KJ_DEBUG
    // To catch bugs, fill unused links with 0xff.
    memset(newLinks, 0xff, allocation * sizeof(Link));
#endif
    _::acopy(newLinks, links, capacity + 1);
    if (links != &EMPTY_LINK) delete[] links;
    links = newLinks;
    capacity = allocation - 1;
  }
}

void InsertionOrderIndex::clear() {
  links[0] = Link { 0, 0 };

#ifdef KJ_DEBUG
  // To catch bugs, fill unused links with 0xff.
  memset(links + 1, 0xff, capacity * sizeof(Link));
#endif
}

kj::Maybe<size_t> InsertionOrderIndex::insertImpl(size_t pos) {
  if (pos >= capacity) {
    reserve(pos + 1);
  }

  links[pos + 1].prev = links[0].prev;
  links[pos + 1].next = 0;
  links[links[0].prev].next = pos + 1;
  links[0].prev = pos + 1;
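
  // To picture the result (illustrative): starting from an empty index, insertImpl(0) followed
  // by insertImpl(1) leaves the sentinel links[0] as { next: 1, prev: 2 }, links[1] as
  // { next: 2, prev: 0 }, and links[2] as { next: 0, prev: 1 }: a circular doubly-linked list
  // threaded through the array in insertion order.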

  return nullptr;
}

void InsertionOrderIndex::eraseImpl(size_t pos) {
  Link& link = links[pos + 1];
  links[link.next].prev = link.prev;
  links[link.prev].next = link.next;

#ifdef KJ_DEBUG
  memset(&link, 0xff, sizeof(Link));
#endif
}

void InsertionOrderIndex::moveImpl(size_t oldPos, size_t newPos) {
  Link& link = links[oldPos + 1];
  Link& newLink = links[newPos + 1];

  newLink = link;

  KJ_DASSERT(links[link.next].prev == oldPos + 1);
  KJ_DASSERT(links[link.prev].next == oldPos + 1);
  links[link.next].prev = newPos + 1;
  links[link.prev].next = newPos + 1;

#ifdef KJ_DEBUG
  memset(&link, 0xff, sizeof(Link));
#endif
}

}  // namespace kj