1// Copyright (c) 2018 Kenton Varda and contributors
2// Licensed under the MIT License:
3//
4// Permission is hereby granted, free of charge, to any person obtaining a copy
5// of this software and associated documentation files (the "Software"), to deal
6// in the Software without restriction, including without limitation the rights
7// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8// copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20// THE SOFTWARE.
21
22#pragma once
23
24#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
25#pragma GCC system_header
26#endif
27
28#include "common.h"
29#include "tuple.h"
30#include "vector.h"
31#include "function.h"
32
33#if _MSC_VER
34// Need _ReadWriteBarrier
35#if _MSC_VER < 1910
36#include <intrin.h>
37#else
38#include <intrin0.h>
39#endif
40#endif
41
42namespace kj {
43
44namespace _ { // private
45
46template <typename Inner, typename Mapping>
47class MappedIterable;
48template <typename Row>
49class TableMapping;
50template <typename Row, typename Inner>
51using TableIterable = MappedIterable<Inner, TableMapping<Row>>;
52
53} // namespace _ (private)
54
55template <typename Row, typename... Indexes>
56class Table {
57 // A table with one or more indexes. This is the KJ alternative to map, set, unordered_map, and
58 // unordered_set.
59 //
60 // Unlike a traditional map, which explicitly stores key/value pairs, a Table simply stores
61 // "rows" of arbitrary type, and then lets the application specify how these should be indexed.
62 // Rows could be indexed on a specific struct field, or they could be indexed based on a computed
63 // property. An index could be hash-based or tree-based. Multiple indexes are supported, making
64 // it easy to construct a "bimap".
65 //
66 // The table has deterministic iteration order based on the sequence of insertions and deletions.
67 // In the case of only insertions, the iteration order is the order of insertion. If deletions
68 // occur, then the current last row is moved to occupy the deleted slot. This determinism is
69 // intended to be reliable for the purpose of testing, etc.
70 //
71 // Each index is a class that looks like:
72 //
73 // class Index {
74 // public:
75 // void reserve(size_t size);
76 // // Called when Table::reserve() is called.
77 //
78 // SearchParam& keyForRow(const Row& row) const;
79 // // Given a row, return a value appropriate to pass as SearchParams to the other functions.
80 //
81 // // In all function calls below, `SearchPrams` refers to whatever parameters the index
82 // // supports for looking up a row in the table.
83 //
84 // template <typename... SearchParams>
85 // kj::Maybe<size_t> insert(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...);
86 // // Called to indicate that we're about to insert a new row which will match the given
87 // // search parameters, and will be located at the given position. If this index disallows
88 // // duplicates and some other matching row already exists, then insert() returns the index
89 // // of that row without modifying the index. If the row does not exist, then insert()
90 // // updates the index to note that the new row is located at `pos`. Note that `table[pos]`
91 // // may not be valid yet at the time of this call; the index must go on the search params
92 // // alone.
93 // //
94 // // Insert may throw an exception, in which case the table will roll back insertion.
95 //
96 // template <typename... SearchParams>
97 // void erase(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...);
98 // // Called to indicate that the index must remove references to row number `pos`. The
99 // // index must not attempt to access table[pos] directly -- in fact, `pos` may be equal to
100 // // `table.size()`, i.e., may be out-of-bounds (this happens when rolling back a failed
101 // // insertion). Instead, the index can use the search params to search for the row -- they
102 // // will either be the same as the params passed to insert(), or will be a single value of
103 // // type `Row&`.
104 // //
105 // // erase() called immediately after a successful insert() must not throw an exception, as
106 // // it may be called during unwind.
107 //
108 // template <typename... SearchParams>
109 // void move(kj::ArrayPtr<const Row> table, size_t oldPos, size_t newPos, SearchParams&&...);
110 // // Called when a row is about to be moved from `oldPos` to `newPos` in the table. The
111 // // index should update it to the new location. Neither `table[oldPos]` nor `table[newPos]`
112 // // is valid during the call -- use the search params to find the row. Before this call
113 // // `oldPos` is indexed and `newPos` is not -- after the call, the opposite is true.
114 // //
115 // // This should never throw; if it does the table may be corrupted.
116 //
117 // class Iterator; // Behaves like a C++ iterator over size_t values.
118 // class Iterable; // Has begin() and end() methods returning iterators.
119 //
120 // template <typename... SearchParams>
121 // Maybe<size_t> find(kj::ArrayPtr<const Row> table, SearchParams&&...) const;
122 // // Optional. Implements Table::find<Index>(...).
123 //
124 // template <typename... SearchParams>
125 // Iterable range(kj::ArrayPtr<const Row> table, SearchParams&&...) const;
126 // // Optional. Implements Table::range<Index>(...).
127 //
128 // Iterator begin() const;
129 // Iterator end() const;
130 // // Optional. Implements Table::ordered<Index>().
131 // };
132
133public:
134 Table();
135 Table(Indexes&&... indexes);
136
137 void reserve(size_t size);
138 // Pre-allocates space for a table of the given size. Normally a Table grows by re-allocating
139 // its backing array whenever more space is needed. Reserving in advance avoids redundantly
140 // re-allocating as the table grows.
141
142 size_t size() const;
143 size_t capacity() const;
144
145 void clear();
146
147 Row* begin();
148 Row* end();
149 const Row* begin() const;
150 const Row* end() const;
151
152 Row& insert(Row&& row);
153 Row& insert(const Row& row);
154 // Inserts a new row. Throws an exception if this would violate the uniqueness constraints of any
155 // of the indexes.
156
157 template <typename Collection>
158 void insertAll(Collection&& collection);
159 template <typename Collection>
160 void insertAll(Collection& collection);
161 // Given an iterable collection of Rows, inserts all of them into this table. If the input is
162 // an rvalue, the rows will be moved rather than copied.
163 //
164 // If an insertion throws (e.g. because it violates a uniqueness constraint of some index),
165 // subsequent insertions do not occur, but previous insertions remain inserted.
166
167 template <typename UpdateFunc>
168 Row& upsert(Row&& row, UpdateFunc&& update);
169 template <typename UpdateFunc>
170 Row& upsert(const Row& row, UpdateFunc&& update);
171 // Tries to insert a new row. However, if a duplicate already exists (according to some index),
172 // then update(Row& existingRow, Row&& newRow) is called to modify the existing row.
173
174 template <typename Index, typename... Params>
175 kj::Maybe<Row&> find(Params&&... params);
176 template <typename Index, typename... Params>
177 kj::Maybe<const Row&> find(Params&&... params) const;
178 // Using the given index, search for a matching row. What parameters are accepted depends on the
179 // index. Not all indexes support this method -- "multimap" indexes may support only range().
180
181 template <typename Index, typename... Params, typename Func>
182 Row& findOrCreate(Params&&... params, Func&& createFunc);
183 // Like find(), but if the row doesn't exist, call a function to create it. createFunc() must
184 // return `Row` or something that implicitly converts to `Row`.
185 //
186 // NOTE: C++ doesn't actually properly support inferring types of a parameter pack at the
187 // beginning of an argument list, but we define a hack to support it below. Don't worry about
188 // it.
189
190 template <typename Index, typename... Params>
191 auto range(Params&&... params);
192 template <typename Index, typename... Params>
193 auto range(Params&&... params) const;
194 // Using the given index, look up a range of values, returning an iterable. What parameters are
195 // accepted depends on the index. Not all indexes support this method (in particular, unique
196 // indexes normally don't).
197
198 template <typename Index>
199 _::TableIterable<Row, Index&> ordered();
200 template <typename Index>
201 _::TableIterable<const Row, const Index&> ordered() const;
202 // Returns an iterable over the whole table ordered using the given index. Not all indexes
203 // support this method.
204
205 template <typename Index, typename... Params>
206 bool eraseMatch(Params&&... params);
207 // Erase the row that would be matched by `find<Index>(params)`. Returns true if there was a
208 // match.
209
210 template <typename Index, typename... Params>
211 size_t eraseRange(Params&&... params);
212 // Erase the row that would be matched by `range<Index>(params)`. Returns the number of
213 // elements erased.
214
215 void erase(Row& row);
216 // Erase the given row.
217 //
218 // WARNING: This invalidates all iterators, so you can't iterate over rows and erase them this
219 // way. Use `eraseAll()` for that.
220
221 Row release(Row& row);
222 // Remove the given row from the table and return it in one operation.
223 //
224 // WARNING: This invalidates all iterators, so you can't iterate over rows and release them this
225 // way.
226
227 template <typename Predicate, typename = decltype(instance<Predicate>()(instance<Row&>()))>
228 size_t eraseAll(Predicate&& predicate);
229 // Erase all rows for which predicate(row) returns true. This scans over the entire table.
230
231 template <typename Collection, typename = decltype(instance<Collection>().begin()), bool = true>
232 size_t eraseAll(Collection&& collection);
233 // Erase all rows in the given iterable collection of rows. This carefully marks rows for
234 // deletion in a first pass then deletes them in a second.
235
236 template <size_t index = 0, typename... Params>
237 kj::Maybe<Row&> find(Params&&... params);
238 template <size_t index = 0, typename... Params>
239 kj::Maybe<const Row&> find(Params&&... params) const;
240 template <size_t index = 0, typename... Params, typename Func>
241 Row& findOrCreate(Params&&... params, Func&& createFunc);
242 template <size_t index = 0, typename... Params>
243 auto range(Params&&... params);
244 template <size_t index = 0, typename... Params>
245 auto range(Params&&... params) const;
246 template <size_t index = 0>
247 _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> ordered();
248 template <size_t index = 0>
249 _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&> ordered() const;
250 template <size_t index = 0, typename... Params>
251 bool eraseMatch(Params&&... params);
252 template <size_t index = 0, typename... Params>
253 size_t eraseRange(Params&&... params);
254 // Methods which take an index type as a template parameter can also take an index number. This
255 // is useful particularly when you have multiple indexes of the same type but different runtime
256 // properties. Additionally, you can omit the template parameter altogether to use the first
257 // index.
258
259 template <size_t index = 0>
260 void verify();
261 // Checks the integrity of indexes, throwing an exception if there are any problems. This is
262 // intended to be called within the unit test for an index.
263
264 template <typename Index, typename First, typename... Rest>
265 Row& findOrCreate(First&& first, Rest&&... rest);
266 template <size_t index = 0, typename First, typename... Rest>
267 Row& findOrCreate(First&& first, Rest&&... rest);
268 // HACK: A parameter pack can only be inferred if it lives at the end of the argument list, so
269 // the findOrCreate() definitions from earlier won't actually work. These ones will, but we
270 // have to do some annoying things inside to regroup the arguments.
271
272private:
273 Vector<Row> rows;
274 Tuple<Indexes...> indexes;
275
276 template <size_t index = 0, bool final = (index >= sizeof...(Indexes))>
277 class Impl;
278 template <typename Func, typename... Params>
279 class FindOrCreateImpl;
280
281 template <typename ParamsTuple, typename... Params>
282 struct FindOrCreateHack;
283
284 void eraseImpl(size_t pos);
285 template <typename Collection>
286 size_t eraseAllImpl(Collection&& collection);
287};
288
289template <typename Callbacks>
290class HashIndex;
291// A Table index based on a hash table.
292//
293// This implementation:
294// * Is based on linear probing, not chaining. It is important to use a high-quality hash function.
295// Use the KJ hashing library if possible.
296// * Is limited to tables of 2^30 rows or less, mainly to allow for tighter packing with 32-bit
297// integers instead of 64-bit.
298// * Caches hash codes so that each table row need only be hashed once, and never checks equality
299// unless hash codes have already been determined to be equal.
300//
301// The `Callbacks` type defines how to compute hash codes and equality. It should be defined like:
302//
303// class Callbacks {
304// public:
305// // In this interface, `SearchParams...` means whatever parameters you want to support in
306// // a call to table.find(...). By overloading the calls to support various inputs, you can
307// // affect what table.find(...) accepts.
308//
309// SearchParam& keyForRow(const Row& row);
310// // Given a row of the table, return the SearchParams that might be passed to the other
311// // methods to match this row.
312//
313// bool matches(const Row&, SearchParams&&...) const;
314// // Returns true if the row on the left matches thes search params on the right.
315//
316// uint hashCode(SearchParams&&...) const;
317// // Computes the hash code of the given search params. Matching rows (as determined by
318// // matches()) must have the same hash code. Non-matching rows should have different hash
319// // codes, to the maximum extent possible. Non-matching rows with the same hash code hurt
320// // performance.
321// };
322//
323// If your `Callbacks` type has dynamic state, you may pass its constructor parameters as the
324// constructor parameters to `HashIndex`.
325
326template <typename Callbacks>
327class TreeIndex;
328// A Table index based on a B-tree.
329//
330// This allows sorted iteration over rows.
331//
332// The `Callbacks` type defines how to compare rows. It should be defined like:
333//
334// class Callbacks {
335// public:
336// // In this interface, `SearchParams...` means whatever parameters you want to support in
337// // a call to table.find(...). By overloading the calls to support various inputs, you can
338// // affect what table.find(...) accepts.
339//
340// SearchParam& keyForRow(const Row& row);
341// // Given a row of the table, return the SearchParams that might be passed to the other
342// // methods to match this row.
343//
344// bool isBefore(const Row&, SearchParams&&...) const;
345// // Returns true if the row on the left comes before the search params on the right.
346//
347// bool matches(const Row&, SearchParams&&...) const;
348// // Returns true if the row "matches" the search params.
349// };
350
351// =======================================================================================
352// inline implementation details
353
354namespace _ { // private
355
356KJ_NORETURN(void throwDuplicateTableRow());
357
358template <typename Dst, typename Src, typename = decltype(instance<Src>().size())>
359inline void tryReserveSize(Dst& dst, Src&& src) { dst.reserve(dst.size() + src.size()); }
360template <typename... Params>
361inline void tryReserveSize(Params&&...) {}
362// If `src` has a `.size()` method, call dst.reserve(dst.size() + src.size()).
363// Otherwise, do nothing.
364
365template <typename Inner, class Mapping>
366class MappedIterator: private Mapping {
367 // An iterator that wraps some other iterator and maps the values through a mapping function.
368 // The type `Mapping` must define a method `map()` which performs this mapping.
369 //
370 // TODO(cleanup): This seems generally useful. Should we put it somewhere resuable?
371
372public:
373 template <typename... Params>
374 MappedIterator(Inner inner, Params&&... params)
375 : Mapping(kj::fwd<Params>(params)...), inner(inner) {}
376
377 inline auto operator->() const { return &Mapping::map(*inner); }
378 inline decltype(auto) operator* () const { return Mapping::map(*inner); }
379 inline decltype(auto) operator[](size_t index) const { return Mapping::map(inner[index]); }
380 inline MappedIterator& operator++() { ++inner; return *this; }
381 inline MappedIterator operator++(int) { return MappedIterator(inner++, *this); }
382 inline MappedIterator& operator--() { --inner; return *this; }
383 inline MappedIterator operator--(int) { return MappedIterator(inner--, *this); }
384 inline MappedIterator& operator+=(ptrdiff_t amount) { inner += amount; return *this; }
385 inline MappedIterator& operator-=(ptrdiff_t amount) { inner -= amount; return *this; }
386 inline MappedIterator operator+ (ptrdiff_t amount) const {
387 return MappedIterator(inner + amount, *this);
388 }
389 inline MappedIterator operator- (ptrdiff_t amount) const {
390 return MappedIterator(inner - amount, *this);
391 }
392 inline ptrdiff_t operator- (const MappedIterator& other) const { return inner - other.inner; }
393
394 inline bool operator==(const MappedIterator& other) const { return inner == other.inner; }
395 inline bool operator!=(const MappedIterator& other) const { return inner != other.inner; }
396 inline bool operator<=(const MappedIterator& other) const { return inner <= other.inner; }
397 inline bool operator>=(const MappedIterator& other) const { return inner >= other.inner; }
398 inline bool operator< (const MappedIterator& other) const { return inner < other.inner; }
399 inline bool operator> (const MappedIterator& other) const { return inner > other.inner; }
400
401private:
402 Inner inner;
403};
404
405template <typename Inner, typename Mapping>
406class MappedIterable: private Mapping {
407 // An iterable that wraps some other iterable and maps the values through a mapping function.
408 // The type `Mapping` must define a method `map()` which performs this mapping.
409 //
410 // TODO(cleanup): This seems generally useful. Should we put it somewhere resuable?
411
412public:
413 template <typename... Params>
414 MappedIterable(Inner inner, Params&&... params)
415 : Mapping(kj::fwd<Params>(params)...), inner(inner) {}
416
417 typedef Decay<decltype(instance<Inner>().begin())> InnerIterator;
418 typedef MappedIterator<InnerIterator, Mapping> Iterator;
419 typedef Decay<decltype(instance<const Inner>().begin())> InnerConstIterator;
420 typedef MappedIterator<InnerConstIterator, Mapping> ConstIterator;
421
422 inline Iterator begin() { return { inner.begin(), (Mapping&)*this }; }
423 inline Iterator end() { return { inner.end(), (Mapping&)*this }; }
424 inline ConstIterator begin() const { return { inner.begin(), (const Mapping&)*this }; }
425 inline ConstIterator end() const { return { inner.end(), (const Mapping&)*this }; }
426
427private:
428 Inner inner;
429};
430
431template <typename Row>
432class TableMapping {
433public:
434 TableMapping(Row* table): table(table) {}
435 Row& map(size_t i) const { return table[i]; }
436
437private:
438 Row* table;
439};
440
441template <typename Row>
442class TableUnmapping {
443public:
444 TableUnmapping(Row* table): table(table) {}
445 size_t map(Row& row) const { return &row - table; }
446 size_t map(Row* row) const { return row - table; }
447
448private:
449 Row* table;
450};
451
452} // namespace _ (private)
453
454template <typename Row, typename... Indexes>
455template <size_t index>
456class Table<Row, Indexes...>::Impl<index, false> {
457public:
458 static void reserve(Table<Row, Indexes...>& table, size_t size) {
459 get<index>(table.indexes).reserve(size);
460 Impl<index + 1>::reserve(table, size);
461 }
462
463 static void clear(Table<Row, Indexes...>& table) {
464 get<index>(table.indexes).clear();
465 Impl<index + 1>::clear(table);
466 }
467
468 static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) {
469 if (skip == index) {
470 return Impl<index + 1>::insert(table, pos, row, skip);
471 }
472 auto& indexObj = get<index>(table.indexes);
473 KJ_IF_MAYBE(existing, indexObj.insert(table.rows.asPtr(), pos, indexObj.keyForRow(row))) {
474 return *existing;
475 }
476
477 bool success = false;
478 KJ_DEFER(if (!success) {
479 indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row));
480 });
481 auto result = Impl<index + 1>::insert(table, pos, row, skip);
482 success = result == nullptr;
483 return result;
484 }
485
486 static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) {
487 auto& indexObj = get<index>(table.indexes);
488 indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row));
489 Impl<index + 1>::erase(table, pos, row);
490 }
491
492 static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) {
493 auto& indexObj = get<index>(table.indexes);
494 indexObj.move(table.rows.asPtr(), oldPos, newPos, indexObj.keyForRow(row));
495 Impl<index + 1>::move(table, oldPos, newPos, row);
496 }
497};
498
499template <typename Row, typename... Indexes>
500template <size_t index>
501class Table<Row, Indexes...>::Impl<index, true> {
502public:
503 static void reserve(Table<Row, Indexes...>& table, size_t size) {}
504 static void clear(Table<Row, Indexes...>& table) {}
505 static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) {
506 return nullptr;
507 }
508 static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) {}
509 static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) {}
510};
511
512template <typename Row, typename... Indexes>
513Table<Row, Indexes...>::Table() {}
514
515template <typename Row, typename... Indexes>
516Table<Row, Indexes...>::Table(Indexes&&... indexes)
517 : indexes(tuple(kj::fwd<Indexes&&>(indexes)...)) {}
518
519template <typename Row, typename... Indexes>
520void Table<Row, Indexes...>::reserve(size_t size) {
521 rows.reserve(size);
522 Impl<>::reserve(*this, size);
523}
524
525template <typename Row, typename... Indexes>
526size_t Table<Row, Indexes...>::size() const {
527 return rows.size();
528}
529template <typename Row, typename... Indexes>
530void Table<Row, Indexes...>::clear() {
531 Impl<>::clear(*this);
532 rows.clear();
533}
534template <typename Row, typename... Indexes>
535size_t Table<Row, Indexes...>::capacity() const {
536 return rows.capacity();
537}
538
539template <typename Row, typename... Indexes>
540Row* Table<Row, Indexes...>::begin() {
541 return rows.begin();
542}
543template <typename Row, typename... Indexes>
544Row* Table<Row, Indexes...>::end() {
545 return rows.end();
546}
547template <typename Row, typename... Indexes>
548const Row* Table<Row, Indexes...>::begin() const {
549 return rows.begin();
550}
551template <typename Row, typename... Indexes>
552const Row* Table<Row, Indexes...>::end() const {
553 return rows.end();
554}
555
556template <typename Row, typename... Indexes>
557Row& Table<Row, Indexes...>::insert(Row&& row) {
558 KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) {
559 _::throwDuplicateTableRow();
560 } else {
561 return rows.add(kj::mv(row));
562 }
563}
564template <typename Row, typename... Indexes>
565Row& Table<Row, Indexes...>::insert(const Row& row) {
566 return insert(kj::cp(row));
567}
568
569template <typename Row, typename... Indexes>
570template <typename Collection>
571void Table<Row, Indexes...>::insertAll(Collection&& collection) {
572 _::tryReserveSize(*this, collection);
573 for (auto& row: collection) {
574 insert(kj::mv(row));
575 }
576}
577
578template <typename Row, typename... Indexes>
579template <typename Collection>
580void Table<Row, Indexes...>::insertAll(Collection& collection) {
581 _::tryReserveSize(*this, collection);
582 for (auto& row: collection) {
583 insert(row);
584 }
585}
586
587template <typename Row, typename... Indexes>
588template <typename UpdateFunc>
589Row& Table<Row, Indexes...>::upsert(Row&& row, UpdateFunc&& update) {
590 KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) {
591 update(rows[*existing], kj::mv(row));
592 return rows[*existing];
593 } else {
594 return rows.add(kj::mv(row));
595 }
596}
597template <typename Row, typename... Indexes>
598template <typename UpdateFunc>
599Row& Table<Row, Indexes...>::upsert(const Row& row, UpdateFunc&& update) {
600 return upsert(kj::cp(row), kj::fwd<UpdateFunc>(update));
601}
602
603template <typename Row, typename... Indexes>
604template <typename Index, typename... Params>
605kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) {
606 return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
607}
608template <typename Row, typename... Indexes>
609template <size_t index, typename... Params>
610kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) {
611 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) {
612 return rows[*pos];
613 } else {
614 return nullptr;
615 }
616}
617template <typename Row, typename... Indexes>
618template <typename Index, typename... Params>
619kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const {
620 return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
621}
622template <typename Row, typename... Indexes>
623template <size_t index, typename... Params>
624kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const {
625 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) {
626 return rows[*pos];
627 } else {
628 return nullptr;
629 }
630}
631
632template <typename Row, typename... Indexes>
633template <typename Func, typename... Params>
634class Table<Row, Indexes...>::FindOrCreateImpl {
635public:
636 template <size_t index>
637 static Row& apply(Table<Row, Indexes...>& table, Params&&... params, Func&& createFunc) {
638 auto pos = table.rows.size();
639 KJ_IF_MAYBE(existing, get<index>(table.indexes).insert(table.rows.asPtr(), pos, params...)) {
640 return table.rows[*existing];
641 } else {
642 bool success = false;
643 auto& newRow = table.rows.add(createFunc());
644 KJ_DEFER({
645 if (!success) {
646 table.rows.removeLast();
647 get<index>(table.indexes).erase(table.rows.asPtr(), pos, params...);
648 }
649 });
650 if (Table<Row, Indexes...>::template Impl<>::insert(table, pos, newRow, index) == nullptr) {
651 success = true;
652 } else {
653 _::throwDuplicateTableRow();
654 }
655 return newRow;
656 }
657 }
658};
659
660template <typename Row, typename... Indexes>
661template <typename... T, typename U, typename V, typename... W>
662struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U, V, W...>
663 : public FindOrCreateHack<_::Tuple<T..., U>, V, W...> {};
664template <typename Row, typename... Indexes>
665template <typename... T, typename U>
666struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U>
667 : public FindOrCreateImpl<U, T...> {};
668// This awful hack works around C++'s lack of support for parameter packs anywhere other than at
669// the end of an argument list. We accumulate all of the types except for the last one into a
670// Tuple, then forward to FindOrCreateImpl with the last parameter as the Func.
671
672template <typename Row, typename... Indexes>
673template <typename Index, typename First, typename... Rest>
674Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) {
675 return findOrCreate<indexOfType<Index, Tuple<Indexes...>>()>(
676 kj::fwd<First>(first), kj::fwd<Rest>(rest)...);
677}
678template <typename Row, typename... Indexes>
679template <size_t index, typename First, typename... Rest>
680Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) {
681 return FindOrCreateHack<_::Tuple<>, First, Rest...>::template apply<index>(
682 *this, kj::fwd<First>(first), kj::fwd<Rest>(rest)...);
683}
684
685template <typename Row, typename... Indexes>
686template <typename Index, typename... Params>
687auto Table<Row, Indexes...>::range(Params&&... params) {
688 return range<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
689}
690template <typename Row, typename... Indexes>
691template <size_t index, typename... Params>
692auto Table<Row, Indexes...>::range(Params&&... params) {
693 auto inner = get<index>(indexes).range(rows.asPtr(), kj::fwd<Params>(params)...);
694 return _::TableIterable<Row, decltype(inner)>(kj::mv(inner), rows.begin());
695}
696template <typename Row, typename... Indexes>
697template <typename Index, typename... Params>
698auto Table<Row, Indexes...>::range(Params&&... params) const {
699 return range<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
700}
701template <typename Row, typename... Indexes>
702template <size_t index, typename... Params>
703auto Table<Row, Indexes...>::range(Params&&... params) const {
704 auto inner = get<index>(indexes).range(rows.asPtr(), kj::fwd<Params>(params)...);
705 return _::TableIterable<const Row, decltype(inner)>(kj::mv(inner), rows.begin());
706}
707
708template <typename Row, typename... Indexes>
709template <typename Index>
710_::TableIterable<Row, Index&> Table<Row, Indexes...>::ordered() {
711 return ordered<indexOfType<Index, Tuple<Indexes...>>()>();
712}
713template <typename Row, typename... Indexes>
714template <size_t index>
715_::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> Table<Row, Indexes...>::ordered() {
716 return { get<index>(indexes), rows.begin() };
717}
718template <typename Row, typename... Indexes>
719template <typename Index>
720_::TableIterable<const Row, const Index&> Table<Row, Indexes...>::ordered() const {
721 return ordered<indexOfType<Index, Tuple<Indexes...>>()>();
722}
723template <typename Row, typename... Indexes>
724template <size_t index>
725_::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&>
726Table<Row, Indexes...>::ordered() const {
727 return { get<index>(indexes), rows.begin() };
728}
729
730template <typename Row, typename... Indexes>
731template <typename Index, typename... Params>
732bool Table<Row, Indexes...>::eraseMatch(Params&&... params) {
733 return eraseMatch<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
734}
735template <typename Row, typename... Indexes>
736template <size_t index, typename... Params>
737bool Table<Row, Indexes...>::eraseMatch(Params&&... params) {
738 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) {
739 eraseImpl(*pos);
740 return true;
741 } else {
742 return false;
743 }
744}
745
746template <typename Row, typename... Indexes>
747template <typename Index, typename... Params>
748size_t Table<Row, Indexes...>::eraseRange(Params&&... params) {
749 return eraseRange<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
750}
751template <typename Row, typename... Indexes>
752template <size_t index, typename... Params>
753size_t Table<Row, Indexes...>::eraseRange(Params&&... params) {
754 return eraseAllImpl(get<index>(indexes).range(rows.asPtr(), kj::fwd<Params>(params)...));
755}
756
757template <typename Row, typename... Indexes>
758template <size_t index>
759void Table<Row, Indexes...>::verify() {
760 get<index>(indexes).verify(rows.asPtr());
761}
762
763template <typename Row, typename... Indexes>
764void Table<Row, Indexes...>::erase(Row& row) {
765 KJ_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table");
766 eraseImpl(&row - rows.begin());
767}
768template <typename Row, typename... Indexes>
769void Table<Row, Indexes...>::eraseImpl(size_t pos) {
770 Impl<>::erase(*this, pos, rows[pos]);
771 size_t back = rows.size() - 1;
772 if (pos != back) {
773 Impl<>::move(*this, back, pos, rows[back]);
774 rows[pos] = kj::mv(rows[back]);
775 }
776 rows.removeLast();
777}
778
779template <typename Row, typename... Indexes>
780Row Table<Row, Indexes...>::release(Row& row) {
781 KJ_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table");
782 size_t pos = &row - rows.begin();
783 Impl<>::erase(*this, pos, row);
784 Row result = kj::mv(row);
785 size_t back = rows.size() - 1;
786 if (pos != back) {
787 Impl<>::move(*this, back, pos, rows[back]);
788 row = kj::mv(rows[back]);
789 }
790 rows.removeLast();
791 return result;
792}
793
794template <typename Row, typename... Indexes>
795template <typename Predicate, typename>
796size_t Table<Row, Indexes...>::eraseAll(Predicate&& predicate) {
797 size_t count = 0;
798 for (size_t i = 0; i < rows.size();) {
799 if (predicate(rows[i])) {
800 eraseImpl(i);
801 ++count;
802 // eraseImpl() replaces the erased row with the last row, so don't increment i here; repeat
803 // with the same i.
804 } else {
805 ++i;
806 }
807 }
808 return count;
809}
810
811template <typename Row, typename... Indexes>
812template <typename Collection, typename, bool>
813size_t Table<Row, Indexes...>::eraseAll(Collection&& collection) {
814 return eraseAllImpl(_::MappedIterable<Collection&, _::TableUnmapping<Row>>(
815 collection, rows.begin()));
816}
817
818template <typename Row, typename... Indexes>
819template <typename Collection>
820size_t Table<Row, Indexes...>::eraseAllImpl(Collection&& collection) {
821 // We need to transform the collection of row numbers into a sequence of erasures, accounting
822 // for the fact that each erasure re-positions the last row into its slot.
823 Vector<size_t> erased;
824 _::tryReserveSize(erased, collection);
825 for (size_t pos: collection) {
826 while (pos >= rows.size() - erased.size()) {
827 // Oops, the next item to be erased is already scheduled to be moved to a different location
828 // due to a previous erasure. Figure out where it will be at this point.
829 size_t erasureNumber = rows.size() - pos - 1;
830 pos = erased[erasureNumber];
831 }
832 erased.add(pos);
833 }
834
835 // Now we can execute the sequence of erasures.
836 for (size_t pos: erased) {
837 eraseImpl(pos);
838 }
839
840 return erased.size();
841}
842
843// -----------------------------------------------------------------------------
844// Hash table index
845
846namespace _ { // private
847
848void logHashTableInconsistency();
849
850struct HashBucket {
851 uint hash;
852 uint value;
853
854 HashBucket() = default;
855 HashBucket(uint hash, uint pos)
856 : hash(hash), value(pos + 2) {}
857
858 inline bool isEmpty() const { return value == 0; }
859 inline bool isErased() const { return value == 1; }
860 inline bool isOccupied() const { return value >= 2; }
861 template <typename Row>
862 inline Row& getRow(ArrayPtr<Row> table) const { return table[getPos()]; }
863 template <typename Row>
864 inline const Row& getRow(ArrayPtr<const Row> table) const { return table[getPos()]; }
865 inline bool isPos(uint pos) const { return pos + 2 == value; }
866 inline uint getPos() const {
867 KJ_IASSERT(value >= 2);
868 return value - 2;
869 }
870 inline void setEmpty() { value = 0; }
871 inline void setErased() { value = 1; }
872 inline void setPos(uint pos) { value = pos + 2; }
873};
874
875inline size_t probeHash(const kj::Array<HashBucket>& buckets, size_t i) {
876 // TODO(perf): Is linear probing OK or should we do something fancier?
877 if (++i == buckets.size()) {
878 return 0;
879 } else {
880 return i;
881 }
882}
883
884kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize);
885
886uint chooseBucket(uint hash, uint count);
887
888} // namespace _ (private)
889
890template <typename Callbacks>
891class HashIndex {
892public:
893 HashIndex() KJ_DEFAULT_CONSTRUCTOR_VS2015_BUGGY
894 template <typename... Params>
895 HashIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {}
896
897 void reserve(size_t size) {
898 if (buckets.size() < size * 2) {
899 rehash(size);
900 }
901 }
902
903 void clear() {
904 erasedCount = 0;
905 memset(buckets.begin(), 0, buckets.asBytes().size());
906 }
907
908 template <typename Row>
909 decltype(auto) keyForRow(Row&& row) const {
910 return cb.keyForRow(kj::fwd<Row>(row));
911 }
912
913 template <typename Row, typename... Params>
914 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
915 if (buckets.size() * 2 < (table.size() + 1 + erasedCount) * 3) {
916 // Load factor is more than 2/3, let's rehash.
917 rehash(kj::max(buckets.size() * 2, (table.size() + 1) * 2));
918 }
919
920 uint hashCode = cb.hashCode(params...);
921 Maybe<_::HashBucket&> erasedSlot;
922 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
923 auto& bucket = buckets[i];
924 if (bucket.isEmpty()) {
925 // no duplicates found
926 KJ_IF_MAYBE(s, erasedSlot) {
927 --erasedCount;
928 *s = { hashCode, uint(pos) };
929 } else {
930 bucket = { hashCode, uint(pos) };
931 }
932 return nullptr;
933 } else if (bucket.isErased()) {
934 // We can fill in the erased slot. However, we have to keep searching to make sure there
935 // are no duplicates before we do that.
936 if (erasedSlot == nullptr) {
937 erasedSlot = bucket;
938 }
939 } else if (bucket.hash == hashCode &&
940 cb.matches(bucket.getRow(table), params...)) {
941 // duplicate row
942 return size_t(bucket.getPos());
943 }
944 }
945 }
946
947 template <typename Row, typename... Params>
948 void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
949 uint hashCode = cb.hashCode(params...);
950 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
951 auto& bucket = buckets[i];
952 if (bucket.isPos(pos)) {
953 // found it
954 ++erasedCount;
955 bucket.setErased();
956 return;
957 } else if (bucket.isEmpty()) {
958 // can't find the bucket, something is very wrong
959 _::logHashTableInconsistency();
960 return;
961 }
962 }
963 }
964
965 template <typename Row, typename... Params>
966 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) {
967 uint hashCode = cb.hashCode(params...);
968 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
969 auto& bucket = buckets[i];
970 if (bucket.isPos(oldPos)) {
971 // found it
972 bucket.setPos(newPos);
973 return;
974 } else if (bucket.isEmpty()) {
975 // can't find the bucket, something is very wrong
976 _::logHashTableInconsistency();
977 return;
978 }
979 }
980 }
981
982 template <typename Row, typename... Params>
983 Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const {
984 if (buckets.size() == 0) return nullptr;
985
986 uint hashCode = cb.hashCode(params...);
987 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
988 auto& bucket = buckets[i];
989 if (bucket.isEmpty()) {
990 // not found.
991 return nullptr;
992 } else if (bucket.isErased()) {
993 // skip, keep searching
994 } else if (bucket.hash == hashCode &&
995 cb.matches(bucket.getRow(table), params...)) {
996 // found
997 return size_t(bucket.getPos());
998 }
999 }
1000 }
1001
1002 // No begin() nor end() because hash tables are not usefully ordered.
1003
1004private:
1005 Callbacks cb;
1006 size_t erasedCount = 0;
1007 Array<_::HashBucket> buckets;
1008
1009 void rehash(size_t targetSize) {
1010 buckets = _::rehash(buckets, targetSize);
1011 }
1012};
1013
1014// -----------------------------------------------------------------------------
1015// BTree index
1016
1017namespace _ { // private
1018
1019KJ_ALWAYS_INLINE(void compilerBarrier());
1020void compilerBarrier() {
1021 // Make sure that reads occurring before this call cannot be re-ordered to happen after
1022 // writes that occur after this call. We need this in a couple places below to prevent C++
1023 // strict aliasing rules from breaking things.
1024#if _MSC_VER
1025 _ReadWriteBarrier();
1026#else
1027 __asm__ __volatile__("": : :"memory");
1028#endif
1029}
1030
1031template <typename T>
1032inline void acopy(T* to, T* from, size_t size) { memcpy(to, from, size * sizeof(T)); }
1033template <typename T>
1034inline void amove(T* to, T* from, size_t size) { memmove(to, from, size * sizeof(T)); }
1035template <typename T>
1036inline void azero(T* ptr, size_t size) { memset(ptr, 0, size * sizeof(T)); }
1037// memcpy/memmove/memset variants that count size in elements, not bytes.
1038//
1039// TODO(cleanup): These are generally useful, put them somewhere.
1040
1041class BTreeImpl {
1042public:
1043 class Iterator;
1044 class MaybeUint;
1045 struct NodeUnion;
1046 struct Leaf;
1047 struct Parent;
1048 struct Freelisted;
1049
1050 class SearchKey {
1051 // Passed to methods that need to search the tree. This class allows most of the B-tree
1052 // implementation to be kept out of templates, avoiding code bloat, at the cost of some
1053 // performance trade-off. In order to lessen the performance cost of virtual calls, we design
1054 // this interface so that it only needs to be called once per tree node, rather than once per
1055 // comparison.
1056
1057 public:
1058 virtual uint search(const Parent& parent) const = 0;
1059 virtual uint search(const Leaf& leaf) const = 0;
1060 // Binary search for the first key/row in the parent/leaf that is equal to or comes after the
1061 // search key.
1062
1063 virtual bool isAfter(uint rowIndex) const = 0;
1064 // Returns true if the key comes after the value in the given row.
1065 };
1066
1067 BTreeImpl();
1068 ~BTreeImpl() noexcept(false);
1069
1070 void logInconsistency() const;
1071
1072 void reserve(size_t size);
1073
1074 void clear();
1075
1076 Iterator begin() const;
1077 Iterator end() const;
1078
1079 Iterator search(const SearchKey& searchKey) const;
1080 // Find the "first" row (in sorted order) for which searchKey.isAfter(rowNumber) returns true.
1081
1082 Iterator insert(const SearchKey& searchKey);
1083 // Like search() but ensures that there is room in the leaf node to insert a new row.
1084
1085 void erase(uint row, const SearchKey& searchKey);
1086 // Erase the given row number from the tree. searchKey.isAfter() returns true for the given row
1087 // and all rows after it.
1088
1089 void renumber(uint oldRow, uint newRow, const SearchKey& searchKey);
1090 // Renumber the given row from oldRow to newRow. searchKey.isAfter() returns true for oldRow and
1091 // all rows after it. (It will not be called on newRow.)
1092
1093 void verify(size_t size, FunctionParam<bool(uint, uint)>);
1094
1095private:
1096 NodeUnion* tree; // allocated with aligned_alloc aligned to cache lines
1097 uint treeCapacity;
1098 uint height; // height of *parent* tree -- does not include the leaf level
1099 uint freelistHead;
1100 uint freelistSize;
1101 uint beginLeaf;
1102 uint endLeaf;
1103 void growTree(uint minCapacity = 0);
1104
1105 template <typename T>
1106 struct AllocResult;
1107
1108 template <typename T>
1109 inline AllocResult<T> alloc();
1110 inline void free(uint pos);
1111
1112 inline uint split(Parent& src, uint srcPos, Parent& dst, uint dstPos);
1113 inline uint split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos);
1114 inline void merge(Parent& dst, uint dstPos, uint pivot, Parent& src);
1115 inline void merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src);
1116 inline void move(Parent& dst, uint dstPos, Parent& src);
1117 inline void move(Leaf& dst, uint dstPos, Leaf& src);
1118 inline void rotateLeft(
1119 Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup);
1120 inline void rotateLeft(
1121 Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup);
1122 inline void rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent);
1123 inline void rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent);
1124
1125 template <typename Node>
1126 inline Node& insertHelper(const SearchKey& searchKey,
1127 Node& node, Parent* parent, uint indexInParent, uint pos);
1128
1129 template <typename Node>
1130 inline Node& eraseHelper(
1131 Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup);
1132
1133 size_t verifyNode(size_t size, FunctionParam<bool(uint, uint)>&,
1134 uint pos, uint height, MaybeUint maxRow);
1135
1136 static const NodeUnion EMPTY_NODE;
1137};
1138
1139class BTreeImpl::MaybeUint {
1140 // A nullable uint, using the value zero to mean null and shifting all other values up by 1.
1141public:
1142 MaybeUint() = default;
1143 inline MaybeUint(uint i): i(i - 1) {}
1144 inline MaybeUint(decltype(nullptr)): i(0) {}
1145
1146 inline bool operator==(decltype(nullptr)) const { return i == 0; }
1147 inline bool operator==(uint j) const { return i == j + 1; }
1148 inline bool operator==(const MaybeUint& other) const { return i == other.i; }
1149 inline bool operator!=(decltype(nullptr)) const { return i != 0; }
1150 inline bool operator!=(uint j) const { return i != j + 1; }
1151 inline bool operator!=(const MaybeUint& other) const { return i != other.i; }
1152
1153 inline MaybeUint& operator=(decltype(nullptr)) { i = 0; return *this; }
1154 inline MaybeUint& operator=(uint j) { i = j + 1; return *this; }
1155
1156 inline uint operator*() const { KJ_IREQUIRE(i != 0); return i - 1; }
1157
1158 template <typename Func>
1159 inline bool check(Func& func) const { return i != 0 && func(i - 1); }
1160 // Equivalent to *this != nullptr && func(**this)
1161
1162private:
1163 uint i;
1164};
1165
1166struct BTreeImpl::Leaf {
1167 uint next;
1168 uint prev;
1169 // Pointers to next and previous nodes at the same level, used for fast iteration.
1170
1171 static constexpr size_t NROWS = 14;
1172 MaybeUint rows[NROWS];
1173 // Pointers to table rows, offset by 1 so that 0 is an empty value.
1174
1175 inline bool isFull() const;
1176 inline bool isMostlyFull() const;
1177 inline bool isHalfFull() const;
1178
1179 inline void insert(uint i, uint newRow) {
1180 KJ_IREQUIRE(rows[Leaf::NROWS - 1] == nullptr); // check not full
1181
1182 amove(rows + i + 1, rows + i, Leaf::NROWS - (i + 1));
1183 rows[i] = newRow;
1184 }
1185
1186 inline void erase(uint i) {
1187 KJ_IREQUIRE(rows[0] != nullptr); // check not empty
1188
1189 amove(rows + i, rows + i + 1, Leaf::NROWS - (i + 1));
1190 rows[Leaf::NROWS - 1] = nullptr;
1191 }
1192
1193 inline uint size() const {
1194 static_assert(Leaf::NROWS == 14, "logic here needs updating");
1195
1196 // Binary search for first empty element in `rows`, or return 14 if no empty elements. We do
1197 // this in a branch-free manner. Since there are 15 possible results (0 through 14, inclusive),
1198 // this isn't a perfectly balanced binary search. We carefully choose the split points so that
1199 // there's no way we'll try to dereference row[14] or later (which would be a buffer overflow).
1200 uint i = (rows[6] != nullptr) * 7;
1201 i += (rows[i + 3] != nullptr) * 4;
1202 i += (rows[i + 1] != nullptr) * 2;
1203 i += (rows[i ] != nullptr);
1204 return i;
1205 }
1206
1207 template <typename Func>
1208 inline uint binarySearch(Func& predicate) const {
1209 // Binary search to find first row for which predicate(row) is false.
1210
1211 static_assert(Leaf::NROWS == 14, "logic here needs updating");
1212
1213 // See comments in size().
1214 uint i = (rows[6].check(predicate)) * 7;
1215 i += (rows[i + 3].check(predicate)) * 4;
1216 i += (rows[i + 1].check(predicate)) * 2;
1217 if (i != 6) { // don't redundantly check row 6
1218 i += (rows[i ].check(predicate));
1219 }
1220 return i;
1221 }
1222};
1223
1224struct BTreeImpl::Parent {
1225 uint unused;
1226 // Not used. May be arbitrarily non-zero due to overlap with Freelisted::nextOffset.
1227
1228 static constexpr size_t NKEYS = 7;
1229 MaybeUint keys[NKEYS];
1230 // Pointers to table rows, offset by 1 so that 0 is an empty value.
1231
1232 static constexpr size_t NCHILDREN = NKEYS + 1;
1233 uint children[NCHILDREN];
1234 // Pointers to children. Not offset because the root is always at position 0, and a pointer
1235 // to the root would be nonsensical.
1236
1237 inline bool isFull() const;
1238 inline bool isMostlyFull() const;
1239 inline bool isHalfFull() const;
1240 inline void initRoot(uint key, uint leftChild, uint rightChild);
1241 inline void insertAfter(uint i, uint splitKey, uint child);
1242 inline void eraseAfter(uint i);
1243
1244 inline uint keyCount() const {
1245 static_assert(Parent::NKEYS == 7, "logic here needs updating");
1246
1247 // Binary search for first empty element in `keys`, or return 7 if no empty elements. We do
1248 // this in a branch-free manner. Since there are 8 possible results (0 through 7, inclusive),
1249 // this is a perfectly balanced binary search.
1250 uint i = (keys[3] != nullptr) * 4;
1251 i += (keys[i + 1] != nullptr) * 2;
1252 i += (keys[i ] != nullptr);
1253 return i;
1254 }
1255
1256 template <typename Func>
1257 inline uint binarySearch(Func& predicate) const {
1258 // Binary search to find first key for which predicate(key) is false.
1259
1260 static_assert(Parent::NKEYS == 7, "logic here needs updating");
1261
1262 // See comments in size().
1263 uint i = (keys[3].check(predicate)) * 4;
1264 i += (keys[i + 1].check(predicate)) * 2;
1265 i += (keys[i ].check(predicate));
1266 return i;
1267 }
1268};
1269
1270struct BTreeImpl::Freelisted {
1271 int nextOffset;
1272 // The next node in the freelist is at: this + 1 + nextOffset
1273 //
1274 // Hence, newly-allocated space can initialize this to zero.
1275
1276 uint zero[15];
1277 // Freelisted entries are always zero'd.
1278};
1279
1280struct BTreeImpl::NodeUnion {
1281 union {
1282 Freelisted freelist;
1283 // If this node is in the freelist.
1284
1285 Leaf leaf;
1286 // If this node is a leaf.
1287
1288 Parent parent;
1289 // If this node is not a leaf.
1290 };
1291
1292 inline operator Leaf&() { return leaf; }
1293 inline operator Parent&() { return parent; }
1294 inline operator const Leaf&() const { return leaf; }
1295 inline operator const Parent&() const { return parent; }
1296};
1297
1298static_assert(sizeof(BTreeImpl::Parent) == 64,
1299 "BTreeImpl::Parent should be optimized to fit a cache line");
1300static_assert(sizeof(BTreeImpl::Leaf) == 64,
1301 "BTreeImpl::Leaf should be optimized to fit a cache line");
1302static_assert(sizeof(BTreeImpl::Freelisted) == 64,
1303 "BTreeImpl::Freelisted should be optimized to fit a cache line");
1304static_assert(sizeof(BTreeImpl::NodeUnion) == 64,
1305 "BTreeImpl::NodeUnion should be optimized to fit a cache line");
1306
1307bool BTreeImpl::Leaf::isFull() const {
1308 return rows[Leaf::NROWS - 1] != nullptr;
1309}
1310bool BTreeImpl::Leaf::isMostlyFull() const {
1311 return rows[Leaf::NROWS / 2] != nullptr;
1312}
1313bool BTreeImpl::Leaf::isHalfFull() const {
1314 KJ_IASSERT(rows[Leaf::NROWS / 2 - 1] != nullptr);
1315 return rows[Leaf::NROWS / 2] == nullptr;
1316}
1317
1318bool BTreeImpl::Parent::isFull() const {
1319 return keys[Parent::NKEYS - 1] != nullptr;
1320}
1321bool BTreeImpl::Parent::isMostlyFull() const {
1322 return keys[Parent::NKEYS / 2] != nullptr;
1323}
1324bool BTreeImpl::Parent::isHalfFull() const {
1325 KJ_IASSERT(keys[Parent::NKEYS / 2 - 1] != nullptr);
1326 return keys[Parent::NKEYS / 2] == nullptr;
1327}
1328
1329class BTreeImpl::Iterator {
1330public:
1331 Iterator(const NodeUnion* tree, const Leaf* leaf, uint row)
1332 : tree(tree), leaf(leaf), row(row) {}
1333
1334 size_t operator*() const {
1335 KJ_IREQUIRE(row < Leaf::NROWS && leaf->rows[row] != nullptr,
1336 "tried to dereference end() iterator");
1337 return *leaf->rows[row];
1338 }
1339
1340 inline Iterator& operator++() {
1341 KJ_IREQUIRE(leaf->rows[row] != nullptr, "B-tree iterator overflow");
1342 ++row;
1343 if (row >= Leaf::NROWS || leaf->rows[row] == nullptr) {
1344 if (leaf->next == 0) {
1345 // at end; stay on current leaf
1346 } else {
1347 leaf = &tree[leaf->next].leaf;
1348 row = 0;
1349 }
1350 }
1351 return *this;
1352 }
1353 inline Iterator operator++(int) {
1354 Iterator other = *this;
1355 ++*this;
1356 return other;
1357 }
1358
1359 inline Iterator& operator--() {
1360 if (row == 0) {
1361 KJ_IREQUIRE(leaf->prev != 0, "B-tree iterator underflow");
1362 leaf = &tree[leaf->prev].leaf;
1363 row = leaf->size() - 1;
1364 } else {
1365 --row;
1366 }
1367 return *this;
1368 }
1369 inline Iterator operator--(int) {
1370 Iterator other = *this;
1371 --*this;
1372 return other;
1373 }
1374
1375 inline bool operator==(const Iterator& other) const {
1376 return leaf == other.leaf && row == other.row;
1377 }
1378 inline bool operator!=(const Iterator& other) const {
1379 return leaf != other.leaf || row != other.row;
1380 }
1381
1382 bool isEnd() {
1383 return row == Leaf::NROWS || leaf->rows[row] == nullptr;
1384 }
1385
1386 void insert(BTreeImpl& impl, uint newRow) {
1387 KJ_IASSERT(impl.tree == tree);
1388 const_cast<Leaf*>(leaf)->insert(row, newRow);
1389 }
1390
1391 void erase(BTreeImpl& impl) {
1392 KJ_IASSERT(impl.tree == tree);
1393 const_cast<Leaf*>(leaf)->erase(row);
1394 }
1395
1396 void replace(BTreeImpl& impl, uint newRow) {
1397 KJ_IASSERT(impl.tree == tree);
1398 const_cast<Leaf*>(leaf)->rows[row] = newRow;
1399 }
1400
1401private:
1402 const NodeUnion* tree;
1403 const Leaf* leaf;
1404 uint row;
1405};
1406
1407template <typename Iterator>
1408class IterRange {
1409public:
1410 inline IterRange(Iterator b, Iterator e): b(b), e(e) {}
1411
1412 inline Iterator begin() const { return b; }
1413 inline Iterator end() const { return e; }
1414private:
1415 Iterator b;
1416 Iterator e;
1417};
1418
1419template <typename Iterator>
1420inline IterRange<Decay<Iterator>> iterRange(Iterator b, Iterator e) {
1421 return { b, e };
1422}
1423
1424inline BTreeImpl::Iterator BTreeImpl::begin() const {
1425 return { tree, &tree[beginLeaf].leaf, 0 };
1426}
1427inline BTreeImpl::Iterator BTreeImpl::end() const {
1428 auto& leaf = tree[endLeaf].leaf;
1429 return { tree, &leaf, leaf.size() };
1430}
1431
1432} // namespace _ (private)
1433
1434template <typename Callbacks>
1435class TreeIndex {
1436public:
1437 TreeIndex() KJ_DEFAULT_CONSTRUCTOR_VS2015_BUGGY
1438 template <typename... Params>
1439 TreeIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {}
1440
1441 template <typename Row>
1442 void verify(kj::ArrayPtr<Row> table) {
1443 impl.verify(table.size(), [&](uint i, uint j) {
1444 return cb.isBefore(table[i], table[j]);
1445 });
1446 }
1447
1448 inline void reserve(size_t size) { impl.reserve(size); }
1449 inline void clear() { impl.clear(); }
1450 inline auto begin() const { return impl.begin(); }
1451 inline auto end() const { return impl.end(); }
1452
1453 template <typename Row>
1454 decltype(auto) keyForRow(Row&& row) const {
1455 return cb.keyForRow(kj::fwd<Row>(row));
1456 }
1457
1458 template <typename Row, typename... Params>
1459 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
1460 auto iter = impl.insert(searchKey(table, params...));
1461
1462 if (!iter.isEnd() && cb.matches(table[*iter], params...)) {
1463 return *iter;
1464 } else {
1465 iter.insert(impl, pos);
1466 return nullptr;
1467 }
1468 }
1469
1470 template <typename Row, typename... Params>
1471 void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
1472 impl.erase(pos, searchKey(table, params...));
1473 }
1474
1475 template <typename Row, typename... Params>
1476 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) {
1477 impl.renumber(oldPos, newPos, searchKey(table, params...));
1478 }
1479
1480 template <typename Row, typename... Params>
1481 Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const {
1482 auto iter = impl.search(searchKey(table, params...));
1483
1484 if (!iter.isEnd() && cb.matches(table[*iter], params...)) {
1485 return size_t(*iter);
1486 } else {
1487 return nullptr;
1488 }
1489 }
1490
1491 template <typename Row, typename Begin, typename End>
1492 _::IterRange<_::BTreeImpl::Iterator> range(
1493 kj::ArrayPtr<Row> table, Begin&& begin, End&& end) const {
1494 return {
1495 impl.search(searchKey(table, begin)),
1496 impl.search(searchKey(table, end ))
1497 };
1498 }
1499
1500private:
1501 Callbacks cb;
1502 _::BTreeImpl impl;
1503
1504 template <typename Predicate>
1505 class SearchKeyImpl: public _::BTreeImpl::SearchKey {
1506 public:
1507 SearchKeyImpl(Predicate&& predicate)
1508 : predicate(kj::mv(predicate)) {}
1509
1510 uint search(const _::BTreeImpl::Parent& parent) const override {
1511 return parent.binarySearch(predicate);
1512 }
1513 uint search(const _::BTreeImpl::Leaf& leaf) const override {
1514 return leaf.binarySearch(predicate);
1515 }
1516 bool isAfter(uint rowIndex) const override {
1517 return predicate(rowIndex);
1518 }
1519
1520 private:
1521 Predicate predicate;
1522 };
1523
1524 template <typename Row, typename... Params>
1525 inline auto searchKey(kj::ArrayPtr<Row>& table, Params&... params) const {
1526 auto predicate = [&](uint i) { return cb.isBefore(table[i], params...); };
1527 return SearchKeyImpl<decltype(predicate)>(kj::mv(predicate));
1528 }
1529};
1530
1531// -----------------------------------------------------------------------------
1532// Insertion order index
1533
1534class InsertionOrderIndex {
1535 // Table index which allows iterating over elements in order of insertion. This index cannot
1536 // be used for Table::find(), but can be used for Table::ordered().
1537
1538 struct Link;
1539public:
1540 InsertionOrderIndex();
1541 ~InsertionOrderIndex() noexcept(false);
1542
1543 class Iterator {
1544 public:
1545 Iterator(const Link* links, uint pos)
1546 : links(links), pos(pos) {}
1547
1548 inline size_t operator*() const {
1549 KJ_IREQUIRE(pos != 0, "can't derefrence end() iterator");
1550 return pos - 1;
1551 };
1552
1553 inline Iterator& operator++() {
1554 pos = links[pos].next;
1555 return *this;
1556 }
1557 inline Iterator operator++(int) {
1558 Iterator result = *this;
1559 ++*this;
1560 return result;
1561 }
1562 inline Iterator& operator--() {
1563 pos = links[pos].prev;
1564 return *this;
1565 }
1566 inline Iterator operator--(int) {
1567 Iterator result = *this;
1568 --*this;
1569 return result;
1570 }
1571
1572 inline bool operator==(const Iterator& other) const {
1573 return pos == other.pos;
1574 }
1575 inline bool operator!=(const Iterator& other) const {
1576 return pos != other.pos;
1577 }
1578
1579 private:
1580 const Link* links;
1581 uint pos;
1582 };
1583
1584 template <typename Row>
1585 Row& keyForRow(Row& row) const { return row; }
1586
1587 void reserve(size_t size);
1588 void clear();
1589 inline Iterator begin() const { return Iterator(links, links[0].next); }
1590 inline Iterator end() const { return Iterator(links, 0); }
1591
1592 template <typename Row>
1593 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, const Row& row) {
1594 return insertImpl(pos);
1595 }
1596
1597 template <typename Row>
1598 void erase(kj::ArrayPtr<Row> table, size_t pos, const Row& row) {
1599 eraseImpl(pos);
1600 }
1601
1602 template <typename Row>
1603 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, const Row& row) {
1604 return moveImpl(oldPos, newPos);
1605 }
1606
1607private:
1608 struct Link {
1609 uint next;
1610 uint prev;
1611 };
1612
1613 uint capacity;
1614 Link* links;
1615 // links[0] is special: links[0].next points to the first link, links[0].prev points to the last.
1616 // links[n+1] corresponds to row n.
1617
1618 kj::Maybe<size_t> insertImpl(size_t pos);
1619 void eraseImpl(size_t pos);
1620 void moveImpl(size_t oldPos, size_t newPos);
1621
1622 static const Link EMPTY_LINK;
1623};
1624
1625} // namespace kj
1626