1 | // Copyright (c) 2018 Kenton Varda and contributors |
2 | // Licensed under the MIT License: |
3 | // |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | // of this software and associated documentation files (the "Software"), to deal |
6 | // in the Software without restriction, including without limitation the rights |
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | // copies of the Software, and to permit persons to whom the Software is |
9 | // furnished to do so, subject to the following conditions: |
10 | // |
11 | // The above copyright notice and this permission notice shall be included in |
12 | // all copies or substantial portions of the Software. |
13 | // |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
20 | // THE SOFTWARE. |
21 | |
22 | #pragma once |
23 | |
24 | #if defined(__GNUC__) && !KJ_HEADER_WARNINGS |
25 | #pragma GCC system_header |
26 | #endif |
27 | |
28 | #include "common.h" |
29 | #include "tuple.h" |
30 | #include "vector.h" |
31 | #include "function.h" |
32 | |
33 | #if _MSC_VER |
34 | // Need _ReadWriteBarrier |
35 | #if _MSC_VER < 1910 |
36 | #include <intrin.h> |
37 | #else |
38 | #include <intrin0.h> |
39 | #endif |
40 | #endif |
41 | |
42 | namespace kj { |
43 | |
44 | namespace _ { // private |
45 | |
46 | template <typename Inner, typename Mapping> |
47 | class MappedIterable; |
48 | template <typename Row> |
49 | class TableMapping; |
50 | template <typename Row, typename Inner> |
51 | using TableIterable = MappedIterable<Inner, TableMapping<Row>>; |
52 | |
53 | } // namespace _ (private) |
54 | |
55 | template <typename Row, typename... Indexes> |
56 | class Table { |
57 | // A table with one or more indexes. This is the KJ alternative to map, set, unordered_map, and |
58 | // unordered_set. |
59 | // |
60 | // Unlike a traditional map, which explicitly stores key/value pairs, a Table simply stores |
61 | // "rows" of arbitrary type, and then lets the application specify how these should be indexed. |
62 | // Rows could be indexed on a specific struct field, or they could be indexed based on a computed |
63 | // property. An index could be hash-based or tree-based. Multiple indexes are supported, making |
64 | // it easy to construct a "bimap". |
65 | // |
66 | // The table has deterministic iteration order based on the sequence of insertions and deletions. |
67 | // In the case of only insertions, the iteration order is the order of insertion. If deletions |
68 | // occur, then the current last row is moved to occupy the deleted slot. This determinism is |
69 | // intended to be reliable for the purpose of testing, etc. |
70 | // |
71 | // Each index is a class that looks like: |
72 | // |
73 | // class Index { |
74 | // public: |
75 | // void reserve(size_t size); |
76 | // // Called when Table::reserve() is called. |
77 | // |
78 | // SearchParam& keyForRow(const Row& row) const; |
79 | // // Given a row, return a value appropriate to pass as SearchParams to the other functions. |
80 | // |
81 | // // In all function calls below, `SearchPrams` refers to whatever parameters the index |
82 | // // supports for looking up a row in the table. |
83 | // |
84 | // template <typename... SearchParams> |
85 | // kj::Maybe<size_t> insert(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...); |
86 | // // Called to indicate that we're about to insert a new row which will match the given |
87 | // // search parameters, and will be located at the given position. If this index disallows |
88 | // // duplicates and some other matching row already exists, then insert() returns the index |
89 | // // of that row without modifying the index. If the row does not exist, then insert() |
90 | // // updates the index to note that the new row is located at `pos`. Note that `table[pos]` |
91 | // // may not be valid yet at the time of this call; the index must go on the search params |
92 | // // alone. |
93 | // // |
94 | // // Insert may throw an exception, in which case the table will roll back insertion. |
95 | // |
96 | // template <typename... SearchParams> |
97 | // void erase(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...); |
98 | // // Called to indicate that the index must remove references to row number `pos`. The |
99 | // // index must not attempt to access table[pos] directly -- in fact, `pos` may be equal to |
100 | // // `table.size()`, i.e., may be out-of-bounds (this happens when rolling back a failed |
101 | // // insertion). Instead, the index can use the search params to search for the row -- they |
102 | // // will either be the same as the params passed to insert(), or will be a single value of |
103 | // // type `Row&`. |
104 | // // |
105 | // // erase() called immediately after a successful insert() must not throw an exception, as |
106 | // // it may be called during unwind. |
107 | // |
108 | // template <typename... SearchParams> |
109 | // void move(kj::ArrayPtr<const Row> table, size_t oldPos, size_t newPos, SearchParams&&...); |
110 | // // Called when a row is about to be moved from `oldPos` to `newPos` in the table. The |
111 | // // index should update it to the new location. Neither `table[oldPos]` nor `table[newPos]` |
112 | // // is valid during the call -- use the search params to find the row. Before this call |
113 | // // `oldPos` is indexed and `newPos` is not -- after the call, the opposite is true. |
114 | // // |
115 | // // This should never throw; if it does the table may be corrupted. |
116 | // |
117 | // class Iterator; // Behaves like a C++ iterator over size_t values. |
118 | // class Iterable; // Has begin() and end() methods returning iterators. |
119 | // |
120 | // template <typename... SearchParams> |
121 | // Maybe<size_t> find(kj::ArrayPtr<const Row> table, SearchParams&&...) const; |
122 | // // Optional. Implements Table::find<Index>(...). |
123 | // |
124 | // template <typename... SearchParams> |
125 | // Iterable range(kj::ArrayPtr<const Row> table, SearchParams&&...) const; |
126 | // // Optional. Implements Table::range<Index>(...). |
127 | // |
128 | // Iterator begin() const; |
129 | // Iterator end() const; |
130 | // // Optional. Implements Table::ordered<Index>(). |
131 | // }; |
132 | |
133 | public: |
134 | Table(); |
135 | Table(Indexes&&... indexes); |
136 | |
137 | void reserve(size_t size); |
138 | // Pre-allocates space for a table of the given size. Normally a Table grows by re-allocating |
139 | // its backing array whenever more space is needed. Reserving in advance avoids redundantly |
140 | // re-allocating as the table grows. |
141 | |
142 | size_t size() const; |
143 | size_t capacity() const; |
144 | |
145 | void clear(); |
146 | |
147 | Row* begin(); |
148 | Row* end(); |
149 | const Row* begin() const; |
150 | const Row* end() const; |
151 | |
152 | Row& insert(Row&& row); |
153 | Row& insert(const Row& row); |
154 | // Inserts a new row. Throws an exception if this would violate the uniqueness constraints of any |
155 | // of the indexes. |
156 | |
157 | template <typename Collection> |
158 | void insertAll(Collection&& collection); |
159 | template <typename Collection> |
160 | void insertAll(Collection& collection); |
161 | // Given an iterable collection of Rows, inserts all of them into this table. If the input is |
162 | // an rvalue, the rows will be moved rather than copied. |
163 | // |
164 | // If an insertion throws (e.g. because it violates a uniqueness constraint of some index), |
165 | // subsequent insertions do not occur, but previous insertions remain inserted. |
166 | |
167 | template <typename UpdateFunc> |
168 | Row& upsert(Row&& row, UpdateFunc&& update); |
169 | template <typename UpdateFunc> |
170 | Row& upsert(const Row& row, UpdateFunc&& update); |
171 | // Tries to insert a new row. However, if a duplicate already exists (according to some index), |
172 | // then update(Row& existingRow, Row&& newRow) is called to modify the existing row. |
173 | |
174 | template <typename Index, typename... Params> |
175 | kj::Maybe<Row&> find(Params&&... params); |
176 | template <typename Index, typename... Params> |
177 | kj::Maybe<const Row&> find(Params&&... params) const; |
178 | // Using the given index, search for a matching row. What parameters are accepted depends on the |
179 | // index. Not all indexes support this method -- "multimap" indexes may support only range(). |
180 | |
181 | template <typename Index, typename... Params, typename Func> |
182 | Row& findOrCreate(Params&&... params, Func&& createFunc); |
183 | // Like find(), but if the row doesn't exist, call a function to create it. createFunc() must |
184 | // return `Row` or something that implicitly converts to `Row`. |
185 | // |
186 | // NOTE: C++ doesn't actually properly support inferring types of a parameter pack at the |
187 | // beginning of an argument list, but we define a hack to support it below. Don't worry about |
188 | // it. |
189 | |
190 | template <typename Index, typename... Params> |
191 | auto range(Params&&... params); |
192 | template <typename Index, typename... Params> |
193 | auto range(Params&&... params) const; |
194 | // Using the given index, look up a range of values, returning an iterable. What parameters are |
195 | // accepted depends on the index. Not all indexes support this method (in particular, unique |
196 | // indexes normally don't). |
197 | |
198 | template <typename Index> |
199 | _::TableIterable<Row, Index&> ordered(); |
200 | template <typename Index> |
201 | _::TableIterable<const Row, const Index&> ordered() const; |
202 | // Returns an iterable over the whole table ordered using the given index. Not all indexes |
203 | // support this method. |
204 | |
205 | template <typename Index, typename... Params> |
206 | bool eraseMatch(Params&&... params); |
207 | // Erase the row that would be matched by `find<Index>(params)`. Returns true if there was a |
208 | // match. |
209 | |
210 | template <typename Index, typename... Params> |
211 | size_t eraseRange(Params&&... params); |
212 | // Erase the row that would be matched by `range<Index>(params)`. Returns the number of |
213 | // elements erased. |
214 | |
215 | void erase(Row& row); |
216 | // Erase the given row. |
217 | // |
218 | // WARNING: This invalidates all iterators, so you can't iterate over rows and erase them this |
219 | // way. Use `eraseAll()` for that. |
220 | |
221 | Row release(Row& row); |
222 | // Remove the given row from the table and return it in one operation. |
223 | // |
224 | // WARNING: This invalidates all iterators, so you can't iterate over rows and release them this |
225 | // way. |
226 | |
227 | template <typename Predicate, typename = decltype(instance<Predicate>()(instance<Row&>()))> |
228 | size_t eraseAll(Predicate&& predicate); |
229 | // Erase all rows for which predicate(row) returns true. This scans over the entire table. |
230 | |
231 | template <typename Collection, typename = decltype(instance<Collection>().begin()), bool = true> |
232 | size_t eraseAll(Collection&& collection); |
233 | // Erase all rows in the given iterable collection of rows. This carefully marks rows for |
234 | // deletion in a first pass then deletes them in a second. |
235 | |
236 | template <size_t index = 0, typename... Params> |
237 | kj::Maybe<Row&> find(Params&&... params); |
238 | template <size_t index = 0, typename... Params> |
239 | kj::Maybe<const Row&> find(Params&&... params) const; |
240 | template <size_t index = 0, typename... Params, typename Func> |
241 | Row& findOrCreate(Params&&... params, Func&& createFunc); |
242 | template <size_t index = 0, typename... Params> |
243 | auto range(Params&&... params); |
244 | template <size_t index = 0, typename... Params> |
245 | auto range(Params&&... params) const; |
246 | template <size_t index = 0> |
247 | _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> ordered(); |
248 | template <size_t index = 0> |
249 | _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&> ordered() const; |
250 | template <size_t index = 0, typename... Params> |
251 | bool eraseMatch(Params&&... params); |
252 | template <size_t index = 0, typename... Params> |
253 | size_t eraseRange(Params&&... params); |
254 | // Methods which take an index type as a template parameter can also take an index number. This |
255 | // is useful particularly when you have multiple indexes of the same type but different runtime |
256 | // properties. Additionally, you can omit the template parameter altogether to use the first |
257 | // index. |
258 | |
259 | template <size_t index = 0> |
260 | void verify(); |
261 | // Checks the integrity of indexes, throwing an exception if there are any problems. This is |
262 | // intended to be called within the unit test for an index. |
263 | |
264 | template <typename Index, typename First, typename... Rest> |
265 | Row& findOrCreate(First&& first, Rest&&... rest); |
266 | template <size_t index = 0, typename First, typename... Rest> |
267 | Row& findOrCreate(First&& first, Rest&&... rest); |
268 | // HACK: A parameter pack can only be inferred if it lives at the end of the argument list, so |
269 | // the findOrCreate() definitions from earlier won't actually work. These ones will, but we |
270 | // have to do some annoying things inside to regroup the arguments. |
271 | |
272 | private: |
273 | Vector<Row> rows; |
274 | Tuple<Indexes...> indexes; |
275 | |
276 | template <size_t index = 0, bool final = (index >= sizeof...(Indexes))> |
277 | class Impl; |
278 | template <typename Func, typename... Params> |
279 | class FindOrCreateImpl; |
280 | |
281 | template <typename ParamsTuple, typename... Params> |
282 | struct FindOrCreateHack; |
283 | |
284 | void eraseImpl(size_t pos); |
285 | template <typename Collection> |
286 | size_t eraseAllImpl(Collection&& collection); |
287 | }; |
288 | |
289 | template <typename Callbacks> |
290 | class HashIndex; |
291 | // A Table index based on a hash table. |
292 | // |
293 | // This implementation: |
294 | // * Is based on linear probing, not chaining. It is important to use a high-quality hash function. |
295 | // Use the KJ hashing library if possible. |
296 | // * Is limited to tables of 2^30 rows or less, mainly to allow for tighter packing with 32-bit |
297 | // integers instead of 64-bit. |
298 | // * Caches hash codes so that each table row need only be hashed once, and never checks equality |
299 | // unless hash codes have already been determined to be equal. |
300 | // |
301 | // The `Callbacks` type defines how to compute hash codes and equality. It should be defined like: |
302 | // |
303 | // class Callbacks { |
304 | // public: |
305 | // // In this interface, `SearchParams...` means whatever parameters you want to support in |
306 | // // a call to table.find(...). By overloading the calls to support various inputs, you can |
307 | // // affect what table.find(...) accepts. |
308 | // |
309 | // SearchParam& keyForRow(const Row& row); |
310 | // // Given a row of the table, return the SearchParams that might be passed to the other |
311 | // // methods to match this row. |
312 | // |
313 | // bool matches(const Row&, SearchParams&&...) const; |
314 | // // Returns true if the row on the left matches thes search params on the right. |
315 | // |
316 | // uint hashCode(SearchParams&&...) const; |
317 | // // Computes the hash code of the given search params. Matching rows (as determined by |
318 | // // matches()) must have the same hash code. Non-matching rows should have different hash |
319 | // // codes, to the maximum extent possible. Non-matching rows with the same hash code hurt |
320 | // // performance. |
321 | // }; |
322 | // |
323 | // If your `Callbacks` type has dynamic state, you may pass its constructor parameters as the |
324 | // constructor parameters to `HashIndex`. |
325 | |
326 | template <typename Callbacks> |
327 | class TreeIndex; |
328 | // A Table index based on a B-tree. |
329 | // |
330 | // This allows sorted iteration over rows. |
331 | // |
332 | // The `Callbacks` type defines how to compare rows. It should be defined like: |
333 | // |
334 | // class Callbacks { |
335 | // public: |
336 | // // In this interface, `SearchParams...` means whatever parameters you want to support in |
337 | // // a call to table.find(...). By overloading the calls to support various inputs, you can |
338 | // // affect what table.find(...) accepts. |
339 | // |
340 | // SearchParam& keyForRow(const Row& row); |
341 | // // Given a row of the table, return the SearchParams that might be passed to the other |
342 | // // methods to match this row. |
343 | // |
344 | // bool isBefore(const Row&, SearchParams&&...) const; |
345 | // // Returns true if the row on the left comes before the search params on the right. |
346 | // |
347 | // bool matches(const Row&, SearchParams&&...) const; |
348 | // // Returns true if the row "matches" the search params. |
349 | // }; |
350 | |
351 | // ======================================================================================= |
352 | // inline implementation details |
353 | |
354 | namespace _ { // private |
355 | |
356 | KJ_NORETURN(void throwDuplicateTableRow()); |
357 | |
358 | template <typename Dst, typename Src, typename = decltype(instance<Src>().size())> |
359 | inline void tryReserveSize(Dst& dst, Src&& src) { dst.reserve(dst.size() + src.size()); } |
360 | template <typename... Params> |
361 | inline void tryReserveSize(Params&&...) {} |
362 | // If `src` has a `.size()` method, call dst.reserve(dst.size() + src.size()). |
363 | // Otherwise, do nothing. |
364 | |
365 | template <typename Inner, class Mapping> |
366 | class MappedIterator: private Mapping { |
367 | // An iterator that wraps some other iterator and maps the values through a mapping function. |
368 | // The type `Mapping` must define a method `map()` which performs this mapping. |
369 | // |
370 | // TODO(cleanup): This seems generally useful. Should we put it somewhere resuable? |
371 | |
372 | public: |
373 | template <typename... Params> |
374 | MappedIterator(Inner inner, Params&&... params) |
375 | : Mapping(kj::fwd<Params>(params)...), inner(inner) {} |
376 | |
377 | inline auto operator->() const { return &Mapping::map(*inner); } |
378 | inline decltype(auto) operator* () const { return Mapping::map(*inner); } |
379 | inline decltype(auto) operator[](size_t index) const { return Mapping::map(inner[index]); } |
380 | inline MappedIterator& operator++() { ++inner; return *this; } |
381 | inline MappedIterator operator++(int) { return MappedIterator(inner++, *this); } |
382 | inline MappedIterator& operator--() { --inner; return *this; } |
383 | inline MappedIterator operator--(int) { return MappedIterator(inner--, *this); } |
384 | inline MappedIterator& operator+=(ptrdiff_t amount) { inner += amount; return *this; } |
385 | inline MappedIterator& operator-=(ptrdiff_t amount) { inner -= amount; return *this; } |
386 | inline MappedIterator operator+ (ptrdiff_t amount) const { |
387 | return MappedIterator(inner + amount, *this); |
388 | } |
389 | inline MappedIterator operator- (ptrdiff_t amount) const { |
390 | return MappedIterator(inner - amount, *this); |
391 | } |
392 | inline ptrdiff_t operator- (const MappedIterator& other) const { return inner - other.inner; } |
393 | |
394 | inline bool operator==(const MappedIterator& other) const { return inner == other.inner; } |
395 | inline bool operator!=(const MappedIterator& other) const { return inner != other.inner; } |
396 | inline bool operator<=(const MappedIterator& other) const { return inner <= other.inner; } |
397 | inline bool operator>=(const MappedIterator& other) const { return inner >= other.inner; } |
398 | inline bool operator< (const MappedIterator& other) const { return inner < other.inner; } |
399 | inline bool operator> (const MappedIterator& other) const { return inner > other.inner; } |
400 | |
401 | private: |
402 | Inner inner; |
403 | }; |
404 | |
405 | template <typename Inner, typename Mapping> |
406 | class MappedIterable: private Mapping { |
407 | // An iterable that wraps some other iterable and maps the values through a mapping function. |
408 | // The type `Mapping` must define a method `map()` which performs this mapping. |
409 | // |
410 | // TODO(cleanup): This seems generally useful. Should we put it somewhere resuable? |
411 | |
412 | public: |
413 | template <typename... Params> |
414 | MappedIterable(Inner inner, Params&&... params) |
415 | : Mapping(kj::fwd<Params>(params)...), inner(inner) {} |
416 | |
417 | typedef Decay<decltype(instance<Inner>().begin())> InnerIterator; |
418 | typedef MappedIterator<InnerIterator, Mapping> Iterator; |
419 | typedef Decay<decltype(instance<const Inner>().begin())> InnerConstIterator; |
420 | typedef MappedIterator<InnerConstIterator, Mapping> ConstIterator; |
421 | |
422 | inline Iterator begin() { return { inner.begin(), (Mapping&)*this }; } |
423 | inline Iterator end() { return { inner.end(), (Mapping&)*this }; } |
424 | inline ConstIterator begin() const { return { inner.begin(), (const Mapping&)*this }; } |
425 | inline ConstIterator end() const { return { inner.end(), (const Mapping&)*this }; } |
426 | |
427 | private: |
428 | Inner inner; |
429 | }; |
430 | |
431 | template <typename Row> |
432 | class TableMapping { |
433 | public: |
434 | TableMapping(Row* table): table(table) {} |
435 | Row& map(size_t i) const { return table[i]; } |
436 | |
437 | private: |
438 | Row* table; |
439 | }; |
440 | |
441 | template <typename Row> |
442 | class TableUnmapping { |
443 | public: |
444 | TableUnmapping(Row* table): table(table) {} |
445 | size_t map(Row& row) const { return &row - table; } |
446 | size_t map(Row* row) const { return row - table; } |
447 | |
448 | private: |
449 | Row* table; |
450 | }; |
451 | |
452 | } // namespace _ (private) |
453 | |
454 | template <typename Row, typename... Indexes> |
455 | template <size_t index> |
456 | class Table<Row, Indexes...>::Impl<index, false> { |
457 | public: |
458 | static void reserve(Table<Row, Indexes...>& table, size_t size) { |
459 | get<index>(table.indexes).reserve(size); |
460 | Impl<index + 1>::reserve(table, size); |
461 | } |
462 | |
463 | static void clear(Table<Row, Indexes...>& table) { |
464 | get<index>(table.indexes).clear(); |
465 | Impl<index + 1>::clear(table); |
466 | } |
467 | |
468 | static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) { |
469 | if (skip == index) { |
470 | return Impl<index + 1>::insert(table, pos, row, skip); |
471 | } |
472 | auto& indexObj = get<index>(table.indexes); |
473 | KJ_IF_MAYBE(existing, indexObj.insert(table.rows.asPtr(), pos, indexObj.keyForRow(row))) { |
474 | return *existing; |
475 | } |
476 | |
477 | bool success = false; |
478 | KJ_DEFER(if (!success) { |
479 | indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row)); |
480 | }); |
481 | auto result = Impl<index + 1>::insert(table, pos, row, skip); |
482 | success = result == nullptr; |
483 | return result; |
484 | } |
485 | |
486 | static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) { |
487 | auto& indexObj = get<index>(table.indexes); |
488 | indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row)); |
489 | Impl<index + 1>::erase(table, pos, row); |
490 | } |
491 | |
492 | static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) { |
493 | auto& indexObj = get<index>(table.indexes); |
494 | indexObj.move(table.rows.asPtr(), oldPos, newPos, indexObj.keyForRow(row)); |
495 | Impl<index + 1>::move(table, oldPos, newPos, row); |
496 | } |
497 | }; |
498 | |
499 | template <typename Row, typename... Indexes> |
500 | template <size_t index> |
501 | class Table<Row, Indexes...>::Impl<index, true> { |
502 | public: |
503 | static void reserve(Table<Row, Indexes...>& table, size_t size) {} |
504 | static void clear(Table<Row, Indexes...>& table) {} |
505 | static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) { |
506 | return nullptr; |
507 | } |
508 | static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) {} |
509 | static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) {} |
510 | }; |
511 | |
512 | template <typename Row, typename... Indexes> |
513 | Table<Row, Indexes...>::Table() {} |
514 | |
515 | template <typename Row, typename... Indexes> |
516 | Table<Row, Indexes...>::Table(Indexes&&... indexes) |
517 | : indexes(tuple(kj::fwd<Indexes&&>(indexes)...)) {} |
518 | |
519 | template <typename Row, typename... Indexes> |
520 | void Table<Row, Indexes...>::reserve(size_t size) { |
521 | rows.reserve(size); |
522 | Impl<>::reserve(*this, size); |
523 | } |
524 | |
525 | template <typename Row, typename... Indexes> |
526 | size_t Table<Row, Indexes...>::size() const { |
527 | return rows.size(); |
528 | } |
529 | template <typename Row, typename... Indexes> |
530 | void Table<Row, Indexes...>::clear() { |
531 | Impl<>::clear(*this); |
532 | rows.clear(); |
533 | } |
534 | template <typename Row, typename... Indexes> |
535 | size_t Table<Row, Indexes...>::capacity() const { |
536 | return rows.capacity(); |
537 | } |
538 | |
539 | template <typename Row, typename... Indexes> |
540 | Row* Table<Row, Indexes...>::begin() { |
541 | return rows.begin(); |
542 | } |
543 | template <typename Row, typename... Indexes> |
544 | Row* Table<Row, Indexes...>::end() { |
545 | return rows.end(); |
546 | } |
547 | template <typename Row, typename... Indexes> |
548 | const Row* Table<Row, Indexes...>::begin() const { |
549 | return rows.begin(); |
550 | } |
551 | template <typename Row, typename... Indexes> |
552 | const Row* Table<Row, Indexes...>::end() const { |
553 | return rows.end(); |
554 | } |
555 | |
556 | template <typename Row, typename... Indexes> |
557 | Row& Table<Row, Indexes...>::insert(Row&& row) { |
558 | KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) { |
559 | _::throwDuplicateTableRow(); |
560 | } else { |
561 | return rows.add(kj::mv(row)); |
562 | } |
563 | } |
564 | template <typename Row, typename... Indexes> |
565 | Row& Table<Row, Indexes...>::insert(const Row& row) { |
566 | return insert(kj::cp(row)); |
567 | } |
568 | |
569 | template <typename Row, typename... Indexes> |
570 | template <typename Collection> |
571 | void Table<Row, Indexes...>::insertAll(Collection&& collection) { |
572 | _::tryReserveSize(*this, collection); |
573 | for (auto& row: collection) { |
574 | insert(kj::mv(row)); |
575 | } |
576 | } |
577 | |
578 | template <typename Row, typename... Indexes> |
579 | template <typename Collection> |
580 | void Table<Row, Indexes...>::insertAll(Collection& collection) { |
581 | _::tryReserveSize(*this, collection); |
582 | for (auto& row: collection) { |
583 | insert(row); |
584 | } |
585 | } |
586 | |
587 | template <typename Row, typename... Indexes> |
588 | template <typename UpdateFunc> |
589 | Row& Table<Row, Indexes...>::upsert(Row&& row, UpdateFunc&& update) { |
590 | KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) { |
591 | update(rows[*existing], kj::mv(row)); |
592 | return rows[*existing]; |
593 | } else { |
594 | return rows.add(kj::mv(row)); |
595 | } |
596 | } |
597 | template <typename Row, typename... Indexes> |
598 | template <typename UpdateFunc> |
599 | Row& Table<Row, Indexes...>::upsert(const Row& row, UpdateFunc&& update) { |
600 | return upsert(kj::cp(row), kj::fwd<UpdateFunc>(update)); |
601 | } |
602 | |
603 | template <typename Row, typename... Indexes> |
604 | template <typename Index, typename... Params> |
605 | kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) { |
606 | return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); |
607 | } |
608 | template <typename Row, typename... Indexes> |
609 | template <size_t index, typename... Params> |
610 | kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) { |
611 | KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) { |
612 | return rows[*pos]; |
613 | } else { |
614 | return nullptr; |
615 | } |
616 | } |
617 | template <typename Row, typename... Indexes> |
618 | template <typename Index, typename... Params> |
619 | kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const { |
620 | return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); |
621 | } |
622 | template <typename Row, typename... Indexes> |
623 | template <size_t index, typename... Params> |
624 | kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const { |
625 | KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) { |
626 | return rows[*pos]; |
627 | } else { |
628 | return nullptr; |
629 | } |
630 | } |
631 | |
632 | template <typename Row, typename... Indexes> |
633 | template <typename Func, typename... Params> |
634 | class Table<Row, Indexes...>::FindOrCreateImpl { |
635 | public: |
636 | template <size_t index> |
637 | static Row& apply(Table<Row, Indexes...>& table, Params&&... params, Func&& createFunc) { |
638 | auto pos = table.rows.size(); |
639 | KJ_IF_MAYBE(existing, get<index>(table.indexes).insert(table.rows.asPtr(), pos, params...)) { |
640 | return table.rows[*existing]; |
641 | } else { |
642 | bool success = false; |
643 | auto& newRow = table.rows.add(createFunc()); |
644 | KJ_DEFER({ |
645 | if (!success) { |
646 | table.rows.removeLast(); |
647 | get<index>(table.indexes).erase(table.rows.asPtr(), pos, params...); |
648 | } |
649 | }); |
650 | if (Table<Row, Indexes...>::template Impl<>::insert(table, pos, newRow, index) == nullptr) { |
651 | success = true; |
652 | } else { |
653 | _::throwDuplicateTableRow(); |
654 | } |
655 | return newRow; |
656 | } |
657 | } |
658 | }; |
659 | |
660 | template <typename Row, typename... Indexes> |
661 | template <typename... T, typename U, typename V, typename... W> |
662 | struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U, V, W...> |
663 | : public FindOrCreateHack<_::Tuple<T..., U>, V, W...> {}; |
664 | template <typename Row, typename... Indexes> |
665 | template <typename... T, typename U> |
666 | struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U> |
667 | : public FindOrCreateImpl<U, T...> {}; |
668 | // This awful hack works around C++'s lack of support for parameter packs anywhere other than at |
669 | // the end of an argument list. We accumulate all of the types except for the last one into a |
670 | // Tuple, then forward to FindOrCreateImpl with the last parameter as the Func. |
671 | |
672 | template <typename Row, typename... Indexes> |
673 | template <typename Index, typename First, typename... Rest> |
674 | Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) { |
675 | return findOrCreate<indexOfType<Index, Tuple<Indexes...>>()>( |
676 | kj::fwd<First>(first), kj::fwd<Rest>(rest)...); |
677 | } |
678 | template <typename Row, typename... Indexes> |
679 | template <size_t index, typename First, typename... Rest> |
680 | Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) { |
681 | return FindOrCreateHack<_::Tuple<>, First, Rest...>::template apply<index>( |
682 | *this, kj::fwd<First>(first), kj::fwd<Rest>(rest)...); |
683 | } |
684 | |
685 | template <typename Row, typename... Indexes> |
686 | template <typename Index, typename... Params> |
687 | auto Table<Row, Indexes...>::range(Params&&... params) { |
688 | return range<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); |
689 | } |
690 | template <typename Row, typename... Indexes> |
691 | template <size_t index, typename... Params> |
692 | auto Table<Row, Indexes...>::range(Params&&... params) { |
693 | auto inner = get<index>(indexes).range(rows.asPtr(), kj::fwd<Params>(params)...); |
694 | return _::TableIterable<Row, decltype(inner)>(kj::mv(inner), rows.begin()); |
695 | } |
696 | template <typename Row, typename... Indexes> |
697 | template <typename Index, typename... Params> |
698 | auto Table<Row, Indexes...>::range(Params&&... params) const { |
699 | return range<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); |
700 | } |
701 | template <typename Row, typename... Indexes> |
702 | template <size_t index, typename... Params> |
703 | auto Table<Row, Indexes...>::range(Params&&... params) const { |
704 | auto inner = get<index>(indexes).range(rows.asPtr(), kj::fwd<Params>(params)...); |
705 | return _::TableIterable<const Row, decltype(inner)>(kj::mv(inner), rows.begin()); |
706 | } |
707 | |
708 | template <typename Row, typename... Indexes> |
709 | template <typename Index> |
710 | _::TableIterable<Row, Index&> Table<Row, Indexes...>::ordered() { |
711 | return ordered<indexOfType<Index, Tuple<Indexes...>>()>(); |
712 | } |
713 | template <typename Row, typename... Indexes> |
714 | template <size_t index> |
715 | _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> Table<Row, Indexes...>::ordered() { |
716 | return { get<index>(indexes), rows.begin() }; |
717 | } |
718 | template <typename Row, typename... Indexes> |
719 | template <typename Index> |
720 | _::TableIterable<const Row, const Index&> Table<Row, Indexes...>::ordered() const { |
721 | return ordered<indexOfType<Index, Tuple<Indexes...>>()>(); |
722 | } |
723 | template <typename Row, typename... Indexes> |
724 | template <size_t index> |
725 | _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&> |
726 | Table<Row, Indexes...>::ordered() const { |
727 | return { get<index>(indexes), rows.begin() }; |
728 | } |
729 | |
730 | template <typename Row, typename... Indexes> |
731 | template <typename Index, typename... Params> |
732 | bool Table<Row, Indexes...>::eraseMatch(Params&&... params) { |
733 | return eraseMatch<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); |
734 | } |
735 | template <typename Row, typename... Indexes> |
736 | template <size_t index, typename... Params> |
737 | bool Table<Row, Indexes...>::eraseMatch(Params&&... params) { |
738 | KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) { |
739 | eraseImpl(*pos); |
740 | return true; |
741 | } else { |
742 | return false; |
743 | } |
744 | } |
745 | |
746 | template <typename Row, typename... Indexes> |
747 | template <typename Index, typename... Params> |
748 | size_t Table<Row, Indexes...>::eraseRange(Params&&... params) { |
749 | return eraseRange<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...); |
750 | } |
751 | template <typename Row, typename... Indexes> |
752 | template <size_t index, typename... Params> |
753 | size_t Table<Row, Indexes...>::eraseRange(Params&&... params) { |
754 | return eraseAllImpl(get<index>(indexes).range(rows.asPtr(), kj::fwd<Params>(params)...)); |
755 | } |
756 | |
757 | template <typename Row, typename... Indexes> |
758 | template <size_t index> |
759 | void Table<Row, Indexes...>::verify() { |
760 | get<index>(indexes).verify(rows.asPtr()); |
761 | } |
762 | |
763 | template <typename Row, typename... Indexes> |
764 | void Table<Row, Indexes...>::erase(Row& row) { |
765 | KJ_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table" ); |
766 | eraseImpl(&row - rows.begin()); |
767 | } |
768 | template <typename Row, typename... Indexes> |
769 | void Table<Row, Indexes...>::eraseImpl(size_t pos) { |
770 | Impl<>::erase(*this, pos, rows[pos]); |
771 | size_t back = rows.size() - 1; |
772 | if (pos != back) { |
773 | Impl<>::move(*this, back, pos, rows[back]); |
774 | rows[pos] = kj::mv(rows[back]); |
775 | } |
776 | rows.removeLast(); |
777 | } |
778 | |
779 | template <typename Row, typename... Indexes> |
780 | Row Table<Row, Indexes...>::release(Row& row) { |
781 | KJ_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table" ); |
782 | size_t pos = &row - rows.begin(); |
783 | Impl<>::erase(*this, pos, row); |
784 | Row result = kj::mv(row); |
785 | size_t back = rows.size() - 1; |
786 | if (pos != back) { |
787 | Impl<>::move(*this, back, pos, rows[back]); |
788 | row = kj::mv(rows[back]); |
789 | } |
790 | rows.removeLast(); |
791 | return result; |
792 | } |
793 | |
794 | template <typename Row, typename... Indexes> |
795 | template <typename Predicate, typename> |
796 | size_t Table<Row, Indexes...>::eraseAll(Predicate&& predicate) { |
797 | size_t count = 0; |
798 | for (size_t i = 0; i < rows.size();) { |
799 | if (predicate(rows[i])) { |
800 | eraseImpl(i); |
801 | ++count; |
802 | // eraseImpl() replaces the erased row with the last row, so don't increment i here; repeat |
803 | // with the same i. |
804 | } else { |
805 | ++i; |
806 | } |
807 | } |
808 | return count; |
809 | } |
810 | |
811 | template <typename Row, typename... Indexes> |
812 | template <typename Collection, typename, bool> |
813 | size_t Table<Row, Indexes...>::eraseAll(Collection&& collection) { |
814 | return eraseAllImpl(_::MappedIterable<Collection&, _::TableUnmapping<Row>>( |
815 | collection, rows.begin())); |
816 | } |
817 | |
818 | template <typename Row, typename... Indexes> |
819 | template <typename Collection> |
820 | size_t Table<Row, Indexes...>::eraseAllImpl(Collection&& collection) { |
821 | // We need to transform the collection of row numbers into a sequence of erasures, accounting |
822 | // for the fact that each erasure re-positions the last row into its slot. |
823 | Vector<size_t> erased; |
824 | _::tryReserveSize(erased, collection); |
825 | for (size_t pos: collection) { |
826 | while (pos >= rows.size() - erased.size()) { |
827 | // Oops, the next item to be erased is already scheduled to be moved to a different location |
828 | // due to a previous erasure. Figure out where it will be at this point. |
829 | size_t erasureNumber = rows.size() - pos - 1; |
830 | pos = erased[erasureNumber]; |
831 | } |
832 | erased.add(pos); |
833 | } |
834 | |
835 | // Now we can execute the sequence of erasures. |
836 | for (size_t pos: erased) { |
837 | eraseImpl(pos); |
838 | } |
839 | |
840 | return erased.size(); |
841 | } |
842 | |
843 | // ----------------------------------------------------------------------------- |
844 | // Hash table index |
845 | |
846 | namespace _ { // private |
847 | |
848 | void logHashTableInconsistency(); |
849 | |
850 | struct HashBucket { |
851 | uint hash; |
852 | uint value; |
853 | |
854 | HashBucket() = default; |
855 | HashBucket(uint hash, uint pos) |
856 | : hash(hash), value(pos + 2) {} |
857 | |
858 | inline bool isEmpty() const { return value == 0; } |
859 | inline bool isErased() const { return value == 1; } |
860 | inline bool isOccupied() const { return value >= 2; } |
861 | template <typename Row> |
862 | inline Row& getRow(ArrayPtr<Row> table) const { return table[getPos()]; } |
863 | template <typename Row> |
864 | inline const Row& getRow(ArrayPtr<const Row> table) const { return table[getPos()]; } |
865 | inline bool isPos(uint pos) const { return pos + 2 == value; } |
866 | inline uint getPos() const { |
867 | KJ_IASSERT(value >= 2); |
868 | return value - 2; |
869 | } |
870 | inline void setEmpty() { value = 0; } |
871 | inline void setErased() { value = 1; } |
872 | inline void setPos(uint pos) { value = pos + 2; } |
873 | }; |
874 | |
875 | inline size_t probeHash(const kj::Array<HashBucket>& buckets, size_t i) { |
876 | // TODO(perf): Is linear probing OK or should we do something fancier? |
877 | if (++i == buckets.size()) { |
878 | return 0; |
879 | } else { |
880 | return i; |
881 | } |
882 | } |
883 | |
884 | kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize); |
885 | |
886 | uint chooseBucket(uint hash, uint count); |
887 | |
888 | } // namespace _ (private) |
889 | |
890 | template <typename Callbacks> |
891 | class HashIndex { |
892 | public: |
893 | HashIndex() KJ_DEFAULT_CONSTRUCTOR_VS2015_BUGGY |
894 | template <typename... Params> |
895 | HashIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {} |
896 | |
897 | void reserve(size_t size) { |
898 | if (buckets.size() < size * 2) { |
899 | rehash(size); |
900 | } |
901 | } |
902 | |
903 | void clear() { |
904 | erasedCount = 0; |
905 | memset(buckets.begin(), 0, buckets.asBytes().size()); |
906 | } |
907 | |
908 | template <typename Row> |
909 | decltype(auto) keyForRow(Row&& row) const { |
910 | return cb.keyForRow(kj::fwd<Row>(row)); |
911 | } |
912 | |
913 | template <typename Row, typename... Params> |
914 | kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { |
915 | if (buckets.size() * 2 < (table.size() + 1 + erasedCount) * 3) { |
916 | // Load factor is more than 2/3, let's rehash. |
917 | rehash(kj::max(buckets.size() * 2, (table.size() + 1) * 2)); |
918 | } |
919 | |
920 | uint hashCode = cb.hashCode(params...); |
921 | Maybe<_::HashBucket&> erasedSlot; |
922 | for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { |
923 | auto& bucket = buckets[i]; |
924 | if (bucket.isEmpty()) { |
925 | // no duplicates found |
926 | KJ_IF_MAYBE(s, erasedSlot) { |
927 | --erasedCount; |
928 | *s = { hashCode, uint(pos) }; |
929 | } else { |
930 | bucket = { hashCode, uint(pos) }; |
931 | } |
932 | return nullptr; |
933 | } else if (bucket.isErased()) { |
934 | // We can fill in the erased slot. However, we have to keep searching to make sure there |
935 | // are no duplicates before we do that. |
936 | if (erasedSlot == nullptr) { |
937 | erasedSlot = bucket; |
938 | } |
939 | } else if (bucket.hash == hashCode && |
940 | cb.matches(bucket.getRow(table), params...)) { |
941 | // duplicate row |
942 | return size_t(bucket.getPos()); |
943 | } |
944 | } |
945 | } |
946 | |
947 | template <typename Row, typename... Params> |
948 | void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { |
949 | uint hashCode = cb.hashCode(params...); |
950 | for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { |
951 | auto& bucket = buckets[i]; |
952 | if (bucket.isPos(pos)) { |
953 | // found it |
954 | ++erasedCount; |
955 | bucket.setErased(); |
956 | return; |
957 | } else if (bucket.isEmpty()) { |
958 | // can't find the bucket, something is very wrong |
959 | _::logHashTableInconsistency(); |
960 | return; |
961 | } |
962 | } |
963 | } |
964 | |
965 | template <typename Row, typename... Params> |
966 | void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) { |
967 | uint hashCode = cb.hashCode(params...); |
968 | for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { |
969 | auto& bucket = buckets[i]; |
970 | if (bucket.isPos(oldPos)) { |
971 | // found it |
972 | bucket.setPos(newPos); |
973 | return; |
974 | } else if (bucket.isEmpty()) { |
975 | // can't find the bucket, something is very wrong |
976 | _::logHashTableInconsistency(); |
977 | return; |
978 | } |
979 | } |
980 | } |
981 | |
982 | template <typename Row, typename... Params> |
983 | Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const { |
984 | if (buckets.size() == 0) return nullptr; |
985 | |
986 | uint hashCode = cb.hashCode(params...); |
987 | for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) { |
988 | auto& bucket = buckets[i]; |
989 | if (bucket.isEmpty()) { |
990 | // not found. |
991 | return nullptr; |
992 | } else if (bucket.isErased()) { |
993 | // skip, keep searching |
994 | } else if (bucket.hash == hashCode && |
995 | cb.matches(bucket.getRow(table), params...)) { |
996 | // found |
997 | return size_t(bucket.getPos()); |
998 | } |
999 | } |
1000 | } |
1001 | |
1002 | // No begin() nor end() because hash tables are not usefully ordered. |
1003 | |
1004 | private: |
1005 | Callbacks cb; |
1006 | size_t erasedCount = 0; |
1007 | Array<_::HashBucket> buckets; |
1008 | |
1009 | void rehash(size_t targetSize) { |
1010 | buckets = _::rehash(buckets, targetSize); |
1011 | } |
1012 | }; |
1013 | |
1014 | // ----------------------------------------------------------------------------- |
1015 | // BTree index |
1016 | |
1017 | namespace _ { // private |
1018 | |
1019 | KJ_ALWAYS_INLINE(void compilerBarrier()); |
1020 | void compilerBarrier() { |
1021 | // Make sure that reads occurring before this call cannot be re-ordered to happen after |
1022 | // writes that occur after this call. We need this in a couple places below to prevent C++ |
1023 | // strict aliasing rules from breaking things. |
1024 | #if _MSC_VER |
1025 | _ReadWriteBarrier(); |
1026 | #else |
1027 | __asm__ __volatile__("" : : :"memory" ); |
1028 | #endif |
1029 | } |
1030 | |
1031 | template <typename T> |
1032 | inline void acopy(T* to, T* from, size_t size) { memcpy(to, from, size * sizeof(T)); } |
1033 | template <typename T> |
1034 | inline void amove(T* to, T* from, size_t size) { memmove(to, from, size * sizeof(T)); } |
1035 | template <typename T> |
1036 | inline void azero(T* ptr, size_t size) { memset(ptr, 0, size * sizeof(T)); } |
1037 | // memcpy/memmove/memset variants that count size in elements, not bytes. |
1038 | // |
1039 | // TODO(cleanup): These are generally useful, put them somewhere. |
1040 | |
1041 | class BTreeImpl { |
1042 | public: |
1043 | class Iterator; |
1044 | class MaybeUint; |
1045 | struct NodeUnion; |
1046 | struct Leaf; |
1047 | struct Parent; |
1048 | struct Freelisted; |
1049 | |
1050 | class SearchKey { |
1051 | // Passed to methods that need to search the tree. This class allows most of the B-tree |
1052 | // implementation to be kept out of templates, avoiding code bloat, at the cost of some |
1053 | // performance trade-off. In order to lessen the performance cost of virtual calls, we design |
1054 | // this interface so that it only needs to be called once per tree node, rather than once per |
1055 | // comparison. |
1056 | |
1057 | public: |
1058 | virtual uint search(const Parent& parent) const = 0; |
1059 | virtual uint search(const Leaf& leaf) const = 0; |
1060 | // Binary search for the first key/row in the parent/leaf that is equal to or comes after the |
1061 | // search key. |
1062 | |
1063 | virtual bool isAfter(uint rowIndex) const = 0; |
1064 | // Returns true if the key comes after the value in the given row. |
1065 | }; |
1066 | |
1067 | BTreeImpl(); |
1068 | ~BTreeImpl() noexcept(false); |
1069 | |
1070 | void logInconsistency() const; |
1071 | |
1072 | void reserve(size_t size); |
1073 | |
1074 | void clear(); |
1075 | |
1076 | Iterator begin() const; |
1077 | Iterator end() const; |
1078 | |
1079 | Iterator search(const SearchKey& searchKey) const; |
1080 | // Find the "first" row (in sorted order) for which searchKey.isAfter(rowNumber) returns true. |
1081 | |
1082 | Iterator insert(const SearchKey& searchKey); |
1083 | // Like search() but ensures that there is room in the leaf node to insert a new row. |
1084 | |
1085 | void erase(uint row, const SearchKey& searchKey); |
1086 | // Erase the given row number from the tree. searchKey.isAfter() returns true for the given row |
1087 | // and all rows after it. |
1088 | |
1089 | void renumber(uint oldRow, uint newRow, const SearchKey& searchKey); |
1090 | // Renumber the given row from oldRow to newRow. searchKey.isAfter() returns true for oldRow and |
1091 | // all rows after it. (It will not be called on newRow.) |
1092 | |
1093 | void verify(size_t size, FunctionParam<bool(uint, uint)>); |
1094 | |
1095 | private: |
1096 | NodeUnion* tree; // allocated with aligned_alloc aligned to cache lines |
1097 | uint treeCapacity; |
1098 | uint height; // height of *parent* tree -- does not include the leaf level |
1099 | uint freelistHead; |
1100 | uint freelistSize; |
1101 | uint beginLeaf; |
1102 | uint endLeaf; |
1103 | void growTree(uint minCapacity = 0); |
1104 | |
1105 | template <typename T> |
1106 | struct AllocResult; |
1107 | |
1108 | template <typename T> |
1109 | inline AllocResult<T> alloc(); |
1110 | inline void free(uint pos); |
1111 | |
1112 | inline uint split(Parent& src, uint srcPos, Parent& dst, uint dstPos); |
1113 | inline uint split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos); |
1114 | inline void merge(Parent& dst, uint dstPos, uint pivot, Parent& src); |
1115 | inline void merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src); |
1116 | inline void move(Parent& dst, uint dstPos, Parent& src); |
1117 | inline void move(Leaf& dst, uint dstPos, Leaf& src); |
1118 | inline void rotateLeft( |
1119 | Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup); |
1120 | inline void rotateLeft( |
1121 | Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup); |
1122 | inline void rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent); |
1123 | inline void rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent); |
1124 | |
1125 | template <typename Node> |
1126 | inline Node& insertHelper(const SearchKey& searchKey, |
1127 | Node& node, Parent* parent, uint indexInParent, uint pos); |
1128 | |
1129 | template <typename Node> |
1130 | inline Node& eraseHelper( |
1131 | Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup); |
1132 | |
1133 | size_t verifyNode(size_t size, FunctionParam<bool(uint, uint)>&, |
1134 | uint pos, uint height, MaybeUint maxRow); |
1135 | |
1136 | static const NodeUnion EMPTY_NODE; |
1137 | }; |
1138 | |
1139 | class BTreeImpl::MaybeUint { |
1140 | // A nullable uint, using the value zero to mean null and shifting all other values up by 1. |
1141 | public: |
1142 | MaybeUint() = default; |
1143 | inline MaybeUint(uint i): i(i - 1) {} |
1144 | inline MaybeUint(decltype(nullptr)): i(0) {} |
1145 | |
1146 | inline bool operator==(decltype(nullptr)) const { return i == 0; } |
1147 | inline bool operator==(uint j) const { return i == j + 1; } |
1148 | inline bool operator==(const MaybeUint& other) const { return i == other.i; } |
1149 | inline bool operator!=(decltype(nullptr)) const { return i != 0; } |
1150 | inline bool operator!=(uint j) const { return i != j + 1; } |
1151 | inline bool operator!=(const MaybeUint& other) const { return i != other.i; } |
1152 | |
1153 | inline MaybeUint& operator=(decltype(nullptr)) { i = 0; return *this; } |
1154 | inline MaybeUint& operator=(uint j) { i = j + 1; return *this; } |
1155 | |
1156 | inline uint operator*() const { KJ_IREQUIRE(i != 0); return i - 1; } |
1157 | |
1158 | template <typename Func> |
1159 | inline bool check(Func& func) const { return i != 0 && func(i - 1); } |
1160 | // Equivalent to *this != nullptr && func(**this) |
1161 | |
1162 | private: |
1163 | uint i; |
1164 | }; |
1165 | |
1166 | struct BTreeImpl::Leaf { |
1167 | uint next; |
1168 | uint prev; |
1169 | // Pointers to next and previous nodes at the same level, used for fast iteration. |
1170 | |
1171 | static constexpr size_t NROWS = 14; |
1172 | MaybeUint rows[NROWS]; |
1173 | // Pointers to table rows, offset by 1 so that 0 is an empty value. |
1174 | |
1175 | inline bool isFull() const; |
1176 | inline bool isMostlyFull() const; |
1177 | inline bool isHalfFull() const; |
1178 | |
1179 | inline void insert(uint i, uint newRow) { |
1180 | KJ_IREQUIRE(rows[Leaf::NROWS - 1] == nullptr); // check not full |
1181 | |
1182 | amove(rows + i + 1, rows + i, Leaf::NROWS - (i + 1)); |
1183 | rows[i] = newRow; |
1184 | } |
1185 | |
1186 | inline void erase(uint i) { |
1187 | KJ_IREQUIRE(rows[0] != nullptr); // check not empty |
1188 | |
1189 | amove(rows + i, rows + i + 1, Leaf::NROWS - (i + 1)); |
1190 | rows[Leaf::NROWS - 1] = nullptr; |
1191 | } |
1192 | |
1193 | inline uint size() const { |
1194 | static_assert(Leaf::NROWS == 14, "logic here needs updating" ); |
1195 | |
1196 | // Binary search for first empty element in `rows`, or return 14 if no empty elements. We do |
1197 | // this in a branch-free manner. Since there are 15 possible results (0 through 14, inclusive), |
1198 | // this isn't a perfectly balanced binary search. We carefully choose the split points so that |
1199 | // there's no way we'll try to dereference row[14] or later (which would be a buffer overflow). |
1200 | uint i = (rows[6] != nullptr) * 7; |
1201 | i += (rows[i + 3] != nullptr) * 4; |
1202 | i += (rows[i + 1] != nullptr) * 2; |
1203 | i += (rows[i ] != nullptr); |
1204 | return i; |
1205 | } |
1206 | |
1207 | template <typename Func> |
1208 | inline uint binarySearch(Func& predicate) const { |
1209 | // Binary search to find first row for which predicate(row) is false. |
1210 | |
1211 | static_assert(Leaf::NROWS == 14, "logic here needs updating" ); |
1212 | |
1213 | // See comments in size(). |
1214 | uint i = (rows[6].check(predicate)) * 7; |
1215 | i += (rows[i + 3].check(predicate)) * 4; |
1216 | i += (rows[i + 1].check(predicate)) * 2; |
1217 | if (i != 6) { // don't redundantly check row 6 |
1218 | i += (rows[i ].check(predicate)); |
1219 | } |
1220 | return i; |
1221 | } |
1222 | }; |
1223 | |
1224 | struct BTreeImpl::Parent { |
1225 | uint unused; |
1226 | // Not used. May be arbitrarily non-zero due to overlap with Freelisted::nextOffset. |
1227 | |
1228 | static constexpr size_t NKEYS = 7; |
1229 | MaybeUint keys[NKEYS]; |
1230 | // Pointers to table rows, offset by 1 so that 0 is an empty value. |
1231 | |
1232 | static constexpr size_t NCHILDREN = NKEYS + 1; |
1233 | uint children[NCHILDREN]; |
1234 | // Pointers to children. Not offset because the root is always at position 0, and a pointer |
1235 | // to the root would be nonsensical. |
1236 | |
1237 | inline bool isFull() const; |
1238 | inline bool isMostlyFull() const; |
1239 | inline bool isHalfFull() const; |
1240 | inline void initRoot(uint key, uint leftChild, uint rightChild); |
1241 | inline void insertAfter(uint i, uint splitKey, uint child); |
1242 | inline void eraseAfter(uint i); |
1243 | |
1244 | inline uint keyCount() const { |
1245 | static_assert(Parent::NKEYS == 7, "logic here needs updating" ); |
1246 | |
1247 | // Binary search for first empty element in `keys`, or return 7 if no empty elements. We do |
1248 | // this in a branch-free manner. Since there are 8 possible results (0 through 7, inclusive), |
1249 | // this is a perfectly balanced binary search. |
1250 | uint i = (keys[3] != nullptr) * 4; |
1251 | i += (keys[i + 1] != nullptr) * 2; |
1252 | i += (keys[i ] != nullptr); |
1253 | return i; |
1254 | } |
1255 | |
1256 | template <typename Func> |
1257 | inline uint binarySearch(Func& predicate) const { |
1258 | // Binary search to find first key for which predicate(key) is false. |
1259 | |
1260 | static_assert(Parent::NKEYS == 7, "logic here needs updating" ); |
1261 | |
1262 | // See comments in size(). |
1263 | uint i = (keys[3].check(predicate)) * 4; |
1264 | i += (keys[i + 1].check(predicate)) * 2; |
1265 | i += (keys[i ].check(predicate)); |
1266 | return i; |
1267 | } |
1268 | }; |
1269 | |
1270 | struct BTreeImpl::Freelisted { |
1271 | int nextOffset; |
1272 | // The next node in the freelist is at: this + 1 + nextOffset |
1273 | // |
1274 | // Hence, newly-allocated space can initialize this to zero. |
1275 | |
1276 | uint zero[15]; |
1277 | // Freelisted entries are always zero'd. |
1278 | }; |
1279 | |
1280 | struct BTreeImpl::NodeUnion { |
1281 | union { |
1282 | Freelisted freelist; |
1283 | // If this node is in the freelist. |
1284 | |
1285 | Leaf leaf; |
1286 | // If this node is a leaf. |
1287 | |
1288 | Parent parent; |
1289 | // If this node is not a leaf. |
1290 | }; |
1291 | |
1292 | inline operator Leaf&() { return leaf; } |
1293 | inline operator Parent&() { return parent; } |
1294 | inline operator const Leaf&() const { return leaf; } |
1295 | inline operator const Parent&() const { return parent; } |
1296 | }; |
1297 | |
1298 | static_assert(sizeof(BTreeImpl::Parent) == 64, |
1299 | "BTreeImpl::Parent should be optimized to fit a cache line" ); |
1300 | static_assert(sizeof(BTreeImpl::Leaf) == 64, |
1301 | "BTreeImpl::Leaf should be optimized to fit a cache line" ); |
1302 | static_assert(sizeof(BTreeImpl::Freelisted) == 64, |
1303 | "BTreeImpl::Freelisted should be optimized to fit a cache line" ); |
1304 | static_assert(sizeof(BTreeImpl::NodeUnion) == 64, |
1305 | "BTreeImpl::NodeUnion should be optimized to fit a cache line" ); |
1306 | |
1307 | bool BTreeImpl::Leaf::isFull() const { |
1308 | return rows[Leaf::NROWS - 1] != nullptr; |
1309 | } |
1310 | bool BTreeImpl::Leaf::isMostlyFull() const { |
1311 | return rows[Leaf::NROWS / 2] != nullptr; |
1312 | } |
1313 | bool BTreeImpl::Leaf::isHalfFull() const { |
1314 | KJ_IASSERT(rows[Leaf::NROWS / 2 - 1] != nullptr); |
1315 | return rows[Leaf::NROWS / 2] == nullptr; |
1316 | } |
1317 | |
1318 | bool BTreeImpl::Parent::isFull() const { |
1319 | return keys[Parent::NKEYS - 1] != nullptr; |
1320 | } |
1321 | bool BTreeImpl::Parent::isMostlyFull() const { |
1322 | return keys[Parent::NKEYS / 2] != nullptr; |
1323 | } |
1324 | bool BTreeImpl::Parent::isHalfFull() const { |
1325 | KJ_IASSERT(keys[Parent::NKEYS / 2 - 1] != nullptr); |
1326 | return keys[Parent::NKEYS / 2] == nullptr; |
1327 | } |
1328 | |
1329 | class BTreeImpl::Iterator { |
1330 | public: |
1331 | Iterator(const NodeUnion* tree, const Leaf* leaf, uint row) |
1332 | : tree(tree), leaf(leaf), row(row) {} |
1333 | |
1334 | size_t operator*() const { |
1335 | KJ_IREQUIRE(row < Leaf::NROWS && leaf->rows[row] != nullptr, |
1336 | "tried to dereference end() iterator" ); |
1337 | return *leaf->rows[row]; |
1338 | } |
1339 | |
1340 | inline Iterator& operator++() { |
1341 | KJ_IREQUIRE(leaf->rows[row] != nullptr, "B-tree iterator overflow" ); |
1342 | ++row; |
1343 | if (row >= Leaf::NROWS || leaf->rows[row] == nullptr) { |
1344 | if (leaf->next == 0) { |
1345 | // at end; stay on current leaf |
1346 | } else { |
1347 | leaf = &tree[leaf->next].leaf; |
1348 | row = 0; |
1349 | } |
1350 | } |
1351 | return *this; |
1352 | } |
1353 | inline Iterator operator++(int) { |
1354 | Iterator other = *this; |
1355 | ++*this; |
1356 | return other; |
1357 | } |
1358 | |
1359 | inline Iterator& operator--() { |
1360 | if (row == 0) { |
1361 | KJ_IREQUIRE(leaf->prev != 0, "B-tree iterator underflow" ); |
1362 | leaf = &tree[leaf->prev].leaf; |
1363 | row = leaf->size() - 1; |
1364 | } else { |
1365 | --row; |
1366 | } |
1367 | return *this; |
1368 | } |
1369 | inline Iterator operator--(int) { |
1370 | Iterator other = *this; |
1371 | --*this; |
1372 | return other; |
1373 | } |
1374 | |
1375 | inline bool operator==(const Iterator& other) const { |
1376 | return leaf == other.leaf && row == other.row; |
1377 | } |
1378 | inline bool operator!=(const Iterator& other) const { |
1379 | return leaf != other.leaf || row != other.row; |
1380 | } |
1381 | |
1382 | bool isEnd() { |
1383 | return row == Leaf::NROWS || leaf->rows[row] == nullptr; |
1384 | } |
1385 | |
1386 | void insert(BTreeImpl& impl, uint newRow) { |
1387 | KJ_IASSERT(impl.tree == tree); |
1388 | const_cast<Leaf*>(leaf)->insert(row, newRow); |
1389 | } |
1390 | |
1391 | void erase(BTreeImpl& impl) { |
1392 | KJ_IASSERT(impl.tree == tree); |
1393 | const_cast<Leaf*>(leaf)->erase(row); |
1394 | } |
1395 | |
1396 | void replace(BTreeImpl& impl, uint newRow) { |
1397 | KJ_IASSERT(impl.tree == tree); |
1398 | const_cast<Leaf*>(leaf)->rows[row] = newRow; |
1399 | } |
1400 | |
1401 | private: |
1402 | const NodeUnion* tree; |
1403 | const Leaf* leaf; |
1404 | uint row; |
1405 | }; |
1406 | |
1407 | template <typename Iterator> |
1408 | class IterRange { |
1409 | public: |
1410 | inline IterRange(Iterator b, Iterator e): b(b), e(e) {} |
1411 | |
1412 | inline Iterator begin() const { return b; } |
1413 | inline Iterator end() const { return e; } |
1414 | private: |
1415 | Iterator b; |
1416 | Iterator e; |
1417 | }; |
1418 | |
1419 | template <typename Iterator> |
1420 | inline IterRange<Decay<Iterator>> iterRange(Iterator b, Iterator e) { |
1421 | return { b, e }; |
1422 | } |
1423 | |
1424 | inline BTreeImpl::Iterator BTreeImpl::begin() const { |
1425 | return { tree, &tree[beginLeaf].leaf, 0 }; |
1426 | } |
1427 | inline BTreeImpl::Iterator BTreeImpl::end() const { |
1428 | auto& leaf = tree[endLeaf].leaf; |
1429 | return { tree, &leaf, leaf.size() }; |
1430 | } |
1431 | |
1432 | } // namespace _ (private) |
1433 | |
1434 | template <typename Callbacks> |
1435 | class TreeIndex { |
1436 | public: |
1437 | TreeIndex() KJ_DEFAULT_CONSTRUCTOR_VS2015_BUGGY |
1438 | template <typename... Params> |
1439 | TreeIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {} |
1440 | |
1441 | template <typename Row> |
1442 | void verify(kj::ArrayPtr<Row> table) { |
1443 | impl.verify(table.size(), [&](uint i, uint j) { |
1444 | return cb.isBefore(table[i], table[j]); |
1445 | }); |
1446 | } |
1447 | |
1448 | inline void reserve(size_t size) { impl.reserve(size); } |
1449 | inline void clear() { impl.clear(); } |
1450 | inline auto begin() const { return impl.begin(); } |
1451 | inline auto end() const { return impl.end(); } |
1452 | |
1453 | template <typename Row> |
1454 | decltype(auto) keyForRow(Row&& row) const { |
1455 | return cb.keyForRow(kj::fwd<Row>(row)); |
1456 | } |
1457 | |
1458 | template <typename Row, typename... Params> |
1459 | kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { |
1460 | auto iter = impl.insert(searchKey(table, params...)); |
1461 | |
1462 | if (!iter.isEnd() && cb.matches(table[*iter], params...)) { |
1463 | return *iter; |
1464 | } else { |
1465 | iter.insert(impl, pos); |
1466 | return nullptr; |
1467 | } |
1468 | } |
1469 | |
1470 | template <typename Row, typename... Params> |
1471 | void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) { |
1472 | impl.erase(pos, searchKey(table, params...)); |
1473 | } |
1474 | |
1475 | template <typename Row, typename... Params> |
1476 | void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) { |
1477 | impl.renumber(oldPos, newPos, searchKey(table, params...)); |
1478 | } |
1479 | |
1480 | template <typename Row, typename... Params> |
1481 | Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const { |
1482 | auto iter = impl.search(searchKey(table, params...)); |
1483 | |
1484 | if (!iter.isEnd() && cb.matches(table[*iter], params...)) { |
1485 | return size_t(*iter); |
1486 | } else { |
1487 | return nullptr; |
1488 | } |
1489 | } |
1490 | |
1491 | template <typename Row, typename Begin, typename End> |
1492 | _::IterRange<_::BTreeImpl::Iterator> range( |
1493 | kj::ArrayPtr<Row> table, Begin&& begin, End&& end) const { |
1494 | return { |
1495 | impl.search(searchKey(table, begin)), |
1496 | impl.search(searchKey(table, end )) |
1497 | }; |
1498 | } |
1499 | |
1500 | private: |
1501 | Callbacks cb; |
1502 | _::BTreeImpl impl; |
1503 | |
1504 | template <typename Predicate> |
1505 | class SearchKeyImpl: public _::BTreeImpl::SearchKey { |
1506 | public: |
1507 | SearchKeyImpl(Predicate&& predicate) |
1508 | : predicate(kj::mv(predicate)) {} |
1509 | |
1510 | uint search(const _::BTreeImpl::Parent& parent) const override { |
1511 | return parent.binarySearch(predicate); |
1512 | } |
1513 | uint search(const _::BTreeImpl::Leaf& leaf) const override { |
1514 | return leaf.binarySearch(predicate); |
1515 | } |
1516 | bool isAfter(uint rowIndex) const override { |
1517 | return predicate(rowIndex); |
1518 | } |
1519 | |
1520 | private: |
1521 | Predicate predicate; |
1522 | }; |
1523 | |
1524 | template <typename Row, typename... Params> |
1525 | inline auto searchKey(kj::ArrayPtr<Row>& table, Params&... params) const { |
1526 | auto predicate = [&](uint i) { return cb.isBefore(table[i], params...); }; |
1527 | return SearchKeyImpl<decltype(predicate)>(kj::mv(predicate)); |
1528 | } |
1529 | }; |
1530 | |
1531 | // ----------------------------------------------------------------------------- |
1532 | // Insertion order index |
1533 | |
1534 | class InsertionOrderIndex { |
1535 | // Table index which allows iterating over elements in order of insertion. This index cannot |
1536 | // be used for Table::find(), but can be used for Table::ordered(). |
1537 | |
1538 | struct Link; |
1539 | public: |
1540 | InsertionOrderIndex(); |
1541 | ~InsertionOrderIndex() noexcept(false); |
1542 | |
1543 | class Iterator { |
1544 | public: |
1545 | Iterator(const Link* links, uint pos) |
1546 | : links(links), pos(pos) {} |
1547 | |
1548 | inline size_t operator*() const { |
1549 | KJ_IREQUIRE(pos != 0, "can't derefrence end() iterator" ); |
1550 | return pos - 1; |
1551 | }; |
1552 | |
1553 | inline Iterator& operator++() { |
1554 | pos = links[pos].next; |
1555 | return *this; |
1556 | } |
1557 | inline Iterator operator++(int) { |
1558 | Iterator result = *this; |
1559 | ++*this; |
1560 | return result; |
1561 | } |
1562 | inline Iterator& operator--() { |
1563 | pos = links[pos].prev; |
1564 | return *this; |
1565 | } |
1566 | inline Iterator operator--(int) { |
1567 | Iterator result = *this; |
1568 | --*this; |
1569 | return result; |
1570 | } |
1571 | |
1572 | inline bool operator==(const Iterator& other) const { |
1573 | return pos == other.pos; |
1574 | } |
1575 | inline bool operator!=(const Iterator& other) const { |
1576 | return pos != other.pos; |
1577 | } |
1578 | |
1579 | private: |
1580 | const Link* links; |
1581 | uint pos; |
1582 | }; |
1583 | |
1584 | template <typename Row> |
1585 | Row& keyForRow(Row& row) const { return row; } |
1586 | |
1587 | void reserve(size_t size); |
1588 | void clear(); |
1589 | inline Iterator begin() const { return Iterator(links, links[0].next); } |
1590 | inline Iterator end() const { return Iterator(links, 0); } |
1591 | |
1592 | template <typename Row> |
1593 | kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, const Row& row) { |
1594 | return insertImpl(pos); |
1595 | } |
1596 | |
1597 | template <typename Row> |
1598 | void erase(kj::ArrayPtr<Row> table, size_t pos, const Row& row) { |
1599 | eraseImpl(pos); |
1600 | } |
1601 | |
1602 | template <typename Row> |
1603 | void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, const Row& row) { |
1604 | return moveImpl(oldPos, newPos); |
1605 | } |
1606 | |
1607 | private: |
1608 | struct Link { |
1609 | uint next; |
1610 | uint prev; |
1611 | }; |
1612 | |
1613 | uint capacity; |
1614 | Link* links; |
1615 | // links[0] is special: links[0].next points to the first link, links[0].prev points to the last. |
1616 | // links[n+1] corresponds to row n. |
1617 | |
1618 | kj::Maybe<size_t> insertImpl(size_t pos); |
1619 | void eraseImpl(size_t pos); |
1620 | void moveImpl(size_t oldPos, size_t newPos); |
1621 | |
1622 | static const Link EMPTY_LINK; |
1623 | }; |
1624 | |
1625 | } // namespace kj |
1626 | |