1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39#pragma once
40
41#include <vector>
42
43#include "portability/memory.h"
44#include "portability/toku_portability.h"
45#include "portability/toku_race_tools.h"
46#include "portability/toku_stdint.h"
47
48#include "ft/serialize/wbuf.h"
49#include "util/growable_array.h"
50#include "util/mempool.h"
51
52namespace toku {
53typedef uint32_t node_offset;
54
55
56/**
57 * Dynamic Order Maintenance Tree (DMT)
58 *
59 * Maintains a collection of totally ordered values, where each value has weight 1.
60 * A DMT supports variable sized values.
61 * The DMT is a mutable datatype.
62 *
63 * The Abstraction:
64 *
65 * An DMT is a vector of values, $V$, where $|V|$ is the length of the vector.
66 * The vector is numbered from $0$ to $|V|-1$.
67 *
68 * We can create a new DMT, which is the empty vector.
69 *
70 * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
71 * $|V'|=1+|V|$ and
72 *
73 * V'_j = V_j if $j<i$
74 * x if $j=i$
75 * V_{j-1} if $j>i$.
76 *
77 * We can specify $i$ using a kind of function instead of as an integer.
78 * Let $b$ be a function mapping from values to nonzero integers, such that
79 * the signum of $b$ is monotically increasing.
80 * We can specify $i$ as the minimum integer such that $b(V_i)>0$.
81 *
82 * We look up a value using its index, or using a Heaviside function.
83 * For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing.
84 * When lookup up values, we can look up
85 * $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.)
86 * (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.)
87 * $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.)
88 * $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.)
89 *
90 * When looking up a value using a Heaviside function, we get the value and its index.
91 *
92 * Performance:
93 * Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
94 * The memory required is O(|V|).
95 *
96 * Usage:
97 * The dmt is templated by three parameters:
98 * - dmtdata_t is what will be stored within the dmt. These could be pointers or real data types (ints, structs).
99 * - dmtdataout_t is what will be returned by find and related functions. By default, it is the same as dmtdata_t, but you can set it to (dmtdata_t *).
100 * - dmtwriter_t is a class that effectively handles (de)serialization between the value stored in the dmt and outside the dmt.
101 * To create an dmt which will store "TXNID"s, for example, it is a good idea to typedef the template:
102 * typedef dmt<TXNID, TXNID, txnid_writer_t> txnid_dmt_t;
103 * If you are storing structs (or you want to edit what is stored), you may want to be able to get a pointer to the data actually stored in the dmt (see find_zero). To do this, use the second template parameter:
104 * typedef dmt<struct foo, struct foo *, foo_writer_t> foo_dmt_t;
105 */
106
107namespace dmt_internal {
108
109class subtree {
110private:
111 uint32_t m_index;
112public:
113 // The maximum mempool size for a dmt is 2**32-2
114 static const uint32_t NODE_NULL = UINT32_MAX;
115 inline void set_to_null(void) {
116 m_index = NODE_NULL;
117 }
118
119 inline bool is_null(void) const {
120 return NODE_NULL == this->get_offset();
121 }
122
123 inline node_offset get_offset(void) const {
124 return m_index;
125 }
126
127 inline void set_offset(node_offset index) {
128 paranoid_invariant(index != NODE_NULL);
129 m_index = index;
130 }
131} __attribute__((__packed__,__aligned__(4)));
132
133template<typename dmtdata_t>
134class dmt_node_templated {
135public:
136 uint32_t weight;
137 subtree left;
138 subtree right;
139 uint32_t value_length;
140 dmtdata_t value;
141} __attribute__((__aligned__(4))); //NOTE: we cannot use attribute packed or dmtdata_t will call copy constructors (dmtdata_t might not be packed by default)
142
143}
144
145using namespace toku::dmt_internal;
146
147// Each data type used in a dmt requires a dmt_writer class (allows you to insert/etc with dynamic sized types).
148// A dmt_writer can be thought of a (de)serializer
149// There is no default implementation.
150// A dmtwriter instance handles reading/writing 'dmtdata_t's to/from the dmt.
151// The class must implement the following functions:
152// The size required in a dmt for the dmtdata_t represented:
153// size_t get_size(void) const;
154// Write the dmtdata_t to memory owned by a dmt:
155// void write_to(dmtdata_t *const dest) const;
156// Constructor (others are allowed, but this one is required)
157// dmtwriter(const uint32_t dmtdata_t_len, dmtdata_t *const src)
158
159template<typename dmtdata_t,
160 typename dmtdataout_t,
161 typename dmtwriter_t
162 >
163class dmt {
164private:
165 typedef dmt_node_templated<dmtdata_t> dmt_node;
166
167public:
168 static const uint8_t ALIGNMENT = 4;
169
170 class builder {
171 public:
172 void append(const dmtwriter_t &value);
173
174 // Create a dmt builder to build a dmt that will have at most n_values values and use
175 // at most n_value_bytes bytes in the mempool to store values (not counting node or alignment overhead).
176 void create(uint32_t n_values, uint32_t n_value_bytes);
177
178 bool value_length_is_fixed(void);
179
180 // Constructs a dmt that contains everything that was append()ed to this builder.
181 // Destroys this builder and frees associated memory.
182 void build(dmt<dmtdata_t, dmtdataout_t, dmtwriter_t> *dest);
183 private:
184 uint32_t max_values;
185 uint32_t max_value_bytes;
186 node_offset *sorted_node_offsets;
187 bool temp_valid;
188 dmt<dmtdata_t, dmtdataout_t, dmtwriter_t> temp;
189 };
190
191 /**
192 * Effect: Create an empty DMT.
193 * Performance: constant time.
194 */
195 void create(void);
196
197 /**
198 * Effect: Create a DMT containing values. The number of values is in numvalues.
199 * Each value is of a fixed (at runtime) length.
200 * mem contains the values in packed form (no alignment padding)
201 * Caller retains ownership of mem.
202 * Requires: this has not been created yet
203 * Rationale: Normally to insert N values takes O(N lg N) amortized time.
204 * If the N values are known in advance, are sorted, and
205 * the structure is empty, we can batch insert them much faster.
206 */
207 __attribute__((nonnull))
208 void create_from_sorted_memory_of_fixed_size_elements(
209 const void *mem,
210 const uint32_t numvalues,
211 const uint32_t mem_length,
212 const uint32_t fixed_value_length);
213
214 /**
215 * Effect: Creates a copy of an dmt.
216 * Creates this as the clone.
217 * Each element is copied directly. If they are pointers, the underlying data is not duplicated.
218 * Performance: O(memory) (essentially a memdup)
219 * The underlying structures are memcpy'd. Only the values themselves are copied (shallow copy)
220 */
221 void clone(const dmt &src);
222
223 /**
224 * Effect: Set the tree to be empty.
225 * Note: Will not reallocate or resize any memory.
226 * Note: If this dmt had variable sized elements, it will start tracking again (until it gets values of two different sizes)
227 * Performance: time=O(1)
228 */
229 void clear(void);
230
231 /**
232 * Effect: Destroy an DMT, freeing all its memory.
233 * If the values being stored are pointers, their underlying data is not freed.
234 * Those values may be freed before or after calling ::destroy()
235 * Rationale: Returns no values since free() cannot fail.
236 * Rationale: Does not free the underlying pointers to reduce complexity/maintain abstraction layer
237 * Performance: time=O(1)
238 */
239 void destroy(void);
240
241 /**
242 * Effect: return |this| (number of values stored in this dmt).
243 * Performance: time=O(1)
244 */
245 uint32_t size(void) const;
246
247 /**
248 * Effect: Serialize all values contained in this dmt into a packed form (no alignment padding).
249 * We serialized to wb. expected_unpadded_memory is the size of memory reserved in the wbuf
250 * for serialization. (We assert that serialization requires exactly the expected amount)
251 * Requires:
252 * ::prepare_for_serialize() has been called and no non-const functions have been called since.
253 * This dmt has fixed-length values and is in array form.
254 * Performance:
255 * O(memory)
256 */
257 void serialize_values(uint32_t expected_unpadded_memory, struct wbuf *wb) const;
258
259 /**
260 * Effect: Insert value into the DMT.
261 * If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
262 * Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
263 * If no such i exists, then let i be |V|
264 * Then this has the same effect as
265 * insert_at(tree, value, i);
266 * If idx!=NULL then i is stored in *idx
267 * Requires: The signum of h must be monotonically increasing.
268 * Returns:
269 * 0 success
270 * DB_KEYEXIST the key is present (h was equal to zero for some value)
271 * On nonzero return, dmt is unchanged.
272 * Performance: time=O(\log N) amortized.
273 * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
274 */
275 template<typename dmtcmp_t, int (*h)(const uint32_t size, const dmtdata_t &, const dmtcmp_t &)>
276 int insert(const dmtwriter_t &value, const dmtcmp_t &v, uint32_t *const idx);
277
278 /**
279 * Effect: Increases indexes of all items at slot >= idx by 1.
280 * Insert value into the position at idx.
281 * Returns:
282 * 0 success
283 * EINVAL if idx > this->size()
284 * On error, dmt is unchanged.
285 * Performance: time=O(\log N) amortized time.
286 * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
287 */
288 int insert_at(const dmtwriter_t &value, const uint32_t idx);
289
290 /**
291 * Effect: Delete the item in slot idx.
292 * Decreases indexes of all items at slot > idx by 1.
293 * Returns
294 * 0 success
295 * EINVAL if idx>=this->size()
296 * On error, dmt is unchanged.
297 * Rationale: To delete an item, first find its index using find or find_zero, then delete it.
298 * Performance: time=O(\log N) amortized.
299 */
300 int delete_at(const uint32_t idx);
301
302 /**
303 * Effect: Iterate over the values of the dmt, from left to right, calling f on each value.
304 * The first argument passed to f is a ref-to-const of the value stored in the dmt.
305 * The second argument passed to f is the index of the value.
306 * The third argument passed to f is iterate_extra.
307 * The indices run from 0 (inclusive) to this->size() (exclusive).
308 * Requires: f != NULL
309 * Returns:
310 * If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate.
311 * If f always returns zero, then iterate returns 0.
312 * Requires: Don't modify the dmt while running. (E.g., f may not insert or delete values from the dmt.)
313 * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt.
314 * Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
315 * Rationale: We may at some point use functors, but for now this is a smaller change from the old DMT.
316 */
317 template<typename iterate_extra_t,
318 int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
319 int iterate(iterate_extra_t *const iterate_extra) const;
320
321 /**
322 * Effect: Iterate over the values of the dmt, from left to right, calling f on each value.
323 * The first argument passed to f is a ref-to-const of the value stored in the dmt.
324 * The second argument passed to f is the index of the value.
325 * The third argument passed to f is iterate_extra.
326 * The indices run from 0 (inclusive) to this->size() (exclusive).
327 * We will iterate only over [left,right)
328 *
329 * Requires: left <= right
330 * Requires: f != NULL
331 * Returns:
332 * EINVAL if right > this->size()
333 * If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate_on_range.
334 * If f always returns zero, then iterate_on_range returns 0.
335 * Requires: Don't modify the dmt while running. (E.g., f may not insert or delete values from the dmt.)
336 * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt.
337 * Rational: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
338 */
339 template<typename iterate_extra_t,
340 int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
341 int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const;
342
343 // Attempt to verify this dmt is well formed. (Crashes/asserts/aborts if not well formed)
344 void verify(void) const;
345
346 /**
347 * Effect: Iterate over the values of the dmt, from left to right, calling f on each value.
348 * The first argument passed to f is a pointer to the value stored in the dmt.
349 * The second argument passed to f is the index of the value.
350 * The third argument passed to f is iterate_extra.
351 * The indices run from 0 (inclusive) to this->size() (exclusive).
352 * Requires: same as for iterate()
353 * Returns: same as for iterate()
354 * Performance: same as for iterate()
355 * Rationale: In general, most iterators should use iterate() since they should not modify the data stored in the dmt. This function is for iterators which need to modify values (for example, free_items).
356 * Rationale: We assume if you are transforming the data in place, you want to do it to everything at once, so there is not yet an iterate_on_range_ptr (but there could be).
357 */
358 template<typename iterate_extra_t,
359 int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)>
360 void iterate_ptr(iterate_extra_t *const iterate_extra);
361
362 /**
363 * Effect: Set *value=V_idx
364 * Returns
365 * 0 success
366 * EINVAL if index>=toku_dmt_size(dmt)
367 * On nonzero return, *value is unchanged
368 * Performance: time=O(\log N)
369 */
370 int fetch(const uint32_t idx, uint32_t *const value_size, dmtdataout_t *const value) const;
371
372 /**
373 * Effect: Find the smallest i such that h(V_i, extra)>=0
374 * If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value = V_i, and return 0.
375 * If there is such an i and h(V_i,extra)>0 then set *idxp=i and return DB_NOTFOUND.
376 * If there is no such i then set *idx=this->size() and return DB_NOTFOUND.
377 * Note: value is of type dmtdataout_t, which may be of type (dmtdata_t) or (dmtdata_t *) but is fixed by the instantiation.
378 * If it is the value type, then the value is copied out (even if the value type is a pointer to something else)
379 * If it is the pointer type, then *value is set to a pointer to the data within the dmt.
380 * This is determined by the type of the dmt as initially declared.
381 * If the dmt is declared as dmt<foo_t>, then foo_t's will be stored and foo_t's will be returned by find and related functions.
382 * If the dmt is declared as dmt<foo_t, foo_t *>, then foo_t's will be stored, and pointers to the stored items will be returned by find and related functions.
383 * Rationale:
384 * Structs too small for malloc should be stored directly in the dmt.
385 * These structs may need to be edited as they exist inside the dmt, so we need a way to get a pointer within the dmt.
386 * Using separate functions for returning pointers and values increases code duplication and reduces type-checking.
387 * That also reduces the ability of the creator of a data structure to give advice to its future users.
388 * Slight overloading in this case seemed to provide a better API and better type checking.
389 */
390 template<typename dmtcmp_t,
391 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
392 int find_zero(const dmtcmp_t &extra, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const;
393
394 /**
395 * Effect:
396 * If direction >0 then find the smallest i such that h(V_i,extra)>0.
397 * If direction <0 then find the largest i such that h(V_i,extra)<0.
398 * (Direction may not be equal to zero.)
399 * If value!=NULL then store V_i in *value
400 * If idxp!=NULL then store i in *idxp.
401 * Requires: The signum of h is monotically increasing.
402 * Returns
403 * 0 success
404 * DB_NOTFOUND no such value is found.
405 * On nonzero return, *value and *idxp are unchanged
406 * Performance: time=O(\log N)
407 * Rationale:
408 * Here's how to use the find function to find various things
409 * Cases for find:
410 * find first value: ( h(v)=+1, direction=+1 )
411 * find last value ( h(v)=-1, direction=-1 )
412 * find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 )
413 * find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 )
414 * find X or successor to X ( same as find first X. )
415 *
416 * Rationale: To help understand heaviside functions and behavor of find:
417 * There are 7 kinds of heaviside functions.
418 * The signus of the h must be monotonically increasing.
419 * Given a function of the following form, A is the element
420 * returned for direction>0, B is the element returned
421 * for direction<0, C is the element returned for
422 * direction==0 (see find_zero) (with a return of 0), and D is the element
423 * returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
424 * If any of A, B, or C are not found, then asking for the
425 * associated direction will return DB_NOTFOUND.
426 * See find_zero for more information.
427 *
428 * Let the following represent the signus of the heaviside function.
429 *
430 * -...-
431 * A
432 * D
433 *
434 * +...+
435 * B
436 * D
437 *
438 * 0...0
439 * C
440 *
441 * -...-0...0
442 * AC
443 *
444 * 0...0+...+
445 * C B
446 *
447 * -...-+...+
448 * AB
449 * D
450 *
451 * -...-0...0+...+
452 * AC B
453 */
454 template<typename dmtcmp_t,
455 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
456 int find(const dmtcmp_t &extra, int direction, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const;
457
458 /**
459 * Effect: Return the size (in bytes) of the dmt, as it resides in main memory.
460 * If the data stored are pointers, don't include the size of what they all point to.
461 * //TODO(leif or yoni): (maybe rename and) return memory footprint instead of allocated size
462 */
463 size_t memory_size(void);
464
465 // Returns whether all values in the dmt are known to be the same size.
466 // Note:
467 // There are no false positives, but false negatives are allowed.
468 // A false negative can happen if this dmt had 2 (or more) different size values,
469 // and then enough were deleted so that all the remaining ones are the same size.
470 // Once that happens, this dmt will never again return true for this function unless/until
471 // ::clear() is called
472 bool value_length_is_fixed(void) const;
473
474
475 // If this dmt is empty, return value is undefined.
476 // else if value_length_is_fixed() then it returns the fixed length.
477 // else returns 0
478 uint32_t get_fixed_length(void) const;
479
480 // Preprocesses the dmt so that serialization can happen quickly.
481 // After this call, serialize_values() can be called but no other mutator function can be called in between.
482 void prepare_for_serialize(void);
483
484private:
485 // Do a bit of verification that subtree and nodes act like packed c structs and do not introduce unnecessary padding for alignment.
486 ENSURE_POD(subtree);
487 static_assert(ALIGNMENT > 0, "ALIGNMENT <= 0");
488 static_assert((ALIGNMENT & (ALIGNMENT - 1)) == 0, "ALIGNMENT not a power of 2");
489 static_assert(sizeof(dmt_node) - sizeof(dmtdata_t) == __builtin_offsetof(dmt_node, value), "value is not last field in node");
490 static_assert(4 * sizeof(uint32_t) == __builtin_offsetof(dmt_node, value), "dmt_node is padded");
491 static_assert(__builtin_offsetof(dmt_node, value) % ALIGNMENT == 0, "dmt_node requires padding for alignment");
492 ENSURE_POD(dmt_node);
493
494 struct dmt_array {
495 uint32_t num_values;
496 };
497
498 struct dmt_tree {
499 subtree root;
500 };
501
502 /*
503 Relationship between values_same_size, d.a.num_values, value_length, is_array:
504 In an empty dmt:
505 is_array is true
506 value_same_size is true
507 value_length is undefined
508 d.a.num_values is 0
509 In a non-empty array dmt:
510 is_array is true
511 values_same_size is true
512 value_length is defined
513 d.a.num_values > 0
514 In a non-empty tree dmt:
515 is_array = false
516 value_same_size is true iff all values have been the same size since the last time the dmt turned into a tree.
517 value_length is defined iff values_same_size is true
518 d.a.num_values is undefined (the memory is used for the tree)
519 Note that in tree form, the dmt keeps track of if all values are the same size until the first time they are not.
520 'values_same_size' will not become true again (even if we change all values to be the same size)
521 until/unless the dmt becomes empty, at which point it becomes an array again.
522 */
523 bool values_same_size;
524 uint32_t value_length; // valid iff values_same_size is true.
525 struct mempool mp;
526 bool is_array;
527 union {
528 struct dmt_array a;
529 struct dmt_tree t;
530 } d;
531
532 // Returns pad bytes per element (for alignment) or 0 if not fixed length.
533 uint32_t get_fixed_length_alignment_overhead(void) const;
534
535 void verify_internal(const subtree &subtree, std::vector<bool> *touched) const;
536
537 // Retrieves the node for a given subtree.
538 // Requires: !subtree.is_null()
539 dmt_node & get_node(const subtree &subtree) const;
540
541 // Retrieves the node at a given offset in the mempool.
542 dmt_node & get_node(const node_offset offset) const;
543
544 // Returns the weight of a subtree rooted at st.
545 // if st.is_null(), returns 0
546 // Perf: O(1)
547 uint32_t nweight(const subtree &st) const;
548
549 // Allocates space for a node (in the mempool) and uses the dmtwriter to write the value into the node
550 node_offset node_malloc_and_set_value(const dmtwriter_t &value);
551
552 // Uses the dmtwriter to write a value into node n
553 void node_set_value(dmt_node *n, const dmtwriter_t &value);
554
555 // (mempool-)free the memory for a node
556 void node_free(const subtree &st);
557
558 // Effect: Resizes the mempool (holding the array) if necessary to hold one more item of length: this->value_length
559 // Requires:
560 // This dmt is in array form (and thus this->values_same_length)
561 void maybe_resize_array_for_insert(void);
562
563 // Effect: Converts a dmt from array form to tree form.
564 // Perf: O(n)
565 // Note: This does not clear the 'this->values_same_size' bit
566 void convert_to_tree(void);
567
568 // Effect: Resizes the mempool holding a tree if necessary. If value==nullptr then it may shrink if overallocated,
569 // otherwise resize only happens if there is not enough free space for an insert of value
570 void maybe_resize_tree(const dmtwriter_t * value);
571
572 // Returns true if the tree rooted at st would need rebalance after adding
573 // leftmod to the left subtree and rightmod to the right subtree
574 bool will_need_rebalance(const subtree &st, const int leftmod, const int rightmod) const;
575
576 __attribute__((nonnull))
577 void insert_internal(subtree *const subtreep, const dmtwriter_t &value, const uint32_t idx, subtree **const rebalance_subtree);
578
579 template<bool with_resize>
580 int insert_at_array_end(const dmtwriter_t& value_in);
581
582 dmtdata_t * alloc_array_value_end(void);
583
584 dmtdata_t * get_array_value(const uint32_t idx) const;
585
586 dmtdata_t * get_array_value_internal(const struct mempool *mempool, const uint32_t idx) const;
587
588 void convert_from_array_to_tree(void);
589
590 void convert_from_tree_to_array(void);
591
592 void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree);
593
594 template<typename iterate_extra_t,
595 int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
596 int iterate_internal_array(const uint32_t left, const uint32_t right,
597 iterate_extra_t *const iterate_extra) const;
598
599 template<typename iterate_extra_t,
600 int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)>
601 void iterate_ptr_internal(const uint32_t left, const uint32_t right,
602 const subtree &subtree, const uint32_t idx,
603 iterate_extra_t *const iterate_extra);
604
605 template<typename iterate_extra_t,
606 int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)>
607 void iterate_ptr_internal_array(const uint32_t left, const uint32_t right,
608 iterate_extra_t *const iterate_extra);
609
610 template<typename iterate_extra_t,
611 int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
612 int iterate_internal(const uint32_t left, const uint32_t right,
613 const subtree &subtree, const uint32_t idx,
614 iterate_extra_t *const iterate_extra) const;
615
616 void fetch_internal_array(const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const;
617
618 void fetch_internal(const subtree &subtree, const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const;
619
620 __attribute__((nonnull))
621 void fill_array_with_subtree_offsets(node_offset *const array, const subtree &subtree) const;
622
623 __attribute__((nonnull))
624 void rebuild_subtree_from_offsets(subtree *const subtree, const node_offset *const offsets, const uint32_t numvalues);
625
626 __attribute__((nonnull))
627 void rebalance(subtree *const subtree);
628
629 static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n);
630
631 static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n);
632
633 static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr);
634
635 static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr);
636
637 template<typename dmtcmp_t,
638 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
639 int find_internal_zero_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
640
641 template<typename dmtcmp_t,
642 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
643 int find_internal_zero(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
644
645 template<typename dmtcmp_t,
646 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
647 int find_internal_plus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
648
649 template<typename dmtcmp_t,
650 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
651 int find_internal_plus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
652
653 template<typename dmtcmp_t,
654 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
655 int find_internal_minus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
656
657 template<typename dmtcmp_t,
658 int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
659 int find_internal_minus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
660
661 // Allocate memory for an array: node_offset[num_idx] from pre-allocated contiguous free space in the mempool.
662 // If there is not enough space, returns nullptr.
663 node_offset* alloc_temp_node_offsets(uint32_t num_idxs);
664
665 // Returns the aligned size of x.
666 // If x % ALIGNMENT == 0, returns x
667 // o.w. returns x + (ALIGNMENT - (x % ALIGNMENT))
668 uint32_t align(const uint32_t x) const;
669};
670
671} // namespace toku
672
673// include the implementation here
674#include "dmt.cc"
675
676