1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #pragma once |
40 | |
41 | #include <vector> |
42 | |
43 | #include "portability/memory.h" |
44 | #include "portability/toku_portability.h" |
45 | #include "portability/toku_race_tools.h" |
46 | #include "portability/toku_stdint.h" |
47 | |
48 | #include "ft/serialize/wbuf.h" |
49 | #include "util/growable_array.h" |
50 | #include "util/mempool.h" |
51 | |
52 | namespace toku { |
53 | typedef uint32_t node_offset; |
54 | |
55 | |
56 | /** |
57 | * Dynamic Order Maintenance Tree (DMT) |
58 | * |
59 | * Maintains a collection of totally ordered values, where each value has weight 1. |
60 | * A DMT supports variable sized values. |
61 | * The DMT is a mutable datatype. |
62 | * |
63 | * The Abstraction: |
64 | * |
65 | * An DMT is a vector of values, $V$, where $|V|$ is the length of the vector. |
66 | * The vector is numbered from $0$ to $|V|-1$. |
67 | * |
68 | * We can create a new DMT, which is the empty vector. |
69 | * |
70 | * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where |
71 | * $|V'|=1+|V|$ and |
72 | * |
73 | * V'_j = V_j if $j<i$ |
74 | * x if $j=i$ |
75 | * V_{j-1} if $j>i$. |
76 | * |
77 | * We can specify $i$ using a kind of function instead of as an integer. |
78 | * Let $b$ be a function mapping from values to nonzero integers, such that |
79 | * the signum of $b$ is monotically increasing. |
80 | * We can specify $i$ as the minimum integer such that $b(V_i)>0$. |
81 | * |
82 | * We look up a value using its index, or using a Heaviside function. |
83 | * For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing. |
84 | * When lookup up values, we can look up |
85 | * $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.) |
86 | * (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.) |
87 | * $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.) |
88 | * $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.) |
89 | * |
90 | * When looking up a value using a Heaviside function, we get the value and its index. |
91 | * |
92 | * Performance: |
93 | * Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function. |
94 | * The memory required is O(|V|). |
95 | * |
96 | * Usage: |
97 | * The dmt is templated by three parameters: |
98 | * - dmtdata_t is what will be stored within the dmt. These could be pointers or real data types (ints, structs). |
99 | * - dmtdataout_t is what will be returned by find and related functions. By default, it is the same as dmtdata_t, but you can set it to (dmtdata_t *). |
100 | * - dmtwriter_t is a class that effectively handles (de)serialization between the value stored in the dmt and outside the dmt. |
101 | * To create an dmt which will store "TXNID"s, for example, it is a good idea to typedef the template: |
102 | * typedef dmt<TXNID, TXNID, txnid_writer_t> txnid_dmt_t; |
103 | * If you are storing structs (or you want to edit what is stored), you may want to be able to get a pointer to the data actually stored in the dmt (see find_zero). To do this, use the second template parameter: |
104 | * typedef dmt<struct foo, struct foo *, foo_writer_t> foo_dmt_t; |
105 | */ |
106 | |
107 | namespace dmt_internal { |
108 | |
109 | class subtree { |
110 | private: |
111 | uint32_t m_index; |
112 | public: |
113 | // The maximum mempool size for a dmt is 2**32-2 |
114 | static const uint32_t NODE_NULL = UINT32_MAX; |
115 | inline void set_to_null(void) { |
116 | m_index = NODE_NULL; |
117 | } |
118 | |
119 | inline bool is_null(void) const { |
120 | return NODE_NULL == this->get_offset(); |
121 | } |
122 | |
123 | inline node_offset get_offset(void) const { |
124 | return m_index; |
125 | } |
126 | |
127 | inline void set_offset(node_offset index) { |
128 | paranoid_invariant(index != NODE_NULL); |
129 | m_index = index; |
130 | } |
131 | } __attribute__((__packed__,__aligned__(4))); |
132 | |
133 | template<typename dmtdata_t> |
134 | class dmt_node_templated { |
135 | public: |
136 | uint32_t weight; |
137 | subtree left; |
138 | subtree right; |
139 | uint32_t value_length; |
140 | dmtdata_t value; |
141 | } __attribute__((__aligned__(4))); //NOTE: we cannot use attribute packed or dmtdata_t will call copy constructors (dmtdata_t might not be packed by default) |
142 | |
143 | } |
144 | |
145 | using namespace toku::dmt_internal; |
146 | |
147 | // Each data type used in a dmt requires a dmt_writer class (allows you to insert/etc with dynamic sized types). |
148 | // A dmt_writer can be thought of a (de)serializer |
149 | // There is no default implementation. |
150 | // A dmtwriter instance handles reading/writing 'dmtdata_t's to/from the dmt. |
151 | // The class must implement the following functions: |
152 | // The size required in a dmt for the dmtdata_t represented: |
153 | // size_t get_size(void) const; |
154 | // Write the dmtdata_t to memory owned by a dmt: |
155 | // void write_to(dmtdata_t *const dest) const; |
156 | // Constructor (others are allowed, but this one is required) |
157 | // dmtwriter(const uint32_t dmtdata_t_len, dmtdata_t *const src) |
158 | |
159 | template<typename dmtdata_t, |
160 | typename dmtdataout_t, |
161 | typename dmtwriter_t |
162 | > |
163 | class dmt { |
164 | private: |
165 | typedef dmt_node_templated<dmtdata_t> dmt_node; |
166 | |
167 | public: |
168 | static const uint8_t ALIGNMENT = 4; |
169 | |
170 | class builder { |
171 | public: |
172 | void append(const dmtwriter_t &value); |
173 | |
174 | // Create a dmt builder to build a dmt that will have at most n_values values and use |
175 | // at most n_value_bytes bytes in the mempool to store values (not counting node or alignment overhead). |
176 | void create(uint32_t n_values, uint32_t n_value_bytes); |
177 | |
178 | bool value_length_is_fixed(void); |
179 | |
180 | // Constructs a dmt that contains everything that was append()ed to this builder. |
181 | // Destroys this builder and frees associated memory. |
182 | void build(dmt<dmtdata_t, dmtdataout_t, dmtwriter_t> *dest); |
183 | private: |
184 | uint32_t max_values; |
185 | uint32_t max_value_bytes; |
186 | node_offset *sorted_node_offsets; |
187 | bool temp_valid; |
188 | dmt<dmtdata_t, dmtdataout_t, dmtwriter_t> temp; |
189 | }; |
190 | |
191 | /** |
192 | * Effect: Create an empty DMT. |
193 | * Performance: constant time. |
194 | */ |
195 | void create(void); |
196 | |
197 | /** |
198 | * Effect: Create a DMT containing values. The number of values is in numvalues. |
199 | * Each value is of a fixed (at runtime) length. |
200 | * mem contains the values in packed form (no alignment padding) |
201 | * Caller retains ownership of mem. |
202 | * Requires: this has not been created yet |
203 | * Rationale: Normally to insert N values takes O(N lg N) amortized time. |
204 | * If the N values are known in advance, are sorted, and |
205 | * the structure is empty, we can batch insert them much faster. |
206 | */ |
207 | __attribute__((nonnull)) |
208 | void create_from_sorted_memory_of_fixed_size_elements( |
209 | const void *mem, |
210 | const uint32_t numvalues, |
211 | const uint32_t mem_length, |
212 | const uint32_t fixed_value_length); |
213 | |
214 | /** |
215 | * Effect: Creates a copy of an dmt. |
216 | * Creates this as the clone. |
217 | * Each element is copied directly. If they are pointers, the underlying data is not duplicated. |
218 | * Performance: O(memory) (essentially a memdup) |
219 | * The underlying structures are memcpy'd. Only the values themselves are copied (shallow copy) |
220 | */ |
221 | void clone(const dmt &src); |
222 | |
223 | /** |
224 | * Effect: Set the tree to be empty. |
225 | * Note: Will not reallocate or resize any memory. |
226 | * Note: If this dmt had variable sized elements, it will start tracking again (until it gets values of two different sizes) |
227 | * Performance: time=O(1) |
228 | */ |
229 | void clear(void); |
230 | |
231 | /** |
232 | * Effect: Destroy an DMT, freeing all its memory. |
233 | * If the values being stored are pointers, their underlying data is not freed. |
234 | * Those values may be freed before or after calling ::destroy() |
235 | * Rationale: Returns no values since free() cannot fail. |
236 | * Rationale: Does not free the underlying pointers to reduce complexity/maintain abstraction layer |
237 | * Performance: time=O(1) |
238 | */ |
239 | void destroy(void); |
240 | |
241 | /** |
242 | * Effect: return |this| (number of values stored in this dmt). |
243 | * Performance: time=O(1) |
244 | */ |
245 | uint32_t size(void) const; |
246 | |
247 | /** |
248 | * Effect: Serialize all values contained in this dmt into a packed form (no alignment padding). |
249 | * We serialized to wb. expected_unpadded_memory is the size of memory reserved in the wbuf |
250 | * for serialization. (We assert that serialization requires exactly the expected amount) |
251 | * Requires: |
252 | * ::prepare_for_serialize() has been called and no non-const functions have been called since. |
253 | * This dmt has fixed-length values and is in array form. |
254 | * Performance: |
255 | * O(memory) |
256 | */ |
257 | void serialize_values(uint32_t expected_unpadded_memory, struct wbuf *wb) const; |
258 | |
259 | /** |
260 | * Effect: Insert value into the DMT. |
261 | * If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST. |
262 | * Otherwise, let i be the minimum value such that $h(V_i, v)>0$. |
263 | * If no such i exists, then let i be |V| |
264 | * Then this has the same effect as |
265 | * insert_at(tree, value, i); |
266 | * If idx!=NULL then i is stored in *idx |
267 | * Requires: The signum of h must be monotonically increasing. |
268 | * Returns: |
269 | * 0 success |
270 | * DB_KEYEXIST the key is present (h was equal to zero for some value) |
271 | * On nonzero return, dmt is unchanged. |
272 | * Performance: time=O(\log N) amortized. |
273 | * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now. |
274 | */ |
275 | template<typename dmtcmp_t, int (*h)(const uint32_t size, const dmtdata_t &, const dmtcmp_t &)> |
276 | int insert(const dmtwriter_t &value, const dmtcmp_t &v, uint32_t *const idx); |
277 | |
278 | /** |
279 | * Effect: Increases indexes of all items at slot >= idx by 1. |
280 | * Insert value into the position at idx. |
281 | * Returns: |
282 | * 0 success |
283 | * EINVAL if idx > this->size() |
284 | * On error, dmt is unchanged. |
285 | * Performance: time=O(\log N) amortized time. |
286 | * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now. |
287 | */ |
288 | int insert_at(const dmtwriter_t &value, const uint32_t idx); |
289 | |
290 | /** |
291 | * Effect: Delete the item in slot idx. |
292 | * Decreases indexes of all items at slot > idx by 1. |
293 | * Returns |
294 | * 0 success |
295 | * EINVAL if idx>=this->size() |
296 | * On error, dmt is unchanged. |
297 | * Rationale: To delete an item, first find its index using find or find_zero, then delete it. |
298 | * Performance: time=O(\log N) amortized. |
299 | */ |
300 | int delete_at(const uint32_t idx); |
301 | |
302 | /** |
303 | * Effect: Iterate over the values of the dmt, from left to right, calling f on each value. |
304 | * The first argument passed to f is a ref-to-const of the value stored in the dmt. |
305 | * The second argument passed to f is the index of the value. |
306 | * The third argument passed to f is iterate_extra. |
307 | * The indices run from 0 (inclusive) to this->size() (exclusive). |
308 | * Requires: f != NULL |
309 | * Returns: |
310 | * If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate. |
311 | * If f always returns zero, then iterate returns 0. |
312 | * Requires: Don't modify the dmt while running. (E.g., f may not insert or delete values from the dmt.) |
313 | * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt. |
314 | * Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read. |
315 | * Rationale: We may at some point use functors, but for now this is a smaller change from the old DMT. |
316 | */ |
317 | template<typename iterate_extra_t, |
318 | int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)> |
319 | int iterate(iterate_extra_t *const ) const; |
320 | |
321 | /** |
322 | * Effect: Iterate over the values of the dmt, from left to right, calling f on each value. |
323 | * The first argument passed to f is a ref-to-const of the value stored in the dmt. |
324 | * The second argument passed to f is the index of the value. |
325 | * The third argument passed to f is iterate_extra. |
326 | * The indices run from 0 (inclusive) to this->size() (exclusive). |
327 | * We will iterate only over [left,right) |
328 | * |
329 | * Requires: left <= right |
330 | * Requires: f != NULL |
331 | * Returns: |
332 | * EINVAL if right > this->size() |
333 | * If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate_on_range. |
334 | * If f always returns zero, then iterate_on_range returns 0. |
335 | * Requires: Don't modify the dmt while running. (E.g., f may not insert or delete values from the dmt.) |
336 | * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt. |
337 | * Rational: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read. |
338 | */ |
339 | template<typename iterate_extra_t, |
340 | int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)> |
341 | int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const ) const; |
342 | |
343 | // Attempt to verify this dmt is well formed. (Crashes/asserts/aborts if not well formed) |
344 | void verify(void) const; |
345 | |
346 | /** |
347 | * Effect: Iterate over the values of the dmt, from left to right, calling f on each value. |
348 | * The first argument passed to f is a pointer to the value stored in the dmt. |
349 | * The second argument passed to f is the index of the value. |
350 | * The third argument passed to f is iterate_extra. |
351 | * The indices run from 0 (inclusive) to this->size() (exclusive). |
352 | * Requires: same as for iterate() |
353 | * Returns: same as for iterate() |
354 | * Performance: same as for iterate() |
355 | * Rationale: In general, most iterators should use iterate() since they should not modify the data stored in the dmt. This function is for iterators which need to modify values (for example, free_items). |
356 | * Rationale: We assume if you are transforming the data in place, you want to do it to everything at once, so there is not yet an iterate_on_range_ptr (but there could be). |
357 | */ |
358 | template<typename iterate_extra_t, |
359 | int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)> |
360 | void iterate_ptr(iterate_extra_t *const ); |
361 | |
362 | /** |
363 | * Effect: Set *value=V_idx |
364 | * Returns |
365 | * 0 success |
366 | * EINVAL if index>=toku_dmt_size(dmt) |
367 | * On nonzero return, *value is unchanged |
368 | * Performance: time=O(\log N) |
369 | */ |
370 | int fetch(const uint32_t idx, uint32_t *const value_size, dmtdataout_t *const value) const; |
371 | |
372 | /** |
373 | * Effect: Find the smallest i such that h(V_i, extra)>=0 |
374 | * If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value = V_i, and return 0. |
375 | * If there is such an i and h(V_i,extra)>0 then set *idxp=i and return DB_NOTFOUND. |
376 | * If there is no such i then set *idx=this->size() and return DB_NOTFOUND. |
377 | * Note: value is of type dmtdataout_t, which may be of type (dmtdata_t) or (dmtdata_t *) but is fixed by the instantiation. |
378 | * If it is the value type, then the value is copied out (even if the value type is a pointer to something else) |
379 | * If it is the pointer type, then *value is set to a pointer to the data within the dmt. |
380 | * This is determined by the type of the dmt as initially declared. |
381 | * If the dmt is declared as dmt<foo_t>, then foo_t's will be stored and foo_t's will be returned by find and related functions. |
382 | * If the dmt is declared as dmt<foo_t, foo_t *>, then foo_t's will be stored, and pointers to the stored items will be returned by find and related functions. |
383 | * Rationale: |
384 | * Structs too small for malloc should be stored directly in the dmt. |
385 | * These structs may need to be edited as they exist inside the dmt, so we need a way to get a pointer within the dmt. |
386 | * Using separate functions for returning pointers and values increases code duplication and reduces type-checking. |
387 | * That also reduces the ability of the creator of a data structure to give advice to its future users. |
388 | * Slight overloading in this case seemed to provide a better API and better type checking. |
389 | */ |
390 | template<typename dmtcmp_t, |
391 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
392 | int find_zero(const dmtcmp_t &, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const; |
393 | |
394 | /** |
395 | * Effect: |
396 | * If direction >0 then find the smallest i such that h(V_i,extra)>0. |
397 | * If direction <0 then find the largest i such that h(V_i,extra)<0. |
398 | * (Direction may not be equal to zero.) |
399 | * If value!=NULL then store V_i in *value |
400 | * If idxp!=NULL then store i in *idxp. |
401 | * Requires: The signum of h is monotically increasing. |
402 | * Returns |
403 | * 0 success |
404 | * DB_NOTFOUND no such value is found. |
405 | * On nonzero return, *value and *idxp are unchanged |
406 | * Performance: time=O(\log N) |
407 | * Rationale: |
408 | * Here's how to use the find function to find various things |
409 | * Cases for find: |
410 | * find first value: ( h(v)=+1, direction=+1 ) |
411 | * find last value ( h(v)=-1, direction=-1 ) |
412 | * find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 ) |
413 | * find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 ) |
414 | * find X or successor to X ( same as find first X. ) |
415 | * |
416 | * Rationale: To help understand heaviside functions and behavor of find: |
417 | * There are 7 kinds of heaviside functions. |
418 | * The signus of the h must be monotonically increasing. |
419 | * Given a function of the following form, A is the element |
420 | * returned for direction>0, B is the element returned |
421 | * for direction<0, C is the element returned for |
422 | * direction==0 (see find_zero) (with a return of 0), and D is the element |
423 | * returned for direction==0 (see find_zero) with a return of DB_NOTFOUND. |
424 | * If any of A, B, or C are not found, then asking for the |
425 | * associated direction will return DB_NOTFOUND. |
426 | * See find_zero for more information. |
427 | * |
428 | * Let the following represent the signus of the heaviside function. |
429 | * |
430 | * -...- |
431 | * A |
432 | * D |
433 | * |
434 | * +...+ |
435 | * B |
436 | * D |
437 | * |
438 | * 0...0 |
439 | * C |
440 | * |
441 | * -...-0...0 |
442 | * AC |
443 | * |
444 | * 0...0+...+ |
445 | * C B |
446 | * |
447 | * -...-+...+ |
448 | * AB |
449 | * D |
450 | * |
451 | * -...-0...0+...+ |
452 | * AC B |
453 | */ |
454 | template<typename dmtcmp_t, |
455 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
456 | int find(const dmtcmp_t &, int direction, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const; |
457 | |
458 | /** |
459 | * Effect: Return the size (in bytes) of the dmt, as it resides in main memory. |
460 | * If the data stored are pointers, don't include the size of what they all point to. |
461 | * //TODO(leif or yoni): (maybe rename and) return memory footprint instead of allocated size |
462 | */ |
463 | size_t memory_size(void); |
464 | |
465 | // Returns whether all values in the dmt are known to be the same size. |
466 | // Note: |
467 | // There are no false positives, but false negatives are allowed. |
468 | // A false negative can happen if this dmt had 2 (or more) different size values, |
469 | // and then enough were deleted so that all the remaining ones are the same size. |
470 | // Once that happens, this dmt will never again return true for this function unless/until |
471 | // ::clear() is called |
472 | bool value_length_is_fixed(void) const; |
473 | |
474 | |
475 | // If this dmt is empty, return value is undefined. |
476 | // else if value_length_is_fixed() then it returns the fixed length. |
477 | // else returns 0 |
478 | uint32_t get_fixed_length(void) const; |
479 | |
480 | // Preprocesses the dmt so that serialization can happen quickly. |
481 | // After this call, serialize_values() can be called but no other mutator function can be called in between. |
482 | void prepare_for_serialize(void); |
483 | |
484 | private: |
485 | // Do a bit of verification that subtree and nodes act like packed c structs and do not introduce unnecessary padding for alignment. |
486 | ENSURE_POD(subtree); |
487 | static_assert(ALIGNMENT > 0, "ALIGNMENT <= 0" ); |
488 | static_assert((ALIGNMENT & (ALIGNMENT - 1)) == 0, "ALIGNMENT not a power of 2" ); |
489 | static_assert(sizeof(dmt_node) - sizeof(dmtdata_t) == __builtin_offsetof(dmt_node, value), "value is not last field in node" ); |
490 | static_assert(4 * sizeof(uint32_t) == __builtin_offsetof(dmt_node, value), "dmt_node is padded" ); |
491 | static_assert(__builtin_offsetof(dmt_node, value) % ALIGNMENT == 0, "dmt_node requires padding for alignment" ); |
492 | ENSURE_POD(dmt_node); |
493 | |
494 | struct dmt_array { |
495 | uint32_t num_values; |
496 | }; |
497 | |
498 | struct dmt_tree { |
499 | subtree root; |
500 | }; |
501 | |
502 | /* |
503 | Relationship between values_same_size, d.a.num_values, value_length, is_array: |
504 | In an empty dmt: |
505 | is_array is true |
506 | value_same_size is true |
507 | value_length is undefined |
508 | d.a.num_values is 0 |
509 | In a non-empty array dmt: |
510 | is_array is true |
511 | values_same_size is true |
512 | value_length is defined |
513 | d.a.num_values > 0 |
514 | In a non-empty tree dmt: |
515 | is_array = false |
516 | value_same_size is true iff all values have been the same size since the last time the dmt turned into a tree. |
517 | value_length is defined iff values_same_size is true |
518 | d.a.num_values is undefined (the memory is used for the tree) |
519 | Note that in tree form, the dmt keeps track of if all values are the same size until the first time they are not. |
520 | 'values_same_size' will not become true again (even if we change all values to be the same size) |
521 | until/unless the dmt becomes empty, at which point it becomes an array again. |
522 | */ |
523 | bool values_same_size; |
524 | uint32_t value_length; // valid iff values_same_size is true. |
525 | struct mempool mp; |
526 | bool is_array; |
527 | union { |
528 | struct dmt_array a; |
529 | struct dmt_tree t; |
530 | } d; |
531 | |
532 | // Returns pad bytes per element (for alignment) or 0 if not fixed length. |
533 | uint32_t get_fixed_length_alignment_overhead(void) const; |
534 | |
535 | void verify_internal(const subtree &subtree, std::vector<bool> *touched) const; |
536 | |
537 | // Retrieves the node for a given subtree. |
538 | // Requires: !subtree.is_null() |
539 | dmt_node & get_node(const subtree &subtree) const; |
540 | |
541 | // Retrieves the node at a given offset in the mempool. |
542 | dmt_node & get_node(const node_offset offset) const; |
543 | |
544 | // Returns the weight of a subtree rooted at st. |
545 | // if st.is_null(), returns 0 |
546 | // Perf: O(1) |
547 | uint32_t nweight(const subtree &st) const; |
548 | |
549 | // Allocates space for a node (in the mempool) and uses the dmtwriter to write the value into the node |
550 | node_offset node_malloc_and_set_value(const dmtwriter_t &value); |
551 | |
552 | // Uses the dmtwriter to write a value into node n |
553 | void node_set_value(dmt_node *n, const dmtwriter_t &value); |
554 | |
555 | // (mempool-)free the memory for a node |
556 | void node_free(const subtree &st); |
557 | |
558 | // Effect: Resizes the mempool (holding the array) if necessary to hold one more item of length: this->value_length |
559 | // Requires: |
560 | // This dmt is in array form (and thus this->values_same_length) |
561 | void maybe_resize_array_for_insert(void); |
562 | |
563 | // Effect: Converts a dmt from array form to tree form. |
564 | // Perf: O(n) |
565 | // Note: This does not clear the 'this->values_same_size' bit |
566 | void convert_to_tree(void); |
567 | |
568 | // Effect: Resizes the mempool holding a tree if necessary. If value==nullptr then it may shrink if overallocated, |
569 | // otherwise resize only happens if there is not enough free space for an insert of value |
570 | void maybe_resize_tree(const dmtwriter_t * value); |
571 | |
572 | // Returns true if the tree rooted at st would need rebalance after adding |
573 | // leftmod to the left subtree and rightmod to the right subtree |
574 | bool will_need_rebalance(const subtree &st, const int leftmod, const int rightmod) const; |
575 | |
576 | __attribute__((nonnull)) |
577 | void insert_internal(subtree *const subtreep, const dmtwriter_t &value, const uint32_t idx, subtree **const rebalance_subtree); |
578 | |
579 | template<bool with_resize> |
580 | int insert_at_array_end(const dmtwriter_t& value_in); |
581 | |
582 | dmtdata_t * alloc_array_value_end(void); |
583 | |
584 | dmtdata_t * get_array_value(const uint32_t idx) const; |
585 | |
586 | dmtdata_t * get_array_value_internal(const struct mempool *mempool, const uint32_t idx) const; |
587 | |
588 | void convert_from_array_to_tree(void); |
589 | |
590 | void convert_from_tree_to_array(void); |
591 | |
592 | void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree); |
593 | |
594 | template<typename iterate_extra_t, |
595 | int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)> |
596 | int iterate_internal_array(const uint32_t left, const uint32_t right, |
597 | iterate_extra_t *const ) const; |
598 | |
599 | template<typename iterate_extra_t, |
600 | int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)> |
601 | void iterate_ptr_internal(const uint32_t left, const uint32_t right, |
602 | const subtree &subtree, const uint32_t idx, |
603 | iterate_extra_t *const ); |
604 | |
605 | template<typename iterate_extra_t, |
606 | int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)> |
607 | void iterate_ptr_internal_array(const uint32_t left, const uint32_t right, |
608 | iterate_extra_t *const ); |
609 | |
610 | template<typename iterate_extra_t, |
611 | int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)> |
612 | int iterate_internal(const uint32_t left, const uint32_t right, |
613 | const subtree &subtree, const uint32_t idx, |
614 | iterate_extra_t *const ) const; |
615 | |
616 | void fetch_internal_array(const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const; |
617 | |
618 | void fetch_internal(const subtree &subtree, const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const; |
619 | |
620 | __attribute__((nonnull)) |
621 | void fill_array_with_subtree_offsets(node_offset *const array, const subtree &subtree) const; |
622 | |
623 | __attribute__((nonnull)) |
624 | void rebuild_subtree_from_offsets(subtree *const subtree, const node_offset *const offsets, const uint32_t numvalues); |
625 | |
626 | __attribute__((nonnull)) |
627 | void rebalance(subtree *const subtree); |
628 | |
629 | static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n); |
630 | |
631 | static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n); |
632 | |
633 | static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr); |
634 | |
635 | static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr); |
636 | |
637 | template<typename dmtcmp_t, |
638 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
639 | int find_internal_zero_array(const dmtcmp_t &, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; |
640 | |
641 | template<typename dmtcmp_t, |
642 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
643 | int find_internal_zero(const subtree &subtree, const dmtcmp_t &, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; |
644 | |
645 | template<typename dmtcmp_t, |
646 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
647 | int find_internal_plus_array(const dmtcmp_t &, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; |
648 | |
649 | template<typename dmtcmp_t, |
650 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
651 | int find_internal_plus(const subtree &subtree, const dmtcmp_t &, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; |
652 | |
653 | template<typename dmtcmp_t, |
654 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
655 | int find_internal_minus_array(const dmtcmp_t &, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; |
656 | |
657 | template<typename dmtcmp_t, |
658 | int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)> |
659 | int find_internal_minus(const subtree &subtree, const dmtcmp_t &, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; |
660 | |
661 | // Allocate memory for an array: node_offset[num_idx] from pre-allocated contiguous free space in the mempool. |
662 | // If there is not enough space, returns nullptr. |
663 | node_offset* alloc_temp_node_offsets(uint32_t num_idxs); |
664 | |
665 | // Returns the aligned size of x. |
666 | // If x % ALIGNMENT == 0, returns x |
667 | // o.w. returns x + (ALIGNMENT - (x % ALIGNMENT)) |
668 | uint32_t align(const uint32_t x) const; |
669 | }; |
670 | |
671 | } // namespace toku |
672 | |
673 | // include the implementation here |
674 | #include "dmt.cc" |
675 | |
676 | |