1/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3/*======
4This file is part of PerconaFT.
5
6
7Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
8
9 PerconaFT is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License, version 2,
11 as published by the Free Software Foundation.
12
13 PerconaFT is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
20
21----------------------------------------
22
23 PerconaFT is free software: you can redistribute it and/or modify
24 it under the terms of the GNU Affero General Public License, version 3,
25 as published by the Free Software Foundation.
26
27 PerconaFT is distributed in the hope that it will be useful,
28 but WITHOUT ANY WARRANTY; without even the implied warranty of
29 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 GNU Affero General Public License for more details.
31
32 You should have received a copy of the GNU Affero General Public License
33 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
34======= */
35
36#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
37
38#pragma once
39
40#include "util/dmt.h"
41#include "util/mempool.h"
42
43#include "ft/leafentry.h"
44#include "ft/serialize/wbuf.h"
45
// Key/leafentry pair stored in a dmt. The key bytes are inlined directly
// after the fixed header; the leafentry itself is NOT stored here -- it lives
// in the owning bn_data's leafentry mempool and is referenced by le_offset
// (see bn_data::get_le_from_klpair).
struct klpair_struct {
    uint32_t le_offset;  // Offset of leafentry (in leafentry mempool)
    uint8_t key[0]; // key bytes, inlined (zero-length/flexible array member;
                    // actual length is recovered from the dmt-stored klpair
                    // length via keylen_from_klpair_len)
};
51
// Recover the key length from a klpair's total (dmt-stored) length:
// a klpair is the fixed header (le_offset) immediately followed by the key
// bytes, so keylen = total length - offset of the key member.
static constexpr uint32_t keylen_from_klpair_len(const uint32_t klpair_len) {
    return klpair_len - __builtin_offsetof(klpair_struct, key);
}


// Layout sanity checks: the key bytes must begin exactly sizeof(uint32_t)
// bytes into the struct (right after le_offset, no padding), and the
// zero-length array must not contribute to sizeof(klpair_struct).
static_assert(__builtin_offsetof(klpair_struct, key) == 1*sizeof(uint32_t), "klpair alignment issues");
static_assert(__builtin_offsetof(klpair_struct, key) == sizeof(klpair_struct), "klpair size issues");
59
60// A wrapper for the heaviside function provided to dmt->find*.
61// Needed because the heaviside functions provided to bndata do not know about the internal types.
62// Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions.
63template<typename dmtcmp_t,
64 int (*h)(const DBT &, const dmtcmp_t &)>
65static int klpair_find_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const dmtcmp_t &extra) {
66 DBT kdbt;
67 kdbt.data = const_cast<void*>(reinterpret_cast<const void*>(klpair.key));
68 kdbt.size = keylen_from_klpair_len(klpair_len);
69 return h(kdbt, extra);
70}
71
// Bundles the caller's iterate "extra" together with the bn_data that owns
// the leafentry mempool, so klpair_iterate_wrapper can translate a klpair's
// le_offset into a LEAFENTRY pointer while still forwarding the caller's
// own context.
template<typename inner_iterate_extra_t>
struct klpair_iterate_extra {
    public:
        inner_iterate_extra_t *inner; // the caller's original iterate extra
        const class bn_data * bd;     // owner of the leafentry mempool
};
78
79// A wrapper for the high-order function provided to dmt->iterate*
80// Needed because the heaviside functions provided to bndata do not know about the internal types.
81// Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions.
82template<typename iterate_extra_t,
83 int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t idx, iterate_extra_t *const)>
84static int klpair_iterate_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, klpair_iterate_extra<iterate_extra_t> *const extra) {
85 const void* key = &klpair.key;
86 LEAFENTRY le = extra->bd->get_le_from_klpair(&klpair);
87 return f(key, keylen_from_klpair_len(klpair_len), le, idx, extra->inner);
88}
89
90
91namespace toku {
92// dmt writer for klpair_struct
93class klpair_dmtwriter {
94 public:
95 // Return the size needed for the klpair_struct that this dmtwriter represents
96 size_t get_size(void) const {
97 return sizeof(klpair_struct) + this->keylen;
98 }
99 // Write the klpair_struct this dmtwriter represents to a destination
100 void write_to(klpair_struct *const dest) const {
101 dest->le_offset = this->le_offset;
102 memcpy(dest->key, this->keyp, this->keylen);
103 }
104
105 klpair_dmtwriter(uint32_t _keylen, uint32_t _le_offset, const void* _keyp)
106 : keylen(_keylen), le_offset(_le_offset), keyp(_keyp) {}
107 klpair_dmtwriter(const uint32_t klpair_len, klpair_struct *const src)
108 : keylen(keylen_from_klpair_len(klpair_len)), le_offset(src->le_offset), keyp(src->key) {}
109 private:
110 const uint32_t keylen;
111 const uint32_t le_offset;
112 const void* keyp;
113};
114}
115
// A dmt whose stored values are klpairs (fixed header + inlined key bytes).
typedef toku::dmt<klpair_struct, klpair_struct*, toku::klpair_dmtwriter> klpair_dmt_t;
// This class stores the data associated with a basement node:
// a dmt of klpairs (keys + leafentry offsets) plus a mempool holding the
// leafentries those offsets point into.
class bn_data {
public:
    // Initialize an empty bn_data _without_ a dmt backing.
    // Externally only used for deserialization.
    void init_zero(void);

    // Initialize an empty bn_data _with_ a dmt
    void initialize_empty(void);

    // Deserialize a bn_data from rbuf.
    // This is the entry point for deserialization.
    void deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version);

    // Retrieve the memory footprint of this basement node.
    // May over or under count: see Percona/PerconaFT#136
    // Also see dmt's implementation.
    uint64_t get_memory_size(void);

    // Get the serialized size of this basement node.
    uint64_t get_disk_size(void);

    // Perform (paranoid) verification that all leafentries are fully contained within the mempool
    void verify_mempool(void);

    // size() of key dmt: the number of klpairs stored.
    uint32_t num_klpairs(void) const;

    // iterate() on key dmt (and associated leafentries).
    // Visits every klpair in index order; equivalent to
    // iterate_on_range(0, num_klpairs(), iterate_extra).
    template<typename iterate_extra_t,
             int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
    int iterate(iterate_extra_t *const iterate_extra) const {
        return iterate_on_range<iterate_extra_t, f>(0, num_klpairs(), iterate_extra);
    }

    // iterate_on_range() on key dmt (and associated leafentries).
    // Wraps the caller's extra together with `this` so klpair_iterate_wrapper
    // can translate each klpair's le_offset back into a LEAFENTRY.
    template<typename iterate_extra_t,
             int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
    int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const {
        klpair_iterate_extra<iterate_extra_t> klpair_extra = { iterate_extra, this };
        return m_buffer.iterate_on_range< klpair_iterate_extra<iterate_extra_t>, klpair_iterate_wrapper<iterate_extra_t, f> >(left, right, &klpair_extra);
    }

    // find_zero() on key dmt.
    // On success (return 0), the optional outputs are filled in:
    //   *value  (if value != nullptr)        -- the matching leafentry
    //   *key/*keylen (if key != nullptr)     -- the matching key bytes/length
    //   *idxp                                -- index of the match
    // Contract: key and keylen must be both null or both non-null
    // (enforced by the paranoid invariants below).
    template<typename dmtcmp_t,
             int (*h)(const DBT &, const dmtcmp_t &)>
    int find_zero(const dmtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
        klpair_struct* klpair = nullptr;
        uint32_t klpair_len;
        int r = m_buffer.find_zero< dmtcmp_t, klpair_find_wrapper<dmtcmp_t, h> >(extra, &klpair_len, &klpair, idxp);
        if (r == 0) {
            if (value) {
                *value = get_le_from_klpair(klpair);
            }
            if (key) {
                paranoid_invariant_notnull(keylen);
                *key = klpair->key;
                *keylen = keylen_from_klpair_len(klpair_len);
            }
            else {
                paranoid_invariant_null(keylen);
            }
        }
        return r;
    }

    // find() on key dmt (and associated leafentries).
    // Like find_zero() but searches in the given direction (semantics of
    // `direction` are defined by dmt::find). Same output-parameter contract
    // as find_zero().
    template<typename dmtcmp_t,
             int (*h)(const DBT &, const dmtcmp_t &)>
    int find(const dmtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
        klpair_struct* klpair = nullptr;
        uint32_t klpair_len;
        int r = m_buffer.find< dmtcmp_t, klpair_find_wrapper<dmtcmp_t, h> >(extra, direction, &klpair_len, &klpair, idxp);
        if (r == 0) {
            if (value) {
                *value = get_le_from_klpair(klpair);
            }
            if (key) {
                paranoid_invariant_notnull(keylen);
                *key = klpair->key;
                *keylen = keylen_from_klpair_len(klpair_len);
            }
            else {
                paranoid_invariant_null(keylen);
            }
        }
        return r;
    }

    // Fetch leafentry by index
    __attribute__((__nonnull__))
    int fetch_le(uint32_t idx, LEAFENTRY *le);
    // Fetch (leafentry, key, keylen) by index
    __attribute__((__nonnull__))
    int fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key);
    // Fetch (serialized size of leafentry, key, and keylen) by index
    __attribute__((__nonnull__))
    int fetch_klpair_disksize(uint32_t idx, size_t *size);
    // Fetch (key, keylen) by index
    __attribute__((__nonnull__))
    int fetch_key_and_len(uint32_t idx, uint32_t *len, void** key);

    // Move leafentries (and associated key/keylens) from this basement node to dest_bd.
    // NOTE(review): the previous comment said "Moves indexes [lbi-ube)", but
    // lbi/ube are not parameters of this function; presumably the moved range
    // is [first_index_for_dest, num_klpairs()) -- confirm against the
    // implementation.
    __attribute__((__nonnull__))
    void split_klpairs(bn_data* dest_bd, uint32_t first_index_for_dest);

    // Destroy this basement node and free memory.
    void destroy(void);

    // Uses sorted array as input for this basement node.
    // Expects this to be a basement node just initialized with initialize_empty()
    void set_contents_as_clone_of_sorted_array(
        uint32_t num_les,
        const void** old_key_ptrs,
        uint32_t* old_keylens,
        LEAFENTRY* old_les,
        size_t *le_sizes,
        size_t total_key_size,
        size_t total_le_size
        );

    // Make this basement node a clone of orig_bn_data.
    // orig_bn_data still owns all its memory (dmt, mempool)
    // this basement node will have a new dmt, mempool containing same data.
    void clone(bn_data* orig_bn_data);

    // Delete klpair index idx with provided keylen and old leafentry with size old_le_size
    void delete_leafentry (
        uint32_t idx,
        uint32_t keylen,
        uint32_t old_le_size
        );

    // Allocates space in the mempool to store a new leafentry.
    // This may require reorganizing the mempool and updating the dmt.
    __attribute__((__nonnull__))
    void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size,
                                 uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free);

    // Allocates space in the mempool to store a new leafentry
    // and inserts a new key into the dmt
    // This may require reorganizing the mempool and updating the dmt.
    __attribute__((__nonnull__))
    void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space, void **const maybe_free);

    // Gets a leafentry given a klpair from this basement node
    // (translates the klpair's le_offset into a pointer into m_buffer_mempool).
    LEAFENTRY get_le_from_klpair(const klpair_struct *klpair) const;

    // Serialize this basement node's contents to a wbuf.
    void serialize_to_wbuf(struct wbuf *const wb);

    // Prepares this basement node for serialization.
    // Must be called before serializing this basement node.
    // Between calling prepare_to_serialize and actually serializing, the basement node may not be modified
    void prepare_to_serialize(void);

    // Serialize the basement node header to a wbuf
    // Requires prepare_to_serialize() to have been called first.
    void serialize_header(struct wbuf *wb) const;

    // Serialize all keys and leafentries to a wbuf
    // Requires prepare_to_serialize() (and serialize_header()) has been called first.
    // Currently only supported when all keys are fixed-length.
    void serialize_rest(struct wbuf *wb) const;

    // Size in bytes of the serialized header written by serialize_header();
    // one summand per header field.
    static const uint32_t HEADER_LENGTH = 0
        + sizeof(uint32_t) // key_data_size
        + sizeof(uint32_t) // val_data_size
        + sizeof(uint32_t) // fixed_key_length
        + sizeof(uint8_t) // all_keys_same_length
        + sizeof(uint8_t) // keys_vals_separate
        + 0;
private:

    // split_klpairs_extra should be a local class in split_klpairs, but
    // the dmt template parameter for iterate needs linkage, so it has to be a
    // separate class, but we want it to be able to call e.g. add_key
    friend class split_klpairs_extra;

    // Allocates space in the mempool.
    // If there is insufficient space, the mempool is enlarged and leafentries may be shuffled to reduce fragmentation.
    // If shuffling happens, the offsets stored in the dmt are updated.
    LEAFENTRY mempool_malloc_and_update_dmt(size_t size, void **maybe_free);

    // Change the size of the mempool to support what is already in it, plus added_size.
    // possibly "compress" by shuffling leafentries around to reduce fragmentation to 0.
    // If fragmentation is already 0 and force_compress is not true, shuffling may be skipped.
    // If shuffling happens, leafentries will be stored in the mempool in sorted order.
    void dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress);

    // Note that a key was added (for maintaining disk-size of this basement node)
    void add_key(uint32_t keylen);

    // Note that multiple keys were added (for maintaining disk-size of this basement node)
    void add_keys(uint32_t n_keys, uint32_t combined_klpair_len);

    // Note that a key was removed (for maintaining disk-size of this basement node)
    void remove_key(uint32_t keylen);

    klpair_dmt_t m_buffer; // pointers to individual leaf entries
    struct mempool m_buffer_mempool; // storage for all leaf entries

    friend class bndata_bugfix_test;

    // Get the serialized size of a klpair.
    // As of Jan 14, 2014, serialized size of a klpair is independent of whether this basement node has fixed-length keys.
    uint32_t klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const;

    // The disk/memory size of all keys. (Note that the size of memory for the leafentries is maintained by m_buffer_mempool)
    size_t m_disksize_of_keys;

    // Deserialize this basement node from rbuf
    // all keys will be first followed by all leafentries (both in sorted order)
    void initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version,
                                                uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length,
                                                uint32_t fixed_klpair_length);
};
334