1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #include <my_global.h> |
40 | #include <ft/bndata.h> |
41 | #include <ft/ft-internal.h> |
42 | |
43 | using namespace toku; |
44 | uint32_t bn_data::klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const { |
45 | return sizeof(*klpair) + keylen_from_klpair_len(klpair_len) + leafentry_disksize(get_le_from_klpair(klpair)); |
46 | } |
47 | |
48 | void bn_data::init_zero() { |
49 | toku_mempool_zero(&m_buffer_mempool); |
50 | m_disksize_of_keys = 0; |
51 | } |
52 | |
53 | void bn_data::initialize_empty() { |
54 | init_zero(); |
55 | m_buffer.create(); |
56 | } |
57 | |
58 | void bn_data::add_key(uint32_t keylen) { |
59 | m_disksize_of_keys += sizeof(keylen) + keylen; |
60 | } |
61 | |
62 | void bn_data::add_keys(uint32_t n_keys, uint32_t combined_klpair_len) { |
63 | invariant(n_keys * sizeof(uint32_t) <= combined_klpair_len); |
64 | m_disksize_of_keys += combined_klpair_len; |
65 | } |
66 | |
67 | void bn_data::remove_key(uint32_t keylen) { |
68 | m_disksize_of_keys -= sizeof(keylen) + keylen; |
69 | } |
70 | |
71 | // Deserialize from format optimized for keys being inlined. |
72 | // Currently only supports fixed-length keys. |
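// The rbuf is expected to hold key_data_size bytes of packed, sorted klpairs followed by
// val_data_size bytes of packed leafentries; the klpairs seed the dmt and the leafentries
// are copied verbatim into this basement node's mempool.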
73 | void bn_data::initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version UU(), |
74 | uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length, |
75 | uint32_t fixed_klpair_length) { |
76 | paranoid_invariant(version >= FT_LAYOUT_VERSION_26); // Support was added @26 |
77 | uint32_t ndone_before = rb->ndone; |
78 | init_zero(); |
79 | invariant(all_keys_same_length); // Until otherwise supported. |
80 | const void *keys_src; |
81 | rbuf_literal_bytes(rb, &keys_src, key_data_size); |
82 | //Generate dmt |
83 | this->m_buffer.create_from_sorted_memory_of_fixed_size_elements( |
84 | keys_src, num_entries, key_data_size, fixed_klpair_length); |
85 | toku_mempool_construct(&this->m_buffer_mempool, val_data_size); |
86 | |
87 | const void *vals_src; |
88 | rbuf_literal_bytes(rb, &vals_src, val_data_size); |
89 | |
90 | if (num_entries > 0) { |
91 | void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size); |
92 | paranoid_invariant_notnull(vals_dest); |
93 | memcpy(vals_dest, vals_src, val_data_size); |
94 | } |
95 | |
96 | add_keys(num_entries, num_entries * fixed_klpair_length); |
97 | |
98 | toku_note_deserialized_basement_node(all_keys_same_length); |
99 | |
100 | invariant(rb->ndone - ndone_before == data_size); |
101 | } |
102 | |
103 | static int |
104 | wbufwriteleafentry(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t UU(idx), struct wbuf * const wb) { |
105 | // need to pack the leafentry as it was in versions |
106 | // where the key was integrated into it (< 26) |
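    // Per-entry layout produced below (legacy inline-key format):
    //   LE_CLEAN: type (1) | keylen (4) | vallen (4) | key | val
    //   LE_MVCC:  type (1) | keylen (4) | num_cxrs (4) | num_pxrs (1) | key | xrs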
107 | uint32_t begin_spot UU() = wb->ndone; |
108 | uint32_t le_disk_size = leafentry_disksize(le); |
109 | wbuf_nocrc_uint8_t(wb, le->type); |
110 | wbuf_nocrc_uint32_t(wb, keylen); |
111 | if (le->type == LE_CLEAN) { |
112 | wbuf_nocrc_uint32_t(wb, le->u.clean.vallen); |
113 | wbuf_nocrc_literal_bytes(wb, key, keylen); |
114 | wbuf_nocrc_literal_bytes(wb, le->u.clean.val, le->u.clean.vallen); |
115 | } |
116 | else { |
117 | paranoid_invariant(le->type == LE_MVCC); |
118 | wbuf_nocrc_uint32_t(wb, le->u.mvcc.num_cxrs); |
119 | wbuf_nocrc_uint8_t(wb, le->u.mvcc.num_pxrs); |
120 | wbuf_nocrc_literal_bytes(wb, key, keylen); |
121 | wbuf_nocrc_literal_bytes(wb, le->u.mvcc.xrs, le_disk_size - (1 + 4 + 1)); |
122 | } |
123 | uint32_t end_spot UU() = wb->ndone; |
124 | paranoid_invariant((end_spot - begin_spot) == keylen + sizeof(keylen) + le_disk_size); |
125 | return 0; |
126 | } |
127 | |
128 | void bn_data::serialize_to_wbuf(struct wbuf *const wb) { |
129 | prepare_to_serialize(); |
130 | serialize_header(wb); |
131 | if (m_buffer.value_length_is_fixed()) { |
132 | serialize_rest(wb); |
133 | } else { |
134 | // |
135 | // iterate over leafentries and place them into the buffer |
136 | // |
137 | iterate<struct wbuf, wbufwriteleafentry>(wb); |
138 | } |
139 | } |
140 | |
141 | // If we have fixed-length keys, we prepare the dmt and mempool. |
142 | // The mempool is prepared by removing any fragmented space and ordering leafentries in the same order as their keys. |
143 | void bn_data::prepare_to_serialize(void) { |
144 | if (m_buffer.value_length_is_fixed()) { |
145 | m_buffer.prepare_for_serialize(); |
146 | dmt_compress_kvspace(0, nullptr, true); // Gets it ready for easy serialization. |
147 | } |
148 | } |
149 | |
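// Header layout introduced in layout version 26 (deserialize_from_rbuf checks that it
// occupies exactly HEADER_LENGTH bytes):
//   key_data_size (4) | val_data_size (4) | fixed_klpair_length (4) |
//   all_keys_same_length (1) | keys_vals_separate (1)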
void bn_data::serialize_header(struct wbuf *wb) const {
151 | bool fixed = m_buffer.value_length_is_fixed(); |
152 | |
153 | //key_data_size |
154 | wbuf_nocrc_uint(wb, m_disksize_of_keys); |
155 | //val_data_size |
156 | wbuf_nocrc_uint(wb, toku_mempool_get_used_size(&m_buffer_mempool)); |
157 | //fixed_klpair_length |
158 | wbuf_nocrc_uint(wb, m_buffer.get_fixed_length()); |
159 | // all_keys_same_length |
160 | wbuf_nocrc_uint8_t(wb, fixed); |
161 | // keys_vals_separate |
162 | wbuf_nocrc_uint8_t(wb, fixed); |
163 | } |
164 | |
165 | void bn_data::serialize_rest(struct wbuf *wb) const { |
166 | //Write keys |
167 | invariant(m_buffer.value_length_is_fixed()); //Assumes prepare_to_serialize was called |
168 | m_buffer.serialize_values(m_disksize_of_keys, wb); |
169 | |
170 | //Write leafentries |
171 | //Just ran dmt_compress_kvspace so there is no fragmentation and also leafentries are in sorted order. |
172 | paranoid_invariant(toku_mempool_get_frag_size(&m_buffer_mempool) == 0); |
173 | uint32_t val_data_size = toku_mempool_get_used_size(&m_buffer_mempool); |
174 | wbuf_nocrc_literal_bytes(wb, toku_mempool_get_base(&m_buffer_mempool), val_data_size); |
175 | } |
176 | |
177 | // Deserialize from rbuf |
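// For layout version 26 and newer the buffer begins with the header written by serialize_header;
// when keys and vals were serialized separately (fixed-length keys), the work is delegated to
// initialize_from_separate_keys_and_vals. Otherwise, and for version 25 and older, entries are
// stored in the legacy inline-key format and are unpacked here into klpairs (dmt) and
// leafentries (mempool).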
178 | void bn_data::deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version) { |
179 | uint32_t key_data_size = data_size; // overallocate if < version 26 (best guess that is guaranteed not too small) |
180 | uint32_t val_data_size = data_size; // overallocate if < version 26 (best guess that is guaranteed not too small) |
181 | |
182 | bool all_keys_same_length = false; |
183 | bool keys_vals_separate = false; |
184 | uint32_t fixed_klpair_length = 0; |
185 | |
186 | // In version 25 and older there is no header. Skip reading header for old version. |
187 | if (version >= FT_LAYOUT_VERSION_26) { |
188 | uint32_t ndone_before = rb->ndone; |
189 | key_data_size = rbuf_int(rb); |
190 | val_data_size = rbuf_int(rb); |
191 | fixed_klpair_length = rbuf_int(rb); // 0 if !all_keys_same_length |
192 | all_keys_same_length = rbuf_char(rb); |
193 | keys_vals_separate = rbuf_char(rb); |
194 | invariant(all_keys_same_length == keys_vals_separate); // Until we support otherwise |
        uint32_t header_size = rb->ndone - ndone_before;
196 | data_size -= header_size; |
197 | invariant(header_size == HEADER_LENGTH); |
198 | if (keys_vals_separate) { |
199 | invariant(fixed_klpair_length >= sizeof(klpair_struct) || num_entries == 0); |
200 | initialize_from_separate_keys_and_vals(num_entries, rb, data_size, version, |
201 | key_data_size, val_data_size, all_keys_same_length, |
202 | fixed_klpair_length); |
203 | return; |
204 | } |
205 | } |
206 | // Version >= 26 and version 25 deserialization are now identical except that <= 25 might allocate too much memory. |
207 | const void *bytes; |
208 | rbuf_literal_bytes(rb, &bytes, data_size); |
209 | const unsigned char *CAST_FROM_VOIDP(buf, bytes); |
210 | if (data_size == 0) { |
211 | invariant_zero(num_entries); |
212 | } |
213 | init_zero(); |
214 | klpair_dmt_t::builder dmt_builder; |
215 | dmt_builder.create(num_entries, key_data_size); |
216 | |
217 | // TODO(leif): clean this up (#149) |
218 | unsigned char *newmem = nullptr; |
219 | // add 25% extra wiggle room |
220 | uint32_t allocated_bytes_vals = val_data_size + (val_data_size / 4); |
221 | CAST_FROM_VOIDP(newmem, toku_xmalloc(allocated_bytes_vals)); |
222 | const unsigned char* curr_src_pos = buf; |
223 | unsigned char* curr_dest_pos = newmem; |
224 | for (uint32_t i = 0; i < num_entries; i++) { |
225 | uint8_t curr_type = curr_src_pos[0]; |
226 | curr_src_pos++; |
227 | // first thing we do is lay out the key, |
228 | // to do so, we must extract it from the leafentry |
229 | // and write it in |
230 | uint32_t keylen = 0; |
231 | const void* keyp = nullptr; |
232 | keylen = *(uint32_t *)curr_src_pos; |
233 | curr_src_pos += sizeof(uint32_t); |
234 | uint32_t clean_vallen = 0; |
235 | uint32_t num_cxrs = 0; |
236 | uint8_t num_pxrs = 0; |
237 | if (curr_type == LE_CLEAN) { |
238 | clean_vallen = toku_dtoh32(*(uint32_t *)curr_src_pos); |
239 | curr_src_pos += sizeof(clean_vallen); // val_len |
240 | keyp = curr_src_pos; |
241 | curr_src_pos += keylen; |
242 | } |
243 | else { |
244 | paranoid_invariant(curr_type == LE_MVCC); |
245 | num_cxrs = toku_htod32(*(uint32_t *)curr_src_pos); |
246 | curr_src_pos += sizeof(uint32_t); // num_cxrs |
247 | num_pxrs = curr_src_pos[0]; |
248 | curr_src_pos += sizeof(uint8_t); //num_pxrs |
249 | keyp = curr_src_pos; |
250 | curr_src_pos += keylen; |
251 | } |
252 | uint32_t le_offset = curr_dest_pos - newmem; |
253 | dmt_builder.append(klpair_dmtwriter(keylen, le_offset, keyp)); |
254 | add_key(keylen); |
255 | |
256 | // now curr_dest_pos is pointing to where the leafentry should be packed |
257 | curr_dest_pos[0] = curr_type; |
258 | curr_dest_pos++; |
259 | if (curr_type == LE_CLEAN) { |
260 | *(uint32_t *)curr_dest_pos = toku_htod32(clean_vallen); |
261 | curr_dest_pos += sizeof(clean_vallen); |
262 | memcpy(curr_dest_pos, curr_src_pos, clean_vallen); // copy the val |
263 | curr_dest_pos += clean_vallen; |
264 | curr_src_pos += clean_vallen; |
265 | } |
266 | else { |
267 | // pack num_cxrs and num_pxrs |
268 | *(uint32_t *)curr_dest_pos = toku_htod32(num_cxrs); |
269 | curr_dest_pos += sizeof(num_cxrs); |
270 | *(uint8_t *)curr_dest_pos = num_pxrs; |
271 | curr_dest_pos += sizeof(num_pxrs); |
272 | // now we need to pack the rest of the data |
273 | uint32_t num_rest_bytes = leafentry_rest_memsize(num_pxrs, num_cxrs, const_cast<uint8_t*>(curr_src_pos)); |
274 | memcpy(curr_dest_pos, curr_src_pos, num_rest_bytes); |
275 | curr_dest_pos += num_rest_bytes; |
276 | curr_src_pos += num_rest_bytes; |
277 | } |
278 | } |
279 | dmt_builder.build(&this->m_buffer); |
280 | toku_note_deserialized_basement_node(m_buffer.value_length_is_fixed()); |
281 | |
282 | uint32_t num_bytes_read = (uint32_t)(curr_src_pos - buf); |
283 | invariant(num_bytes_read == data_size); |
284 | |
285 | uint32_t num_bytes_written = curr_dest_pos - newmem + m_disksize_of_keys; |
286 | invariant(num_bytes_written == data_size); |
287 | toku_mempool_init(&m_buffer_mempool, newmem, (size_t)(curr_dest_pos - newmem), allocated_bytes_vals); |
288 | |
289 | invariant(get_disk_size() == data_size); |
290 | // Versions older than 26 might have allocated too much memory. Try to shrink the mempool now that we |
291 | // know how much memory we need. |
292 | if (version < FT_LAYOUT_VERSION_26) { |
293 | // Unnecessary after version 26 |
294 | // Reallocate smaller mempool to save memory |
295 | invariant_zero(toku_mempool_get_frag_size(&m_buffer_mempool)); |
296 | toku_mempool_realloc_larger(&m_buffer_mempool, toku_mempool_get_used_size(&m_buffer_mempool)); |
297 | } |
298 | } |
299 | |
300 | uint64_t bn_data::get_memory_size() { |
301 | uint64_t retval = 0; |
302 | //TODO: Maybe ask for memory_size instead of mempool_footprint (either this todo or the next) |
303 | // include fragmentation overhead but do not include space in the |
304 | // mempool that has not yet been allocated for leaf entries |
305 | size_t poolsize = toku_mempool_footprint(&m_buffer_mempool); |
306 | retval += poolsize; |
307 | // This one includes not-yet-allocated for nodes (just like old constant-key omt) |
308 | //TODO: Maybe ask for mempool_footprint instead of memory_size. |
309 | retval += m_buffer.memory_size(); |
310 | invariant(retval >= get_disk_size()); |
311 | return retval; |
312 | } |
313 | |
314 | void bn_data::delete_leafentry ( |
315 | uint32_t idx, |
316 | uint32_t keylen, |
317 | uint32_t old_le_size |
318 | ) |
319 | { |
320 | remove_key(keylen); |
321 | m_buffer.delete_at(idx); |
322 | toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size); |
323 | } |
324 | |
325 | /* mempool support */ |
326 | |
327 | struct dmt_compressor_state { |
328 | struct mempool *new_kvspace; |
329 | class bn_data *bd; |
330 | }; |
331 | |
332 | static int move_it (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct dmt_compressor_state * const oc) { |
333 | LEAFENTRY old_le = oc->bd->get_le_from_klpair(klpair); |
334 | uint32_t size = leafentry_memsize(old_le); |
335 | void* newdata = toku_mempool_malloc(oc->new_kvspace, size); |
336 | paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen |
337 | memcpy(newdata, old_le, size); |
338 | klpair->le_offset = toku_mempool_get_offset_from_pointer_and_base(oc->new_kvspace, newdata); |
339 | return 0; |
340 | } |
341 | |
342 | // Compress things, and grow or shrink the mempool if needed. |
343 | // May (always if force_compress) have a side effect of putting contents of mempool in sorted order. |
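// When compressing, every leafentry is copied (via move_it) into a fresh mempool in dmt (key)
// order and its klpair's le_offset is updated; otherwise the used region of the old mempool is
// copied wholesale with a single memcpy, so existing offsets remain valid.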
344 | void bn_data::dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress) { |
345 | uint32_t total_size_needed = toku_mempool_get_used_size(&m_buffer_mempool) + added_size; |
346 | |
347 | // If there is no fragmentation, e.g. in serial inserts, we can just increase the size |
348 | // of the mempool and move things over with a cheap memcpy. If force_compress is true, |
349 | // the caller needs the side effect that all contents are put in sorted order. |
350 | bool do_compress = toku_mempool_get_frag_size(&m_buffer_mempool) > 0 || force_compress; |
351 | |
352 | void *old_mempool_base = toku_mempool_get_base(&m_buffer_mempool); |
353 | struct mempool new_kvspace; |
354 | if (do_compress) { |
355 | size_t requested_size = force_compress ? total_size_needed : ((total_size_needed * 3) / 2); |
356 | toku_mempool_construct(&new_kvspace, requested_size); |
357 | struct dmt_compressor_state oc = { &new_kvspace, this }; |
358 | m_buffer.iterate_ptr< decltype(oc), move_it >(&oc); |
359 | } else { |
360 | toku_mempool_construct(&new_kvspace, total_size_needed); |
361 | size_t old_offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool); |
362 | void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit); |
363 | memcpy(new_mempool_base, old_mempool_base, old_offset_limit); |
364 | } |
365 | |
366 | if (maybe_free) { |
367 | *maybe_free = old_mempool_base; |
368 | } else { |
369 | toku_free(old_mempool_base); |
370 | } |
371 | m_buffer_mempool = new_kvspace; |
372 | } |
373 | |
// Effect: Allocate a new object of size SIZE in MP. If MP runs out of space, allocate a new mempool
//  and copy all the leafentries (referenced by the klpairs in the dmt, which point into the old mempool)
//  into the new mempool.
//  If MAYBE_FREE is nullptr then free the old mempool's space.
//  Otherwise, store the old mempool's space in maybe_free.
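// Note: any compaction done here already fixes up the le_offsets of existing klpairs; recording
// the offset of the newly allocated leafentry in the dmt is left to the caller
// (see get_space_for_insert and get_space_for_overwrite).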
378 | LEAFENTRY bn_data::mempool_malloc_and_update_dmt(size_t size, void **maybe_free) { |
379 | void *v = toku_mempool_malloc(&m_buffer_mempool, size); |
380 | if (v == nullptr) { |
381 | dmt_compress_kvspace(size, maybe_free, false); |
382 | v = toku_mempool_malloc(&m_buffer_mempool, size); |
383 | paranoid_invariant_notnull(v); |
384 | } |
385 | return (LEAFENTRY)v; |
386 | } |
387 | |
388 | void bn_data::get_space_for_overwrite( |
389 | uint32_t idx, |
390 | const void* keyp UU(), |
391 | uint32_t keylen UU(), |
392 | uint32_t old_keylen, |
393 | uint32_t old_le_size, |
394 | uint32_t new_size, |
395 | LEAFENTRY* new_le_space, |
396 | void **const maybe_free |
397 | ) |
398 | { |
399 | *maybe_free = nullptr; |
400 | LEAFENTRY new_le = mempool_malloc_and_update_dmt(new_size, maybe_free); |
401 | toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size); |
402 | klpair_struct* klp = nullptr; |
403 | uint32_t klpair_len; |
404 | int r = m_buffer.fetch(idx, &klpair_len, &klp); |
405 | invariant_zero(r); |
406 | paranoid_invariant(klp!=nullptr); |
407 | // Old key length should be consistent with what is stored in the DMT |
408 | invariant(keylen_from_klpair_len(klpair_len) == old_keylen); |
409 | |
410 | size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le); |
411 | paranoid_invariant(new_le_offset <= UINT32_MAX - new_size); // Not using > 4GB |
412 | klp->le_offset = new_le_offset; |
413 | |
414 | paranoid_invariant(new_le == get_le_from_klpair(klp)); |
415 | *new_le_space = new_le; |
416 | } |
417 | |
418 | void bn_data::get_space_for_insert( |
419 | uint32_t idx, |
420 | const void* keyp, |
421 | uint32_t keylen, |
422 | size_t size, |
423 | LEAFENTRY* new_le_space, |
424 | void **const maybe_free |
425 | ) |
426 | { |
427 | add_key(keylen); |
428 | |
429 | *maybe_free = nullptr; |
430 | LEAFENTRY new_le = mempool_malloc_and_update_dmt(size, maybe_free); |
431 | size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le); |
432 | |
433 | klpair_dmtwriter kl(keylen, new_le_offset, keyp); |
434 | m_buffer.insert_at(kl, idx); |
435 | |
436 | *new_le_space = new_le; |
437 | } |
438 | |
class split_klpairs_extra {
    bn_data *const m_left_bn;
    bn_data *const m_right_bn;
    klpair_dmt_t::builder *const m_left_builder;
    klpair_dmt_t::builder *const m_right_builder;
    struct mempool *const m_left_dest_mp;
    uint32_t m_split_at;

    struct mempool *left_dest_mp(void) const { return m_left_dest_mp; }
    struct mempool *right_dest_mp(void) const { return &m_right_bn->m_buffer_mempool; }
449 | |
    void copy_klpair(const uint32_t klpair_len, const klpair_struct &klpair,
451 | klpair_dmt_t::builder *const builder, |
452 | struct mempool *const dest_mp, |
453 | bn_data *const bn) { |
454 | LEAFENTRY old_le = m_left_bn->get_le_from_klpair(&klpair); |
455 | size_t le_size = leafentry_memsize(old_le); |
456 | |
457 | void *new_le = toku_mempool_malloc(dest_mp, le_size); |
458 | paranoid_invariant_notnull(new_le); |
459 | memcpy(new_le, old_le, le_size); |
460 | size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(dest_mp, new_le); |
461 | size_t keylen = keylen_from_klpair_len(klpair_len); |
462 | builder->append(klpair_dmtwriter(keylen, le_offset, klpair.key)); |
463 | |
464 | bn->add_key(keylen); |
465 | } |
466 | |
    int move_leafentry(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx) {
468 | m_left_bn->remove_key(keylen_from_klpair_len(klpair_len)); |
469 | |
470 | if (idx < m_split_at) { |
471 | copy_klpair(klpair_len, klpair, m_left_builder, left_dest_mp(), m_left_bn); |
472 | } else { |
473 | copy_klpair(klpair_len, klpair, m_right_builder, right_dest_mp(), m_right_bn); |
474 | } |
475 | return 0; |
476 | } |
477 | |
478 | public: |
    split_klpairs_extra(bn_data *const left_bn, bn_data *const right_bn,
480 | klpair_dmt_t::builder *const left_builder, |
481 | klpair_dmt_t::builder *const right_builder, |
482 | struct mempool *const left_new_mp, |
483 | uint32_t split_at) |
484 | : m_left_bn(left_bn), |
485 | m_right_bn(right_bn), |
486 | m_left_builder(left_builder), |
487 | m_right_builder(right_builder), |
488 | m_left_dest_mp(left_new_mp), |
489 | m_split_at(split_at) {} |
    static int cb(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, split_klpairs_extra *const thisp) {
491 | return thisp->move_leafentry(klpair_len, klpair, idx); |
492 | } |
493 | }; |
494 | |
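// Move all klpairs with index >= split_at (and their leafentries) into right_bd, leaving the
// rest in this basement node. Both sides get freshly built dmts and freshly constructed
// mempools, which are shrunk to their used sizes at the end.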
495 | void bn_data::split_klpairs( |
496 | bn_data* right_bd, |
497 | uint32_t split_at //lower bound inclusive for right_bd |
498 | ) |
499 | { |
500 | // We use move_leafentries_to during a split, and the split algorithm should never call this |
501 | // if it's splitting on a boundary, so there must be some leafentries in the range to move. |
502 | paranoid_invariant(split_at < num_klpairs()); |
503 | |
504 | right_bd->init_zero(); |
505 | |
506 | size_t mpsize = toku_mempool_get_used_size(&m_buffer_mempool); // overkill, but safe |
507 | |
508 | struct mempool new_left_mp; |
509 | toku_mempool_construct(&new_left_mp, mpsize); |
510 | |
511 | struct mempool *right_mp = &right_bd->m_buffer_mempool; |
512 | toku_mempool_construct(right_mp, mpsize); |
513 | |
514 | klpair_dmt_t::builder left_dmt_builder; |
515 | left_dmt_builder.create(split_at, m_disksize_of_keys); // overkill, but safe (builder will realloc at the end) |
516 | |
517 | klpair_dmt_t::builder right_dmt_builder; |
518 | right_dmt_builder.create(num_klpairs() - split_at, m_disksize_of_keys); // overkill, but safe (builder will realloc at the end) |
519 | |
    split_klpairs_extra extra(this, right_bd, &left_dmt_builder, &right_dmt_builder, &new_left_mp, split_at);
521 | |
522 | int r = m_buffer.iterate<split_klpairs_extra, split_klpairs_extra::cb>(&extra); |
523 | invariant_zero(r); |
524 | |
525 | m_buffer.destroy(); |
526 | toku_mempool_destroy(&m_buffer_mempool); |
527 | |
528 | m_buffer_mempool = new_left_mp; |
529 | |
530 | left_dmt_builder.build(&m_buffer); |
531 | right_dmt_builder.build(&right_bd->m_buffer); |
532 | |
533 | // Potentially shrink memory pool for destination. |
534 | // We overallocated ("overkill") above |
535 | struct mempool *const left_mp = &m_buffer_mempool; |
536 | paranoid_invariant_zero(toku_mempool_get_frag_size(left_mp)); |
537 | toku_mempool_realloc_larger(left_mp, toku_mempool_get_used_size(left_mp)); |
538 | paranoid_invariant_zero(toku_mempool_get_frag_size(right_mp)); |
539 | toku_mempool_realloc_larger(right_mp, toku_mempool_get_used_size(right_mp)); |
540 | } |
541 | |
542 | uint64_t bn_data::get_disk_size() { |
543 | return m_disksize_of_keys + |
544 | toku_mempool_get_used_size(&m_buffer_mempool); |
545 | } |
546 | |
547 | struct verify_le_in_mempool_state { |
548 | size_t offset_limit; |
549 | class bn_data *bd; |
550 | }; |
551 | |
552 | static int verify_le_in_mempool (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct verify_le_in_mempool_state * const state) { |
553 | invariant(klpair->le_offset < state->offset_limit); |
554 | |
555 | LEAFENTRY le = state->bd->get_le_from_klpair(klpair); |
556 | uint32_t size = leafentry_memsize(le); |
557 | |
558 | size_t end_offset = klpair->le_offset+size; |
559 | |
560 | invariant(end_offset <= state->offset_limit); |
561 | return 0; |
562 | } |
563 | |
564 | //This is a debug-only (paranoid) verification. |
565 | //Verifies the dmt is valid, and all leafentries are entirely in the mempool's memory. |
566 | void bn_data::verify_mempool(void) { |
567 | //Verify the dmt itself <- paranoid and slow |
568 | m_buffer.verify(); |
569 | |
570 | verify_le_in_mempool_state state = { .offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool), .bd = this }; |
571 | //Verify every leafentry pointed to by the keys in the dmt are fully inside the mempool |
572 | m_buffer.iterate_ptr< decltype(state), verify_le_in_mempool >(&state); |
573 | } |
574 | |
575 | uint32_t bn_data::num_klpairs(void) const { |
576 | return m_buffer.size(); |
577 | } |
578 | |
579 | void bn_data::destroy(void) { |
580 | // The buffer may have been freed already, in some cases. |
581 | m_buffer.destroy(); |
582 | toku_mempool_destroy(&m_buffer_mempool); |
583 | m_disksize_of_keys = 0; |
584 | } |
585 | |
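// Populate a freshly created basement node with copies of the given sorted keys and
// leafentries (parallel arrays of length num_les); the leafentries are copied into a new
// mempool and the keys into a newly built dmt.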
586 | void bn_data::set_contents_as_clone_of_sorted_array( |
587 | uint32_t num_les, |
588 | const void** old_key_ptrs, |
589 | uint32_t* old_keylens, |
590 | LEAFENTRY* old_les, |
591 | size_t *le_sizes, |
592 | size_t total_key_size, |
593 | size_t total_le_size |
594 | ) |
595 | { |
596 | //Enforce "just created" invariant. |
597 | paranoid_invariant_zero(m_disksize_of_keys); |
598 | paranoid_invariant_zero(num_klpairs()); |
599 | paranoid_invariant_null(toku_mempool_get_base(&m_buffer_mempool)); |
600 | paranoid_invariant_zero(toku_mempool_get_size(&m_buffer_mempool)); |
601 | |
602 | toku_mempool_construct(&m_buffer_mempool, total_le_size); |
603 | m_buffer.destroy(); |
604 | m_disksize_of_keys = 0; |
605 | |
606 | klpair_dmt_t::builder dmt_builder; |
607 | dmt_builder.create(num_les, total_key_size); |
608 | |
609 | for (uint32_t idx = 0; idx < num_les; idx++) { |
610 | void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx]); |
611 | paranoid_invariant_notnull(new_le); |
612 | memcpy(new_le, old_les[idx], le_sizes[idx]); |
613 | size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(&m_buffer_mempool, new_le); |
614 | dmt_builder.append(klpair_dmtwriter(old_keylens[idx], le_offset, old_key_ptrs[idx])); |
615 | add_key(old_keylens[idx]); |
616 | } |
617 | dmt_builder.build(&this->m_buffer); |
618 | } |
619 | |
620 | LEAFENTRY bn_data::get_le_from_klpair(const klpair_struct *klpair) const { |
621 | void * ptr = toku_mempool_get_pointer_from_base_and_offset(&this->m_buffer_mempool, klpair->le_offset); |
622 | LEAFENTRY CAST_FROM_VOIDP(le, ptr); |
623 | return le; |
624 | } |
625 | |
626 | |
627 | // get info about a single leafentry by index |
628 | int bn_data::fetch_le(uint32_t idx, LEAFENTRY *le) { |
629 | klpair_struct* klpair = nullptr; |
630 | int r = m_buffer.fetch(idx, nullptr, &klpair); |
631 | if (r == 0) { |
632 | *le = get_le_from_klpair(klpair); |
633 | } |
634 | return r; |
635 | } |
636 | |
637 | int bn_data::fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key) { |
638 | klpair_struct* klpair = nullptr; |
639 | uint32_t klpair_len; |
640 | int r = m_buffer.fetch(idx, &klpair_len, &klpair); |
641 | if (r == 0) { |
642 | *len = keylen_from_klpair_len(klpair_len); |
643 | *key = klpair->key; |
644 | *le = get_le_from_klpair(klpair); |
645 | } |
646 | return r; |
647 | } |
648 | |
649 | int bn_data::fetch_klpair_disksize(uint32_t idx, size_t *size) { |
650 | klpair_struct* klpair = nullptr; |
651 | uint32_t klpair_len; |
652 | int r = m_buffer.fetch(idx, &klpair_len, &klpair); |
653 | if (r == 0) { |
654 | *size = klpair_disksize(klpair_len, klpair); |
655 | } |
656 | return r; |
657 | } |
658 | |
659 | int bn_data::fetch_key_and_len(uint32_t idx, uint32_t *len, void** key) { |
660 | klpair_struct* klpair = nullptr; |
661 | uint32_t klpair_len; |
662 | int r = m_buffer.fetch(idx, &klpair_len, &klpair); |
663 | if (r == 0) { |
664 | *len = keylen_from_klpair_len(klpair_len); |
665 | *key = klpair->key; |
666 | } |
667 | return r; |
668 | } |
669 | |
670 | void bn_data::clone(bn_data* orig_bn_data) { |
671 | toku_mempool_clone(&orig_bn_data->m_buffer_mempool, &m_buffer_mempool); |
672 | m_buffer.clone(orig_bn_data->m_buffer); |
673 | this->m_disksize_of_keys = orig_bn_data->m_disksize_of_keys; |
674 | } |
675 | |
676 | |