1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39#include <my_global.h>
40#include "ft/ft.h"
41#include "ft/ft-cachetable-wrappers.h"
42#include "ft/ft-flusher.h"
43#include "ft/ft-flusher-internal.h"
44#include "ft/ft-internal.h"
45#include "ft/node.h"
46#include "portability/toku_atomic.h"
47#include "util/context.h"
48#include "util/status.h"
49
50// Member Descirption:
51// 1. highest_pivot_key - this is the key that corresponds to the
52// most recently flushed leaf entry.
53// 2. max_current_key - this is the pivot/key that we inherit as
54// we descend down the tree. We use this to set the highest_pivot_key.
55// 3. sub_tree_size - this is the percentage of the entire tree that our
56// current position (in a sub-tree) encompasses.
57// 4. percentage_done - this is the percentage of leaf nodes that have
58// been flushed into.
59// 5. rightmost_leaf_seen - this is a boolean we use to determine if
60// if we have flushed to every leaf node.
61struct hot_flusher_extra {
62 DBT highest_pivot_key;
63 DBT max_current_key;
64 float sub_tree_size;
65 float percentage_done;
66 bool rightmost_leaf_seen;
67};
68
69void
70toku_ft_hot_get_status(FT_HOT_STATUS s) {
71 hot_status.init();
72 *s = hot_status;
73}
74
75// Copies the max current key to the highest pivot key seen.
76static void
77hot_set_highest_key(struct hot_flusher_extra *flusher)
78{
79 // The max current key will be NULL if we are traversing in the
80 // rightmost subtree of a given parent. As such, we don't want to
81 // allocate memory for this case.
82 toku_destroy_dbt(&flusher->highest_pivot_key);
83 if (flusher->max_current_key.data != NULL) {
84 // Otherwise, let's copy all the contents from one key to the other.
85 toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
86 }
87}
88
89static void
90hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start)
91{
92 toku_destroy_dbt(&flusher->highest_pivot_key);
93 if (start != NULL) {
94 // Otherwise, let's copy all the contents from one key to the other.
95 toku_clone_dbt(&flusher->highest_pivot_key, *start);
96 }
97}
98
99static int
100hot_just_pick_child(FT ft,
101 FTNODE parent,
102 struct hot_flusher_extra *flusher)
103{
104 int childnum = 0;
105
106 // Search through Parents pivots, see which one is greater than
107 // the highest_pivot_key seen so far.
108 if (flusher->highest_pivot_key.data == NULL)
109 {
110 // Special case of the first child of the root node.
111 // Also known as, NEGATIVE INFINITY....
112 childnum = 0;
113 } else {
114 // Find the pivot boundary.
115 childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp);
116 }
117
118 return childnum;
119}
120
121static void
122hot_update_flusher_keys(FTNODE parent,
123 int childnum,
124 struct hot_flusher_extra *flusher)
125{
126 // Update maximum current key if the child is NOT the rightmost
127 // child node.
128 if (childnum < (parent->n_children - 1)) {
129 toku_destroy_dbt(&flusher->max_current_key);
130 toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum));
131 }
132}
133
134// Picks which child toku_ft_flush_some_child will use for flushing and
135// recursion.
136static int
137hot_pick_child(FT ft,
138 FTNODE parent,
139 void *extra)
140{
141 struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
142 int childnum = hot_just_pick_child(ft, parent, flusher);
143
144 // Now we determine the percentage of the tree flushed so far.
145
146 // Whichever subtree we choose to recurse into, it is a fraction
147 // of the current parent.
148 flusher->sub_tree_size /= parent->n_children;
149
150 // Update the precentage complete, using our new sub tree size AND
151 // the number of children we have already flushed.
152 flusher->percentage_done += (flusher->sub_tree_size * childnum);
153
154 hot_update_flusher_keys(parent, childnum, flusher);
155
156 return childnum;
157}
158
159// Does nothing for now.
160static void
161hot_update_status(FTNODE UU(child),
162 int UU(dirtied),
163 void *UU(extra))
164{
165 return;
166}
167
168// If we've just split a node, HOT needs another chance to decide which
169// one to flush into. This gives it a chance to do that, and update the
170// keys it maintains.
171static int
172hot_pick_child_after_split(FT ft,
173 FTNODE parent,
174 int childnuma,
175 int childnumb,
176 void *extra)
177{
178 struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
179 int childnum = hot_just_pick_child(ft, parent, flusher);
180 assert(childnum == childnuma || childnum == childnumb);
181 hot_update_flusher_keys(parent, childnum, flusher);
182 if (parent->height == 1) {
183 // We don't want to recurse into a leaf node, but if we return
184 // anything valid, ft_split_child will try to go there, so we
185 // return -1 to allow ft_split_child to have its default
186 // behavior, which will be to stop recursing.
187 childnum = -1;
188 }
189 return childnum;
190}
191
192// Basic constructor/initializer for the hot flusher struct.
193static void
194hot_flusher_init(struct flusher_advice *advice,
195 struct hot_flusher_extra *flusher)
196{
197 // Initialize the highest pivot key seen to NULL. This represents
198 // NEGATIVE INFINITY and is used to cover the special case of our
199 // first traversal of the tree.
200 toku_init_dbt(&(flusher->highest_pivot_key));
201 toku_init_dbt(&(flusher->max_current_key));
202 flusher->rightmost_leaf_seen = 0;
203 flusher->sub_tree_size = 1.0;
204 flusher->percentage_done = 0.0;
205 flusher_advice_init(advice,
206 hot_pick_child,
207 dont_destroy_basement_nodes,
208 always_recursively_flush,
209 default_merge_child,
210 hot_update_status,
211 hot_pick_child_after_split,
212 flusher
213 );
214}
215
216// Erases any DBT keys we have copied from a traversal.
217static void
218hot_flusher_destroy(struct hot_flusher_extra *flusher)
219{
220 toku_destroy_dbt(&flusher->highest_pivot_key);
221 toku_destroy_dbt(&flusher->max_current_key);
222}
223
224// Entry point for Hot Optimize Table (HOT). Note, this function is
225// not recursive. It iterates over root-to-leaf paths.
226int
227toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
228 int (*progress_callback)(void *extra, float progress),
229 void *progress_extra, uint64_t* loops_run)
230{
231 toku::context flush_ctx(CTX_FLUSH);
232
233 int r = 0;
234 struct hot_flusher_extra flusher;
235 struct flusher_advice advice;
236
237 hot_flusher_init(&advice, &flusher);
238 hot_set_start_key(&flusher, left);
239
240 uint64_t loop_count = 0;
241 MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
242 // start of HOT operation
243 (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_STARTED), 1);
244
245 toku_ft_note_hot_begin(ft_handle);
246
247 // Higher level logic prevents a dictionary from being deleted or
248 // truncated during a hot optimize operation. Doing so would violate
249 // the hot optimize contract.
250 do {
251 FTNODE root;
252 CACHEKEY root_key;
253 uint32_t fullhash;
254
255 {
256 // Get root node (the first parent of each successive HOT
257 // call.)
258 toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
259 ftnode_fetch_extra bfe;
260 bfe.create_for_full_read(ft_handle->ft);
261 toku_pin_ftnode(ft_handle->ft,
262 (BLOCKNUM) root_key,
263 fullhash,
264 &bfe,
265 PL_WRITE_EXPENSIVE,
266 &root,
267 true);
268 toku_ftnode_assert_fully_in_memory(root);
269 }
270
271 // Prepare HOT diagnostics.
272 if (loop_count == 0) {
273 // The first time through, capture msn from root
274 msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
275 }
276
277 loop_count++;
278
279 if (loop_count > HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT)) {
280 HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count;
281 }
282
283 // Initialize the maximum current key. We need to do this for
284 // every traversal.
285 toku_destroy_dbt(&flusher.max_current_key);
286
287 flusher.sub_tree_size = 1.0;
288 flusher.percentage_done = 0.0;
289
290 // This should recurse to the bottom of the tree and then
291 // return.
292 if (root->height > 0) {
293 toku_ft_flush_some_child(ft_handle->ft, root, &advice);
294 } else {
295 // Since there are no children to flush, we should abort
296 // the HOT call.
297 flusher.rightmost_leaf_seen = 1;
298 toku_unpin_ftnode(ft_handle->ft, root);
299 }
300
301 // Set the highest pivot key seen here, since the parent may
302 // be unlocked and NULL'd later in our caller:
303 // toku_ft_flush_some_child().
304 hot_set_highest_key(&flusher);
305
306 // This is where we determine if the traversal is finished or
307 // not.
308 if (flusher.max_current_key.data == NULL) {
309 flusher.rightmost_leaf_seen = 1;
310 }
311 else if (right) {
312 // if we have flushed past the bounds set for us,
313 // set rightmost_leaf_seen so we exit
314 int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right);
315 if (cmp > 0) {
316 flusher.rightmost_leaf_seen = 1;
317 }
318 }
319
320 // Update HOT's progress.
321 if (progress_callback != NULL) {
322 r = progress_callback(progress_extra, flusher.percentage_done);
323
324 // Check if the callback wants us to stop running HOT.
325 if (r != 0) {
326 flusher.rightmost_leaf_seen = 1;
327 }
328 }
329
330 // Loop until the max key has been updated to positive
331 // infinity.
332 } while (!flusher.rightmost_leaf_seen);
333 *loops_run = loop_count;
334
335 // Cleanup.
336 hot_flusher_destroy(&flusher);
337
338 // More diagnostics.
339 {
340 bool success = false;
341 if (r == 0) { success = true; }
342
343 {
344 toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot);
345 }
346
347 if (success) {
348 (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_COMPLETED), 1);
349 } else {
350 (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_ABORTED), 1);
351 }
352 }
353 return r;
354}
355
356#include <toku_race_tools.h>
357void __attribute__((__constructor__)) toku_hot_helgrind_ignore(void);
358void
359toku_hot_helgrind_ignore(void) {
360 // incremented only while lock is held, but read by engine status asynchronously.
361 TOKU_VALGRIND_HG_DISABLE_CHECKING(&hot_status, sizeof hot_status);
362}
363