ft-hot-flusher.cc source code [MariaDB/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc]

1	/ -- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -- /
2	// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3	#ident "$Id$"
4	/======*
5	This file is part of PerconaFT.
6
7
8	Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10	PerconaFT is free software: you can redistribute it and/or modify
11	it under the terms of the GNU General Public License, version 2,
12	as published by the Free Software Foundation.
13
14	PerconaFT is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22	----------------------------------------
23
24	PerconaFT is free software: you can redistribute it and/or modify
25	it under the terms of the GNU Affero General Public License, version 3,
26	as published by the Free Software Foundation.
27
28	PerconaFT is distributed in the hope that it will be useful,
29	but WITHOUT ANY WARRANTY; without even the implied warranty of
30	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31	GNU Affero General Public License for more details.
32
33	You should have received a copy of the GNU Affero General Public License
34	along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35	======= /*
36
37	#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39	#include <my_global.h>
40	#include "ft/ft.h"
41	#include "ft/ft-cachetable-wrappers.h"
42	#include "ft/ft-flusher.h"
43	#include "ft/ft-flusher-internal.h"
44	#include "ft/ft-internal.h"
45	#include "ft/node.h"
46	#include "portability/toku_atomic.h"
47	#include "util/context.h"
48	#include "util/status.h"
49
50	// Member Descirption:
51	// 1. highest_pivot_key - this is the key that corresponds to the
52	// most recently flushed leaf entry.
53	// 2. max_current_key - this is the pivot/key that we inherit as
54	// we descend down the tree. We use this to set the highest_pivot_key.
55	// 3. sub_tree_size - this is the percentage of the entire tree that our
56	// current position (in a sub-tree) encompasses.
57	// 4. percentage_done - this is the percentage of leaf nodes that have
58	// been flushed into.
59	// 5. rightmost_leaf_seen - this is a boolean we use to determine if
60	// if we have flushed to every leaf node.
61	struct hot_flusher_extra {
62	DBT highest_pivot_key;
63	DBT max_current_key;
64	float sub_tree_size;
65	float percentage_done;
66	bool rightmost_leaf_seen;
67	};
68
69	void
70	toku_ft_hot_get_status(FT_HOT_STATUS s) {
71	hot_status.init();
72	*s = hot_status;
73	}
74
75	// Copies the max current key to the highest pivot key seen.
76	static void
77	hot_set_highest_key(struct hot_flusher_extra *flusher)
78	{
79	// The max current key will be NULL if we are traversing in the
80	// rightmost subtree of a given parent. As such, we don't want to
81	// allocate memory for this case.
82	toku_destroy_dbt(&flusher->highest_pivot_key);
83	if (flusher->max_current_key.data != NULL) {
84	// Otherwise, let's copy all the contents from one key to the other.
85	toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
86	}
87	}
88
89	static void
90	hot_set_start_key(struct hot_flusher_extra flusher, const* DBT* start)
91	{
92	toku_destroy_dbt(&flusher->highest_pivot_key);
93	if (start != NULL) {
94	// Otherwise, let's copy all the contents from one key to the other.
95	toku_clone_dbt(&flusher->highest_pivot_key, *start);
96	}
97	}
98
99	static int
100	hot_just_pick_child(FT ft,
101	FTNODE parent,
102	struct hot_flusher_extra *flusher)
103	{
104	int childnum = `0`;
105
106	// Search through Parents pivots, see which one is greater than
107	// the highest_pivot_key seen so far.
108	if (flusher->highest_pivot_key.data == NULL)
109	{
110	// Special case of the first child of the root node.
111	// Also known as, NEGATIVE INFINITY....
112	childnum = `0`;
113	} else {
114	// Find the pivot boundary.
115	childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp);
116	}
117
118	return childnum;
119	}
120
121	static void
122	hot_update_flusher_keys(FTNODE parent,
123	int childnum,
124	struct hot_flusher_extra *flusher)
125	{
126	// Update maximum current key if the child is NOT the rightmost
127	// child node.
128	if (childnum < (parent->n_children - `1`)) {
129	toku_destroy_dbt(&flusher->max_current_key);
130	toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum));
131	}
132	}
133
134	// Picks which child toku_ft_flush_some_child will use for flushing and
135	// recursion.
136	static int
137	hot_pick_child(FT ft,
138	FTNODE parent,
139	void *extra)
140	{
141	struct hot_flusher_extra flusher = (struct* hot_flusher_extra *) extra;
142	int childnum = hot_just_pick_child(ft, parent, flusher);
143
144	// Now we determine the percentage of the tree flushed so far.
145
146	// Whichever subtree we choose to recurse into, it is a fraction
147	// of the current parent.
148	flusher->sub_tree_size /= parent->n_children;
149
150	// Update the precentage complete, using our new sub tree size AND
151	// the number of children we have already flushed.
152	flusher->percentage_done += (flusher->sub_tree_size * childnum);
153
154	hot_update_flusher_keys(parent, childnum, flusher);
155
156	return childnum;
157	}
158
159	// Does nothing for now.
160	static void
161	hot_update_status(FTNODE UU(child),
162	int UU(dirtied),
163	void *UU(extra))
164	{
165	return;
166	}
167
168	// If we've just split a node, HOT needs another chance to decide which
169	// one to flush into. This gives it a chance to do that, and update the
170	// keys it maintains.
171	static int
172	hot_pick_child_after_split(FT ft,
173	FTNODE parent,
174	int childnuma,
175	int childnumb,
176	void *extra)
177	{
178	struct hot_flusher_extra flusher = (struct* hot_flusher_extra *) extra;
179	int childnum = hot_just_pick_child(ft, parent, flusher);
180	assert(childnum == childnuma \|\| childnum == childnumb);
181	hot_update_flusher_keys(parent, childnum, flusher);
182	if (parent->height == `1`) {
183	// We don't want to recurse into a leaf node, but if we return
184	// anything valid, ft_split_child will try to go there, so we
185	// return -1 to allow ft_split_child to have its default
186	// behavior, which will be to stop recursing.
187	childnum = -`1`;
188	}
189	return childnum;
190	}
191
192	// Basic constructor/initializer for the hot flusher struct.
193	static void
194	hot_flusher_init(struct flusher_advice *advice,
195	struct hot_flusher_extra *flusher)
196	{
197	// Initialize the highest pivot key seen to NULL. This represents
198	// NEGATIVE INFINITY and is used to cover the special case of our
199	// first traversal of the tree.
200	toku_init_dbt(&(flusher->highest_pivot_key));
201	toku_init_dbt(&(flusher->max_current_key));
202	flusher->rightmost_leaf_seen = `0`;
203	flusher->sub_tree_size = `1.0`;
204	flusher->percentage_done = `0.0`;
205	flusher_advice_init(advice,
206	hot_pick_child,
207	dont_destroy_basement_nodes,
208	always_recursively_flush,
209	default_merge_child,
210	hot_update_status,
211	hot_pick_child_after_split,
212	flusher
213	);
214	}
215
216	// Erases any DBT keys we have copied from a traversal.
217	static void
218	hot_flusher_destroy(struct hot_flusher_extra *flusher)
219	{
220	toku_destroy_dbt(&flusher->highest_pivot_key);
221	toku_destroy_dbt(&flusher->max_current_key);
222	}
223
224	// Entry point for Hot Optimize Table (HOT). Note, this function is
225	// not recursive. It iterates over root-to-leaf paths.
226	int
227	toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
228	int (progress_callback)(void* extra, float* progress),
229	void progress_extra, uint64_t loops_run)
230	{
231	toku::context flush_ctx(CTX_FLUSH);
232
233	int r = `0`;
234	struct hot_flusher_extra flusher;
235	struct flusher_advice advice;
236
237	hot_flusher_init(&advice, &flusher);
238	hot_set_start_key(&flusher, left);
239
240	uint64_t loop_count = `0`;
241	MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
242	// start of HOT operation
243	(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_STARTED), `1`);
244
245	toku_ft_note_hot_begin(ft_handle);
246
247	// Higher level logic prevents a dictionary from being deleted or
248	// truncated during a hot optimize operation. Doing so would violate
249	// the hot optimize contract.
250	do {
251	FTNODE root;
252	CACHEKEY root_key;
253	uint32_t fullhash;
254
255	{
256	// Get root node (the first parent of each successive HOT
257	// call.)
258	toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
259	ftnode_fetch_extra bfe;
260	bfe.create_for_full_read(ft_handle->ft);
261	toku_pin_ftnode(ft_handle->ft,
262	(BLOCKNUM) root_key,
263	fullhash,
264	&bfe,
265	PL_WRITE_EXPENSIVE,
266	&root,
267	true);
268	toku_ftnode_assert_fully_in_memory(root);
269	}
270
271	// Prepare HOT diagnostics.
272	if (loop_count == `0`) {
273	// The first time through, capture msn from root
274	msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
275	}
276
277	loop_count++;
278
279	if (loop_count > HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT)) {
280	HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count;
281	}
282
283	// Initialize the maximum current key. We need to do this for
284	// every traversal.
285	toku_destroy_dbt(&flusher.max_current_key);
286
287	flusher.sub_tree_size = `1.0`;
288	flusher.percentage_done = `0.0`;
289
290	// This should recurse to the bottom of the tree and then
291	// return.
292	if (root->height > `0`) {
293	toku_ft_flush_some_child(ft_handle->ft, root, &advice);
294	} else {
295	// Since there are no children to flush, we should abort
296	// the HOT call.
297	flusher.rightmost_leaf_seen = `1`;
298	toku_unpin_ftnode(ft_handle->ft, root);
299	}
300
301	// Set the highest pivot key seen here, since the parent may
302	// be unlocked and NULL'd later in our caller:
303	// toku_ft_flush_some_child().
304	hot_set_highest_key(&flusher);
305
306	// This is where we determine if the traversal is finished or
307	// not.
308	if (flusher.max_current_key.data == NULL) {
309	flusher.rightmost_leaf_seen = `1`;
310	}
311	else if (right) {
312	// if we have flushed past the bounds set for us,
313	// set rightmost_leaf_seen so we exit
314	int cmp = ft_handle->ft->cmp (&flusher.max_current_key, right);
315	if (cmp > `0`) {
316	flusher.rightmost_leaf_seen = `1`;
317	}
318	}
319
320	// Update HOT's progress.
321	if (progress_callback != NULL) {
322	r = progress_callback(progress_extra, flusher.percentage_done);
323
324	// Check if the callback wants us to stop running HOT.
325	if (r != `0`) {
326	flusher.rightmost_leaf_seen = `1`;
327	}
328	}
329
330	// Loop until the max key has been updated to positive
331	// infinity.
332	} while (!flusher.rightmost_leaf_seen);
333	*loops_run = loop_count;
334
335	// Cleanup.
336	hot_flusher_destroy(&flusher);
337
338	// More diagnostics.
339	{
340	bool success = false;
341	if (r == `0`) { success = true; }
342
343	{
344	toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot);
345	}
346
347	if (success) {
348	(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_COMPLETED), `1`);
349	} else {
350	(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_ABORTED), `1`);
351	}
352	}
353	return r;
354	}
355
356	#include <toku_race_tools.h>
357	void __attribute__((__constructor__)) toku_hot_helgrind_ignore(void);
358	void
359	toku_hot_helgrind_ignore(void) {
360	// incremented only while lock is held, but read by engine status asynchronously.
361	TOKU_VALGRIND_HG_DISABLE_CHECKING(&hot_status, sizeof hot_status);
362	}
363

Browse the source code of MariaDB/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc