ft-cachetable-wrappers.cc source code [MariaDB/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc]

1	/ -- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -- /
2	// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3	#ident "$Id$"
4	/======*
5	This file is part of PerconaFT.
6
7
8	Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10	PerconaFT is free software: you can redistribute it and/or modify
11	it under the terms of the GNU General Public License, version 2,
12	as published by the Free Software Foundation.
13
14	PerconaFT is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22	----------------------------------------
23
24	PerconaFT is free software: you can redistribute it and/or modify
25	it under the terms of the GNU Affero General Public License, version 3,
26	as published by the Free Software Foundation.
27
28	PerconaFT is distributed in the hope that it will be useful,
29	but WITHOUT ANY WARRANTY; without even the implied warranty of
30	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31	GNU Affero General Public License for more details.
32
33	You should have received a copy of the GNU Affero General Public License
34	along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35	======= /*
36
37	#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39	#include <my_global.h>
40	#include "ft/serialize/block_table.h"
41	#include "ft/ft-cachetable-wrappers.h"
42	#include "ft/ft-flusher.h"
43	#include "ft/ft-internal.h"
44	#include "ft/ft.h"
45	#include "ft/node.h"
46
47	#include <util/context.h>
48
49	static void
50	ftnode_get_key_and_fullhash(
51	BLOCKNUM* cachekey,
52	uint32_t* fullhash,
53	void* extra)
54	{
55	FT ft = (FT) extra;
56	BLOCKNUM blocknum;
57	ft->blocktable.allocate_blocknum(&blocknum, ft);
58	*cachekey = blocknum;
59	*fullhash = toku_cachetable_hash(ft->cf, blocknum);
60	}
61
62	void
63	cachetable_put_empty_node_with_dep_nodes(
64	FT ft,
65	uint32_t num_dependent_nodes,
66	FTNODE* dependent_nodes,
67	BLOCKNUM* blocknum, //output
68	uint32_t* fullhash, //output
69	FTNODE* result)
70	{
71	FTNODE XCALLOC(new_node);
72	PAIR dependent_pairs[num_dependent_nodes];
73	enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
74	for (uint32_t i = `0`; i < num_dependent_nodes; i++) {
75	dependent_pairs[i] = dependent_nodes[i]->ct_pair;
76	dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty;
77	}
78
79	toku_cachetable_put_with_dep_pairs(
80	ft->cf,
81	ftnode_get_key_and_fullhash,
82	new_node,
83	make_pair_attr(sizeof(FTNODE)),
84	get_write_callbacks_for_node(ft),
85	ft,
86	num_dependent_nodes,
87	dependent_pairs,
88	dependent_dirty_bits,
89	blocknum,
90	fullhash,
91	toku_ftnode_save_ct_pair);
92	*result = new_node;
93	}
94
95	void
96	create_new_ftnode_with_dep_nodes(
97	FT ft,
98	FTNODE *result,
99	int height,
100	int n_children,
101	uint32_t num_dependent_nodes,
102	FTNODE* dependent_nodes)
103	{
104	uint32_t fullhash = `0`;
105	BLOCKNUM blocknum;
106
107	cachetable_put_empty_node_with_dep_nodes(
108	ft,
109	num_dependent_nodes,
110	dependent_nodes,
111	&blocknum,
112	&fullhash,
113	result);
114
115	assert(ft->h->basementnodesize > `0`);
116	if (height == `0`) {
117	assert(n_children > `0`);
118	}
119
120	toku_initialize_empty_ftnode(
121	*result,
122	blocknum,
123	height,
124	n_children,
125	ft->h->layout_version,
126	ft->h->flags);
127
128	(*result)->fullhash = fullhash;
129	}
130
131	void
132	toku_create_new_ftnode (
133	FT_HANDLE t,
134	FTNODE *result,
135	int height,
136	int n_children)
137	{
138	return create_new_ftnode_with_dep_nodes(
139	t->ft,
140	result,
141	height,
142	n_children,
143	`0`,
144	NULL);
145	}
146
147	//
148	// On success, this function assumes that the caller is trying to pin the node
149	// with a PL_READ lock. If message application is needed,
150	// then a PL_WRITE_CHEAP lock is grabbed
151	//
152	int
153	toku_pin_ftnode_for_query(
154	FT_HANDLE ft_handle,
155	BLOCKNUM blocknum,
156	uint32_t fullhash,
157	UNLOCKERS unlockers,
158	ANCESTORS ancestors,
159	const pivot_bounds &bounds,
160	ftnode_fetch_extra *bfe,
161	bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
162	FTNODE *node_p,
163	bool* msgs_applied)
164	{
165	void *node_v;
166	msgs_applied = false*;
167	FTNODE node = nullptr;
168	MSN max_msn_in_path = ZERO_MSN;
169	bool needs_ancestors_messages = false;
170	// this function assumes that if you want ancestor messages applied,
171	// you are doing a read for a query. This is so we can make some optimizations
172	// below.
173	if (apply_ancestor_messages) {
174	paranoid_invariant(bfe->type == ftnode_fetch_subset);
175	}
176
177	int r = toku_cachetable_get_and_pin_nonblocking(
178	ft_handle->ft->cf,
179	blocknum,
180	fullhash,
181	&node_v,
182	NULL,
183	get_write_callbacks_for_node(ft_handle->ft),
184	toku_ftnode_fetch_callback,
185	toku_ftnode_pf_req_callback,
186	toku_ftnode_pf_callback,
187	PL_READ,
188	bfe, //read_extraargs
189	unlockers);
190	if (r != `0`) {
191	assert(r == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
192	goto exit;
193	}
194	node = static_cast<FTNODE>(node_v);
195	if (apply_ancestor_messages && node->height == `0`) {
196	needs_ancestors_messages = toku_ft_leaf_needs_ancestors_messages(
197	ft_handle->ft,
198	node,
199	ancestors,
200	bounds,
201	&max_msn_in_path,
202	bfe->child_to_read
203	);
204	if (needs_ancestors_messages) {
205	toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
206
207	toku_unpin_ftnode_read_only(ft_handle->ft, node);
208	int rr = toku_cachetable_get_and_pin_nonblocking(
209	ft_handle->ft->cf,
210	blocknum,
211	fullhash,
212	&node_v,
213	NULL,
214	get_write_callbacks_for_node(ft_handle->ft),
215	toku_ftnode_fetch_callback,
216	toku_ftnode_pf_req_callback,
217	toku_ftnode_pf_callback,
218	PL_WRITE_CHEAP,
219	bfe, //read_extraargs
220	unlockers);
221	if (rr != `0`) {
222	assert(rr == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
223	r = TOKUDB_TRY_AGAIN;
224	goto exit;
225	}
226	node = static_cast<FTNODE>(node_v);
227	toku_apply_ancestors_messages_to_node(
228	ft_handle,
229	node,
230	ancestors,
231	bounds,
232	msgs_applied,
233	bfe->child_to_read
234	);
235	} else {
236	// At this point, we aren't going to run
237	// toku_apply_ancestors_messages_to_node but that doesn't
238	// mean max_msn_applied shouldn't be updated if possible
239	// (this saves the CPU work involved in
240	// toku_ft_leaf_needs_ancestors_messages).
241	//
242	// We still have a read lock, so we have not resolved
243	// checkpointing. If the node is pending and dirty, we
244	// can't modify anything, including max_msn, until we
245	// resolve checkpointing. If we do, the node might get
246	// written out that way as part of a checkpoint with a
247	// root that was already written out with a smaller
248	// max_msn. During recovery, we would then inject a
249	// message based on the root's max_msn, and that message
250	// would get filtered by the leaf because it had too high
251	// a max_msn value. (see #5407)
252	//
253	// So for simplicity we only update the max_msn if the
254	// node is clean. That way, in order for the node to get
255	// written out, it would have to be dirtied. That
256	// requires a write lock, and a write lock requires you to
257	// resolve checkpointing.
258	if (!node->dirty) {
259	toku_ft_bn_update_max_msn(node, max_msn_in_path, bfe->child_to_read);
260	}
261	}
262	}
263	*node_p = node;
264	exit:
265	return r;
266	}
267
268	void
269	toku_pin_ftnode_with_dep_nodes(
270	FT ft,
271	BLOCKNUM blocknum,
272	uint32_t fullhash,
273	ftnode_fetch_extra *bfe,
274	pair_lock_type lock_type,
275	uint32_t num_dependent_nodes,
276	FTNODE *dependent_nodes,
277	FTNODE *node_p,
278	bool move_messages)
279	{
280	void *node_v;
281	PAIR dependent_pairs[num_dependent_nodes];
282	enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
283	for (uint32_t i = `0`; i < num_dependent_nodes; i++) {
284	dependent_pairs[i] = dependent_nodes[i]->ct_pair;
285	dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty;
286	}
287
288	int r = toku_cachetable_get_and_pin_with_dep_pairs(
289	ft->cf,
290	blocknum,
291	fullhash,
292	&node_v,
293	NULL,
294	get_write_callbacks_for_node(ft),
295	toku_ftnode_fetch_callback,
296	toku_ftnode_pf_req_callback,
297	toku_ftnode_pf_callback,
298	lock_type,
299	bfe,
300	num_dependent_nodes,
301	dependent_pairs,
302	dependent_dirty_bits
303	);
304	invariant_zero(r);
305	FTNODE node = (FTNODE) node_v;
306	if (lock_type != PL_READ && node->height > `0` && move_messages) {
307	toku_move_ftnode_messages_to_stale(ft, node);
308	}
309	*node_p = node;
310	}
311
312	void toku_pin_ftnode(FT ft,
313	BLOCKNUM blocknum,
314	uint32_t fullhash,
315	ftnode_fetch_extra *bfe,
316	pair_lock_type lock_type,
317	FTNODE *node_p,
318	bool move_messages) {
319	toku_pin_ftnode_with_dep_nodes(ft, blocknum, fullhash, bfe, lock_type, `0`, nullptr, node_p, move_messages);
320	}
321
322	int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep) {
323	void *node_v;
324	int r = toku_cachetable_maybe_get_and_pin_clean(ft->cf, blocknum, fullhash, lock_type, &node_v);
325	if (r != `0`) {
326	goto cleanup;
327	}
328	CAST_FROM_VOIDP(*nodep, node_v);
329	if ((*nodep)->height > `0` && lock_type != PL_READ) {
330	toku_move_ftnode_messages_to_stale(ft, *nodep);
331	}
332	cleanup:
333	return r;
334	}
335
336	void toku_unpin_ftnode(FT ft, FTNODE node) {
337	int r = toku_cachetable_unpin(ft->cf,
338	node->ct_pair,
339	static_cast<enum cachetable_dirty>(node->dirty),
340	make_ftnode_pair_attr(node));
341	invariant_zero(r);
342	}
343
344	void
345	toku_unpin_ftnode_read_only(FT ft, FTNODE node)
346	{
347	int r = toku_cachetable_unpin(
348	ft->cf,
349	node->ct_pair,
350	(enum cachetable_dirty) node->dirty,
351	make_invalid_pair_attr()
352	);
353	assert(r==`0`);
354	}
355
356	void toku_ftnode_swap_pair_values(FTNODE a, FTNODE b)
357	// Effect: Swap the blocknum, fullhash, and PAIR for for a and b
358	// Requires: Both nodes are pinned
359	{
360	BLOCKNUM tmp_blocknum = a->blocknum;
361	uint32_t tmp_fullhash = a->fullhash;
362	PAIR tmp_pair = a->ct_pair;
363
364	a->blocknum = b->blocknum;
365	a->fullhash = b->fullhash;
366	a->ct_pair = b->ct_pair;
367
368	b->blocknum = tmp_blocknum;
369	b->fullhash = tmp_fullhash;
370	b->ct_pair = tmp_pair;
371
372	// A and B swapped pair pointers, but we still have to swap
373	// the actual pair values (ie: the FTNODEs they represent)
374	// in the cachetable.
375	toku_cachetable_swap_pair_values(a->ct_pair, b->ct_pair);
376	}
377

Browse the source code of MariaDB/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc