/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.


Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.

    PerconaFT is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2,
    as published by the Free Software Foundation.

    PerconaFT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.

----------------------------------------

    PerconaFT is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License, version 3,
    as published by the Free Software Foundation.

    PerconaFT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
======= */

#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."

#include <db.h>

#include <locktree/lock_request.h>

#include "ydb-internal.h"
#include "ydb_txn.h"
#include "ydb_row_lock.h"

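// Overview (added summary, inferred from the functions below): this file wires
// DB/DB_TXN row locking to the locktree. It acquires range locks on behalf of
// the root of a nested transaction tree, remembers the acquired ranges in the
// txn's lt_map so they can be released at commit or abort, and keeps that map
// consistent when the locktree escalates locks.
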
/*
    Used for partial implementation of nested transactions.
    Work is done by children as normal, but all locking is done by the
    root of the nested txn tree.
    This may hold extra locks, and will not work as expected when
    a node has two non-completed txns at any time.
*/
static DB_TXN *txn_oldest_ancester(DB_TXN* txn) {
    while (txn && txn->parent) {
        txn = txn->parent;
    }
    return txn;
}

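// Comparison callback for the txn's lt_map OMT: orders txn_lt_key_ranges
// entries by locktree so find_zero() can locate the entry for a given locktree.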
int find_key_ranges_by_lt(const txn_lt_key_ranges &ranges,
                          const toku::locktree *const &find_lt);
int find_key_ranges_by_lt(const txn_lt_key_ranges &ranges,
                          const toku::locktree *const &find_lt) {
    return ranges.lt->compare(find_lt);
}

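// Record that txn now holds a lock on the range [left_key, right_key] in this
// DB's locktree, so the range can be released when the txn completes. The
// caller must have already acquired the lock, and txn should be the root of
// its nested transaction tree.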
static void db_txn_note_row_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key) {
    const toku::locktree *lt = db->i->lt;

    toku_mutex_lock(&db_txn_struct_i(txn)->txn_mutex);

    uint32_t idx;
    txn_lt_key_ranges ranges;
    toku::omt<txn_lt_key_ranges> *map = &db_txn_struct_i(txn)->lt_map;

    // if this txn has not yet referenced this locktree,
    // then add it to this txn's locktree map
    int r = map->find_zero<const toku::locktree *, find_key_ranges_by_lt>(lt, &ranges, &idx);
    if (r == DB_NOTFOUND) {
        ranges.lt = db->i->lt;
        XMALLOC(ranges.buffer);
        ranges.buffer->create();
        map->insert_at(ranges, idx);

        // let the manager know we're referencing this lt
        toku::locktree_manager *ltm = &txn->mgrp->i->ltm;
        ltm->reference_lt(ranges.lt);
    } else {
        invariant_zero(r);
    }

    // add a new lock range to this txn's row lock buffer
    size_t old_mem_size = ranges.buffer->total_memory_size();
    ranges.buffer->append(left_key, right_key);
    size_t new_mem_size = ranges.buffer->total_memory_size();
    invariant(new_mem_size > old_mem_size);
    lt->get_manager()->note_mem_used(new_mem_size - old_mem_size);

    toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex);
}

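// Callback invoked by the locktree when it escalates the locks held by txnid.
// It replaces the txn's stored range buffer for this locktree with the
// escalated ranges in 'buffer', so the txn later releases the escalated locks
// rather than the stale pre-escalation ranges.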
void toku_db_txn_escalate_callback(TXNID txnid, const toku::locktree *lt, const toku::range_buffer &buffer, void *extra) {
    DB_ENV *CAST_FROM_VOIDP(env, extra);

    // Get the TOKUTXN and DB_TXN for this txnid from the environment's txn manager.
    // Only the parent id is used in the search.
    TOKUTXN ttxn;
    TXNID_PAIR txnid_pair = { .parent_id64 = txnid, .child_id64 = 0 };
    TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);

    toku_txn_manager_suspend(txn_manager);
    toku_txn_manager_id2txn_unlocked(txn_manager, txnid_pair, &ttxn);

    // We are still holding the txn manager lock. If we couldn't find the txn,
    // then we lost a race with a committing transaction that got removed
    // from the txn manager before it released its locktree locks. In this
    // case we do nothing - that transaction has released or is just about to
    // release its locks and be gone, so there's no point in updating its lt_map
    // with the new escalated ranges. It will go about releasing the old
    // locks it thinks it had, and will succeed as if nothing happened.
    //
    // If we did find the transaction, then it has not yet been removed
    // from the manager and therefore has not yet released its locks.
    // We must try to replace the range buffer associated with this locktree,
    // if it exists. This is important, otherwise it can grow out of
    // control (ticket 5961).

    if (ttxn != nullptr) {
        DB_TXN *txn = toku_txn_get_container_db_txn(ttxn);

        // One subtle point is that if the transaction is still live, it is impossible
        // to deadlock on the txn mutex, even though we are holding the locktree's root
        // mutex and releasing locks takes them in the opposite order.
        //
        // Proof: releasing locks takes the txn mutex and then acquires the locktree's
        // root mutex, while escalation takes the root mutex and possibly takes the txn
        // mutex. Releasing locks implies the txn is not live, and a non-live txn implies
        // we will not need to take the txn mutex, so the deadlock is avoided.
        toku_mutex_lock(&db_txn_struct_i(txn)->txn_mutex);

        uint32_t idx;
        txn_lt_key_ranges ranges;
        toku::omt<txn_lt_key_ranges> *map = &db_txn_struct_i(txn)->lt_map;
        int r = map->find_zero<const toku::locktree *, find_key_ranges_by_lt>(lt, &ranges, &idx);
        if (r == 0) {
            // Destroy the old range buffer, create a new one, and insert the new ranges.
            //
            // We could theoretically steal the memory from the caller instead of copying
            // it, but it's simpler to have a callback API that doesn't transfer memory ownership.
            lt->get_manager()->note_mem_released(ranges.buffer->total_memory_size());
            ranges.buffer->destroy();
            ranges.buffer->create();
            toku::range_buffer::iterator iter(&buffer);
            toku::range_buffer::iterator::record rec;
            while (iter.current(&rec)) {
                ranges.buffer->append(rec.get_left_key(), rec.get_right_key());
                iter.next();
            }
            lt->get_manager()->note_mem_used(ranges.buffer->total_memory_size());
        } else {
            // In rare cases, we may not find the associated locktree, because we are
            // racing with the transaction trying to add this locktree to the lt map
            // after acquiring its first lock. The escalated lock set must be the single
            // lock that this txnid just acquired. Do nothing here and let the txn
            // take care of adding this locktree and range to its lt map as usual.
            invariant(buffer.get_num_ranges() == 1);
        }

        toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex);
    }

    toku_txn_manager_resume(txn_manager);
}

// Get a range lock.
// Return when the range lock is acquired or the default lock tree timeout has expired.
int toku_db_get_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key,
        toku::lock_request::type lock_type) {
    toku::lock_request request;
    request.create();
    int r = toku_db_start_range_lock(db, txn, left_key, right_key, lock_type, &request);
    if (r == DB_LOCK_NOTGRANTED) {
        toku_debug_sync(db_txn_struct_i(txn)->tokutxn,
                        "toku_range_lock_before_wait");
        r = toku_db_wait_range_lock(db, txn, &request);
        if (r == DB_LOCK_NOTGRANTED)
            toku_debug_sync(db_txn_struct_i(txn)->tokutxn,
                            "toku_range_lock_not_granted_after_wait");
    } else if (r == 0) {
        toku_debug_sync(db_txn_struct_i(txn)->tokutxn,
                        "toku_range_lock_granted_immediately");
    }

    request.destroy();
    return r;
}
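
// A minimal usage sketch (hypothetical caller; assumes db, txn, and the DBT
// endpoints have already been set up, e.g. with toku_fill_dbt):
//
//     int r = toku_db_get_range_lock(db, txn, &left, &right,
//                                    toku::lock_request::type::READ);
//     if (r == DB_LOCK_NOTGRANTED) {
//         // the lock could not be acquired before the lock timeout expired
//     }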

// Setup and start an asynchronous lock request.
int toku_db_start_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key,
        toku::lock_request::type lock_type, toku::lock_request *request) {
    uint64_t client_id;
    void *client_extra;
    DB_TXN *txn_anc = txn_oldest_ancester(txn);
    TXNID txn_anc_id = txn_anc->id64(txn_anc);
    txn->get_client_id(txn, &client_id, &client_extra);
    request->set(db->i->lt, txn_anc_id, left_key, right_key, lock_type,
                 toku_is_big_txn(txn_anc), client_extra);

    const int r = request->start();
    if (r == 0) {
        db_txn_note_row_lock(db, txn_anc, left_key, right_key);
    } else if (r == DB_LOCK_DEADLOCK) {
        lock_timeout_callback callback = txn->mgrp->i->lock_wait_timeout_callback;
        if (callback != nullptr) {
            callback(db, txn_anc_id, left_key, right_key,
                     request->get_conflicting_txnid());
        }
    }
    return r;
}

// Complete a lock request by waiting until the request is ready
// and then storing the acquired lock if successful.
int toku_db_wait_range_lock(DB *db, DB_TXN *txn, toku::lock_request *request) {
    DB_TXN *txn_anc = txn_oldest_ancester(txn);
    const DBT *left_key = request->get_left_key();
    const DBT *right_key = request->get_right_key();
    DB_ENV *env = db->dbenv;
    uint64_t wait_time_msec = env->i->default_lock_timeout_msec;
    if (env->i->get_lock_timeout_callback)
        wait_time_msec = env->i->get_lock_timeout_callback(wait_time_msec);
    uint64_t killed_time_msec = env->i->default_killed_time_msec;
    if (env->i->get_killed_time_callback)
        killed_time_msec = env->i->get_killed_time_callback(killed_time_msec);
    const int r = request->wait(wait_time_msec, killed_time_msec, env->i->killed_callback,
                                env->i->lock_wait_needed_callback);
    if (r == 0) {
        db_txn_note_row_lock(db, txn_anc, left_key, right_key);
    } else if (r == DB_LOCK_NOTGRANTED) {
        lock_timeout_callback callback = txn->mgrp->i->lock_wait_timeout_callback;
        if (callback != nullptr) {
            callback(db, txn_anc->id64(txn_anc), left_key, right_key,
                     request->get_conflicting_txnid());
        }
    }
    return r;
}

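// Acquire a write lock on a single key by requesting the degenerate range
// [key, key], waiting if necessary.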
int toku_db_get_point_write_lock(DB *db, DB_TXN *txn, const DBT *key) {
    return toku_db_get_range_lock(db, txn, key, key, toku::lock_request::type::WRITE);
}

// acquire a point write lock on the key for a given txn.
// this does not block the calling thread.
void toku_db_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn) {
    uint64_t client_id;
    void *client_extra;
    DB_TXN *txn = toku_txn_get_container_db_txn(tokutxn);
    DB_TXN *txn_anc = txn_oldest_ancester(txn);
    TXNID txn_anc_id = txn_anc->id64(txn_anc);

    // This lock request must succeed, so we do not want to wait
    toku::lock_request request;
    request.create();
    txn->get_client_id(txn, &client_id, &client_extra);
    request.set(db->i->lt, txn_anc_id, key, key,
                toku::lock_request::type::WRITE, toku_is_big_txn(txn_anc),
                client_extra);
    int r = request.start();
    invariant_zero(r);
    db_txn_note_row_lock(db, txn_anc, key, key);
    request.destroy();
}

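// Release every lock this txn holds in the given locktree, free the range
// buffer that tracked them, retry pending lock requests that may now be
// grantable, and drop the txn's reference on the locktree.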
void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges) {
    toku::locktree *lt = ranges->lt;
    TXNID txnid = txn->id64(txn);

    // release all of the locks this txn has ever successfully
    // acquired and stored in the range buffer for this locktree
    lt->release_locks(txnid, ranges->buffer);
    lt->get_manager()->note_mem_released(ranges->buffer->total_memory_size());
    ranges->buffer->destroy();
    toku_free(ranges->buffer);

    // all of our locks have been released, so first try to wake up
    // pending lock requests, then release our reference on the lt
    toku::lock_request::retry_all_lock_requests(lt, txn->mgrp->i->lock_wait_needed_callback);

    // Release our reference on this locktree
    toku::locktree_manager *ltm = &txn->mgrp->i->ltm;
    ltm->release_lt(lt);
}