1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39/***********
40 * The purpose of this file is to implement the high-level logic for
41 * taking a checkpoint.
42 *
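 * There are three locks used for taking a checkpoint. Two of them are described
 * below; the third, low_priority_multi_operation_lock, is a reader-writer lock
 * that the checkpoint function write-locks immediately before multi_operation_lock
 * and releases immediately after it.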
44 *
45 * NOTE: The reader-writer locks may be held by either multiple clients
46 * or the checkpoint function. (The checkpoint function has the role
47 * of the writer, the clients have the reader roles.)
48 *
49 * - multi_operation_lock
50 * This is a new reader-writer lock.
 * This lock is held by the checkpoint function only for as long as is required
 * to set all the "pending" bits and to create the checkpoint-in-progress versions
53 * of the header and translation table (btt).
54 * The following operations must take the multi_operation_lock:
55 * - any set of operations that must be atomic with respect to begin checkpoint
56 *
57 * - checkpoint_safe_lock
58 * This is a new reader-writer lock.
59 * This lock is held for the entire duration of the checkpoint.
60 * It is used to prevent more than one checkpoint from happening at a time
61 * (the checkpoint function is non-re-entrant), and to prevent certain operations
62 * that should not happen during a checkpoint.
63 * The following operations must take the checkpoint_safe lock:
64 * - delete a dictionary
65 * - rename a dictionary
66 * The application can use this lock to disable checkpointing during other sensitive
67 * operations, such as making a backup copy of the database.
68 *
69 * Once the "pending" bits are set and the snapshots are taken of the header and btt,
70 * most normal database operations are permitted to resume.
71 *
72 *
73 *
74 *****/
75
76#include <my_global.h>
77#include <time.h>
78
79#include "portability/toku_portability.h"
80#include "portability/toku_atomic.h"
81
82#include "ft/cachetable/cachetable.h"
83#include "ft/cachetable/checkpoint.h"
84#include "ft/ft.h"
85#include "ft/logger/log-internal.h"
86#include "ft/logger/recover.h"
87#include "util/frwlock.h"
88#include "util/status.h"
89
90toku_instr_key *checkpoint_safe_mutex_key;
91toku_instr_key *checkpoint_safe_rwlock_key;
92toku_instr_key *multi_operation_lock_key;
93toku_instr_key *low_priority_multi_operation_lock_key;
94
95toku_instr_key *rwlock_cond_key;
96toku_instr_key *rwlock_wait_read_key;
97toku_instr_key *rwlock_wait_write_key;
98
99void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
100 cp_status.init();
101 CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
102 *statp = cp_status;
103}
104
105static LSN last_completed_checkpoint_lsn;
106
107static toku_mutex_t checkpoint_safe_mutex;
108static toku::frwlock checkpoint_safe_lock;
109static toku_pthread_rwlock_t multi_operation_lock;
110static toku_pthread_rwlock_t low_priority_multi_operation_lock;
111
112static bool initialized = false; // sanity check
113static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint)
114static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint)
115static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
116static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
117
// Note: the following static functions are called only from the checkpoint's
// internal logic, and use the "writer" calls for locking and unlocking.
120
121static void
122multi_operation_lock_init(void) {
123 pthread_rwlockattr_t attr;
124 pthread_rwlockattr_init(&attr);
125#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
126 pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
127#else
128// TODO: need to figure out how to make writer-preferential rwlocks
129// happen on osx
130#endif
131 toku_pthread_rwlock_init(
132 *multi_operation_lock_key, &multi_operation_lock, &attr);
133 toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key,
134 &low_priority_multi_operation_lock,
135 &attr);
136 pthread_rwlockattr_destroy(&attr);
137 locked_mo = false;
138}
139
140static void
141multi_operation_lock_destroy(void) {
142 toku_pthread_rwlock_destroy(&multi_operation_lock);
143 toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock);
144}
145
146static void
147multi_operation_checkpoint_lock(void) {
148 toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock);
149 toku_pthread_rwlock_wrlock(&multi_operation_lock);
150 locked_mo = true;
151}
152
153static void
154multi_operation_checkpoint_unlock(void) {
155 locked_mo = false;
156 toku_pthread_rwlock_wrunlock(&multi_operation_lock);
157 toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock);
158}
159
160static void checkpoint_safe_lock_init(void) {
161 toku_mutex_init(
162 *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr);
163 checkpoint_safe_lock.init(&checkpoint_safe_mutex
164#ifdef TOKU_MYSQL_WITH_PFS
165 ,
166 *checkpoint_safe_rwlock_key
167#endif
168 );
169 locked_cs = false;
170}
171
172static void
173checkpoint_safe_lock_destroy(void) {
174 checkpoint_safe_lock.deinit();
175 toku_mutex_destroy(&checkpoint_safe_mutex);
176}
177
178static void
179checkpoint_safe_checkpoint_lock(void) {
180 toku_mutex_lock(&checkpoint_safe_mutex);
181 checkpoint_safe_lock.write_lock(false);
182 toku_mutex_unlock(&checkpoint_safe_mutex);
183 locked_cs = true;
184}
185
186static void
187checkpoint_safe_checkpoint_unlock(void) {
188 locked_cs = false;
189 toku_mutex_lock(&checkpoint_safe_mutex);
190 checkpoint_safe_lock.write_unlock();
191 toku_mutex_unlock(&checkpoint_safe_mutex);
192}
193
194// toku_xxx_client_(un)lock() functions are only called from client code,
195// never from checkpoint code, and use the "reader" interface to the lock functions.
196
197void
198toku_multi_operation_client_lock(void) {
199 if (locked_mo)
200 (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1);
201 toku_pthread_rwlock_rdlock(&multi_operation_lock);
202}
203
204void
205toku_multi_operation_client_unlock(void) {
206 toku_pthread_rwlock_rdunlock(&multi_operation_lock);
207}
208
209void toku_low_priority_multi_operation_client_lock(void) {
210 toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock);
211}
212
213void toku_low_priority_multi_operation_client_unlock(void) {
214 toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock);
215}
216
217void
218toku_checkpoint_safe_client_lock(void) {
219 if (locked_cs)
220 (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1);
221 toku_mutex_lock(&checkpoint_safe_mutex);
222 checkpoint_safe_lock.read_lock();
223 toku_mutex_unlock(&checkpoint_safe_mutex);
224 toku_multi_operation_client_lock();
225}
226
227void
228toku_checkpoint_safe_client_unlock(void) {
229 toku_mutex_lock(&checkpoint_safe_mutex);
230 checkpoint_safe_lock.read_unlock();
231 toku_mutex_unlock(&checkpoint_safe_mutex);
232 toku_multi_operation_client_unlock();
233}
234
235// Initialize the checkpoint mechanism, must be called before any client operations.
236void
237toku_checkpoint_init(void) {
238 multi_operation_lock_init();
239 checkpoint_safe_lock_init();
240 initialized = true;
241}
242
243void
244toku_checkpoint_destroy(void) {
245 multi_operation_lock_destroy();
246 checkpoint_safe_lock_destroy();
247 initialized = false;
248}
249
// Record checkpoint progress in the CP_FOOTPRINT status entry as
// footprint_offset + x. Requires a variable named `footprint_offset` to be in
// scope where the macro is used. The parameter and the whole expansion are
// parenthesized so that an argument containing lower-precedence operators
// (e.g. `1 << 1`) is added as a unit rather than being re-associated.
#define SET_CHECKPOINT_FOOTPRINT(x) (CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + (x))
251
252
253// Take a checkpoint of all currently open dictionaries
254int
255toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
256 void (*callback_f)(void*), void * extra,
257 void (*callback2_f)(void*), void * extra2,
258 checkpoint_caller_t caller_id) {
259 int footprint_offset = (int) caller_id * 1000;
260
261 assert(initialized);
262
263 (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
264 checkpoint_safe_checkpoint_lock();
265 (void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
266
267 if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX))
268 CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
269
270 SET_CHECKPOINT_FOOTPRINT(10);
271 multi_operation_checkpoint_lock();
272 SET_CHECKPOINT_FOOTPRINT(20);
273 toku_ft_open_close_lock();
274
275 SET_CHECKPOINT_FOOTPRINT(30);
276 CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
277 uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
278 toku_cachetable_begin_checkpoint(cp, logger);
279 uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
280
281 toku_ft_open_close_unlock();
282 multi_operation_checkpoint_unlock();
283
284 SET_CHECKPOINT_FOOTPRINT(40);
285 if (callback_f) {
286 callback_f(extra); // callback is called with checkpoint_safe_lock still held
287 }
288
289 uint64_t t_checkpoint_end_start = toku_current_time_microsec();
290 toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
291 uint64_t t_checkpoint_end_end = toku_current_time_microsec();
292
293 SET_CHECKPOINT_FOOTPRINT(50);
294 if (logger) {
295 last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
296 toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
297 CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
298 }
299
300 SET_CHECKPOINT_FOOTPRINT(60);
301 CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
302 CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN);
303 CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++;
304 uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
305 CP_STATUS_VAL(CP_BEGIN_TIME) += duration;
306 if (duration >= toku_checkpoint_begin_long_threshold) {
307 CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration;
308 CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1;
309 }
310 duration = t_checkpoint_end_end - t_checkpoint_end_start;
311 CP_STATUS_VAL(CP_END_TIME) += duration;
312 if (duration >= toku_checkpoint_end_long_threshold) {
313 CP_STATUS_VAL(CP_LONG_END_TIME) += duration;
314 CP_STATUS_VAL(CP_LONG_END_COUNT) += 1;
315 }
316 CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
317 CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
318 CP_STATUS_VAL(CP_FOOTPRINT) = 0;
319
320 checkpoint_safe_checkpoint_unlock();
321 return 0;
322}
323
324#include <toku_race_tools.h>
325void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
326void
327toku_checkpoint_helgrind_ignore(void) {
328 TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
329 TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
330 TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
331}
332
333#undef SET_CHECKPOINT_FOOTPRINT
334