1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | /*********** |
40 | * The purpose of this file is to implement the high-level logic for |
41 | * taking a checkpoint. |
42 | * |
43 | * There are three locks used for taking a checkpoint. They are listed below. |
44 | * |
45 | * NOTE: The reader-writer locks may be held by either multiple clients |
46 | * or the checkpoint function. (The checkpoint function has the role |
47 | * of the writer, the clients have the reader roles.) |
48 | * |
49 | * - multi_operation_lock |
50 | * This is a new reader-writer lock. |
 *    This lock is held by the checkpoint function only for as long as is required
 *    to set all the "pending" bits and to create the checkpoint-in-progress versions
53 | * of the header and translation table (btt). |
54 | * The following operations must take the multi_operation_lock: |
55 | * - any set of operations that must be atomic with respect to begin checkpoint |
56 | * |
57 | * - checkpoint_safe_lock |
58 | * This is a new reader-writer lock. |
59 | * This lock is held for the entire duration of the checkpoint. |
60 | * It is used to prevent more than one checkpoint from happening at a time |
61 | * (the checkpoint function is non-re-entrant), and to prevent certain operations |
62 | * that should not happen during a checkpoint. |
63 | * The following operations must take the checkpoint_safe lock: |
64 | * - delete a dictionary |
65 | * - rename a dictionary |
66 | * The application can use this lock to disable checkpointing during other sensitive |
67 | * operations, such as making a backup copy of the database. |
68 | * |
69 | * Once the "pending" bits are set and the snapshots are taken of the header and btt, |
70 | * most normal database operations are permitted to resume. |
71 | * |
72 | * |
73 | * |
74 | *****/ |
75 | |
76 | #include <my_global.h> |
77 | #include <time.h> |
78 | |
79 | #include "portability/toku_portability.h" |
80 | #include "portability/toku_atomic.h" |
81 | |
82 | #include "ft/cachetable/cachetable.h" |
83 | #include "ft/cachetable/checkpoint.h" |
84 | #include "ft/ft.h" |
85 | #include "ft/logger/log-internal.h" |
86 | #include "ft/logger/recover.h" |
87 | #include "util/frwlock.h" |
88 | #include "util/status.h" |
89 | |
90 | toku_instr_key *checkpoint_safe_mutex_key; |
91 | toku_instr_key *checkpoint_safe_rwlock_key; |
92 | toku_instr_key *multi_operation_lock_key; |
93 | toku_instr_key *low_priority_multi_operation_lock_key; |
94 | |
95 | toku_instr_key *rwlock_cond_key; |
96 | toku_instr_key *rwlock_wait_read_key; |
97 | toku_instr_key *rwlock_wait_write_key; |
98 | |
99 | void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) { |
100 | cp_status.init(); |
101 | CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct); |
102 | *statp = cp_status; |
103 | } |
104 | |
105 | static LSN last_completed_checkpoint_lsn; |
106 | |
107 | static toku_mutex_t checkpoint_safe_mutex; |
108 | static toku::frwlock checkpoint_safe_lock; |
109 | static toku_pthread_rwlock_t multi_operation_lock; |
110 | static toku_pthread_rwlock_t low_priority_multi_operation_lock; |
111 | |
112 | static bool initialized = false; // sanity check |
113 | static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint) |
114 | static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint) |
115 | static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second |
116 | static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute |
117 | |
118 | // Note following static functions are called from checkpoint internal logic only, |
119 | // and use the "writer" calls for locking and unlocking. |
120 | |
121 | static void |
122 | multi_operation_lock_init(void) { |
123 | pthread_rwlockattr_t attr; |
124 | pthread_rwlockattr_init(&attr); |
125 | #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) |
126 | pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); |
127 | #else |
128 | // TODO: need to figure out how to make writer-preferential rwlocks |
129 | // happen on osx |
130 | #endif |
131 | toku_pthread_rwlock_init( |
132 | *multi_operation_lock_key, &multi_operation_lock, &attr); |
133 | toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key, |
134 | &low_priority_multi_operation_lock, |
135 | &attr); |
136 | pthread_rwlockattr_destroy(&attr); |
137 | locked_mo = false; |
138 | } |
139 | |
140 | static void |
141 | multi_operation_lock_destroy(void) { |
142 | toku_pthread_rwlock_destroy(&multi_operation_lock); |
143 | toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock); |
144 | } |
145 | |
146 | static void |
147 | multi_operation_checkpoint_lock(void) { |
148 | toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock); |
149 | toku_pthread_rwlock_wrlock(&multi_operation_lock); |
150 | locked_mo = true; |
151 | } |
152 | |
153 | static void |
154 | multi_operation_checkpoint_unlock(void) { |
155 | locked_mo = false; |
156 | toku_pthread_rwlock_wrunlock(&multi_operation_lock); |
157 | toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock); |
158 | } |
159 | |
160 | static void checkpoint_safe_lock_init(void) { |
161 | toku_mutex_init( |
162 | *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr); |
163 | checkpoint_safe_lock.init(&checkpoint_safe_mutex |
164 | #ifdef TOKU_MYSQL_WITH_PFS |
165 | , |
166 | *checkpoint_safe_rwlock_key |
167 | #endif |
168 | ); |
169 | locked_cs = false; |
170 | } |
171 | |
172 | static void |
173 | checkpoint_safe_lock_destroy(void) { |
174 | checkpoint_safe_lock.deinit(); |
175 | toku_mutex_destroy(&checkpoint_safe_mutex); |
176 | } |
177 | |
178 | static void |
179 | checkpoint_safe_checkpoint_lock(void) { |
180 | toku_mutex_lock(&checkpoint_safe_mutex); |
181 | checkpoint_safe_lock.write_lock(false); |
182 | toku_mutex_unlock(&checkpoint_safe_mutex); |
183 | locked_cs = true; |
184 | } |
185 | |
186 | static void |
187 | checkpoint_safe_checkpoint_unlock(void) { |
188 | locked_cs = false; |
189 | toku_mutex_lock(&checkpoint_safe_mutex); |
190 | checkpoint_safe_lock.write_unlock(); |
191 | toku_mutex_unlock(&checkpoint_safe_mutex); |
192 | } |
193 | |
194 | // toku_xxx_client_(un)lock() functions are only called from client code, |
195 | // never from checkpoint code, and use the "reader" interface to the lock functions. |
196 | |
197 | void |
198 | toku_multi_operation_client_lock(void) { |
199 | if (locked_mo) |
200 | (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1); |
201 | toku_pthread_rwlock_rdlock(&multi_operation_lock); |
202 | } |
203 | |
204 | void |
205 | toku_multi_operation_client_unlock(void) { |
206 | toku_pthread_rwlock_rdunlock(&multi_operation_lock); |
207 | } |
208 | |
209 | void toku_low_priority_multi_operation_client_lock(void) { |
210 | toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock); |
211 | } |
212 | |
213 | void toku_low_priority_multi_operation_client_unlock(void) { |
214 | toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock); |
215 | } |
216 | |
217 | void |
218 | toku_checkpoint_safe_client_lock(void) { |
219 | if (locked_cs) |
220 | (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1); |
221 | toku_mutex_lock(&checkpoint_safe_mutex); |
222 | checkpoint_safe_lock.read_lock(); |
223 | toku_mutex_unlock(&checkpoint_safe_mutex); |
224 | toku_multi_operation_client_lock(); |
225 | } |
226 | |
227 | void |
228 | toku_checkpoint_safe_client_unlock(void) { |
229 | toku_mutex_lock(&checkpoint_safe_mutex); |
230 | checkpoint_safe_lock.read_unlock(); |
231 | toku_mutex_unlock(&checkpoint_safe_mutex); |
232 | toku_multi_operation_client_unlock(); |
233 | } |
234 | |
235 | // Initialize the checkpoint mechanism, must be called before any client operations. |
236 | void |
237 | toku_checkpoint_init(void) { |
238 | multi_operation_lock_init(); |
239 | checkpoint_safe_lock_init(); |
240 | initialized = true; |
241 | } |
242 | |
243 | void |
244 | toku_checkpoint_destroy(void) { |
245 | multi_operation_lock_destroy(); |
246 | checkpoint_safe_lock_destroy(); |
247 | initialized = false; |
248 | } |
249 | |
250 | #define (x) CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + x |
251 | |
252 | |
253 | // Take a checkpoint of all currently open dictionaries |
254 | int |
255 | toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, |
256 | void (*callback_f)(void*), void * , |
257 | void (*callback2_f)(void*), void * , |
258 | checkpoint_caller_t caller_id) { |
259 | int = (int) caller_id * 1000; |
260 | |
261 | assert(initialized); |
262 | |
263 | (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1); |
264 | checkpoint_safe_checkpoint_lock(); |
265 | (void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1); |
266 | |
267 | if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX)) |
268 | CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock |
269 | |
270 | SET_CHECKPOINT_FOOTPRINT(10); |
271 | multi_operation_checkpoint_lock(); |
272 | SET_CHECKPOINT_FOOTPRINT(20); |
273 | toku_ft_open_close_lock(); |
274 | |
275 | SET_CHECKPOINT_FOOTPRINT(30); |
276 | CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL); |
277 | uint64_t t_checkpoint_begin_start = toku_current_time_microsec(); |
278 | toku_cachetable_begin_checkpoint(cp, logger); |
279 | uint64_t t_checkpoint_begin_end = toku_current_time_microsec(); |
280 | |
281 | toku_ft_open_close_unlock(); |
282 | multi_operation_checkpoint_unlock(); |
283 | |
284 | SET_CHECKPOINT_FOOTPRINT(40); |
285 | if (callback_f) { |
286 | callback_f(extra); // callback is called with checkpoint_safe_lock still held |
287 | } |
288 | |
289 | uint64_t t_checkpoint_end_start = toku_current_time_microsec(); |
290 | toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2); |
291 | uint64_t t_checkpoint_end_end = toku_current_time_microsec(); |
292 | |
293 | SET_CHECKPOINT_FOOTPRINT(50); |
294 | if (logger) { |
295 | last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn; |
296 | toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn); |
297 | CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn; |
298 | } |
299 | |
300 | SET_CHECKPOINT_FOOTPRINT(60); |
301 | CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL); |
302 | CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN); |
303 | CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++; |
304 | uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start; |
305 | CP_STATUS_VAL(CP_BEGIN_TIME) += duration; |
306 | if (duration >= toku_checkpoint_begin_long_threshold) { |
307 | CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration; |
308 | CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1; |
309 | } |
310 | duration = t_checkpoint_end_end - t_checkpoint_end_start; |
311 | CP_STATUS_VAL(CP_END_TIME) += duration; |
312 | if (duration >= toku_checkpoint_end_long_threshold) { |
313 | CP_STATUS_VAL(CP_LONG_END_TIME) += duration; |
314 | CP_STATUS_VAL(CP_LONG_END_COUNT) += 1; |
315 | } |
316 | CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN)); |
317 | CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN)); |
318 | CP_STATUS_VAL(CP_FOOTPRINT) = 0; |
319 | |
320 | checkpoint_safe_checkpoint_unlock(); |
321 | return 0; |
322 | } |
323 | |
324 | #include <toku_race_tools.h> |
325 | void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void); |
326 | void |
327 | toku_checkpoint_helgrind_ignore(void) { |
328 | TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status); |
329 | TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo); |
330 | TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs); |
331 | } |
332 | |
333 | #undef SET_CHECKPOINT_FOOTPRINT |
334 | |