| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | /*********** |
| 40 | * The purpose of this file is to implement the high-level logic for |
| 41 | * taking a checkpoint. |
| 42 | * |
| 43 | * There are three locks used for taking a checkpoint. They are listed below. |
| 44 | * |
| 45 | * NOTE: The reader-writer locks may be held by either multiple clients |
| 46 | * or the checkpoint function. (The checkpoint function has the role |
| 47 | * of the writer, the clients have the reader roles.) |
| 48 | * |
| 49 | * - multi_operation_lock |
| 50 | * This is a new reader-writer lock. |
| 51 | * This lock is held by the checkpoint function only for as long as is required |
| 52 | * to set all the "pending" bits and to create the checkpoint-in-progress versions |
| 53 | * of the header and translation table (btt). |
| 54 | * The following operations must take the multi_operation_lock: |
| 55 | * - any set of operations that must be atomic with respect to begin checkpoint |
| 56 | * |
| 57 | * - checkpoint_safe_lock |
| 58 | * This is a new reader-writer lock. |
| 59 | * This lock is held for the entire duration of the checkpoint. |
| 60 | * It is used to prevent more than one checkpoint from happening at a time |
| 61 | * (the checkpoint function is non-re-entrant), and to prevent certain operations |
| 62 | * that should not happen during a checkpoint. |
| 63 | * The following operations must take the checkpoint_safe lock: |
| 64 | * - delete a dictionary |
| 65 | * - rename a dictionary |
| 66 | * The application can use this lock to disable checkpointing during other sensitive |
| 67 | * operations, such as making a backup copy of the database. |
| 68 | * |
| 69 | * Once the "pending" bits are set and the snapshots are taken of the header and btt, |
| 70 | * most normal database operations are permitted to resume. |
| 71 | * |
| 72 | * |
| 73 | * |
| 74 | *****/ |
| 75 | |
| 76 | #include <my_global.h> |
| 77 | #include <time.h> |
| 78 | |
| 79 | #include "portability/toku_portability.h" |
| 80 | #include "portability/toku_atomic.h" |
| 81 | |
| 82 | #include "ft/cachetable/cachetable.h" |
| 83 | #include "ft/cachetable/checkpoint.h" |
| 84 | #include "ft/ft.h" |
| 85 | #include "ft/logger/log-internal.h" |
| 86 | #include "ft/logger/recover.h" |
| 87 | #include "util/frwlock.h" |
| 88 | #include "util/status.h" |
| 89 | |
// Instrumentation keys for the synchronization objects created in this file.
// The lock-init helpers below dereference checkpoint_safe_mutex_key,
// multi_operation_lock_key and low_priority_multi_operation_lock_key
// (and checkpoint_safe_rwlock_key when TOKU_MYSQL_WITH_PFS is defined), so
// those must point at valid keys before toku_checkpoint_init() runs.
// NOTE(review): the keys are not assigned anywhere in this file; presumably
// they are set up by the instrumentation layer -- confirm.
toku_instr_key *checkpoint_safe_mutex_key;
toku_instr_key *checkpoint_safe_rwlock_key;
toku_instr_key *multi_operation_lock_key;
toku_instr_key *low_priority_multi_operation_lock_key;

// Keys for generic rwlock instrumentation; they are defined here but not
// referenced by any code in this file.
toku_instr_key *rwlock_cond_key;
toku_instr_key *rwlock_wait_read_key;
toku_instr_key *rwlock_wait_write_key;
| 98 | |
| 99 | void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) { |
| 100 | cp_status.init(); |
| 101 | CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct); |
| 102 | *statp = cp_status; |
| 103 | } |
| 104 | |
// LSN of the most recently completed checkpoint; toku_checkpoint() copies it
// from the logger after the end-checkpoint phase.
static LSN last_completed_checkpoint_lsn;

// checkpoint_safe_mutex is the mutex handed to checkpoint_safe_lock at init
// time; every lock/unlock call on checkpoint_safe_lock in this file is made
// while holding it.
static toku_mutex_t checkpoint_safe_mutex;
static toku::frwlock checkpoint_safe_lock;
// The checkpoint thread takes both of these as a writer (low-priority lock
// first); clients take them as readers.
static toku_pthread_rwlock_t multi_operation_lock;
static toku_pthread_rwlock_t low_priority_multi_operation_lock;

static bool initialized = false; // sanity check
// The two flags below are read by client lock functions without holding any
// lock; they only decide whether a "client waited" status counter is bumped.
static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint)
static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint)
// Durations (in microseconds) at or above which the begin / end phase of a
// checkpoint is counted as "long" in the status table.
static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
| 117 | |
| 118 | // Note following static functions are called from checkpoint internal logic only, |
| 119 | // and use the "writer" calls for locking and unlocking. |
| 120 | |
| 121 | static void |
| 122 | multi_operation_lock_init(void) { |
| 123 | pthread_rwlockattr_t attr; |
| 124 | pthread_rwlockattr_init(&attr); |
| 125 | #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP) |
| 126 | pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); |
| 127 | #else |
| 128 | // TODO: need to figure out how to make writer-preferential rwlocks |
| 129 | // happen on osx |
| 130 | #endif |
| 131 | toku_pthread_rwlock_init( |
| 132 | *multi_operation_lock_key, &multi_operation_lock, &attr); |
| 133 | toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key, |
| 134 | &low_priority_multi_operation_lock, |
| 135 | &attr); |
| 136 | pthread_rwlockattr_destroy(&attr); |
| 137 | locked_mo = false; |
| 138 | } |
| 139 | |
| 140 | static void |
| 141 | multi_operation_lock_destroy(void) { |
| 142 | toku_pthread_rwlock_destroy(&multi_operation_lock); |
| 143 | toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock); |
| 144 | } |
| 145 | |
| 146 | static void |
| 147 | multi_operation_checkpoint_lock(void) { |
| 148 | toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock); |
| 149 | toku_pthread_rwlock_wrlock(&multi_operation_lock); |
| 150 | locked_mo = true; |
| 151 | } |
| 152 | |
| 153 | static void |
| 154 | multi_operation_checkpoint_unlock(void) { |
| 155 | locked_mo = false; |
| 156 | toku_pthread_rwlock_wrunlock(&multi_operation_lock); |
| 157 | toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock); |
| 158 | } |
| 159 | |
| 160 | static void checkpoint_safe_lock_init(void) { |
| 161 | toku_mutex_init( |
| 162 | *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr); |
| 163 | checkpoint_safe_lock.init(&checkpoint_safe_mutex |
| 164 | #ifdef TOKU_MYSQL_WITH_PFS |
| 165 | , |
| 166 | *checkpoint_safe_rwlock_key |
| 167 | #endif |
| 168 | ); |
| 169 | locked_cs = false; |
| 170 | } |
| 171 | |
| 172 | static void |
| 173 | checkpoint_safe_lock_destroy(void) { |
| 174 | checkpoint_safe_lock.deinit(); |
| 175 | toku_mutex_destroy(&checkpoint_safe_mutex); |
| 176 | } |
| 177 | |
| 178 | static void |
| 179 | checkpoint_safe_checkpoint_lock(void) { |
| 180 | toku_mutex_lock(&checkpoint_safe_mutex); |
| 181 | checkpoint_safe_lock.write_lock(false); |
| 182 | toku_mutex_unlock(&checkpoint_safe_mutex); |
| 183 | locked_cs = true; |
| 184 | } |
| 185 | |
| 186 | static void |
| 187 | checkpoint_safe_checkpoint_unlock(void) { |
| 188 | locked_cs = false; |
| 189 | toku_mutex_lock(&checkpoint_safe_mutex); |
| 190 | checkpoint_safe_lock.write_unlock(); |
| 191 | toku_mutex_unlock(&checkpoint_safe_mutex); |
| 192 | } |
| 193 | |
| 194 | // toku_xxx_client_(un)lock() functions are only called from client code, |
| 195 | // never from checkpoint code, and use the "reader" interface to the lock functions. |
| 196 | |
| 197 | void |
| 198 | toku_multi_operation_client_lock(void) { |
| 199 | if (locked_mo) |
| 200 | (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1); |
| 201 | toku_pthread_rwlock_rdlock(&multi_operation_lock); |
| 202 | } |
| 203 | |
| 204 | void |
| 205 | toku_multi_operation_client_unlock(void) { |
| 206 | toku_pthread_rwlock_rdunlock(&multi_operation_lock); |
| 207 | } |
| 208 | |
| 209 | void toku_low_priority_multi_operation_client_lock(void) { |
| 210 | toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock); |
| 211 | } |
| 212 | |
| 213 | void toku_low_priority_multi_operation_client_unlock(void) { |
| 214 | toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock); |
| 215 | } |
| 216 | |
| 217 | void |
| 218 | toku_checkpoint_safe_client_lock(void) { |
| 219 | if (locked_cs) |
| 220 | (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1); |
| 221 | toku_mutex_lock(&checkpoint_safe_mutex); |
| 222 | checkpoint_safe_lock.read_lock(); |
| 223 | toku_mutex_unlock(&checkpoint_safe_mutex); |
| 224 | toku_multi_operation_client_lock(); |
| 225 | } |
| 226 | |
| 227 | void |
| 228 | toku_checkpoint_safe_client_unlock(void) { |
| 229 | toku_mutex_lock(&checkpoint_safe_mutex); |
| 230 | checkpoint_safe_lock.read_unlock(); |
| 231 | toku_mutex_unlock(&checkpoint_safe_mutex); |
| 232 | toku_multi_operation_client_unlock(); |
| 233 | } |
| 234 | |
| 235 | // Initialize the checkpoint mechanism, must be called before any client operations. |
| 236 | void |
| 237 | toku_checkpoint_init(void) { |
| 238 | multi_operation_lock_init(); |
| 239 | checkpoint_safe_lock_init(); |
| 240 | initialized = true; |
| 241 | } |
| 242 | |
| 243 | void |
| 244 | toku_checkpoint_destroy(void) { |
| 245 | multi_operation_lock_destroy(); |
| 246 | checkpoint_safe_lock_destroy(); |
| 247 | initialized = false; |
| 248 | } |
| 249 | |
| 250 | #define (x) CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + x |
| 251 | |
| 252 | |
| 253 | // Take a checkpoint of all currently open dictionaries |
| 254 | int |
| 255 | toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, |
| 256 | void (*callback_f)(void*), void * , |
| 257 | void (*callback2_f)(void*), void * , |
| 258 | checkpoint_caller_t caller_id) { |
| 259 | int = (int) caller_id * 1000; |
| 260 | |
| 261 | assert(initialized); |
| 262 | |
| 263 | (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1); |
| 264 | checkpoint_safe_checkpoint_lock(); |
| 265 | (void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1); |
| 266 | |
| 267 | if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX)) |
| 268 | CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock |
| 269 | |
| 270 | SET_CHECKPOINT_FOOTPRINT(10); |
| 271 | multi_operation_checkpoint_lock(); |
| 272 | SET_CHECKPOINT_FOOTPRINT(20); |
| 273 | toku_ft_open_close_lock(); |
| 274 | |
| 275 | SET_CHECKPOINT_FOOTPRINT(30); |
| 276 | CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL); |
| 277 | uint64_t t_checkpoint_begin_start = toku_current_time_microsec(); |
| 278 | toku_cachetable_begin_checkpoint(cp, logger); |
| 279 | uint64_t t_checkpoint_begin_end = toku_current_time_microsec(); |
| 280 | |
| 281 | toku_ft_open_close_unlock(); |
| 282 | multi_operation_checkpoint_unlock(); |
| 283 | |
| 284 | SET_CHECKPOINT_FOOTPRINT(40); |
| 285 | if (callback_f) { |
| 286 | callback_f(extra); // callback is called with checkpoint_safe_lock still held |
| 287 | } |
| 288 | |
| 289 | uint64_t t_checkpoint_end_start = toku_current_time_microsec(); |
| 290 | toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2); |
| 291 | uint64_t t_checkpoint_end_end = toku_current_time_microsec(); |
| 292 | |
| 293 | SET_CHECKPOINT_FOOTPRINT(50); |
| 294 | if (logger) { |
| 295 | last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn; |
| 296 | toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn); |
| 297 | CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn; |
| 298 | } |
| 299 | |
| 300 | SET_CHECKPOINT_FOOTPRINT(60); |
| 301 | CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL); |
| 302 | CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN); |
| 303 | CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++; |
| 304 | uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start; |
| 305 | CP_STATUS_VAL(CP_BEGIN_TIME) += duration; |
| 306 | if (duration >= toku_checkpoint_begin_long_threshold) { |
| 307 | CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration; |
| 308 | CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1; |
| 309 | } |
| 310 | duration = t_checkpoint_end_end - t_checkpoint_end_start; |
| 311 | CP_STATUS_VAL(CP_END_TIME) += duration; |
| 312 | if (duration >= toku_checkpoint_end_long_threshold) { |
| 313 | CP_STATUS_VAL(CP_LONG_END_TIME) += duration; |
| 314 | CP_STATUS_VAL(CP_LONG_END_COUNT) += 1; |
| 315 | } |
| 316 | CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN)); |
| 317 | CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN)); |
| 318 | CP_STATUS_VAL(CP_FOOTPRINT) = 0; |
| 319 | |
| 320 | checkpoint_safe_checkpoint_unlock(); |
| 321 | return 0; |
| 322 | } |
| 323 | |
| 324 | #include <toku_race_tools.h> |
| 325 | void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void); |
| 326 | void |
| 327 | toku_checkpoint_helgrind_ignore(void) { |
| 328 | TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status); |
| 329 | TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo); |
| 330 | TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs); |
| 331 | } |
| 332 | |
| 333 | #undef SET_CHECKPOINT_FOOTPRINT |
| 334 | |