// Copyright (c) 2014, Google Inc.
// Copyright (c) 2017, MariaDB Corporation.

/**************************************************//**
@file btr/btr0scrub.cc
Scrubbing of btree pages

*******************************************************/

#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0scrub.h"
#include "ibuf0ibuf.h"
#include "fsp0fsp.h"
#include "dict0dict.h"
#include "mtr0mtr.h"

/* used when trying to acquire dict-lock */
UNIV_INTERN bool fil_crypt_is_closing(ulint space);

/**
 * scrub data at delete time (e.g. the purge thread)
 */
my_bool srv_immediate_scrub_data_uncompressed = false;

/**
 * background scrub uncompressed data
 *
 * if srv_immediate_scrub_data_uncompressed is enabled
 * this is only needed to handle "old" data
 */
my_bool srv_background_scrub_data_uncompressed = false;

/**
 * background scrub of compressed data
 *
 * reorganize compressed page for scrubbing
 * (only way to scrub compressed data)
 */
my_bool srv_background_scrub_data_compressed = false;

/* check spaces once per hour */
UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60);

/* default to scrubbing spaces that haven't been scrubbed in a week */
UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60);
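
/*
  Worked example of how these defaults are meant to combine, as
  described by the comments above (an explanatory note, not new
  behaviour): a background thread re-examines the set of tablespaces
  every 60 * 60 = 3600 seconds, and a tablespace becomes a scrubbing
  candidate again once 7 * 24 * 60 * 60 = 604800 seconds (one week)
  have passed since it was last scrubbed.
*/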

/**
 * statistics for scrubbing by background threads
 */
static btr_scrub_stat_t scrub_stat;
static ib_mutex_t scrub_stat_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key;
#endif

#ifdef UNIV_DEBUG
/**
 * srv_scrub_force_testing
 *
 * - force scrubbing using background threads even for uncompressed tables
 * - force pessimistic scrubbing (page split) even if not needed
 *   (see test_pessimistic_scrub_pct)
 */
my_bool srv_scrub_force_testing = true;

/**
 * Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only)
 */
static int test_pessimistic_scrub_pct = 50;

#endif
static uint scrub_compression_level = page_zip_level;

/**************************************************************//**
Log a scrubbing failure */
static
void
log_scrub_failure(
/*===============*/
	dict_index_t*	index,		/*!< in: index */
	btr_scrub_t*	scrub_data,	/*!< in: data to store statistics on */
	buf_block_t*	block,		/*!< in: block */
	dberr_t		err)		/*!< in: error */
{
	const char* reason = "unknown";
	switch (err) {
	case DB_UNDERFLOW:
		reason = "too few records on page";
		scrub_data->scrub_stat.page_split_failures_underflow++;
		break;
	case DB_INDEX_CORRUPT:
		reason = "unable to find index!";
		scrub_data->scrub_stat.page_split_failures_missing_index++;
		break;
	case DB_OUT_OF_FILE_SPACE:
		reason = "out of filespace";
		scrub_data->scrub_stat.page_split_failures_out_of_filespace++;
		break;
	default:
		ut_ad(0);
		reason = "unknown";
		scrub_data->scrub_stat.page_split_failures_unknown++;
	}

	ib::warn() << "Failed to scrub index " << index->name
		   << " of table " << index->table->name
		   << " page " << block->page.id << ": " << reason;
}

/****************************************************************
Lock dict mutexes */
static
bool
btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
			 const char* file, uint line)
{
	time_t start = time(0);
	time_t last = start;

	/* FIXME: this is not the proper way of doing things. The
	dict_sys->mutex should not be held by any thread for longer
	than a few microseconds. It must not be held during I/O,
	for example. So, what is the purpose of this busy-waiting?
	This function should be rewritten as part of MDEV-8139:
	Fix scrubbing tests. */

	while (mutex_enter_nowait(&(dict_sys->mutex))) {
		/* if we lock to close a table, we wait forever;
		 * if we don't lock to close a table, we check if the
		 * space is closing, and in that case give up instead
		 */
		if (lock_to_close_table) {
		} else if (fil_space_t* space = fil_space_acquire(space_id)) {
			bool stopping = space->is_stopping();
			space->release();
			if (stopping) {
				return false;
			}
		} else {
			return false;
		}

		os_thread_sleep(250000);

		time_t now = time(0);

		if (now >= last + 30) {
			fprintf(stderr,
				"WARNING: %s:%u waited %ld seconds for"
				" dict_sys lock, space: " ULINTPF
				" lock_to_close_table: %d\n",
				file, line, long(now - start), space_id,
				lock_to_close_table);

			last = now;
		}
	}

	ut_ad(mutex_own(&dict_sys->mutex));
	return true;
}

#define btr_scrub_lock_dict(space, lock_to_close_table) \
	btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__)
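
/*
  Minimal usage sketch for the macro above (hypothetical caller code,
  modelled on btr_scrub_get_table_and_index() below): a false return
  value means the tablespace is being stopped and the caller must give
  up rather than touch the dictionary; a true return value must be
  paired with btr_scrub_unlock_dict().

	if (!btr_scrub_lock_dict(space_id, false)) {
		return;	// tablespace is closing; abort
	}
	... access dict_sys ...
	btr_scrub_unlock_dict();
*/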

/****************************************************************
Unlock dict mutexes */
static
void
btr_scrub_unlock_dict()
{
	dict_mutex_exit_for_mysql();
}

/****************************************************************
Release reference to table. The caller must hold dict_sys->mutex,
since dict_table_close() is called with dict_locked == true. */
static
void
btr_scrub_table_close(
/*==================*/
	dict_table_t*	table)	/*!< in: table */
{
	bool dict_locked = true;
	bool try_drop = false;
	table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS;
	dict_table_close(table, dict_locked, try_drop);
}

/****************************************************************
Release the thread's reference to its current table, if any. */
static
void
btr_scrub_table_close_for_thread(
	btr_scrub_t*	scrub_data)
{
	if (scrub_data->current_table == NULL) {
		return;
	}

	if (fil_space_t* space = fil_space_acquire(scrub_data->space)) {
		/* If the tablespace is not marked as stopping,
		perform the actual close. */
		if (!space->is_stopping()) {
			mutex_enter(&dict_sys->mutex);
			/* perform the actual closing */
			btr_scrub_table_close(scrub_data->current_table);
			mutex_exit(&dict_sys->mutex);
		}
		space->release();
	}

	scrub_data->current_table = NULL;
	scrub_data->current_index = NULL;
}

/**************************************************************//**
Check if scrubbing is turned ON or OFF */
static
bool
check_scrub_setting(
/*=====================*/
	btr_scrub_t*	scrub_data)	/*!< in: scrub data */
{
	if (scrub_data->compressed)
		return srv_background_scrub_data_compressed;
	else
		return srv_background_scrub_data_uncompressed;
}

#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID)

/**************************************************************//**
Check if a page needs scrubbing */
UNIV_INTERN
int
btr_page_needs_scrubbing(
/*=====================*/
	btr_scrub_t*	scrub_data,	/*!< in: scrub data */
	buf_block_t*	block,		/*!< in: block to check, latched */
	btr_scrub_page_allocation_status_t
			allocated)	/*!< in: is block known
					to be allocated */
{
	/**
	 * Check if scrubbing has been turned OFF.
	 *
	 * At the start of a space, we check if scrubbing is ON or OFF;
	 * here we only check if scrubbing has been turned OFF.
	 *
	 * The motivation is that it is only valuable to have a full
	 * table (space) scrubbed.
	 */
	if (!check_scrub_setting(scrub_data)) {
		bool before_value = scrub_data->scrubbing;
		scrub_data->scrubbing = false;

		if (before_value == true) {
			/* we toggle scrubbing from on to off */
			return BTR_SCRUB_TURNED_OFF;
		}
	}

	if (scrub_data->scrubbing == false) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	const page_t*	page = buf_block_get_frame(block);

	if (allocated == BTR_SCRUB_PAGE_ALLOCATED) {
		if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
			/* this function is called from fil-crypt-threads.
			 * these threads iterate all pages of all tablespaces
			 * and don't know about fil_page_type.
			 * But scrubbing is only needed for index-pages. */

			/**
			 * NOTE: scrubbing is also needed for UNDO pages,
			 * but they are scrubbed at purge-time, since they are
			 * uncompressed
			 */

			/* if encountering page type not needing scrubbing
			release reference to table object */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}

		if (!page_has_garbage(page)) {
			/* no garbage (from deleted/shrunken records) */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}

	} else if (allocated == BTR_SCRUB_PAGE_FREE ||
		   allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) {

		switch (fil_page_get_type(page)) {
		case FIL_PAGE_INDEX:
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			break;
		default:
			/**
			 * If this is a dropped page, we also need to scrub
			 * BLOB pages
			 */

			/* if encountering page type not needing scrubbing
			release reference to table object */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}
	}

	if (block->page.id.space() == TRX_SYS_SPACE
	    && btr_page_get_index_id(page) == IBUF_INDEX_ID) {
		/* skip ibuf */
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	return BTR_SCRUB_PAGE;
}

/****************************************************************
Handle a skipped page
*/
UNIV_INTERN
void
btr_scrub_skip_page(
/*==================*/
	btr_scrub_t*	scrub_data,	/*!< in: data with scrub state */
	int		needs_scrubbing)/*!< in: return code from
					btr_page_needs_scrubbing */
{
	switch (needs_scrubbing) {
	case BTR_SCRUB_SKIP_PAGE:
		/* nothing to do */
		return;
	case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE:
		btr_scrub_table_close_for_thread(scrub_data);
		return;
	case BTR_SCRUB_TURNED_OFF:
	case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE:
		btr_scrub_complete_space(scrub_data);
		return;
	}

	/* unknown value. should not happen */
	ut_a(0);
}

/****************************************************************
Try to scrub a page using btr_page_reorganize_low
return DB_SUCCESS on success or DB_OVERFLOW on failure */
static
dberr_t
btr_optimistic_scrub(
/*==================*/
	btr_scrub_t*	scrub_data,	/*!< in: data with scrub state */
	buf_block_t*	block,		/*!< in: block to scrub */
	dict_index_t*	index,		/*!< in: index */
	mtr_t*		mtr)		/*!< in: mtr */
{
#ifdef UNIV_DEBUG
	if (srv_scrub_force_testing &&
	    page_get_n_recs(buf_block_get_frame(block)) > 2 &&
	    (rand() % 100) < test_pessimistic_scrub_pct) {

		log_scrub_failure(index, scrub_data, block, DB_OVERFLOW);
		return DB_OVERFLOW;
	}
#endif

	page_cur_t cur;
	page_cur_set_before_first(block, &cur);
	bool recovery = false;
	if (!btr_page_reorganize_low(recovery, scrub_compression_level,
				     &cur, index, mtr)) {
		return DB_OVERFLOW;
	}

	/* We play safe and reset the free bits */
	if (!dict_index_is_clust(index) &&
	    block != NULL) {
		buf_frame_t* frame = buf_block_get_frame(block);
		if (frame &&
		    page_is_leaf(frame)) {

			ibuf_reset_free_bits(block);
		}
	}

	scrub_data->scrub_stat.page_reorganizations++;

	return DB_SUCCESS;
}
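
/*
  Why reorganizing counts as scrubbing (an explanatory note that
  summarizes the mechanism; see btr_page_reorganize_low() in
  btr0btr.cc for the authoritative implementation): the reorganize
  recreates the page and copies back only the live records, so the
  bytes previously occupied by deleted records (the garbage detected
  by page_has_garbage()) no longer hold old row contents once the
  page is written out. For compressed pages the page is additionally
  recompressed, here at scrub_compression_level.
*/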

/****************************************************************
Try to scrub a page by splitting it
return DB_SUCCESS on success
DB_UNDERFLOW if page has too few records
DB_OUT_OF_FILE_SPACE if we can't find space for split */
static
dberr_t
btr_pessimistic_scrub(
/*==================*/
	btr_scrub_t*	scrub_data,	/*!< in: data with scrub state */
	buf_block_t*	block,		/*!< in: block to scrub */
	dict_index_t*	index,		/*!< in: index */
	mtr_t*		mtr)		/*!< in: mtr */
{
	page_t*	page = buf_block_get_frame(block);

	if (page_get_n_recs(page) < 2) {
		/**
		 * There is no way we can split a page with < 2 records
		 */
		log_scrub_failure(index, scrub_data, block, DB_UNDERFLOW);
		return DB_UNDERFLOW;
	}

	/**
	 * Splitting page needs new space, allocate it here
	 * so that splitting won't fail due to this */
	ulint n_extents = 3;
	ulint n_reserved = 0;
	if (!fsp_reserve_free_extents(&n_reserved, index->table->space,
				      n_extents, FSP_NORMAL, mtr)) {
		log_scrub_failure(index, scrub_data, block,
				  DB_OUT_OF_FILE_SPACE);
		return DB_OUT_OF_FILE_SPACE;
	}

	/* read block variables */
	const ulint page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
	const ulint left_page_no = mach_read_from_4(page + FIL_PAGE_PREV);
	const ulint right_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
	const page_size_t page_size(index->table->space->flags);

	/**
	 * When splitting a page, we need X-latches on the left/right
	 * siblings; see e.g. btr_cur_latch_leaves
	 */

	if (left_page_no != FIL_NULL) {
		/**
		 * Pages need to be latched left-to-right, so release
		 * the block and re-latch it. We still hold the x-lock
		 * on the index, so this should be safe.
		 */
		mtr->release_block_at_savepoint(scrub_data->savepoint, block);

		buf_block_t* get_block __attribute__((unused)) = btr_block_get(
			page_id_t(index->table->space->id, left_page_no),
			page_size, RW_X_LATCH, index, mtr);

		/**
		 * Refetch block and re-initialize page
		 */
		block = btr_block_get(
			page_id_t(index->table->space->id, page_no),
			page_size, RW_X_LATCH, index, mtr);

		page = buf_block_get_frame(block);

		/**
		 * structure should be unchanged
		 */
		ut_a(left_page_no == btr_page_get_prev(page, mtr));
		ut_a(right_page_no == btr_page_get_next(page, mtr));
	}

	if (right_page_no != FIL_NULL) {
		buf_block_t* get_block __attribute__((unused)) = btr_block_get(
			page_id_t(index->table->space->id, right_page_no),
			page_size, RW_X_LATCH, index, mtr);
	}

	/* arguments to btr_page_split_and_insert */
	mem_heap_t* heap = NULL;
	dtuple_t* entry = NULL;
	ulint* offsets = NULL;
	ulint n_ext = 0;
	ulint flags = BTR_MODIFY_TREE;

	/**
	 * position a cursor on first record on page
	 */
	rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
	btr_cur_t cursor;
	btr_cur_position(index, rec, block, &cursor);

	/**
	 * call split page with NULL as argument for entry to insert
	 */
	if (dict_index_get_page(index) == page_no) {
		/* The page is the root page
		 * NOTE: ibuf_reset_free_bits is called inside
		 * btr_root_raise_and_insert */
		rec = btr_root_raise_and_insert(
			flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
	} else {
		/* We play safe and reset the free bits
		 * NOTE: need to call this prior to btr_page_split_and_insert */
		if (!dict_index_is_clust(index) &&
		    block != NULL) {
			buf_frame_t* frame = buf_block_get_frame(block);
			if (frame &&
			    page_is_leaf(frame)) {

				ibuf_reset_free_bits(block);
			}
		}

		rec = btr_page_split_and_insert(
			flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
	}

	if (heap) {
		mem_heap_free(heap);
	}

	index->table->space->release_free_extents(n_reserved);
	scrub_data->scrub_stat.page_splits++;
	return DB_SUCCESS;
}
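
/*
  Design note (added explanation, not from the original author): the
  split works as a scrubbing fallback because btr_page_split_and_insert()
  moves the records into freshly created pages, rewriting them much
  as a reorganize would, and it can succeed even when a compressed
  page cannot be recompressed in place (the DB_OVERFLOW case above).
  Passing entry == NULL means no record is actually inserted; the
  cursor on the first user record appears intended only to anchor
  the split position.
*/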

/****************************************************************
Locate an index by id within a table
return the index or NULL */
static
dict_index_t*
find_index(
/*========*/
	dict_table_t*	table,		/*!< in: table */
	index_id_t	index_id)	/*!< in: index id */
{
	if (table != NULL) {
		dict_index_t* index = dict_table_get_first_index(table);
		while (index != NULL) {
			if (index->id == index_id)
				return index;
			index = dict_table_get_next_index(index);
		}
	}

	return NULL;
}

/****************************************************************
Check if table should be scrubbed
*/
static
bool
btr_scrub_table_needs_scrubbing(
/*============================*/
	dict_table_t*	table)	/*!< in: table */
{
	if (table == NULL)
		return false;

	if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) {
		return false;
	}

	if (table->to_be_dropped) {
		return false;
	}

	if (!table->is_readable()) {
		return false;
	}

	return true;
}

/****************************************************************
Check if index should be scrubbed
*/
static
bool
btr_scrub_index_needs_scrubbing(
/*============================*/
	dict_index_t*	index)	/*!< in: index */
{
	if (index == NULL)
		return false;

	if (dict_index_is_ibuf(index)) {
		return false;
	}

	if (dict_index_is_online_ddl(index)) {
		return false;
	}

	return true;
}

/****************************************************************
Get table and index and store it on scrub_data
*/
static
void
btr_scrub_get_table_and_index(
/*=========================*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	index_id_t	index_id)	/*!< in: index id */
{
	/* first check if it's an index of the current table */
	scrub_data->current_index = find_index(scrub_data->current_table,
					       index_id);

	if (scrub_data->current_index != NULL) {
		/* yes it was */
		return;
	}

	if (!btr_scrub_lock_dict(scrub_data->space, false)) {
		btr_scrub_complete_space(scrub_data);
		return;
	}

	/* close current table (if any) */
	if (scrub_data->current_table != NULL) {
		btr_scrub_table_close(scrub_data->current_table);
		scrub_data->current_table = NULL;
	}

	/* argument to dict_table_open_on_index_id */
	bool dict_locked = true;

	/* open table based on index_id */
	dict_table_t* table = dict_table_open_on_index_id(
		index_id,
		dict_locked);

	if (table != NULL) {
		/* mark table as being scrubbed */
		table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS;

		if (!btr_scrub_table_needs_scrubbing(table)) {
			btr_scrub_table_close(table);
			btr_scrub_unlock_dict();
			return;
		}
	}

	btr_scrub_unlock_dict();
	scrub_data->current_table = table;
	scrub_data->current_index = find_index(table, index_id);
}

/****************************************************************
Handle free page */
UNIV_INTERN
int
btr_scrub_free_page(
/*====================*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	buf_block_t*	block,		/*!< in: block to scrub */
	mtr_t*		mtr)		/*!< in: mtr */
{
	// TODO(jonaso): scrub only what is actually needed

	{
		/* note: perform both the memset and the setting of
		 * FIL_PAGE_TYPE without logging, so that if we crash
		 * before the page is flushed it will be found by the
		 * scrubbing thread again
		 */
		memset(buf_block_get_frame(block) + PAGE_HEADER, 0,
		       srv_page_size - PAGE_HEADER);

		mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE,
				FIL_PAGE_TYPE_ALLOCATED);
	}

	page_create(block, mtr,
		    dict_table_is_comp(scrub_data->current_table),
		    dict_index_is_spatial(scrub_data->current_index));

	mtr_commit(mtr);

	/* page doesn't need further processing => SKIP
	 * and close table/index so that we don't keep references too long */
	return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}

/****************************************************************
Recheck if a page needs scrubbing, and if it does load appropriate
table and index */
UNIV_INTERN
int
btr_scrub_recheck_page(
/*====================*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	buf_block_t*	block,		/*!< in: block */
	btr_scrub_page_allocation_status_t
			allocated,	/*!< in: is block
					allocated or free */
	mtr_t*		mtr)		/*!< in: mtr */
{
	/* recheck if page needs scrubbing (knowing allocation status) */
	int needs_scrubbing = btr_page_needs_scrubbing(
		scrub_data, block, allocated);

	if (needs_scrubbing != BTR_SCRUB_PAGE) {
		mtr_commit(mtr);
		return needs_scrubbing;
	}

	if (allocated == BTR_SCRUB_PAGE_FREE) {
		/** we don't need to load table/index for free pages
		 * so scrub directly here */
		/* mtr is committed inside btr_scrub_free_page */
		return btr_scrub_free_page(scrub_data,
					   block,
					   mtr);
	}

	page_t*	page = buf_block_get_frame(block);
	index_id_t index_id = btr_page_get_index_id(page);

	if (scrub_data->current_index == NULL ||
	    scrub_data->current_index->id != index_id) {

		/**
		 * commit mtr (i.e. release locks on block)
		 * and try to get table & index, potentially loading
		 * them from disk
		 */
		mtr_commit(mtr);
		btr_scrub_get_table_and_index(scrub_data, index_id);
	} else {
		/* we already have the correct index;
		 * commit mtr so that we can lock the index before
		 * fetching the page
		 */
		mtr_commit(mtr);
	}

	/* check if table is about to be dropped */
	if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	/* check if index is scrubbable */
	if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	mtr_start(mtr);
	mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr);
	/** set savepoint for X-latch of block */
	scrub_data->savepoint = mtr_set_savepoint(mtr);
	return BTR_SCRUB_PAGE;
}
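
/*
  How the recheck/scrub handshake is meant to be driven (an
  illustrative sketch of a caller, loosely modelled on the fil-crypt
  threads; not verbatim code from fil0crypt.cc):

	mtr_start(&mtr);
	buf_block_t* block = ...fetch the page under the mtr...;
	int ret = btr_scrub_recheck_page(&scrub, block, allocated, &mtr);
	if (ret == BTR_SCRUB_PAGE) {
		// the mtr was committed and restarted inside
		// btr_scrub_recheck_page() with the index X-latched,
		// so the block must be fetched again under the new mtr
		block = ...fetch the page again...;
		ret = btr_scrub_page(&scrub, block, allocated, &mtr);
	}
	// on any skip code, the mtr has already been committed
	btr_scrub_skip_page(&scrub, ret);
*/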

/****************************************************************
Perform actual scrubbing of page */
UNIV_INTERN
int
btr_scrub_page(
/*============*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	buf_block_t*	block,		/*!< in: block */
	btr_scrub_page_allocation_status_t
			allocated,	/*!< in: is block
					allocated or free */
	mtr_t*		mtr)		/*!< in: mtr */
{
	/* recheck if page needs scrubbing (knowing allocation status) */
	int needs_scrubbing = BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;

	if (block) {
		needs_scrubbing = btr_page_needs_scrubbing(
			scrub_data, block, allocated);
	}

	if (!block || needs_scrubbing != BTR_SCRUB_PAGE) {
		mtr_commit(mtr);
		return needs_scrubbing;
	}

	if (allocated == BTR_SCRUB_PAGE_FREE) {
		/* mtr is committed inside btr_scrub_free_page */
		return btr_scrub_free_page(scrub_data,
					   block,
					   mtr);
	}

	/* check that table/index still match now that they are loaded */

	if (!scrub_data->current_table->space
	    || scrub_data->current_table->space->id != scrub_data->space) {
		/* the table has been truncated */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	if (scrub_data->current_index->table != scrub_data->current_table) {
		/* the table has been truncated */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	if (scrub_data->current_index->page == FIL_NULL) {
		/* the table has been truncated */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	buf_frame_t* frame = buf_block_get_frame(block);

	if (!frame || btr_page_get_index_id(frame) !=
	    scrub_data->current_index->id) {
		/* page has been reallocated to a new index */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	/* check if we can scrub (reorganize) the page w/o overflow */
	if (btr_optimistic_scrub(scrub_data,
				 block,
				 scrub_data->current_index,
				 mtr) != DB_SUCCESS) {

		/**
		 * Can't reorganize page...need to split it
		 */
		btr_pessimistic_scrub(scrub_data,
				      block,
				      scrub_data->current_index,
				      mtr);
	}
	mtr_commit(mtr);

	return BTR_SCRUB_SKIP_PAGE;	// no further action needed
}

/**************************************************************//**
Start iterating a space */
UNIV_INTERN
bool
btr_scrub_start_space(
/*===================*/
	ulint		space,		/*!< in: space */
	btr_scrub_t*	scrub_data)	/*!< in/out: scrub data */
{
	bool found;
	scrub_data->space = space;
	scrub_data->current_table = NULL;
	scrub_data->current_index = NULL;
	const page_size_t page_size = fil_space_get_page_size(space, &found);

	scrub_data->compressed = page_size.is_compressed();
	scrub_data->scrubbing = check_scrub_setting(scrub_data);
	return scrub_data->scrubbing;
}
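
/*
  Putting the pieces together: a sketch of how a background thread is
  expected to drive scrubbing of one tablespace (hypothetical driver
  code; the real loop lives with the fil-crypt threads):

	btr_scrub_t scrub;
	if (btr_scrub_start_space(space_id, &scrub)) {
		for (/* each page in the space */;;) {
			int ret = btr_page_needs_scrubbing(
				&scrub, block, allocated);
			if (ret == BTR_SCRUB_PAGE) {
				// proceed via btr_scrub_recheck_page()
				// and btr_scrub_page(), see above
			} else {
				btr_scrub_skip_page(&scrub, ret);
			}
		}
		btr_scrub_complete_space(&scrub);
	}
*/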

/***********************************************************************
Update global statistics with thread statistics */
static
void
btr_scrub_update_total_stat(btr_scrub_t* scrub_data)
{
	mutex_enter(&scrub_stat_mutex);
	scrub_stat.page_reorganizations +=
		scrub_data->scrub_stat.page_reorganizations;
	scrub_stat.page_splits +=
		scrub_data->scrub_stat.page_splits;
	scrub_stat.page_split_failures_underflow +=
		scrub_data->scrub_stat.page_split_failures_underflow;
	scrub_stat.page_split_failures_out_of_filespace +=
		scrub_data->scrub_stat.page_split_failures_out_of_filespace;
	scrub_stat.page_split_failures_missing_index +=
		scrub_data->scrub_stat.page_split_failures_missing_index;
	scrub_stat.page_split_failures_unknown +=
		scrub_data->scrub_stat.page_split_failures_unknown;
	mutex_exit(&scrub_stat_mutex);

	// clear stat
	memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat));
}

/** Complete iterating a space.
@param[in,out]	scrub_data	scrub data */
UNIV_INTERN
void
btr_scrub_complete_space(btr_scrub_t* scrub_data)
{
	ut_ad(scrub_data->scrubbing);
	btr_scrub_table_close_for_thread(scrub_data);
	btr_scrub_update_total_stat(scrub_data);
}

/*********************************************************************
Return scrub statistics */
void
btr_scrub_total_stat(btr_scrub_stat_t* stat)
{
	mutex_enter(&scrub_stat_mutex);
	*stat = scrub_stat;
	mutex_exit(&scrub_stat_mutex);
}
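
/*
  Example of consuming the aggregated counters (hypothetical caller,
  e.g. a status or information-schema handler):

	btr_scrub_stat_t stat;
	btr_scrub_total_stat(&stat);
	// stat is now a consistent snapshot taken under scrub_stat_mutex
*/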

/*********************************************************************
Init global variables */
UNIV_INTERN
void
btr_scrub_init()
{
	mutex_create(LATCH_ID_SCRUB_STAT_MUTEX, &scrub_stat_mutex);

	memset(&scrub_stat, 0, sizeof(scrub_stat));
}

/*********************************************************************
Cleanup globals */
UNIV_INTERN
void
btr_scrub_cleanup()
{
	mutex_free(&scrub_stat_mutex);
}