// Copyright (c) 2014, Google Inc.
// Copyright (c) 2017, MariaDB Corporation.

/**************************************************//**
@file btr/btr0scrub.cc
Scrubbing of btree pages

*******************************************************/

#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0scrub.h"
#include "ibuf0ibuf.h"
#include "fsp0fsp.h"
#include "dict0dict.h"
#include "mtr0mtr.h"

/* used when trying to acquire dict-lock */
UNIV_INTERN bool fil_crypt_is_closing(ulint space);

/**
* Scrub data at delete time (e.g. by the purge thread)
*/
my_bool srv_immediate_scrub_data_uncompressed = false;

/**
* Background scrubbing of uncompressed data
*
* if srv_immediate_scrub_data_uncompressed is enabled
* this is only needed to handle "old" data
*/
my_bool srv_background_scrub_data_uncompressed = false;

/**
* Background scrubbing of compressed data
*
* reorganizes the compressed page for scrubbing
* (the only way to scrub compressed data)
*/
my_bool srv_background_scrub_data_compressed = false;

/* check spaces once per hour */
UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60);

/* default to scrubbing spaces that haven't been scrubbed in a week */
UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60);
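
/* Note: these knobs are surfaced as server options; assuming the usual
MariaDB naming convention they correspond to
innodb_immediate_scrub_data_uncompressed and the
innodb_background_scrub_data_* family. See the InnoDB system variable
definitions for the authoritative names and defaults. */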

/**
* statistics for scrubbing by background threads
*/
static btr_scrub_stat_t scrub_stat;
static ib_mutex_t scrub_stat_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key;
#endif

#ifdef UNIV_DEBUG
/**
* srv_scrub_force_testing
*
* - force scrubbing using background threads even for uncompressed tables
* - force pessimistic scrubbing (page split) even if not needed
*   (see test_pessimistic_scrub_pct)
*/
my_bool srv_scrub_force_testing = true;

/**
* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only)
*/
static int test_pessimistic_scrub_pct = 50;

#endif
static uint scrub_compression_level = page_zip_level;

/**************************************************************//**
Log a scrubbing failure */
static
void
log_scrub_failure(
/*===============*/
	dict_index_t*	index,		/*!< in: index */
	btr_scrub_t*	scrub_data,	/*!< in: data to store statistics on */
	buf_block_t*	block,		/*!< in: block */
	dberr_t		err)		/*!< in: error */
{
	const char* reason = "unknown";
	switch (err) {
	case DB_UNDERFLOW:
		reason = "too few records on page";
		scrub_data->scrub_stat.page_split_failures_underflow++;
		break;
	case DB_INDEX_CORRUPT:
		reason = "unable to find index!";
		scrub_data->scrub_stat.page_split_failures_missing_index++;
		break;
	case DB_OUT_OF_FILE_SPACE:
		reason = "out of filespace";
		scrub_data->scrub_stat.page_split_failures_out_of_filespace++;
		break;
	default:
		ut_ad(0);
		reason = "unknown";
		scrub_data->scrub_stat.page_split_failures_unknown++;
	}

	ib::warn() << "Failed to scrub index " << index->name
		   << " of table " << index->table->name
		   << " page " << block->page.id << ": " << reason;
}

/****************************************************************
Lock the dict_sys mutex */
static
bool
btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
			 const char* file, uint line)
{
	time_t start = time(0);
	time_t last = start;

	/* FIXME: this is not the proper way of doing things. The
	dict_sys->mutex should not be held by any thread for longer
	than a few microseconds. It must not be held during I/O,
	for example. So, what is the purpose of this busy-waiting?
	This function should be rewritten as part of MDEV-8139:
	Fix scrubbing tests. */

	while (mutex_enter_nowait(&(dict_sys->mutex))) {
		/* If we are locking in order to close a table, wait
		* forever. Otherwise, check whether the tablespace is
		* being closed, and if so give up instead.
		*/
		if (lock_to_close_table) {
		} else if (fil_space_t* space = fil_space_acquire(space_id)) {
			bool stopping = space->is_stopping();
			space->release();
			if (stopping) {
				return false;
			}
		} else {
			return false;
		}

		os_thread_sleep(250000);

		time_t now = time(0);

		if (now >= last + 30) {
			fprintf(stderr,
				"WARNING: %s:%u waited %ld seconds for"
				" dict_sys lock, space: " ULINTPF
				" lock_to_close_table: %d\n",
				file, line, long(now - start), space_id,
				lock_to_close_table);

			last = now;
		}
	}

	ut_ad(mutex_own(&dict_sys->mutex));
	return true;
}

#define btr_scrub_lock_dict(space, lock_to_close_table) \
	btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__)

/****************************************************************
Unlock the dict_sys mutex */
static
void
btr_scrub_unlock_dict()
{
	dict_mutex_exit_for_mysql();
}

/****************************************************************
Release a reference to a table
*/
static
void
btr_scrub_table_close(
/*==================*/
	dict_table_t*	table)	/*!< in: table */
{
	bool dict_locked = true;
	bool try_drop = false;
	table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS;
	dict_table_close(table, dict_locked, try_drop);
}

/****************************************************************
Release the thread's reference to its current table (if any)
*/
static
void
btr_scrub_table_close_for_thread(
	btr_scrub_t*	scrub_data)
{
	if (scrub_data->current_table == NULL) {
		return;
	}

	if (fil_space_t* space = fil_space_acquire(scrub_data->space)) {
		/* If the tablespace is not marked as stopping,
		perform the actual close. */
		if (!space->is_stopping()) {
			mutex_enter(&dict_sys->mutex);
			/* perform the actual closing */
			btr_scrub_table_close(scrub_data->current_table);
			mutex_exit(&dict_sys->mutex);
		}
		space->release();
	}

	scrub_data->current_table = NULL;
	scrub_data->current_index = NULL;
}

/**************************************************************//**
Check if scrubbing is turned ON or OFF */
static
bool
check_scrub_setting(
/*=====================*/
	btr_scrub_t*	scrub_data)	/*!< in: scrub data */
{
	if (scrub_data->compressed)
		return srv_background_scrub_data_compressed;
	else
		return srv_background_scrub_data_uncompressed;
}

#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID)
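
/* The change buffer B-tree resides in the system tablespace and has a
fixed index id; pages carrying this id are recognized and skipped
below, since the change buffer is maintained by its own module and
must not be scrubbed here. */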

/**************************************************************//**
Check if a page needs scrubbing */
UNIV_INTERN
int
btr_page_needs_scrubbing(
/*=====================*/
	btr_scrub_t*	scrub_data,	/*!< in: scrub data */
	buf_block_t*	block,		/*!< in: block to check, latched */
	btr_scrub_page_allocation_status_t	allocated)	/*!< in: is block known
								to be allocated */
{
	/**
	* Check if scrubbing has been turned OFF.
	*
	* At the start of a space we check whether scrubbing is ON or OFF;
	* here we only check whether it has been turned OFF.
	*
	* The motivation is that scrubbing is only valuable when a full
	* table (space) is scrubbed.
	*/
	if (!check_scrub_setting(scrub_data)) {
		bool before_value = scrub_data->scrubbing;
		scrub_data->scrubbing = false;

		if (before_value == true) {
			/* we toggled scrubbing from on to off */
			return BTR_SCRUB_TURNED_OFF;
		}
	}

	if (scrub_data->scrubbing == false) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	const page_t*	page = buf_block_get_frame(block);

	if (allocated == BTR_SCRUB_PAGE_ALLOCATED) {
		if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
			/* this function is called from the fil-crypt threads.
			* These threads iterate all pages of all tablespaces
			* without filtering on page type,
			* but scrubbing is only needed for index pages. */

			/**
			* NOTE: scrubbing is also needed for UNDO pages,
			* but they are scrubbed at purge-time, since they are
			* uncompressed
			*/

			/* if encountering a page type that does not need
			scrubbing, release the reference to the table object */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}

		if (!page_has_garbage(page)) {
			/* no garbage (from deleted/shrunken records) */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}

	} else if (allocated == BTR_SCRUB_PAGE_FREE ||
		   allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) {

		switch (fil_page_get_type(page)) {
		case FIL_PAGE_INDEX:
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			break;
		default:
			/**
			* If this is a dropped page, we also need to scrub
			* BLOB pages
			*/

			/* if encountering a page type that does not need
			scrubbing, release the reference to the table object */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}
	}

	if (block->page.id.space() == TRX_SYS_SPACE
	    && btr_page_get_index_id(page) == IBUF_INDEX_ID) {
		/* skip ibuf */
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	return BTR_SCRUB_PAGE;
}
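
/* A minimal sketch of how a background (fil-crypt) thread is expected
to drive the functions in this file; the variable names and the
block-fetching details are illustrative only, and the mtr commits on
the skip paths are handled inside the callees:

	mtr_t	mtr;
	mtr_start(&mtr);
	buf_block_t*	block = ...fetch the page X-latched under mtr...;
	int ret = btr_page_needs_scrubbing(scrub_data, block, allocated);
	if (ret == BTR_SCRUB_PAGE) {
		ret = btr_scrub_recheck_page(scrub_data, block,
					     allocated, &mtr);
		if (ret == BTR_SCRUB_PAGE) {
			block = ...refetch the page under the restarted mtr...;
			ret = btr_scrub_page(scrub_data, block,
					     allocated, &mtr);
		}
	}
	if (ret != BTR_SCRUB_PAGE) {
		btr_scrub_skip_page(scrub_data, ret);
	}
*/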

/****************************************************************
Handle a skipped page
*/
UNIV_INTERN
void
btr_scrub_skip_page(
/*==================*/
	btr_scrub_t*	scrub_data,	/*!< in: data with scrub state */
	int	needs_scrubbing)	/*!< in: return code from
					btr_page_needs_scrubbing */
{
	switch (needs_scrubbing) {
	case BTR_SCRUB_SKIP_PAGE:
		/* nothing to do */
		return;
	case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE:
		btr_scrub_table_close_for_thread(scrub_data);
		return;
	case BTR_SCRUB_TURNED_OFF:
	case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE:
		btr_scrub_complete_space(scrub_data);
		return;
	}

	/* unknown value. should not happen */
	ut_a(0);
}

/****************************************************************
Try to scrub a page using btr_page_reorganize_low.
Return DB_SUCCESS on success or DB_OVERFLOW on failure */
static
dberr_t
btr_optimistic_scrub(
/*==================*/
	btr_scrub_t*	scrub_data,	/*!< in: data with scrub state */
	buf_block_t*	block,		/*!< in: block to scrub */
	dict_index_t*	index,		/*!< in: index */
	mtr_t*		mtr)		/*!< in: mtr */
{
#ifdef UNIV_DEBUG
	if (srv_scrub_force_testing &&
	    page_get_n_recs(buf_block_get_frame(block)) > 2 &&
	    (rand() % 100) < test_pessimistic_scrub_pct) {

		log_scrub_failure(index, scrub_data, block, DB_OVERFLOW);
		return DB_OVERFLOW;
	}
#endif

	page_cur_t cur;
	page_cur_set_before_first(block, &cur);
	bool recovery = false;
	if (!btr_page_reorganize_low(recovery, scrub_compression_level,
				     &cur, index, mtr)) {
		return DB_OVERFLOW;
	}

	/* We play it safe and reset the free bits */
	if (!dict_index_is_clust(index) &&
	    block != NULL) {
		buf_frame_t* frame = buf_block_get_frame(block);
		if (frame &&
		    page_is_leaf(frame)) {

			ibuf_reset_free_bits(block);
		}
	}

	scrub_data->scrub_stat.page_reorganizations++;

	return DB_SUCCESS;
}
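
/* Design note (a reading of the code above): btr_page_reorganize_low()
rebuilds the page from its remaining records, so leftover bytes of
deleted records are wiped as a side effect; that side effect is what
makes it usable as the optimistic scrubbing primitive. */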

/****************************************************************
Try to scrub a page by splitting it.
Return DB_SUCCESS on success,
DB_UNDERFLOW if the page has too few records,
DB_OUT_OF_FILE_SPACE if we can't find space for the split */
static
dberr_t
btr_pessimistic_scrub(
/*==================*/
	btr_scrub_t*	scrub_data,	/*!< in: data with scrub state */
	buf_block_t*	block,		/*!< in: block to scrub */
	dict_index_t*	index,		/*!< in: index */
	mtr_t*		mtr)		/*!< in: mtr */
{
	page_t*	page = buf_block_get_frame(block);

	if (page_get_n_recs(page) < 2) {
		/**
		* There is no way we can split a page with < 2 records
		*/
		log_scrub_failure(index, scrub_data, block, DB_UNDERFLOW);
		return DB_UNDERFLOW;
	}

	/**
	* Splitting the page needs new space; reserve it here so that
	* the split won't fail for lack of space. The reservation is
	* released once the split is done. */
	ulint n_extents = 3;
	ulint n_reserved = 0;
	if (!fsp_reserve_free_extents(&n_reserved, index->table->space,
				      n_extents, FSP_NORMAL, mtr)) {
		log_scrub_failure(index, scrub_data, block,
				  DB_OUT_OF_FILE_SPACE);
		return DB_OUT_OF_FILE_SPACE;
	}

	/* read block variables */
	const ulint page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
	const ulint left_page_no = mach_read_from_4(page + FIL_PAGE_PREV);
	const ulint right_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
	const page_size_t page_size(index->table->space->flags);

	/**
	* When splitting a page we need X-latches on the left/right
	* siblings; see e.g. btr_cur_latch_leaves()
	*/

	if (left_page_no != FIL_NULL) {
		/**
		* Pages need to be latched left-to-right, so release the
		* block and re-latch it. We still have an X-lock on the
		* index, so this should be safe
		*/
		mtr->release_block_at_savepoint(scrub_data->savepoint, block);

		buf_block_t* get_block __attribute__((unused)) = btr_block_get(
			page_id_t(index->table->space->id, left_page_no),
			page_size, RW_X_LATCH, index, mtr);

		/**
		* Refetch the block and re-initialize the page
		*/
		block = btr_block_get(
			page_id_t(index->table->space->id, page_no),
			page_size, RW_X_LATCH, index, mtr);

		page = buf_block_get_frame(block);

		/**
		* the structure should be unchanged
		*/
		ut_a(left_page_no == btr_page_get_prev(page, mtr));
		ut_a(right_page_no == btr_page_get_next(page, mtr));
	}

	if (right_page_no != FIL_NULL) {
		buf_block_t* get_block __attribute__((unused)) = btr_block_get(
			page_id_t(index->table->space->id, right_page_no),
			page_size, RW_X_LATCH, index, mtr);
	}

	/* arguments to btr_page_split_and_insert */
	mem_heap_t*	heap = NULL;
	dtuple_t*	entry = NULL;
	ulint*		offsets = NULL;
	ulint		n_ext = 0;
	ulint		flags = BTR_MODIFY_TREE;

	/**
	* position a cursor on the first record on the page
	*/
	rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
	btr_cur_t cursor;
	btr_cur_position(index, rec, block, &cursor);

	/**
	* call the split with NULL as the entry to insert,
	* i.e. split the page without inserting anything
	*/
	if (dict_index_get_page(index) == page_no) {
		/* The page is the root page.
		* NOTE: ibuf_reset_free_bits is called inside
		* btr_root_raise_and_insert */
		rec = btr_root_raise_and_insert(
			flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
	} else {
		/* We play it safe and reset the free bits.
		* NOTE: this must be called prior to btr_page_split_and_insert */
		if (!dict_index_is_clust(index) &&
		    block != NULL) {
			buf_frame_t* frame = buf_block_get_frame(block);
			if (frame &&
			    page_is_leaf(frame)) {

				ibuf_reset_free_bits(block);
			}
		}

		rec = btr_page_split_and_insert(
			flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
	}

	if (heap) {
		mem_heap_free(heap);
	}

	index->table->space->release_free_extents(n_reserved);
	scrub_data->scrub_stat.page_splits++;
	return DB_SUCCESS;
}
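
/* Design note (a reading of the code above, not documented elsewhere):
splitting with a NULL entry pushes the records through the regular
split machinery, which rebuilds both resulting pages and thereby
destroys remnants of deleted records even when a plain reorganize
would overflow (e.g. on compressed pages). */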

/****************************************************************
Locate an index by id within a table.
Return the index, or NULL if not found */
static
dict_index_t*
find_index(
/*========*/
	dict_table_t*	table,		/*!< in: table */
	index_id_t	index_id)	/*!< in: index id */
{
	if (table != NULL) {
		dict_index_t* index = dict_table_get_first_index(table);
		while (index != NULL) {
			if (index->id == index_id)
				return index;
			index = dict_table_get_next_index(index);
		}
	}

	return NULL;
}

/****************************************************************
Check if a table should be scrubbed
*/
static
bool
btr_scrub_table_needs_scrubbing(
/*============================*/
	dict_table_t*	table)	/*!< in: table */
{
	if (table == NULL)
		return false;

	if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) {
		return false;
	}

	if (table->to_be_dropped) {
		return false;
	}

	if (!table->is_readable()) {
		return false;
	}

	return true;
}

/****************************************************************
Check if an index should be scrubbed
*/
static
bool
btr_scrub_index_needs_scrubbing(
/*============================*/
	dict_index_t*	index)	/*!< in: index */
{
	if (index == NULL)
		return false;

	if (dict_index_is_ibuf(index)) {
		return false;
	}

	if (dict_index_is_online_ddl(index)) {
		return false;
	}

	return true;
}

/****************************************************************
Get the table and index and store them in scrub_data
*/
static
void
btr_scrub_get_table_and_index(
/*=========================*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	index_id_t	index_id)	/*!< in: index id */
{
	/* first check if it's an index of the current table */
	scrub_data->current_index = find_index(scrub_data->current_table,
					       index_id);

	if (scrub_data->current_index != NULL) {
		/* yes it was */
		return;
	}

	if (!btr_scrub_lock_dict(scrub_data->space, false)) {
		btr_scrub_complete_space(scrub_data);
		return;
	}

	/* close the current table (if any) */
	if (scrub_data->current_table != NULL) {
		btr_scrub_table_close(scrub_data->current_table);
		scrub_data->current_table = NULL;
	}

	/* argument to dict_table_open_on_index_id */
	bool dict_locked = true;

	/* open the table based on index_id */
	dict_table_t* table = dict_table_open_on_index_id(
		index_id,
		dict_locked);

	if (table != NULL) {
		/* mark the table as being scrubbed */
		table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS;

		if (!btr_scrub_table_needs_scrubbing(table)) {
			btr_scrub_table_close(table);
			btr_scrub_unlock_dict();
			return;
		}
	}

	btr_scrub_unlock_dict();
	scrub_data->current_table = table;
	scrub_data->current_index = find_index(table, index_id);
}

/****************************************************************
Handle a free page */
UNIV_INTERN
int
btr_scrub_free_page(
/*====================*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	buf_block_t*	block,		/*!< in: block to scrub */
	mtr_t*		mtr)		/*!< in: mtr */
{
	// TODO(jonaso): scrub only what is actually needed

	{
		/* note: perform both the memset and the setting of
		* FIL_PAGE_TYPE without logging, so that if we crash
		* before the page is flushed it will be found by the
		* scrubbing thread again
		*/
		memset(buf_block_get_frame(block) + PAGE_HEADER, 0,
		       srv_page_size - PAGE_HEADER);

		mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE,
				FIL_PAGE_TYPE_ALLOCATED);
	}

	page_create(block, mtr,
		    dict_table_is_comp(scrub_data->current_table),
		    dict_index_is_spatial(scrub_data->current_index));

	mtr_commit(mtr);

	/* the page doesn't need further processing => SKIP,
	* and close the table/index so that we don't keep references
	* for too long */
	return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}

/****************************************************************
Recheck if a page needs scrubbing, and if it does, load the
appropriate table and index */
UNIV_INTERN
int
btr_scrub_recheck_page(
/*====================*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	buf_block_t*	block,		/*!< in: block */
	btr_scrub_page_allocation_status_t	allocated,	/*!< in: is block
								allocated or free */
	mtr_t*		mtr)		/*!< in: mtr */
{
	/* recheck if the page needs scrubbing (now knowing the
	allocation status) */
	int needs_scrubbing = btr_page_needs_scrubbing(
		scrub_data, block, allocated);

	if (needs_scrubbing != BTR_SCRUB_PAGE) {
		mtr_commit(mtr);
		return needs_scrubbing;
	}

	if (allocated == BTR_SCRUB_PAGE_FREE) {
		/** we don't need to load a table/index for free pages,
		* so scrub directly here */
		/* mtr is committed inside btr_scrub_free_page */
		return btr_scrub_free_page(scrub_data,
					   block,
					   mtr);
	}

	page_t*	page = buf_block_get_frame(block);
	index_id_t index_id = btr_page_get_index_id(page);

	if (scrub_data->current_index == NULL ||
	    scrub_data->current_index->id != index_id) {

		/**
		* commit the mtr (i.e. release the locks on the block)
		* and try to get the table & index, potentially loading
		* them from disk
		*/
		mtr_commit(mtr);
		btr_scrub_get_table_and_index(scrub_data, index_id);
	} else {
		/* we already have the correct index;
		* commit the mtr so that we can lock the index before
		* refetching the page
		*/
		mtr_commit(mtr);
	}

	/* check if the table is about to be dropped */
	if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	/* check if the index is scrubbable */
	if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	mtr_start(mtr);
	mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr);
	/** set a savepoint for the X-latch of the block */
	scrub_data->savepoint = mtr_set_savepoint(mtr);
	return BTR_SCRUB_PAGE;
}

/****************************************************************
Perform the actual scrubbing of a page */
UNIV_INTERN
int
btr_scrub_page(
/*============*/
	btr_scrub_t*	scrub_data,	/*!< in/out: scrub data */
	buf_block_t*	block,		/*!< in: block */
	btr_scrub_page_allocation_status_t	allocated,	/*!< in: is block
								allocated or free */
	mtr_t*		mtr)		/*!< in: mtr */
{
	/* recheck if the page needs scrubbing (now knowing the
	allocation status) */
	int needs_scrubbing = BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;

	if (block) {
		needs_scrubbing = btr_page_needs_scrubbing(
			scrub_data, block, allocated);
	}

	if (!block || needs_scrubbing != BTR_SCRUB_PAGE) {
		mtr_commit(mtr);
		return needs_scrubbing;
	}

	if (allocated == BTR_SCRUB_PAGE_FREE) {
		/* mtr is committed inside btr_scrub_free_page */
		return btr_scrub_free_page(scrub_data,
					   block,
					   mtr);
	}

	/* check that the table/index still match now that they are loaded */

	if (!scrub_data->current_table->space
	    || scrub_data->current_table->space->id != scrub_data->space) {
		/* the table has been truncated */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	if (scrub_data->current_index->table != scrub_data->current_table) {
		/* the table has been truncated */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	if (scrub_data->current_index->page == FIL_NULL) {
		/* the table has been truncated */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	buf_frame_t* frame = buf_block_get_frame(block);

	if (!frame || btr_page_get_index_id(frame) !=
	    scrub_data->current_index->id) {
		/* the page has been reallocated to a new index */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	/* check if we can scrub (reorganize) the page without overflow */
	if (btr_optimistic_scrub(scrub_data,
				 block,
				 scrub_data->current_index,
				 mtr) != DB_SUCCESS) {

		/**
		* Can't reorganize the page...need to split it
		*/
		btr_pessimistic_scrub(scrub_data,
				      block,
				      scrub_data->current_index,
				      mtr);
	}
	mtr_commit(mtr);

	return BTR_SCRUB_SKIP_PAGE;	// no further action needed
}

/**************************************************************//**
Start iterating a space */
UNIV_INTERN
bool
btr_scrub_start_space(
/*===================*/
	ulint		space,		/*!< in: space */
	btr_scrub_t*	scrub_data)	/*!< in/out: scrub data */
{
	bool found;
	scrub_data->space = space;
	scrub_data->current_table = NULL;
	scrub_data->current_index = NULL;
	const page_size_t page_size = fil_space_get_page_size(space, &found);

	scrub_data->compressed = page_size.is_compressed();
	scrub_data->scrubbing = check_scrub_setting(scrub_data);
	return scrub_data->scrubbing;
}
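
/* Illustrative per-tablespace lifecycle (a sketch only; the actual
driver lives in the background encryption/scrubbing threads):

	btr_scrub_t	scrub_data;
	if (btr_scrub_start_space(space_id, &scrub_data)) {
		// ...iterate the pages of the tablespace, feeding each
		// page through the btr_page_needs_scrubbing() /
		// btr_scrub_recheck_page() / btr_scrub_page() sequence
		// sketched earlier in this file...
		btr_scrub_complete_space(&scrub_data);
	}
*/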

/***********************************************************************
Update global statistics with thread statistics */
static
void
btr_scrub_update_total_stat(btr_scrub_t* scrub_data)
{
	mutex_enter(&scrub_stat_mutex);
	scrub_stat.page_reorganizations +=
		scrub_data->scrub_stat.page_reorganizations;
	scrub_stat.page_splits +=
		scrub_data->scrub_stat.page_splits;
	scrub_stat.page_split_failures_underflow +=
		scrub_data->scrub_stat.page_split_failures_underflow;
	scrub_stat.page_split_failures_out_of_filespace +=
		scrub_data->scrub_stat.page_split_failures_out_of_filespace;
	scrub_stat.page_split_failures_missing_index +=
		scrub_data->scrub_stat.page_split_failures_missing_index;
	scrub_stat.page_split_failures_unknown +=
		scrub_data->scrub_stat.page_split_failures_unknown;
	mutex_exit(&scrub_stat_mutex);

	// clear the thread-local stats
	memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat));
}

/** Complete iterating a space.
@param[in,out]	scrub_data	scrub data */
UNIV_INTERN
void
btr_scrub_complete_space(btr_scrub_t* scrub_data)
{
	ut_ad(scrub_data->scrubbing);
	btr_scrub_table_close_for_thread(scrub_data);
	btr_scrub_update_total_stat(scrub_data);
}

/*********************************************************************
Return scrub statistics */
void
btr_scrub_total_stat(btr_scrub_stat_t* stat)
{
	mutex_enter(&scrub_stat_mutex);
	*stat = scrub_stat;
	mutex_exit(&scrub_stat_mutex);
}

/*********************************************************************
Init global variables */
UNIV_INTERN
void
btr_scrub_init()
{
	mutex_create(LATCH_ID_SCRUB_STAT_MUTEX, &scrub_stat_mutex);

	memset(&scrub_stat, 0, sizeof(scrub_stat));
}
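
/* Note (inferred from the code, not stated elsewhere): btr_scrub_init()
must run once during server startup, before any thread touches the
statistics above; btr_scrub_cleanup() below is its shutdown
counterpart. */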

/*********************************************************************
Cleanup globals */
UNIV_INTERN
void
btr_scrub_cleanup()
{
	mutex_free(&scrub_stat_mutex);
}