1/*****************************************************************************
2
3Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file row/row0purge.cc
22Purge obsolete records
23
24Created 3/14/1997 Heikki Tuuri
25*******************************************************/
26
27#include "row0purge.h"
28#include "fsp0fsp.h"
29#include "mach0data.h"
30#include "dict0stats.h"
31#include "trx0rseg.h"
32#include "trx0trx.h"
33#include "trx0roll.h"
34#include "trx0undo.h"
35#include "trx0purge.h"
36#include "trx0rec.h"
37#include "que0que.h"
38#include "row0row.h"
39#include "row0upd.h"
40#include "row0vers.h"
41#include "row0mysql.h"
42#include "row0log.h"
43#include "log0log.h"
44#include "srv0mon.h"
45#include "srv0start.h"
46#include "handler.h"
47#include "ha_innodb.h"
48#include "fil0fil.h"
49
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST NOT hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
59
60/** Create a purge node to a query graph.
61@param[in] parent parent node, i.e., a thr node
62@param[in] heap memory heap where created
63@return own: purge node */
64purge_node_t*
65row_purge_node_create(
66 que_thr_t* parent,
67 mem_heap_t* heap)
68{
69 purge_node_t* node;
70
71 ut_ad(parent != NULL);
72 ut_ad(heap != NULL);
73
74 node = static_cast<purge_node_t*>(
75 mem_heap_zalloc(heap, sizeof(*node)));
76
77 node->common.type = QUE_NODE_PURGE;
78 node->common.parent = parent;
79 node->done = TRUE;
80 node->heap = mem_heap_create(256);
81
82 return(node);
83}
84
85/***********************************************************//**
86Repositions the pcur in the purge node on the clustered index record,
87if found. If the record is not found, close pcur.
88@return TRUE if the record was found */
89static
90ibool
91row_purge_reposition_pcur(
92/*======================*/
93 ulint mode, /*!< in: latching mode */
94 purge_node_t* node, /*!< in: row purge node */
95 mtr_t* mtr) /*!< in: mtr */
96{
97 if (node->found_clust) {
98 ut_ad(node->validate_pcur());
99
100 node->found_clust = btr_pcur_restore_position(mode, &node->pcur, mtr);
101
102 } else {
103 node->found_clust = row_search_on_row_ref(
104 &node->pcur, mode, node->table, node->ref, mtr);
105
106 if (node->found_clust) {
107 btr_pcur_store_position(&node->pcur, mtr);
108 }
109 }
110
111 /* Close the current cursor if we fail to position it correctly. */
112 if (!node->found_clust) {
113 btr_pcur_close(&node->pcur);
114 }
115
116 return(node->found_clust);
117}
118
119/***********************************************************//**
120Removes a delete marked clustered index record if possible.
121@retval true if the row was not found, or it was successfully removed
122@retval false if the row was modified after the delete marking */
123static MY_ATTRIBUTE((nonnull, warn_unused_result))
124bool
125row_purge_remove_clust_if_poss_low(
126/*===============================*/
127 purge_node_t* node, /*!< in/out: row purge node */
128 ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
129{
130 dict_index_t* index;
131 bool success = true;
132 mtr_t mtr;
133 rec_t* rec;
134 mem_heap_t* heap = NULL;
135 ulint* offsets;
136 ulint offsets_[REC_OFFS_NORMAL_SIZE];
137 rec_offs_init(offsets_);
138
139 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
140
141 index = dict_table_get_first_index(node->table);
142
143 log_free_check();
144 mtr_start(&mtr);
145 index->set_modified(mtr);
146
147 if (!row_purge_reposition_pcur(mode, node, &mtr)) {
148 /* The record was already removed. */
149 goto func_exit;
150 }
151
152 rec = btr_pcur_get_rec(&node->pcur);
153
154 offsets = rec_get_offsets(
155 rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
156
157 if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
158 /* Someone else has modified the record later: do not remove */
159 goto func_exit;
160 }
161
162 ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
163 /* In delete-marked records, DB_TRX_ID must
164 always refer to an existing undo log record. */
165 ut_ad(row_get_rec_trx_id(rec, index, offsets));
166
167 if (mode == BTR_MODIFY_LEAF) {
168 success = btr_cur_optimistic_delete(
169 btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
170 } else {
171 dberr_t err;
172 ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
173 btr_cur_pessimistic_delete(
174 &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
175 false, &mtr);
176
177 switch (err) {
178 case DB_SUCCESS:
179 break;
180 case DB_OUT_OF_FILE_SPACE:
181 success = false;
182 break;
183 default:
184 ut_error;
185 }
186 }
187
188func_exit:
189 if (heap) {
190 mem_heap_free(heap);
191 }
192
193 /* Persistent cursor is closed if reposition fails. */
194 if (node->found_clust) {
195 btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
196 } else {
197 mtr_commit(&mtr);
198 }
199
200 return(success);
201}
202
203/***********************************************************//**
204Removes a clustered index record if it has not been modified after the delete
205marking.
206@retval true if the row was not found, or it was successfully removed
207@retval false the purge needs to be suspended because of running out
208of file space. */
209static MY_ATTRIBUTE((nonnull, warn_unused_result))
210bool
211row_purge_remove_clust_if_poss(
212/*===========================*/
213 purge_node_t* node) /*!< in/out: row purge node */
214{
215 if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
216 return(true);
217 }
218
219 for (ulint n_tries = 0;
220 n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
221 n_tries++) {
222 if (row_purge_remove_clust_if_poss_low(
223 node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
224 return(true);
225 }
226
227 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
228 }
229
230 return(false);
231}
232
233/***********************************************************//**
234Determines if it is possible to remove a secondary index entry.
235Removal is possible if the secondary index entry does not refer to any
236not delete marked version of a clustered index record where DB_TRX_ID
237is newer than the purge view.
238
239NOTE: This function should only be called by the purge thread, only
240while holding a latch on the leaf page of the secondary index entry
241(or keeping the buffer pool watch on the page). It is possible that
242this function first returns true and then false, if a user transaction
243inserts a record that the secondary index entry would refer to.
244However, in that case, the user transaction would also re-insert the
245secondary index entry after purge has removed it and released the leaf
246page latch.
247@return true if the secondary index record can be purged */
248bool
249row_purge_poss_sec(
250/*===============*/
251 purge_node_t* node, /*!< in/out: row purge node */
252 dict_index_t* index, /*!< in: secondary index */
253 const dtuple_t* entry) /*!< in: secondary index entry */
254{
255 bool can_delete;
256 mtr_t mtr;
257
258 ut_ad(!dict_index_is_clust(index));
259 mtr_start(&mtr);
260
261 can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
262 || !row_vers_old_has_index_entry(TRUE,
263 btr_pcur_get_rec(&node->pcur),
264 &mtr, index, entry,
265 node->roll_ptr, node->trx_id);
266
267 /* Persistent cursor is closed if reposition fails. */
268 if (node->found_clust) {
269 btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
270 } else {
271 mtr_commit(&mtr);
272 }
273
274 return(can_delete);
275}
276
277/***************************************************************
278Removes a secondary index entry if possible, by modifying the
279index tree. Does not try to buffer the delete.
280@return TRUE if success or if not found */
281static MY_ATTRIBUTE((nonnull, warn_unused_result))
282ibool
283row_purge_remove_sec_if_poss_tree(
284/*==============================*/
285 purge_node_t* node, /*!< in: row purge node */
286 dict_index_t* index, /*!< in: index */
287 const dtuple_t* entry) /*!< in: index entry */
288{
289 btr_pcur_t pcur;
290 btr_cur_t* btr_cur;
291 ibool success = TRUE;
292 dberr_t err;
293 mtr_t mtr;
294 enum row_search_result search_result;
295
296 log_free_check();
297 mtr_start(&mtr);
298 index->set_modified(mtr);
299
300 if (!index->is_committed()) {
301 /* The index->online_status may change if the index is
302 or was being created online, but not committed yet. It
303 is protected by index->lock. */
304 mtr_sx_lock(dict_index_get_lock(index), &mtr);
305
306 if (dict_index_is_online_ddl(index)) {
307 /* Online secondary index creation will not
308 copy any delete-marked records. Therefore
309 there is nothing to be purged. We must also
310 skip the purge when a completed index is
311 dropped by rollback_inplace_alter_table(). */
312 goto func_exit_no_pcur;
313 }
314 } else {
315 /* For secondary indexes,
316 index->online_status==ONLINE_INDEX_COMPLETE if
317 index->is_committed(). */
318 ut_ad(!dict_index_is_online_ddl(index));
319 }
320
321 search_result = row_search_index_entry(
322 index, entry,
323 BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
324 &pcur, &mtr);
325
326 switch (search_result) {
327 case ROW_NOT_FOUND:
328 /* Not found. This is a legitimate condition. In a
329 rollback, InnoDB will remove secondary recs that would
330 be purged anyway. Then the actual purge will not find
331 the secondary index record. Also, the purge itself is
332 eager: if it comes to consider a secondary index
333 record, and notices it does not need to exist in the
334 index, it will remove it. Then if/when the purge
335 comes to consider the secondary index record a second
336 time, it will not exist any more in the index. */
337
338 /* fputs("PURGE:........sec entry not found\n", stderr); */
339 /* dtuple_print(stderr, entry); */
340 goto func_exit;
341 case ROW_FOUND:
342 break;
343 case ROW_BUFFERED:
344 case ROW_NOT_DELETED_REF:
345 /* These are invalid outcomes, because the mode passed
346 to row_search_index_entry() did not include any of the
347 flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
348 ut_error;
349 }
350
351 btr_cur = btr_pcur_get_btr_cur(&pcur);
352
353 /* We should remove the index record if no later version of the row,
354 which cannot be purged yet, requires its existence. If some requires,
355 we should do nothing. */
356
357 if (row_purge_poss_sec(node, index, entry)) {
358 /* Remove the index record, which should have been
359 marked for deletion. */
360 if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
361 dict_table_is_comp(index->table))) {
362 ib::error()
363 << "tried to purge non-delete-marked record"
364 " in index " << index->name
365 << " of table " << index->table->name
366 << ": tuple: " << *entry
367 << ", record: " << rec_index_print(
368 btr_cur_get_rec(btr_cur), index);
369
370 ut_ad(0);
371
372 goto func_exit;
373 }
374
375 btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
376 false, &mtr);
377 switch (UNIV_EXPECT(err, DB_SUCCESS)) {
378 case DB_SUCCESS:
379 break;
380 case DB_OUT_OF_FILE_SPACE:
381 success = FALSE;
382 break;
383 default:
384 ut_error;
385 }
386 }
387
388func_exit:
389 btr_pcur_close(&pcur);
390func_exit_no_pcur:
391 mtr_commit(&mtr);
392
393 return(success);
394}
395
396/***************************************************************
397Removes a secondary index entry without modifying the index tree,
398if possible.
399@retval true if success or if not found
400@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
401static MY_ATTRIBUTE((nonnull, warn_unused_result))
402bool
403row_purge_remove_sec_if_poss_leaf(
404/*==============================*/
405 purge_node_t* node, /*!< in: row purge node */
406 dict_index_t* index, /*!< in: index */
407 const dtuple_t* entry) /*!< in: index entry */
408{
409 mtr_t mtr;
410 btr_pcur_t pcur;
411 enum btr_latch_mode mode;
412 enum row_search_result search_result;
413 bool success = true;
414
415 log_free_check();
416 ut_ad(index->table == node->table);
417 ut_ad(!index->table->is_temporary());
418 mtr_start(&mtr);
419 index->set_modified(mtr);
420
421 if (!index->is_committed()) {
422 /* For uncommitted spatial index, we also skip the purge. */
423 if (dict_index_is_spatial(index)) {
424 goto func_exit_no_pcur;
425 }
426
427 /* The index->online_status may change if the the
428 index is or was being created online, but not
429 committed yet. It is protected by index->lock. */
430 mtr_s_lock(dict_index_get_lock(index), &mtr);
431
432 if (dict_index_is_online_ddl(index)) {
433 /* Online secondary index creation will not
434 copy any delete-marked records. Therefore
435 there is nothing to be purged. We must also
436 skip the purge when a completed index is
437 dropped by rollback_inplace_alter_table(). */
438 goto func_exit_no_pcur;
439 }
440
441 mode = BTR_PURGE_LEAF_ALREADY_S_LATCHED;
442 } else {
443 /* For secondary indexes,
444 index->online_status==ONLINE_INDEX_COMPLETE if
445 index->is_committed(). */
446 ut_ad(!dict_index_is_online_ddl(index));
447
448 /* Change buffering is disabled for spatial index. */
449 mode = dict_index_is_spatial(index)
450 ? BTR_MODIFY_LEAF
451 : BTR_PURGE_LEAF;
452 }
453
454 /* Set the purge node for the call to row_purge_poss_sec(). */
455 pcur.btr_cur.purge_node = node;
456 if (dict_index_is_spatial(index)) {
457 rw_lock_sx_lock(dict_index_get_lock(index));
458 pcur.btr_cur.thr = NULL;
459 } else {
460 /* Set the query thread, so that ibuf_insert_low() will be
461 able to invoke thd_get_trx(). */
462 pcur.btr_cur.thr = static_cast<que_thr_t*>(
463 que_node_get_parent(node));
464 }
465
466 search_result = row_search_index_entry(
467 index, entry, mode, &pcur, &mtr);
468
469 if (dict_index_is_spatial(index)) {
470 rw_lock_sx_unlock(dict_index_get_lock(index));
471 }
472
473 switch (search_result) {
474 case ROW_FOUND:
475 /* Before attempting to purge a record, check
476 if it is safe to do so. */
477 if (row_purge_poss_sec(node, index, entry)) {
478 btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
479
480 /* Only delete-marked records should be purged. */
481 if (!rec_get_deleted_flag(
482 btr_cur_get_rec(btr_cur),
483 dict_table_is_comp(index->table))) {
484
485 ib::error()
486 << "tried to purge non-delete-marked"
487 " record" " in index " << index->name
488 << " of table " << index->table->name
489 << ": tuple: " << *entry
490 << ", record: "
491 << rec_index_print(
492 btr_cur_get_rec(btr_cur),
493 index);
494 ut_ad(0);
495
496 btr_pcur_close(&pcur);
497
498 goto func_exit_no_pcur;
499 }
500
501 if (dict_index_is_spatial(index)) {
502 const page_t* page;
503 const trx_t* trx = NULL;
504
505 if (btr_cur->rtr_info != NULL
506 && btr_cur->rtr_info->thr != NULL) {
507 trx = thr_get_trx(
508 btr_cur->rtr_info->thr);
509 }
510
511 page = btr_cur_get_page(btr_cur);
512
513 if (!lock_test_prdt_page_lock(
514 trx,
515 page_get_space_id(page),
516 page_get_page_no(page))
517 && page_get_n_recs(page) < 2
518 && btr_cur_get_block(btr_cur)
519 ->page.id.page_no() !=
520 dict_index_get_page(index)) {
521 /* this is the last record on page,
522 and it has a "page" lock on it,
523 which mean search is still depending
524 on it, so do not delete */
525 DBUG_LOG("purge",
526 "skip purging last"
527 " record on page "
528 << btr_cur_get_block(btr_cur)
529 ->page.id);
530
531 btr_pcur_close(&pcur);
532 mtr_commit(&mtr);
533 return(success);
534 }
535 }
536
537 if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
538
539 /* The index entry could not be deleted. */
540 success = false;
541 }
542 }
543 /* (The index entry is still needed,
544 or the deletion succeeded) */
545 /* fall through */
546 case ROW_NOT_DELETED_REF:
547 /* The index entry is still needed. */
548 case ROW_BUFFERED:
549 /* The deletion was buffered. */
550 case ROW_NOT_FOUND:
551 /* The index entry does not exist, nothing to do. */
552 btr_pcur_close(&pcur);
553func_exit_no_pcur:
554 mtr_commit(&mtr);
555 return(success);
556 }
557
558 ut_error;
559 return(false);
560}
561
562/***********************************************************//**
563Removes a secondary index entry if possible. */
564UNIV_INLINE MY_ATTRIBUTE((nonnull(1,2)))
565void
566row_purge_remove_sec_if_poss(
567/*=========================*/
568 purge_node_t* node, /*!< in: row purge node */
569 dict_index_t* index, /*!< in: index */
570 const dtuple_t* entry) /*!< in: index entry */
571{
572 ibool success;
573 ulint n_tries = 0;
574
575 /* fputs("Purge: Removing secondary record\n", stderr); */
576
577 if (!entry) {
578 /* The node->row must have lacked some fields of this
579 index. This is possible when the undo log record was
580 written before this index was created. */
581 return;
582 }
583
584 if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
585
586 return;
587 }
588retry:
589 success = row_purge_remove_sec_if_poss_tree(node, index, entry);
590 /* The delete operation may fail if we have little
591 file space left: TODO: easiest to crash the database
592 and restart with more file space */
593
594 if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
595
596 n_tries++;
597
598 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
599
600 goto retry;
601 }
602
603 ut_a(success);
604}
605
606/** Skip uncommitted virtual indexes on newly added virtual column.
607@param[in,out] index dict index object */
608static
609inline
610void
611row_purge_skip_uncommitted_virtual_index(
612 dict_index_t*& index)
613{
614 /* We need to skip virtual indexes which is not
615 committed yet. It's safe because these indexes are
616 newly created by alter table, and because we do
617 not support LOCK=NONE when adding an index on newly
618 added virtual column.*/
619 while (index != NULL && dict_index_has_virtual(index)
620 && !index->is_committed() && index->has_new_v_col) {
621 index = dict_table_get_next_index(index);
622 }
623}
624
625/***********************************************************//**
626Purges a delete marking of a record.
627@retval true if the row was not found, or it was successfully removed
628@retval false the purge needs to be suspended because of
629running out of file space */
630static MY_ATTRIBUTE((nonnull, warn_unused_result))
631bool
632row_purge_del_mark(
633/*===============*/
634 purge_node_t* node) /*!< in/out: row purge node */
635{
636 mem_heap_t* heap;
637
638 heap = mem_heap_create(1024);
639
640 while (node->index != NULL) {
641 /* skip corrupted secondary index */
642 dict_table_skip_corrupt_index(node->index);
643
644 row_purge_skip_uncommitted_virtual_index(node->index);
645
646 if (!node->index) {
647 break;
648 }
649
650 if (node->index->type != DICT_FTS) {
651 dtuple_t* entry = row_build_index_entry_low(
652 node->row, NULL, node->index,
653 heap, ROW_BUILD_FOR_PURGE);
654 row_purge_remove_sec_if_poss(node, node->index, entry);
655 mem_heap_empty(heap);
656 }
657
658 node->index = dict_table_get_next_index(node->index);
659 }
660
661 mem_heap_free(heap);
662
663 return(row_purge_remove_clust_if_poss(node));
664}
665
666/** Reset DB_TRX_ID, DB_ROLL_PTR of a clustered index record
667whose old history can no longer be observed.
668@param[in,out] node purge node
669@param[in,out] mtr mini-transaction (will be started and committed) */
670static
671void
672row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr)
673{
674 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
675 /* Reset DB_TRX_ID, DB_ROLL_PTR for old records. */
676 mtr->start();
677
678 if (row_purge_reposition_pcur(BTR_MODIFY_LEAF, node, mtr)) {
679 dict_index_t* index = dict_table_get_first_index(
680 node->table);
681 ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
682 rec_t* rec = btr_pcur_get_rec(&node->pcur);
683 mem_heap_t* heap = NULL;
684 /* Reserve enough offsets for the PRIMARY KEY and 2 columns
685 so that we can access DB_TRX_ID, DB_ROLL_PTR. */
686 ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
687 rec_offs_init(offsets_);
688 ulint* offsets = rec_get_offsets(
689 rec, index, offsets_, true, trx_id_pos + 2, &heap);
690 ut_ad(heap == NULL);
691
692 ut_ad(dict_index_get_nth_field(index, trx_id_pos)
693 ->col->mtype == DATA_SYS);
694 ut_ad(dict_index_get_nth_field(index, trx_id_pos)
695 ->col->prtype == (DATA_TRX_ID | DATA_NOT_NULL));
696 ut_ad(dict_index_get_nth_field(index, trx_id_pos + 1)
697 ->col->mtype == DATA_SYS);
698 ut_ad(dict_index_get_nth_field(index, trx_id_pos + 1)
699 ->col->prtype == (DATA_ROLL_PTR | DATA_NOT_NULL));
700
701 /* Only update the record if DB_ROLL_PTR matches (the
702 record has not been modified after this transaction
703 became purgeable) */
704 if (node->roll_ptr
705 == row_get_rec_roll_ptr(rec, index, offsets)) {
706 ut_ad(!rec_get_deleted_flag(rec,
707 rec_offs_comp(offsets)));
708 DBUG_LOG("purge", "reset DB_TRX_ID="
709 << ib::hex(row_get_rec_trx_id(
710 rec, index, offsets)));
711
712 index->set_modified(*mtr);
713 if (page_zip_des_t* page_zip
714 = buf_block_get_page_zip(
715 btr_pcur_get_block(&node->pcur))) {
716 page_zip_write_trx_id_and_roll_ptr(
717 page_zip, rec, offsets, trx_id_pos,
718 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
719 mtr);
720 } else {
721 ulint len;
722 byte* ptr = rec_get_nth_field(
723 rec, offsets, trx_id_pos, &len);
724 ut_ad(len == DATA_TRX_ID_LEN);
725 mlog_write_string(ptr, reset_trx_id,
726 sizeof reset_trx_id, mtr);
727 }
728 }
729 }
730
731 mtr->commit();
732}
733
734/***********************************************************//**
735Purges an update of an existing record. Also purges an update of a delete
736marked record if that record contained an externally stored field. */
737static
738void
739row_purge_upd_exist_or_extern_func(
740/*===============================*/
741#ifdef UNIV_DEBUG
742 const que_thr_t*thr, /*!< in: query thread */
743#endif /* UNIV_DEBUG */
744 purge_node_t* node, /*!< in: row purge node */
745 trx_undo_rec_t* undo_rec) /*!< in: record to purge */
746{
747 mem_heap_t* heap;
748
749 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
750 ut_ad(!node->table->skip_alter_undo);
751
752 if (node->rec_type == TRX_UNDO_UPD_DEL_REC
753 || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
754
755 goto skip_secondaries;
756 }
757
758 heap = mem_heap_create(1024);
759
760 while (node->index != NULL) {
761 dict_table_skip_corrupt_index(node->index);
762
763 row_purge_skip_uncommitted_virtual_index(node->index);
764
765 if (!node->index) {
766 break;
767 }
768
769 if (row_upd_changes_ord_field_binary(node->index, node->update,
770 thr, NULL, NULL)) {
771 /* Build the older version of the index entry */
772 dtuple_t* entry = row_build_index_entry_low(
773 node->row, NULL, node->index,
774 heap, ROW_BUILD_FOR_PURGE);
775 row_purge_remove_sec_if_poss(node, node->index, entry);
776 mem_heap_empty(heap);
777 }
778
779 node->index = dict_table_get_next_index(node->index);
780 }
781
782 mem_heap_free(heap);
783
784skip_secondaries:
785 mtr_t mtr;
786 dict_index_t* index = dict_table_get_first_index(node->table);
787 /* Free possible externally stored fields */
788 for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
789
790 const upd_field_t* ufield
791 = upd_get_nth_field(node->update, i);
792
793 if (dfield_is_ext(&ufield->new_val)) {
794 trx_rseg_t* rseg;
795 buf_block_t* block;
796 ulint internal_offset;
797 byte* data_field;
798 ibool is_insert;
799 ulint rseg_id;
800 ulint page_no;
801 ulint offset;
802
803 /* We use the fact that new_val points to
804 undo_rec and get thus the offset of
805 dfield data inside the undo record. Then we
806 can calculate from node->roll_ptr the file
807 address of the new_val data */
808
809 internal_offset = ulint(
810 static_cast<const byte*>
811 (dfield_get_data(&ufield->new_val))
812 - undo_rec);
813
814 ut_a(internal_offset < srv_page_size);
815
816 trx_undo_decode_roll_ptr(node->roll_ptr,
817 &is_insert, &rseg_id,
818 &page_no, &offset);
819
820 rseg = trx_sys.rseg_array[rseg_id];
821
822 ut_a(rseg != NULL);
823 ut_ad(rseg->id == rseg_id);
824 ut_ad(rseg->is_persistent());
825
826 mtr_start(&mtr);
827
828 /* We have to acquire an SX-latch to the clustered
829 index tree (exclude other tree changes) */
830
831 mtr_sx_lock(dict_index_get_lock(index), &mtr);
832
833 index->set_modified(mtr);
834
835 /* NOTE: we must also acquire an X-latch to the
836 root page of the tree. We will need it when we
837 free pages from the tree. If the tree is of height 1,
838 the tree X-latch does NOT protect the root page,
839 because it is also a leaf page. Since we will have a
840 latch on an undo log page, we would break the
841 latching order if we would only later latch the
842 root page of such a tree! */
843
844 btr_root_get(index, &mtr);
845
846 block = buf_page_get(
847 page_id_t(rseg->space->id, page_no),
848 univ_page_size, RW_X_LATCH, &mtr);
849
850 buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
851
852 data_field = buf_block_get_frame(block)
853 + offset + internal_offset;
854
855 ut_a(dfield_get_len(&ufield->new_val)
856 >= BTR_EXTERN_FIELD_REF_SIZE);
857 btr_free_externally_stored_field(
858 index,
859 data_field + dfield_get_len(&ufield->new_val)
860 - BTR_EXTERN_FIELD_REF_SIZE,
861 NULL, NULL, NULL, 0, false, &mtr);
862 mtr_commit(&mtr);
863 }
864 }
865
866 row_purge_reset_trx_id(node, &mtr);
867}
868
/* Wrapper for row_purge_upd_exist_or_extern_func(): the query thread
argument is passed on only in debug builds. */
#ifndef UNIV_DEBUG
# define row_purge_upd_exist_or_extern(thr,node,undo_rec)	\
	row_purge_upd_exist_or_extern_func(node,undo_rec)
#else /* !UNIV_DEBUG */
# define row_purge_upd_exist_or_extern(thr,node,undo_rec)	\
	row_purge_upd_exist_or_extern_func(thr,node,undo_rec)
#endif /* !UNIV_DEBUG */
876
877/***********************************************************//**
878Parses the row reference and other info in a modify undo log record.
879@return true if purge operation required */
880static
881bool
882row_purge_parse_undo_rec(
883/*=====================*/
884 purge_node_t* node, /*!< in: row undo node */
885 trx_undo_rec_t* undo_rec, /*!< in: record to purge */
886 bool* updated_extern, /*!< out: true if an externally
887 stored field was updated */
888 que_thr_t* thr) /*!< in: query thread */
889{
890 dict_index_t* clust_index;
891 byte* ptr;
892 undo_no_t undo_no;
893 table_id_t table_id;
894 roll_ptr_t roll_ptr;
895 ulint info_bits;
896 ulint type;
897
898 ut_ad(node != NULL);
899 ut_ad(thr != NULL);
900
901 ptr = trx_undo_rec_get_pars(
902 undo_rec, &type, &node->cmpl_info,
903 updated_extern, &undo_no, &table_id);
904
905 node->rec_type = type;
906
907 switch (type) {
908 case TRX_UNDO_RENAME_TABLE:
909 return false;
910 case TRX_UNDO_INSERT_DEFAULT:
911 case TRX_UNDO_INSERT_REC:
912 break;
913 default:
914#ifdef UNIV_DEBUG
915 ut_ad(!"unknown undo log record type");
916 return false;
917 case TRX_UNDO_UPD_DEL_REC:
918 case TRX_UNDO_UPD_EXIST_REC:
919 case TRX_UNDO_DEL_MARK_REC:
920#endif /* UNIV_DEBUG */
921 ptr = trx_undo_update_rec_get_sys_cols(ptr, &node->trx_id,
922 &roll_ptr, &info_bits);
923 break;
924 }
925
926 /* Prevent DROP TABLE etc. from running when we are doing the purge
927 for this row */
928
929try_again:
930 rw_lock_s_lock_inline(dict_operation_lock, 0, __FILE__, __LINE__);
931
932 node->table = dict_table_open_on_id(
933 table_id, FALSE, DICT_TABLE_OP_NORMAL);
934
935 if (node->table == NULL) {
936 /* The table has been dropped: no need to do purge */
937 goto err_exit;
938 }
939
940 ut_ad(!node->table->is_temporary());
941
942 if (!fil_table_accessible(node->table)) {
943 dict_table_close(node->table, FALSE, FALSE);
944 node->table = NULL;
945 goto err_exit;
946 }
947
948 switch (type) {
949 case TRX_UNDO_INSERT_DEFAULT:
950 case TRX_UNDO_INSERT_REC:
951 break;
952 default:
953 if (!node->table->n_v_cols || node->table->vc_templ
954 || !dict_table_has_indexed_v_cols(node->table)) {
955 break;
956 }
957 /* Need server fully up for virtual column computation */
958 if (!mysqld_server_started) {
959
960 dict_table_close(node->table, FALSE, FALSE);
961 rw_lock_s_unlock(dict_operation_lock);
962 if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
963 return(false);
964 }
965 os_thread_sleep(1000000);
966 goto try_again;
967 }
968
969 /* Initialize the template for the table */
970 innobase_init_vc_templ(node->table);
971 }
972
973 clust_index = dict_table_get_first_index(node->table);
974
975 if (!clust_index || clust_index->is_corrupted()) {
976 /* The table was corrupt in the data dictionary.
977 dict_set_corrupted() works on an index, and
978 we do not have an index to call it with. */
979 dict_table_close(node->table, FALSE, FALSE);
980err_exit:
981 rw_lock_s_unlock(dict_operation_lock);
982 return(false);
983 }
984
985 if (type == TRX_UNDO_INSERT_DEFAULT) {
986 node->ref = &trx_undo_default_rec;
987 return(true);
988 }
989
990 ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
991 node->heap);
992
993 if (type == TRX_UNDO_INSERT_REC) {
994 return(true);
995 }
996
997 ptr = trx_undo_update_rec_get_update(ptr, clust_index, type,
998 node->trx_id,
999 roll_ptr, info_bits,
1000 node->heap, &(node->update));
1001
1002 /* Read to the partial row the fields that occur in indexes */
1003
1004 if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
1005 ptr = trx_undo_rec_get_partial_row(
1006 ptr, clust_index, node->update, &node->row,
1007 type == TRX_UNDO_UPD_DEL_REC,
1008 node->heap);
1009 }
1010
1011 return(true);
1012}
1013
1014/***********************************************************//**
1015Purges the parsed record.
1016@return true if purged, false if skipped */
1017static MY_ATTRIBUTE((nonnull, warn_unused_result))
1018bool
1019row_purge_record_func(
1020/*==================*/
1021 purge_node_t* node, /*!< in: row purge node */
1022 trx_undo_rec_t* undo_rec, /*!< in: record to purge */
1023#ifdef UNIV_DEBUG
1024 const que_thr_t*thr, /*!< in: query thread */
1025#endif /* UNIV_DEBUG */
1026 bool updated_extern) /*!< in: whether external columns
1027 were updated */
1028{
1029 dict_index_t* clust_index;
1030 bool purged = true;
1031
1032 ut_ad(!node->found_clust);
1033 ut_ad(!node->table->skip_alter_undo);
1034
1035 clust_index = dict_table_get_first_index(node->table);
1036
1037 node->index = dict_table_get_next_index(clust_index);
1038 ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
1039
1040 switch (node->rec_type) {
1041 case TRX_UNDO_DEL_MARK_REC:
1042 purged = row_purge_del_mark(node);
1043 if (purged) {
1044 if (node->table->stat_initialized
1045 && srv_stats_include_delete_marked) {
1046 dict_stats_update_if_needed(node->table);
1047 }
1048 MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
1049 }
1050 break;
1051 case TRX_UNDO_INSERT_DEFAULT:
1052 case TRX_UNDO_INSERT_REC:
1053 node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
1054 /* fall through */
1055 default:
1056 if (!updated_extern) {
1057 mtr_t mtr;
1058 row_purge_reset_trx_id(node, &mtr);
1059 break;
1060 }
1061 /* fall through */
1062 case TRX_UNDO_UPD_EXIST_REC:
1063 row_purge_upd_exist_or_extern(thr, node, undo_rec);
1064 MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
1065 break;
1066 }
1067
1068 if (node->found_clust) {
1069 btr_pcur_close(&node->pcur);
1070 node->found_clust = FALSE;
1071 }
1072
1073 if (node->table != NULL) {
1074 dict_table_close(node->table, FALSE, FALSE);
1075 node->table = NULL;
1076 }
1077
1078 return(purged);
1079}
1080
/* Wrapper for row_purge_record_func(). The query thread argument is
forwarded only in UNIV_DEBUG builds, because the function declares that
parameter only there; in other builds it is dropped. */
#ifndef UNIV_DEBUG
# define row_purge_record(node,undo_rec,thr,updated_extern)	\
	row_purge_record_func(node,undo_rec,updated_extern)
#else /* !UNIV_DEBUG */
# define row_purge_record(node,undo_rec,thr,updated_extern)	\
	row_purge_record_func(node,undo_rec,thr,updated_extern)
#endif /* !UNIV_DEBUG */
1088
1089/***********************************************************//**
1090Fetches an undo log record and does the purge for the recorded operation.
1091If none left, or the current purge completed, returns the control to the
1092parent node, which is always a query thread node. */
1093static MY_ATTRIBUTE((nonnull))
1094void
1095row_purge(
1096/*======*/
1097 purge_node_t* node, /*!< in: row purge node */
1098 trx_undo_rec_t* undo_rec, /*!< in: record to purge */
1099 que_thr_t* thr) /*!< in: query thread */
1100{
1101 if (undo_rec != &trx_purge_dummy_rec) {
1102 bool updated_extern;
1103
1104 while (row_purge_parse_undo_rec(
1105 node, undo_rec, &updated_extern, thr)) {
1106
1107 bool purged = row_purge_record(
1108 node, undo_rec, thr, updated_extern);
1109
1110 rw_lock_s_unlock(dict_operation_lock);
1111
1112 if (purged
1113 || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
1114 return;
1115 }
1116
1117 /* Retry the purge in a second. */
1118 os_thread_sleep(1000000);
1119 }
1120 }
1121}
1122
1123/***********************************************************//**
1124Reset the purge query thread. */
1125UNIV_INLINE
1126void
1127row_purge_end(
1128/*==========*/
1129 que_thr_t* thr) /*!< in: query thread */
1130{
1131 purge_node_t* node;
1132
1133 ut_ad(thr);
1134
1135 node = static_cast<purge_node_t*>(thr->run_node);
1136
1137 ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1138
1139 thr->run_node = que_node_get_parent(node);
1140
1141 node->undo_recs = NULL;
1142
1143 node->done = TRUE;
1144
1145 ut_a(thr->run_node != NULL);
1146
1147 mem_heap_empty(node->heap);
1148}
1149
1150/***********************************************************//**
1151Does the purge operation for a single undo log record. This is a high-level
1152function used in an SQL execution graph.
1153@return query thread to run next or NULL */
1154que_thr_t*
1155row_purge_step(
1156/*===========*/
1157 que_thr_t* thr) /*!< in: query thread */
1158{
1159 purge_node_t* node;
1160
1161 ut_ad(thr);
1162
1163 node = static_cast<purge_node_t*>(thr->run_node);
1164
1165 node->table = NULL;
1166 node->row = NULL;
1167 node->ref = NULL;
1168 node->index = NULL;
1169 node->update = NULL;
1170 node->found_clust = FALSE;
1171 node->rec_type = ULINT_UNDEFINED;
1172 node->cmpl_info = ULINT_UNDEFINED;
1173
1174 ut_a(!node->done);
1175
1176 ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1177
1178 if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) {
1179 trx_purge_rec_t*purge_rec;
1180
1181 purge_rec = static_cast<trx_purge_rec_t*>(
1182 ib_vector_pop(node->undo_recs));
1183
1184 node->roll_ptr = purge_rec->roll_ptr;
1185
1186 row_purge(node, purge_rec->undo_rec, thr);
1187
1188 if (ib_vector_is_empty(node->undo_recs)) {
1189 row_purge_end(thr);
1190 } else {
1191 thr->run_node = node;
1192 }
1193 } else {
1194 row_purge_end(thr);
1195 }
1196
1197 innobase_reset_background_thd(thr_get_trx(thr)->mysql_thd);
1198
1199 return(thr);
1200}
1201
#ifdef UNIV_DEBUG
/***********************************************************//**
Validate the persistent cursor. The purge node has two references
to the clustered index record - one via the ref member, and the
other via the persistent cursor. These two references must match
each other if the found_clust flag is set.
@return true if the stored copy of persistent cursor is consistent
with the ref member.*/
bool
purge_node_t::validate_pcur()
{
	/* There is nothing to compare unless found_clust is set, an
	index whose type is not DICT_FTS is attached, and the persistent
	cursor has a stored position. */
	if (!found_clust
	    || index == NULL
	    || index->type == DICT_FTS
	    || !pcur.old_stored) {
		return(true);
	}

	ulint*	offsets = rec_get_offsets(
		pcur.old_rec, pcur.btr_cur.index, NULL, true,
		pcur.old_n_fields, &heap);

	/* Compare the purge ref record with the initial part stored in
	the persistent cursor. In both cases n_uniq fields of the
	clustered index are stored, so the comparison is valid. The
	dependency is noted here because pcur and ref belong to
	different modules. */
	if (cmp_dtuple_rec(ref, pcur.old_rec, offsets) == 0) {
		return(true);
	}

	ib::error() << "Purge node pcur validation failed";
	ib::error() << rec_printer(ref).str();
	ib::error() << rec_printer(pcur.old_rec, offsets).str();

	return(false);
}
#endif /* UNIV_DEBUG */
1251