row0uins.cc source code [MariaDB/storage/innobase/row/row0uins.cc]

1	/*****************************************************************************
2
3	Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
4	Copyright (c) 2017, 2018, MariaDB Corporation.
5
6	This program is free software; you can redistribute it and/or modify it under
7	the terms of the GNU General Public License as published by the Free Software
8	Foundation; version 2 of the License.
9
10	This program is distributed in the hope that it will be useful, but WITHOUT
11	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14	You should have received a copy of the GNU General Public License along with
15	this program; if not, write to the Free Software Foundation, Inc.,
16	51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18	*****************************************************************************/
19
20	/************************************************//**
21	@file row/row0uins.cc
22	Fresh insert undo
23
24	Created 2/25/1997 Heikki Tuuri
25	*******************************************************/
26
27	#include "row0uins.h"
28	#include "dict0dict.h"
29	#include "dict0stats.h"
30	#include "dict0boot.h"
31	#include "dict0crea.h"
32	#include "trx0undo.h"
33	#include "trx0roll.h"
34	#include "btr0btr.h"
35	#include "mach0data.h"
36	#include "row0undo.h"
37	#include "row0vers.h"
38	#include "row0log.h"
39	#include "trx0trx.h"
40	#include "trx0rec.h"
41	#include "row0row.h"
42	#include "row0upd.h"
43	#include "que0que.h"
44	#include "ibuf0ibuf.h"
45	#include "log0log.h"
46	#include "fil0fil.h"
47
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST NOT hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
57
58	/*************************************************************//**
59	Removes a clustered index record. The pcur in node was positioned on the
60	record, now it is detached.
61	@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE /*
62	static MY_ATTRIBUTE((nonnull, warn_unused_result))
63	dberr_t
64	row_undo_ins_remove_clust_rec(
65	/==========================/
66	undo_node_t* node) /!< in: undo node /
67	{
68	btr_cur_t* btr_cur;
69	ibool success;
70	dberr_t err;
71	ulint n_tries = `0`;
72	mtr_t mtr;
73	dict_index_t* index = node->pcur.btr_cur.index;
74	bool online;
75
76	ut_ad(dict_index_is_clust(index));
77	ut_ad(node->trx->in_rollback);
78
79	mtr.start();
80	if (index->table->is_temporary()) {
81	ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
82	mtr.set_log_mode(MTR_LOG_NO_REDO);
83	} else {
84	index->set_modified(mtr);
85	}
86
87	/ This is similar to row_undo_mod_clust(). The DDL thread may*
88	already have copied this row from the log to the new table.
89	We must log the removal, so that the row will be correctly
90	purged. However, we can log the removal out of sync with the
91	B-tree modification. /*
92
93	online = dict_index_is_online_ddl(index);
94	if (online) {
95	ut_ad(node->trx->dict_operation_lock_mode
96	!= RW_X_LATCH);
97	ut_ad(node->table->id != DICT_INDEXES_ID);
98	mtr_s_lock(dict_index_get_lock(index), &mtr);
99	}
100
101	success = btr_pcur_restore_position(
102	online
103	? BTR_MODIFY_LEAF \| BTR_ALREADY_S_LATCHED
104	: BTR_MODIFY_LEAF, &node->pcur, &mtr);
105	ut_a(success);
106
107	btr_cur = btr_pcur_get_btr_cur(&node->pcur);
108
109	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
110	== node->trx->id);
111	ut_ad(!rec_get_deleted_flag(
112	btr_cur_get_rec(btr_cur),
113	dict_table_is_comp(btr_cur->index->table)));
114
115	if (online && dict_index_is_online_ddl(index)) {
116	const rec_t* rec = btr_cur_get_rec(btr_cur);
117	mem_heap_t* heap = NULL;
118	const ulint* offsets = rec_get_offsets(
119	rec, index, NULL, true, ULINT_UNDEFINED, &heap);
120	row_log_table_delete(rec, index, offsets, NULL);
121	mem_heap_free(heap);
122	}
123
124	switch (node->table->id) {
125	case DICT_INDEXES_ID:
126	ut_ad(!online);
127	ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
128	ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
129
130	dict_drop_index_tree(
131	btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr);
132
133	mtr.commit();
134
135	mtr.start();
136
137	success = btr_pcur_restore_position(
138	BTR_MODIFY_LEAF, &node->pcur, &mtr);
139	ut_a(success);
140	break;
141	case DICT_COLUMNS_ID:
142	/ This is rolling back an INSERT into SYS_COLUMNS.*
143	If it was part of an instant ADD COLUMN operation, we
144	must modify the table definition. At this point, any
145	corresponding operation to the 'default row' will have
146	been rolled back. /*
147	ut_ad(!online);
148	ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
149	ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
150	const rec_t* rec = btr_pcur_get_rec(&node->pcur);
151	if (rec_get_n_fields_old(rec)
152	!= DICT_NUM_FIELDS__SYS_COLUMNS) {
153	break;
154	}
155	ulint len;
156	const byte* data = rec_get_nth_field_old(
157	rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len);
158	if (len != `8`) {
159	break;
160	}
161	const table_id_t table_id = mach_read_from_8(data);
162	data = rec_get_nth_field_old(rec, DICT_FLD__SYS_COLUMNS__POS,
163	&len);
164	if (len != `4`) {
165	break;
166	}
167	const unsigned pos = mach_read_from_4(data);
168	if (pos == `0` \|\| pos >= (`1U` << `16`)) {
169	break;
170	}
171	dict_table_t* table = dict_table_open_on_id(
172	table_id, true, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
173	if (!table) {
174	break;
175	}
176
177	dict_index_t* index = dict_table_get_first_index(table);
178
179	if (index && index->is_instant()
180	&& DATA_N_SYS_COLS + `1` + pos == table->n_cols) {
181	/ This is the rollback of an instant ADD COLUMN.*
182	Remove the column from the dictionary cache,
183	but keep the system columns. /*
184	table->rollback_instant(pos);
185	}
186
187	dict_table_close(table, true, false);
188	}
189
190	if (btr_cur_optimistic_delete(btr_cur, `0`, &mtr)) {
191	err = DB_SUCCESS;
192	goto func_exit;
193	}
194
195	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
196	retry:
197	/ If did not succeed, try pessimistic descent to tree /
198	mtr.start();
199	if (index->table->is_temporary()) {
200	mtr.set_log_mode(MTR_LOG_NO_REDO);
201	} else {
202	index->set_modified(mtr);
203	}
204
205	success = btr_pcur_restore_position(
206	BTR_MODIFY_TREE \| BTR_LATCH_FOR_DELETE,
207	&node->pcur, &mtr);
208	ut_a(success);
209
210	btr_cur_pessimistic_delete(&err, FALSE, btr_cur, `0`, true, &mtr);
211
212	/ The delete operation may fail if we have little*
213	file space left: TODO: easiest to crash the database
214	and restart with more file space /*
215
216	if (err == DB_OUT_OF_FILE_SPACE
217	&& n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
218
219	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
220
221	n_tries++;
222
223	os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
224
225	goto retry;
226	}
227
228	func_exit:
229	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
230	if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_DEFAULT) {
231	/ When rolling back the very first instant ADD COLUMN*
232	operation, reset the root page to the basic state. /*
233	ut_ad(!index->table->is_temporary());
234	mtr.start();
235	if (page_t* root = btr_root_get(index, &mtr)) {
236	byte* page_type = root + FIL_PAGE_TYPE;
237	ut_ad(mach_read_from_2(page_type)
238	== FIL_PAGE_TYPE_INSTANT
239	\|\| mach_read_from_2(page_type)
240	== FIL_PAGE_INDEX);
241	index->set_modified(mtr);
242	mlog_write_ulint(page_type, FIL_PAGE_INDEX,
243	MLOG_2BYTES, &mtr);
244	byte* instant = PAGE_INSTANT + PAGE_HEADER + root;
245	mlog_write_ulint(instant,
246	page_ptr_get_direction(instant + `1`),
247	MLOG_2BYTES, &mtr);
248	}
249	mtr.commit();
250	}
251
252	return(err);
253	}
254
255	/*************************************************************//**
256	Removes a secondary index entry if found.
257	@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE /*
258	static MY_ATTRIBUTE((nonnull, warn_unused_result))
259	dberr_t
260	row_undo_ins_remove_sec_low(
261	/========================/
262	ulint mode, /!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,*
263	depending on whether we wish optimistic or
264	pessimistic descent down the index tree /*
265	dict_index_t* index, /!< in: index /
266	dtuple_t* entry, /!< in: index entry to remove /
267	que_thr_t* thr) /!< in: query thread /
268	{
269	btr_pcur_t pcur;
270	btr_cur_t* btr_cur;
271	dberr_t err = DB_SUCCESS;
272	mtr_t mtr;
273	enum row_search_result search_result;
274	const bool modify_leaf = mode == BTR_MODIFY_LEAF;
275
276	memset(&pcur, `0`, sizeof(pcur));
277
278	row_mtr_start(&mtr, index, !modify_leaf);
279
280	if (modify_leaf) {
281	mode = BTR_MODIFY_LEAF \| BTR_ALREADY_S_LATCHED;
282	mtr_s_lock(dict_index_get_lock(index), &mtr);
283	} else {
284	ut_ad(mode == (BTR_MODIFY_TREE \| BTR_LATCH_FOR_DELETE));
285	mtr_sx_lock(dict_index_get_lock(index), &mtr);
286	}
287
288	if (row_log_online_op_try(index, entry, `0`)) {
289	goto func_exit_no_pcur;
290	}
291
292	if (dict_index_is_spatial(index)) {
293	if (modify_leaf) {
294	mode \|= BTR_RTREE_DELETE_MARK;
295	}
296	btr_pcur_get_btr_cur(&pcur)->thr = thr;
297	mode \|= BTR_RTREE_UNDO_INS;
298	}
299
300	search_result = row_search_index_entry(index, entry, mode,
301	&pcur, &mtr);
302
303	switch (search_result) {
304	case ROW_NOT_FOUND:
305	goto func_exit;
306	case ROW_FOUND:
307	if (dict_index_is_spatial(index)
308	&& rec_get_deleted_flag(
309	btr_pcur_get_rec(&pcur),
310	dict_table_is_comp(index->table))) {
311	ib::error () << "Record found in index " << index->name
312	<< " is deleted marked on insert rollback.";
313	ut_ad(`0`);
314	}
315	break;
316
317	case ROW_BUFFERED:
318	case ROW_NOT_DELETED_REF:
319	/ These are invalid outcomes, because the mode passed*
320	to row_search_index_entry() did not include any of the
321	flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. /*
322	ut_error;
323	}
324
325	btr_cur = btr_pcur_get_btr_cur(&pcur);
326
327	if (modify_leaf) {
328	err = btr_cur_optimistic_delete(btr_cur, `0`, &mtr)
329	? DB_SUCCESS : DB_FAIL;
330	} else {
331	/ Passing rollback=false here, because we are*
332	deleting a secondary index record: the distinction
333	only matters when deleting a record that contains
334	externally stored columns. /*
335	ut_ad(!dict_index_is_clust(index));
336	btr_cur_pessimistic_delete(&err, FALSE, btr_cur, `0`,
337	false, &mtr);
338	}
339	func_exit:
340	btr_pcur_close(&pcur);
341	func_exit_no_pcur:
342	mtr_commit(&mtr);
343
344	return(err);
345	}
346
347	/*************************************************************//**
348	Removes a secondary index entry from the index if found. Tries first
349	optimistic, then pessimistic descent down the tree.
350	@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE /*
351	static MY_ATTRIBUTE((nonnull, warn_unused_result))
352	dberr_t
353	row_undo_ins_remove_sec(
354	/====================/
355	dict_index_t* index, /!< in: index /
356	dtuple_t* entry, /!< in: index entry to insert /
357	que_thr_t* thr) /!< in: query thread /
358	{
359	dberr_t err;
360	ulint n_tries = `0`;
361
362	/ Try first optimistic descent to the B-tree /
363
364	err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr);
365
366	if (err == DB_SUCCESS) {
367
368	return(err);
369	}
370
371	/ Try then pessimistic descent to the B-tree /
372	retry:
373	err = row_undo_ins_remove_sec_low(
374	BTR_MODIFY_TREE \| BTR_LATCH_FOR_DELETE,
375	index, entry, thr);
376
377	/ The delete operation may fail if we have little*
378	file space left: TODO: easiest to crash the database
379	and restart with more file space /*
380
381	if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
382
383	n_tries++;
384
385	os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
386
387	goto retry;
388	}
389
390	return(err);
391	}
392
393	/*********************************************************//**
394	Parses the row reference and other info in a fresh insert undo record. /*
395	static
396	void
397	row_undo_ins_parse_undo_rec(
398	/========================/
399	undo_node_t* node, /!< in/out: row undo node /
400	ibool dict_locked) /!< in: TRUE if own dict_sys->mutex /
401	{
402	dict_index_t* clust_index;
403	byte* ptr;
404	undo_no_t undo_no;
405	table_id_t table_id;
406	ulint dummy;
407	bool dummy_extern;
408
409	ut_ad(node);
410
411	ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy,
412	&dummy_extern, &undo_no, &table_id);
413
414	node->update = NULL;
415	node->table = dict_table_open_on_id(
416	table_id, dict_locked, DICT_TABLE_OP_NORMAL);
417
418	/ Skip the UNDO if we can't find the table or the .ibd file. /
419	if (UNIV_UNLIKELY(node->table == NULL)) {
420	return;
421	}
422
423	switch (node->rec_type) {
424	default:
425	ut_ad(!"wrong undo record type");
426	goto close_table;
427	case TRX_UNDO_INSERT_DEFAULT:
428	case TRX_UNDO_INSERT_REC:
429	break;
430	case TRX_UNDO_RENAME_TABLE:
431	dict_table_t* table = node->table;
432	ut_ad(!table->is_temporary());
433	ut_ad(dict_table_is_file_per_table(table)
434	== !is_system_tablespace(table->space->id));
435	size_t len = mach_read_from_2(node->undo_rec)
436	+ size_t(node->undo_rec - ptr) - `2`;
437	ptr[len] = `0`;
438	const char* name = reinterpret_cast<char*>(ptr);
439	if (strcmp(table->name.m_name, name)) {
440	dict_table_rename_in_cache(table, name, false);
441	}
442	goto close_table;
443	}
444
445	if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) {
446	close_table:
447	/ Normally, tables should not disappear or become*
448	unaccessible during ROLLBACK, because they should be
449	protected by InnoDB table locks. TRUNCATE TABLE
450	or table corruption could be valid exceptions.
451
452	FIXME: When running out of temporary tablespace, it
453	would probably be better to just drop all temporary
454	tables (and temporary undo log records) of the current
455	connection, instead of doing this rollback. /*
456	dict_table_close(node->table, dict_locked, FALSE);
457	node->table = NULL;
458	} else {
459	ut_ad(!node->table->skip_alter_undo);
460	clust_index = dict_table_get_first_index(node->table);
461
462	if (clust_index != NULL) {
463	if (node->rec_type == TRX_UNDO_INSERT_REC) {
464	ptr = trx_undo_rec_get_row_ref(
465	ptr, clust_index, &node->ref,
466	node->heap);
467	} else {
468	node->ref = &trx_undo_default_rec;
469	}
470
471	if (!row_undo_search_clust_to_pcur(node)) {
472	/ An error probably occurred during*
473	an insert into the clustered index,
474	after we wrote the undo log record. /*
475	goto close_table;
476	}
477	if (node->table->n_v_cols) {
478	trx_undo_read_v_cols(node->table, ptr,
479	node->row, false);
480	}
481
482	} else {
483	ib::warn () << "Table " << node->table->name
484	<< " has no indexes,"
485	" ignoring the table";
486	goto close_table;
487	}
488	}
489	}
490
491	/*************************************************************//**
492	Removes secondary index records.
493	@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE /*
494	static MY_ATTRIBUTE((nonnull, warn_unused_result))
495	dberr_t
496	row_undo_ins_remove_sec_rec(
497	/========================/
498	undo_node_t* node, /!< in/out: row undo node /
499	que_thr_t* thr) /!< in: query thread /
500	{
501	dberr_t err = DB_SUCCESS;
502	dict_index_t* index = node->index;
503	mem_heap_t* heap;
504
505	heap = mem_heap_create(`1024`);
506
507	while (index != NULL) {
508	dtuple_t* entry;
509
510	if (index->type & DICT_FTS) {
511	dict_table_next_uncorrupted_index(index);
512	continue;
513	}
514
515	/ An insert undo record TRX_UNDO_INSERT_REC will*
516	always contain all fields of the index. It does not
517	matter if any indexes were created afterwards; all
518	index entries can be reconstructed from the row. /*
519	entry = row_build_index_entry(
520	node->row, node->ext, index, heap);
521	if (UNIV_UNLIKELY(!entry)) {
522	/ The database must have crashed after*
523	inserting a clustered index record but before
524	writing all the externally stored columns of
525	that record, or a statement is being rolled
526	back because an error occurred while storing
527	off-page columns.
528
529	Because secondary index entries are inserted
530	after the clustered index record, we may
531	assume that the secondary index record does
532	not exist. /*
533	} else {
534	err = row_undo_ins_remove_sec(index, entry, thr);
535
536	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
537	goto func_exit;
538	}
539	}
540
541	mem_heap_empty(heap);
542	dict_table_next_uncorrupted_index(index);
543	}
544
545	func_exit:
546	node->index = index;
547	mem_heap_free(heap);
548	return(err);
549	}
550
551	/*********************************************************//**
552	Undoes a fresh insert of a row to a table. A fresh insert means that
553	the same clustered index unique key did not have any record, even delete
554	marked, at the time of the insert. InnoDB is eager in a rollback:
555	if it figures out that an index record will be removed in the purge
556	anyway, it will remove it in the rollback.
557	@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE /*
558	dberr_t
559	row_undo_ins(
560	/=========/
561	undo_node_t* node, /!< in: row undo node /
562	que_thr_t* thr) /!< in: query thread /
563	{
564	dberr_t err;
565	ibool dict_locked;
566
567	ut_ad(node->state == UNDO_NODE_INSERT);
568	ut_ad(node->trx->in_rollback);
569	ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));
570
571	dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
572
573	row_undo_ins_parse_undo_rec(node, dict_locked);
574
575	if (node->table == NULL) {
576	return(DB_SUCCESS);
577	}
578
579	/ Iterate over all the indexes and undo the insert./
580
581	node->index = dict_table_get_first_index(node->table);
582	ut_ad(dict_index_is_clust(node->index));
583
584	switch (node->rec_type) {
585	default:
586	ut_ad(!"wrong undo record type");
587	case TRX_UNDO_INSERT_REC:
588	/ Skip the clustered index (the first index) /
589	node->index = dict_table_get_next_index(node->index);
590
591	dict_table_skip_corrupt_index(node->index);
592
593	err = row_undo_ins_remove_sec_rec(node, thr);
594
595	if (err != DB_SUCCESS) {
596	break;
597	}
598
599	/ fall through /
600	case TRX_UNDO_INSERT_DEFAULT:
601	log_free_check();
602
603	if (node->table->id == DICT_INDEXES_ID) {
604	ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
605
606	if (!dict_locked) {
607	mutex_enter(&dict_sys->mutex);
608	}
609	}
610
611	// FIXME: We need to update the dict_index_t::space and
612	// page number fields too.
613	err = row_undo_ins_remove_clust_rec(node);
614
615	if (node->table->id == DICT_INDEXES_ID
616	&& !dict_locked) {
617
618	mutex_exit(&dict_sys->mutex);
619	}
620
621	if (err == DB_SUCCESS && node->table->stat_initialized) {
622	/ Not protected by dict_table_stats_lock() for*
623	performance reasons, we would rather get garbage
624	in stat_n_rows (which is just an estimate anyway)
625	than protecting the following code with a latch. /*
626	dict_table_n_rows_dec(node->table);
627
628	/ Do not attempt to update statistics when*
629	executing ROLLBACK in the InnoDB SQL
630	interpreter, because in that case we would
631	already be holding dict_sys->mutex, which
632	would be acquired when updating statistics. /*
633	if (!dict_locked) {
634	dict_stats_update_if_needed(node->table);
635	}
636	}
637	}
638
639	dict_table_close(node->table, dict_locked, FALSE);
640
641	node->table = NULL;
642
643	return(err);
644	}
645

Browse the source code of MariaDB/storage/innobase/row/row0uins.cc