1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2016, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file trx/trx0roll.cc |
22 | Transaction rollback |
23 | |
24 | Created 3/26/1996 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #include "my_config.h" |
28 | #include <my_service_manager.h> |
29 | |
30 | #include "ha_prototypes.h" |
31 | #include "trx0roll.h" |
32 | |
33 | #include <mysql/service_wsrep.h> |
34 | |
35 | #include "fsp0fsp.h" |
36 | #include "lock0lock.h" |
37 | #include "mach0data.h" |
38 | #include "pars0pars.h" |
39 | #include "que0que.h" |
40 | #include "row0mysql.h" |
41 | #include "row0undo.h" |
42 | #include "srv0mon.h" |
43 | #include "srv0start.h" |
44 | #include "trx0rec.h" |
45 | #include "trx0rseg.h" |
46 | #include "trx0sys.h" |
47 | #include "trx0trx.h" |
48 | #include "trx0undo.h" |
49 | #include "ha_prototypes.h" |
50 | |
/** Number of undo pages that must have been undone (trx_t::pages_undone)
before trx_roll_pop_top_rec_of_trx() tries to truncate the undo logs
in the middle of a rollback */
static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;

/** true if trx_rollback_all_recovered() thread is active;
that thread clears the flag just before it exits */
bool trx_rollback_is_active;

/** In crash recovery, the current trx to be rolled back; NULL otherwise.
Set and reset by trx_rollback_active(), read by trx_is_recv(). */
const trx_t* trx_roll_crash_recv_trx;
60 | |
61 | /** Finish transaction rollback. |
62 | @param[in,out] trx transaction |
63 | @return whether the rollback was completed normally |
64 | @retval false if the rollback was aborted by shutdown */ |
65 | static bool trx_rollback_finish(trx_t* trx) |
66 | { |
67 | trx->mod_tables.clear(); |
68 | bool finished = trx->error_state == DB_SUCCESS; |
69 | if (UNIV_LIKELY(finished)) { |
70 | trx_commit(trx); |
71 | } else { |
72 | ut_a(trx->error_state == DB_INTERRUPTED); |
73 | ut_ad(!srv_is_being_started); |
74 | ut_a(!srv_undo_sources); |
75 | ut_ad(srv_fast_shutdown); |
76 | ut_d(trx->in_rollback = false); |
77 | if (trx_undo_t*& undo = trx->rsegs.m_redo.old_insert) { |
78 | UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->old_insert_list, |
79 | undo); |
80 | ut_free(undo); |
81 | undo = NULL; |
82 | } |
83 | if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) { |
84 | UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list, |
85 | undo); |
86 | ut_free(undo); |
87 | undo = NULL; |
88 | } |
89 | if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) { |
90 | UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list, |
91 | undo); |
92 | ut_free(undo); |
93 | undo = NULL; |
94 | } |
95 | trx_commit_low(trx, NULL); |
96 | } |
97 | |
98 | trx->lock.que_state = TRX_QUE_RUNNING; |
99 | |
100 | return finished; |
101 | } |
102 | |
103 | /*******************************************************************//** |
104 | Rollback a transaction used in MySQL. */ |
105 | static |
106 | void |
107 | trx_rollback_to_savepoint_low( |
108 | /*==========================*/ |
109 | trx_t* trx, /*!< in: transaction handle */ |
110 | trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if |
111 | partial rollback requested, or NULL for |
112 | complete rollback */ |
113 | { |
114 | que_thr_t* thr; |
115 | mem_heap_t* heap; |
116 | roll_node_t* roll_node; |
117 | |
118 | heap = mem_heap_create(512); |
119 | |
120 | roll_node = roll_node_create(heap); |
121 | |
122 | if (savept != NULL) { |
123 | roll_node->partial = TRUE; |
124 | roll_node->savept = *savept; |
125 | check_trx_state(trx); |
126 | } else { |
127 | assert_trx_nonlocking_or_in_list(trx); |
128 | } |
129 | |
130 | trx->error_state = DB_SUCCESS; |
131 | |
132 | if (trx->has_logged_or_recovered()) { |
133 | |
134 | ut_ad(trx->rsegs.m_redo.rseg != 0 |
135 | || trx->rsegs.m_noredo.rseg != 0); |
136 | |
137 | thr = pars_complete_graph_for_exec(roll_node, trx, heap, NULL); |
138 | |
139 | ut_a(thr == que_fork_start_command( |
140 | static_cast<que_fork_t*>(que_node_get_parent(thr)))); |
141 | |
142 | que_run_threads(thr); |
143 | |
144 | ut_a(roll_node->undo_thr != NULL); |
145 | que_run_threads(roll_node->undo_thr); |
146 | |
147 | /* Free the memory reserved by the undo graph. */ |
148 | que_graph_free(static_cast<que_t*>( |
149 | roll_node->undo_thr->common.parent)); |
150 | } |
151 | |
152 | if (savept == NULL) { |
153 | trx_rollback_finish(trx); |
154 | MONITOR_INC(MONITOR_TRX_ROLLBACK); |
155 | } else { |
156 | ut_a(trx->error_state == DB_SUCCESS); |
157 | const undo_no_t limit = savept->least_undo_no; |
158 | for (trx_mod_tables_t::iterator i = trx->mod_tables.begin(); |
159 | i != trx->mod_tables.end(); ) { |
160 | trx_mod_tables_t::iterator j = i++; |
161 | ut_ad(j->second.valid()); |
162 | if (j->second.rollback(limit)) { |
163 | trx->mod_tables.erase(j); |
164 | } |
165 | } |
166 | trx->lock.que_state = TRX_QUE_RUNNING; |
167 | MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT); |
168 | } |
169 | |
170 | mem_heap_free(heap); |
171 | |
172 | /* There might be work for utility threads.*/ |
173 | srv_active_wake_master_thread(); |
174 | |
175 | MONITOR_DEC(MONITOR_TRX_ACTIVE); |
176 | } |
177 | |
178 | /*******************************************************************//** |
179 | Rollback a transaction to a given savepoint or do a complete rollback. |
180 | @return error code or DB_SUCCESS */ |
181 | dberr_t |
182 | trx_rollback_to_savepoint( |
183 | /*======================*/ |
184 | trx_t* trx, /*!< in: transaction handle */ |
185 | trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if |
186 | partial rollback requested, or NULL for |
187 | complete rollback */ |
188 | { |
189 | ut_ad(!trx_mutex_own(trx)); |
190 | |
191 | trx_start_if_not_started_xa(trx, true); |
192 | |
193 | trx_rollback_to_savepoint_low(trx, savept); |
194 | |
195 | return(trx->error_state); |
196 | } |
197 | |
198 | /*******************************************************************//** |
199 | Rollback a transaction used in MySQL. |
200 | @return error code or DB_SUCCESS */ |
201 | static |
202 | dberr_t |
203 | trx_rollback_for_mysql_low( |
204 | /*=======================*/ |
205 | trx_t* trx) /*!< in/out: transaction */ |
206 | { |
207 | trx->op_info = "rollback" ; |
208 | |
209 | /* If we are doing the XA recovery of prepared transactions, |
210 | then the transaction object does not have an InnoDB session |
211 | object, and we set a dummy session that we use for all MySQL |
212 | transactions. */ |
213 | |
214 | trx_rollback_to_savepoint_low(trx, NULL); |
215 | |
216 | trx->op_info = "" ; |
217 | |
218 | return(trx->error_state); |
219 | } |
220 | |
/** Rollback a transaction used in MySQL.
The action depends on trx->state:
NOT_STARTED: nothing is rolled back, only trx->will_lock is cleared;
ACTIVE: a complete rollback is performed;
PREPARED: the persistent undo log state is first changed back from
prepared to active in its own mini-transaction, then a complete
rollback is performed;
COMMITTED_IN_MEMORY: not allowed, ends in ut_error.
@param[in, out]	trx	transaction
@return error code or DB_SUCCESS */
dberr_t trx_rollback_for_mysql(trx_t* trx)
{
	/* We are reading trx->state without holding trx_sys.mutex
	here, because the rollback should be invoked for a running
	active MySQL transaction (or recovered prepared transaction)
	that is associated with the current thread. */

	switch (trx->state) {
	case TRX_STATE_NOT_STARTED:
		/* Nothing was started, so there is nothing to undo. */
		trx->will_lock = 0;
		ut_ad(trx->mysql_thd);
		return(DB_SUCCESS);

	case TRX_STATE_ACTIVE:
		ut_ad(trx->mysql_thd);
		assert_trx_nonlocking_or_in_list(trx);
		return(trx_rollback_for_mysql_low(trx));

	case TRX_STATE_PREPARED:
		ut_ad(!trx_is_autocommit_non_locking(trx));
		if (trx->rsegs.m_redo.undo || trx->rsegs.m_redo.old_insert) {
			/* Change the undo log state back from
			TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE
			so that if the system gets killed,
			recovery will perform the rollback. */
			ut_ad(!trx->rsegs.m_redo.undo
			      || trx->rsegs.m_redo.undo->rseg
			      == trx->rsegs.m_redo.rseg);
			ut_ad(!trx->rsegs.m_redo.old_insert
			      || trx->rsegs.m_redo.old_insert->rseg
			      == trx->rsegs.m_redo.rseg);
			mtr_t mtr;
			mtr.start();
			/* Both undo logs belong to the same redo rollback
			segment (asserted above), so a single rseg mutex
			covers both state changes. */
			mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
			if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
				trx_undo_set_state_at_prepare(trx, undo, true,
							      &mtr);
			}
			if (trx_undo_t* undo = trx->rsegs.m_redo.old_insert) {
				trx_undo_set_state_at_prepare(trx, undo, true,
							      &mtr);
			}
			mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
			/* Persist the XA ROLLBACK, so that crash
			recovery will replay the rollback in case
			the redo log gets applied past this point. */
			mtr.commit();
			ut_ad(mtr.commit_lsn() > 0);
		}
#ifdef ENABLED_DEBUG_SYNC
		/* The sync point is only reached when a connection is
		attached and persistent undo log was generated. */
		if (trx->mysql_thd == NULL) {
			/* We could be executing XA ROLLBACK after
			XA PREPARE and a server restart. */
		} else if (!trx->has_logged_persistent()) {
			/* innobase_close_connection() may roll back a
			transaction that did not generate any
			persistent undo log. The DEBUG_SYNC
			would cause an assertion failure for a
			disconnected thread.

			NOTE: InnoDB will not know about the XID
			if no persistent undo log was generated. */
		} else {
			DEBUG_SYNC_C("trx_xa_rollback" );
		}
#endif /* ENABLED_DEBUG_SYNC */
		return(trx_rollback_for_mysql_low(trx));

	case TRX_STATE_COMMITTED_IN_MEMORY:
		/* Rolling back a committed transaction is not allowed;
		fall out of the switch to ut_error. */
		check_trx_state(trx);
		break;
	}

	/* NOTE(review): ut_error is presumably fatal, which would make
	the return below unreachable in practice -- confirm. */
	ut_error;
	return(DB_CORRUPTION);
}
300 | |
301 | /*******************************************************************//** |
302 | Rollback the latest SQL statement for MySQL. |
303 | @return error code or DB_SUCCESS */ |
304 | dberr_t |
305 | trx_rollback_last_sql_stat_for_mysql( |
306 | /*=================================*/ |
307 | trx_t* trx) /*!< in/out: transaction */ |
308 | { |
309 | dberr_t err; |
310 | |
311 | /* We are reading trx->state without holding trx_sys.mutex |
312 | here, because the statement rollback should be invoked for a |
313 | running active MySQL transaction that is associated with the |
314 | current thread. */ |
315 | ut_ad(trx->mysql_thd); |
316 | |
317 | switch (trx->state) { |
318 | case TRX_STATE_NOT_STARTED: |
319 | return(DB_SUCCESS); |
320 | |
321 | case TRX_STATE_ACTIVE: |
322 | assert_trx_nonlocking_or_in_list(trx); |
323 | |
324 | trx->op_info = "rollback of SQL statement" ; |
325 | |
326 | err = trx_rollback_to_savepoint( |
327 | trx, &trx->last_sql_stat_start); |
328 | |
329 | if (trx->fts_trx != NULL) { |
330 | fts_savepoint_rollback_last_stmt(trx); |
331 | } |
332 | |
333 | /* The following call should not be needed, |
334 | but we play it safe: */ |
335 | trx_mark_sql_stat_end(trx); |
336 | |
337 | trx->op_info = "" ; |
338 | |
339 | return(err); |
340 | |
341 | case TRX_STATE_PREPARED: |
342 | case TRX_STATE_COMMITTED_IN_MEMORY: |
343 | /* The statement rollback is only allowed on an ACTIVE |
344 | transaction, not a PREPARED or COMMITTED one. */ |
345 | break; |
346 | } |
347 | |
348 | ut_error; |
349 | return(DB_CORRUPTION); |
350 | } |
351 | |
352 | /*******************************************************************//** |
353 | Search for a savepoint using name. |
354 | @return savepoint if found else NULL */ |
355 | static |
356 | trx_named_savept_t* |
357 | trx_savepoint_find( |
358 | /*===============*/ |
359 | trx_t* trx, /*!< in: transaction */ |
360 | const char* name) /*!< in: savepoint name */ |
361 | { |
362 | trx_named_savept_t* savep; |
363 | |
364 | for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints); |
365 | savep != NULL; |
366 | savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) { |
367 | |
368 | if (0 == ut_strcmp(savep->name, name)) { |
369 | return(savep); |
370 | } |
371 | } |
372 | |
373 | return(NULL); |
374 | } |
375 | |
376 | /*******************************************************************//** |
377 | Frees a single savepoint struct. */ |
378 | static |
379 | void |
380 | trx_roll_savepoint_free( |
381 | /*=====================*/ |
382 | trx_t* trx, /*!< in: transaction handle */ |
383 | trx_named_savept_t* savep) /*!< in: savepoint to free */ |
384 | { |
385 | UT_LIST_REMOVE(trx->trx_savepoints, savep); |
386 | |
387 | ut_free(savep->name); |
388 | ut_free(savep); |
389 | } |
390 | |
391 | /*******************************************************************//** |
392 | Frees savepoint structs starting from savep. */ |
393 | void |
394 | trx_roll_savepoints_free( |
395 | /*=====================*/ |
396 | trx_t* trx, /*!< in: transaction handle */ |
397 | trx_named_savept_t* savep) /*!< in: free all savepoints starting |
398 | with this savepoint i*/ |
399 | { |
400 | while (savep != NULL) { |
401 | trx_named_savept_t* next_savep; |
402 | |
403 | next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep); |
404 | |
405 | trx_roll_savepoint_free(trx, savep); |
406 | |
407 | savep = next_savep; |
408 | } |
409 | } |
410 | |
411 | /*******************************************************************//** |
412 | Rolls back a transaction back to a named savepoint. Modifications after the |
413 | savepoint are undone but InnoDB does NOT release the corresponding locks |
414 | which are stored in memory. If a lock is 'implicit', that is, a new inserted |
415 | row holds a lock where the lock information is carried by the trx id stored in |
416 | the row, these locks are naturally released in the rollback. Savepoints which |
417 | were set after this savepoint are deleted. |
418 | @return if no savepoint of the name found then DB_NO_SAVEPOINT, |
419 | otherwise DB_SUCCESS */ |
420 | static MY_ATTRIBUTE((nonnull, warn_unused_result)) |
421 | dberr_t |
422 | trx_rollback_to_savepoint_for_mysql_low( |
423 | /*====================================*/ |
424 | trx_t* trx, /*!< in/out: transaction */ |
425 | trx_named_savept_t* savep, /*!< in/out: savepoint */ |
426 | int64_t* mysql_binlog_cache_pos) |
427 | /*!< out: the MySQL binlog |
428 | cache position corresponding |
429 | to this savepoint; MySQL needs |
430 | this information to remove the |
431 | binlog entries of the queries |
432 | executed after the savepoint */ |
433 | { |
434 | dberr_t err; |
435 | |
436 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); |
437 | ut_ad(trx->mysql_thd); |
438 | |
439 | /* Free all savepoints strictly later than savep. */ |
440 | |
441 | trx_roll_savepoints_free( |
442 | trx, UT_LIST_GET_NEXT(trx_savepoints, savep)); |
443 | |
444 | *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos; |
445 | |
446 | trx->op_info = "rollback to a savepoint" ; |
447 | |
448 | err = trx_rollback_to_savepoint(trx, &savep->savept); |
449 | |
450 | /* Store the current undo_no of the transaction so that |
451 | we know where to roll back if we have to roll back the |
452 | next SQL statement: */ |
453 | |
454 | trx_mark_sql_stat_end(trx); |
455 | |
456 | trx->op_info = "" ; |
457 | |
458 | #ifdef WITH_WSREP |
459 | if (wsrep_on(trx->mysql_thd) && |
460 | trx->lock.was_chosen_as_deadlock_victim) { |
461 | trx->lock.was_chosen_as_deadlock_victim = FALSE; |
462 | } |
463 | #endif |
464 | return(err); |
465 | } |
466 | |
467 | /*******************************************************************//** |
468 | Rolls back a transaction back to a named savepoint. Modifications after the |
469 | savepoint are undone but InnoDB does NOT release the corresponding locks |
470 | which are stored in memory. If a lock is 'implicit', that is, a new inserted |
471 | row holds a lock where the lock information is carried by the trx id stored in |
472 | the row, these locks are naturally released in the rollback. Savepoints which |
473 | were set after this savepoint are deleted. |
474 | @return if no savepoint of the name found then DB_NO_SAVEPOINT, |
475 | otherwise DB_SUCCESS */ |
476 | dberr_t |
477 | trx_rollback_to_savepoint_for_mysql( |
478 | /*================================*/ |
479 | trx_t* trx, /*!< in: transaction handle */ |
480 | const char* savepoint_name, /*!< in: savepoint name */ |
481 | int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache |
482 | position corresponding to this |
483 | savepoint; MySQL needs this |
484 | information to remove the |
485 | binlog entries of the queries |
486 | executed after the savepoint */ |
487 | { |
488 | trx_named_savept_t* savep; |
489 | |
490 | /* We are reading trx->state without holding trx_sys.mutex |
491 | here, because the savepoint rollback should be invoked for a |
492 | running active MySQL transaction that is associated with the |
493 | current thread. */ |
494 | ut_ad(trx->mysql_thd); |
495 | |
496 | savep = trx_savepoint_find(trx, savepoint_name); |
497 | |
498 | if (savep == NULL) { |
499 | return(DB_NO_SAVEPOINT); |
500 | } |
501 | |
502 | switch (trx->state) { |
503 | case TRX_STATE_NOT_STARTED: |
504 | ib::error() << "Transaction has a savepoint " |
505 | << savep->name |
506 | << " though it is not started" ; |
507 | return(DB_ERROR); |
508 | |
509 | case TRX_STATE_ACTIVE: |
510 | |
511 | return(trx_rollback_to_savepoint_for_mysql_low( |
512 | trx, savep, mysql_binlog_cache_pos)); |
513 | |
514 | case TRX_STATE_PREPARED: |
515 | case TRX_STATE_COMMITTED_IN_MEMORY: |
516 | /* The savepoint rollback is only allowed on an ACTIVE |
517 | transaction, not a PREPARED or COMMITTED one. */ |
518 | break; |
519 | } |
520 | |
521 | ut_error; |
522 | return(DB_CORRUPTION); |
523 | } |
524 | |
525 | /*******************************************************************//** |
526 | Creates a named savepoint. If the transaction is not yet started, starts it. |
527 | If there is already a savepoint of the same name, this call erases that old |
528 | savepoint and replaces it with a new. Savepoints are deleted in a transaction |
529 | commit or rollback. |
530 | @return always DB_SUCCESS */ |
531 | dberr_t |
532 | trx_savepoint_for_mysql( |
533 | /*====================*/ |
534 | trx_t* trx, /*!< in: transaction handle */ |
535 | const char* savepoint_name, /*!< in: savepoint name */ |
536 | int64_t binlog_cache_pos) /*!< in: MySQL binlog cache |
537 | position corresponding to this |
538 | connection at the time of the |
539 | savepoint */ |
540 | { |
541 | trx_named_savept_t* savep; |
542 | |
543 | trx_start_if_not_started_xa(trx, false); |
544 | |
545 | savep = trx_savepoint_find(trx, savepoint_name); |
546 | |
547 | if (savep) { |
548 | /* There is a savepoint with the same name: free that */ |
549 | |
550 | UT_LIST_REMOVE(trx->trx_savepoints, savep); |
551 | |
552 | ut_free(savep->name); |
553 | ut_free(savep); |
554 | } |
555 | |
556 | /* Create a new savepoint and add it as the last in the list */ |
557 | |
558 | savep = static_cast<trx_named_savept_t*>( |
559 | ut_malloc_nokey(sizeof(*savep))); |
560 | |
561 | savep->name = mem_strdup(savepoint_name); |
562 | |
563 | savep->savept = trx_savept_take(trx); |
564 | |
565 | savep->mysql_binlog_cache_pos = binlog_cache_pos; |
566 | |
567 | UT_LIST_ADD_LAST(trx->trx_savepoints, savep); |
568 | |
569 | return(DB_SUCCESS); |
570 | } |
571 | |
572 | /*******************************************************************//** |
573 | Releases only the named savepoint. Savepoints which were set after this |
574 | savepoint are left as is. |
575 | @return if no savepoint of the name found then DB_NO_SAVEPOINT, |
576 | otherwise DB_SUCCESS */ |
577 | dberr_t |
578 | trx_release_savepoint_for_mysql( |
579 | /*============================*/ |
580 | trx_t* trx, /*!< in: transaction handle */ |
581 | const char* savepoint_name) /*!< in: savepoint name */ |
582 | { |
583 | trx_named_savept_t* savep; |
584 | |
585 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true) |
586 | || trx_state_eq(trx, TRX_STATE_PREPARED, true)); |
587 | ut_ad(trx->mysql_thd); |
588 | |
589 | savep = trx_savepoint_find(trx, savepoint_name); |
590 | |
591 | if (savep != NULL) { |
592 | trx_roll_savepoint_free(trx, savep); |
593 | } |
594 | |
595 | return(savep != NULL ? DB_SUCCESS : DB_NO_SAVEPOINT); |
596 | } |
597 | |
598 | /*******************************************************************//** |
599 | Determines if this transaction is rolling back an incomplete transaction |
600 | in crash recovery. |
601 | @return TRUE if trx is an incomplete transaction that is being rolled |
602 | back in crash recovery */ |
603 | ibool |
604 | trx_is_recv( |
605 | /*========*/ |
606 | const trx_t* trx) /*!< in: transaction */ |
607 | { |
608 | return(trx == trx_roll_crash_recv_trx); |
609 | } |
610 | |
611 | /*******************************************************************//** |
612 | Returns a transaction savepoint taken at this point in time. |
613 | @return savepoint */ |
614 | trx_savept_t |
615 | trx_savept_take( |
616 | /*============*/ |
617 | trx_t* trx) /*!< in: transaction */ |
618 | { |
619 | trx_savept_t savept; |
620 | |
621 | savept.least_undo_no = trx->undo_no; |
622 | |
623 | return(savept); |
624 | } |
625 | |
/*******************************************************************//**
Roll back an active transaction.
Builds a QUE_FORK_RECOVERY query graph with a single roll node, runs it,
finishes the rollback and, for a dictionary transaction that has
trx->table_id set, drops that table. While the function runs,
trx_roll_crash_recv_trx points to trx. */
static
void
trx_rollback_active(
/*================*/
	trx_t*	trx)	/*!< in/out: transaction */
{
	mem_heap_t* heap;
	que_fork_t* fork;
	que_thr_t* thr;
	roll_node_t* roll_node;
	/* Copied up front for the log message at the end.
	NOTE(review): presumably because trx->id cannot be relied on
	after the rollback has been finished -- confirm. */
	const trx_id_t trx_id = trx->id;

	ut_ad(trx_id);

	heap = mem_heap_create(512);

	/* Assemble the graph: fork -> thr -> roll_node. */
	fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
	fork->trx = trx;

	thr = que_thr_create(fork, heap, NULL);

	roll_node = roll_node_create(heap);

	thr->child = roll_node;
	roll_node->common.parent = thr;

	trx->graph = fork;

	ut_a(thr == que_fork_start_command(fork));

	trx_roll_crash_recv_trx = trx;

	/* A transaction with a dictionary operation is rolled back
	while holding the data dictionary lock. */
	const bool dictionary_locked = trx_get_dict_operation(trx)
		!= TRX_DICT_OP_NONE;

	if (dictionary_locked) {
		row_mysql_lock_data_dictionary(trx);
	}

	que_run_threads(thr);
	ut_a(roll_node->undo_thr != NULL);

	que_run_threads(roll_node->undo_thr);

	/* Free the undo graph that the roll node created. */
	que_graph_free(
		static_cast<que_t*>(roll_node->undo_thr->common.parent));

	if (UNIV_UNLIKELY(!trx_rollback_finish(trx))) {
		/* The rollback did not complete; skip the table drop
		and the completion message. */
		ut_ad(!dictionary_locked);
		goto func_exit;
	}

	ut_a(trx->lock.que_state == TRX_QUE_RUNNING);

	/* Only a dictionary transaction with trx->table_id set has a
	table to drop, and only if that table can still be opened. */
	if (!dictionary_locked || !trx->table_id) {
	} else if (dict_table_t* table = dict_table_open_on_id(
			   trx->table_id, TRUE, DICT_TABLE_OP_NORMAL)) {
		ib::info() << "Dropping table " << table->name
			<< ", with id " << trx->table_id
			<< " in recovery" ;

		dict_table_close_and_drop(trx, table);

		trx_commit_for_mysql(trx);
	}

	ib::info() << "Rolled back recovered transaction " << trx_id;

func_exit:
	if (dictionary_locked) {
		row_mysql_unlock_data_dictionary(trx);
	}

	mem_heap_free(heap);

	trx_roll_crash_recv_trx = NULL;
}
705 | |
706 | |
/** Totals accumulated by trx_roll_count_callback() */
struct trx_roll_count_callback_arg
{
  /** Start with both counters at zero */
  trx_roll_count_callback_arg()
    : n_trx(0),
      n_rows(0)
  {
  }

  /** number of transactions counted */
  uint32_t n_trx;
  /** sum of trx_t::undo_no over the counted transactions */
  uint64_t n_rows;
};
713 | |
714 | |
715 | static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element, |
716 | trx_roll_count_callback_arg *arg) |
717 | { |
718 | mutex_enter(&element->mutex); |
719 | if (trx_t *trx= element->trx) |
720 | { |
721 | if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE)) |
722 | { |
723 | arg->n_trx++; |
724 | arg->n_rows+= trx->undo_no; |
725 | } |
726 | } |
727 | mutex_exit(&element->mutex); |
728 | return 0; |
729 | } |
730 | |
731 | /** Report progress when rolling back a row of a recovered transaction. */ |
732 | void trx_roll_report_progress() |
733 | { |
734 | ib_time_t time = ut_time(); |
735 | mutex_enter(&recv_sys->mutex); |
736 | bool report = recv_sys->report(time); |
737 | mutex_exit(&recv_sys->mutex); |
738 | |
739 | if (report) { |
740 | trx_roll_count_callback_arg arg; |
741 | |
742 | /* Get number of recovered active transactions and number of |
743 | rows they modified. Numbers must be accurate, because only this |
744 | thread is allowed to touch recovered transactions. */ |
745 | trx_sys.rw_trx_hash.iterate_no_dups( |
746 | reinterpret_cast<my_hash_walk_action> |
747 | (trx_roll_count_callback), &arg); |
748 | |
749 | if (arg.n_rows > 0) { |
750 | service_manager_extend_timeout( |
751 | INNODB_EXTEND_TIMEOUT_INTERVAL, |
752 | "To roll back: " UINT32PF " transactions, " |
753 | UINT64PF " rows" , arg.n_trx, arg.n_rows); |
754 | } |
755 | |
756 | ib::info() << "To roll back: " << arg.n_trx |
757 | << " transactions, " << arg.n_rows << " rows" ; |
758 | |
759 | } |
760 | } |
761 | |
762 | |
763 | static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element, |
764 | std::vector<trx_t*> *trx_list) |
765 | { |
766 | mutex_enter(&element->mutex); |
767 | if (trx_t *trx= element->trx) |
768 | { |
769 | mutex_enter(&trx->mutex); |
770 | if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE)) |
771 | trx_list->push_back(trx); |
772 | mutex_exit(&trx->mutex); |
773 | } |
774 | mutex_exit(&element->mutex); |
775 | return 0; |
776 | } |
777 | |
778 | |
/**
  Rollback any incomplete transactions which were encountered in crash recovery.

  If the transaction already was committed, then we clean up a possible insert
  undo log. If the transaction was not yet committed, then we roll it back.

  Note: For XA recovered transactions, we rely on MySQL to
  do rollback. They will be in TRX_STATE_PREPARED state. If the server
  is shutdown and they are still lingering in trx_sys_t::trx_list
  then the shutdown will hang.

  Each collected transaction ends up in one of three ways:
  - it is discarded (deregistered and freed at shutdown) without being
    rolled back, if a fast shutdown is already in progress;
  - it is rolled back and freed, or discarded if the rollback was
    interrupted;
  - it is left alone, if all=false and it is not a dictionary transaction.

  @param[in]  all  true=roll back all recovered active transactions;
                   false=roll back any incomplete dictionary transaction
*/

void trx_rollback_recovered(bool all)
{
  std::vector<trx_t*> trx_list;

  ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);

  /*
    Collect the recovered ACTIVE transactions first. Once collected, no
    other thread is allowed to modify or remove these transactions from
    rw_trx_hash.
  */
  trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
                                      (trx_rollback_recovered_callback),
                                      &trx_list);

  /* Process the collected transactions from the back of the vector. */
  while (!trx_list.empty())
  {
    trx_t *trx= trx_list.back();
    trx_list.pop_back();

#ifdef UNIV_DEBUG
    ut_ad(trx);
    trx_mutex_enter(trx);
    ut_ad(trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE));
    trx_mutex_exit(trx);
#endif

    /* Startup is over, the undo sources are gone and a fast shutdown was
    requested: skip the rollback and only discard the transaction object. */
    if (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown)
      goto discard;

    if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE)
    {
      trx_rollback_active(trx);
      if (trx->error_state != DB_SUCCESS)
      {
        /* The rollback was interrupted; reset the error and discard. */
        ut_ad(trx->error_state == DB_INTERRUPTED);
        trx->error_state= DB_SUCCESS;
        ut_ad(!srv_undo_sources);
        ut_ad(srv_fast_shutdown);
discard:
        /* Also the target of the goto above, which jumps into this block. */
        trx_sys.deregister_rw(trx);
        trx_free_at_shutdown(trx);
      }
      else
        trx_free(trx);
    }
  }
}
842 | |
843 | |
844 | /*******************************************************************//** |
845 | Rollback or clean up any incomplete transactions which were |
846 | encountered in crash recovery. If the transaction already was |
847 | committed, then we clean up a possible insert undo log. If the |
848 | transaction was not yet committed, then we roll it back. |
849 | Note: this is done in a background thread. |
850 | @return a dummy parameter */ |
851 | extern "C" |
852 | os_thread_ret_t |
853 | DECLARE_THREAD(trx_rollback_all_recovered)(void*) |
854 | { |
855 | my_thread_init(); |
856 | ut_ad(!srv_read_only_mode); |
857 | |
858 | #ifdef UNIV_PFS_THREAD |
859 | pfs_register_thread(trx_rollback_clean_thread_key); |
860 | #endif /* UNIV_PFS_THREAD */ |
861 | |
862 | if (trx_sys.rw_trx_hash.size()) { |
863 | ib::info() << "Starting in background the rollback of" |
864 | " recovered transactions" ; |
865 | trx_rollback_recovered(true); |
866 | ib::info() << "Rollback of non-prepared transactions" |
867 | " completed" ; |
868 | } |
869 | |
870 | trx_rollback_is_active = false; |
871 | |
872 | my_thread_end(); |
873 | /* We count the number of threads in os_thread_exit(). A created |
874 | thread should always use that to exit and not use return() to exit. */ |
875 | |
876 | os_thread_exit(); |
877 | |
878 | OS_THREAD_DUMMY_RETURN; |
879 | } |
880 | |
881 | /** Try to truncate the undo logs. |
882 | @param[in,out] trx transaction */ |
883 | static |
884 | void |
885 | trx_roll_try_truncate(trx_t* trx) |
886 | { |
887 | trx->pages_undone = 0; |
888 | |
889 | undo_no_t undo_no = trx->undo_no; |
890 | |
891 | if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { |
892 | ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); |
893 | mutex_enter(&undo->rseg->mutex); |
894 | trx_undo_truncate_end(undo, undo_no, false); |
895 | mutex_exit(&undo->rseg->mutex); |
896 | } |
897 | |
898 | if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { |
899 | ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); |
900 | mutex_enter(&undo->rseg->mutex); |
901 | trx_undo_truncate_end(undo, undo_no, true); |
902 | mutex_exit(&undo->rseg->mutex); |
903 | } |
904 | |
905 | #ifdef WITH_WSREP_OUT |
906 | if (wsrep_on(trx->mysql_thd)) { |
907 | trx->lock.was_chosen_as_deadlock_victim = FALSE; |
908 | } |
909 | #endif /* WITH_WSREP */ |
910 | } |
911 | |
912 | /***********************************************************************//** |
913 | Pops the topmost undo log record in a single undo log and updates the info |
914 | about the topmost record in the undo log memory struct. |
915 | @return undo log record, the page s-latched */ |
916 | static |
917 | trx_undo_rec_t* |
918 | trx_roll_pop_top_rec( |
919 | /*=================*/ |
920 | trx_t* trx, /*!< in: transaction */ |
921 | trx_undo_t* undo, /*!< in: undo log */ |
922 | mtr_t* mtr) /*!< in: mtr */ |
923 | { |
924 | page_t* undo_page = trx_undo_page_get_s_latched( |
925 | page_id_t(undo->rseg->space->id, undo->top_page_no), mtr); |
926 | |
927 | ulint offset = undo->top_offset; |
928 | |
929 | trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( |
930 | undo_page + offset, undo->hdr_page_no, undo->hdr_offset, |
931 | true, mtr); |
932 | |
933 | if (prev_rec == NULL) { |
934 | undo->top_undo_no = IB_ID_MAX; |
935 | ut_ad(undo->empty()); |
936 | } else { |
937 | page_t* prev_rec_page = page_align(prev_rec); |
938 | |
939 | if (prev_rec_page != undo_page) { |
940 | |
941 | trx->pages_undone++; |
942 | } |
943 | |
944 | undo->top_page_no = page_get_page_no(prev_rec_page); |
945 | undo->top_offset = ulint(prev_rec - prev_rec_page); |
946 | undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); |
947 | ut_ad(!undo->empty()); |
948 | } |
949 | |
950 | return(undo_page + offset); |
951 | } |
952 | |
953 | /** Get the last undo log record of a transaction (for rollback). |
954 | @param[in,out] trx transaction |
955 | @param[out] roll_ptr DB_ROLL_PTR to the undo record |
956 | @param[in,out] heap memory heap for allocation |
957 | @return undo log record copied to heap |
958 | @retval NULL if none left or the roll_limit (savepoint) was reached */ |
959 | trx_undo_rec_t* |
960 | trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap) |
961 | { |
962 | if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { |
963 | trx_roll_try_truncate(trx); |
964 | } |
965 | |
966 | trx_undo_t* undo = NULL; |
967 | trx_undo_t* insert = trx->rsegs.m_redo.old_insert; |
968 | trx_undo_t* update = trx->rsegs.m_redo.undo; |
969 | trx_undo_t* temp = trx->rsegs.m_noredo.undo; |
970 | const undo_no_t limit = trx->roll_limit; |
971 | |
972 | ut_ad(!insert || !update || insert->empty() || update->empty() |
973 | || insert->top_undo_no != update->top_undo_no); |
974 | ut_ad(!insert || !temp || insert->empty() || temp->empty() |
975 | || insert->top_undo_no != temp->top_undo_no); |
976 | ut_ad(!update || !temp || update->empty() || temp->empty() |
977 | || update->top_undo_no != temp->top_undo_no); |
978 | |
979 | if (UNIV_LIKELY_NULL(insert) |
980 | && !insert->empty() && limit <= insert->top_undo_no) { |
981 | undo = insert; |
982 | } |
983 | |
984 | if (update && !update->empty() && update->top_undo_no >= limit) { |
985 | if (!undo) { |
986 | undo = update; |
987 | } else if (undo->top_undo_no < update->top_undo_no) { |
988 | undo = update; |
989 | } |
990 | } |
991 | |
992 | if (temp && !temp->empty() && temp->top_undo_no >= limit) { |
993 | if (!undo) { |
994 | undo = temp; |
995 | } else if (undo->top_undo_no < temp->top_undo_no) { |
996 | undo = temp; |
997 | } |
998 | } |
999 | |
1000 | if (undo == NULL) { |
1001 | trx_roll_try_truncate(trx); |
1002 | /* Mark any ROLLBACK TO SAVEPOINT completed, so that |
1003 | if the transaction object is committed and reused |
1004 | later, we will default to a full ROLLBACK. */ |
1005 | trx->roll_limit = 0; |
1006 | trx->in_rollback = false; |
1007 | return(NULL); |
1008 | } |
1009 | |
1010 | ut_ad(!undo->empty()); |
1011 | ut_ad(limit <= undo->top_undo_no); |
1012 | |
1013 | *roll_ptr = trx_undo_build_roll_ptr( |
1014 | false, undo->rseg->id, undo->top_page_no, undo->top_offset); |
1015 | |
1016 | mtr_t mtr; |
1017 | mtr.start(); |
1018 | |
1019 | trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); |
1020 | const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec); |
1021 | switch (trx_undo_rec_get_type(undo_rec)) { |
1022 | case TRX_UNDO_INSERT_DEFAULT: |
1023 | /* This record type was introduced in MDEV-11369 |
1024 | instant ADD COLUMN, which was implemented after |
1025 | MDEV-12288 removed the insert_undo log. There is no |
1026 | instant ADD COLUMN for temporary tables. Therefore, |
1027 | this record can only be present in the main undo log. */ |
1028 | ut_ad(undo == update); |
1029 | /* fall through */ |
1030 | case TRX_UNDO_RENAME_TABLE: |
1031 | ut_ad(undo == insert || undo == update); |
1032 | /* fall through */ |
1033 | case TRX_UNDO_INSERT_REC: |
1034 | ut_ad(undo == insert || undo == update || undo == temp); |
1035 | *roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS; |
1036 | break; |
1037 | default: |
1038 | ut_ad(undo == update || undo == temp); |
1039 | break; |
1040 | } |
1041 | |
1042 | trx->undo_no = undo_no; |
1043 | |
1044 | trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); |
1045 | mtr.commit(); |
1046 | |
1047 | return(undo_rec_copy); |
1048 | } |
1049 | |
1050 | /****************************************************************//** |
1051 | Builds an undo 'query' graph for a transaction. The actual rollback is |
1052 | performed by executing this query graph like a query subprocedure call. |
1053 | The reply about the completion of the rollback will be sent by this |
1054 | graph. |
1055 | @return own: the query graph */ |
1056 | static |
1057 | que_t* |
1058 | trx_roll_graph_build( |
1059 | /*=================*/ |
1060 | trx_t* trx) /*!< in/out: transaction */ |
1061 | { |
1062 | mem_heap_t* heap; |
1063 | que_fork_t* fork; |
1064 | que_thr_t* thr; |
1065 | |
1066 | ut_ad(trx_mutex_own(trx)); |
1067 | |
1068 | heap = mem_heap_create(512); |
1069 | fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap); |
1070 | fork->trx = trx; |
1071 | |
1072 | thr = que_thr_create(fork, heap, NULL); |
1073 | |
1074 | thr->child = row_undo_node_create(trx, thr, heap); |
1075 | |
1076 | return(fork); |
1077 | } |
1078 | |
1079 | /*********************************************************************//** |
1080 | Starts a rollback operation, creates the UNDO graph that will do the |
1081 | actual undo operation. |
1082 | @return query graph thread that will perform the UNDO operations. */ |
1083 | static |
1084 | que_thr_t* |
1085 | trx_rollback_start( |
1086 | /*===============*/ |
1087 | trx_t* trx, /*!< in: transaction */ |
1088 | ib_id_t roll_limit) /*!< in: rollback to undo no (for |
1089 | partial undo), 0 if we are rolling back |
1090 | the entire transaction */ |
1091 | { |
1092 | ut_ad(trx_mutex_own(trx)); |
1093 | |
1094 | /* Initialize the rollback field in the transaction */ |
1095 | |
1096 | ut_ad(!trx->roll_limit); |
1097 | ut_ad(!trx->in_rollback); |
1098 | |
1099 | trx->roll_limit = roll_limit; |
1100 | trx->in_rollback = true; |
1101 | |
1102 | ut_a(trx->roll_limit <= trx->undo_no); |
1103 | |
1104 | trx->pages_undone = 0; |
1105 | |
1106 | /* Build a 'query' graph which will perform the undo operations */ |
1107 | |
1108 | que_t* roll_graph = trx_roll_graph_build(trx); |
1109 | |
1110 | trx->graph = roll_graph; |
1111 | |
1112 | trx->lock.que_state = TRX_QUE_ROLLING_BACK; |
1113 | |
1114 | return(que_fork_start_command(roll_graph)); |
1115 | } |
1116 | |
1117 | /*********************************************************************//** |
1118 | Creates a rollback command node struct. |
1119 | @return own: rollback node struct */ |
1120 | roll_node_t* |
1121 | roll_node_create( |
1122 | /*=============*/ |
1123 | mem_heap_t* heap) /*!< in: mem heap where created */ |
1124 | { |
1125 | roll_node_t* node; |
1126 | |
1127 | node = static_cast<roll_node_t*>(mem_heap_zalloc(heap, sizeof(*node))); |
1128 | |
1129 | node->state = ROLL_NODE_SEND; |
1130 | |
1131 | node->common.type = QUE_NODE_ROLLBACK; |
1132 | |
1133 | return(node); |
1134 | } |
1135 | |
1136 | /***********************************************************//** |
1137 | Performs an execution step for a rollback command node in a query graph. |
1138 | @return query thread to run next, or NULL */ |
1139 | que_thr_t* |
1140 | trx_rollback_step( |
1141 | /*==============*/ |
1142 | que_thr_t* thr) /*!< in: query thread */ |
1143 | { |
1144 | roll_node_t* node; |
1145 | |
1146 | node = static_cast<roll_node_t*>(thr->run_node); |
1147 | |
1148 | ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK); |
1149 | |
1150 | if (thr->prev_node == que_node_get_parent(node)) { |
1151 | node->state = ROLL_NODE_SEND; |
1152 | } |
1153 | |
1154 | if (node->state == ROLL_NODE_SEND) { |
1155 | trx_t* trx; |
1156 | ib_id_t roll_limit; |
1157 | |
1158 | trx = thr_get_trx(thr); |
1159 | |
1160 | trx_mutex_enter(trx); |
1161 | |
1162 | node->state = ROLL_NODE_WAIT; |
1163 | |
1164 | ut_a(node->undo_thr == NULL); |
1165 | |
1166 | roll_limit = node->partial ? node->savept.least_undo_no : 0; |
1167 | |
1168 | trx_commit_or_rollback_prepare(trx); |
1169 | |
1170 | node->undo_thr = trx_rollback_start(trx, roll_limit); |
1171 | |
1172 | trx_mutex_exit(trx); |
1173 | |
1174 | } else { |
1175 | ut_ad(node->state == ROLL_NODE_WAIT); |
1176 | |
1177 | thr->run_node = que_node_get_parent(node); |
1178 | } |
1179 | |
1180 | return(thr); |
1181 | } |
1182 | |