1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2015, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file trx/trx0trx.cc |
22 | The transaction |
23 | |
24 | Created 3/26/1996 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #include "ha_prototypes.h" |
28 | |
29 | #include "trx0trx.h" |
30 | |
31 | #ifdef WITH_WSREP |
32 | #include <mysql/service_wsrep.h> |
33 | #endif |
34 | |
35 | #include <mysql/service_thd_error_context.h> |
36 | |
37 | #include "btr0sea.h" |
38 | #include "lock0lock.h" |
39 | #include "log0log.h" |
40 | #include "os0proc.h" |
41 | #include "que0que.h" |
42 | #include "srv0mon.h" |
43 | #include "srv0srv.h" |
44 | #include "fsp0sysspace.h" |
45 | #include "srv0start.h" |
46 | #include "trx0purge.h" |
47 | #include "trx0rec.h" |
48 | #include "trx0roll.h" |
49 | #include "trx0rseg.h" |
50 | #include "trx0undo.h" |
51 | #include "trx0xa.h" |
52 | #include "ut0new.h" |
53 | #include "ut0pool.h" |
54 | #include "ut0vec.h" |
55 | |
56 | #include <set> |
57 | #include <new> |
58 | |
/** The bit pattern corresponding to TRX_ID_MAX: eight bytes, all 0xff. */
const byte trx_id_max_bytes[8] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/** The bit pattern corresponding to max timestamp: seven bytes,
0x7fffffff followed by 0x0f423f (decimal 999999).
NOTE(review): presumably a 4-byte seconds part followed by a 3-byte
microseconds part - confirm against the code that consumes this array. */
const byte timestamp_max_bytes[7] = {
	0x7f, 0xff, 0xff, 0xff, 0x0f, 0x42, 0x3f
};


/** Size in bytes of the buffer allocated for trx_t::detailed_error.
It is also the bound passed when a message is copied or read into that
buffer. */
static const ulint MAX_DETAILED_ERROR_LEN = 256;

/** Set of table_id, ordered by std::less and allocated through
ut_allocator. */
typedef std::set<
	table_id_t,
	std::less<table_id_t>,
	ut_allocator<table_id_t> >	table_id_set;
77 | |
78 | /** Set flush observer for the transaction |
79 | @param[in/out] trx transaction struct |
80 | @param[in] observer flush observer */ |
81 | void |
82 | trx_set_flush_observer( |
83 | trx_t* trx, |
84 | FlushObserver* observer) |
85 | { |
86 | trx->flush_observer = observer; |
87 | } |
88 | |
89 | /*************************************************************//** |
90 | Set detailed error message for the transaction. */ |
91 | void |
92 | trx_set_detailed_error( |
93 | /*===================*/ |
94 | trx_t* trx, /*!< in: transaction struct */ |
95 | const char* msg) /*!< in: detailed error message */ |
96 | { |
97 | ut_strlcpy(trx->detailed_error, msg, MAX_DETAILED_ERROR_LEN); |
98 | } |
99 | |
100 | /*************************************************************//** |
101 | Set detailed error message for the transaction from a file. Note that the |
102 | file is rewinded before reading from it. */ |
103 | void |
104 | trx_set_detailed_error_from_file( |
105 | /*=============================*/ |
106 | trx_t* trx, /*!< in: transaction struct */ |
107 | FILE* file) /*!< in: file to read message from */ |
108 | { |
109 | os_file_read_string(file, trx->detailed_error, MAX_DETAILED_ERROR_LEN); |
110 | } |
111 | |
112 | /********************************************************************//** |
113 | Initialize transaction object. |
114 | @param trx trx to initialize */ |
115 | static |
116 | void |
117 | trx_init( |
118 | /*=====*/ |
119 | trx_t* trx) |
120 | { |
121 | trx->id = 0; |
122 | |
123 | trx->no = TRX_ID_MAX; |
124 | |
125 | trx->state = TRX_STATE_NOT_STARTED; |
126 | |
127 | trx->is_recovered = false; |
128 | |
129 | trx->op_info = "" ; |
130 | |
131 | trx->active_commit_ordered = 0; |
132 | |
133 | trx->isolation_level = TRX_ISO_REPEATABLE_READ; |
134 | |
135 | trx->check_foreigns = true; |
136 | |
137 | trx->check_unique_secondary = true; |
138 | |
139 | trx->lock.n_rec_locks = 0; |
140 | |
141 | trx->dict_operation = TRX_DICT_OP_NONE; |
142 | |
143 | trx->table_id = 0; |
144 | |
145 | trx->error_state = DB_SUCCESS; |
146 | |
147 | trx->error_key_num = ULINT_UNDEFINED; |
148 | |
149 | trx->undo_no = 0; |
150 | |
151 | trx->rsegs.m_redo.rseg = NULL; |
152 | |
153 | trx->rsegs.m_noredo.rseg = NULL; |
154 | |
155 | trx->read_only = false; |
156 | |
157 | trx->auto_commit = false; |
158 | |
159 | trx->will_lock = 0; |
160 | |
161 | trx->ddl = false; |
162 | |
163 | trx->internal = false; |
164 | |
165 | ut_d(trx->start_file = 0); |
166 | |
167 | ut_d(trx->start_line = 0); |
168 | |
169 | trx->magic_n = TRX_MAGIC_N; |
170 | |
171 | trx->lock.que_state = TRX_QUE_RUNNING; |
172 | |
173 | trx->last_sql_stat_start.least_undo_no = 0; |
174 | |
175 | ut_ad(!trx->read_view.is_open()); |
176 | |
177 | trx->lock.rec_cached = 0; |
178 | |
179 | trx->lock.table_cached = 0; |
180 | |
181 | trx->flush_observer = NULL; |
182 | } |
183 | |
/** For managing the life-cycle of the trx_t instance that we get
from the pool. It is the factory argument of the trx_pool_t typedef and
provides three static hooks: init(), destroy() and debug(). */
struct TrxFactory {

	/** Initializes a transaction object. It must be explicitly started
	with trx_start_if_not_started() before using it. The default isolation
	level is TRX_ISO_REPEATABLE_READ.
	@param trx Transaction instance to initialise */
	static void init(trx_t* trx)
	{
		/* Explicitly call the constructor of the already
		allocated object. trx_t objects are allocated by
		ut_zalloc_nokey() in Pool::Pool() which would not call
		the constructors of the trx_t members. */
		new(&trx->mod_tables) trx_mod_tables_t();

		new(&trx->lock.rec_pool) lock_pool_t();

		new(&trx->lock.table_pool) lock_pool_t();

		new(&trx->lock.table_locks) lock_pool_t();

		new(&trx->read_view) ReadView();

		trx->rw_trx_hash_pins = 0;
		/* Store the default values of the plain fields. */
		trx_init(trx);

		trx->dict_operation_lock_mode = 0;

		/* The following allocations are released in destroy(). */
		trx->xid = UT_NEW_NOKEY(xid_t());

		trx->detailed_error = reinterpret_cast<char*>(
			ut_zalloc_nokey(MAX_DETAILED_ERROR_LEN));

		trx->lock.lock_heap = mem_heap_create_typed(
			1024, MEM_HEAP_FOR_LOCK_HEAP);

		lock_trx_lock_list_init(&trx->lock.trx_locks);

		UT_LIST_INIT(
			trx->trx_savepoints,
			&trx_named_savept_t::trx_savepoints);

		mutex_create(LATCH_ID_TRX, &trx->mutex);

		lock_trx_alloc_locks(trx);
	}

	/** Release resources held by the transaction object.
	This frees what init() allocated and explicitly runs the destructors
	of the members that init() constructed with placement new.
	@param trx the transaction for which to release resources */
	static void destroy(trx_t* trx)
	{
		ut_a(trx->magic_n == TRX_MAGIC_N);
		ut_ad(!trx->mysql_thd);

		/* The transaction must not be waiting for a lock or holding
		the dictionary operation lock. */
		ut_a(trx->lock.wait_lock == NULL);
		ut_a(trx->lock.wait_thr == NULL);
		ut_a(trx->dict_operation_lock_mode == 0);

		if (trx->lock.lock_heap != NULL) {
			mem_heap_free(trx->lock.lock_heap);
			trx->lock.lock_heap = NULL;
		}

		ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

		UT_DELETE(trx->xid);
		ut_free(trx->detailed_error);

		mutex_free(&trx->mutex);

		trx->mod_tables.~trx_mod_tables_t();

		ut_ad(!trx->read_view.is_open());

		if (!trx->lock.rec_pool.empty()) {

			/* See lock_trx_alloc_locks() why we only free
			the first element. */

			ut_free(trx->lock.rec_pool[0]);
		}

		if (!trx->lock.table_pool.empty()) {

			/* See lock_trx_alloc_locks() why we only free
			the first element. */

			ut_free(trx->lock.table_pool[0]);
		}

		/* Explicit destructor calls matching the placement new
		expressions in init(). */
		trx->lock.rec_pool.~lock_pool_t();

		trx->lock.table_pool.~lock_pool_t();

		trx->lock.table_locks.~lock_pool_t();

		trx->read_view.~ReadView();
	}

	/** Enforce any invariants here, this is called before the transaction
	is added to the pool. The ut_a() checks and, in debug builds, the
	ut_ad() checks assert that the object is idle: not started, no error,
	no THD attached, no lock wait and no locks.
	@param trx the transaction to check
	@return true if all OK */
	static bool debug(const trx_t* trx)
	{
		ut_a(trx->error_state == DB_SUCCESS);

		ut_a(trx->magic_n == TRX_MAGIC_N);

		ut_ad(!trx->read_only);

		ut_ad(trx->state == TRX_STATE_NOT_STARTED);

		ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);

		ut_ad(trx->mysql_thd == 0);

		ut_a(trx->lock.wait_thr == NULL);
		ut_a(trx->lock.wait_lock == NULL);
		ut_a(trx->dict_operation_lock_mode == 0);

		ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

		ut_ad(trx->autoinc_locks == NULL);

		ut_ad(trx->lock.table_locks.empty());

		return(true);
	}
};
314 | |
315 | /** The lock strategy for TrxPool */ |
316 | struct TrxPoolLock { |
317 | TrxPoolLock() { } |
318 | |
319 | /** Create the mutex */ |
320 | void create() |
321 | { |
322 | mutex_create(LATCH_ID_TRX_POOL, &m_mutex); |
323 | } |
324 | |
325 | /** Acquire the mutex */ |
326 | void enter() { mutex_enter(&m_mutex); } |
327 | |
328 | /** Release the mutex */ |
329 | void exit() { mutex_exit(&m_mutex); } |
330 | |
331 | /** Free the mutex */ |
332 | void destroy() { mutex_free(&m_mutex); } |
333 | |
334 | /** Mutex to use */ |
335 | ib_mutex_t m_mutex; |
336 | }; |
337 | |
338 | /** The lock strategy for the TrxPoolManager */ |
339 | struct TrxPoolManagerLock { |
340 | TrxPoolManagerLock() { } |
341 | |
342 | /** Create the mutex */ |
343 | void create() |
344 | { |
345 | mutex_create(LATCH_ID_TRX_POOL_MANAGER, &m_mutex); |
346 | } |
347 | |
348 | /** Acquire the mutex */ |
349 | void enter() { mutex_enter(&m_mutex); } |
350 | |
351 | /** Release the mutex */ |
352 | void exit() { mutex_exit(&m_mutex); } |
353 | |
354 | /** Free the mutex */ |
355 | void destroy() { mutex_free(&m_mutex); } |
356 | |
357 | /** Mutex to use */ |
358 | ib_mutex_t m_mutex; |
359 | }; |
360 | |
/** Use explicit mutexes for the trx_t pool and its manager:
the pool is locked with TrxPoolLock and its objects are set up and torn
down by TrxFactory; the manager is locked with TrxPoolManagerLock. */
typedef Pool<trx_t, TrxFactory, TrxPoolLock> trx_pool_t;
typedef PoolManager<trx_pool_t, TrxPoolManagerLock > trx_pools_t;

/** The trx_t pool manager; created by trx_pool_init() and destroyed by
trx_pool_close(). */
static trx_pools_t* trx_pools;

/** Size of one trx_t pool in bytes (4 MiB). */
static const ulint MAX_TRX_BLOCK_SIZE = 1024 * 1024 * 4;
370 | |
371 | /** Create the trx_t pool */ |
372 | void |
373 | trx_pool_init() |
374 | { |
375 | trx_pools = UT_NEW_NOKEY(trx_pools_t(MAX_TRX_BLOCK_SIZE)); |
376 | |
377 | ut_a(trx_pools != 0); |
378 | } |
379 | |
380 | /** Destroy the trx_t pool */ |
381 | void |
382 | trx_pool_close() |
383 | { |
384 | UT_DELETE(trx_pools); |
385 | |
386 | trx_pools = 0; |
387 | } |
388 | |
389 | /** @return a trx_t instance from trx_pools. */ |
390 | trx_t *trx_create() |
391 | { |
392 | trx_t* trx = trx_pools->get(); |
393 | |
394 | assert_trx_is_free(trx); |
395 | |
396 | mem_heap_t* heap; |
397 | ib_alloc_t* alloc; |
398 | |
399 | /* We just got trx from pool, it should be non locking */ |
400 | ut_ad(trx->will_lock == 0); |
401 | ut_ad(trx->state == TRX_STATE_NOT_STARTED); |
402 | ut_ad(!trx->rw_trx_hash_pins); |
403 | |
404 | DBUG_LOG("trx" , "Create: " << trx); |
405 | |
406 | heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8); |
407 | |
408 | alloc = ib_heap_allocator_create(heap); |
409 | |
410 | /* Remember to free the vector explicitly in trx_free(). */ |
411 | trx->autoinc_locks = ib_vector_create(alloc, sizeof(void**), 4); |
412 | |
413 | /* Should have been either just initialized or .clear()ed by |
414 | trx_free(). */ |
415 | ut_a(trx->mod_tables.size() == 0); |
416 | |
417 | #ifdef WITH_WSREP |
418 | trx->wsrep_event = NULL; |
419 | #endif /* WITH_WSREP */ |
420 | |
421 | trx_sys.register_trx(trx); |
422 | |
423 | return(trx); |
424 | } |
425 | |
426 | /** |
427 | Release a trx_t instance back to the pool. |
428 | @param trx the instance to release. |
429 | */ |
430 | void trx_free(trx_t*& trx) |
431 | { |
432 | ut_ad(!trx->declared_to_be_inside_innodb); |
433 | ut_ad(!trx->n_mysql_tables_in_use); |
434 | ut_ad(!trx->mysql_n_tables_locked); |
435 | ut_ad(!trx->internal); |
436 | |
437 | if (trx->declared_to_be_inside_innodb) { |
438 | |
439 | ib::error() << "Freeing a trx (" << trx << ", " |
440 | << trx_get_id_for_print(trx) << ") which is declared" |
441 | " to be processing inside InnoDB" ; |
442 | |
443 | trx_print(stderr, trx, 600); |
444 | putc('\n', stderr); |
445 | |
446 | /* This is an error but not a fatal error. We must keep |
447 | the counters like srv_conc.n_active accurate. */ |
448 | srv_conc_force_exit_innodb(trx); |
449 | } |
450 | |
451 | if (trx->n_mysql_tables_in_use != 0 |
452 | || trx->mysql_n_tables_locked != 0) { |
453 | |
454 | ib::error() << "MySQL is freeing a thd though" |
455 | " trx->n_mysql_tables_in_use is " |
456 | << trx->n_mysql_tables_in_use |
457 | << " and trx->mysql_n_tables_locked is " |
458 | << trx->mysql_n_tables_locked << "." ; |
459 | |
460 | trx_print(stderr, trx, 600); |
461 | ut_print_buf(stderr, trx, sizeof(trx_t)); |
462 | putc('\n', stderr); |
463 | } |
464 | |
465 | trx->dict_operation = TRX_DICT_OP_NONE; |
466 | assert_trx_is_inactive(trx); |
467 | |
468 | trx_sys.deregister_trx(trx); |
469 | |
470 | assert_trx_is_free(trx); |
471 | |
472 | trx_sys.rw_trx_hash.put_pins(trx); |
473 | trx->mysql_thd = 0; |
474 | trx->mysql_log_file_name = 0; |
475 | |
476 | // FIXME: We need to avoid this heap free/alloc for each commit. |
477 | if (trx->autoinc_locks != NULL) { |
478 | ut_ad(ib_vector_is_empty(trx->autoinc_locks)); |
479 | /* We allocated a dedicated heap for the vector. */ |
480 | ib_vector_free(trx->autoinc_locks); |
481 | trx->autoinc_locks = NULL; |
482 | } |
483 | |
484 | trx->mod_tables.clear(); |
485 | |
486 | /* trx locking state should have been reset before returning trx |
487 | to pool */ |
488 | ut_ad(trx->will_lock == 0); |
489 | |
490 | trx_pools->mem_free(trx); |
491 | /* Unpoison the memory for innodb_monitor_set_option; |
492 | it is operating also on the freed transaction objects. */ |
493 | MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex); |
494 | /* Declare the contents as initialized for Valgrind; |
495 | we checked that it was initialized in trx_pools->mem_free(trx). */ |
496 | UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex); |
497 | |
498 | trx = NULL; |
499 | } |
500 | |
/** At shutdown, frees a transaction object.
The transaction must be a recovered one, either in the prepared state or
still active under one of the conditions asserted below. Its locks and
undo logs are released, its state is reset to TRX_STATE_NOT_STARTED, and
the object is handed to trx_free().
@param[in,out]	trx	recovered transaction to free */
void
trx_free_at_shutdown(trx_t *trx)
{
	ut_ad(trx->is_recovered);
	/* An ACTIVE transaction is only tolerated here if one of the
	listed startup, restore, read-only, forced-recovery or shutdown
	conditions holds. */
	ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
	     || (trx_state_eq(trx, TRX_STATE_ACTIVE)
		 && (!srv_was_started
		     || srv_operation == SRV_OPERATION_RESTORE
		     || srv_operation == SRV_OPERATION_RESTORE_EXPORT
		     || srv_read_only_mode
		     || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
		     || (!srv_is_being_started
			 && !srv_undo_sources && srv_fast_shutdown))));
	ut_a(trx->magic_n == TRX_MAGIC_N);

	lock_trx_release_locks(trx);
	trx_undo_free_at_shutdown(trx);

	ut_a(!trx->read_only);

	DBUG_LOG("trx", "Free prepared: " << trx);
	/* Reset the state directly; the transaction is not committed. */
	trx->state = TRX_STATE_NOT_STARTED;

	/* Undo trx_resurrect_table_locks(). */
	lock_trx_lock_list_init(&trx->lock.trx_locks);

	/* Note: This vector is not guaranteed to be empty because the
	transaction was never committed and therefore lock_trx_release()
	was not called. */
	trx->lock.table_locks.clear();

	/* trx_free() sets its argument to NULL; here that only affects
	the local parameter. */
	trx_free(trx);
}
535 | |
536 | |
537 | /** |
538 | Disconnect a prepared transaction from MySQL |
539 | @param[in,out] trx transaction |
540 | */ |
541 | void trx_disconnect_prepared(trx_t *trx) |
542 | { |
543 | ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED)); |
544 | ut_ad(trx->mysql_thd); |
545 | trx->read_view.close(); |
546 | trx->is_recovered= true; |
547 | trx->mysql_thd= NULL; |
548 | /* todo/fixme: suggest to do it at innodb prepare */ |
549 | trx->will_lock= 0; |
550 | } |
551 | |
552 | /****************************************************************//** |
553 | Resurrect the table locks for a resurrected transaction. */ |
554 | static |
555 | void |
556 | trx_resurrect_table_locks( |
557 | /*======================*/ |
558 | trx_t* trx, /*!< in/out: transaction */ |
559 | const trx_undo_t* undo) /*!< in: undo log */ |
560 | { |
561 | mtr_t mtr; |
562 | page_t* undo_page; |
563 | trx_undo_rec_t* undo_rec; |
564 | table_id_set tables; |
565 | |
566 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || |
567 | trx_state_eq(trx, TRX_STATE_PREPARED)); |
568 | ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); |
569 | |
570 | if (undo->empty()) { |
571 | return; |
572 | } |
573 | |
574 | mtr_start(&mtr); |
575 | |
576 | /* trx_rseg_mem_create() may have acquired an X-latch on this |
577 | page, so we cannot acquire an S-latch. */ |
578 | undo_page = trx_undo_page_get( |
579 | page_id_t(trx->rsegs.m_redo.rseg->space->id, |
580 | undo->top_page_no), &mtr); |
581 | |
582 | undo_rec = undo_page + undo->top_offset; |
583 | |
584 | do { |
585 | ulint type; |
586 | undo_no_t undo_no; |
587 | table_id_t table_id; |
588 | ulint cmpl_info; |
589 | bool updated_extern; |
590 | |
591 | page_t* undo_rec_page = page_align(undo_rec); |
592 | |
593 | if (undo_rec_page != undo_page) { |
594 | mtr.release_page(undo_page, MTR_MEMO_PAGE_X_FIX); |
595 | undo_page = undo_rec_page; |
596 | } |
597 | |
598 | trx_undo_rec_get_pars( |
599 | undo_rec, &type, &cmpl_info, |
600 | &updated_extern, &undo_no, &table_id); |
601 | tables.insert(table_id); |
602 | |
603 | undo_rec = trx_undo_get_prev_rec( |
604 | undo_rec, undo->hdr_page_no, |
605 | undo->hdr_offset, false, &mtr); |
606 | } while (undo_rec); |
607 | |
608 | mtr_commit(&mtr); |
609 | |
610 | for (table_id_set::const_iterator i = tables.begin(); |
611 | i != tables.end(); i++) { |
612 | if (dict_table_t* table = dict_table_open_on_id( |
613 | *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) { |
614 | if (!table->is_readable()) { |
615 | mutex_enter(&dict_sys->mutex); |
616 | dict_table_close(table, TRUE, FALSE); |
617 | dict_table_remove_from_cache(table); |
618 | mutex_exit(&dict_sys->mutex); |
619 | continue; |
620 | } |
621 | |
622 | if (trx->state == TRX_STATE_PREPARED) { |
623 | trx->mod_tables.insert( |
624 | trx_mod_tables_t::value_type(table, |
625 | 0)); |
626 | } |
627 | lock_table_ix_resurrect(table, trx); |
628 | |
629 | DBUG_LOG("ib_trx" , |
630 | "resurrect " << ib::hex(trx->id) |
631 | << " IX lock on " << table->name); |
632 | |
633 | dict_table_close(table, FALSE, FALSE); |
634 | } |
635 | } |
636 | } |
637 | |
638 | |
639 | /** |
640 | Resurrect the transactions that were doing inserts/updates the time of the |
641 | crash, they need to be undone. |
642 | */ |
643 | |
644 | static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, |
645 | ib_time_t start_time, uint64_t *rows_to_undo, |
646 | bool is_old_insert) |
647 | { |
648 | trx_state_t state; |
649 | /* |
650 | This is single-threaded startup code, we do not need the |
651 | protection of trx->mutex or trx_sys.mutex here. |
652 | */ |
653 | switch (undo->state) |
654 | { |
655 | case TRX_UNDO_ACTIVE: |
656 | state= TRX_STATE_ACTIVE; |
657 | break; |
658 | case TRX_UNDO_PREPARED: |
659 | /* |
660 | Prepared transactions are left in the prepared state |
661 | waiting for a commit or abort decision from MySQL |
662 | */ |
663 | ib::info() << "Transaction " << undo->trx_id |
664 | << " was in the XA prepared state." ; |
665 | |
666 | state= TRX_STATE_PREPARED; |
667 | break; |
668 | default: |
669 | if (is_old_insert && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) |
670 | trx_undo_commit_cleanup(undo, false); |
671 | return; |
672 | } |
673 | |
674 | trx_t *trx= trx_create(); |
675 | trx->state= state; |
676 | ut_d(trx->start_file= __FILE__); |
677 | ut_d(trx->start_line= __LINE__); |
678 | ut_ad(trx->no == TRX_ID_MAX); |
679 | |
680 | if (is_old_insert) |
681 | trx->rsegs.m_redo.old_insert= undo; |
682 | else |
683 | trx->rsegs.m_redo.undo= undo; |
684 | |
685 | trx->undo_no= undo->top_undo_no + 1; |
686 | trx->rsegs.m_redo.rseg= rseg; |
687 | /* |
688 | For transactions with active data will not have rseg size = 1 |
689 | or will not qualify for purge limit criteria. So it is safe to increment |
690 | this trx_ref_count w/o mutex protection. |
691 | */ |
692 | ++trx->rsegs.m_redo.rseg->trx_ref_count; |
693 | *trx->xid= undo->xid; |
694 | trx->id= undo->trx_id; |
695 | trx->is_recovered= true; |
696 | trx->start_time= start_time; |
697 | |
698 | if (undo->dict_operation) |
699 | { |
700 | trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); |
701 | trx->table_id= undo->table_id; |
702 | } |
703 | |
704 | trx_sys.rw_trx_hash.insert(trx); |
705 | trx_sys.rw_trx_hash.put_pins(trx); |
706 | trx_resurrect_table_locks(trx, undo); |
707 | if (trx_state_eq(trx, TRX_STATE_ACTIVE)) |
708 | *rows_to_undo+= trx->undo_no; |
709 | } |
710 | |
711 | |
712 | /** Initialize (resurrect) transactions at startup. */ |
713 | void |
714 | trx_lists_init_at_db_start() |
715 | { |
716 | ut_a(srv_is_being_started); |
717 | ut_ad(!srv_was_started); |
718 | |
719 | if (srv_operation == SRV_OPERATION_RESTORE) { |
720 | /* mariabackup --prepare only deals with |
721 | the redo log and the data files, not with |
722 | transactions or the data dictionary. */ |
723 | trx_rseg_array_init(); |
724 | return; |
725 | } |
726 | |
727 | if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { |
728 | return; |
729 | } |
730 | |
731 | purge_sys.create(); |
732 | trx_rseg_array_init(); |
733 | |
734 | /* Look from the rollback segments if there exist undo logs for |
735 | transactions. */ |
736 | const ib_time_t start_time = ut_time(); |
737 | uint64_t rows_to_undo = 0; |
738 | |
739 | for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { |
740 | trx_undo_t* undo; |
741 | trx_rseg_t* rseg = trx_sys.rseg_array[i]; |
742 | |
743 | /* Some rollback segment may be unavailable, |
744 | especially if the server was previously run with a |
745 | non-default value of innodb_undo_logs. */ |
746 | if (rseg == NULL) { |
747 | continue; |
748 | } |
749 | |
750 | /* Resurrect transactions that were doing inserts |
751 | using the old separate insert_undo log. */ |
752 | undo = UT_LIST_GET_FIRST(rseg->old_insert_list); |
753 | while (undo) { |
754 | trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo); |
755 | trx_resurrect(undo, rseg, start_time, &rows_to_undo, |
756 | true); |
757 | undo = next; |
758 | } |
759 | |
760 | /* Ressurrect other transactions. */ |
761 | for (undo = UT_LIST_GET_FIRST(rseg->undo_list); |
762 | undo != NULL; |
763 | undo = UT_LIST_GET_NEXT(undo_list, undo)) { |
764 | trx_t *trx = trx_sys.rw_trx_hash.find(0, undo->trx_id); |
765 | if (!trx) { |
766 | trx_resurrect(undo, rseg, start_time, |
767 | &rows_to_undo, false); |
768 | } else { |
769 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || |
770 | trx_state_eq(trx, TRX_STATE_PREPARED)); |
771 | ut_ad(trx->start_time == start_time); |
772 | ut_ad(trx->is_recovered); |
773 | ut_ad(trx->rsegs.m_redo.rseg == rseg); |
774 | ut_ad(trx->rsegs.m_redo.rseg->trx_ref_count); |
775 | |
776 | trx->rsegs.m_redo.undo = undo; |
777 | if (undo->top_undo_no >= trx->undo_no) { |
778 | if (trx_state_eq(trx, |
779 | TRX_STATE_ACTIVE)) { |
780 | rows_to_undo -= trx->undo_no; |
781 | rows_to_undo += |
782 | undo->top_undo_no + 1; |
783 | } |
784 | |
785 | trx->undo_no = undo->top_undo_no + 1; |
786 | } |
787 | trx_resurrect_table_locks(trx, undo); |
788 | } |
789 | } |
790 | } |
791 | |
792 | if (trx_sys.rw_trx_hash.size()) { |
793 | |
794 | ib::info() << trx_sys.rw_trx_hash.size() |
795 | << " transaction(s) which must be rolled back or" |
796 | " cleaned up in total " << rows_to_undo |
797 | << " row operations to undo" ; |
798 | |
799 | ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id(); |
800 | } |
801 | trx_sys.clone_oldest_view(); |
802 | } |
803 | |
/** Assign a persistent rollback segment in a round-robin fashion,
evenly distributed between 0 and innodb_undo_logs-1.
On success the trx_ref_count of the returned segment has been
incremented while holding its mutex.
@return persistent rollback segment
@retval NULL if innodb_read_only */
static
trx_rseg_t*
trx_assign_rseg_low()
{
	if (srv_read_only_mode) {
		ut_ad(srv_undo_logs == ULONG_UNDEFINED);
		return(NULL);
	}

	/* The first slot is always assigned to the system tablespace. */
	ut_ad(trx_sys.rseg_array[0]->space == fil_system.sys_space);

	/* Choose a rollback segment evenly distributed between 0 and
	innodb_undo_logs-1 in a round-robin fashion, skipping those
	undo tablespaces that are scheduled for truncation.

	Because rseg_slot is not protected by atomics or any mutex, race
	conditions are possible, meaning that multiple transactions
	that start modifications concurrently will write their undo
	log to the same rollback segment. */
	static ulong	rseg_slot;
	ulint		slot = rseg_slot++ % srv_undo_logs;
	trx_rseg_t*	rseg;

#ifdef UNIV_DEBUG
	ulint	start_scan_slot = slot;
	bool	look_for_rollover = false;
#endif /* UNIV_DEBUG */

	bool	allocated = false;

	/* Outer loop: repeat the selection until a segment has actually
	been marked as allocated. */
	do {
		/* Inner loop: advance through the slots until a candidate
		segment is found. */
		for (;;) {
			rseg = trx_sys.rseg_array[slot];

#ifdef UNIV_DEBUG
			/* Ensure that we are not revisiting the same
			slot that we have already inspected. */
			if (look_for_rollover) {
				ut_ad(start_scan_slot != slot);
			}
			look_for_rollover = true;
#endif /* UNIV_DEBUG */

			/* From here on, slot refers to the slot after
			the one held in rseg. */
			slot = (slot + 1) % srv_undo_logs;

			if (rseg == NULL) {
				continue;
			}

			ut_ad(rseg->is_persistent());

			if (rseg->space != fil_system.sys_space) {
				ut_ad(srv_undo_tablespaces > 1);
				if (rseg->skip_allocation) {
					continue;
				}
			} else if (trx_rseg_t* next
				   = trx_sys.rseg_array[slot]) {
				if (next->space != fil_system.sys_space
				    && srv_undo_tablespaces > 0) {
					/** If dedicated
					innodb_undo_tablespaces have
					been configured, try to use them
					instead of the system tablespace. */
					continue;
				}
			}

			break;
		}

		/* By now we have only selected the rseg but not marked it
		allocated. By marking it allocated we are ensuring that it will
		never be selected for UNDO truncate purge. skip_allocation is
		checked again under the mutex; if it is set, the outer loop
		selects another segment. */
		mutex_enter(&rseg->mutex);
		if (!rseg->skip_allocation) {
			rseg->trx_ref_count++;
			allocated = true;
		}
		mutex_exit(&rseg->mutex);
	} while (!allocated);

	ut_ad(rseg->trx_ref_count > 0);
	ut_ad(rseg->is_persistent());
	return(rseg);
}
895 | |
896 | /** Assign a rollback segment for modifying temporary tables. |
897 | @return the assigned rollback segment */ |
898 | trx_rseg_t* |
899 | trx_t::assign_temp_rseg() |
900 | { |
901 | ut_ad(!rsegs.m_noredo.rseg); |
902 | ut_ad(!trx_is_autocommit_non_locking(this)); |
903 | compile_time_assert(ut_is_2pow(TRX_SYS_N_RSEGS)); |
904 | |
905 | /* Choose a temporary rollback segment between 0 and 127 |
906 | in a round-robin fashion. Because rseg_slot is not protected by |
907 | atomics or any mutex, race conditions are possible, meaning that |
908 | multiple transactions that start modifications concurrently |
909 | will write their undo log to the same rollback segment. */ |
910 | static ulong rseg_slot; |
911 | trx_rseg_t* rseg = trx_sys.temp_rsegs[ |
912 | rseg_slot++ & (TRX_SYS_N_RSEGS - 1)]; |
913 | ut_ad(!rseg->is_persistent()); |
914 | rsegs.m_noredo.rseg = rseg; |
915 | |
916 | if (id == 0) { |
917 | trx_sys.register_rw(this); |
918 | } |
919 | |
920 | ut_ad(!rseg->is_persistent()); |
921 | return(rseg); |
922 | } |
923 | |
/****************************************************************//**
Starts a transaction.
The transaction must be in the TRX_STATE_NOT_STARTED state with no
rollback segments assigned and no locks. On return it is in the
TRX_STATE_ACTIVE state; whether it is registered as a read-write
transaction depends on the read_only, ddl and read_write settings as
described in the body. */
static
void
trx_start_low(
/*==========*/
	trx_t*	trx,		/*!< in: transaction */
	bool	read_write)	/*!< in: true if read-write transaction */
{
	ut_ad(!trx->in_rollback);
	ut_ad(!trx->is_recovered);
	ut_ad(trx->start_line != 0);
	ut_ad(trx->start_file != 0);
	ut_ad(trx->roll_limit == 0);
	ut_ad(trx->error_state == DB_SUCCESS);
	ut_ad(trx->rsegs.m_redo.rseg == NULL);
	ut_ad(trx->rsegs.m_noredo.rseg == NULL);
	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
	ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

	/* Check whether it is an AUTOCOMMIT SELECT */
	trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);

	/* Read-only if the server is read-only, or if this is neither a
	DDL nor an internal transaction and the THD reports read-only. */
	trx->read_only = srv_read_only_mode
		|| (!trx->ddl && !trx->internal
		    && thd_trx_is_read_only(trx->mysql_thd));

	/* A transaction that is not auto-commit counts as locking; an
	auto-commit transaction that will not lock is read-only. */
	if (!trx->auto_commit) {
		++trx->will_lock;
	} else if (trx->will_lock == 0) {
		trx->read_only = true;
	}

#ifdef WITH_WSREP
	/* Clear the XID and give it the format id -1. */
	memset(trx->xid, 0, sizeof(xid_t));
	trx->xid->formatID = -1;
#endif /* WITH_WSREP */

	/* The initial value for trx->no: TRX_ID_MAX is used in
	read_view_open_now: */

	trx->no = TRX_ID_MAX;

	ut_a(ib_vector_is_empty(trx->autoinc_locks));
	ut_a(trx->lock.table_locks.empty());

	/* No other thread can access this trx object through rw_trx_hash, thus
	we don't need trx_sys.mutex protection for that purpose. Still this
	trx can be found through trx_sys.trx_list, which means state
	change must be protected by e.g. trx->mutex.

	For now we update it without mutex protection, because original code
	did it this way. It has to be reviewed and fixed properly. */
	trx->state = TRX_STATE_ACTIVE;

	/* By default all transactions are in the read-only list unless they
	are non-locking auto-commit read only transactions or background
	(internal) transactions. Note: Transactions marked explicitly as
	read only can write to temporary tables, we put those on the RO
	list too. */

	if (!trx->read_only
	    && (trx->mysql_thd == 0 || read_write || trx->ddl)) {

		/* Temporary rseg is assigned only if the transaction
		updates a temporary table */
		trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
		ut_ad(trx->rsegs.m_redo.rseg != 0
		      || srv_read_only_mode
		      || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);

		trx_sys.register_rw(trx);
	} else {
		/* No persistent rollback segment is assigned on this
		path; the id starts out as 0. */
		trx->id = 0;

		if (!trx_is_autocommit_non_locking(trx)) {

			/* If this is a read-only transaction that is writing
			to a temporary table then it needs a transaction id
			to write to the temporary table. */

			if (read_write) {
				ut_ad(!srv_read_only_mode);
				trx_sys.register_rw(trx);
			}
		} else {
			ut_ad(!read_write);
		}
	}

	/* Take the start time from the THD when there is one, otherwise
	from ut_time() with a zero microsecond part. */
	if (trx->mysql_thd != NULL) {
		trx->start_time = thd_start_time_in_secs(trx->mysql_thd);
		trx->start_time_micro = thd_query_start_micro(trx->mysql_thd);

	} else {
		trx->start_time = ut_time();
		trx->start_time_micro = 0;
	}

	ut_a(trx->error_state == DB_SUCCESS);

	MONITOR_INC(MONITOR_TRX_ACTIVE);
}
1027 | |
1028 | /** Set the serialisation number for a persistent committed transaction. |
1029 | @param[in,out] trx committed transaction with persistent changes */ |
1030 | static |
1031 | void |
1032 | trx_serialise(trx_t* trx) |
1033 | { |
1034 | trx_rseg_t *rseg = trx->rsegs.m_redo.rseg; |
1035 | ut_ad(rseg); |
1036 | ut_ad(mutex_own(&rseg->mutex)); |
1037 | |
1038 | if (rseg->last_page_no == FIL_NULL) { |
1039 | mutex_enter(&purge_sys.pq_mutex); |
1040 | } |
1041 | |
1042 | trx_sys.assign_new_trx_no(trx); |
1043 | |
1044 | /* If the rollback segment is not empty then the |
1045 | new trx_t::no can't be less than any trx_t::no |
1046 | already in the rollback segment. User threads only |
1047 | produce events when a rollback segment is empty. */ |
1048 | if (rseg->last_page_no == FIL_NULL) { |
1049 | purge_sys.purge_queue.push(TrxUndoRsegs(trx->no, *rseg)); |
1050 | mutex_exit(&purge_sys.pq_mutex); |
1051 | } |
1052 | } |
1053 | |
1054 | /****************************************************************//** |
1055 | Assign the transaction its history serialisation number and write the |
1056 | update UNDO log record to the assigned rollback segment. */ |
1057 | static |
1058 | void |
1059 | trx_write_serialisation_history( |
1060 | /*============================*/ |
1061 | trx_t* trx, /*!< in/out: transaction */ |
1062 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
1063 | { |
1064 | /* Change the undo log segment states from TRX_UNDO_ACTIVE to some |
1065 | other state: these modifications to the file data structure define |
1066 | the transaction as committed in the file based domain, at the |
1067 | serialization point of the log sequence number lsn obtained below. */ |
1068 | |
1069 | /* We have to hold the rseg mutex because update log headers have |
1070 | to be put to the history list in the (serialisation) order of the |
1071 | UNDO trx number. This is required for the purge in-memory data |
1072 | structures too. */ |
1073 | |
1074 | if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { |
1075 | /* Undo log for temporary tables is discarded at transaction |
1076 | commit. There is no purge for temporary tables, and also no |
1077 | MVCC, because they are private to a session. */ |
1078 | |
1079 | mtr_t temp_mtr; |
1080 | temp_mtr.start(); |
1081 | temp_mtr.set_log_mode(MTR_LOG_NO_REDO); |
1082 | |
1083 | mutex_enter(&trx->rsegs.m_noredo.rseg->mutex); |
1084 | trx_undo_set_state_at_finish(undo, &temp_mtr); |
1085 | mutex_exit(&trx->rsegs.m_noredo.rseg->mutex); |
1086 | temp_mtr.commit(); |
1087 | } |
1088 | |
1089 | trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; |
1090 | if (!rseg) { |
1091 | ut_ad(!trx->rsegs.m_redo.undo); |
1092 | ut_ad(!trx->rsegs.m_redo.old_insert); |
1093 | return; |
1094 | } |
1095 | |
1096 | trx_undo_t*& undo = trx->rsegs.m_redo.undo; |
1097 | trx_undo_t*& old_insert = trx->rsegs.m_redo.old_insert; |
1098 | |
1099 | if (!undo && !old_insert) { |
1100 | return; |
1101 | } |
1102 | |
1103 | ut_ad(!trx->read_only); |
1104 | ut_ad(!undo || undo->rseg == rseg); |
1105 | ut_ad(!old_insert || old_insert->rseg == rseg); |
1106 | mutex_enter(&rseg->mutex); |
1107 | |
1108 | /* Assign the transaction serialisation number and add any |
1109 | undo log to the purge queue. */ |
1110 | trx_serialise(trx); |
1111 | |
1112 | if (UNIV_LIKELY_NULL(old_insert)) { |
1113 | UT_LIST_REMOVE(rseg->old_insert_list, old_insert); |
1114 | trx_purge_add_undo_to_history(trx, old_insert, mtr); |
1115 | } |
1116 | if (undo) { |
1117 | UT_LIST_REMOVE(rseg->undo_list, undo); |
1118 | trx_purge_add_undo_to_history(trx, undo, mtr); |
1119 | } |
1120 | |
1121 | mutex_exit(&rseg->mutex); |
1122 | |
1123 | MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); |
1124 | |
1125 | trx->mysql_log_file_name = NULL; |
1126 | } |
1127 | |
1128 | /******************************************************************** |
1129 | Finalize a transaction containing updates for a FTS table. */ |
1130 | static |
1131 | void |
1132 | trx_finalize_for_fts_table( |
1133 | /*=======================*/ |
1134 | fts_trx_table_t* ftt) /* in: FTS trx table */ |
1135 | { |
1136 | fts_t* fts = ftt->table->fts; |
1137 | fts_doc_ids_t* doc_ids = ftt->added_doc_ids; |
1138 | |
1139 | mutex_enter(&fts->bg_threads_mutex); |
1140 | |
1141 | if (fts->fts_status & BG_THREAD_STOP) { |
1142 | /* The table is about to be dropped, no use |
1143 | adding anything to its work queue. */ |
1144 | |
1145 | mutex_exit(&fts->bg_threads_mutex); |
1146 | } else { |
1147 | mem_heap_t* heap; |
1148 | mutex_exit(&fts->bg_threads_mutex); |
1149 | |
1150 | ut_a(fts->add_wq); |
1151 | |
1152 | heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg); |
1153 | |
1154 | ib_wqueue_add(fts->add_wq, doc_ids, heap); |
1155 | |
1156 | /* fts_trx_table_t no longer owns the list. */ |
1157 | ftt->added_doc_ids = NULL; |
1158 | } |
1159 | } |
1160 | |
1161 | /******************************************************************//** |
1162 | Finalize a transaction containing updates to FTS tables. */ |
1163 | static |
1164 | void |
1165 | trx_finalize_for_fts( |
1166 | /*=================*/ |
1167 | trx_t* trx, /*!< in/out: transaction */ |
1168 | bool is_commit) /*!< in: true if the transaction was |
1169 | committed, false if it was rolled back. */ |
1170 | { |
1171 | if (is_commit) { |
1172 | const ib_rbt_node_t* node; |
1173 | ib_rbt_t* tables; |
1174 | fts_savepoint_t* savepoint; |
1175 | |
1176 | savepoint = static_cast<fts_savepoint_t*>( |
1177 | ib_vector_last(trx->fts_trx->savepoints)); |
1178 | |
1179 | tables = savepoint->tables; |
1180 | |
1181 | for (node = rbt_first(tables); |
1182 | node; |
1183 | node = rbt_next(tables, node)) { |
1184 | fts_trx_table_t** ftt; |
1185 | |
1186 | ftt = rbt_value(fts_trx_table_t*, node); |
1187 | |
1188 | if ((*ftt)->added_doc_ids) { |
1189 | trx_finalize_for_fts_table(*ftt); |
1190 | } |
1191 | } |
1192 | } |
1193 | |
1194 | fts_trx_free(trx->fts_trx); |
1195 | trx->fts_trx = NULL; |
1196 | } |
1197 | |
1198 | /**********************************************************************//** |
1199 | If required, flushes the log to disk based on the value of |
1200 | innodb_flush_log_at_trx_commit. */ |
1201 | static |
1202 | void |
1203 | trx_flush_log_if_needed_low( |
1204 | /*========================*/ |
1205 | lsn_t lsn) /*!< in: lsn up to which logs are to be |
1206 | flushed. */ |
1207 | { |
1208 | bool flush = srv_file_flush_method != SRV_NOSYNC; |
1209 | |
1210 | switch (srv_flush_log_at_trx_commit) { |
1211 | case 3: |
1212 | case 2: |
1213 | /* Write the log but do not flush it to disk */ |
1214 | flush = false; |
1215 | /* fall through */ |
1216 | case 1: |
1217 | /* Write the log and optionally flush it to disk */ |
1218 | log_write_up_to(lsn, flush); |
1219 | return; |
1220 | case 0: |
1221 | /* Do nothing */ |
1222 | return; |
1223 | } |
1224 | |
1225 | ut_error; |
1226 | } |
1227 | |
1228 | /**********************************************************************//** |
1229 | If required, flushes the log to disk based on the value of |
1230 | innodb_flush_log_at_trx_commit. */ |
1231 | static |
1232 | void |
1233 | trx_flush_log_if_needed( |
1234 | /*====================*/ |
1235 | lsn_t lsn, /*!< in: lsn up to which logs are to be |
1236 | flushed. */ |
1237 | trx_t* trx) /*!< in/out: transaction */ |
1238 | { |
1239 | trx->op_info = "flushing log" ; |
1240 | trx_flush_log_if_needed_low(lsn); |
1241 | trx->op_info = "" ; |
1242 | } |
1243 | |
1244 | /**********************************************************************//** |
1245 | For each table that has been modified by the given transaction: update |
1246 | its dict_table_t::update_time with the current timestamp. Clear the list |
1247 | of the modified tables at the end. */ |
1248 | static |
1249 | void |
1250 | trx_update_mod_tables_timestamp( |
1251 | /*============================*/ |
1252 | trx_t* trx) /*!< in: transaction */ |
1253 | { |
1254 | |
1255 | ut_ad(trx->id != 0); |
1256 | |
1257 | /* consider using trx->start_time if calling time() is too |
1258 | expensive here */ |
1259 | time_t now = ut_time(); |
1260 | |
1261 | trx_mod_tables_t::const_iterator end = trx->mod_tables.end(); |
1262 | |
1263 | for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin(); |
1264 | it != end; |
1265 | ++it) { |
1266 | |
1267 | /* This could be executed by multiple threads concurrently |
1268 | on the same table object. This is fine because time_t is |
1269 | word size or less. And _purely_ _theoretically_, even if |
1270 | time_t write is not atomic, likely the value of 'now' is |
1271 | the same in all threads and even if it is not, getting a |
1272 | "garbage" in table->update_time is justified because |
1273 | protecting it with a latch here would be too performance |
1274 | intrusive. */ |
1275 | it->first->update_time = now; |
1276 | } |
1277 | |
1278 | trx->mod_tables.clear(); |
1279 | } |
1280 | |
1281 | /****************************************************************//** |
1282 | Commits a transaction in memory. */ |
1283 | static |
1284 | void |
1285 | trx_commit_in_memory( |
1286 | /*=================*/ |
1287 | trx_t* trx, /*!< in/out: transaction */ |
1288 | const mtr_t* mtr) /*!< in: mini-transaction of |
1289 | trx_write_serialisation_history(), or NULL if |
1290 | the transaction did not modify anything */ |
1291 | { |
1292 | trx->must_flush_log_later = false; |
1293 | trx->read_view.close(); |
1294 | |
1295 | if (trx_is_autocommit_non_locking(trx)) { |
1296 | ut_ad(trx->id == 0); |
1297 | ut_ad(trx->read_only); |
1298 | ut_a(!trx->is_recovered); |
1299 | ut_ad(trx->rsegs.m_redo.rseg == NULL); |
1300 | |
1301 | /* Note: We are asserting without holding the lock mutex. But |
1302 | that is OK because this transaction is not waiting and cannot |
1303 | be rolled back and no new locks can (or should not) be added |
1304 | becuase it is flagged as a non-locking read-only transaction. */ |
1305 | |
1306 | ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); |
1307 | |
1308 | /* This state change is not protected by any mutex, therefore |
1309 | there is an inherent race here around state transition during |
1310 | printouts. We ignore this race for the sake of efficiency. |
1311 | However, the trx_sys_t::mutex will protect the trx_t instance |
1312 | and it cannot be removed from the trx_list and freed |
1313 | without first acquiring the trx_sys_t::mutex. */ |
1314 | |
1315 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); |
1316 | |
1317 | MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT); |
1318 | |
1319 | DBUG_LOG("trx" , "Autocommit in memory: " << trx); |
1320 | trx->state = TRX_STATE_NOT_STARTED; |
1321 | } else { |
1322 | if (trx->id > 0) { |
1323 | /* For consistent snapshot, we need to remove current |
1324 | transaction from rw_trx_hash before doing commit and |
1325 | releasing locks. */ |
1326 | trx_sys.deregister_rw(trx); |
1327 | } |
1328 | |
1329 | lock_trx_release_locks(trx); |
1330 | |
1331 | /* Remove the transaction from the list of active |
1332 | transactions now that it no longer holds any user locks. */ |
1333 | |
1334 | ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)); |
1335 | DEBUG_SYNC_C("after_trx_committed_in_memory" ); |
1336 | |
1337 | if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) { |
1338 | MONITOR_INC(MONITOR_TRX_RO_COMMIT); |
1339 | } else { |
1340 | trx_update_mod_tables_timestamp(trx); |
1341 | MONITOR_INC(MONITOR_TRX_RW_COMMIT); |
1342 | } |
1343 | } |
1344 | |
1345 | ut_ad(!trx->rsegs.m_redo.undo); |
1346 | |
1347 | if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) { |
1348 | mutex_enter(&rseg->mutex); |
1349 | ut_ad(rseg->trx_ref_count > 0); |
1350 | --rseg->trx_ref_count; |
1351 | mutex_exit(&rseg->mutex); |
1352 | |
1353 | if (trx_undo_t*& insert = trx->rsegs.m_redo.old_insert) { |
1354 | ut_ad(insert->rseg == rseg); |
1355 | trx_undo_commit_cleanup(insert, false); |
1356 | insert = NULL; |
1357 | } |
1358 | } |
1359 | |
1360 | ut_ad(!trx->rsegs.m_redo.old_insert); |
1361 | |
1362 | if (mtr != NULL) { |
1363 | if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) { |
1364 | ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); |
1365 | trx_undo_commit_cleanup(undo, true); |
1366 | undo = NULL; |
1367 | } |
1368 | |
1369 | /* NOTE that we could possibly make a group commit more |
1370 | efficient here: call os_thread_yield here to allow also other |
1371 | trxs to come to commit! */ |
1372 | |
1373 | /*-------------------------------------*/ |
1374 | |
1375 | /* Depending on the my.cnf options, we may now write the log |
1376 | buffer to the log files, making the transaction durable if |
1377 | the OS does not crash. We may also flush the log files to |
1378 | disk, making the transaction durable also at an OS crash or a |
1379 | power outage. |
1380 | |
1381 | The idea in InnoDB's group commit is that a group of |
1382 | transactions gather behind a trx doing a physical disk write |
1383 | to log files, and when that physical write has been completed, |
1384 | one of those transactions does a write which commits the whole |
1385 | group. Note that this group commit will only bring benefit if |
1386 | there are > 2 users in the database. Then at least 2 users can |
1387 | gather behind one doing the physical log write to disk. |
1388 | |
1389 | If we are calling trx_commit() under prepare_commit_mutex, we |
1390 | will delay possible log write and flush to a separate function |
1391 | trx_commit_complete_for_mysql(), which is only called when the |
1392 | thread has released the mutex. This is to make the |
1393 | group commit algorithm to work. Otherwise, the prepare_commit |
1394 | mutex would serialize all commits and prevent a group of |
1395 | transactions from gathering. */ |
1396 | |
1397 | lsn_t lsn = mtr->commit_lsn(); |
1398 | |
1399 | if (lsn == 0) { |
1400 | /* Nothing to be done. */ |
1401 | } else if (trx->flush_log_later) { |
1402 | /* Do nothing yet */ |
1403 | trx->must_flush_log_later = true; |
1404 | } else if (srv_flush_log_at_trx_commit == 0) { |
1405 | /* Do nothing */ |
1406 | } else { |
1407 | trx_flush_log_if_needed(lsn, trx); |
1408 | } |
1409 | |
1410 | trx->commit_lsn = lsn; |
1411 | |
1412 | /* Tell server some activity has happened, since the trx |
1413 | does changes something. Background utility threads like |
1414 | master thread, purge thread or page_cleaner thread might |
1415 | have some work to do. */ |
1416 | srv_active_wake_master_thread(); |
1417 | } |
1418 | |
1419 | ut_ad(!trx->rsegs.m_noredo.undo); |
1420 | |
1421 | /* Free all savepoints, starting from the first. */ |
1422 | trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints); |
1423 | |
1424 | trx_roll_savepoints_free(trx, savep); |
1425 | |
1426 | if (trx->fts_trx != NULL) { |
1427 | trx_finalize_for_fts(trx, trx->undo_no != 0); |
1428 | } |
1429 | |
1430 | trx_mutex_enter(trx); |
1431 | trx->dict_operation = TRX_DICT_OP_NONE; |
1432 | |
1433 | #ifdef WITH_WSREP |
1434 | if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) { |
1435 | trx->lock.was_chosen_as_deadlock_victim = FALSE; |
1436 | } |
1437 | #endif |
1438 | |
1439 | DBUG_LOG("trx" , "Commit in memory: " << trx); |
1440 | trx->state = TRX_STATE_NOT_STARTED; |
1441 | |
1442 | assert_trx_is_free(trx); |
1443 | |
1444 | trx_init(trx); |
1445 | |
1446 | trx_mutex_exit(trx); |
1447 | |
1448 | ut_a(trx->error_state == DB_SUCCESS); |
1449 | srv_wake_purge_thread_if_not_active(); |
1450 | } |
1451 | |
1452 | /** Commit a transaction and a mini-transaction. |
1453 | @param[in,out] trx transaction |
1454 | @param[in,out] mtr mini-transaction (NULL if no modifications) */ |
1455 | void trx_commit_low(trx_t* trx, mtr_t* mtr) |
1456 | { |
1457 | assert_trx_nonlocking_or_in_list(trx); |
1458 | ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)); |
1459 | ut_ad(!mtr || mtr->is_active()); |
1460 | ut_d(bool aborted = trx->in_rollback |
1461 | && trx->error_state == DB_DEADLOCK); |
1462 | ut_ad(!mtr == (aborted || !trx->has_logged_or_recovered())); |
1463 | ut_ad(!mtr || !aborted); |
1464 | |
1465 | /* undo_no is non-zero if we're doing the final commit. */ |
1466 | if (trx->fts_trx != NULL && trx->undo_no != 0) { |
1467 | dberr_t error; |
1468 | |
1469 | ut_a(!trx_is_autocommit_non_locking(trx)); |
1470 | |
1471 | error = fts_commit(trx); |
1472 | |
1473 | /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY |
1474 | instead of dying. This is a possible scenario if there |
1475 | is a crash between insert to DELETED table committing |
1476 | and transaction committing. The fix would be able to |
1477 | return error from this function */ |
1478 | if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) { |
1479 | /* FTS-FIXME: once we can return values from this |
1480 | function, we should do so and signal an error |
1481 | instead of just dying. */ |
1482 | |
1483 | ut_error; |
1484 | } |
1485 | } |
1486 | |
1487 | if (mtr != NULL) { |
1488 | |
1489 | mtr->set_sync(); |
1490 | |
1491 | trx_write_serialisation_history(trx, mtr); |
1492 | |
1493 | /* The following call commits the mini-transaction, making the |
1494 | whole transaction committed in the file-based world, at this |
1495 | log sequence number. The transaction becomes 'durable' when |
1496 | we write the log to disk, but in the logical sense the commit |
1497 | in the file-based data structures (undo logs etc.) happens |
1498 | here. |
1499 | |
1500 | NOTE that transaction numbers, which are assigned only to |
1501 | transactions with an update undo log, do not necessarily come |
1502 | in exactly the same order as commit lsn's, if the transactions |
1503 | have different rollback segments. To get exactly the same |
1504 | order we should hold the kernel mutex up to this point, |
1505 | adding to the contention of the kernel mutex. However, if |
1506 | a transaction T2 is able to see modifications made by |
1507 | a transaction T1, T2 will always get a bigger transaction |
1508 | number and a bigger commit lsn than T1. */ |
1509 | |
1510 | /*--------------*/ |
1511 | mtr_commit(mtr); |
1512 | |
1513 | DBUG_EXECUTE_IF("ib_crash_during_trx_commit_in_mem" , |
1514 | if (trx->has_logged()) { |
1515 | log_make_checkpoint_at(LSN_MAX, TRUE); |
1516 | DBUG_SUICIDE(); |
1517 | }); |
1518 | /*--------------*/ |
1519 | } |
1520 | #ifndef DBUG_OFF |
1521 | /* In case of this function is called from a stack executing |
1522 | THD::release_resources -> ... |
1523 | innobase_connection_close() -> |
1524 | trx_rollback_for_mysql... -> . |
1525 | mysql's thd does not seem to have |
1526 | thd->debug_sync_control defined any longer. However the stack |
1527 | is possible only with a prepared trx not updating any data. |
1528 | */ |
1529 | if (trx->mysql_thd != NULL && trx->has_logged_persistent()) { |
1530 | DEBUG_SYNC_C("before_trx_state_committed_in_memory" ); |
1531 | } |
1532 | #endif |
1533 | |
1534 | trx_commit_in_memory(trx, mtr); |
1535 | } |
1536 | |
1537 | /****************************************************************//** |
1538 | Commits a transaction. */ |
1539 | void |
1540 | trx_commit( |
1541 | /*=======*/ |
1542 | trx_t* trx) /*!< in/out: transaction */ |
1543 | { |
1544 | mtr_t* mtr; |
1545 | mtr_t local_mtr; |
1546 | |
1547 | DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start" , |
1548 | DBUG_SUICIDE();); |
1549 | |
1550 | if (trx->has_logged_or_recovered()) { |
1551 | mtr = &local_mtr; |
1552 | mtr_start_sync(mtr); |
1553 | } else { |
1554 | |
1555 | mtr = NULL; |
1556 | } |
1557 | |
1558 | trx_commit_low(trx, mtr); |
1559 | } |
1560 | |
1561 | /****************************************************************//** |
1562 | Prepares a transaction for commit/rollback. */ |
1563 | void |
1564 | trx_commit_or_rollback_prepare( |
1565 | /*===========================*/ |
1566 | trx_t* trx) /*!< in/out: transaction */ |
1567 | { |
1568 | /* We are reading trx->state without holding trx_sys.mutex |
1569 | here, because the commit or rollback should be invoked for a |
1570 | running (or recovered prepared) transaction that is associated |
1571 | with the current thread. */ |
1572 | |
1573 | switch (trx->state) { |
1574 | case TRX_STATE_NOT_STARTED: |
1575 | trx_start_low(trx, true); |
1576 | /* fall through */ |
1577 | |
1578 | case TRX_STATE_ACTIVE: |
1579 | case TRX_STATE_PREPARED: |
1580 | |
1581 | /* If the trx is in a lock wait state, moves the waiting |
1582 | query thread to the suspended state */ |
1583 | |
1584 | if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { |
1585 | |
1586 | ut_a(trx->lock.wait_thr != NULL); |
1587 | trx->lock.wait_thr->state = QUE_THR_SUSPENDED; |
1588 | trx->lock.wait_thr = NULL; |
1589 | |
1590 | trx->lock.que_state = TRX_QUE_RUNNING; |
1591 | } |
1592 | |
1593 | ut_a(trx->lock.n_active_thrs == 1); |
1594 | return; |
1595 | |
1596 | case TRX_STATE_COMMITTED_IN_MEMORY: |
1597 | break; |
1598 | } |
1599 | |
1600 | ut_error; |
1601 | } |
1602 | |
1603 | /*********************************************************************//** |
1604 | Creates a commit command node struct. |
1605 | @return own: commit node struct */ |
1606 | commit_node_t* |
1607 | trx_commit_node_create( |
1608 | /*===================*/ |
1609 | mem_heap_t* heap) /*!< in: mem heap where created */ |
1610 | { |
1611 | commit_node_t* node; |
1612 | |
1613 | node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node))); |
1614 | node->common.type = QUE_NODE_COMMIT; |
1615 | node->state = COMMIT_NODE_SEND; |
1616 | |
1617 | return(node); |
1618 | } |
1619 | |
1620 | /***********************************************************//** |
1621 | Performs an execution step for a commit type node in a query graph. |
1622 | @return query thread to run next, or NULL */ |
1623 | que_thr_t* |
1624 | trx_commit_step( |
1625 | /*============*/ |
1626 | que_thr_t* thr) /*!< in: query thread */ |
1627 | { |
1628 | commit_node_t* node; |
1629 | |
1630 | node = static_cast<commit_node_t*>(thr->run_node); |
1631 | |
1632 | ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT); |
1633 | |
1634 | if (thr->prev_node == que_node_get_parent(node)) { |
1635 | node->state = COMMIT_NODE_SEND; |
1636 | } |
1637 | |
1638 | if (node->state == COMMIT_NODE_SEND) { |
1639 | trx_t* trx; |
1640 | |
1641 | node->state = COMMIT_NODE_WAIT; |
1642 | |
1643 | trx = thr_get_trx(thr); |
1644 | |
1645 | ut_a(trx->lock.wait_thr == NULL); |
1646 | ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT); |
1647 | |
1648 | trx_commit_or_rollback_prepare(trx); |
1649 | |
1650 | trx->lock.que_state = TRX_QUE_COMMITTING; |
1651 | |
1652 | trx_commit(trx); |
1653 | |
1654 | ut_ad(trx->lock.wait_thr == NULL); |
1655 | |
1656 | trx->lock.que_state = TRX_QUE_RUNNING; |
1657 | |
1658 | thr = NULL; |
1659 | } else { |
1660 | ut_ad(node->state == COMMIT_NODE_WAIT); |
1661 | |
1662 | node->state = COMMIT_NODE_SEND; |
1663 | |
1664 | thr->run_node = que_node_get_parent(node); |
1665 | } |
1666 | |
1667 | return(thr); |
1668 | } |
1669 | |
1670 | /**********************************************************************//** |
1671 | Does the transaction commit for MySQL. |
1672 | @return DB_SUCCESS or error number */ |
1673 | dberr_t |
1674 | trx_commit_for_mysql( |
1675 | /*=================*/ |
1676 | trx_t* trx) /*!< in/out: transaction */ |
1677 | { |
1678 | /* Because we do not do the commit by sending an Innobase |
1679 | sig to the transaction, we must here make sure that trx has been |
1680 | started. */ |
1681 | |
1682 | switch (trx->state) { |
1683 | case TRX_STATE_NOT_STARTED: |
1684 | ut_d(trx->start_file = __FILE__); |
1685 | ut_d(trx->start_line = __LINE__); |
1686 | |
1687 | trx_start_low(trx, true); |
1688 | /* fall through */ |
1689 | case TRX_STATE_ACTIVE: |
1690 | case TRX_STATE_PREPARED: |
1691 | |
1692 | trx->op_info = "committing" ; |
1693 | |
1694 | trx_commit(trx); |
1695 | |
1696 | MONITOR_DEC(MONITOR_TRX_ACTIVE); |
1697 | trx->op_info = "" ; |
1698 | return(DB_SUCCESS); |
1699 | case TRX_STATE_COMMITTED_IN_MEMORY: |
1700 | break; |
1701 | } |
1702 | ut_error; |
1703 | return(DB_CORRUPTION); |
1704 | } |
1705 | |
1706 | /**********************************************************************//** |
1707 | If required, flushes the log to disk if we called trx_commit_for_mysql() |
1708 | with trx->flush_log_later == TRUE. */ |
1709 | void |
1710 | trx_commit_complete_for_mysql( |
1711 | /*==========================*/ |
1712 | trx_t* trx) /*!< in/out: transaction */ |
1713 | { |
1714 | if (trx->id != 0 |
1715 | || !trx->must_flush_log_later |
1716 | || (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) { |
1717 | |
1718 | return; |
1719 | } |
1720 | |
1721 | trx_flush_log_if_needed(trx->commit_lsn, trx); |
1722 | |
1723 | trx->must_flush_log_later = false; |
1724 | } |
1725 | |
1726 | /**********************************************************************//** |
1727 | Marks the latest SQL statement ended. */ |
1728 | void |
1729 | trx_mark_sql_stat_end( |
1730 | /*==================*/ |
1731 | trx_t* trx) /*!< in: trx handle */ |
1732 | { |
1733 | ut_a(trx); |
1734 | |
1735 | switch (trx->state) { |
1736 | case TRX_STATE_PREPARED: |
1737 | case TRX_STATE_COMMITTED_IN_MEMORY: |
1738 | break; |
1739 | case TRX_STATE_NOT_STARTED: |
1740 | trx->undo_no = 0; |
1741 | /* fall through */ |
1742 | case TRX_STATE_ACTIVE: |
1743 | trx->last_sql_stat_start.least_undo_no = trx->undo_no; |
1744 | |
1745 | if (trx->fts_trx != NULL) { |
1746 | fts_savepoint_laststmt_refresh(trx); |
1747 | } |
1748 | |
1749 | return; |
1750 | } |
1751 | |
1752 | ut_error; |
1753 | } |
1754 | |
1755 | /**********************************************************************//** |
1756 | Prints info about a transaction. */ |
1757 | void |
1758 | trx_print_low( |
1759 | /*==========*/ |
1760 | FILE* f, |
1761 | /*!< in: output stream */ |
1762 | const trx_t* trx, |
1763 | /*!< in: transaction */ |
1764 | ulint max_query_len, |
1765 | /*!< in: max query length to print, |
1766 | or 0 to use the default max length */ |
1767 | ulint n_rec_locks, |
1768 | /*!< in: lock_number_of_rows_locked(&trx->lock) */ |
1769 | ulint n_trx_locks, |
1770 | /*!< in: length of trx->lock.trx_locks */ |
1771 | ulint heap_size) |
1772 | /*!< in: mem_heap_get_size(trx->lock.lock_heap) */ |
1773 | { |
1774 | ibool newline; |
1775 | const char* op_info; |
1776 | |
1777 | fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx)); |
1778 | |
1779 | /* trx->state cannot change from or to NOT_STARTED while we |
1780 | are holding the trx_sys.mutex. It may change from ACTIVE to |
1781 | PREPARED or COMMITTED. */ |
1782 | switch (trx->state) { |
1783 | case TRX_STATE_NOT_STARTED: |
1784 | fputs(", not started" , f); |
1785 | goto state_ok; |
1786 | case TRX_STATE_ACTIVE: |
1787 | fprintf(f, ", ACTIVE %lu sec" , |
1788 | (ulong) difftime(time(NULL), trx->start_time)); |
1789 | goto state_ok; |
1790 | case TRX_STATE_PREPARED: |
1791 | fprintf(f, ", ACTIVE (PREPARED) %lu sec" , |
1792 | (ulong) difftime(time(NULL), trx->start_time)); |
1793 | goto state_ok; |
1794 | case TRX_STATE_COMMITTED_IN_MEMORY: |
1795 | fputs(", COMMITTED IN MEMORY" , f); |
1796 | goto state_ok; |
1797 | } |
1798 | fprintf(f, ", state %lu" , (ulong) trx->state); |
1799 | ut_ad(0); |
1800 | state_ok: |
1801 | |
1802 | /* prevent a race condition */ |
1803 | op_info = trx->op_info; |
1804 | |
1805 | if (*op_info) { |
1806 | putc(' ', f); |
1807 | fputs(op_info, f); |
1808 | } |
1809 | |
1810 | if (trx->is_recovered) { |
1811 | fputs(" recovered trx" , f); |
1812 | } |
1813 | |
1814 | if (trx->declared_to_be_inside_innodb) { |
1815 | fprintf(f, ", thread declared inside InnoDB %lu" , |
1816 | (ulong) trx->n_tickets_to_enter_innodb); |
1817 | } |
1818 | |
1819 | putc('\n', f); |
1820 | |
1821 | if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { |
1822 | fprintf(f, "mysql tables in use %lu, locked %lu\n" , |
1823 | (ulong) trx->n_mysql_tables_in_use, |
1824 | (ulong) trx->mysql_n_tables_locked); |
1825 | } |
1826 | |
1827 | newline = TRUE; |
1828 | |
1829 | /* trx->lock.que_state of an ACTIVE transaction may change |
1830 | while we are not holding trx->mutex. We perform a dirty read |
1831 | for performance reasons. */ |
1832 | |
1833 | switch (trx->lock.que_state) { |
1834 | case TRX_QUE_RUNNING: |
1835 | newline = FALSE; break; |
1836 | case TRX_QUE_LOCK_WAIT: |
1837 | fputs("LOCK WAIT " , f); break; |
1838 | case TRX_QUE_ROLLING_BACK: |
1839 | fputs("ROLLING BACK " , f); break; |
1840 | case TRX_QUE_COMMITTING: |
1841 | fputs("COMMITTING " , f); break; |
1842 | default: |
1843 | fprintf(f, "que state %lu " , (ulong) trx->lock.que_state); |
1844 | } |
1845 | |
1846 | if (n_trx_locks > 0 || heap_size > 400) { |
1847 | newline = TRUE; |
1848 | |
1849 | fprintf(f, "%lu lock struct(s), heap size %lu," |
1850 | " %lu row lock(s)" , |
1851 | (ulong) n_trx_locks, |
1852 | (ulong) heap_size, |
1853 | (ulong) n_rec_locks); |
1854 | } |
1855 | |
1856 | if (trx->undo_no != 0) { |
1857 | newline = TRUE; |
1858 | fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no); |
1859 | } |
1860 | |
1861 | if (newline) { |
1862 | putc('\n', f); |
1863 | } |
1864 | |
1865 | if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != NULL) { |
1866 | innobase_mysql_print_thd( |
1867 | f, trx->mysql_thd, static_cast<uint>(max_query_len)); |
1868 | } |
1869 | } |
1870 | |
1871 | /**********************************************************************//** |
1872 | Prints info about a transaction. |
1873 | The caller must hold lock_sys.mutex. |
1874 | When possible, use trx_print() instead. */ |
1875 | void |
1876 | trx_print_latched( |
1877 | /*==============*/ |
1878 | FILE* f, /*!< in: output stream */ |
1879 | const trx_t* trx, /*!< in: transaction */ |
1880 | ulint max_query_len) /*!< in: max query length to print, |
1881 | or 0 to use the default max length */ |
1882 | { |
1883 | ut_ad(lock_mutex_own()); |
1884 | |
1885 | trx_print_low(f, trx, max_query_len, |
1886 | lock_number_of_rows_locked(&trx->lock), |
1887 | UT_LIST_GET_LEN(trx->lock.trx_locks), |
1888 | mem_heap_get_size(trx->lock.lock_heap)); |
1889 | } |
1890 | |
1891 | /**********************************************************************//** |
1892 | Prints info about a transaction. |
1893 | Acquires and releases lock_sys.mutex. */ |
1894 | void |
1895 | trx_print( |
1896 | /*======*/ |
1897 | FILE* f, /*!< in: output stream */ |
1898 | const trx_t* trx, /*!< in: transaction */ |
1899 | ulint max_query_len) /*!< in: max query length to print, |
1900 | or 0 to use the default max length */ |
1901 | { |
1902 | ulint n_rec_locks; |
1903 | ulint n_trx_locks; |
1904 | ulint heap_size; |
1905 | |
1906 | lock_mutex_enter(); |
1907 | n_rec_locks = lock_number_of_rows_locked(&trx->lock); |
1908 | n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); |
1909 | heap_size = mem_heap_get_size(trx->lock.lock_heap); |
1910 | lock_mutex_exit(); |
1911 | |
1912 | trx_print_low(f, trx, max_query_len, |
1913 | n_rec_locks, n_trx_locks, heap_size); |
1914 | } |
1915 | |
1916 | /*******************************************************************//** |
1917 | Compares the "weight" (or size) of two transactions. Transactions that |
1918 | have edited non-transactional tables are considered heavier than ones |
1919 | that have not. |
1920 | @return TRUE if weight(a) >= weight(b) */ |
1921 | bool |
1922 | trx_weight_ge( |
1923 | /*==========*/ |
1924 | const trx_t* a, /*!< in: transaction to be compared */ |
1925 | const trx_t* b) /*!< in: transaction to be compared */ |
1926 | { |
1927 | ibool a_notrans_edit; |
1928 | ibool b_notrans_edit; |
1929 | |
1930 | /* If mysql_thd is NULL for a transaction we assume that it has |
1931 | not edited non-transactional tables. */ |
1932 | |
1933 | a_notrans_edit = a->mysql_thd != NULL |
1934 | && thd_has_edited_nontrans_tables(a->mysql_thd); |
1935 | |
1936 | b_notrans_edit = b->mysql_thd != NULL |
1937 | && thd_has_edited_nontrans_tables(b->mysql_thd); |
1938 | |
1939 | if (a_notrans_edit != b_notrans_edit) { |
1940 | |
1941 | return(a_notrans_edit); |
1942 | } |
1943 | |
1944 | /* Either both had edited non-transactional tables or both had |
1945 | not, we fall back to comparing the number of altered/locked |
1946 | rows. */ |
1947 | |
1948 | return(TRX_WEIGHT(a) >= TRX_WEIGHT(b)); |
1949 | } |
1950 | |
1951 | /** Prepare a transaction. |
1952 | @return log sequence number that makes the XA PREPARE durable |
1953 | @retval 0 if no changes needed to be made durable */ |
1954 | static |
1955 | lsn_t |
1956 | trx_prepare_low(trx_t* trx) |
1957 | { |
1958 | ut_ad(!trx->rsegs.m_redo.old_insert); |
1959 | ut_ad(!trx->is_recovered); |
1960 | |
1961 | mtr_t mtr; |
1962 | |
1963 | if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { |
1964 | ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); |
1965 | |
1966 | mtr.start(); |
1967 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
1968 | |
1969 | mutex_enter(&undo->rseg->mutex); |
1970 | trx_undo_set_state_at_prepare(trx, undo, false, &mtr); |
1971 | mutex_exit(&undo->rseg->mutex); |
1972 | |
1973 | mtr.commit(); |
1974 | } |
1975 | |
1976 | trx_undo_t* undo = trx->rsegs.m_redo.undo; |
1977 | |
1978 | if (!undo) { |
1979 | /* There were no changes to persistent tables. */ |
1980 | return(0); |
1981 | } |
1982 | |
1983 | trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; |
1984 | ut_ad(undo->rseg == rseg); |
1985 | |
1986 | mtr.start(true); |
1987 | |
1988 | /* Change the undo log segment states from TRX_UNDO_ACTIVE to |
1989 | TRX_UNDO_PREPARED: these modifications to the file data |
1990 | structure define the transaction as prepared in the file-based |
1991 | world, at the serialization point of lsn. */ |
1992 | |
1993 | mutex_enter(&rseg->mutex); |
1994 | trx_undo_set_state_at_prepare(trx, undo, false, &mtr); |
1995 | mutex_exit(&rseg->mutex); |
1996 | |
1997 | /* Make the XA PREPARE durable. */ |
1998 | mtr.commit(); |
1999 | ut_ad(mtr.commit_lsn() > 0); |
2000 | return(mtr.commit_lsn()); |
2001 | } |
2002 | |
2003 | /****************************************************************//** |
2004 | Prepares a transaction. */ |
2005 | static |
2006 | void |
2007 | trx_prepare( |
2008 | /*========*/ |
2009 | trx_t* trx) /*!< in/out: transaction */ |
2010 | { |
2011 | /* Only fresh user transactions can be prepared. |
2012 | Recovered transactions cannot. */ |
2013 | ut_a(!trx->is_recovered); |
2014 | |
2015 | lsn_t lsn = trx_prepare_low(trx); |
2016 | |
2017 | DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step" , DBUG_SUICIDE();); |
2018 | |
2019 | ut_a(trx->state == TRX_STATE_ACTIVE); |
2020 | trx_mutex_enter(trx); |
2021 | trx->state = TRX_STATE_PREPARED; |
2022 | trx_mutex_exit(trx); |
2023 | |
2024 | if (lsn) { |
2025 | /* Depending on the my.cnf options, we may now write the log |
2026 | buffer to the log files, making the prepared state of the |
2027 | transaction durable if the OS does not crash. We may also |
2028 | flush the log files to disk, making the prepared state of the |
2029 | transaction durable also at an OS crash or a power outage. |
2030 | |
2031 | The idea in InnoDB's group prepare is that a group of |
2032 | transactions gather behind a trx doing a physical disk write |
2033 | to log files, and when that physical write has been completed, |
2034 | one of those transactions does a write which prepares the whole |
2035 | group. Note that this group prepare will only bring benefit if |
2036 | there are > 2 users in the database. Then at least 2 users can |
2037 | gather behind one doing the physical log write to disk. |
2038 | |
2039 | We must not be holding any mutexes or latches here. */ |
2040 | |
2041 | trx_flush_log_if_needed(lsn, trx); |
2042 | } |
2043 | } |
2044 | |
2045 | /** XA PREPARE a transaction. |
2046 | @param[in,out] trx transaction to prepare */ |
2047 | void trx_prepare_for_mysql(trx_t* trx) |
2048 | { |
2049 | trx_start_if_not_started_xa(trx, false); |
2050 | |
2051 | trx->op_info = "preparing" ; |
2052 | |
2053 | trx_prepare(trx); |
2054 | |
2055 | trx->op_info = "" ; |
2056 | } |
2057 | |
2058 | |
/** Argument passed to trx_recover_for_mysql_callback() */
struct trx_recover_for_mysql_callback_arg
{
  /** array that receives the XIDs of the PREPARED transactions */
  XID *xid_list;
  /** number of slots in xid_list */
  uint len;
  /** number of XIDs stored in xid_list so far */
  uint count;
};
2065 | |
2066 | |
2067 | static my_bool trx_recover_for_mysql_callback(rw_trx_hash_element_t *element, |
2068 | trx_recover_for_mysql_callback_arg *arg) |
2069 | { |
2070 | mutex_enter(&element->mutex); |
2071 | if (trx_t *trx= element->trx) |
2072 | { |
2073 | /* |
2074 | The state of a read-write transaction can only change from ACTIVE to |
2075 | PREPARED while we are holding the element->mutex. But since it is |
2076 | executed at startup no state change should occur. |
2077 | */ |
2078 | if (trx_state_eq(trx, TRX_STATE_PREPARED)) |
2079 | { |
2080 | ut_ad(trx->is_recovered); |
2081 | if (arg->count == 0) |
2082 | ib::info() << "Starting recovery for XA transactions..." ; |
2083 | ib::info() << "Transaction " << trx_get_id_for_print(trx) |
2084 | << " in prepared state after recovery" ; |
2085 | ib::info() << "Transaction contains changes to " << trx->undo_no |
2086 | << " rows" ; |
2087 | arg->xid_list[arg->count++]= *trx->xid; |
2088 | } |
2089 | } |
2090 | mutex_exit(&element->mutex); |
2091 | return arg->count == arg->len; |
2092 | } |
2093 | |
2094 | |
2095 | /** |
2096 | Find prepared transaction objects for recovery. |
2097 | |
2098 | @param[out] xid_list prepared transactions |
2099 | @param[in] len number of slots in xid_list |
2100 | |
2101 | @return number of prepared transactions stored in xid_list |
2102 | */ |
2103 | |
2104 | int trx_recover_for_mysql(XID *xid_list, uint len) |
2105 | { |
2106 | trx_recover_for_mysql_callback_arg arg= { xid_list, len, 0 }; |
2107 | |
2108 | ut_ad(xid_list); |
2109 | ut_ad(len); |
2110 | |
2111 | /* Fill xid_list with PREPARED transactions. */ |
2112 | trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action> |
2113 | (trx_recover_for_mysql_callback), &arg); |
2114 | if (arg.count) |
2115 | ib::info() << arg.count |
2116 | << " transactions in prepared state after recovery" ; |
2117 | return int(arg.count); |
2118 | } |
2119 | |
2120 | |
/** Argument passed to trx_get_trx_by_xid_callback() */
struct trx_get_trx_by_xid_callback_arg
{
  /** the XID that is being searched for */
  XID *xid;
  /** the matching transaction; assigned by the callback on a match */
  trx_t *trx;
};
2126 | |
2127 | |
2128 | static my_bool trx_get_trx_by_xid_callback(rw_trx_hash_element_t *element, |
2129 | trx_get_trx_by_xid_callback_arg *arg) |
2130 | { |
2131 | my_bool found= 0; |
2132 | mutex_enter(&element->mutex); |
2133 | if (trx_t *trx= element->trx) |
2134 | { |
2135 | if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) && |
2136 | arg->xid->eq(reinterpret_cast<XID*>(trx->xid))) |
2137 | { |
2138 | /* Invalidate the XID, so that subsequent calls will not find it. */ |
2139 | trx->xid->null(); |
2140 | arg->trx= trx; |
2141 | found= 1; |
2142 | } |
2143 | } |
2144 | mutex_exit(&element->mutex); |
2145 | return found; |
2146 | } |
2147 | |
2148 | |
2149 | /** |
2150 | Finds PREPARED XA transaction by xid. |
2151 | |
2152 | trx may have been committed, unless the caller is holding lock_sys.mutex. |
2153 | |
2154 | @param[in] xid X/Open XA transaction identifier |
2155 | |
2156 | @return trx or NULL; on match, the trx->xid will be invalidated; |
2157 | */ |
2158 | |
2159 | trx_t *trx_get_trx_by_xid(XID *xid) |
2160 | { |
2161 | trx_get_trx_by_xid_callback_arg arg= { xid, 0 }; |
2162 | |
2163 | if (xid) |
2164 | trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action> |
2165 | (trx_get_trx_by_xid_callback), &arg); |
2166 | return arg.trx; |
2167 | } |
2168 | |
2169 | |
2170 | /*************************************************************//** |
2171 | Starts the transaction if it is not yet started. */ |
2172 | void |
2173 | trx_start_if_not_started_xa_low( |
2174 | /*============================*/ |
2175 | trx_t* trx, /*!< in/out: transaction */ |
2176 | bool read_write) /*!< in: true if read write transaction */ |
2177 | { |
2178 | switch (trx->state) { |
2179 | case TRX_STATE_NOT_STARTED: |
2180 | trx_start_low(trx, read_write); |
2181 | return; |
2182 | |
2183 | case TRX_STATE_ACTIVE: |
2184 | if (trx->id == 0 && read_write) { |
2185 | /* If the transaction is tagged as read-only then |
2186 | it can only write to temp tables and for such |
2187 | transactions we don't want to move them to the |
2188 | trx_sys_t::rw_trx_hash. */ |
2189 | if (!trx->read_only) { |
2190 | trx_set_rw_mode(trx); |
2191 | } |
2192 | } |
2193 | return; |
2194 | case TRX_STATE_PREPARED: |
2195 | case TRX_STATE_COMMITTED_IN_MEMORY: |
2196 | break; |
2197 | } |
2198 | |
2199 | ut_error; |
2200 | } |
2201 | |
2202 | /*************************************************************//** |
2203 | Starts the transaction if it is not yet started. */ |
2204 | void |
2205 | trx_start_if_not_started_low( |
2206 | /*==========================*/ |
2207 | trx_t* trx, /*!< in: transaction */ |
2208 | bool read_write) /*!< in: true if read write transaction */ |
2209 | { |
2210 | switch (trx->state) { |
2211 | case TRX_STATE_NOT_STARTED: |
2212 | trx_start_low(trx, read_write); |
2213 | return; |
2214 | |
2215 | case TRX_STATE_ACTIVE: |
2216 | if (read_write && trx->id == 0 && !trx->read_only) { |
2217 | trx_set_rw_mode(trx); |
2218 | } |
2219 | return; |
2220 | |
2221 | case TRX_STATE_PREPARED: |
2222 | case TRX_STATE_COMMITTED_IN_MEMORY: |
2223 | break; |
2224 | } |
2225 | |
2226 | ut_error; |
2227 | } |
2228 | |
2229 | /*************************************************************//** |
2230 | Starts a transaction for internal processing. */ |
2231 | void |
2232 | trx_start_internal_low( |
2233 | /*===================*/ |
2234 | trx_t* trx) /*!< in/out: transaction */ |
2235 | { |
2236 | /* Ensure it is not flagged as an auto-commit-non-locking |
2237 | transaction. */ |
2238 | |
2239 | trx->will_lock = 1; |
2240 | |
2241 | trx->internal = true; |
2242 | |
2243 | trx_start_low(trx, true); |
2244 | } |
2245 | |
2246 | /** Starts a read-only transaction for internal processing. |
2247 | @param[in,out] trx transaction to be started */ |
2248 | void |
2249 | trx_start_internal_read_only_low( |
2250 | trx_t* trx) |
2251 | { |
2252 | /* Ensure it is not flagged as an auto-commit-non-locking |
2253 | transaction. */ |
2254 | |
2255 | trx->will_lock = 1; |
2256 | |
2257 | trx->internal = true; |
2258 | |
2259 | trx_start_low(trx, false); |
2260 | } |
2261 | |
2262 | /*************************************************************//** |
2263 | Starts the transaction for a DDL operation. */ |
2264 | void |
2265 | trx_start_for_ddl_low( |
2266 | /*==================*/ |
2267 | trx_t* trx, /*!< in/out: transaction */ |
2268 | trx_dict_op_t op) /*!< in: dictionary operation type */ |
2269 | { |
2270 | switch (trx->state) { |
2271 | case TRX_STATE_NOT_STARTED: |
2272 | /* Flag this transaction as a dictionary operation, so that |
2273 | the data dictionary will be locked in crash recovery. */ |
2274 | |
2275 | trx_set_dict_operation(trx, op); |
2276 | |
2277 | /* Ensure it is not flagged as an auto-commit-non-locking |
2278 | transation. */ |
2279 | trx->will_lock = 1; |
2280 | |
2281 | trx->ddl= true; |
2282 | |
2283 | trx_start_internal_low(trx); |
2284 | return; |
2285 | |
2286 | case TRX_STATE_ACTIVE: |
2287 | case TRX_STATE_PREPARED: |
2288 | case TRX_STATE_COMMITTED_IN_MEMORY: |
2289 | break; |
2290 | } |
2291 | |
2292 | ut_error; |
2293 | } |
2294 | |
2295 | /*************************************************************//** |
2296 | Set the transaction as a read-write transaction if it is not already |
2297 | tagged as such. Read-only transactions that are writing to temporary |
2298 | tables are assigned an ID and a rollback segment but are not added |
2299 | to the trx read-write list because their updates should not be visible |
2300 | to other transactions and therefore their changes can be ignored by |
2301 | by MVCC. */ |
2302 | void |
2303 | trx_set_rw_mode( |
2304 | /*============*/ |
2305 | trx_t* trx) /*!< in/out: transaction that is RW */ |
2306 | { |
2307 | ut_ad(trx->rsegs.m_redo.rseg == 0); |
2308 | ut_ad(!trx_is_autocommit_non_locking(trx)); |
2309 | ut_ad(!trx->read_only); |
2310 | ut_ad(trx->id == 0); |
2311 | |
2312 | if (high_level_read_only) { |
2313 | return; |
2314 | } |
2315 | |
2316 | /* Function is promoting existing trx from ro mode to rw mode. |
2317 | In this process it has acquired trx_sys.mutex as it plan to |
2318 | move trx from ro list to rw list. If in future, some other thread |
2319 | looks at this trx object while it is being promoted then ensure |
2320 | that both threads are synced by acquring trx->mutex to avoid decision |
2321 | based on in-consistent view formed during promotion. */ |
2322 | |
2323 | trx->rsegs.m_redo.rseg = trx_assign_rseg_low(); |
2324 | ut_ad(trx->rsegs.m_redo.rseg != 0); |
2325 | |
2326 | trx_sys.register_rw(trx); |
2327 | |
2328 | /* So that we can see our own changes. */ |
2329 | if (trx->read_view.is_open()) { |
2330 | trx->read_view.set_creator_trx_id(trx->id); |
2331 | } |
2332 | } |
2333 | |