1/*****************************************************************************
2
3Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2015, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file trx/trx0trx.cc
22The transaction
23
24Created 3/26/1996 Heikki Tuuri
25*******************************************************/
26
27#include "ha_prototypes.h"
28
29#include "trx0trx.h"
30
31#ifdef WITH_WSREP
32#include <mysql/service_wsrep.h>
33#endif
34
35#include <mysql/service_thd_error_context.h>
36
37#include "btr0sea.h"
38#include "lock0lock.h"
39#include "log0log.h"
40#include "os0proc.h"
41#include "que0que.h"
42#include "srv0mon.h"
43#include "srv0srv.h"
44#include "fsp0sysspace.h"
45#include "srv0start.h"
46#include "trx0purge.h"
47#include "trx0rec.h"
48#include "trx0roll.h"
49#include "trx0rseg.h"
50#include "trx0undo.h"
51#include "trx0xa.h"
52#include "ut0new.h"
53#include "ut0pool.h"
54#include "ut0vec.h"
55
56#include <set>
57#include <new>
58
/** The bit pattern corresponding to TRX_ID_MAX: eight bytes with every
bit set. */
const byte trx_id_max_bytes[8] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/** The bit pattern corresponding to the maximum timestamp: the four
bytes 0x7fffffff followed by the three bytes 0x0f423f (999999 decimal). */
const byte timestamp_max_bytes[7] = {
	0x7f, 0xff, 0xff, 0xff, 0x0f, 0x42, 0x3f
};


/** Size used when allocating trx_t::detailed_error and as the bound of
every copy into that buffer in this file. */
static const ulint MAX_DETAILED_ERROR_LEN = 256;

/** Set of table_id, ordered by std::less and allocated via ut_allocator */
typedef std::set<
	table_id_t,
	std::less<table_id_t>,
	ut_allocator<table_id_t> >	table_id_set;
77
78/** Set flush observer for the transaction
79@param[in/out] trx transaction struct
80@param[in] observer flush observer */
81void
82trx_set_flush_observer(
83 trx_t* trx,
84 FlushObserver* observer)
85{
86 trx->flush_observer = observer;
87}
88
89/*************************************************************//**
90Set detailed error message for the transaction. */
91void
92trx_set_detailed_error(
93/*===================*/
94 trx_t* trx, /*!< in: transaction struct */
95 const char* msg) /*!< in: detailed error message */
96{
97 ut_strlcpy(trx->detailed_error, msg, MAX_DETAILED_ERROR_LEN);
98}
99
100/*************************************************************//**
101Set detailed error message for the transaction from a file. Note that the
102file is rewinded before reading from it. */
103void
104trx_set_detailed_error_from_file(
105/*=============================*/
106 trx_t* trx, /*!< in: transaction struct */
107 FILE* file) /*!< in: file to read message from */
108{
109 os_file_read_string(file, trx->detailed_error, MAX_DETAILED_ERROR_LEN);
110}
111
112/********************************************************************//**
113Initialize transaction object.
114@param trx trx to initialize */
115static
116void
117trx_init(
118/*=====*/
119 trx_t* trx)
120{
121 trx->id = 0;
122
123 trx->no = TRX_ID_MAX;
124
125 trx->state = TRX_STATE_NOT_STARTED;
126
127 trx->is_recovered = false;
128
129 trx->op_info = "";
130
131 trx->active_commit_ordered = 0;
132
133 trx->isolation_level = TRX_ISO_REPEATABLE_READ;
134
135 trx->check_foreigns = true;
136
137 trx->check_unique_secondary = true;
138
139 trx->lock.n_rec_locks = 0;
140
141 trx->dict_operation = TRX_DICT_OP_NONE;
142
143 trx->table_id = 0;
144
145 trx->error_state = DB_SUCCESS;
146
147 trx->error_key_num = ULINT_UNDEFINED;
148
149 trx->undo_no = 0;
150
151 trx->rsegs.m_redo.rseg = NULL;
152
153 trx->rsegs.m_noredo.rseg = NULL;
154
155 trx->read_only = false;
156
157 trx->auto_commit = false;
158
159 trx->will_lock = 0;
160
161 trx->ddl = false;
162
163 trx->internal = false;
164
165 ut_d(trx->start_file = 0);
166
167 ut_d(trx->start_line = 0);
168
169 trx->magic_n = TRX_MAGIC_N;
170
171 trx->lock.que_state = TRX_QUE_RUNNING;
172
173 trx->last_sql_stat_start.least_undo_no = 0;
174
175 ut_ad(!trx->read_view.is_open());
176
177 trx->lock.rec_cached = 0;
178
179 trx->lock.table_cached = 0;
180
181 trx->flush_observer = NULL;
182}
183
184/** For managing the life-cycle of the trx_t instance that we get
185from the pool. */
186struct TrxFactory {
187
188 /** Initializes a transaction object. It must be explicitly started
189 with trx_start_if_not_started() before using it. The default isolation
190 level is TRX_ISO_REPEATABLE_READ.
191 @param trx Transaction instance to initialise */
192 static void init(trx_t* trx)
193 {
194 /* Explicitly call the constructor of the already
195 allocated object. trx_t objects are allocated by
196 ut_zalloc_nokey() in Pool::Pool() which would not call
197 the constructors of the trx_t members. */
198 new(&trx->mod_tables) trx_mod_tables_t();
199
200 new(&trx->lock.rec_pool) lock_pool_t();
201
202 new(&trx->lock.table_pool) lock_pool_t();
203
204 new(&trx->lock.table_locks) lock_pool_t();
205
206 new(&trx->read_view) ReadView();
207
208 trx->rw_trx_hash_pins = 0;
209 trx_init(trx);
210
211 trx->dict_operation_lock_mode = 0;
212
213 trx->xid = UT_NEW_NOKEY(xid_t());
214
215 trx->detailed_error = reinterpret_cast<char*>(
216 ut_zalloc_nokey(MAX_DETAILED_ERROR_LEN));
217
218 trx->lock.lock_heap = mem_heap_create_typed(
219 1024, MEM_HEAP_FOR_LOCK_HEAP);
220
221 lock_trx_lock_list_init(&trx->lock.trx_locks);
222
223 UT_LIST_INIT(
224 trx->trx_savepoints,
225 &trx_named_savept_t::trx_savepoints);
226
227 mutex_create(LATCH_ID_TRX, &trx->mutex);
228
229 lock_trx_alloc_locks(trx);
230 }
231
232 /** Release resources held by the transaction object.
233 @param trx the transaction for which to release resources */
234 static void destroy(trx_t* trx)
235 {
236 ut_a(trx->magic_n == TRX_MAGIC_N);
237 ut_ad(!trx->mysql_thd);
238
239 ut_a(trx->lock.wait_lock == NULL);
240 ut_a(trx->lock.wait_thr == NULL);
241 ut_a(trx->dict_operation_lock_mode == 0);
242
243 if (trx->lock.lock_heap != NULL) {
244 mem_heap_free(trx->lock.lock_heap);
245 trx->lock.lock_heap = NULL;
246 }
247
248 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
249
250 UT_DELETE(trx->xid);
251 ut_free(trx->detailed_error);
252
253 mutex_free(&trx->mutex);
254
255 trx->mod_tables.~trx_mod_tables_t();
256
257 ut_ad(!trx->read_view.is_open());
258
259 if (!trx->lock.rec_pool.empty()) {
260
261 /* See lock_trx_alloc_locks() why we only free
262 the first element. */
263
264 ut_free(trx->lock.rec_pool[0]);
265 }
266
267 if (!trx->lock.table_pool.empty()) {
268
269 /* See lock_trx_alloc_locks() why we only free
270 the first element. */
271
272 ut_free(trx->lock.table_pool[0]);
273 }
274
275 trx->lock.rec_pool.~lock_pool_t();
276
277 trx->lock.table_pool.~lock_pool_t();
278
279 trx->lock.table_locks.~lock_pool_t();
280
281 trx->read_view.~ReadView();
282 }
283
284 /** Enforce any invariants here, this is called before the transaction
285 is added to the pool.
286 @return true if all OK */
287 static bool debug(const trx_t* trx)
288 {
289 ut_a(trx->error_state == DB_SUCCESS);
290
291 ut_a(trx->magic_n == TRX_MAGIC_N);
292
293 ut_ad(!trx->read_only);
294
295 ut_ad(trx->state == TRX_STATE_NOT_STARTED);
296
297 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
298
299 ut_ad(trx->mysql_thd == 0);
300
301 ut_a(trx->lock.wait_thr == NULL);
302 ut_a(trx->lock.wait_lock == NULL);
303 ut_a(trx->dict_operation_lock_mode == 0);
304
305 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
306
307 ut_ad(trx->autoinc_locks == NULL);
308
309 ut_ad(trx->lock.table_locks.empty());
310
311 return(true);
312 }
313};
314
315/** The lock strategy for TrxPool */
316struct TrxPoolLock {
317 TrxPoolLock() { }
318
319 /** Create the mutex */
320 void create()
321 {
322 mutex_create(LATCH_ID_TRX_POOL, &m_mutex);
323 }
324
325 /** Acquire the mutex */
326 void enter() { mutex_enter(&m_mutex); }
327
328 /** Release the mutex */
329 void exit() { mutex_exit(&m_mutex); }
330
331 /** Free the mutex */
332 void destroy() { mutex_free(&m_mutex); }
333
334 /** Mutex to use */
335 ib_mutex_t m_mutex;
336};
337
338/** The lock strategy for the TrxPoolManager */
339struct TrxPoolManagerLock {
340 TrxPoolManagerLock() { }
341
342 /** Create the mutex */
343 void create()
344 {
345 mutex_create(LATCH_ID_TRX_POOL_MANAGER, &m_mutex);
346 }
347
348 /** Acquire the mutex */
349 void enter() { mutex_enter(&m_mutex); }
350
351 /** Release the mutex */
352 void exit() { mutex_exit(&m_mutex); }
353
354 /** Free the mutex */
355 void destroy() { mutex_free(&m_mutex); }
356
357 /** Mutex to use */
358 ib_mutex_t m_mutex;
359};
360
/** Use explicit mutexes for the trx_t pool and its manager: the pool of
trx_t objects is built by TrxFactory and locked with TrxPoolLock, and the
manager of such pools is locked with TrxPoolManagerLock. */
typedef Pool<trx_t, TrxFactory, TrxPoolLock> trx_pool_t;
typedef PoolManager<trx_pool_t, TrxPoolManagerLock > trx_pools_t;

/** The trx_t pool manager; allocated in trx_pool_init() and released in
trx_pool_close() */
static trx_pools_t* trx_pools;

/** Size of one trx_t pool in bytes (4 MiB). */
static const ulint MAX_TRX_BLOCK_SIZE = 1024 * 1024 * 4;
370
371/** Create the trx_t pool */
372void
373trx_pool_init()
374{
375 trx_pools = UT_NEW_NOKEY(trx_pools_t(MAX_TRX_BLOCK_SIZE));
376
377 ut_a(trx_pools != 0);
378}
379
380/** Destroy the trx_t pool */
381void
382trx_pool_close()
383{
384 UT_DELETE(trx_pools);
385
386 trx_pools = 0;
387}
388
389/** @return a trx_t instance from trx_pools. */
390trx_t *trx_create()
391{
392 trx_t* trx = trx_pools->get();
393
394 assert_trx_is_free(trx);
395
396 mem_heap_t* heap;
397 ib_alloc_t* alloc;
398
399 /* We just got trx from pool, it should be non locking */
400 ut_ad(trx->will_lock == 0);
401 ut_ad(trx->state == TRX_STATE_NOT_STARTED);
402 ut_ad(!trx->rw_trx_hash_pins);
403
404 DBUG_LOG("trx", "Create: " << trx);
405
406 heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
407
408 alloc = ib_heap_allocator_create(heap);
409
410 /* Remember to free the vector explicitly in trx_free(). */
411 trx->autoinc_locks = ib_vector_create(alloc, sizeof(void**), 4);
412
413 /* Should have been either just initialized or .clear()ed by
414 trx_free(). */
415 ut_a(trx->mod_tables.size() == 0);
416
417#ifdef WITH_WSREP
418 trx->wsrep_event = NULL;
419#endif /* WITH_WSREP */
420
421 trx_sys.register_trx(trx);
422
423 return(trx);
424}
425
426/**
427 Release a trx_t instance back to the pool.
428 @param trx the instance to release.
429*/
430void trx_free(trx_t*& trx)
431{
432 ut_ad(!trx->declared_to_be_inside_innodb);
433 ut_ad(!trx->n_mysql_tables_in_use);
434 ut_ad(!trx->mysql_n_tables_locked);
435 ut_ad(!trx->internal);
436
437 if (trx->declared_to_be_inside_innodb) {
438
439 ib::error() << "Freeing a trx (" << trx << ", "
440 << trx_get_id_for_print(trx) << ") which is declared"
441 " to be processing inside InnoDB";
442
443 trx_print(stderr, trx, 600);
444 putc('\n', stderr);
445
446 /* This is an error but not a fatal error. We must keep
447 the counters like srv_conc.n_active accurate. */
448 srv_conc_force_exit_innodb(trx);
449 }
450
451 if (trx->n_mysql_tables_in_use != 0
452 || trx->mysql_n_tables_locked != 0) {
453
454 ib::error() << "MySQL is freeing a thd though"
455 " trx->n_mysql_tables_in_use is "
456 << trx->n_mysql_tables_in_use
457 << " and trx->mysql_n_tables_locked is "
458 << trx->mysql_n_tables_locked << ".";
459
460 trx_print(stderr, trx, 600);
461 ut_print_buf(stderr, trx, sizeof(trx_t));
462 putc('\n', stderr);
463 }
464
465 trx->dict_operation = TRX_DICT_OP_NONE;
466 assert_trx_is_inactive(trx);
467
468 trx_sys.deregister_trx(trx);
469
470 assert_trx_is_free(trx);
471
472 trx_sys.rw_trx_hash.put_pins(trx);
473 trx->mysql_thd = 0;
474 trx->mysql_log_file_name = 0;
475
476 // FIXME: We need to avoid this heap free/alloc for each commit.
477 if (trx->autoinc_locks != NULL) {
478 ut_ad(ib_vector_is_empty(trx->autoinc_locks));
479 /* We allocated a dedicated heap for the vector. */
480 ib_vector_free(trx->autoinc_locks);
481 trx->autoinc_locks = NULL;
482 }
483
484 trx->mod_tables.clear();
485
486 /* trx locking state should have been reset before returning trx
487 to pool */
488 ut_ad(trx->will_lock == 0);
489
490 trx_pools->mem_free(trx);
491 /* Unpoison the memory for innodb_monitor_set_option;
492 it is operating also on the freed transaction objects. */
493 MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
494 /* Declare the contents as initialized for Valgrind;
495 we checked that it was initialized in trx_pools->mem_free(trx). */
496 UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex);
497
498 trx = NULL;
499}
500
501/** At shutdown, frees a transaction object. */
502void
503trx_free_at_shutdown(trx_t *trx)
504{
505 ut_ad(trx->is_recovered);
506 ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
507 || (trx_state_eq(trx, TRX_STATE_ACTIVE)
508 && (!srv_was_started
509 || srv_operation == SRV_OPERATION_RESTORE
510 || srv_operation == SRV_OPERATION_RESTORE_EXPORT
511 || srv_read_only_mode
512 || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
513 || (!srv_is_being_started
514 && !srv_undo_sources && srv_fast_shutdown))));
515 ut_a(trx->magic_n == TRX_MAGIC_N);
516
517 lock_trx_release_locks(trx);
518 trx_undo_free_at_shutdown(trx);
519
520 ut_a(!trx->read_only);
521
522 DBUG_LOG("trx", "Free prepared: " << trx);
523 trx->state = TRX_STATE_NOT_STARTED;
524
525 /* Undo trx_resurrect_table_locks(). */
526 lock_trx_lock_list_init(&trx->lock.trx_locks);
527
528 /* Note: This vector is not guaranteed to be empty because the
529 transaction was never committed and therefore lock_trx_release()
530 was not called. */
531 trx->lock.table_locks.clear();
532
533 trx_free(trx);
534}
535
536
537/**
538 Disconnect a prepared transaction from MySQL
539 @param[in,out] trx transaction
540*/
541void trx_disconnect_prepared(trx_t *trx)
542{
543 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
544 ut_ad(trx->mysql_thd);
545 trx->read_view.close();
546 trx->is_recovered= true;
547 trx->mysql_thd= NULL;
548 /* todo/fixme: suggest to do it at innodb prepare */
549 trx->will_lock= 0;
550}
551
552/****************************************************************//**
553Resurrect the table locks for a resurrected transaction. */
554static
555void
556trx_resurrect_table_locks(
557/*======================*/
558 trx_t* trx, /*!< in/out: transaction */
559 const trx_undo_t* undo) /*!< in: undo log */
560{
561 mtr_t mtr;
562 page_t* undo_page;
563 trx_undo_rec_t* undo_rec;
564 table_id_set tables;
565
566 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
567 trx_state_eq(trx, TRX_STATE_PREPARED));
568 ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
569
570 if (undo->empty()) {
571 return;
572 }
573
574 mtr_start(&mtr);
575
576 /* trx_rseg_mem_create() may have acquired an X-latch on this
577 page, so we cannot acquire an S-latch. */
578 undo_page = trx_undo_page_get(
579 page_id_t(trx->rsegs.m_redo.rseg->space->id,
580 undo->top_page_no), &mtr);
581
582 undo_rec = undo_page + undo->top_offset;
583
584 do {
585 ulint type;
586 undo_no_t undo_no;
587 table_id_t table_id;
588 ulint cmpl_info;
589 bool updated_extern;
590
591 page_t* undo_rec_page = page_align(undo_rec);
592
593 if (undo_rec_page != undo_page) {
594 mtr.release_page(undo_page, MTR_MEMO_PAGE_X_FIX);
595 undo_page = undo_rec_page;
596 }
597
598 trx_undo_rec_get_pars(
599 undo_rec, &type, &cmpl_info,
600 &updated_extern, &undo_no, &table_id);
601 tables.insert(table_id);
602
603 undo_rec = trx_undo_get_prev_rec(
604 undo_rec, undo->hdr_page_no,
605 undo->hdr_offset, false, &mtr);
606 } while (undo_rec);
607
608 mtr_commit(&mtr);
609
610 for (table_id_set::const_iterator i = tables.begin();
611 i != tables.end(); i++) {
612 if (dict_table_t* table = dict_table_open_on_id(
613 *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
614 if (!table->is_readable()) {
615 mutex_enter(&dict_sys->mutex);
616 dict_table_close(table, TRUE, FALSE);
617 dict_table_remove_from_cache(table);
618 mutex_exit(&dict_sys->mutex);
619 continue;
620 }
621
622 if (trx->state == TRX_STATE_PREPARED) {
623 trx->mod_tables.insert(
624 trx_mod_tables_t::value_type(table,
625 0));
626 }
627 lock_table_ix_resurrect(table, trx);
628
629 DBUG_LOG("ib_trx",
630 "resurrect " << ib::hex(trx->id)
631 << " IX lock on " << table->name);
632
633 dict_table_close(table, FALSE, FALSE);
634 }
635 }
636}
637
638
639/**
640 Resurrect the transactions that were doing inserts/updates the time of the
641 crash, they need to be undone.
642*/
643
644static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
645 ib_time_t start_time, uint64_t *rows_to_undo,
646 bool is_old_insert)
647{
648 trx_state_t state;
649 /*
650 This is single-threaded startup code, we do not need the
651 protection of trx->mutex or trx_sys.mutex here.
652 */
653 switch (undo->state)
654 {
655 case TRX_UNDO_ACTIVE:
656 state= TRX_STATE_ACTIVE;
657 break;
658 case TRX_UNDO_PREPARED:
659 /*
660 Prepared transactions are left in the prepared state
661 waiting for a commit or abort decision from MySQL
662 */
663 ib::info() << "Transaction " << undo->trx_id
664 << " was in the XA prepared state.";
665
666 state= TRX_STATE_PREPARED;
667 break;
668 default:
669 if (is_old_insert && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO)
670 trx_undo_commit_cleanup(undo, false);
671 return;
672 }
673
674 trx_t *trx= trx_create();
675 trx->state= state;
676 ut_d(trx->start_file= __FILE__);
677 ut_d(trx->start_line= __LINE__);
678 ut_ad(trx->no == TRX_ID_MAX);
679
680 if (is_old_insert)
681 trx->rsegs.m_redo.old_insert= undo;
682 else
683 trx->rsegs.m_redo.undo= undo;
684
685 trx->undo_no= undo->top_undo_no + 1;
686 trx->rsegs.m_redo.rseg= rseg;
687 /*
688 For transactions with active data will not have rseg size = 1
689 or will not qualify for purge limit criteria. So it is safe to increment
690 this trx_ref_count w/o mutex protection.
691 */
692 ++trx->rsegs.m_redo.rseg->trx_ref_count;
693 *trx->xid= undo->xid;
694 trx->id= undo->trx_id;
695 trx->is_recovered= true;
696 trx->start_time= start_time;
697
698 if (undo->dict_operation)
699 {
700 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
701 trx->table_id= undo->table_id;
702 }
703
704 trx_sys.rw_trx_hash.insert(trx);
705 trx_sys.rw_trx_hash.put_pins(trx);
706 trx_resurrect_table_locks(trx, undo);
707 if (trx_state_eq(trx, TRX_STATE_ACTIVE))
708 *rows_to_undo+= trx->undo_no;
709}
710
711
712/** Initialize (resurrect) transactions at startup. */
713void
714trx_lists_init_at_db_start()
715{
716 ut_a(srv_is_being_started);
717 ut_ad(!srv_was_started);
718
719 if (srv_operation == SRV_OPERATION_RESTORE) {
720 /* mariabackup --prepare only deals with
721 the redo log and the data files, not with
722 transactions or the data dictionary. */
723 trx_rseg_array_init();
724 return;
725 }
726
727 if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
728 return;
729 }
730
731 purge_sys.create();
732 trx_rseg_array_init();
733
734 /* Look from the rollback segments if there exist undo logs for
735 transactions. */
736 const ib_time_t start_time = ut_time();
737 uint64_t rows_to_undo = 0;
738
739 for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
740 trx_undo_t* undo;
741 trx_rseg_t* rseg = trx_sys.rseg_array[i];
742
743 /* Some rollback segment may be unavailable,
744 especially if the server was previously run with a
745 non-default value of innodb_undo_logs. */
746 if (rseg == NULL) {
747 continue;
748 }
749
750 /* Resurrect transactions that were doing inserts
751 using the old separate insert_undo log. */
752 undo = UT_LIST_GET_FIRST(rseg->old_insert_list);
753 while (undo) {
754 trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo);
755 trx_resurrect(undo, rseg, start_time, &rows_to_undo,
756 true);
757 undo = next;
758 }
759
760 /* Ressurrect other transactions. */
761 for (undo = UT_LIST_GET_FIRST(rseg->undo_list);
762 undo != NULL;
763 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
764 trx_t *trx = trx_sys.rw_trx_hash.find(0, undo->trx_id);
765 if (!trx) {
766 trx_resurrect(undo, rseg, start_time,
767 &rows_to_undo, false);
768 } else {
769 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
770 trx_state_eq(trx, TRX_STATE_PREPARED));
771 ut_ad(trx->start_time == start_time);
772 ut_ad(trx->is_recovered);
773 ut_ad(trx->rsegs.m_redo.rseg == rseg);
774 ut_ad(trx->rsegs.m_redo.rseg->trx_ref_count);
775
776 trx->rsegs.m_redo.undo = undo;
777 if (undo->top_undo_no >= trx->undo_no) {
778 if (trx_state_eq(trx,
779 TRX_STATE_ACTIVE)) {
780 rows_to_undo -= trx->undo_no;
781 rows_to_undo +=
782 undo->top_undo_no + 1;
783 }
784
785 trx->undo_no = undo->top_undo_no + 1;
786 }
787 trx_resurrect_table_locks(trx, undo);
788 }
789 }
790 }
791
792 if (trx_sys.rw_trx_hash.size()) {
793
794 ib::info() << trx_sys.rw_trx_hash.size()
795 << " transaction(s) which must be rolled back or"
796 " cleaned up in total " << rows_to_undo
797 << " row operations to undo";
798
799 ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id();
800 }
801 trx_sys.clone_oldest_view();
802}
803
804/** Assign a persistent rollback segment in a round-robin fashion,
805evenly distributed between 0 and innodb_undo_logs-1
806@return persistent rollback segment
807@retval NULL if innodb_read_only */
808static
809trx_rseg_t*
810trx_assign_rseg_low()
811{
812 if (srv_read_only_mode) {
813 ut_ad(srv_undo_logs == ULONG_UNDEFINED);
814 return(NULL);
815 }
816
817 /* The first slot is always assigned to the system tablespace. */
818 ut_ad(trx_sys.rseg_array[0]->space == fil_system.sys_space);
819
820 /* Choose a rollback segment evenly distributed between 0 and
821 innodb_undo_logs-1 in a round-robin fashion, skipping those
822 undo tablespaces that are scheduled for truncation.
823
824 Because rseg_slot is not protected by atomics or any mutex, race
825 conditions are possible, meaning that multiple transactions
826 that start modifications concurrently will write their undo
827 log to the same rollback segment. */
828 static ulong rseg_slot;
829 ulint slot = rseg_slot++ % srv_undo_logs;
830 trx_rseg_t* rseg;
831
832#ifdef UNIV_DEBUG
833 ulint start_scan_slot = slot;
834 bool look_for_rollover = false;
835#endif /* UNIV_DEBUG */
836
837 bool allocated = false;
838
839 do {
840 for (;;) {
841 rseg = trx_sys.rseg_array[slot];
842
843#ifdef UNIV_DEBUG
844 /* Ensure that we are not revisiting the same
845 slot that we have already inspected. */
846 if (look_for_rollover) {
847 ut_ad(start_scan_slot != slot);
848 }
849 look_for_rollover = true;
850#endif /* UNIV_DEBUG */
851
852 slot = (slot + 1) % srv_undo_logs;
853
854 if (rseg == NULL) {
855 continue;
856 }
857
858 ut_ad(rseg->is_persistent());
859
860 if (rseg->space != fil_system.sys_space) {
861 ut_ad(srv_undo_tablespaces > 1);
862 if (rseg->skip_allocation) {
863 continue;
864 }
865 } else if (trx_rseg_t* next
866 = trx_sys.rseg_array[slot]) {
867 if (next->space != fil_system.sys_space
868 && srv_undo_tablespaces > 0) {
869 /** If dedicated
870 innodb_undo_tablespaces have
871 been configured, try to use them
872 instead of the system tablespace. */
873 continue;
874 }
875 }
876
877 break;
878 }
879
880 /* By now we have only selected the rseg but not marked it
881 allocated. By marking it allocated we are ensuring that it will
882 never be selected for UNDO truncate purge. */
883 mutex_enter(&rseg->mutex);
884 if (!rseg->skip_allocation) {
885 rseg->trx_ref_count++;
886 allocated = true;
887 }
888 mutex_exit(&rseg->mutex);
889 } while (!allocated);
890
891 ut_ad(rseg->trx_ref_count > 0);
892 ut_ad(rseg->is_persistent());
893 return(rseg);
894}
895
896/** Assign a rollback segment for modifying temporary tables.
897@return the assigned rollback segment */
898trx_rseg_t*
899trx_t::assign_temp_rseg()
900{
901 ut_ad(!rsegs.m_noredo.rseg);
902 ut_ad(!trx_is_autocommit_non_locking(this));
903 compile_time_assert(ut_is_2pow(TRX_SYS_N_RSEGS));
904
905 /* Choose a temporary rollback segment between 0 and 127
906 in a round-robin fashion. Because rseg_slot is not protected by
907 atomics or any mutex, race conditions are possible, meaning that
908 multiple transactions that start modifications concurrently
909 will write their undo log to the same rollback segment. */
910 static ulong rseg_slot;
911 trx_rseg_t* rseg = trx_sys.temp_rsegs[
912 rseg_slot++ & (TRX_SYS_N_RSEGS - 1)];
913 ut_ad(!rseg->is_persistent());
914 rsegs.m_noredo.rseg = rseg;
915
916 if (id == 0) {
917 trx_sys.register_rw(this);
918 }
919
920 ut_ad(!rseg->is_persistent());
921 return(rseg);
922}
923
924/****************************************************************//**
925Starts a transaction. */
926static
927void
928trx_start_low(
929/*==========*/
930 trx_t* trx, /*!< in: transaction */
931 bool read_write) /*!< in: true if read-write transaction */
932{
933 ut_ad(!trx->in_rollback);
934 ut_ad(!trx->is_recovered);
935 ut_ad(trx->start_line != 0);
936 ut_ad(trx->start_file != 0);
937 ut_ad(trx->roll_limit == 0);
938 ut_ad(trx->error_state == DB_SUCCESS);
939 ut_ad(trx->rsegs.m_redo.rseg == NULL);
940 ut_ad(trx->rsegs.m_noredo.rseg == NULL);
941 ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
942 ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
943
944 /* Check whether it is an AUTOCOMMIT SELECT */
945 trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);
946
947 trx->read_only = srv_read_only_mode
948 || (!trx->ddl && !trx->internal
949 && thd_trx_is_read_only(trx->mysql_thd));
950
951 if (!trx->auto_commit) {
952 ++trx->will_lock;
953 } else if (trx->will_lock == 0) {
954 trx->read_only = true;
955 }
956
957#ifdef WITH_WSREP
958 memset(trx->xid, 0, sizeof(xid_t));
959 trx->xid->formatID = -1;
960#endif /* WITH_WSREP */
961
962 /* The initial value for trx->no: TRX_ID_MAX is used in
963 read_view_open_now: */
964
965 trx->no = TRX_ID_MAX;
966
967 ut_a(ib_vector_is_empty(trx->autoinc_locks));
968 ut_a(trx->lock.table_locks.empty());
969
970 /* No other thread can access this trx object through rw_trx_hash, thus
971 we don't need trx_sys.mutex protection for that purpose. Still this
972 trx can be found through trx_sys.trx_list, which means state
973 change must be protected by e.g. trx->mutex.
974
975 For now we update it without mutex protection, because original code
976 did it this way. It has to be reviewed and fixed properly. */
977 trx->state = TRX_STATE_ACTIVE;
978
979 /* By default all transactions are in the read-only list unless they
980 are non-locking auto-commit read only transactions or background
981 (internal) transactions. Note: Transactions marked explicitly as
982 read only can write to temporary tables, we put those on the RO
983 list too. */
984
985 if (!trx->read_only
986 && (trx->mysql_thd == 0 || read_write || trx->ddl)) {
987
988 /* Temporary rseg is assigned only if the transaction
989 updates a temporary table */
990 trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
991 ut_ad(trx->rsegs.m_redo.rseg != 0
992 || srv_read_only_mode
993 || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
994
995 trx_sys.register_rw(trx);
996 } else {
997 trx->id = 0;
998
999 if (!trx_is_autocommit_non_locking(trx)) {
1000
1001 /* If this is a read-only transaction that is writing
1002 to a temporary table then it needs a transaction id
1003 to write to the temporary table. */
1004
1005 if (read_write) {
1006 ut_ad(!srv_read_only_mode);
1007 trx_sys.register_rw(trx);
1008 }
1009 } else {
1010 ut_ad(!read_write);
1011 }
1012 }
1013
1014 if (trx->mysql_thd != NULL) {
1015 trx->start_time = thd_start_time_in_secs(trx->mysql_thd);
1016 trx->start_time_micro = thd_query_start_micro(trx->mysql_thd);
1017
1018 } else {
1019 trx->start_time = ut_time();
1020 trx->start_time_micro = 0;
1021 }
1022
1023 ut_a(trx->error_state == DB_SUCCESS);
1024
1025 MONITOR_INC(MONITOR_TRX_ACTIVE);
1026}
1027
1028/** Set the serialisation number for a persistent committed transaction.
1029@param[in,out] trx committed transaction with persistent changes */
1030static
1031void
1032trx_serialise(trx_t* trx)
1033{
1034 trx_rseg_t *rseg = trx->rsegs.m_redo.rseg;
1035 ut_ad(rseg);
1036 ut_ad(mutex_own(&rseg->mutex));
1037
1038 if (rseg->last_page_no == FIL_NULL) {
1039 mutex_enter(&purge_sys.pq_mutex);
1040 }
1041
1042 trx_sys.assign_new_trx_no(trx);
1043
1044 /* If the rollback segment is not empty then the
1045 new trx_t::no can't be less than any trx_t::no
1046 already in the rollback segment. User threads only
1047 produce events when a rollback segment is empty. */
1048 if (rseg->last_page_no == FIL_NULL) {
1049 purge_sys.purge_queue.push(TrxUndoRsegs(trx->no, *rseg));
1050 mutex_exit(&purge_sys.pq_mutex);
1051 }
1052}
1053
1054/****************************************************************//**
1055Assign the transaction its history serialisation number and write the
1056update UNDO log record to the assigned rollback segment. */
1057static
1058void
1059trx_write_serialisation_history(
1060/*============================*/
1061 trx_t* trx, /*!< in/out: transaction */
1062 mtr_t* mtr) /*!< in/out: mini-transaction */
1063{
1064 /* Change the undo log segment states from TRX_UNDO_ACTIVE to some
1065 other state: these modifications to the file data structure define
1066 the transaction as committed in the file based domain, at the
1067 serialization point of the log sequence number lsn obtained below. */
1068
1069 /* We have to hold the rseg mutex because update log headers have
1070 to be put to the history list in the (serialisation) order of the
1071 UNDO trx number. This is required for the purge in-memory data
1072 structures too. */
1073
1074 if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
1075 /* Undo log for temporary tables is discarded at transaction
1076 commit. There is no purge for temporary tables, and also no
1077 MVCC, because they are private to a session. */
1078
1079 mtr_t temp_mtr;
1080 temp_mtr.start();
1081 temp_mtr.set_log_mode(MTR_LOG_NO_REDO);
1082
1083 mutex_enter(&trx->rsegs.m_noredo.rseg->mutex);
1084 trx_undo_set_state_at_finish(undo, &temp_mtr);
1085 mutex_exit(&trx->rsegs.m_noredo.rseg->mutex);
1086 temp_mtr.commit();
1087 }
1088
1089 trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
1090 if (!rseg) {
1091 ut_ad(!trx->rsegs.m_redo.undo);
1092 ut_ad(!trx->rsegs.m_redo.old_insert);
1093 return;
1094 }
1095
1096 trx_undo_t*& undo = trx->rsegs.m_redo.undo;
1097 trx_undo_t*& old_insert = trx->rsegs.m_redo.old_insert;
1098
1099 if (!undo && !old_insert) {
1100 return;
1101 }
1102
1103 ut_ad(!trx->read_only);
1104 ut_ad(!undo || undo->rseg == rseg);
1105 ut_ad(!old_insert || old_insert->rseg == rseg);
1106 mutex_enter(&rseg->mutex);
1107
1108 /* Assign the transaction serialisation number and add any
1109 undo log to the purge queue. */
1110 trx_serialise(trx);
1111
1112 if (UNIV_LIKELY_NULL(old_insert)) {
1113 UT_LIST_REMOVE(rseg->old_insert_list, old_insert);
1114 trx_purge_add_undo_to_history(trx, old_insert, mtr);
1115 }
1116 if (undo) {
1117 UT_LIST_REMOVE(rseg->undo_list, undo);
1118 trx_purge_add_undo_to_history(trx, undo, mtr);
1119 }
1120
1121 mutex_exit(&rseg->mutex);
1122
1123 MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
1124
1125 trx->mysql_log_file_name = NULL;
1126}
1127
1128/********************************************************************
1129Finalize a transaction containing updates for a FTS table. */
1130static
1131void
1132trx_finalize_for_fts_table(
1133/*=======================*/
1134 fts_trx_table_t* ftt) /* in: FTS trx table */
1135{
1136 fts_t* fts = ftt->table->fts;
1137 fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
1138
1139 mutex_enter(&fts->bg_threads_mutex);
1140
1141 if (fts->fts_status & BG_THREAD_STOP) {
1142 /* The table is about to be dropped, no use
1143 adding anything to its work queue. */
1144
1145 mutex_exit(&fts->bg_threads_mutex);
1146 } else {
1147 mem_heap_t* heap;
1148 mutex_exit(&fts->bg_threads_mutex);
1149
1150 ut_a(fts->add_wq);
1151
1152 heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
1153
1154 ib_wqueue_add(fts->add_wq, doc_ids, heap);
1155
1156 /* fts_trx_table_t no longer owns the list. */
1157 ftt->added_doc_ids = NULL;
1158 }
1159}
1160
1161/******************************************************************//**
1162Finalize a transaction containing updates to FTS tables. */
1163static
1164void
1165trx_finalize_for_fts(
1166/*=================*/
1167 trx_t* trx, /*!< in/out: transaction */
1168 bool is_commit) /*!< in: true if the transaction was
1169 committed, false if it was rolled back. */
1170{
1171 if (is_commit) {
1172 const ib_rbt_node_t* node;
1173 ib_rbt_t* tables;
1174 fts_savepoint_t* savepoint;
1175
1176 savepoint = static_cast<fts_savepoint_t*>(
1177 ib_vector_last(trx->fts_trx->savepoints));
1178
1179 tables = savepoint->tables;
1180
1181 for (node = rbt_first(tables);
1182 node;
1183 node = rbt_next(tables, node)) {
1184 fts_trx_table_t** ftt;
1185
1186 ftt = rbt_value(fts_trx_table_t*, node);
1187
1188 if ((*ftt)->added_doc_ids) {
1189 trx_finalize_for_fts_table(*ftt);
1190 }
1191 }
1192 }
1193
1194 fts_trx_free(trx->fts_trx);
1195 trx->fts_trx = NULL;
1196}
1197
1198/**********************************************************************//**
1199If required, flushes the log to disk based on the value of
1200innodb_flush_log_at_trx_commit. */
1201static
1202void
1203trx_flush_log_if_needed_low(
1204/*========================*/
1205 lsn_t lsn) /*!< in: lsn up to which logs are to be
1206 flushed. */
1207{
1208 bool flush = srv_file_flush_method != SRV_NOSYNC;
1209
1210 switch (srv_flush_log_at_trx_commit) {
1211 case 3:
1212 case 2:
1213 /* Write the log but do not flush it to disk */
1214 flush = false;
1215 /* fall through */
1216 case 1:
1217 /* Write the log and optionally flush it to disk */
1218 log_write_up_to(lsn, flush);
1219 return;
1220 case 0:
1221 /* Do nothing */
1222 return;
1223 }
1224
1225 ut_error;
1226}
1227
1228/**********************************************************************//**
1229If required, flushes the log to disk based on the value of
1230innodb_flush_log_at_trx_commit. */
1231static
1232void
1233trx_flush_log_if_needed(
1234/*====================*/
1235 lsn_t lsn, /*!< in: lsn up to which logs are to be
1236 flushed. */
1237 trx_t* trx) /*!< in/out: transaction */
1238{
1239 trx->op_info = "flushing log";
1240 trx_flush_log_if_needed_low(lsn);
1241 trx->op_info = "";
1242}
1243
1244/**********************************************************************//**
1245For each table that has been modified by the given transaction: update
1246its dict_table_t::update_time with the current timestamp. Clear the list
1247of the modified tables at the end. */
1248static
1249void
1250trx_update_mod_tables_timestamp(
1251/*============================*/
1252 trx_t* trx) /*!< in: transaction */
1253{
1254
1255 ut_ad(trx->id != 0);
1256
1257 /* consider using trx->start_time if calling time() is too
1258 expensive here */
1259 time_t now = ut_time();
1260
1261 trx_mod_tables_t::const_iterator end = trx->mod_tables.end();
1262
1263 for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin();
1264 it != end;
1265 ++it) {
1266
1267 /* This could be executed by multiple threads concurrently
1268 on the same table object. This is fine because time_t is
1269 word size or less. And _purely_ _theoretically_, even if
1270 time_t write is not atomic, likely the value of 'now' is
1271 the same in all threads and even if it is not, getting a
1272 "garbage" in table->update_time is justified because
1273 protecting it with a latch here would be too performance
1274 intrusive. */
1275 it->first->update_time = now;
1276 }
1277
1278 trx->mod_tables.clear();
1279}
1280
/****************************************************************//**
Commits a transaction in memory.

In order, this function: closes the read view; either resets an
autocommit non-locking transaction straight to NOT_STARTED or deregisters
the transaction (if it has an id) and releases its locks; decrements the
redo rollback segment's trx_ref_count and cleans up the old insert undo
log; if a mini-transaction was passed, cleans up the temporary undo log
and writes/flushes the redo log as configured; frees savepoints and FTS
state; and finally resets the transaction object with trx_init(). */
static
void
trx_commit_in_memory(
/*=================*/
	trx_t*		trx,	/*!< in/out: transaction */
	const mtr_t*	mtr)	/*!< in: mini-transaction of
				trx_write_serialisation_history(), or NULL if
				the transaction did not modify anything */
{
	trx->must_flush_log_later = false;
	trx->read_view.close();

	if (trx_is_autocommit_non_locking(trx)) {
		ut_ad(trx->id == 0);
		ut_ad(trx->read_only);
		ut_a(!trx->is_recovered);
		ut_ad(trx->rsegs.m_redo.rseg == NULL);

		/* Note: We are asserting without holding the lock mutex. But
		that is OK because this transaction is not waiting and cannot
		be rolled back and no new locks can (or should not) be added
		because it is flagged as a non-locking read-only transaction. */

		ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

		/* This state change is not protected by any mutex, therefore
		there is an inherent race here around state transition during
		printouts. We ignore this race for the sake of efficiency.
		However, the trx_sys_t::mutex will protect the trx_t instance
		and it cannot be removed from the trx_list and freed
		without first acquiring the trx_sys_t::mutex. */

		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));

		MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);

		DBUG_LOG("trx", "Autocommit in memory: " << trx);
		trx->state = TRX_STATE_NOT_STARTED;
	} else {
		if (trx->id > 0) {
			/* For consistent snapshot, we need to remove current
			transaction from rw_trx_hash before doing commit and
			releasing locks. */
			trx_sys.deregister_rw(trx);
		}

		lock_trx_release_locks(trx);

		/* Remove the transaction from the list of active
		transactions now that it no longer holds any user locks. */

		ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
		DEBUG_SYNC_C("after_trx_committed_in_memory");

		/* Only the read-write branch updates the timestamps of
		the modified tables. */
		if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
			MONITOR_INC(MONITOR_TRX_RO_COMMIT);
		} else {
			trx_update_mod_tables_timestamp(trx);
			MONITOR_INC(MONITOR_TRX_RW_COMMIT);
		}
	}

	ut_ad(!trx->rsegs.m_redo.undo);

	if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) {
		/* Decrement the rollback segment's count of referencing
		transactions under the rollback segment mutex. */
		mutex_enter(&rseg->mutex);
		ut_ad(rseg->trx_ref_count > 0);
		--rseg->trx_ref_count;
		mutex_exit(&rseg->mutex);

		/* 'insert' is a reference to the pointer stored in trx,
		so the assignment below clears trx->rsegs.m_redo.old_insert. */
		if (trx_undo_t*& insert = trx->rsegs.m_redo.old_insert) {
			ut_ad(insert->rseg == rseg);
			trx_undo_commit_cleanup(insert, false);
			insert = NULL;
		}
	}

	ut_ad(!trx->rsegs.m_redo.old_insert);

	if (mtr != NULL) {
		/* 'undo' is a reference to the pointer stored in trx,
		so the assignment below clears trx->rsegs.m_noredo.undo. */
		if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
			ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
			trx_undo_commit_cleanup(undo, true);
			undo = NULL;
		}

		/* NOTE that we could possibly make a group commit more
		efficient here: call os_thread_yield here to allow also other
		trxs to come to commit! */

		/*-------------------------------------*/

		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the transaction durable if
		the OS does not crash. We may also flush the log files to
		disk, making the transaction durable also at an OS crash or a
		power outage.

		The idea in InnoDB's group commit is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which commits the whole
		group. Note that this group commit will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		If we are calling trx_commit() under prepare_commit_mutex, we
		will delay possible log write and flush to a separate function
		trx_commit_complete_for_mysql(), which is only called when the
		thread has released the mutex. This is to make the
		group commit algorithm to work. Otherwise, the prepare_commit
		mutex would serialize all commits and prevent a group of
		transactions from gathering. */

		lsn_t	lsn = mtr->commit_lsn();

		if (lsn == 0) {
			/* Nothing to be done. */
		} else if (trx->flush_log_later) {
			/* Do nothing yet */
			trx->must_flush_log_later = true;
		} else if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else {
			trx_flush_log_if_needed(lsn, trx);
		}

		/* Remember the commit LSN; trx_commit_complete_for_mysql()
		passes trx->commit_lsn to the deferred log flush. */
		trx->commit_lsn = lsn;

		/* Tell server some activity has happened, since the trx
		changed something. Background utility threads like
		master thread, purge thread or page_cleaner thread might
		have some work to do. */
		srv_active_wake_master_thread();
	}

	ut_ad(!trx->rsegs.m_noredo.undo);

	/* Free all savepoints, starting from the first. */
	trx_named_savept_t*	savep = UT_LIST_GET_FIRST(trx->trx_savepoints);

	trx_roll_savepoints_free(trx, savep);

	if (trx->fts_trx != NULL) {
		/* A non-zero undo_no is passed on as is_commit. */
		trx_finalize_for_fts(trx, trx->undo_no != 0);
	}

	/* The remaining state reset is done while holding trx->mutex. */
	trx_mutex_enter(trx);
	trx->dict_operation = TRX_DICT_OP_NONE;

#ifdef WITH_WSREP
	if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) {
		trx->lock.was_chosen_as_deadlock_victim = FALSE;
	}
#endif

	DBUG_LOG("trx", "Commit in memory: " << trx);
	trx->state = TRX_STATE_NOT_STARTED;

	assert_trx_is_free(trx);

	trx_init(trx);

	trx_mutex_exit(trx);

	ut_a(trx->error_state == DB_SUCCESS);
	srv_wake_purge_thread_if_not_active();
}
1451
/** Commit a transaction and a mini-transaction.
If the transaction has FTS state and a non-zero undo_no, fts_commit() is
run first; any result other than DB_SUCCESS or DB_DUPLICATE_KEY is fatal.
If a mini-transaction is given, the serialisation history is written in
it and it is committed. Finally the transaction is committed in memory
with trx_commit_in_memory().
@param[in,out]	trx	transaction
@param[in,out]	mtr	mini-transaction (NULL if no modifications) */
void trx_commit_low(trx_t* trx, mtr_t* mtr)
{
	assert_trx_nonlocking_or_in_list(trx);
	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
	ut_ad(!mtr || mtr->is_active());
	/* Debug-only consistency checks: a mini-transaction must be
	passed exactly when the transaction has logged or recovered
	changes and is not being rolled back due to a deadlock. */
	ut_d(bool aborted = trx->in_rollback
	     && trx->error_state == DB_DEADLOCK);
	ut_ad(!mtr == (aborted || !trx->has_logged_or_recovered()));
	ut_ad(!mtr || !aborted);

	/* undo_no is non-zero if we're doing the final commit. */
	if (trx->fts_trx != NULL && trx->undo_no != 0) {
		dberr_t	error;

		ut_a(!trx_is_autocommit_non_locking(trx));

		error = fts_commit(trx);

		/* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
		instead of dying. This is a possible scenario if there
		is a crash between insert to DELETED table committing
		and transaction committing. The fix would be able to
		return error from this function */
		if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
			/* FTS-FIXME: once we can return values from this
			function, we should do so and signal an error
			instead of just dying. */

			ut_error;
		}
	}

	if (mtr != NULL) {

		mtr->set_sync();

		trx_write_serialisation_history(trx, mtr);

		/* The following call commits the mini-transaction, making the
		whole transaction committed in the file-based world, at this
		log sequence number. The transaction becomes 'durable' when
		we write the log to disk, but in the logical sense the commit
		in the file-based data structures (undo logs etc.) happens
		here.

		NOTE that transaction numbers, which are assigned only to
		transactions with an update undo log, do not necessarily come
		in exactly the same order as commit lsn's, if the transactions
		have different rollback segments. To get exactly the same
		order we should hold the kernel mutex up to this point,
		adding to the contention of the kernel mutex. However, if
		a transaction T2 is able to see modifications made by
		a transaction T1, T2 will always get a bigger transaction
		number and a bigger commit lsn than T1. */

		/*--------------*/
		mtr_commit(mtr);

		/* Debug injection point: when enabled and the transaction
		has logged changes, make a checkpoint and kill the server
		right after the mini-transaction commit. */
		DBUG_EXECUTE_IF("ib_crash_during_trx_commit_in_mem",
				if (trx->has_logged()) {
					log_make_checkpoint_at(LSN_MAX, TRUE);
					DBUG_SUICIDE();
				});
		/*--------------*/
	}
#ifndef DBUG_OFF
	/* In case of this function is called from a stack executing
	   THD::release_resources -> ...
              innobase_connection_close() ->
                     trx_rollback_for_mysql... -> .
           mysql's thd does not seem to have
           thd->debug_sync_control defined any longer. However the stack
           is possible only with a prepared trx not updating any data.
        */
	if (trx->mysql_thd != NULL && trx->has_logged_persistent()) {
		DEBUG_SYNC_C("before_trx_state_committed_in_memory");
	}
#endif

	trx_commit_in_memory(trx, mtr);
}
1536
1537/****************************************************************//**
1538Commits a transaction. */
1539void
1540trx_commit(
1541/*=======*/
1542 trx_t* trx) /*!< in/out: transaction */
1543{
1544 mtr_t* mtr;
1545 mtr_t local_mtr;
1546
1547 DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start",
1548 DBUG_SUICIDE(););
1549
1550 if (trx->has_logged_or_recovered()) {
1551 mtr = &local_mtr;
1552 mtr_start_sync(mtr);
1553 } else {
1554
1555 mtr = NULL;
1556 }
1557
1558 trx_commit_low(trx, mtr);
1559}
1560
1561/****************************************************************//**
1562Prepares a transaction for commit/rollback. */
1563void
1564trx_commit_or_rollback_prepare(
1565/*===========================*/
1566 trx_t* trx) /*!< in/out: transaction */
1567{
1568 /* We are reading trx->state without holding trx_sys.mutex
1569 here, because the commit or rollback should be invoked for a
1570 running (or recovered prepared) transaction that is associated
1571 with the current thread. */
1572
1573 switch (trx->state) {
1574 case TRX_STATE_NOT_STARTED:
1575 trx_start_low(trx, true);
1576 /* fall through */
1577
1578 case TRX_STATE_ACTIVE:
1579 case TRX_STATE_PREPARED:
1580
1581 /* If the trx is in a lock wait state, moves the waiting
1582 query thread to the suspended state */
1583
1584 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1585
1586 ut_a(trx->lock.wait_thr != NULL);
1587 trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
1588 trx->lock.wait_thr = NULL;
1589
1590 trx->lock.que_state = TRX_QUE_RUNNING;
1591 }
1592
1593 ut_a(trx->lock.n_active_thrs == 1);
1594 return;
1595
1596 case TRX_STATE_COMMITTED_IN_MEMORY:
1597 break;
1598 }
1599
1600 ut_error;
1601}
1602
1603/*********************************************************************//**
1604Creates a commit command node struct.
1605@return own: commit node struct */
1606commit_node_t*
1607trx_commit_node_create(
1608/*===================*/
1609 mem_heap_t* heap) /*!< in: mem heap where created */
1610{
1611 commit_node_t* node;
1612
1613 node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
1614 node->common.type = QUE_NODE_COMMIT;
1615 node->state = COMMIT_NODE_SEND;
1616
1617 return(node);
1618}
1619
1620/***********************************************************//**
1621Performs an execution step for a commit type node in a query graph.
1622@return query thread to run next, or NULL */
1623que_thr_t*
1624trx_commit_step(
1625/*============*/
1626 que_thr_t* thr) /*!< in: query thread */
1627{
1628 commit_node_t* node;
1629
1630 node = static_cast<commit_node_t*>(thr->run_node);
1631
1632 ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1633
1634 if (thr->prev_node == que_node_get_parent(node)) {
1635 node->state = COMMIT_NODE_SEND;
1636 }
1637
1638 if (node->state == COMMIT_NODE_SEND) {
1639 trx_t* trx;
1640
1641 node->state = COMMIT_NODE_WAIT;
1642
1643 trx = thr_get_trx(thr);
1644
1645 ut_a(trx->lock.wait_thr == NULL);
1646 ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
1647
1648 trx_commit_or_rollback_prepare(trx);
1649
1650 trx->lock.que_state = TRX_QUE_COMMITTING;
1651
1652 trx_commit(trx);
1653
1654 ut_ad(trx->lock.wait_thr == NULL);
1655
1656 trx->lock.que_state = TRX_QUE_RUNNING;
1657
1658 thr = NULL;
1659 } else {
1660 ut_ad(node->state == COMMIT_NODE_WAIT);
1661
1662 node->state = COMMIT_NODE_SEND;
1663
1664 thr->run_node = que_node_get_parent(node);
1665 }
1666
1667 return(thr);
1668}
1669
1670/**********************************************************************//**
1671Does the transaction commit for MySQL.
1672@return DB_SUCCESS or error number */
1673dberr_t
1674trx_commit_for_mysql(
1675/*=================*/
1676 trx_t* trx) /*!< in/out: transaction */
1677{
1678 /* Because we do not do the commit by sending an Innobase
1679 sig to the transaction, we must here make sure that trx has been
1680 started. */
1681
1682 switch (trx->state) {
1683 case TRX_STATE_NOT_STARTED:
1684 ut_d(trx->start_file = __FILE__);
1685 ut_d(trx->start_line = __LINE__);
1686
1687 trx_start_low(trx, true);
1688 /* fall through */
1689 case TRX_STATE_ACTIVE:
1690 case TRX_STATE_PREPARED:
1691
1692 trx->op_info = "committing";
1693
1694 trx_commit(trx);
1695
1696 MONITOR_DEC(MONITOR_TRX_ACTIVE);
1697 trx->op_info = "";
1698 return(DB_SUCCESS);
1699 case TRX_STATE_COMMITTED_IN_MEMORY:
1700 break;
1701 }
1702 ut_error;
1703 return(DB_CORRUPTION);
1704}
1705
1706/**********************************************************************//**
1707If required, flushes the log to disk if we called trx_commit_for_mysql()
1708with trx->flush_log_later == TRUE. */
1709void
1710trx_commit_complete_for_mysql(
1711/*==========================*/
1712 trx_t* trx) /*!< in/out: transaction */
1713{
1714 if (trx->id != 0
1715 || !trx->must_flush_log_later
1716 || (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) {
1717
1718 return;
1719 }
1720
1721 trx_flush_log_if_needed(trx->commit_lsn, trx);
1722
1723 trx->must_flush_log_later = false;
1724}
1725
1726/**********************************************************************//**
1727Marks the latest SQL statement ended. */
1728void
1729trx_mark_sql_stat_end(
1730/*==================*/
1731 trx_t* trx) /*!< in: trx handle */
1732{
1733 ut_a(trx);
1734
1735 switch (trx->state) {
1736 case TRX_STATE_PREPARED:
1737 case TRX_STATE_COMMITTED_IN_MEMORY:
1738 break;
1739 case TRX_STATE_NOT_STARTED:
1740 trx->undo_no = 0;
1741 /* fall through */
1742 case TRX_STATE_ACTIVE:
1743 trx->last_sql_stat_start.least_undo_no = trx->undo_no;
1744
1745 if (trx->fts_trx != NULL) {
1746 fts_savepoint_laststmt_refresh(trx);
1747 }
1748
1749 return;
1750 }
1751
1752 ut_error;
1753}
1754
1755/**********************************************************************//**
1756Prints info about a transaction. */
1757void
1758trx_print_low(
1759/*==========*/
1760 FILE* f,
1761 /*!< in: output stream */
1762 const trx_t* trx,
1763 /*!< in: transaction */
1764 ulint max_query_len,
1765 /*!< in: max query length to print,
1766 or 0 to use the default max length */
1767 ulint n_rec_locks,
1768 /*!< in: lock_number_of_rows_locked(&trx->lock) */
1769 ulint n_trx_locks,
1770 /*!< in: length of trx->lock.trx_locks */
1771 ulint heap_size)
1772 /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
1773{
1774 ibool newline;
1775 const char* op_info;
1776
1777 fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
1778
1779 /* trx->state cannot change from or to NOT_STARTED while we
1780 are holding the trx_sys.mutex. It may change from ACTIVE to
1781 PREPARED or COMMITTED. */
1782 switch (trx->state) {
1783 case TRX_STATE_NOT_STARTED:
1784 fputs(", not started", f);
1785 goto state_ok;
1786 case TRX_STATE_ACTIVE:
1787 fprintf(f, ", ACTIVE %lu sec",
1788 (ulong) difftime(time(NULL), trx->start_time));
1789 goto state_ok;
1790 case TRX_STATE_PREPARED:
1791 fprintf(f, ", ACTIVE (PREPARED) %lu sec",
1792 (ulong) difftime(time(NULL), trx->start_time));
1793 goto state_ok;
1794 case TRX_STATE_COMMITTED_IN_MEMORY:
1795 fputs(", COMMITTED IN MEMORY", f);
1796 goto state_ok;
1797 }
1798 fprintf(f, ", state %lu", (ulong) trx->state);
1799 ut_ad(0);
1800state_ok:
1801
1802 /* prevent a race condition */
1803 op_info = trx->op_info;
1804
1805 if (*op_info) {
1806 putc(' ', f);
1807 fputs(op_info, f);
1808 }
1809
1810 if (trx->is_recovered) {
1811 fputs(" recovered trx", f);
1812 }
1813
1814 if (trx->declared_to_be_inside_innodb) {
1815 fprintf(f, ", thread declared inside InnoDB %lu",
1816 (ulong) trx->n_tickets_to_enter_innodb);
1817 }
1818
1819 putc('\n', f);
1820
1821 if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
1822 fprintf(f, "mysql tables in use %lu, locked %lu\n",
1823 (ulong) trx->n_mysql_tables_in_use,
1824 (ulong) trx->mysql_n_tables_locked);
1825 }
1826
1827 newline = TRUE;
1828
1829 /* trx->lock.que_state of an ACTIVE transaction may change
1830 while we are not holding trx->mutex. We perform a dirty read
1831 for performance reasons. */
1832
1833 switch (trx->lock.que_state) {
1834 case TRX_QUE_RUNNING:
1835 newline = FALSE; break;
1836 case TRX_QUE_LOCK_WAIT:
1837 fputs("LOCK WAIT ", f); break;
1838 case TRX_QUE_ROLLING_BACK:
1839 fputs("ROLLING BACK ", f); break;
1840 case TRX_QUE_COMMITTING:
1841 fputs("COMMITTING ", f); break;
1842 default:
1843 fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
1844 }
1845
1846 if (n_trx_locks > 0 || heap_size > 400) {
1847 newline = TRUE;
1848
1849 fprintf(f, "%lu lock struct(s), heap size %lu,"
1850 " %lu row lock(s)",
1851 (ulong) n_trx_locks,
1852 (ulong) heap_size,
1853 (ulong) n_rec_locks);
1854 }
1855
1856 if (trx->undo_no != 0) {
1857 newline = TRUE;
1858 fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
1859 }
1860
1861 if (newline) {
1862 putc('\n', f);
1863 }
1864
1865 if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != NULL) {
1866 innobase_mysql_print_thd(
1867 f, trx->mysql_thd, static_cast<uint>(max_query_len));
1868 }
1869}
1870
1871/**********************************************************************//**
1872Prints info about a transaction.
1873The caller must hold lock_sys.mutex.
1874When possible, use trx_print() instead. */
1875void
1876trx_print_latched(
1877/*==============*/
1878 FILE* f, /*!< in: output stream */
1879 const trx_t* trx, /*!< in: transaction */
1880 ulint max_query_len) /*!< in: max query length to print,
1881 or 0 to use the default max length */
1882{
1883 ut_ad(lock_mutex_own());
1884
1885 trx_print_low(f, trx, max_query_len,
1886 lock_number_of_rows_locked(&trx->lock),
1887 UT_LIST_GET_LEN(trx->lock.trx_locks),
1888 mem_heap_get_size(trx->lock.lock_heap));
1889}
1890
1891/**********************************************************************//**
1892Prints info about a transaction.
1893Acquires and releases lock_sys.mutex. */
1894void
1895trx_print(
1896/*======*/
1897 FILE* f, /*!< in: output stream */
1898 const trx_t* trx, /*!< in: transaction */
1899 ulint max_query_len) /*!< in: max query length to print,
1900 or 0 to use the default max length */
1901{
1902 ulint n_rec_locks;
1903 ulint n_trx_locks;
1904 ulint heap_size;
1905
1906 lock_mutex_enter();
1907 n_rec_locks = lock_number_of_rows_locked(&trx->lock);
1908 n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
1909 heap_size = mem_heap_get_size(trx->lock.lock_heap);
1910 lock_mutex_exit();
1911
1912 trx_print_low(f, trx, max_query_len,
1913 n_rec_locks, n_trx_locks, heap_size);
1914}
1915
1916/*******************************************************************//**
1917Compares the "weight" (or size) of two transactions. Transactions that
1918have edited non-transactional tables are considered heavier than ones
1919that have not.
1920@return TRUE if weight(a) >= weight(b) */
1921bool
1922trx_weight_ge(
1923/*==========*/
1924 const trx_t* a, /*!< in: transaction to be compared */
1925 const trx_t* b) /*!< in: transaction to be compared */
1926{
1927 ibool a_notrans_edit;
1928 ibool b_notrans_edit;
1929
1930 /* If mysql_thd is NULL for a transaction we assume that it has
1931 not edited non-transactional tables. */
1932
1933 a_notrans_edit = a->mysql_thd != NULL
1934 && thd_has_edited_nontrans_tables(a->mysql_thd);
1935
1936 b_notrans_edit = b->mysql_thd != NULL
1937 && thd_has_edited_nontrans_tables(b->mysql_thd);
1938
1939 if (a_notrans_edit != b_notrans_edit) {
1940
1941 return(a_notrans_edit);
1942 }
1943
1944 /* Either both had edited non-transactional tables or both had
1945 not, we fall back to comparing the number of altered/locked
1946 rows. */
1947
1948 return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
1949}
1950
1951/** Prepare a transaction.
1952@return log sequence number that makes the XA PREPARE durable
1953@retval 0 if no changes needed to be made durable */
1954static
1955lsn_t
1956trx_prepare_low(trx_t* trx)
1957{
1958 ut_ad(!trx->rsegs.m_redo.old_insert);
1959 ut_ad(!trx->is_recovered);
1960
1961 mtr_t mtr;
1962
1963 if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
1964 ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
1965
1966 mtr.start();
1967 mtr.set_log_mode(MTR_LOG_NO_REDO);
1968
1969 mutex_enter(&undo->rseg->mutex);
1970 trx_undo_set_state_at_prepare(trx, undo, false, &mtr);
1971 mutex_exit(&undo->rseg->mutex);
1972
1973 mtr.commit();
1974 }
1975
1976 trx_undo_t* undo = trx->rsegs.m_redo.undo;
1977
1978 if (!undo) {
1979 /* There were no changes to persistent tables. */
1980 return(0);
1981 }
1982
1983 trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
1984 ut_ad(undo->rseg == rseg);
1985
1986 mtr.start(true);
1987
1988 /* Change the undo log segment states from TRX_UNDO_ACTIVE to
1989 TRX_UNDO_PREPARED: these modifications to the file data
1990 structure define the transaction as prepared in the file-based
1991 world, at the serialization point of lsn. */
1992
1993 mutex_enter(&rseg->mutex);
1994 trx_undo_set_state_at_prepare(trx, undo, false, &mtr);
1995 mutex_exit(&rseg->mutex);
1996
1997 /* Make the XA PREPARE durable. */
1998 mtr.commit();
1999 ut_ad(mtr.commit_lsn() > 0);
2000 return(mtr.commit_lsn());
2001}
2002
2003/****************************************************************//**
2004Prepares a transaction. */
2005static
2006void
2007trx_prepare(
2008/*========*/
2009 trx_t* trx) /*!< in/out: transaction */
2010{
2011 /* Only fresh user transactions can be prepared.
2012 Recovered transactions cannot. */
2013 ut_a(!trx->is_recovered);
2014
2015 lsn_t lsn = trx_prepare_low(trx);
2016
2017 DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
2018
2019 ut_a(trx->state == TRX_STATE_ACTIVE);
2020 trx_mutex_enter(trx);
2021 trx->state = TRX_STATE_PREPARED;
2022 trx_mutex_exit(trx);
2023
2024 if (lsn) {
2025 /* Depending on the my.cnf options, we may now write the log
2026 buffer to the log files, making the prepared state of the
2027 transaction durable if the OS does not crash. We may also
2028 flush the log files to disk, making the prepared state of the
2029 transaction durable also at an OS crash or a power outage.
2030
2031 The idea in InnoDB's group prepare is that a group of
2032 transactions gather behind a trx doing a physical disk write
2033 to log files, and when that physical write has been completed,
2034 one of those transactions does a write which prepares the whole
2035 group. Note that this group prepare will only bring benefit if
2036 there are > 2 users in the database. Then at least 2 users can
2037 gather behind one doing the physical log write to disk.
2038
2039 We must not be holding any mutexes or latches here. */
2040
2041 trx_flush_log_if_needed(lsn, trx);
2042 }
2043}
2044
2045/** XA PREPARE a transaction.
2046@param[in,out] trx transaction to prepare */
2047void trx_prepare_for_mysql(trx_t* trx)
2048{
2049 trx_start_if_not_started_xa(trx, false);
2050
2051 trx->op_info = "preparing";
2052
2053 trx_prepare(trx);
2054
2055 trx->op_info = "";
2056}
2057
2058
/** Argument passed to trx_recover_for_mysql_callback(). The members are
set positionally by an aggregate initializer, so their order matters. */
struct trx_recover_for_mysql_callback_arg
{
  XID *xid_list; /*!< output array that receives the XIDs */
  uint len;      /*!< number of slots in xid_list */
  uint count;    /*!< number of XIDs stored in xid_list so far */
};
2065
2066
2067static my_bool trx_recover_for_mysql_callback(rw_trx_hash_element_t *element,
2068 trx_recover_for_mysql_callback_arg *arg)
2069{
2070 mutex_enter(&element->mutex);
2071 if (trx_t *trx= element->trx)
2072 {
2073 /*
2074 The state of a read-write transaction can only change from ACTIVE to
2075 PREPARED while we are holding the element->mutex. But since it is
2076 executed at startup no state change should occur.
2077 */
2078 if (trx_state_eq(trx, TRX_STATE_PREPARED))
2079 {
2080 ut_ad(trx->is_recovered);
2081 if (arg->count == 0)
2082 ib::info() << "Starting recovery for XA transactions...";
2083 ib::info() << "Transaction " << trx_get_id_for_print(trx)
2084 << " in prepared state after recovery";
2085 ib::info() << "Transaction contains changes to " << trx->undo_no
2086 << " rows";
2087 arg->xid_list[arg->count++]= *trx->xid;
2088 }
2089 }
2090 mutex_exit(&element->mutex);
2091 return arg->count == arg->len;
2092}
2093
2094
2095/**
2096 Find prepared transaction objects for recovery.
2097
2098 @param[out] xid_list prepared transactions
2099 @param[in] len number of slots in xid_list
2100
2101 @return number of prepared transactions stored in xid_list
2102*/
2103
2104int trx_recover_for_mysql(XID *xid_list, uint len)
2105{
2106 trx_recover_for_mysql_callback_arg arg= { xid_list, len, 0 };
2107
2108 ut_ad(xid_list);
2109 ut_ad(len);
2110
2111 /* Fill xid_list with PREPARED transactions. */
2112 trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
2113 (trx_recover_for_mysql_callback), &arg);
2114 if (arg.count)
2115 ib::info() << arg.count
2116 << " transactions in prepared state after recovery";
2117 return int(arg.count);
2118}
2119
2120
/** Argument passed to trx_get_trx_by_xid_callback(). The members are set
positionally by an aggregate initializer, so their order matters. */
struct trx_get_trx_by_xid_callback_arg
{
  XID *xid;   /*!< XID to search for */
  trx_t *trx; /*!< matching transaction; set by the callback on a match */
};
2126
2127
2128static my_bool trx_get_trx_by_xid_callback(rw_trx_hash_element_t *element,
2129 trx_get_trx_by_xid_callback_arg *arg)
2130{
2131 my_bool found= 0;
2132 mutex_enter(&element->mutex);
2133 if (trx_t *trx= element->trx)
2134 {
2135 if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) &&
2136 arg->xid->eq(reinterpret_cast<XID*>(trx->xid)))
2137 {
2138 /* Invalidate the XID, so that subsequent calls will not find it. */
2139 trx->xid->null();
2140 arg->trx= trx;
2141 found= 1;
2142 }
2143 }
2144 mutex_exit(&element->mutex);
2145 return found;
2146}
2147
2148
2149/**
2150 Finds PREPARED XA transaction by xid.
2151
2152 trx may have been committed, unless the caller is holding lock_sys.mutex.
2153
2154 @param[in] xid X/Open XA transaction identifier
2155
2156 @return trx or NULL; on match, the trx->xid will be invalidated;
2157*/
2158
2159trx_t *trx_get_trx_by_xid(XID *xid)
2160{
2161 trx_get_trx_by_xid_callback_arg arg= { xid, 0 };
2162
2163 if (xid)
2164 trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
2165 (trx_get_trx_by_xid_callback), &arg);
2166 return arg.trx;
2167}
2168
2169
2170/*************************************************************//**
2171Starts the transaction if it is not yet started. */
2172void
2173trx_start_if_not_started_xa_low(
2174/*============================*/
2175 trx_t* trx, /*!< in/out: transaction */
2176 bool read_write) /*!< in: true if read write transaction */
2177{
2178 switch (trx->state) {
2179 case TRX_STATE_NOT_STARTED:
2180 trx_start_low(trx, read_write);
2181 return;
2182
2183 case TRX_STATE_ACTIVE:
2184 if (trx->id == 0 && read_write) {
2185 /* If the transaction is tagged as read-only then
2186 it can only write to temp tables and for such
2187 transactions we don't want to move them to the
2188 trx_sys_t::rw_trx_hash. */
2189 if (!trx->read_only) {
2190 trx_set_rw_mode(trx);
2191 }
2192 }
2193 return;
2194 case TRX_STATE_PREPARED:
2195 case TRX_STATE_COMMITTED_IN_MEMORY:
2196 break;
2197 }
2198
2199 ut_error;
2200}
2201
2202/*************************************************************//**
2203Starts the transaction if it is not yet started. */
2204void
2205trx_start_if_not_started_low(
2206/*==========================*/
2207 trx_t* trx, /*!< in: transaction */
2208 bool read_write) /*!< in: true if read write transaction */
2209{
2210 switch (trx->state) {
2211 case TRX_STATE_NOT_STARTED:
2212 trx_start_low(trx, read_write);
2213 return;
2214
2215 case TRX_STATE_ACTIVE:
2216 if (read_write && trx->id == 0 && !trx->read_only) {
2217 trx_set_rw_mode(trx);
2218 }
2219 return;
2220
2221 case TRX_STATE_PREPARED:
2222 case TRX_STATE_COMMITTED_IN_MEMORY:
2223 break;
2224 }
2225
2226 ut_error;
2227}
2228
2229/*************************************************************//**
2230Starts a transaction for internal processing. */
2231void
2232trx_start_internal_low(
2233/*===================*/
2234 trx_t* trx) /*!< in/out: transaction */
2235{
2236 /* Ensure it is not flagged as an auto-commit-non-locking
2237 transaction. */
2238
2239 trx->will_lock = 1;
2240
2241 trx->internal = true;
2242
2243 trx_start_low(trx, true);
2244}
2245
2246/** Starts a read-only transaction for internal processing.
2247@param[in,out] trx transaction to be started */
2248void
2249trx_start_internal_read_only_low(
2250 trx_t* trx)
2251{
2252 /* Ensure it is not flagged as an auto-commit-non-locking
2253 transaction. */
2254
2255 trx->will_lock = 1;
2256
2257 trx->internal = true;
2258
2259 trx_start_low(trx, false);
2260}
2261
2262/*************************************************************//**
2263Starts the transaction for a DDL operation. */
2264void
2265trx_start_for_ddl_low(
2266/*==================*/
2267 trx_t* trx, /*!< in/out: transaction */
2268 trx_dict_op_t op) /*!< in: dictionary operation type */
2269{
2270 switch (trx->state) {
2271 case TRX_STATE_NOT_STARTED:
2272 /* Flag this transaction as a dictionary operation, so that
2273 the data dictionary will be locked in crash recovery. */
2274
2275 trx_set_dict_operation(trx, op);
2276
2277 /* Ensure it is not flagged as an auto-commit-non-locking
2278 transation. */
2279 trx->will_lock = 1;
2280
2281 trx->ddl= true;
2282
2283 trx_start_internal_low(trx);
2284 return;
2285
2286 case TRX_STATE_ACTIVE:
2287 case TRX_STATE_PREPARED:
2288 case TRX_STATE_COMMITTED_IN_MEMORY:
2289 break;
2290 }
2291
2292 ut_error;
2293}
2294
2295/*************************************************************//**
2296Set the transaction as a read-write transaction if it is not already
2297tagged as such. Read-only transactions that are writing to temporary
2298tables are assigned an ID and a rollback segment but are not added
2299to the trx read-write list because their updates should not be visible
2300to other transactions and therefore their changes can be ignored by
2301by MVCC. */
2302void
2303trx_set_rw_mode(
2304/*============*/
2305 trx_t* trx) /*!< in/out: transaction that is RW */
2306{
2307 ut_ad(trx->rsegs.m_redo.rseg == 0);
2308 ut_ad(!trx_is_autocommit_non_locking(trx));
2309 ut_ad(!trx->read_only);
2310 ut_ad(trx->id == 0);
2311
2312 if (high_level_read_only) {
2313 return;
2314 }
2315
2316 /* Function is promoting existing trx from ro mode to rw mode.
2317 In this process it has acquired trx_sys.mutex as it plan to
2318 move trx from ro list to rw list. If in future, some other thread
2319 looks at this trx object while it is being promoted then ensure
2320 that both threads are synced by acquring trx->mutex to avoid decision
2321 based on in-consistent view formed during promotion. */
2322
2323 trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
2324 ut_ad(trx->rsegs.m_redo.rseg != 0);
2325
2326 trx_sys.register_rw(trx);
2327
2328 /* So that we can see our own changes. */
2329 if (trx->read_view.is_open()) {
2330 trx->read_view.set_creator_trx_id(trx->id);
2331 }
2332}
2333