1/*****************************************************************************
2
3Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2015, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file include/trx0trx.h
22The transaction
23
24Created 3/26/1996 Heikki Tuuri
25*******************************************************/
26
27#ifndef trx0trx_h
28#define trx0trx_h
29
30#include <set>
31
32#include "ha_prototypes.h"
33
34#include "dict0types.h"
35#include "trx0types.h"
36
37#include "lock0types.h"
38#include "log0log.h"
39#include "que0types.h"
40#include "mem0mem.h"
41#include "trx0xa.h"
42#include "ut0vec.h"
43#include "fts0fts.h"
44#include "read0types.h"
45
46// Forward declaration
47struct mtr_t;
48
49// Forward declaration
50class FlushObserver;
51
52struct rw_trx_hash_element_t;
53
54/** Set flush observer for the transaction
@param[in,out]	trx		transaction struct
56@param[in] observer flush observer */
57void
58trx_set_flush_observer(
59 trx_t* trx,
60 FlushObserver* observer);
61
62/******************************************************************//**
63Set detailed error message for the transaction. */
64void
65trx_set_detailed_error(
66/*===================*/
67 trx_t* trx, /*!< in: transaction struct */
68 const char* msg); /*!< in: detailed error message */
69/*************************************************************//**
Set detailed error message for the transaction from a file. Note that the
file is rewound before reading from it. */
72void
73trx_set_detailed_error_from_file(
74/*=============================*/
75 trx_t* trx, /*!< in: transaction struct */
76 FILE* file); /*!< in: file to read message from */
77/****************************************************************//**
78Retrieves the error_info field from a trx.
79@return the error info */
80UNIV_INLINE
81const dict_index_t*
82trx_get_error_info(
83/*===============*/
84 const trx_t* trx); /*!< in: trx object */
85
86/** @return a trx_t instance from trx_pools. */
87trx_t *trx_create();
88
89/**
90 Release a trx_t instance back to the pool.
91 @param trx the instance to release.
92*/
93void trx_free(trx_t*& trx);
94
95/** At shutdown, frees a transaction object. */
96void
97trx_free_at_shutdown(trx_t *trx);
98
99/** Disconnect a prepared transaction from MySQL.
100@param[in,out] trx transaction */
101void
102trx_disconnect_prepared(trx_t* trx);
103
104/** Initialize (resurrect) transactions at startup. */
105void
106trx_lists_init_at_db_start();
107
108/*************************************************************//**
109Starts the transaction if it is not yet started. */
110void
111trx_start_if_not_started_xa_low(
112/*============================*/
113 trx_t* trx, /*!< in/out: transaction */
114 bool read_write); /*!< in: true if read write transaction */
115/*************************************************************//**
116Starts the transaction if it is not yet started. */
117void
118trx_start_if_not_started_low(
119/*=========================*/
120 trx_t* trx, /*!< in/out: transaction */
121 bool read_write); /*!< in: true if read write transaction */
122
123/*************************************************************//**
124Starts a transaction for internal processing. */
125void
126trx_start_internal_low(
127/*===================*/
128 trx_t* trx); /*!< in/out: transaction */
129
130/** Starts a read-only transaction for internal processing.
131@param[in,out] trx transaction to be started */
132void
133trx_start_internal_read_only_low(
134 trx_t* trx);
135
/* Convenience wrappers around the trx_start_*_low() functions.

Without UNIV_DEBUG each wrapper forwards its arguments unchanged to the
matching *_low() function.

With UNIV_DEBUG each wrapper first stores the call site in
(t)->start_file and (t)->start_line and then forwards. In that
configuration the argument t is expanded three times, so callers must
pass an expression without side effects. */
#ifndef UNIV_DEBUG
# define trx_start_if_not_started_xa(t, rw)			\
	trx_start_if_not_started_xa_low((t), (rw))

# define trx_start_if_not_started(t, rw)			\
	trx_start_if_not_started_low((t), (rw))

# define trx_start_internal(t)					\
	trx_start_internal_low((t))

# define trx_start_internal_read_only(t)			\
	trx_start_internal_read_only_low((t))
#else /* !UNIV_DEBUG */
# define trx_start_if_not_started_xa(t, rw)			\
	do {							\
		(t)->start_file = __FILE__;			\
		(t)->start_line = __LINE__;			\
		trx_start_if_not_started_xa_low((t), (rw));	\
	} while (0)

# define trx_start_if_not_started(t, rw)			\
	do {							\
		(t)->start_file = __FILE__;			\
		(t)->start_line = __LINE__;			\
		trx_start_if_not_started_low((t), (rw));	\
	} while (0)

# define trx_start_internal(t)					\
	do {							\
		(t)->start_file = __FILE__;			\
		(t)->start_line = __LINE__;			\
		trx_start_internal_low((t));			\
	} while (0)

# define trx_start_internal_read_only(t)			\
	do {							\
		(t)->start_file = __FILE__;			\
		(t)->start_line = __LINE__;			\
		trx_start_internal_read_only_low((t));		\
	} while (0)
#endif /* !UNIV_DEBUG */
177
178/*************************************************************//**
179Starts the transaction for a DDL operation. */
180void
181trx_start_for_ddl_low(
182/*==================*/
183 trx_t* trx, /*!< in/out: transaction */
184 trx_dict_op_t op); /*!< in: dictionary operation type */
185
/* Wrapper around trx_start_for_ddl_low().

Without UNIV_DEBUG it forwards both arguments unchanged.

With UNIV_DEBUG it first asserts that (t)->start_file has not been set,
then stores the call site in (t)->start_file and (t)->start_line and
forwards. The argument t is expanded several times in that
configuration, so it must not have side effects. */
#ifndef UNIV_DEBUG
# define trx_start_for_ddl(t, o)				\
	trx_start_for_ddl_low((t), (o))
#else /* !UNIV_DEBUG */
# define trx_start_for_ddl(t, o)				\
	do {							\
		ut_ad(!(t)->start_file);			\
		(t)->start_file = __FILE__;			\
		(t)->start_line = __LINE__;			\
		trx_start_for_ddl_low((t), (o));		\
	} while (0)
#endif /* !UNIV_DEBUG */
198
199/****************************************************************//**
200Commits a transaction. */
201void
202trx_commit(
203/*=======*/
204 trx_t* trx); /*!< in/out: transaction */
205
206/** Commit a transaction and a mini-transaction.
207@param[in,out] trx transaction
208@param[in,out] mtr mini-transaction (NULL if no modifications) */
209void trx_commit_low(trx_t* trx, mtr_t* mtr);
210/**********************************************************************//**
211Does the transaction commit for MySQL.
212@return DB_SUCCESS or error number */
213dberr_t
214trx_commit_for_mysql(
215/*=================*/
216 trx_t* trx); /*!< in/out: transaction */
217/** XA PREPARE a transaction.
218@param[in,out] trx transaction to prepare */
219void trx_prepare_for_mysql(trx_t* trx);
220/**********************************************************************//**
221This function is used to find number of prepared transactions and
222their transaction objects for a recovery.
223@return number of prepared transactions */
224int
225trx_recover_for_mysql(
226/*==================*/
227 XID* xid_list, /*!< in/out: prepared transactions */
228 uint len); /*!< in: number of slots in xid_list */
229/*******************************************************************//**
230This function is used to find one X/Open XA distributed transaction
231which is in the prepared state
232@return trx or NULL; on match, the trx->xid will be invalidated;
233note that the trx may have been committed, unless the caller is
234holding lock_sys.mutex */
235trx_t *
236trx_get_trx_by_xid(
237/*===============*/
238 XID* xid); /*!< in: X/Open XA transaction identifier */
239/**********************************************************************//**
240If required, flushes the log to disk if we called trx_commit_for_mysql()
241with trx->flush_log_later == TRUE. */
242void
243trx_commit_complete_for_mysql(
244/*==========================*/
245 trx_t* trx); /*!< in/out: transaction */
246/**********************************************************************//**
247Marks the latest SQL statement ended. */
248void
249trx_mark_sql_stat_end(
250/*==================*/
251 trx_t* trx); /*!< in: trx handle */
252/****************************************************************//**
253Prepares a transaction for commit/rollback. */
254void
255trx_commit_or_rollback_prepare(
256/*===========================*/
257 trx_t* trx); /*!< in/out: transaction */
258/*********************************************************************//**
259Creates a commit command node struct.
260@return own: commit node struct */
261commit_node_t*
262trx_commit_node_create(
263/*===================*/
264 mem_heap_t* heap); /*!< in: mem heap where created */
265/***********************************************************//**
266Performs an execution step for a commit type node in a query graph.
267@return query thread to run next, or NULL */
268que_thr_t*
269trx_commit_step(
270/*============*/
271 que_thr_t* thr); /*!< in: query thread */
272
273/**********************************************************************//**
274Prints info about a transaction.
275Caller must hold trx_sys.mutex. */
276void
277trx_print_low(
278/*==========*/
279 FILE* f,
280 /*!< in: output stream */
281 const trx_t* trx,
282 /*!< in: transaction */
283 ulint max_query_len,
284 /*!< in: max query length to print,
285 or 0 to use the default max length */
286 ulint n_rec_locks,
287 /*!< in: lock_number_of_rows_locked(&trx->lock) */
288 ulint n_trx_locks,
289 /*!< in: length of trx->lock.trx_locks */
290 ulint heap_size);
291 /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
292
293/**********************************************************************//**
294Prints info about a transaction.
295The caller must hold lock_sys.mutex and trx_sys.mutex.
296When possible, use trx_print() instead. */
297void
298trx_print_latched(
299/*==============*/
300 FILE* f, /*!< in: output stream */
301 const trx_t* trx, /*!< in: transaction */
302 ulint max_query_len); /*!< in: max query length to print,
303 or 0 to use the default max length */
304
305/**********************************************************************//**
306Prints info about a transaction.
307Acquires and releases lock_sys.mutex. */
308void
309trx_print(
310/*======*/
311 FILE* f, /*!< in: output stream */
312 const trx_t* trx, /*!< in: transaction */
313 ulint max_query_len); /*!< in: max query length to print,
314 or 0 to use the default max length */
315
316/**********************************************************************//**
317Determine if a transaction is a dictionary operation.
318@return dictionary operation mode */
319UNIV_INLINE
320enum trx_dict_op_t
321trx_get_dict_operation(
322/*===================*/
323 const trx_t* trx) /*!< in: transaction */
324 MY_ATTRIBUTE((warn_unused_result));
325/**********************************************************************//**
326Flag a transaction a dictionary operation. */
327UNIV_INLINE
328void
329trx_set_dict_operation(
330/*===================*/
331 trx_t* trx, /*!< in/out: transaction */
332 enum trx_dict_op_t op); /*!< in: operation, not
333 TRX_DICT_OP_NONE */
334
335/**********************************************************************//**
336Determines if a transaction is in the given state.
337The caller must hold trx_sys.mutex, or it must be the thread
338that is serving a running transaction.
339A running RW transaction must be in trx_sys.rw_trx_hash.
340@return TRUE if trx->state == state */
341UNIV_INLINE
342bool
343trx_state_eq(
344/*=========*/
345 const trx_t* trx, /*!< in: transaction */
346 trx_state_t state, /*!< in: state;
347 if state != TRX_STATE_NOT_STARTED
348 asserts that
349 trx->state != TRX_STATE_NOT_STARTED */
350 bool relaxed = false)
351 /*!< in: whether to allow
352 trx->state == TRX_STATE_NOT_STARTED
353 after an error has been reported */
354 MY_ATTRIBUTE((nonnull, warn_unused_result));
355
356/**********************************************************************//**
357Determines if the currently running transaction has been interrupted.
358@return true if interrupted */
359bool
360trx_is_interrupted(
361/*===============*/
362 const trx_t* trx); /*!< in: transaction */
363/**********************************************************************//**
364Determines if the currently running transaction is in strict mode.
365@return TRUE if strict */
366ibool
367trx_is_strict(
368/*==========*/
369 trx_t* trx); /*!< in: transaction */
370
/** Estimate the "weight" of a transaction.
The estimate is the sum of the transaction's undo_no (standing for the
number of altered rows) and the length of its lock.trx_locks list
(standing for the number of locked rows).
The argument is expanded twice, so it must not have side effects.
@param t	transaction
@return transaction weight */
#define TRX_WEIGHT(t)	(UT_LIST_GET_LEN((t)->lock.trx_locks) + (t)->undo_no)
377
378/*******************************************************************//**
379Compares the "weight" (or size) of two transactions. Transactions that
380have edited non-transactional tables are considered heavier than ones
381that have not.
382@return true if weight(a) >= weight(b) */
383bool
384trx_weight_ge(
385/*==========*/
386 const trx_t* a, /*!< in: the transaction to be compared */
387 const trx_t* b); /*!< in: the transaction to be compared */
/** Maximum length of a string that can be returned by
trx_get_que_state_str(). The value is the length of "ROLLING BACK",
that is, 12 characters not counting a terminating NUL. */
#define TRX_QUE_STATE_STR_MAX_LEN	12 /* "ROLLING BACK" */
391
392/*******************************************************************//**
393Retrieves transaction's que state in a human readable string. The string
394should not be free()'d or modified.
395@return string in the data segment */
396UNIV_INLINE
397const char*
398trx_get_que_state_str(
399/*==================*/
400 const trx_t* trx); /*!< in: transaction */
401
/** Retrieves the transaction ID.
403In a given point in time it is guaranteed that IDs of the running
404transactions are unique. The values returned by this function for readonly
405transactions may be reused, so a subsequent RO transaction may get the same ID
406as a RO transaction that existed in the past. The values returned by this
407function should be used for printing purposes only.
408@param[in] trx transaction whose id to retrieve
409@return transaction id */
410UNIV_INLINE
411trx_id_t
412trx_get_id_for_print(
413 const trx_t* trx);
414
415/** Create the trx_t pool */
416void
417trx_pool_init();
418
419/** Destroy the trx_t pool */
420void
421trx_pool_close();
422
423/**
424Set the transaction as a read-write transaction if it is not already
425tagged as such.
426@param[in,out] trx Transaction that needs to be "upgraded" to RW from RO */
427void
428trx_set_rw_mode(
429 trx_t* trx);
430
/**
Get the lock wait timeout that applies to a transaction.
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions the result is 0
instead of the user configured value that comes from
innodb_lock_wait_timeout via trx_t::mysql_thd.
The argument is expanded twice, so it must not have side effects.
@param t	transaction
@return lock wait timeout in seconds */
#define trx_lock_wait_timeout_get(t)			\
	((t)->mysql_thd == NULL				\
	 ? 0						\
	 : thd_lock_wait_timeout((t)->mysql_thd))
442
/**
Determine if the transaction is a non-locking autocommit select
(implied read-only): auto_commit is set and will_lock is zero.
The argument is expanded twice, so it must not have side effects.
@param t	transaction
@return true if non-locking autocommit select transaction. */
#define trx_is_autocommit_non_locking(t)		\
	((t)->auto_commit && !(t)->will_lock)
450
/**
Determine if the transaction is a non-locking autocommit select,
additionally requiring that its read_only flag is set.
The read_only flag is tested first; trx_is_autocommit_non_locking()
is only evaluated when it is set.
@param t	transaction
@return true if non-locking autocommit read-only transaction. */
#define trx_is_ac_nl_ro(t)				\
	((t)->read_only && trx_is_autocommit_non_locking(t))
458
/**
Check transaction state.
Asserts (ut_ad) that the transaction is not an autocommit non-locking
one, and then requires its state to be TRX_STATE_ACTIVE,
TRX_STATE_PREPARED or TRX_STATE_COMMITTED_IN_MEMORY. For
TRX_STATE_NOT_STARTED, or any value without a case label, control
reaches ut_error.

The "continue" statements are deliberate: inside do { } while (0) a
continue jumps to the loop condition, which is false, so the macro is
left without reaching ut_error.
@param t	transaction */
#define	check_trx_state(t) do {						\
	ut_ad(!trx_is_autocommit_non_locking((t)));			\
	switch ((t)->state) {						\
	case TRX_STATE_NOT_STARTED:					\
		break;							\
	case TRX_STATE_ACTIVE:						\
	case TRX_STATE_PREPARED:					\
	case TRX_STATE_COMMITTED_IN_MEMORY:				\
		continue;						\
	}								\
	ut_error;							\
} while (0)
474
/** Check if transaction is free so that it can be re-initialized.
Every check is a ut_ad() assertion on the macro argument:
the state equals TRX_STATE_NOT_STARTED, nothing has been logged,
the read view is not open, no query thread is waiting for a lock,
the list of transaction locks is empty, and no dictionary operation
is flagged.
The argument is expanded several times, so it must not have side effects.
@param t transaction handle */
#define	assert_trx_is_free(t)	do {					\
	ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED));		\
	/* Use the macro argument, not a caller variable named trx */	\
	ut_ad(!(t)->has_logged());					\
	ut_ad(!(t)->read_view.is_open());				\
	ut_ad((t)->lock.wait_thr == NULL);				\
	ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0);		\
	ut_ad((t)->dict_operation == TRX_DICT_OP_NONE);			\
} while(0)
485
/** Check if transaction is inactive so that it can be freed and put back
to the transaction pool.
Runs assert_trx_is_free() first and then asserts (ut_ad) that
dict_operation_lock_mode is zero.
@param t transaction handle */
#define	assert_trx_is_inactive(t)	do {			\
	assert_trx_is_free((t));				\
	ut_ad(!(t)->dict_operation_lock_mode);			\
} while (0)
493
/*******************************************************************//**
Assert that an autocommit non-locking select cannot be in the
rw_trx_hash and that it is a read-only transaction.
The transaction must have mysql_thd assigned.

With UNIV_DEBUG: for an autocommit non-locking transaction, the state is
read once and the macro asserts read_only, !is_recovered, a non-NULL
mysql_thd and a state of TRX_STATE_NOT_STARTED or TRX_STATE_ACTIVE;
for any other transaction it runs check_trx_state().
Without UNIV_DEBUG the macro expands to a no-op and does not evaluate
its argument. */
#ifndef UNIV_DEBUG
# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
#else /* !UNIV_DEBUG */
# define assert_trx_nonlocking_or_in_list(t)				\
	do {								\
		if (!trx_is_autocommit_non_locking(t)) {		\
			check_trx_state(t);				\
		} else {						\
			trx_state_t trx_state_snapshot = (t)->state;	\
			ut_ad((t)->read_only);				\
			ut_ad(!(t)->is_recovered);			\
			ut_ad((t)->mysql_thd);				\
			ut_ad(trx_state_snapshot			\
			      == TRX_STATE_NOT_STARTED			\
			      || trx_state_snapshot			\
			      == TRX_STATE_ACTIVE);			\
		}							\
	} while (0)
#endif /* !UNIV_DEBUG */
519
/** Vector of lock object pointers, allocated through ut_allocator.
trx_lock_t uses this type for its pre-allocated record and table lock
pools and for its list of table locks. */
typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> >	lock_pool_t;
521
522/*******************************************************************//**
523Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
524captures the state of the query thread during the execution of a query.
525This is different from a transaction state. The query state of a transaction
526can be updated asynchronously by other threads. The other threads can be
527system threads, like the timeout monitor thread or user threads executing
528other queries. Another thing to be mindful of is that there is a delay between
529when a query thread is put into LOCK_WAIT state and before it actually starts
530waiting. Between these two events it is possible that the query thread is
531granted the lock it was waiting for, which implies that the state can be changed
532asynchronously.
533
534All these operations take place within the context of locking. Therefore state
535changes within the locking code must acquire both the lock mutex and the
536trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
537trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
538to only acquire the trx->mutex.
539To query the state either of the mutexes is sufficient within the locking
540code and no mutex is required when the query thread is no longer waiting. */
541
/** The locks and state of an active transaction. Protected by
lock_sys.mutex, trx->mutex or both; the protection that applies to an
individual field is stated in the comment of that field. */
struct trx_lock_t {
	ulint		n_active_thrs;	/*!< number of active query threads */

	trx_que_t	que_state;	/*!< valid when trx->state
					== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
					TRX_QUE_LOCK_WAIT, ... */

	lock_t*		wait_lock;	/*!< if trx execution state is
					TRX_QUE_LOCK_WAIT, this points to
					the lock request, otherwise this is
					NULL; set to non-NULL when holding
					both trx->mutex and lock_sys.mutex;
					set to NULL when holding
					lock_sys.mutex; readers should
					hold lock_sys.mutex, except when
					they are holding trx->mutex and
					wait_lock==NULL.
					NOTE(review): the latching protocol
					comment above this struct says that
					trx->mutex alone suffices when a lock
					wait ends; confirm which rule is
					current. */
	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
					to and checked against lock_mark_counter
					by lock_deadlock_recursive(). */
	bool		was_chosen_as_deadlock_victim;
					/*!< when the transaction decides to
					wait for a lock, it sets this to false;
					if another transaction chooses this
					transaction as a victim in deadlock
					resolution, it sets this to true.
					Protected by trx->mutex. */
	time_t		wait_started;	/*!< lock wait started at this time,
					protected only by lock_sys.mutex */

	que_thr_t*	wait_thr;	/*!< query thread belonging to this
					trx that is in QUE_THR_LOCK_WAIT
					state. For threads suspended in a
					lock wait, this is protected by
					lock_sys.mutex. Otherwise, this may
					only be modified by the thread that is
					serving the running transaction. */

	lock_pool_t	rec_pool;	/*!< Pre-allocated record locks */

	lock_pool_t	table_pool;	/*!< Pre-allocated table locks */

	ulint		rec_cached;	/*!< Next free rec lock in pool
					(position within rec_pool) */

	ulint		table_cached;	/*!< Next free table lock in pool
					(position within table_pool) */

	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
					protected by lock_sys.mutex */

	trx_lock_list_t trx_locks;	/*!< locks requested by the transaction;
					insertions are protected by trx->mutex
					and lock_sys.mutex; removals are
					protected by lock_sys.mutex */

	lock_pool_t	table_locks;	/*!< All table locks requested by this
					transaction, including AUTOINC locks */

	bool		cancel;		/*!< true if the transaction is being
					rolled back either via deadlock
					detection or due to lock timeout. The
					caller has to acquire the trx_t::mutex
					in order to cancel the locks. In
					lock_trx_table_locks_remove() we
					check for this cancel of a transaction's
					locks and avoid reacquiring the trx
					mutex to prevent recursive deadlocks.
					Protected by both the lock sys mutex
					and the trx_t::mutex. */
	ulint		n_rec_locks;	/*!< number of rec locks in this trx */
};
614
615/** Logical first modification time of a table in a transaction */
616class trx_mod_table_time_t
617{
618 /** First modification of the table */
619 undo_no_t first;
620 /** First modification of a system versioned column */
621 undo_no_t first_versioned;
622
623 /** Magic value signifying that a system versioned column of a
624 table was never modified in a transaction. */
625 static const undo_no_t UNVERSIONED = IB_ID_MAX;
626
627public:
628 /** Constructor
629 @param[in] rows number of modified rows so far */
630 trx_mod_table_time_t(undo_no_t rows)
631 : first(rows), first_versioned(UNVERSIONED) {}
632
633#ifdef UNIV_DEBUG
634 /** Validation
635 @param[in] rows number of modified rows so far
636 @return whether the object is valid */
637 bool valid(undo_no_t rows = UNVERSIONED) const
638 {
639 return first <= first_versioned && first <= rows;
640 }
641#endif /* UNIV_DEBUG */
642 /** @return if versioned columns were modified */
643 bool is_versioned() const { return first_versioned != UNVERSIONED; }
644
645 /** After writing an undo log record, set is_versioned() if needed
646 @param[in] rows number of modified rows so far */
647 void set_versioned(undo_no_t rows)
648 {
649 ut_ad(!is_versioned());
650 first_versioned = rows;
651 ut_ad(valid());
652 }
653
654 /** Invoked after partial rollback
655 @param[in] limit number of surviving modified rows
656 @return whether this should be erased from trx_t::mod_tables */
657 bool rollback(undo_no_t limit)
658 {
659 ut_ad(valid());
660 if (first >= limit) {
661 return true;
662 }
663
664 if (first_versioned < limit && is_versioned()) {
665 first_versioned = UNVERSIONED;
666 }
667
668 return false;
669 }
670};
671
/** Collection of persistent tables and their first modification
in a transaction.
The map is keyed by the table object pointer (ordered by std::less on
the pointer value) and uses ut_allocator for its nodes.
We store pointers to the table objects in memory because
we know that a table object will not be destroyed while a transaction
that modified it is running. */
typedef std::map<
	dict_table_t*, trx_mod_table_time_t,
	std::less<dict_table_t*>,
	ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
	trx_mod_tables_t;
682
683/** The transaction handle
684
685Normally, there is a 1:1 relationship between a transaction handle
686(trx) and a session (client connection). One session is associated
687with exactly one user transaction. There are some exceptions to this:
688
689* For DDL operations, a subtransaction is allocated that modifies the
690data dictionary tables. Lock waits and deadlocks are prevented by
691acquiring the dict_operation_lock before starting the subtransaction
692and releasing it after committing the subtransaction.
693
694* The purge system uses a special transaction that is not associated
695with any session.
696
697* If the system crashed or it was quickly shut down while there were
698transactions in the ACTIVE or PREPARED state, these transactions would
699no longer be associated with a session when the server is restarted.
700
701A session may be served by at most one thread at a time. The serving
702thread of a session might change in some MySQL implementations.
703Therefore we do not have os_thread_get_curr_id() assertions in the code.
704
705Normally, only the thread that is currently associated with a running
706transaction may access (read and modify) the trx object, and it may do
707so without holding any mutex. The following are exceptions to this:
708
709* trx_rollback_resurrected() may access resurrected (connectionless)
710transactions while the system is already processing new user
711transactions. The trx_sys.mutex prevents a race condition between it
712and lock_trx_release_locks() [invoked by trx_commit()].
713
714* trx_print_low() may access transactions not associated with the current
715thread. The caller must be holding lock_sys.mutex.
716
717* When a transaction handle is in the trx_sys.trx_list, some of its fields
718must not be modified without holding trx->mutex.
719
* The locking code (in particular, lock_deadlock_recursive() and
lock_rec_convert_impl_to_expl()) will access transactions associated
with other connections. The locks of transactions are protected by
lock_sys.mutex and sometimes by trx->mutex. */
724
/** Represents an instance of rollback segment along with its state
variables: the assigned rollback segment, the undo log of the
transaction, and a recovered old-format insert undo log, if any. */
struct trx_undo_ptr_t {
	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
					transaction, or NULL if not assigned
					yet */
	trx_undo_t*	undo;		/*!< pointer to the undo log, or
					NULL if nothing logged yet */
	trx_undo_t*     old_insert;	/*!< pointer to recovered
					insert undo log, or NULL if no
					INSERT transactions were
					recovered from old-format undo logs */
};
737
/** An instance of temporary rollback segment, together with the undo log
that the transaction has in it. */
struct trx_temp_undo_t {
	/** temporary rollback segment, or NULL if not assigned yet */
	trx_rseg_t*	rseg;
	/** pointer to the undo log, or NULL if nothing logged yet */
	trx_undo_t*	undo;
};
745
/** Rollback segments assigned to a transaction for undo logging:
one for tables that must be recoverable after a crash (m_redo) and one
for temporary tables (m_noredo). */
struct trx_rsegs_t {
	/** undo log ptr holding reference to a rollback segment that resides in
	system/undo tablespace used for undo logging of tables that need
	to be recovered on crash. */
	trx_undo_ptr_t	m_redo;

	/** undo log for temporary tables; discarded immediately after
	transaction commit/rollback */
	trx_temp_undo_t	m_noredo;
};
757
758struct trx_t {
759private:
760 /**
761 Count of references.
762
763 We can't release the locks nor commit the transaction until this reference
764 is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
765 that it is no longer "active".
766 */
767
768 int32_t n_ref;
769
770
771public:
772 TrxMutex mutex; /*!< Mutex protecting the fields
773 state and lock (except some fields
774 of lock, which are protected by
775 lock_sys.mutex) */
776
777 trx_id_t id; /*!< transaction id */
778
779 trx_id_t no; /*!< transaction serialization number:
780 max trx id shortly before the
781 transaction is moved to
782 COMMITTED_IN_MEMORY state.
783 Protected by trx_sys_t::mutex
784 when trx is in rw_trx_hash. Initially
785 set to TRX_ID_MAX. */
786
787 /** State of the trx from the point of view of concurrency control
788 and the valid state transitions.
789
790 Possible states:
791
792 TRX_STATE_NOT_STARTED
793 TRX_STATE_ACTIVE
794 TRX_STATE_PREPARED
795 TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
796
797 Valid state transitions are:
798
799 Regular transactions:
800 * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
801
802 Auto-commit non-locking read-only:
803 * NOT_STARTED -> ACTIVE -> NOT_STARTED
804
805 XA (2PC):
806 * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
807
808 Recovered XA:
809 * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
810
811 Recovered XA followed by XA ROLLBACK:
812 * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
813
814 XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
815 * NOT_STARTED -> PREPARED -> (freed)
816
817 Disconnected XA can become recovered:
818 * ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
819 Disconnected means from mysql e.g due to the mysql client disconnection.
820 Latching and various transaction lists membership rules:
821
822 XA (2PC) transactions are always treated as non-autocommit.
823
824 Transitions to ACTIVE or NOT_STARTED occur when transaction
825 is not in rw_trx_hash (no trx_sys.mutex needed).
826
827 Autocommit non-locking read-only transactions move between states
828 without holding any mutex. They are not in rw_trx_hash.
829
830 All transactions, unless they are determined to be ac-nl-ro,
831 explicitly tagged as read-only or read-write, will first be put
832 on the read-only transaction list. Only when a !read-only transaction
833 in the read-only list tries to acquire an X or IX lock on a table
834 do we remove it from the read-only list and put it on the read-write
835 list. During this switch we assign it a rollback segment.
836
837 When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
838 in rw_trx_hash.
839
840 ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
841 The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
842
843 ACTIVE->COMMITTED is possible when the transaction is in
844 rw_trx_hash.
845
846 Transitions to COMMITTED are protected by both lock_sys.mutex
847 and trx->mutex.
848
849 NOTE: Some of these state change constraints are an overkill,
850 currently only required for a consistent view for printing stats.
851 This unnecessarily adds a huge cost for the general case. */
852
853 trx_state_t state;
854
855 ReadView read_view; /*!< consistent read view used in the
856 transaction, or NULL if not yet set */
857 trx_lock_t lock; /*!< Information about the transaction
858 locks and state. Protected by
859 trx->mutex or lock_sys.mutex
860 or both */
861 bool is_recovered; /*!< 0=normal transaction,
862 1=recovered, must be rolled back,
863 protected by trx_sys.mutex when
864 trx is in rw_trx_hash */
865
866
867 /* These fields are not protected by any mutex. */
868 const char* op_info; /*!< English text describing the
869 current operation, or an empty
870 string */
871 ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
872 bool check_foreigns; /*!< normally TRUE, but if the user
873 wants to suppress foreign key checks,
874 (in table imports, for example) we
875 set this FALSE */
876 /*------------------------------*/
877 /* MySQL has a transaction coordinator to coordinate two phase
878 commit between multiple storage engines and the binary log. When
879 an engine participates in a transaction, it's responsible for
880 registering itself using the trans_register_ha() API. */
881 bool is_registered; /* This flag is set to true after the
882 transaction has been registered with
883 the coordinator using the XA API, and
884 is set to false after commit or
885 rollback. */
886 unsigned active_commit_ordered:1;/* 1 if owns prepare mutex */
887 /*------------------------------*/
888 bool check_unique_secondary;
889 /*!< normally TRUE, but if the user
890 wants to speed up inserts by
891 suppressing unique key checks
892 for secondary indexes when we decide
893 if we can use the insert buffer for
894 them, we set this FALSE */
895 bool flush_log_later;/* In 2PC, we hold the
896 prepare_commit mutex across
897 both phases. In that case, we
898 defer flush of the logs to disk
899 until after we release the
900 mutex. */
901 bool must_flush_log_later;/*!< this flag is set to TRUE in
902 trx_commit() if flush_log_later was
903 TRUE, and there were modifications by
904 the transaction; in that case we must
905 flush the log in
906 trx_commit_complete_for_mysql() */
907 ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
908 trx_dict_op_t dict_operation; /**< @see enum trx_dict_op_t */
909
910 /* Fields protected by the srv_conc_mutex. */
911 bool declared_to_be_inside_innodb;
912 /*!< this is TRUE if we have declared
913 this transaction in
914 srv_conc_enter_innodb to be inside the
915 InnoDB engine */
916 ib_uint32_t n_tickets_to_enter_innodb;
917 /*!< this can be > 0 only when
918 declared_to_... is TRUE; when we come
919 to srv_conc_innodb_enter, if the value
920 here is > 0, we decrement this by 1 */
921 ib_uint32_t dict_operation_lock_mode;
922 /*!< 0, RW_S_LATCH, or RW_X_LATCH:
923 the latch mode trx currently holds
924 on dict_operation_lock. Protected
925 by dict_operation_lock. */
926
927 time_t start_time; /*!< time the state last time became
928 TRX_STATE_ACTIVE */
929 ib_uint64_t start_time_micro; /*!< start time of transaction in
930 microseconds */
931 lsn_t commit_lsn; /*!< lsn at the time of the commit */
932 table_id_t table_id; /*!< Table to drop iff dict_operation
933 == TRX_DICT_OP_TABLE, or 0. */
934 /*------------------------------*/
935 THD* mysql_thd; /*!< MySQL thread handle corresponding
936 to this trx, or NULL */
937
938 const char* mysql_log_file_name;
939 /*!< if MySQL binlog is used, this field
940 contains a pointer to the latest file
941 name; this is NULL if binlog is not
942 used */
943 ulonglong mysql_log_offset;
944 /*!< if MySQL binlog is used, this
945 field contains the end offset of the
946 binlog entry */
947 /*------------------------------*/
948 ib_uint32_t n_mysql_tables_in_use; /*!< number of Innobase tables
949 used in the processing of the current
950 SQL statement in MySQL */
951 ib_uint32_t mysql_n_tables_locked;
952 /*!< how many tables the current SQL
953 statement uses, except those
954 in consistent read */
955 /*------------------------------*/
956 UT_LIST_NODE_T(trx_t) trx_list; /*!< list of all transactions;
957 protected by trx_sys.mutex */
958 /*------------------------------*/
959 dberr_t error_state; /*!< 0 if no error, otherwise error
960 number; NOTE That ONLY the thread
961 doing the transaction is allowed to
962 set this field: this is NOT protected
963 by any mutex */
964 const dict_index_t*error_info; /*!< if the error number indicates a
965 duplicate key error, a pointer to
966 the problematic index is stored here */
ulint		error_key_num;	/*!< if the index creation fails due to
				a duplicate key error, the MySQL key
				number of that index is stored here */
970 que_t* graph; /*!< query currently run in the session,
971 or NULL if none; NOTE that the query
972 belongs to the session, and it can
973 survive over a transaction commit, if
974 it is a stored procedure with a COMMIT
975 WORK statement, for instance */
976 /*------------------------------*/
977 UT_LIST_BASE_NODE_T(trx_named_savept_t)
978 trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
979 oldest first */
980 /*------------------------------*/
981 undo_no_t undo_no; /*!< next undo log record number to
982 assign; since the undo log is
983 private for a transaction, this
984 is a simple ascending sequence
985 with no gaps; thus it represents
986 the number of modified/inserted
987 rows in a transaction */
988 trx_savept_t last_sql_stat_start;
989 /*!< undo_no when the last sql statement
990 was started: in case of an error, trx
991 is rolled back down to this number */
992 trx_rsegs_t rsegs; /* rollback segments for undo logging */
993 undo_no_t roll_limit; /*!< least undo number to undo during
994 a partial rollback; 0 otherwise */
995 bool in_rollback; /*!< true when the transaction is
996 executing a partial or full rollback */
997 ulint pages_undone; /*!< number of undo log pages undone
998 since the last undo log truncation */
999 /*------------------------------*/
1000 ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
1001 an SQL statement. This is useful for
1002 multi-row INSERTs */
1003 ib_vector_t* autoinc_locks; /* AUTOINC locks held by this
1004 transaction. Note that these are
1005 also in the lock list trx_locks. This
1006 vector needs to be freed explicitly
1007 when the trx instance is destroyed.
1008 Protected by lock_sys.mutex. */
1009 /*------------------------------*/
1010 bool read_only; /*!< true if transaction is flagged
1011 as a READ-ONLY transaction.
1012 if auto_commit && will_lock == 0
1013 then it will be handled as a
1014 AC-NL-RO-SELECT (Auto Commit Non-Locking
1015 Read Only Select). A read only
1016 transaction will not be assigned an
1017 UNDO log. */
1018 bool auto_commit; /*!< true if it is an autocommit */
1019 ib_uint32_t will_lock; /*!< Will acquire some locks. Increment
1020 each time we determine that a lock will
1021 be acquired by the MySQL layer. */
1022 /*------------------------------*/
1023 fts_trx_t* fts_trx; /*!< FTS information, or NULL if
1024 transaction hasn't modified tables
1025 with FTS indexes (yet). */
1026 doc_id_t fts_next_doc_id;/* The document id used for updates */
1027 /*------------------------------*/
ib_uint32_t	flush_tables;	/*!< if "covering" the "FLUSH TABLES",
				count of tables being flushed. */
1030
1031 /*------------------------------*/
1032 bool ddl; /*!< true if it is an internal
1033 transaction for DDL */
1034 bool internal; /*!< true if it is a system/internal
1035 transaction background task. This
1036 includes DDL transactions too. Such
1037 transactions are always treated as
1038 read-write. */
1039 /*------------------------------*/
1040#ifdef UNIV_DEBUG
1041 unsigned start_line; /*!< Track where it was started from */
1042 const char* start_file; /*!< Filename where it was started */
1043#endif /* UNIV_DEBUG */
1044
1045 XID* xid; /*!< X/Open XA transaction
1046 identification to identify a
1047 transaction branch */
1048 trx_mod_tables_t mod_tables; /*!< List of tables that were modified
1049 by this transaction */
1050 /*------------------------------*/
1051 char* detailed_error; /*!< detailed error message for last
1052 error, or empty. */
1053 FlushObserver* flush_observer; /*!< flush observer */
1054
1055 /* Lock wait statistics */
1056 ulint n_rec_lock_waits;
1057 /*!< Number of record lock waits,
1058 might not be exactly correct. */
1059 ulint n_table_lock_waits;
1060 /*!< Number of table lock waits,
1061 might not be exactly correct. */
1062 ulint total_rec_lock_wait_time;
1063 /*!< Total rec lock wait time up
1064 to this moment. */
1065 ulint total_table_lock_wait_time;
1066 /*!< Total table lock wait time
1067 up to this moment. */
1068
1069#ifdef WITH_WSREP
1070 os_event_t wsrep_event; /* event waited for in srv_conc_slot */
1071#endif /* WITH_WSREP */
1072
1073 rw_trx_hash_element_t *rw_trx_hash_element;
1074 LF_PINS *rw_trx_hash_pins;
1075 ulint magic_n;
1076
1077 /** @return whether any persistent undo log has been generated */
1078 bool has_logged_persistent() const
1079 {
1080 return(rsegs.m_redo.undo);
1081 }
1082
1083 /** @return whether any undo log has been generated */
1084 bool has_logged() const
1085 {
1086 return(has_logged_persistent() || rsegs.m_noredo.undo);
1087 }
1088
1089 /** @return whether any undo log has been generated or
1090 recovered */
1091 bool has_logged_or_recovered() const
1092 {
1093 return(has_logged() || rsegs.m_redo.old_insert);
1094 }
1095
1096 /** @return rollback segment for modifying temporary tables */
1097 trx_rseg_t* get_temp_rseg()
1098 {
1099 if (trx_rseg_t* rseg = rsegs.m_noredo.rseg) {
1100 ut_ad(id != 0);
1101 return(rseg);
1102 }
1103
1104 return(assign_temp_rseg());
1105 }
1106
1107
1108 bool is_referenced()
1109 {
1110 return my_atomic_load32_explicit(&n_ref, MY_MEMORY_ORDER_RELAXED) > 0;
1111 }
1112
1113
1114 void reference()
1115 {
1116#ifdef UNIV_DEBUG
1117 int32_t old_n_ref=
1118#endif
1119 my_atomic_add32_explicit(&n_ref, 1, MY_MEMORY_ORDER_RELAXED);
1120 ut_ad(old_n_ref >= 0);
1121 }
1122
1123
1124 void release_reference()
1125 {
1126#ifdef UNIV_DEBUG
1127 int32_t old_n_ref=
1128#endif
1129 my_atomic_add32_explicit(&n_ref, -1, MY_MEMORY_ORDER_RELAXED);
1130 ut_ad(old_n_ref > 0);
1131 }
1132
1133
private:
	/** Assign a rollback segment for modifying temporary tables.
	Invoked by get_temp_rseg() when rsegs.m_noredo.rseg is not set.
	@return the assigned rollback segment */
	trx_rseg_t*	assign_temp_rseg();
1138};
1139
1140/**
1141Check if transaction is started.
1142@param[in] trx Transaction whose state we need to check
1143@reutrn true if transaction is in state started */
1144inline bool trx_is_started(const trx_t* trx)
1145{
1146 return trx->state != TRX_STATE_NOT_STARTED;
1147}
1148
/* Transaction isolation levels (trx->isolation_level) */

/** Dirty read: non-locking SELECTs are performed so that we do not
look at a possible earlier version of a record; thus they are not
'consistent' reads under this isolation level; otherwise like
level 2 (TRX_ISO_REPEATABLE_READ) */
#define TRX_ISO_READ_UNCOMMITTED	0

/** Somewhat Oracle-like isolation, except that in range UPDATE and
DELETE we must block phantom rows with next-key locks;
SELECT ... FOR UPDATE and ... LOCK IN SHARE MODE only lock the index
records, NOT the gaps before them, and thus allow free inserting;
each consistent read reads its own snapshot */
#define TRX_ISO_READ_COMMITTED		1

/** This is the default; all consistent reads in the same trx read
the same snapshot; full next-key locking used in locking reads to
block insertions into gaps */
#define TRX_ISO_REPEATABLE_READ		2

/** All plain SELECTs are converted to LOCK IN SHARE MODE reads */
#define TRX_ISO_SERIALIZABLE		3
1183
/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
Multiple flags can be combined with bitwise OR. Each flag is a distinct
bit of an unsigned value. */
#define TRX_DUP_IGNORE	1U	/* duplicate rows are to be updated */
#define TRX_DUP_REPLACE	2U	/* duplicate rows are to be replaced */
1188
1189
/** Execution states of a commit node (see commit_node_t::state) */
enum commit_node_state {
	/** about to send a commit signal to the transaction */
	COMMIT_NODE_SEND = 1,
	/** commit signal sent to the transaction, waiting for
	completion */
	COMMIT_NODE_WAIT = 2
};
1197
1198/** Commit command node in a query graph */
1199struct commit_node_t{
1200 que_common_t common; /*!< node type: QUE_NODE_COMMIT */
1201 enum commit_node_state
1202 state; /*!< node execution state */
1203};
1204
1205
/* The macro argument is parenthesized in every expansion below so that
any pointer-valued expression (for example p + 1 or *pp) may be passed;
without the parentheses, -> would bind to only part of the argument. */

/** Test if trx->mutex is owned.
@param t	pointer to the transaction
@return the result of mutex_own() on t->mutex */
#define trx_mutex_own(t) mutex_own(&(t)->mutex)

/** Acquire the trx->mutex.
@param t	pointer to the transaction */
#define trx_mutex_enter(t) do {			\
	mutex_enter(&(t)->mutex);		\
} while (0)

/** Release the trx->mutex.
@param t	pointer to the transaction */
#define trx_mutex_exit(t) do {			\
	mutex_exit(&(t)->mutex);		\
} while (0)
1218
1219#include "trx0trx.ic"
1220
1221#endif
1222