| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | Copyright (c) 2015, 2018, MariaDB Corporation. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify it under |
| 7 | the terms of the GNU General Public License as published by the Free Software |
| 8 | Foundation; version 2 of the License. |
| 9 | |
| 10 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License along with |
| 15 | this program; if not, write to the Free Software Foundation, Inc., |
| 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 17 | |
| 18 | *****************************************************************************/ |
| 19 | |
| 20 | /**************************************************//** |
| 21 | @file trx/trx0trx.cc |
| 22 | The transaction |
| 23 | |
| 24 | Created 3/26/1996 Heikki Tuuri |
| 25 | *******************************************************/ |
| 26 | |
| 27 | #include "ha_prototypes.h" |
| 28 | |
| 29 | #include "trx0trx.h" |
| 30 | |
| 31 | #ifdef WITH_WSREP |
| 32 | #include <mysql/service_wsrep.h> |
| 33 | #endif |
| 34 | |
| 35 | #include <mysql/service_thd_error_context.h> |
| 36 | |
| 37 | #include "btr0sea.h" |
| 38 | #include "lock0lock.h" |
| 39 | #include "log0log.h" |
| 40 | #include "os0proc.h" |
| 41 | #include "que0que.h" |
| 42 | #include "srv0mon.h" |
| 43 | #include "srv0srv.h" |
| 44 | #include "fsp0sysspace.h" |
| 45 | #include "srv0start.h" |
| 46 | #include "trx0purge.h" |
| 47 | #include "trx0rec.h" |
| 48 | #include "trx0roll.h" |
| 49 | #include "trx0rseg.h" |
| 50 | #include "trx0undo.h" |
| 51 | #include "trx0xa.h" |
| 52 | #include "ut0new.h" |
| 53 | #include "ut0pool.h" |
| 54 | #include "ut0vec.h" |
| 55 | |
| 56 | #include <set> |
| 57 | #include <new> |
| 58 | |
/** The bit pattern corresponding to TRX_ID_MAX */
const byte trx_id_max_bytes[8] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/** The bit pattern corresponding to max timestamp.
NOTE(review): the bytes read as 0x7fffffff followed by 0x0f423f (999999
decimal), presumably a seconds part followed by a microseconds part;
confirm against the code that consumes this array. */
const byte timestamp_max_bytes[7] = {
	0x7f, 0xff, 0xff, 0xff, 0x0f, 0x42, 0x3f
};


/** Size in bytes of the trx_t::detailed_error buffer: this many bytes are
allocated for it in TrxFactory::init(), and the same value is passed as the
size limit when a message is copied into it or read into it from a file. */
static const ulint MAX_DETAILED_ERROR_LEN = 256;

/** Set of table_id */
typedef std::set<
	table_id_t,
	std::less<table_id_t>,
	ut_allocator<table_id_t> >	table_id_set;
| 77 | |
/** Set flush observer for the transaction.
The pointer is stored in trx->flush_observer as is; any previous value is
overwritten.
@param[in,out]	trx		transaction struct
@param[in]	observer	flush observer */
void
trx_set_flush_observer(
	trx_t*		trx,
	FlushObserver*	observer)
{
	trx->flush_observer = observer;
}
| 88 | |
/*************************************************************//**
Set detailed error message for the transaction.
The message is copied into trx->detailed_error with ut_strlcpy(), using
MAX_DETAILED_ERROR_LEN as the size limit. */
void
trx_set_detailed_error(
/*===================*/
	trx_t*		trx,	/*!< in: transaction struct */
	const char*	msg)	/*!< in: detailed error message */
{
	ut_strlcpy(trx->detailed_error, msg, MAX_DETAILED_ERROR_LEN);
}
| 99 | |
/*************************************************************//**
Set detailed error message for the transaction from a file. Note that the
file is rewound before reading from it.
The text is read into trx->detailed_error with os_file_read_string(), using
MAX_DETAILED_ERROR_LEN as the size limit. */
void
trx_set_detailed_error_from_file(
/*=============================*/
	trx_t*	trx,	/*!< in: transaction struct */
	FILE*	file)	/*!< in: file to read message from */
{
	os_file_read_string(file, trx->detailed_error, MAX_DETAILED_ERROR_LEN);
}
| 111 | |
| 112 | /********************************************************************//** |
| 113 | Initialize transaction object. |
| 114 | @param trx trx to initialize */ |
| 115 | static |
| 116 | void |
| 117 | trx_init( |
| 118 | /*=====*/ |
| 119 | trx_t* trx) |
| 120 | { |
| 121 | trx->id = 0; |
| 122 | |
| 123 | trx->no = TRX_ID_MAX; |
| 124 | |
| 125 | trx->state = TRX_STATE_NOT_STARTED; |
| 126 | |
| 127 | trx->is_recovered = false; |
| 128 | |
| 129 | trx->op_info = "" ; |
| 130 | |
| 131 | trx->active_commit_ordered = 0; |
| 132 | |
| 133 | trx->isolation_level = TRX_ISO_REPEATABLE_READ; |
| 134 | |
| 135 | trx->check_foreigns = true; |
| 136 | |
| 137 | trx->check_unique_secondary = true; |
| 138 | |
| 139 | trx->lock.n_rec_locks = 0; |
| 140 | |
| 141 | trx->dict_operation = TRX_DICT_OP_NONE; |
| 142 | |
| 143 | trx->table_id = 0; |
| 144 | |
| 145 | trx->error_state = DB_SUCCESS; |
| 146 | |
| 147 | trx->error_key_num = ULINT_UNDEFINED; |
| 148 | |
| 149 | trx->undo_no = 0; |
| 150 | |
| 151 | trx->rsegs.m_redo.rseg = NULL; |
| 152 | |
| 153 | trx->rsegs.m_noredo.rseg = NULL; |
| 154 | |
| 155 | trx->read_only = false; |
| 156 | |
| 157 | trx->auto_commit = false; |
| 158 | |
| 159 | trx->will_lock = 0; |
| 160 | |
| 161 | trx->ddl = false; |
| 162 | |
| 163 | trx->internal = false; |
| 164 | |
| 165 | ut_d(trx->start_file = 0); |
| 166 | |
| 167 | ut_d(trx->start_line = 0); |
| 168 | |
| 169 | trx->magic_n = TRX_MAGIC_N; |
| 170 | |
| 171 | trx->lock.que_state = TRX_QUE_RUNNING; |
| 172 | |
| 173 | trx->last_sql_stat_start.least_undo_no = 0; |
| 174 | |
| 175 | ut_ad(!trx->read_view.is_open()); |
| 176 | |
| 177 | trx->lock.rec_cached = 0; |
| 178 | |
| 179 | trx->lock.table_cached = 0; |
| 180 | |
| 181 | trx->flush_observer = NULL; |
| 182 | } |
| 183 | |
| 184 | /** For managing the life-cycle of the trx_t instance that we get |
| 185 | from the pool. */ |
| 186 | struct TrxFactory { |
| 187 | |
| 188 | /** Initializes a transaction object. It must be explicitly started |
| 189 | with trx_start_if_not_started() before using it. The default isolation |
| 190 | level is TRX_ISO_REPEATABLE_READ. |
| 191 | @param trx Transaction instance to initialise */ |
| 192 | static void init(trx_t* trx) |
| 193 | { |
| 194 | /* Explicitly call the constructor of the already |
| 195 | allocated object. trx_t objects are allocated by |
| 196 | ut_zalloc_nokey() in Pool::Pool() which would not call |
| 197 | the constructors of the trx_t members. */ |
| 198 | new(&trx->mod_tables) trx_mod_tables_t(); |
| 199 | |
| 200 | new(&trx->lock.rec_pool) lock_pool_t(); |
| 201 | |
| 202 | new(&trx->lock.table_pool) lock_pool_t(); |
| 203 | |
| 204 | new(&trx->lock.table_locks) lock_pool_t(); |
| 205 | |
| 206 | new(&trx->read_view) ReadView(); |
| 207 | |
| 208 | trx->rw_trx_hash_pins = 0; |
| 209 | trx_init(trx); |
| 210 | |
| 211 | trx->dict_operation_lock_mode = 0; |
| 212 | |
| 213 | trx->xid = UT_NEW_NOKEY(xid_t()); |
| 214 | |
| 215 | trx->detailed_error = reinterpret_cast<char*>( |
| 216 | ut_zalloc_nokey(MAX_DETAILED_ERROR_LEN)); |
| 217 | |
| 218 | trx->lock.lock_heap = mem_heap_create_typed( |
| 219 | 1024, MEM_HEAP_FOR_LOCK_HEAP); |
| 220 | |
| 221 | lock_trx_lock_list_init(&trx->lock.trx_locks); |
| 222 | |
| 223 | UT_LIST_INIT( |
| 224 | trx->trx_savepoints, |
| 225 | &trx_named_savept_t::trx_savepoints); |
| 226 | |
| 227 | mutex_create(LATCH_ID_TRX, &trx->mutex); |
| 228 | |
| 229 | lock_trx_alloc_locks(trx); |
| 230 | } |
| 231 | |
| 232 | /** Release resources held by the transaction object. |
| 233 | @param trx the transaction for which to release resources */ |
| 234 | static void destroy(trx_t* trx) |
| 235 | { |
| 236 | ut_a(trx->magic_n == TRX_MAGIC_N); |
| 237 | ut_ad(!trx->mysql_thd); |
| 238 | |
| 239 | ut_a(trx->lock.wait_lock == NULL); |
| 240 | ut_a(trx->lock.wait_thr == NULL); |
| 241 | ut_a(trx->dict_operation_lock_mode == 0); |
| 242 | |
| 243 | if (trx->lock.lock_heap != NULL) { |
| 244 | mem_heap_free(trx->lock.lock_heap); |
| 245 | trx->lock.lock_heap = NULL; |
| 246 | } |
| 247 | |
| 248 | ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); |
| 249 | |
| 250 | UT_DELETE(trx->xid); |
| 251 | ut_free(trx->detailed_error); |
| 252 | |
| 253 | mutex_free(&trx->mutex); |
| 254 | |
| 255 | trx->mod_tables.~trx_mod_tables_t(); |
| 256 | |
| 257 | ut_ad(!trx->read_view.is_open()); |
| 258 | |
| 259 | if (!trx->lock.rec_pool.empty()) { |
| 260 | |
| 261 | /* See lock_trx_alloc_locks() why we only free |
| 262 | the first element. */ |
| 263 | |
| 264 | ut_free(trx->lock.rec_pool[0]); |
| 265 | } |
| 266 | |
| 267 | if (!trx->lock.table_pool.empty()) { |
| 268 | |
| 269 | /* See lock_trx_alloc_locks() why we only free |
| 270 | the first element. */ |
| 271 | |
| 272 | ut_free(trx->lock.table_pool[0]); |
| 273 | } |
| 274 | |
| 275 | trx->lock.rec_pool.~lock_pool_t(); |
| 276 | |
| 277 | trx->lock.table_pool.~lock_pool_t(); |
| 278 | |
| 279 | trx->lock.table_locks.~lock_pool_t(); |
| 280 | |
| 281 | trx->read_view.~ReadView(); |
| 282 | } |
| 283 | |
| 284 | /** Enforce any invariants here, this is called before the transaction |
| 285 | is added to the pool. |
| 286 | @return true if all OK */ |
| 287 | static bool debug(const trx_t* trx) |
| 288 | { |
| 289 | ut_a(trx->error_state == DB_SUCCESS); |
| 290 | |
| 291 | ut_a(trx->magic_n == TRX_MAGIC_N); |
| 292 | |
| 293 | ut_ad(!trx->read_only); |
| 294 | |
| 295 | ut_ad(trx->state == TRX_STATE_NOT_STARTED); |
| 296 | |
| 297 | ut_ad(trx->dict_operation == TRX_DICT_OP_NONE); |
| 298 | |
| 299 | ut_ad(trx->mysql_thd == 0); |
| 300 | |
| 301 | ut_a(trx->lock.wait_thr == NULL); |
| 302 | ut_a(trx->lock.wait_lock == NULL); |
| 303 | ut_a(trx->dict_operation_lock_mode == 0); |
| 304 | |
| 305 | ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); |
| 306 | |
| 307 | ut_ad(trx->autoinc_locks == NULL); |
| 308 | |
| 309 | ut_ad(trx->lock.table_locks.empty()); |
| 310 | |
| 311 | return(true); |
| 312 | } |
| 313 | }; |
| 314 | |
/** The lock strategy for TrxPool: a wrapper around one ib_mutex_t whose
creation and destruction are explicit (create() / destroy()) rather than
tied to the constructor. */
struct TrxPoolLock {
	/** Does nothing; the mutex is set up by create(). */
	TrxPoolLock() { }

	/** Create the mutex */
	void create()
	{
		mutex_create(LATCH_ID_TRX_POOL, &m_mutex);
	}

	/** Acquire the mutex */
	void enter() { mutex_enter(&m_mutex); }

	/** Release the mutex */
	void exit() { mutex_exit(&m_mutex); }

	/** Free the mutex */
	void destroy() { mutex_free(&m_mutex); }

	/** Mutex to use */
	ib_mutex_t	m_mutex;
};
| 337 | |
/** The lock strategy for the TrxPoolManager: a wrapper around one
ib_mutex_t whose creation and destruction are explicit (create() /
destroy()) rather than tied to the constructor. */
struct TrxPoolManagerLock {
	/** Does nothing; the mutex is set up by create(). */
	TrxPoolManagerLock() { }

	/** Create the mutex */
	void create()
	{
		mutex_create(LATCH_ID_TRX_POOL_MANAGER, &m_mutex);
	}

	/** Acquire the mutex */
	void enter() { mutex_enter(&m_mutex); }

	/** Release the mutex */
	void exit() { mutex_exit(&m_mutex); }

	/** Free the mutex */
	void destroy() { mutex_free(&m_mutex); }

	/** Mutex to use */
	ib_mutex_t	m_mutex;
};
| 360 | |
/** Use explicit mutexes for the trx_t pool and its manager. */
typedef Pool<trx_t, TrxFactory, TrxPoolLock> trx_pool_t;
typedef PoolManager<trx_pool_t, TrxPoolManagerLock > trx_pools_t;

/** The trx_t pool manager; allocated by trx_pool_init() and released by
trx_pool_close(). */
static trx_pools_t* trx_pools;

/** Size of one trx_t pool in bytes (4 MiB); passed to the trx_pools_t
constructor in trx_pool_init(). */
static const ulint MAX_TRX_BLOCK_SIZE = 1024 * 1024 * 4;
| 370 | |
/** Create the trx_t pool.
Allocates the pool manager with MAX_TRX_BLOCK_SIZE as its constructor
argument and stores it in trx_pools; the result is asserted to be
non-null. */
void
trx_pool_init()
{
	trx_pools = UT_NEW_NOKEY(trx_pools_t(MAX_TRX_BLOCK_SIZE));

	ut_a(trx_pools != 0);
}
| 379 | |
/** Destroy the trx_t pool.
Deletes the pool manager and resets trx_pools to null so that a stale
pointer is not left behind. */
void
trx_pool_close()
{
	UT_DELETE(trx_pools);

	trx_pools = 0;
}
| 388 | |
| 389 | /** @return a trx_t instance from trx_pools. */ |
| 390 | trx_t *trx_create() |
| 391 | { |
| 392 | trx_t* trx = trx_pools->get(); |
| 393 | |
| 394 | assert_trx_is_free(trx); |
| 395 | |
| 396 | mem_heap_t* heap; |
| 397 | ib_alloc_t* alloc; |
| 398 | |
| 399 | /* We just got trx from pool, it should be non locking */ |
| 400 | ut_ad(trx->will_lock == 0); |
| 401 | ut_ad(trx->state == TRX_STATE_NOT_STARTED); |
| 402 | ut_ad(!trx->rw_trx_hash_pins); |
| 403 | |
| 404 | DBUG_LOG("trx" , "Create: " << trx); |
| 405 | |
| 406 | heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8); |
| 407 | |
| 408 | alloc = ib_heap_allocator_create(heap); |
| 409 | |
| 410 | /* Remember to free the vector explicitly in trx_free(). */ |
| 411 | trx->autoinc_locks = ib_vector_create(alloc, sizeof(void**), 4); |
| 412 | |
| 413 | /* Should have been either just initialized or .clear()ed by |
| 414 | trx_free(). */ |
| 415 | ut_a(trx->mod_tables.size() == 0); |
| 416 | |
| 417 | #ifdef WITH_WSREP |
| 418 | trx->wsrep_event = NULL; |
| 419 | #endif /* WITH_WSREP */ |
| 420 | |
| 421 | trx_sys.register_trx(trx); |
| 422 | |
| 423 | return(trx); |
| 424 | } |
| 425 | |
| 426 | /** |
| 427 | Release a trx_t instance back to the pool. |
| 428 | @param trx the instance to release. |
| 429 | */ |
| 430 | void trx_free(trx_t*& trx) |
| 431 | { |
| 432 | ut_ad(!trx->declared_to_be_inside_innodb); |
| 433 | ut_ad(!trx->n_mysql_tables_in_use); |
| 434 | ut_ad(!trx->mysql_n_tables_locked); |
| 435 | ut_ad(!trx->internal); |
| 436 | |
| 437 | if (trx->declared_to_be_inside_innodb) { |
| 438 | |
| 439 | ib::error() << "Freeing a trx (" << trx << ", " |
| 440 | << trx_get_id_for_print(trx) << ") which is declared" |
| 441 | " to be processing inside InnoDB" ; |
| 442 | |
| 443 | trx_print(stderr, trx, 600); |
| 444 | putc('\n', stderr); |
| 445 | |
| 446 | /* This is an error but not a fatal error. We must keep |
| 447 | the counters like srv_conc.n_active accurate. */ |
| 448 | srv_conc_force_exit_innodb(trx); |
| 449 | } |
| 450 | |
| 451 | if (trx->n_mysql_tables_in_use != 0 |
| 452 | || trx->mysql_n_tables_locked != 0) { |
| 453 | |
| 454 | ib::error() << "MySQL is freeing a thd though" |
| 455 | " trx->n_mysql_tables_in_use is " |
| 456 | << trx->n_mysql_tables_in_use |
| 457 | << " and trx->mysql_n_tables_locked is " |
| 458 | << trx->mysql_n_tables_locked << "." ; |
| 459 | |
| 460 | trx_print(stderr, trx, 600); |
| 461 | ut_print_buf(stderr, trx, sizeof(trx_t)); |
| 462 | putc('\n', stderr); |
| 463 | } |
| 464 | |
| 465 | trx->dict_operation = TRX_DICT_OP_NONE; |
| 466 | assert_trx_is_inactive(trx); |
| 467 | |
| 468 | trx_sys.deregister_trx(trx); |
| 469 | |
| 470 | assert_trx_is_free(trx); |
| 471 | |
| 472 | trx_sys.rw_trx_hash.put_pins(trx); |
| 473 | trx->mysql_thd = 0; |
| 474 | trx->mysql_log_file_name = 0; |
| 475 | |
| 476 | // FIXME: We need to avoid this heap free/alloc for each commit. |
| 477 | if (trx->autoinc_locks != NULL) { |
| 478 | ut_ad(ib_vector_is_empty(trx->autoinc_locks)); |
| 479 | /* We allocated a dedicated heap for the vector. */ |
| 480 | ib_vector_free(trx->autoinc_locks); |
| 481 | trx->autoinc_locks = NULL; |
| 482 | } |
| 483 | |
| 484 | trx->mod_tables.clear(); |
| 485 | |
| 486 | /* trx locking state should have been reset before returning trx |
| 487 | to pool */ |
| 488 | ut_ad(trx->will_lock == 0); |
| 489 | |
| 490 | trx_pools->mem_free(trx); |
| 491 | /* Unpoison the memory for innodb_monitor_set_option; |
| 492 | it is operating also on the freed transaction objects. */ |
| 493 | MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex); |
| 494 | /* Declare the contents as initialized for Valgrind; |
| 495 | we checked that it was initialized in trx_pools->mem_free(trx). */ |
| 496 | UNIV_MEM_VALID(&trx->mutex, sizeof trx->mutex); |
| 497 | |
| 498 | trx = NULL; |
| 499 | } |
| 500 | |
/** At shutdown, frees a transaction object.
The transaction must be a recovered one (debug assertion) and must be
either in TRX_STATE_PREPARED, or in TRX_STATE_ACTIVE under one of the
conditions spelled out in the assertion below. Its locks and undo logs are
released, its state is reset to TRX_STATE_NOT_STARTED, and it is then
handed to trx_free().
@param[in,out]	trx	transaction to free */
void
trx_free_at_shutdown(trx_t *trx)
{
	ut_ad(trx->is_recovered);
	ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
	     || (trx_state_eq(trx, TRX_STATE_ACTIVE)
		 && (!srv_was_started
		     || srv_operation == SRV_OPERATION_RESTORE
		     || srv_operation == SRV_OPERATION_RESTORE_EXPORT
		     || srv_read_only_mode
		     || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
		     || (!srv_is_being_started
			 && !srv_undo_sources && srv_fast_shutdown))));
	ut_a(trx->magic_n == TRX_MAGIC_N);

	lock_trx_release_locks(trx);
	trx_undo_free_at_shutdown(trx);

	ut_a(!trx->read_only);

	DBUG_LOG("trx", "Free prepared: " << trx);
	trx->state = TRX_STATE_NOT_STARTED;

	/* Undo trx_resurrect_table_locks(). */
	lock_trx_lock_list_init(&trx->lock.trx_locks);

	/* Note: This vector is not guaranteed to be empty because the
	transaction was never committed and therefore lock_trx_release()
	was not called. */
	trx->lock.table_locks.clear();

	/* trx_free() takes the pointer by reference and nulls it; only the
	local parameter copy is affected. */
	trx_free(trx);
}
| 535 | |
| 536 | |
/**
  Disconnect a prepared transaction from MySQL

  The transaction must be in TRX_STATE_PREPARED and attached to a
  connection (both checked by debug assertions). Its read view is closed,
  it is flagged as recovered, its mysql_thd pointer is cleared and
  will_lock is reset to 0; the transaction state itself is not changed
  here.

  @param[in,out] trx transaction
*/
void trx_disconnect_prepared(trx_t *trx)
{
  ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
  ut_ad(trx->mysql_thd);
  trx->read_view.close();
  trx->is_recovered= true;
  trx->mysql_thd= NULL;
  /* todo/fixme: suggest to do it at innodb prepare */
  trx->will_lock= 0;
}
| 551 | |
| 552 | /****************************************************************//** |
| 553 | Resurrect the table locks for a resurrected transaction. */ |
| 554 | static |
| 555 | void |
| 556 | trx_resurrect_table_locks( |
| 557 | /*======================*/ |
| 558 | trx_t* trx, /*!< in/out: transaction */ |
| 559 | const trx_undo_t* undo) /*!< in: undo log */ |
| 560 | { |
| 561 | mtr_t mtr; |
| 562 | page_t* undo_page; |
| 563 | trx_undo_rec_t* undo_rec; |
| 564 | table_id_set tables; |
| 565 | |
| 566 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || |
| 567 | trx_state_eq(trx, TRX_STATE_PREPARED)); |
| 568 | ut_ad(undo->rseg == trx->rsegs.m_redo.rseg); |
| 569 | |
| 570 | if (undo->empty()) { |
| 571 | return; |
| 572 | } |
| 573 | |
| 574 | mtr_start(&mtr); |
| 575 | |
| 576 | /* trx_rseg_mem_create() may have acquired an X-latch on this |
| 577 | page, so we cannot acquire an S-latch. */ |
| 578 | undo_page = trx_undo_page_get( |
| 579 | page_id_t(trx->rsegs.m_redo.rseg->space->id, |
| 580 | undo->top_page_no), &mtr); |
| 581 | |
| 582 | undo_rec = undo_page + undo->top_offset; |
| 583 | |
| 584 | do { |
| 585 | ulint type; |
| 586 | undo_no_t undo_no; |
| 587 | table_id_t table_id; |
| 588 | ulint cmpl_info; |
| 589 | bool updated_extern; |
| 590 | |
| 591 | page_t* undo_rec_page = page_align(undo_rec); |
| 592 | |
| 593 | if (undo_rec_page != undo_page) { |
| 594 | mtr.release_page(undo_page, MTR_MEMO_PAGE_X_FIX); |
| 595 | undo_page = undo_rec_page; |
| 596 | } |
| 597 | |
| 598 | trx_undo_rec_get_pars( |
| 599 | undo_rec, &type, &cmpl_info, |
| 600 | &updated_extern, &undo_no, &table_id); |
| 601 | tables.insert(table_id); |
| 602 | |
| 603 | undo_rec = trx_undo_get_prev_rec( |
| 604 | undo_rec, undo->hdr_page_no, |
| 605 | undo->hdr_offset, false, &mtr); |
| 606 | } while (undo_rec); |
| 607 | |
| 608 | mtr_commit(&mtr); |
| 609 | |
| 610 | for (table_id_set::const_iterator i = tables.begin(); |
| 611 | i != tables.end(); i++) { |
| 612 | if (dict_table_t* table = dict_table_open_on_id( |
| 613 | *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) { |
| 614 | if (!table->is_readable()) { |
| 615 | mutex_enter(&dict_sys->mutex); |
| 616 | dict_table_close(table, TRUE, FALSE); |
| 617 | dict_table_remove_from_cache(table); |
| 618 | mutex_exit(&dict_sys->mutex); |
| 619 | continue; |
| 620 | } |
| 621 | |
| 622 | if (trx->state == TRX_STATE_PREPARED) { |
| 623 | trx->mod_tables.insert( |
| 624 | trx_mod_tables_t::value_type(table, |
| 625 | 0)); |
| 626 | } |
| 627 | lock_table_ix_resurrect(table, trx); |
| 628 | |
| 629 | DBUG_LOG("ib_trx" , |
| 630 | "resurrect " << ib::hex(trx->id) |
| 631 | << " IX lock on " << table->name); |
| 632 | |
| 633 | dict_table_close(table, FALSE, FALSE); |
| 634 | } |
| 635 | } |
| 636 | } |
| 637 | |
| 638 | |
| 639 | /** |
| 640 | Resurrect the transactions that were doing inserts/updates the time of the |
| 641 | crash, they need to be undone. |
| 642 | */ |
| 643 | |
| 644 | static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, |
| 645 | ib_time_t start_time, uint64_t *rows_to_undo, |
| 646 | bool is_old_insert) |
| 647 | { |
| 648 | trx_state_t state; |
| 649 | /* |
| 650 | This is single-threaded startup code, we do not need the |
| 651 | protection of trx->mutex or trx_sys.mutex here. |
| 652 | */ |
| 653 | switch (undo->state) |
| 654 | { |
| 655 | case TRX_UNDO_ACTIVE: |
| 656 | state= TRX_STATE_ACTIVE; |
| 657 | break; |
| 658 | case TRX_UNDO_PREPARED: |
| 659 | /* |
| 660 | Prepared transactions are left in the prepared state |
| 661 | waiting for a commit or abort decision from MySQL |
| 662 | */ |
| 663 | ib::info() << "Transaction " << undo->trx_id |
| 664 | << " was in the XA prepared state." ; |
| 665 | |
| 666 | state= TRX_STATE_PREPARED; |
| 667 | break; |
| 668 | default: |
| 669 | if (is_old_insert && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) |
| 670 | trx_undo_commit_cleanup(undo, false); |
| 671 | return; |
| 672 | } |
| 673 | |
| 674 | trx_t *trx= trx_create(); |
| 675 | trx->state= state; |
| 676 | ut_d(trx->start_file= __FILE__); |
| 677 | ut_d(trx->start_line= __LINE__); |
| 678 | ut_ad(trx->no == TRX_ID_MAX); |
| 679 | |
| 680 | if (is_old_insert) |
| 681 | trx->rsegs.m_redo.old_insert= undo; |
| 682 | else |
| 683 | trx->rsegs.m_redo.undo= undo; |
| 684 | |
| 685 | trx->undo_no= undo->top_undo_no + 1; |
| 686 | trx->rsegs.m_redo.rseg= rseg; |
| 687 | /* |
| 688 | For transactions with active data will not have rseg size = 1 |
| 689 | or will not qualify for purge limit criteria. So it is safe to increment |
| 690 | this trx_ref_count w/o mutex protection. |
| 691 | */ |
| 692 | ++trx->rsegs.m_redo.rseg->trx_ref_count; |
| 693 | *trx->xid= undo->xid; |
| 694 | trx->id= undo->trx_id; |
| 695 | trx->is_recovered= true; |
| 696 | trx->start_time= start_time; |
| 697 | |
| 698 | if (undo->dict_operation) |
| 699 | { |
| 700 | trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); |
| 701 | trx->table_id= undo->table_id; |
| 702 | } |
| 703 | |
| 704 | trx_sys.rw_trx_hash.insert(trx); |
| 705 | trx_sys.rw_trx_hash.put_pins(trx); |
| 706 | trx_resurrect_table_locks(trx, undo); |
| 707 | if (trx_state_eq(trx, TRX_STATE_ACTIVE)) |
| 708 | *rows_to_undo+= trx->undo_no; |
| 709 | } |
| 710 | |
| 711 | |
| 712 | /** Initialize (resurrect) transactions at startup. */ |
| 713 | void |
| 714 | trx_lists_init_at_db_start() |
| 715 | { |
| 716 | ut_a(srv_is_being_started); |
| 717 | ut_ad(!srv_was_started); |
| 718 | |
| 719 | if (srv_operation == SRV_OPERATION_RESTORE) { |
| 720 | /* mariabackup --prepare only deals with |
| 721 | the redo log and the data files, not with |
| 722 | transactions or the data dictionary. */ |
| 723 | trx_rseg_array_init(); |
| 724 | return; |
| 725 | } |
| 726 | |
| 727 | if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { |
| 728 | return; |
| 729 | } |
| 730 | |
| 731 | purge_sys.create(); |
| 732 | trx_rseg_array_init(); |
| 733 | |
| 734 | /* Look from the rollback segments if there exist undo logs for |
| 735 | transactions. */ |
| 736 | const ib_time_t start_time = ut_time(); |
| 737 | uint64_t rows_to_undo = 0; |
| 738 | |
| 739 | for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { |
| 740 | trx_undo_t* undo; |
| 741 | trx_rseg_t* rseg = trx_sys.rseg_array[i]; |
| 742 | |
| 743 | /* Some rollback segment may be unavailable, |
| 744 | especially if the server was previously run with a |
| 745 | non-default value of innodb_undo_logs. */ |
| 746 | if (rseg == NULL) { |
| 747 | continue; |
| 748 | } |
| 749 | |
| 750 | /* Resurrect transactions that were doing inserts |
| 751 | using the old separate insert_undo log. */ |
| 752 | undo = UT_LIST_GET_FIRST(rseg->old_insert_list); |
| 753 | while (undo) { |
| 754 | trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo); |
| 755 | trx_resurrect(undo, rseg, start_time, &rows_to_undo, |
| 756 | true); |
| 757 | undo = next; |
| 758 | } |
| 759 | |
| 760 | /* Ressurrect other transactions. */ |
| 761 | for (undo = UT_LIST_GET_FIRST(rseg->undo_list); |
| 762 | undo != NULL; |
| 763 | undo = UT_LIST_GET_NEXT(undo_list, undo)) { |
| 764 | trx_t *trx = trx_sys.rw_trx_hash.find(0, undo->trx_id); |
| 765 | if (!trx) { |
| 766 | trx_resurrect(undo, rseg, start_time, |
| 767 | &rows_to_undo, false); |
| 768 | } else { |
| 769 | ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || |
| 770 | trx_state_eq(trx, TRX_STATE_PREPARED)); |
| 771 | ut_ad(trx->start_time == start_time); |
| 772 | ut_ad(trx->is_recovered); |
| 773 | ut_ad(trx->rsegs.m_redo.rseg == rseg); |
| 774 | ut_ad(trx->rsegs.m_redo.rseg->trx_ref_count); |
| 775 | |
| 776 | trx->rsegs.m_redo.undo = undo; |
| 777 | if (undo->top_undo_no >= trx->undo_no) { |
| 778 | if (trx_state_eq(trx, |
| 779 | TRX_STATE_ACTIVE)) { |
| 780 | rows_to_undo -= trx->undo_no; |
| 781 | rows_to_undo += |
| 782 | undo->top_undo_no + 1; |
| 783 | } |
| 784 | |
| 785 | trx->undo_no = undo->top_undo_no + 1; |
| 786 | } |
| 787 | trx_resurrect_table_locks(trx, undo); |
| 788 | } |
| 789 | } |
| 790 | } |
| 791 | |
| 792 | if (trx_sys.rw_trx_hash.size()) { |
| 793 | |
| 794 | ib::info() << trx_sys.rw_trx_hash.size() |
| 795 | << " transaction(s) which must be rolled back or" |
| 796 | " cleaned up in total " << rows_to_undo |
| 797 | << " row operations to undo" ; |
| 798 | |
| 799 | ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id(); |
| 800 | } |
| 801 | trx_sys.clone_oldest_view(); |
| 802 | } |
| 803 | |
/** Assign a persistent rollback segment in a round-robin fashion,
evenly distributed between 0 and innodb_undo_logs-1.
On success the trx_ref_count of the returned segment has been incremented
while holding its mutex.
@return persistent rollback segment
@retval NULL if innodb_read_only */
static
trx_rseg_t*
trx_assign_rseg_low()
{
	if (srv_read_only_mode) {
		ut_ad(srv_undo_logs == ULONG_UNDEFINED);
		return(NULL);
	}

	/* The first slot is always assigned to the system tablespace. */
	ut_ad(trx_sys.rseg_array[0]->space == fil_system.sys_space);

	/* Choose a rollback segment evenly distributed between 0 and
	innodb_undo_logs-1 in a round-robin fashion, skipping those
	undo tablespaces that are scheduled for truncation.

	Because rseg_slot is not protected by atomics or any mutex, race
	conditions are possible, meaning that multiple transactions
	that start modifications concurrently will write their undo
	log to the same rollback segment. */
	static ulong	rseg_slot;
	ulint		slot = rseg_slot++ % srv_undo_logs;
	trx_rseg_t*	rseg;

#ifdef UNIV_DEBUG
	ulint	start_scan_slot = slot;
	bool	look_for_rollover = false;
#endif /* UNIV_DEBUG */

	bool	allocated = false;

	/* Outer loop: repeat until a candidate has been marked allocated.
	Inner loop: scan the slots for a candidate. */
	do {
		for (;;) {
			rseg = trx_sys.rseg_array[slot];

#ifdef UNIV_DEBUG
			/* Ensure that we are not revisiting the same
			slot that we have already inspected. */
			if (look_for_rollover) {
				ut_ad(start_scan_slot != slot);
			}
			look_for_rollover = true;
#endif /* UNIV_DEBUG */

			/* Advance first: from here on, slot names the
			slot that follows the candidate in rseg. */
			slot = (slot + 1) % srv_undo_logs;

			if (rseg == NULL) {
				continue;
			}

			ut_ad(rseg->is_persistent());

			if (rseg->space != fil_system.sys_space) {
				ut_ad(srv_undo_tablespaces > 1);
				if (rseg->skip_allocation) {
					continue;
				}
			} else if (trx_rseg_t* next
				   = trx_sys.rseg_array[slot]) {
				/* The candidate is in the system tablespace;
				look at the segment in the following slot. */
				if (next->space != fil_system.sys_space
				    && srv_undo_tablespaces > 0) {
					/** If dedicated
					innodb_undo_tablespaces have
					been configured, try to use them
					instead of the system tablespace. */
					continue;
				}
			}

			break;
		}

		/* By now we have only selected the rseg but not marked it
		allocated. By marking it allocated we are ensuring that it will
		never be selected for UNDO truncate purge. */
		mutex_enter(&rseg->mutex);
		/* skip_allocation is tested again under the mutex; if it is
		set, nothing is allocated and the scan resumes. */
		if (!rseg->skip_allocation) {
			rseg->trx_ref_count++;
			allocated = true;
		}
		mutex_exit(&rseg->mutex);
	} while (!allocated);

	ut_ad(rseg->trx_ref_count > 0);
	ut_ad(rseg->is_persistent());
	return(rseg);
}
| 895 | |
| 896 | /** Assign a rollback segment for modifying temporary tables. |
| 897 | @return the assigned rollback segment */ |
| 898 | trx_rseg_t* |
| 899 | trx_t::assign_temp_rseg() |
| 900 | { |
| 901 | ut_ad(!rsegs.m_noredo.rseg); |
| 902 | ut_ad(!trx_is_autocommit_non_locking(this)); |
| 903 | compile_time_assert(ut_is_2pow(TRX_SYS_N_RSEGS)); |
| 904 | |
| 905 | /* Choose a temporary rollback segment between 0 and 127 |
| 906 | in a round-robin fashion. Because rseg_slot is not protected by |
| 907 | atomics or any mutex, race conditions are possible, meaning that |
| 908 | multiple transactions that start modifications concurrently |
| 909 | will write their undo log to the same rollback segment. */ |
| 910 | static ulong rseg_slot; |
| 911 | trx_rseg_t* rseg = trx_sys.temp_rsegs[ |
| 912 | rseg_slot++ & (TRX_SYS_N_RSEGS - 1)]; |
| 913 | ut_ad(!rseg->is_persistent()); |
| 914 | rsegs.m_noredo.rseg = rseg; |
| 915 | |
| 916 | if (id == 0) { |
| 917 | trx_sys.register_rw(this); |
| 918 | } |
| 919 | |
| 920 | ut_ad(!rseg->is_persistent()); |
| 921 | return(rseg); |
| 922 | } |
| 923 | |
/****************************************************************//**
Starts a transaction.
Derives trx->auto_commit and trx->read_only from the session, marks the
transaction TRX_STATE_ACTIVE, assigns a redo rollback segment and/or
calls trx_sys.register_rw() where required, and records the start time.
In debug builds it asserts that the transaction is in
TRX_STATE_NOT_STARTED, has no rollback segments assigned, holds no locks
and has its start_file/start_line debug fields set by the caller. */
static
void
trx_start_low(
/*==========*/
	trx_t*	trx,		/*!< in/out: transaction */
	bool	read_write)	/*!< in: true if read-write transaction */
{
	ut_ad(!trx->in_rollback);
	ut_ad(!trx->is_recovered);
	ut_ad(trx->start_line != 0);
	ut_ad(trx->start_file != 0);
	ut_ad(trx->roll_limit == 0);
	ut_ad(trx->error_state == DB_SUCCESS);
	ut_ad(trx->rsegs.m_redo.rseg == NULL);
	ut_ad(trx->rsegs.m_noredo.rseg == NULL);
	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
	ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

	/* Check whether it is an AUTOCOMMIT SELECT */
	trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);

	/* Everything is read-only in srv_read_only_mode. Otherwise only
	transactions that are neither DDL nor internal can be read-only, and
	only if the session says so. */
	trx->read_only = srv_read_only_mode
		|| (!trx->ddl && !trx->internal
		    && thd_trx_is_read_only(trx->mysql_thd));

	/* A transaction that is not auto-commit is counted as intending to
	lock. An auto-commit transaction with no locking intent is handled
	as read-only. */
	if (!trx->auto_commit) {
		++trx->will_lock;
	} else if (trx->will_lock == 0) {
		trx->read_only = true;
	}

#ifdef WITH_WSREP
	/* Clear the XID. NOTE(review): formatID = -1 presumably marks the
	XID as unset; confirm against the xid_t definition. */
	memset(trx->xid, 0, sizeof(xid_t));
	trx->xid->formatID = -1;
#endif /* WITH_WSREP */

	/* The initial value for trx->no: TRX_ID_MAX is used in
	read_view_open_now: */

	trx->no = TRX_ID_MAX;

	ut_a(ib_vector_is_empty(trx->autoinc_locks));
	ut_a(trx->lock.table_locks.empty());

	/* No other thread can access this trx object through rw_trx_hash, thus
	we don't need trx_sys.mutex protection for that purpose. Still this
	trx can be found through trx_sys.trx_list, which means state
	change must be protected by e.g. trx->mutex.

	For now we update it without mutex protection, because original code
	did it this way. It has to be reviewed and fixed properly. */
	trx->state = TRX_STATE_ACTIVE;

	/* By default all transactions are in the read-only list unless they
	are non-locking auto-commit read only transactions or background
	(internal) transactions. Note: Transactions marked explicitly as
	read only can write to temporary tables, we put those on the RO
	list too. */

	if (!trx->read_only
	    && (trx->mysql_thd == 0 || read_write || trx->ddl)) {

		/* Temporary rseg is assigned only if the transaction
		updates a temporary table */
		trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
		ut_ad(trx->rsegs.m_redo.rseg != 0
		      || srv_read_only_mode
		      || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);

		trx_sys.register_rw(trx);
	} else {
		trx->id = 0;

		if (!trx_is_autocommit_non_locking(trx)) {

			/* If this is a read-only transaction that is writing
			to a temporary table then it needs a transaction id
			to write to the temporary table. */

			if (read_write) {
				ut_ad(!srv_read_only_mode);
				trx_sys.register_rw(trx);
			}
		} else {
			/* A non-locking auto-commit transaction must not be
			started as read-write. */
			ut_ad(!read_write);
		}
	}

	/* With a session, take the start time from the session. Without
	one (mysql_thd == NULL), use the current time and leave the
	microsecond start time at 0. */
	if (trx->mysql_thd != NULL) {
		trx->start_time = thd_start_time_in_secs(trx->mysql_thd);
		trx->start_time_micro = thd_query_start_micro(trx->mysql_thd);

	} else {
		trx->start_time = ut_time();
		trx->start_time_micro = 0;
	}

	ut_a(trx->error_state == DB_SUCCESS);

	MONITOR_INC(MONITOR_TRX_ACTIVE);
}
| 1027 | |
| 1028 | /** Set the serialisation number for a persistent committed transaction. |
| 1029 | @param[in,out] trx committed transaction with persistent changes */ |
| 1030 | static |
| 1031 | void |
| 1032 | trx_serialise(trx_t* trx) |
| 1033 | { |
| 1034 | trx_rseg_t *rseg = trx->rsegs.m_redo.rseg; |
| 1035 | ut_ad(rseg); |
| 1036 | ut_ad(mutex_own(&rseg->mutex)); |
| 1037 | |
| 1038 | if (rseg->last_page_no == FIL_NULL) { |
| 1039 | mutex_enter(&purge_sys.pq_mutex); |
| 1040 | } |
| 1041 | |
| 1042 | trx_sys.assign_new_trx_no(trx); |
| 1043 | |
| 1044 | /* If the rollback segment is not empty then the |
| 1045 | new trx_t::no can't be less than any trx_t::no |
| 1046 | already in the rollback segment. User threads only |
| 1047 | produce events when a rollback segment is empty. */ |
| 1048 | if (rseg->last_page_no == FIL_NULL) { |
| 1049 | purge_sys.purge_queue.push(TrxUndoRsegs(trx->no, *rseg)); |
| 1050 | mutex_exit(&purge_sys.pq_mutex); |
| 1051 | } |
| 1052 | } |
| 1053 | |
/****************************************************************//**
Assign the transaction its history serialisation number and write the
update UNDO log record to the assigned rollback segment.
A temporary-table undo log, if any, is finished first in its own
mini-transaction with redo logging disabled. If the transaction has no
redo rollback segment or no persistent undo log, nothing further is
done. */
static
void
trx_write_serialisation_history(
/*============================*/
	trx_t*		trx,	/*!< in/out: transaction */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
	/* Change the undo log segment states from TRX_UNDO_ACTIVE to some
	other state: these modifications to the file data structure define
	the transaction as committed in the file based domain, at the
	serialization point of the log sequence number lsn obtained below. */

	/* We have to hold the rseg mutex because update log headers have
	to be put to the history list in the (serialisation) order of the
	UNDO trx number. This is required for the purge in-memory data
	structures too. */

	if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
		/* Undo log for temporary tables is discarded at transaction
		commit. There is no purge for temporary tables, and also no
		MVCC, because they are private to a session. */

		mtr_t	temp_mtr;
		temp_mtr.start();
		temp_mtr.set_log_mode(MTR_LOG_NO_REDO);

		mutex_enter(&trx->rsegs.m_noredo.rseg->mutex);
		trx_undo_set_state_at_finish(undo, &temp_mtr);
		mutex_exit(&trx->rsegs.m_noredo.rseg->mutex);
		temp_mtr.commit();
	}

	trx_rseg_t*	rseg = trx->rsegs.m_redo.rseg;
	if (!rseg) {
		/* Without a redo rollback segment there can be no
		persistent undo log. */
		ut_ad(!trx->rsegs.m_redo.undo);
		ut_ad(!trx->rsegs.m_redo.old_insert);
		return;
	}

	/* References to the pointers stored in the transaction object. */
	trx_undo_t*&	undo = trx->rsegs.m_redo.undo;
	trx_undo_t*&	old_insert = trx->rsegs.m_redo.old_insert;

	if (!undo && !old_insert) {
		/* No persistent undo log exists for this transaction. */
		return;
	}

	ut_ad(!trx->read_only);
	ut_ad(!undo || undo->rseg == rseg);
	ut_ad(!old_insert || old_insert->rseg == rseg);
	mutex_enter(&rseg->mutex);

	/* Assign the transaction serialisation number and add any
	undo log to the purge queue. */
	trx_serialise(trx);

	/* Move each existing undo log from its rollback segment list to
	the history, old_insert first. */
	if (UNIV_LIKELY_NULL(old_insert)) {
		UT_LIST_REMOVE(rseg->old_insert_list, old_insert);
		trx_purge_add_undo_to_history(trx, old_insert, mtr);
	}
	if (undo) {
		UT_LIST_REMOVE(rseg->undo_list, undo);
		trx_purge_add_undo_to_history(trx, undo, mtr);
	}

	mutex_exit(&rseg->mutex);

	MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);

	trx->mysql_log_file_name = NULL;
}
| 1127 | |
| 1128 | /******************************************************************** |
| 1129 | Finalize a transaction containing updates for a FTS table. */ |
| 1130 | static |
| 1131 | void |
| 1132 | trx_finalize_for_fts_table( |
| 1133 | /*=======================*/ |
| 1134 | fts_trx_table_t* ftt) /* in: FTS trx table */ |
| 1135 | { |
| 1136 | fts_t* fts = ftt->table->fts; |
| 1137 | fts_doc_ids_t* doc_ids = ftt->added_doc_ids; |
| 1138 | |
| 1139 | mutex_enter(&fts->bg_threads_mutex); |
| 1140 | |
| 1141 | if (fts->fts_status & BG_THREAD_STOP) { |
| 1142 | /* The table is about to be dropped, no use |
| 1143 | adding anything to its work queue. */ |
| 1144 | |
| 1145 | mutex_exit(&fts->bg_threads_mutex); |
| 1146 | } else { |
| 1147 | mem_heap_t* heap; |
| 1148 | mutex_exit(&fts->bg_threads_mutex); |
| 1149 | |
| 1150 | ut_a(fts->add_wq); |
| 1151 | |
| 1152 | heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg); |
| 1153 | |
| 1154 | ib_wqueue_add(fts->add_wq, doc_ids, heap); |
| 1155 | |
| 1156 | /* fts_trx_table_t no longer owns the list. */ |
| 1157 | ftt->added_doc_ids = NULL; |
| 1158 | } |
| 1159 | } |
| 1160 | |
| 1161 | /******************************************************************//** |
| 1162 | Finalize a transaction containing updates to FTS tables. */ |
| 1163 | static |
| 1164 | void |
| 1165 | trx_finalize_for_fts( |
| 1166 | /*=================*/ |
| 1167 | trx_t* trx, /*!< in/out: transaction */ |
| 1168 | bool is_commit) /*!< in: true if the transaction was |
| 1169 | committed, false if it was rolled back. */ |
| 1170 | { |
| 1171 | if (is_commit) { |
| 1172 | const ib_rbt_node_t* node; |
| 1173 | ib_rbt_t* tables; |
| 1174 | fts_savepoint_t* savepoint; |
| 1175 | |
| 1176 | savepoint = static_cast<fts_savepoint_t*>( |
| 1177 | ib_vector_last(trx->fts_trx->savepoints)); |
| 1178 | |
| 1179 | tables = savepoint->tables; |
| 1180 | |
| 1181 | for (node = rbt_first(tables); |
| 1182 | node; |
| 1183 | node = rbt_next(tables, node)) { |
| 1184 | fts_trx_table_t** ftt; |
| 1185 | |
| 1186 | ftt = rbt_value(fts_trx_table_t*, node); |
| 1187 | |
| 1188 | if ((*ftt)->added_doc_ids) { |
| 1189 | trx_finalize_for_fts_table(*ftt); |
| 1190 | } |
| 1191 | } |
| 1192 | } |
| 1193 | |
| 1194 | fts_trx_free(trx->fts_trx); |
| 1195 | trx->fts_trx = NULL; |
| 1196 | } |
| 1197 | |
| 1198 | /**********************************************************************//** |
| 1199 | If required, flushes the log to disk based on the value of |
| 1200 | innodb_flush_log_at_trx_commit. */ |
| 1201 | static |
| 1202 | void |
| 1203 | trx_flush_log_if_needed_low( |
| 1204 | /*========================*/ |
| 1205 | lsn_t lsn) /*!< in: lsn up to which logs are to be |
| 1206 | flushed. */ |
| 1207 | { |
| 1208 | bool flush = srv_file_flush_method != SRV_NOSYNC; |
| 1209 | |
| 1210 | switch (srv_flush_log_at_trx_commit) { |
| 1211 | case 3: |
| 1212 | case 2: |
| 1213 | /* Write the log but do not flush it to disk */ |
| 1214 | flush = false; |
| 1215 | /* fall through */ |
| 1216 | case 1: |
| 1217 | /* Write the log and optionally flush it to disk */ |
| 1218 | log_write_up_to(lsn, flush); |
| 1219 | return; |
| 1220 | case 0: |
| 1221 | /* Do nothing */ |
| 1222 | return; |
| 1223 | } |
| 1224 | |
| 1225 | ut_error; |
| 1226 | } |
| 1227 | |
| 1228 | /**********************************************************************//** |
| 1229 | If required, flushes the log to disk based on the value of |
| 1230 | innodb_flush_log_at_trx_commit. */ |
| 1231 | static |
| 1232 | void |
| 1233 | trx_flush_log_if_needed( |
| 1234 | /*====================*/ |
| 1235 | lsn_t lsn, /*!< in: lsn up to which logs are to be |
| 1236 | flushed. */ |
| 1237 | trx_t* trx) /*!< in/out: transaction */ |
| 1238 | { |
| 1239 | trx->op_info = "flushing log" ; |
| 1240 | trx_flush_log_if_needed_low(lsn); |
| 1241 | trx->op_info = "" ; |
| 1242 | } |
| 1243 | |
| 1244 | /**********************************************************************//** |
| 1245 | For each table that has been modified by the given transaction: update |
| 1246 | its dict_table_t::update_time with the current timestamp. Clear the list |
| 1247 | of the modified tables at the end. */ |
| 1248 | static |
| 1249 | void |
| 1250 | trx_update_mod_tables_timestamp( |
| 1251 | /*============================*/ |
| 1252 | trx_t* trx) /*!< in: transaction */ |
| 1253 | { |
| 1254 | |
| 1255 | ut_ad(trx->id != 0); |
| 1256 | |
| 1257 | /* consider using trx->start_time if calling time() is too |
| 1258 | expensive here */ |
| 1259 | time_t now = ut_time(); |
| 1260 | |
| 1261 | trx_mod_tables_t::const_iterator end = trx->mod_tables.end(); |
| 1262 | |
| 1263 | for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin(); |
| 1264 | it != end; |
| 1265 | ++it) { |
| 1266 | |
| 1267 | /* This could be executed by multiple threads concurrently |
| 1268 | on the same table object. This is fine because time_t is |
| 1269 | word size or less. And _purely_ _theoretically_, even if |
| 1270 | time_t write is not atomic, likely the value of 'now' is |
| 1271 | the same in all threads and even if it is not, getting a |
| 1272 | "garbage" in table->update_time is justified because |
| 1273 | protecting it with a latch here would be too performance |
| 1274 | intrusive. */ |
| 1275 | it->first->update_time = now; |
| 1276 | } |
| 1277 | |
| 1278 | trx->mod_tables.clear(); |
| 1279 | } |
| 1280 | |
/****************************************************************//**
Commits a transaction in memory.
Closes the read view, releases locks (or, for a non-locking auto-commit
transaction, only resets the state), drops the reference on the redo
rollback segment, cleans up remaining undo log objects, writes/flushes
the redo log as configured when a mini-transaction was committed, frees
savepoints and FTS state, and finally resets the transaction object to
TRX_STATE_NOT_STARTED. */
static
void
trx_commit_in_memory(
/*=================*/
	trx_t*		trx,	/*!< in/out: transaction */
	const mtr_t*	mtr)	/*!< in: mini-transaction of
				trx_write_serialisation_history(), or NULL if
				the transaction did not modify anything */
{
	trx->must_flush_log_later = false;
	trx->read_view.close();

	if (trx_is_autocommit_non_locking(trx)) {
		ut_ad(trx->id == 0);
		ut_ad(trx->read_only);
		ut_a(!trx->is_recovered);
		ut_ad(trx->rsegs.m_redo.rseg == NULL);

		/* Note: We are asserting without holding the lock mutex. But
		that is OK because this transaction is not waiting and cannot
		be rolled back and no new locks can (or should not) be added
		because it is flagged as a non-locking read-only transaction. */

		ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

		/* This state change is not protected by any mutex, therefore
		there is an inherent race here around state transition during
		printouts. We ignore this race for the sake of efficiency.
		However, the trx_sys_t::mutex will protect the trx_t instance
		and it cannot be removed from the trx_list and freed
		without first acquiring the trx_sys_t::mutex. */

		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));

		MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);

		DBUG_LOG("trx", "Autocommit in memory: " << trx);
		trx->state = TRX_STATE_NOT_STARTED;
	} else {
		if (trx->id > 0) {
			/* For consistent snapshot, we need to remove current
			transaction from rw_trx_hash before doing commit and
			releasing locks. */
			trx_sys.deregister_rw(trx);
		}

		lock_trx_release_locks(trx);

		/* Remove the transaction from the list of active
		transactions now that it no longer holds any user locks. */

		ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
		DEBUG_SYNC_C("after_trx_committed_in_memory");

		/* Only read-write transactions with a redo rollback segment
		update the modification timestamps of their tables. */
		if (trx->read_only || trx->rsegs.m_redo.rseg == NULL) {
			MONITOR_INC(MONITOR_TRX_RO_COMMIT);
		} else {
			trx_update_mod_tables_timestamp(trx);
			MONITOR_INC(MONITOR_TRX_RW_COMMIT);
		}
	}

	ut_ad(!trx->rsegs.m_redo.undo);

	if (trx_rseg_t* rseg = trx->rsegs.m_redo.rseg) {
		/* Drop this transaction's reference on the redo rollback
		segment. */
		mutex_enter(&rseg->mutex);
		ut_ad(rseg->trx_ref_count > 0);
		--rseg->trx_ref_count;
		mutex_exit(&rseg->mutex);

		if (trx_undo_t*& insert = trx->rsegs.m_redo.old_insert) {
			ut_ad(insert->rseg == rseg);
			trx_undo_commit_cleanup(insert, false);
			insert = NULL;
		}
	}

	ut_ad(!trx->rsegs.m_redo.old_insert);

	if (mtr != NULL) {
		if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
			ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
			trx_undo_commit_cleanup(undo, true);
			undo = NULL;
		}

		/* NOTE that we could possibly make a group commit more
		efficient here: call os_thread_yield here to allow also other
		trxs to come to commit! */

		/*-------------------------------------*/

		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the transaction durable if
		the OS does not crash. We may also flush the log files to
		disk, making the transaction durable also at an OS crash or a
		power outage.

		The idea in InnoDB's group commit is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which commits the whole
		group. Note that this group commit will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		If we are calling trx_commit() under prepare_commit_mutex, we
		will delay possible log write and flush to a separate function
		trx_commit_complete_for_mysql(), which is only called when the
		thread has released the mutex. This is to make the
		group commit algorithm to work. Otherwise, the prepare_commit
		mutex would serialize all commits and prevent a group of
		transactions from gathering. */

		lsn_t	lsn = mtr->commit_lsn();

		if (lsn == 0) {
			/* Nothing to be done. */
		} else if (trx->flush_log_later) {
			/* Do nothing yet */
			trx->must_flush_log_later = true;
		} else if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else {
			trx_flush_log_if_needed(lsn, trx);
		}

		/* Remembered for a deferred flush in
		trx_commit_complete_for_mysql(). */
		trx->commit_lsn = lsn;

		/* Tell server some activity has happened, since the trx
		changed something. Background utility threads like
		master thread, purge thread or page_cleaner thread might
		have some work to do. */
		srv_active_wake_master_thread();
	}

	ut_ad(!trx->rsegs.m_noredo.undo);

	/* Free all savepoints, starting from the first. */
	trx_named_savept_t*	savep = UT_LIST_GET_FIRST(trx->trx_savepoints);

	trx_roll_savepoints_free(trx, savep);

	if (trx->fts_trx != NULL) {
		/* A non-zero undo_no is passed on as "committed"
		(is_commit) to trx_finalize_for_fts(). */
		trx_finalize_for_fts(trx, trx->undo_no != 0);
	}

	trx_mutex_enter(trx);
	trx->dict_operation = TRX_DICT_OP_NONE;

#ifdef WITH_WSREP
	if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) {
		trx->lock.was_chosen_as_deadlock_victim = FALSE;
	}
#endif

	DBUG_LOG("trx", "Commit in memory: " << trx);
	trx->state = TRX_STATE_NOT_STARTED;

	assert_trx_is_free(trx);

	trx_init(trx);

	trx_mutex_exit(trx);

	ut_a(trx->error_state == DB_SUCCESS);
	srv_wake_purge_thread_if_not_active();
}
| 1451 | |
/** Commit a transaction and a mini-transaction.
If the transaction has FTS changes and a non-zero undo_no, fts_commit()
is run first; any result other than DB_SUCCESS or DB_DUPLICATE_KEY is
fatal. With a mini-transaction, the serialisation history is written and
the mini-transaction is committed before the in-memory commit.
@param[in,out]	trx	transaction
@param[in,out]	mtr	mini-transaction (NULL if no modifications) */
void trx_commit_low(trx_t* trx, mtr_t* mtr)
{
	assert_trx_nonlocking_or_in_list(trx);
	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
	ut_ad(!mtr || mtr->is_active());
	/* Debug checks: a mini-transaction must be passed exactly when the
	transaction has logged or recovered changes and is not being
	aborted by a deadlock rollback. */
	ut_d(bool aborted = trx->in_rollback
	     && trx->error_state == DB_DEADLOCK);
	ut_ad(!mtr == (aborted || !trx->has_logged_or_recovered()));
	ut_ad(!mtr || !aborted);

	/* undo_no is non-zero if we're doing the final commit. */
	if (trx->fts_trx != NULL && trx->undo_no != 0) {
		dberr_t	error;

		ut_a(!trx_is_autocommit_non_locking(trx));

		error = fts_commit(trx);

		/* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
		instead of dying. This is a possible scenario if there
		is a crash between insert to DELETED table committing
		and transaction committing. The fix would be able to
		return error from this function */
		if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
			/* FTS-FIXME: once we can return values from this
			function, we should do so and signal an error
			instead of just dying. */

			ut_error;
		}
	}

	if (mtr != NULL) {

		mtr->set_sync();

		trx_write_serialisation_history(trx, mtr);

		/* The following call commits the mini-transaction, making the
		whole transaction committed in the file-based world, at this
		log sequence number. The transaction becomes 'durable' when
		we write the log to disk, but in the logical sense the commit
		in the file-based data structures (undo logs etc.) happens
		here.

		NOTE that transaction numbers, which are assigned only to
		transactions with an update undo log, do not necessarily come
		in exactly the same order as commit lsn's, if the transactions
		have different rollback segments. To get exactly the same
		order we should hold the kernel mutex up to this point,
		adding to the contention of the kernel mutex. However, if
		a transaction T2 is able to see modifications made by
		a transaction T1, T2 will always get a bigger transaction
		number and a bigger commit lsn than T1. */

		/*--------------*/
		mtr_commit(mtr);

		/* Debug-only crash injection point. */
		DBUG_EXECUTE_IF("ib_crash_during_trx_commit_in_mem",
				if (trx->has_logged()) {
					log_make_checkpoint_at(LSN_MAX, TRUE);
					DBUG_SUICIDE();
				});
		/*--------------*/
	}
#ifndef DBUG_OFF
	/* In case of this function is called from a stack executing
	   THD::release_resources -> ...
	      innobase_connection_close() ->
	             trx_rollback_for_mysql... -> .
	   mysql's thd does not seem to have
	   thd->debug_sync_control defined any longer. However the stack
	   is possible only with a prepared trx not updating any data.
	*/
	if (trx->mysql_thd != NULL && trx->has_logged_persistent()) {
		DEBUG_SYNC_C("before_trx_state_committed_in_memory");
	}
#endif

	trx_commit_in_memory(trx, mtr);
}
| 1536 | |
| 1537 | /****************************************************************//** |
| 1538 | Commits a transaction. */ |
| 1539 | void |
| 1540 | trx_commit( |
| 1541 | /*=======*/ |
| 1542 | trx_t* trx) /*!< in/out: transaction */ |
| 1543 | { |
| 1544 | mtr_t* mtr; |
| 1545 | mtr_t local_mtr; |
| 1546 | |
| 1547 | DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start" , |
| 1548 | DBUG_SUICIDE();); |
| 1549 | |
| 1550 | if (trx->has_logged_or_recovered()) { |
| 1551 | mtr = &local_mtr; |
| 1552 | mtr_start_sync(mtr); |
| 1553 | } else { |
| 1554 | |
| 1555 | mtr = NULL; |
| 1556 | } |
| 1557 | |
| 1558 | trx_commit_low(trx, mtr); |
| 1559 | } |
| 1560 | |
| 1561 | /****************************************************************//** |
| 1562 | Prepares a transaction for commit/rollback. */ |
| 1563 | void |
| 1564 | trx_commit_or_rollback_prepare( |
| 1565 | /*===========================*/ |
| 1566 | trx_t* trx) /*!< in/out: transaction */ |
| 1567 | { |
| 1568 | /* We are reading trx->state without holding trx_sys.mutex |
| 1569 | here, because the commit or rollback should be invoked for a |
| 1570 | running (or recovered prepared) transaction that is associated |
| 1571 | with the current thread. */ |
| 1572 | |
| 1573 | switch (trx->state) { |
| 1574 | case TRX_STATE_NOT_STARTED: |
| 1575 | trx_start_low(trx, true); |
| 1576 | /* fall through */ |
| 1577 | |
| 1578 | case TRX_STATE_ACTIVE: |
| 1579 | case TRX_STATE_PREPARED: |
| 1580 | |
| 1581 | /* If the trx is in a lock wait state, moves the waiting |
| 1582 | query thread to the suspended state */ |
| 1583 | |
| 1584 | if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { |
| 1585 | |
| 1586 | ut_a(trx->lock.wait_thr != NULL); |
| 1587 | trx->lock.wait_thr->state = QUE_THR_SUSPENDED; |
| 1588 | trx->lock.wait_thr = NULL; |
| 1589 | |
| 1590 | trx->lock.que_state = TRX_QUE_RUNNING; |
| 1591 | } |
| 1592 | |
| 1593 | ut_a(trx->lock.n_active_thrs == 1); |
| 1594 | return; |
| 1595 | |
| 1596 | case TRX_STATE_COMMITTED_IN_MEMORY: |
| 1597 | break; |
| 1598 | } |
| 1599 | |
| 1600 | ut_error; |
| 1601 | } |
| 1602 | |
| 1603 | /*********************************************************************//** |
| 1604 | Creates a commit command node struct. |
| 1605 | @return own: commit node struct */ |
| 1606 | commit_node_t* |
| 1607 | trx_commit_node_create( |
| 1608 | /*===================*/ |
| 1609 | mem_heap_t* heap) /*!< in: mem heap where created */ |
| 1610 | { |
| 1611 | commit_node_t* node; |
| 1612 | |
| 1613 | node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node))); |
| 1614 | node->common.type = QUE_NODE_COMMIT; |
| 1615 | node->state = COMMIT_NODE_SEND; |
| 1616 | |
| 1617 | return(node); |
| 1618 | } |
| 1619 | |
| 1620 | /***********************************************************//** |
| 1621 | Performs an execution step for a commit type node in a query graph. |
| 1622 | @return query thread to run next, or NULL */ |
| 1623 | que_thr_t* |
| 1624 | trx_commit_step( |
| 1625 | /*============*/ |
| 1626 | que_thr_t* thr) /*!< in: query thread */ |
| 1627 | { |
| 1628 | commit_node_t* node; |
| 1629 | |
| 1630 | node = static_cast<commit_node_t*>(thr->run_node); |
| 1631 | |
| 1632 | ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT); |
| 1633 | |
| 1634 | if (thr->prev_node == que_node_get_parent(node)) { |
| 1635 | node->state = COMMIT_NODE_SEND; |
| 1636 | } |
| 1637 | |
| 1638 | if (node->state == COMMIT_NODE_SEND) { |
| 1639 | trx_t* trx; |
| 1640 | |
| 1641 | node->state = COMMIT_NODE_WAIT; |
| 1642 | |
| 1643 | trx = thr_get_trx(thr); |
| 1644 | |
| 1645 | ut_a(trx->lock.wait_thr == NULL); |
| 1646 | ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT); |
| 1647 | |
| 1648 | trx_commit_or_rollback_prepare(trx); |
| 1649 | |
| 1650 | trx->lock.que_state = TRX_QUE_COMMITTING; |
| 1651 | |
| 1652 | trx_commit(trx); |
| 1653 | |
| 1654 | ut_ad(trx->lock.wait_thr == NULL); |
| 1655 | |
| 1656 | trx->lock.que_state = TRX_QUE_RUNNING; |
| 1657 | |
| 1658 | thr = NULL; |
| 1659 | } else { |
| 1660 | ut_ad(node->state == COMMIT_NODE_WAIT); |
| 1661 | |
| 1662 | node->state = COMMIT_NODE_SEND; |
| 1663 | |
| 1664 | thr->run_node = que_node_get_parent(node); |
| 1665 | } |
| 1666 | |
| 1667 | return(thr); |
| 1668 | } |
| 1669 | |
| 1670 | /**********************************************************************//** |
| 1671 | Does the transaction commit for MySQL. |
| 1672 | @return DB_SUCCESS or error number */ |
| 1673 | dberr_t |
| 1674 | trx_commit_for_mysql( |
| 1675 | /*=================*/ |
| 1676 | trx_t* trx) /*!< in/out: transaction */ |
| 1677 | { |
| 1678 | /* Because we do not do the commit by sending an Innobase |
| 1679 | sig to the transaction, we must here make sure that trx has been |
| 1680 | started. */ |
| 1681 | |
| 1682 | switch (trx->state) { |
| 1683 | case TRX_STATE_NOT_STARTED: |
| 1684 | ut_d(trx->start_file = __FILE__); |
| 1685 | ut_d(trx->start_line = __LINE__); |
| 1686 | |
| 1687 | trx_start_low(trx, true); |
| 1688 | /* fall through */ |
| 1689 | case TRX_STATE_ACTIVE: |
| 1690 | case TRX_STATE_PREPARED: |
| 1691 | |
| 1692 | trx->op_info = "committing" ; |
| 1693 | |
| 1694 | trx_commit(trx); |
| 1695 | |
| 1696 | MONITOR_DEC(MONITOR_TRX_ACTIVE); |
| 1697 | trx->op_info = "" ; |
| 1698 | return(DB_SUCCESS); |
| 1699 | case TRX_STATE_COMMITTED_IN_MEMORY: |
| 1700 | break; |
| 1701 | } |
| 1702 | ut_error; |
| 1703 | return(DB_CORRUPTION); |
| 1704 | } |
| 1705 | |
| 1706 | /**********************************************************************//** |
| 1707 | If required, flushes the log to disk if we called trx_commit_for_mysql() |
| 1708 | with trx->flush_log_later == TRUE. */ |
| 1709 | void |
| 1710 | trx_commit_complete_for_mysql( |
| 1711 | /*==========================*/ |
| 1712 | trx_t* trx) /*!< in/out: transaction */ |
| 1713 | { |
| 1714 | if (trx->id != 0 |
| 1715 | || !trx->must_flush_log_later |
| 1716 | || (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) { |
| 1717 | |
| 1718 | return; |
| 1719 | } |
| 1720 | |
| 1721 | trx_flush_log_if_needed(trx->commit_lsn, trx); |
| 1722 | |
| 1723 | trx->must_flush_log_later = false; |
| 1724 | } |
| 1725 | |
| 1726 | /**********************************************************************//** |
| 1727 | Marks the latest SQL statement ended. */ |
| 1728 | void |
| 1729 | trx_mark_sql_stat_end( |
| 1730 | /*==================*/ |
| 1731 | trx_t* trx) /*!< in: trx handle */ |
| 1732 | { |
| 1733 | ut_a(trx); |
| 1734 | |
| 1735 | switch (trx->state) { |
| 1736 | case TRX_STATE_PREPARED: |
| 1737 | case TRX_STATE_COMMITTED_IN_MEMORY: |
| 1738 | break; |
| 1739 | case TRX_STATE_NOT_STARTED: |
| 1740 | trx->undo_no = 0; |
| 1741 | /* fall through */ |
| 1742 | case TRX_STATE_ACTIVE: |
| 1743 | trx->last_sql_stat_start.least_undo_no = trx->undo_no; |
| 1744 | |
| 1745 | if (trx->fts_trx != NULL) { |
| 1746 | fts_savepoint_laststmt_refresh(trx); |
| 1747 | } |
| 1748 | |
| 1749 | return; |
| 1750 | } |
| 1751 | |
| 1752 | ut_error; |
| 1753 | } |
| 1754 | |
/**********************************************************************//**
Prints info about a transaction.
The output consists of a "TRANSACTION <id>" line with the state and
optional op_info, an optional line with the MySQL table usage counts, an
optional line with the queue state, lock and undo log statistics, and,
for a started transaction with a session, the session information
printed by innobase_mysql_print_thd(). The lock statistics are supplied
by the caller. */
void
trx_print_low(
/*==========*/
	FILE*		f,
			/*!< in: output stream */
	const trx_t*	trx,
			/*!< in: transaction */
	ulint		max_query_len,
			/*!< in: max query length to print,
			or 0 to use the default max length */
	ulint		n_rec_locks,
			/*!< in: lock_number_of_rows_locked(&trx->lock) */
	ulint		n_trx_locks,
			/*!< in: length of trx->lock.trx_locks */
	ulint		heap_size)
			/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
{
	ibool		newline;
	const char*	op_info;

	fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));

	/* trx->state cannot change from or to NOT_STARTED while we
	are holding the trx_sys.mutex. It may change from ACTIVE to
	PREPARED or COMMITTED. */
	switch (trx->state) {
	case TRX_STATE_NOT_STARTED:
		fputs(", not started", f);
		goto state_ok;
	case TRX_STATE_ACTIVE:
		fprintf(f, ", ACTIVE %lu sec",
			(ulong) difftime(time(NULL), trx->start_time));
		goto state_ok;
	case TRX_STATE_PREPARED:
		fprintf(f, ", ACTIVE (PREPARED) %lu sec",
			(ulong) difftime(time(NULL), trx->start_time));
		goto state_ok;
	case TRX_STATE_COMMITTED_IN_MEMORY:
		fputs(", COMMITTED IN MEMORY", f);
		goto state_ok;
	}
	/* Reached only for a state value not handled above: print it
	numerically; debug builds assert. */
	fprintf(f, ", state %lu", (ulong) trx->state);
	ut_ad(0);
state_ok:

	/* prevent a race condition: read the pointer once, so that the
	emptiness test and the print below use the same string */
	op_info = trx->op_info;

	if (*op_info) {
		putc(' ', f);
		fputs(op_info, f);
	}

	if (trx->is_recovered) {
		fputs(" recovered trx", f);
	}

	if (trx->declared_to_be_inside_innodb) {
		fprintf(f, ", thread declared inside InnoDB %lu",
			(ulong) trx->n_tickets_to_enter_innodb);
	}

	putc('\n', f);

	if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
		fprintf(f, "mysql tables in use %lu, locked %lu\n",
			(ulong) trx->n_mysql_tables_in_use,
			(ulong) trx->mysql_n_tables_locked);
	}

	/* newline tracks whether the statistics line below has received
	any output and therefore must be terminated at the end. */
	newline = TRUE;

	/* trx->lock.que_state of an ACTIVE transaction may change
	while we are not holding trx->mutex. We perform a dirty read
	for performance reasons. */

	switch (trx->lock.que_state) {
	case TRX_QUE_RUNNING:
		/* Nothing is printed for a running transaction. */
		newline = FALSE; break;
	case TRX_QUE_LOCK_WAIT:
		fputs("LOCK WAIT ", f); break;
	case TRX_QUE_ROLLING_BACK:
		fputs("ROLLING BACK ", f); break;
	case TRX_QUE_COMMITTING:
		fputs("COMMITTING ", f); break;
	default:
		fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
	}

	/* NOTE(review): the threshold 400 presumably approximates the size
	of an unused lock heap; confirm against the lock heap allocation. */
	if (n_trx_locks > 0 || heap_size > 400) {
		newline = TRUE;

		fprintf(f, "%lu lock struct(s), heap size %lu,"
			" %lu row lock(s)",
			(ulong) n_trx_locks,
			(ulong) heap_size,
			(ulong) n_rec_locks);
	}

	if (trx->undo_no != 0) {
		newline = TRUE;
		fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
	}

	if (newline) {
		putc('\n', f);
	}

	/* Session details are printed only for a started transaction that
	has a session attached. */
	if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != NULL) {
		innobase_mysql_print_thd(
			f, trx->mysql_thd, static_cast<uint>(max_query_len));
	}
}
| 1870 | |
| 1871 | /**********************************************************************//** |
| 1872 | Prints info about a transaction. |
| 1873 | The caller must hold lock_sys.mutex. |
| 1874 | When possible, use trx_print() instead. */ |
| 1875 | void |
| 1876 | trx_print_latched( |
| 1877 | /*==============*/ |
| 1878 | FILE* f, /*!< in: output stream */ |
| 1879 | const trx_t* trx, /*!< in: transaction */ |
| 1880 | ulint max_query_len) /*!< in: max query length to print, |
| 1881 | or 0 to use the default max length */ |
| 1882 | { |
| 1883 | ut_ad(lock_mutex_own()); |
| 1884 | |
| 1885 | trx_print_low(f, trx, max_query_len, |
| 1886 | lock_number_of_rows_locked(&trx->lock), |
| 1887 | UT_LIST_GET_LEN(trx->lock.trx_locks), |
| 1888 | mem_heap_get_size(trx->lock.lock_heap)); |
| 1889 | } |
| 1890 | |
| 1891 | /**********************************************************************//** |
| 1892 | Prints info about a transaction. |
| 1893 | Acquires and releases lock_sys.mutex. */ |
| 1894 | void |
| 1895 | trx_print( |
| 1896 | /*======*/ |
| 1897 | FILE* f, /*!< in: output stream */ |
| 1898 | const trx_t* trx, /*!< in: transaction */ |
| 1899 | ulint max_query_len) /*!< in: max query length to print, |
| 1900 | or 0 to use the default max length */ |
| 1901 | { |
| 1902 | ulint n_rec_locks; |
| 1903 | ulint n_trx_locks; |
| 1904 | ulint heap_size; |
| 1905 | |
| 1906 | lock_mutex_enter(); |
| 1907 | n_rec_locks = lock_number_of_rows_locked(&trx->lock); |
| 1908 | n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); |
| 1909 | heap_size = mem_heap_get_size(trx->lock.lock_heap); |
| 1910 | lock_mutex_exit(); |
| 1911 | |
| 1912 | trx_print_low(f, trx, max_query_len, |
| 1913 | n_rec_locks, n_trx_locks, heap_size); |
| 1914 | } |
| 1915 | |
| 1916 | /*******************************************************************//** |
| 1917 | Compares the "weight" (or size) of two transactions. Transactions that |
| 1918 | have edited non-transactional tables are considered heavier than ones |
| 1919 | that have not. |
| 1920 | @return TRUE if weight(a) >= weight(b) */ |
| 1921 | bool |
| 1922 | trx_weight_ge( |
| 1923 | /*==========*/ |
| 1924 | const trx_t* a, /*!< in: transaction to be compared */ |
| 1925 | const trx_t* b) /*!< in: transaction to be compared */ |
| 1926 | { |
| 1927 | ibool a_notrans_edit; |
| 1928 | ibool b_notrans_edit; |
| 1929 | |
| 1930 | /* If mysql_thd is NULL for a transaction we assume that it has |
| 1931 | not edited non-transactional tables. */ |
| 1932 | |
| 1933 | a_notrans_edit = a->mysql_thd != NULL |
| 1934 | && thd_has_edited_nontrans_tables(a->mysql_thd); |
| 1935 | |
| 1936 | b_notrans_edit = b->mysql_thd != NULL |
| 1937 | && thd_has_edited_nontrans_tables(b->mysql_thd); |
| 1938 | |
| 1939 | if (a_notrans_edit != b_notrans_edit) { |
| 1940 | |
| 1941 | return(a_notrans_edit); |
| 1942 | } |
| 1943 | |
| 1944 | /* Either both had edited non-transactional tables or both had |
| 1945 | not, we fall back to comparing the number of altered/locked |
| 1946 | rows. */ |
| 1947 | |
| 1948 | return(TRX_WEIGHT(a) >= TRX_WEIGHT(b)); |
| 1949 | } |
| 1950 | |
| 1951 | /** Prepare a transaction. |
| 1952 | @return log sequence number that makes the XA PREPARE durable |
| 1953 | @retval 0 if no changes needed to be made durable */ |
| 1954 | static |
| 1955 | lsn_t |
| 1956 | trx_prepare_low(trx_t* trx) |
| 1957 | { |
| 1958 | ut_ad(!trx->rsegs.m_redo.old_insert); |
| 1959 | ut_ad(!trx->is_recovered); |
| 1960 | |
| 1961 | mtr_t mtr; |
| 1962 | |
| 1963 | if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) { |
| 1964 | ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg); |
| 1965 | |
| 1966 | mtr.start(); |
| 1967 | mtr.set_log_mode(MTR_LOG_NO_REDO); |
| 1968 | |
| 1969 | mutex_enter(&undo->rseg->mutex); |
| 1970 | trx_undo_set_state_at_prepare(trx, undo, false, &mtr); |
| 1971 | mutex_exit(&undo->rseg->mutex); |
| 1972 | |
| 1973 | mtr.commit(); |
| 1974 | } |
| 1975 | |
| 1976 | trx_undo_t* undo = trx->rsegs.m_redo.undo; |
| 1977 | |
| 1978 | if (!undo) { |
| 1979 | /* There were no changes to persistent tables. */ |
| 1980 | return(0); |
| 1981 | } |
| 1982 | |
| 1983 | trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; |
| 1984 | ut_ad(undo->rseg == rseg); |
| 1985 | |
| 1986 | mtr.start(true); |
| 1987 | |
| 1988 | /* Change the undo log segment states from TRX_UNDO_ACTIVE to |
| 1989 | TRX_UNDO_PREPARED: these modifications to the file data |
| 1990 | structure define the transaction as prepared in the file-based |
| 1991 | world, at the serialization point of lsn. */ |
| 1992 | |
| 1993 | mutex_enter(&rseg->mutex); |
| 1994 | trx_undo_set_state_at_prepare(trx, undo, false, &mtr); |
| 1995 | mutex_exit(&rseg->mutex); |
| 1996 | |
| 1997 | /* Make the XA PREPARE durable. */ |
| 1998 | mtr.commit(); |
| 1999 | ut_ad(mtr.commit_lsn() > 0); |
| 2000 | return(mtr.commit_lsn()); |
| 2001 | } |
| 2002 | |
| 2003 | /****************************************************************//** |
| 2004 | Prepares a transaction. */ |
| 2005 | static |
| 2006 | void |
| 2007 | trx_prepare( |
| 2008 | /*========*/ |
| 2009 | trx_t* trx) /*!< in/out: transaction */ |
| 2010 | { |
| 2011 | /* Only fresh user transactions can be prepared. |
| 2012 | Recovered transactions cannot. */ |
| 2013 | ut_a(!trx->is_recovered); |
| 2014 | |
| 2015 | lsn_t lsn = trx_prepare_low(trx); |
| 2016 | |
| 2017 | DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step" , DBUG_SUICIDE();); |
| 2018 | |
| 2019 | ut_a(trx->state == TRX_STATE_ACTIVE); |
| 2020 | trx_mutex_enter(trx); |
| 2021 | trx->state = TRX_STATE_PREPARED; |
| 2022 | trx_mutex_exit(trx); |
| 2023 | |
| 2024 | if (lsn) { |
| 2025 | /* Depending on the my.cnf options, we may now write the log |
| 2026 | buffer to the log files, making the prepared state of the |
| 2027 | transaction durable if the OS does not crash. We may also |
| 2028 | flush the log files to disk, making the prepared state of the |
| 2029 | transaction durable also at an OS crash or a power outage. |
| 2030 | |
| 2031 | The idea in InnoDB's group prepare is that a group of |
| 2032 | transactions gather behind a trx doing a physical disk write |
| 2033 | to log files, and when that physical write has been completed, |
| 2034 | one of those transactions does a write which prepares the whole |
| 2035 | group. Note that this group prepare will only bring benefit if |
| 2036 | there are > 2 users in the database. Then at least 2 users can |
| 2037 | gather behind one doing the physical log write to disk. |
| 2038 | |
| 2039 | We must not be holding any mutexes or latches here. */ |
| 2040 | |
| 2041 | trx_flush_log_if_needed(lsn, trx); |
| 2042 | } |
| 2043 | } |
| 2044 | |
| 2045 | /** XA PREPARE a transaction. |
| 2046 | @param[in,out] trx transaction to prepare */ |
| 2047 | void trx_prepare_for_mysql(trx_t* trx) |
| 2048 | { |
| 2049 | trx_start_if_not_started_xa(trx, false); |
| 2050 | |
| 2051 | trx->op_info = "preparing" ; |
| 2052 | |
| 2053 | trx_prepare(trx); |
| 2054 | |
| 2055 | trx->op_info = "" ; |
| 2056 | } |
| 2057 | |
| 2058 | |
| 2059 | struct trx_recover_for_mysql_callback_arg |
| 2060 | { |
| 2061 | XID *xid_list; |
| 2062 | uint len; |
| 2063 | uint count; |
| 2064 | }; |
| 2065 | |
| 2066 | |
| 2067 | static my_bool trx_recover_for_mysql_callback(rw_trx_hash_element_t *element, |
| 2068 | trx_recover_for_mysql_callback_arg *arg) |
| 2069 | { |
| 2070 | mutex_enter(&element->mutex); |
| 2071 | if (trx_t *trx= element->trx) |
| 2072 | { |
| 2073 | /* |
| 2074 | The state of a read-write transaction can only change from ACTIVE to |
| 2075 | PREPARED while we are holding the element->mutex. But since it is |
| 2076 | executed at startup no state change should occur. |
| 2077 | */ |
| 2078 | if (trx_state_eq(trx, TRX_STATE_PREPARED)) |
| 2079 | { |
| 2080 | ut_ad(trx->is_recovered); |
| 2081 | if (arg->count == 0) |
| 2082 | ib::info() << "Starting recovery for XA transactions..." ; |
| 2083 | ib::info() << "Transaction " << trx_get_id_for_print(trx) |
| 2084 | << " in prepared state after recovery" ; |
| 2085 | ib::info() << "Transaction contains changes to " << trx->undo_no |
| 2086 | << " rows" ; |
| 2087 | arg->xid_list[arg->count++]= *trx->xid; |
| 2088 | } |
| 2089 | } |
| 2090 | mutex_exit(&element->mutex); |
| 2091 | return arg->count == arg->len; |
| 2092 | } |
| 2093 | |
| 2094 | |
| 2095 | /** |
| 2096 | Find prepared transaction objects for recovery. |
| 2097 | |
| 2098 | @param[out] xid_list prepared transactions |
| 2099 | @param[in] len number of slots in xid_list |
| 2100 | |
| 2101 | @return number of prepared transactions stored in xid_list |
| 2102 | */ |
| 2103 | |
| 2104 | int trx_recover_for_mysql(XID *xid_list, uint len) |
| 2105 | { |
| 2106 | trx_recover_for_mysql_callback_arg arg= { xid_list, len, 0 }; |
| 2107 | |
| 2108 | ut_ad(xid_list); |
| 2109 | ut_ad(len); |
| 2110 | |
| 2111 | /* Fill xid_list with PREPARED transactions. */ |
| 2112 | trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action> |
| 2113 | (trx_recover_for_mysql_callback), &arg); |
| 2114 | if (arg.count) |
| 2115 | ib::info() << arg.count |
| 2116 | << " transactions in prepared state after recovery" ; |
| 2117 | return int(arg.count); |
| 2118 | } |
| 2119 | |
| 2120 | |
| 2121 | struct trx_get_trx_by_xid_callback_arg |
| 2122 | { |
| 2123 | XID *xid; |
| 2124 | trx_t *trx; |
| 2125 | }; |
| 2126 | |
| 2127 | |
| 2128 | static my_bool trx_get_trx_by_xid_callback(rw_trx_hash_element_t *element, |
| 2129 | trx_get_trx_by_xid_callback_arg *arg) |
| 2130 | { |
| 2131 | my_bool found= 0; |
| 2132 | mutex_enter(&element->mutex); |
| 2133 | if (trx_t *trx= element->trx) |
| 2134 | { |
| 2135 | if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) && |
| 2136 | arg->xid->eq(reinterpret_cast<XID*>(trx->xid))) |
| 2137 | { |
| 2138 | /* Invalidate the XID, so that subsequent calls will not find it. */ |
| 2139 | trx->xid->null(); |
| 2140 | arg->trx= trx; |
| 2141 | found= 1; |
| 2142 | } |
| 2143 | } |
| 2144 | mutex_exit(&element->mutex); |
| 2145 | return found; |
| 2146 | } |
| 2147 | |
| 2148 | |
| 2149 | /** |
| 2150 | Finds PREPARED XA transaction by xid. |
| 2151 | |
| 2152 | trx may have been committed, unless the caller is holding lock_sys.mutex. |
| 2153 | |
| 2154 | @param[in] xid X/Open XA transaction identifier |
| 2155 | |
| 2156 | @return trx or NULL; on match, the trx->xid will be invalidated; |
| 2157 | */ |
| 2158 | |
| 2159 | trx_t *trx_get_trx_by_xid(XID *xid) |
| 2160 | { |
| 2161 | trx_get_trx_by_xid_callback_arg arg= { xid, 0 }; |
| 2162 | |
| 2163 | if (xid) |
| 2164 | trx_sys.rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action> |
| 2165 | (trx_get_trx_by_xid_callback), &arg); |
| 2166 | return arg.trx; |
| 2167 | } |
| 2168 | |
| 2169 | |
| 2170 | /*************************************************************//** |
| 2171 | Starts the transaction if it is not yet started. */ |
| 2172 | void |
| 2173 | trx_start_if_not_started_xa_low( |
| 2174 | /*============================*/ |
| 2175 | trx_t* trx, /*!< in/out: transaction */ |
| 2176 | bool read_write) /*!< in: true if read write transaction */ |
| 2177 | { |
| 2178 | switch (trx->state) { |
| 2179 | case TRX_STATE_NOT_STARTED: |
| 2180 | trx_start_low(trx, read_write); |
| 2181 | return; |
| 2182 | |
| 2183 | case TRX_STATE_ACTIVE: |
| 2184 | if (trx->id == 0 && read_write) { |
| 2185 | /* If the transaction is tagged as read-only then |
| 2186 | it can only write to temp tables and for such |
| 2187 | transactions we don't want to move them to the |
| 2188 | trx_sys_t::rw_trx_hash. */ |
| 2189 | if (!trx->read_only) { |
| 2190 | trx_set_rw_mode(trx); |
| 2191 | } |
| 2192 | } |
| 2193 | return; |
| 2194 | case TRX_STATE_PREPARED: |
| 2195 | case TRX_STATE_COMMITTED_IN_MEMORY: |
| 2196 | break; |
| 2197 | } |
| 2198 | |
| 2199 | ut_error; |
| 2200 | } |
| 2201 | |
| 2202 | /*************************************************************//** |
| 2203 | Starts the transaction if it is not yet started. */ |
| 2204 | void |
| 2205 | trx_start_if_not_started_low( |
| 2206 | /*==========================*/ |
| 2207 | trx_t* trx, /*!< in: transaction */ |
| 2208 | bool read_write) /*!< in: true if read write transaction */ |
| 2209 | { |
| 2210 | switch (trx->state) { |
| 2211 | case TRX_STATE_NOT_STARTED: |
| 2212 | trx_start_low(trx, read_write); |
| 2213 | return; |
| 2214 | |
| 2215 | case TRX_STATE_ACTIVE: |
| 2216 | if (read_write && trx->id == 0 && !trx->read_only) { |
| 2217 | trx_set_rw_mode(trx); |
| 2218 | } |
| 2219 | return; |
| 2220 | |
| 2221 | case TRX_STATE_PREPARED: |
| 2222 | case TRX_STATE_COMMITTED_IN_MEMORY: |
| 2223 | break; |
| 2224 | } |
| 2225 | |
| 2226 | ut_error; |
| 2227 | } |
| 2228 | |
| 2229 | /*************************************************************//** |
| 2230 | Starts a transaction for internal processing. */ |
| 2231 | void |
| 2232 | trx_start_internal_low( |
| 2233 | /*===================*/ |
| 2234 | trx_t* trx) /*!< in/out: transaction */ |
| 2235 | { |
| 2236 | /* Ensure it is not flagged as an auto-commit-non-locking |
| 2237 | transaction. */ |
| 2238 | |
| 2239 | trx->will_lock = 1; |
| 2240 | |
| 2241 | trx->internal = true; |
| 2242 | |
| 2243 | trx_start_low(trx, true); |
| 2244 | } |
| 2245 | |
| 2246 | /** Starts a read-only transaction for internal processing. |
| 2247 | @param[in,out] trx transaction to be started */ |
| 2248 | void |
| 2249 | trx_start_internal_read_only_low( |
| 2250 | trx_t* trx) |
| 2251 | { |
| 2252 | /* Ensure it is not flagged as an auto-commit-non-locking |
| 2253 | transaction. */ |
| 2254 | |
| 2255 | trx->will_lock = 1; |
| 2256 | |
| 2257 | trx->internal = true; |
| 2258 | |
| 2259 | trx_start_low(trx, false); |
| 2260 | } |
| 2261 | |
| 2262 | /*************************************************************//** |
| 2263 | Starts the transaction for a DDL operation. */ |
| 2264 | void |
| 2265 | trx_start_for_ddl_low( |
| 2266 | /*==================*/ |
| 2267 | trx_t* trx, /*!< in/out: transaction */ |
| 2268 | trx_dict_op_t op) /*!< in: dictionary operation type */ |
| 2269 | { |
| 2270 | switch (trx->state) { |
| 2271 | case TRX_STATE_NOT_STARTED: |
| 2272 | /* Flag this transaction as a dictionary operation, so that |
| 2273 | the data dictionary will be locked in crash recovery. */ |
| 2274 | |
| 2275 | trx_set_dict_operation(trx, op); |
| 2276 | |
| 2277 | /* Ensure it is not flagged as an auto-commit-non-locking |
| 2278 | transation. */ |
| 2279 | trx->will_lock = 1; |
| 2280 | |
| 2281 | trx->ddl= true; |
| 2282 | |
| 2283 | trx_start_internal_low(trx); |
| 2284 | return; |
| 2285 | |
| 2286 | case TRX_STATE_ACTIVE: |
| 2287 | case TRX_STATE_PREPARED: |
| 2288 | case TRX_STATE_COMMITTED_IN_MEMORY: |
| 2289 | break; |
| 2290 | } |
| 2291 | |
| 2292 | ut_error; |
| 2293 | } |
| 2294 | |
| 2295 | /*************************************************************//** |
| 2296 | Set the transaction as a read-write transaction if it is not already |
| 2297 | tagged as such. Read-only transactions that are writing to temporary |
| 2298 | tables are assigned an ID and a rollback segment but are not added |
| 2299 | to the trx read-write list because their updates should not be visible |
| 2300 | to other transactions and therefore their changes can be ignored by |
| 2301 | by MVCC. */ |
| 2302 | void |
| 2303 | trx_set_rw_mode( |
| 2304 | /*============*/ |
| 2305 | trx_t* trx) /*!< in/out: transaction that is RW */ |
| 2306 | { |
| 2307 | ut_ad(trx->rsegs.m_redo.rseg == 0); |
| 2308 | ut_ad(!trx_is_autocommit_non_locking(trx)); |
| 2309 | ut_ad(!trx->read_only); |
| 2310 | ut_ad(trx->id == 0); |
| 2311 | |
| 2312 | if (high_level_read_only) { |
| 2313 | return; |
| 2314 | } |
| 2315 | |
| 2316 | /* Function is promoting existing trx from ro mode to rw mode. |
| 2317 | In this process it has acquired trx_sys.mutex as it plan to |
| 2318 | move trx from ro list to rw list. If in future, some other thread |
| 2319 | looks at this trx object while it is being promoted then ensure |
| 2320 | that both threads are synced by acquring trx->mutex to avoid decision |
| 2321 | based on in-consistent view formed during promotion. */ |
| 2322 | |
| 2323 | trx->rsegs.m_redo.rseg = trx_assign_rseg_low(); |
| 2324 | ut_ad(trx->rsegs.m_redo.rseg != 0); |
| 2325 | |
| 2326 | trx_sys.register_rw(trx); |
| 2327 | |
| 2328 | /* So that we can see our own changes. */ |
| 2329 | if (trx->read_view.is_open()) { |
| 2330 | trx->read_view.set_creator_trx_id(trx->id); |
| 2331 | } |
| 2332 | } |
| 2333 | |