| 1 | /* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 15 | |
| 16 | |
| 17 | #include <my_global.h> |
| 18 | #include <my_sys.h> |
| 19 | #include <m_string.h> |
| 20 | #include "trnman.h" |
| 21 | #include "ma_checkpoint.h" |
| 22 | #include "ma_control_file.h" |
| 23 | |
| 24 | /* |
| 25 | status variables: |
| 26 | how many trns in the active list currently, |
| 27 | in the committed list currently, allocated since startup. |
| 28 | */ |
| 29 | uint trnman_active_transactions, trnman_committed_transactions, |
| 30 | trnman_allocated_transactions; |
| 31 | |
| 32 | #ifdef WORKAROUND_GCC_4_3_2_BUG |
| 33 | volatile |
| 34 | #endif |
| 35 | /* list of active transactions in the trid order */ |
| 36 | static TRN active_list_min, active_list_max; |
| 37 | /* list of committed transactions in the trid order */ |
| 38 | static TRN committed_list_min, committed_list_max; |
| 39 | |
| 40 | /* a counter, used to generate transaction ids */ |
| 41 | static TrID global_trid_generator; |
| 42 | |
| 43 | /* |
| 44 | The minimum existing transaction id for trnman_get_min_trid() |
| 45 | The default value is used when transaction manager not initialize; |
| 46 | Probably called from maria_chk |
| 47 | */ |
| 48 | static TrID trid_min_read_from= MAX_TRID; |
| 49 | |
| 50 | /* the mutex for everything above */ |
| 51 | static mysql_mutex_t LOCK_trn_list; |
| 52 | |
| 53 | /* LIFO pool of unused TRN structured for reuse */ |
| 54 | static TRN *pool; |
| 55 | |
| 56 | /* a hash for committed transactions that maps trid to a TRN structure */ |
| 57 | static LF_HASH trid_to_trn; |
| 58 | |
| 59 | /* an array that maps short_id of an active transaction to a TRN structure */ |
| 60 | static TRN **short_trid_to_active_trn; |
| 61 | |
| 62 | /* locks for short_trid_to_active_trn and pool */ |
| 63 | static my_bool default_trnman_end_trans_hook(TRN *, my_bool, my_bool); |
| 64 | static void trnman_free_trn(TRN *); |
| 65 | |
| 66 | my_bool (*trnman_end_trans_hook)(TRN *, my_bool, my_bool)= |
| 67 | default_trnman_end_trans_hook; |
| 68 | |
| 69 | /* |
| 70 | Simple interface functions |
| 71 | QQ: if they stay so simple, should we make them inline? |
| 72 | */ |
| 73 | |
| 74 | uint trnman_increment_locked_tables(TRN *trn) |
| 75 | { |
| 76 | return trn->locked_tables++; |
| 77 | } |
| 78 | |
| 79 | uint trnman_has_locked_tables(TRN *trn) |
| 80 | { |
| 81 | return trn->locked_tables; |
| 82 | } |
| 83 | |
| 84 | uint trnman_decrement_locked_tables(TRN *trn) |
| 85 | { |
| 86 | return --trn->locked_tables; |
| 87 | } |
| 88 | |
| 89 | void trnman_reset_locked_tables(TRN *trn, uint locked_tables) |
| 90 | { |
| 91 | trn->locked_tables= locked_tables; |
| 92 | } |
| 93 | |
| 94 | #ifdef EXTRA_DEBUG |
| 95 | uint16 trnman_get_flags(TRN *trn) |
| 96 | { |
| 97 | return trn->flags; |
| 98 | } |
| 99 | |
| 100 | void trnman_set_flags(TRN *trn, uint16 flags) |
| 101 | { |
| 102 | trn->flags= flags; |
| 103 | } |
| 104 | #endif |
| 105 | |
| 106 | /** Wake up threads waiting for this transaction */ |
| 107 | static void wt_thd_release_self(TRN *trn) |
| 108 | { |
| 109 | if (trn->wt) |
| 110 | { |
| 111 | WT_RESOURCE_ID rc; |
| 112 | rc.type= &ma_rc_dup_unique; |
| 113 | rc.value= (intptr)trn; |
| 114 | wt_thd_release(trn->wt, & rc); |
| 115 | trn->wt= 0; |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | static my_bool |
| 120 | default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)), |
| 121 | my_bool commit __attribute__ ((unused)), |
| 122 | my_bool active_transactions |
| 123 | __attribute__ ((unused))) |
| 124 | { |
| 125 | return 0; |
| 126 | } |
| 127 | |
| 128 | |
| 129 | static uchar *trn_get_hash_key(const uchar *trn, size_t *len, |
| 130 | my_bool unused __attribute__ ((unused))) |
| 131 | { |
| 132 | *len= sizeof(TrID); |
| 133 | return (uchar *) & ((*((TRN **)trn))->trid); |
| 134 | } |
| 135 | |
| 136 | |
| 137 | /** |
| 138 | @brief Initializes transaction manager. |
| 139 | |
| 140 | @param initial_trid Generated TrIDs will start from initial_trid+1. |
| 141 | |
| 142 | @return Operation status |
| 143 | @retval 0 OK |
| 144 | @retval !=0 Error |
| 145 | */ |
| 146 | |
| 147 | int trnman_init(TrID initial_trid) |
| 148 | { |
| 149 | DBUG_ENTER("trnman_init" ); |
| 150 | DBUG_PRINT("enter" , ("initial_trid: %lu" , (ulong) initial_trid)); |
| 151 | |
| 152 | short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), |
| 153 | MYF(MY_WME|MY_ZEROFILL)); |
| 154 | if (unlikely(!short_trid_to_active_trn)) |
| 155 | DBUG_RETURN(1); |
| 156 | short_trid_to_active_trn--; /* min short_id is 1 */ |
| 157 | |
| 158 | /* |
| 159 | Initialize lists. |
| 160 | active_list_max.min_read_from must be larger than any trid, |
| 161 | so that when an active list is empty we would could free |
| 162 | all committed list. |
| 163 | And committed_list_max itself can not be freed so |
| 164 | committed_list_max.commit_trid must not be smaller that |
| 165 | active_list_max.min_read_from |
| 166 | */ |
| 167 | |
| 168 | active_list_max.trid= active_list_min.trid= 0; |
| 169 | active_list_max.min_read_from= MAX_TRID; |
| 170 | active_list_max.next= active_list_min.prev= 0; |
| 171 | active_list_max.prev= &active_list_min; |
| 172 | active_list_min.next= &active_list_max; |
| 173 | |
| 174 | committed_list_max.commit_trid= MAX_TRID; |
| 175 | committed_list_max.next= committed_list_min.prev= 0; |
| 176 | committed_list_max.prev= &committed_list_min; |
| 177 | committed_list_min.next= &committed_list_max; |
| 178 | |
| 179 | trnman_active_transactions= 0; |
| 180 | trnman_committed_transactions= 0; |
| 181 | trnman_allocated_transactions= 0; |
| 182 | /* This is needed for recovery and repair */ |
| 183 | dummy_transaction_object.min_read_from= ~(TrID) 0; |
| 184 | dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID; |
| 185 | |
| 186 | pool= 0; |
| 187 | global_trid_generator= initial_trid; |
| 188 | trid_min_read_from= initial_trid; |
| 189 | lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE, |
| 190 | 0, 0, trn_get_hash_key, 0); |
| 191 | DBUG_PRINT("info" , ("mysql_mutex_init LOCK_trn_list" )); |
| 192 | mysql_mutex_init(key_LOCK_trn_list, &LOCK_trn_list, MY_MUTEX_INIT_FAST); |
| 193 | |
| 194 | DBUG_RETURN(0); |
| 195 | } |
| 196 | |
| 197 | /* |
| 198 | NOTE |
| 199 | this could only be called in the "idle" state - no transaction can be |
| 200 | running. See asserts below. |
| 201 | */ |
| 202 | void trnman_destroy() |
| 203 | { |
| 204 | DBUG_ENTER("trnman_destroy" ); |
| 205 | |
| 206 | if (short_trid_to_active_trn == NULL) /* trnman already destroyed */ |
| 207 | DBUG_VOID_RETURN; |
| 208 | DBUG_ASSERT(trid_to_trn.count == 0); |
| 209 | DBUG_ASSERT(trnman_active_transactions == 0); |
| 210 | DBUG_ASSERT(trnman_committed_transactions == 0); |
| 211 | DBUG_ASSERT(active_list_max.prev == &active_list_min); |
| 212 | DBUG_ASSERT(active_list_min.next == &active_list_max); |
| 213 | DBUG_ASSERT(committed_list_max.prev == &committed_list_min); |
| 214 | DBUG_ASSERT(committed_list_min.next == &committed_list_max); |
| 215 | while (pool) |
| 216 | { |
| 217 | TRN *trn= pool; |
| 218 | pool= pool->next; |
| 219 | DBUG_ASSERT(trn->wt == NULL); |
| 220 | mysql_mutex_destroy(&trn->state_lock); |
| 221 | my_free(trn); |
| 222 | } |
| 223 | lf_hash_destroy(&trid_to_trn); |
| 224 | DBUG_PRINT("info" , ("mysql_mutex_destroy LOCK_trn_list" )); |
| 225 | mysql_mutex_destroy(&LOCK_trn_list); |
| 226 | my_free(short_trid_to_active_trn+1); |
| 227 | short_trid_to_active_trn= NULL; |
| 228 | |
| 229 | DBUG_VOID_RETURN; |
| 230 | } |
| 231 | |
| 232 | /* |
| 233 | NOTE |
| 234 | TrID is limited to 6 bytes. Initial value of the generator |
| 235 | is set by the recovery code - being read from the last checkpoint |
| 236 | (or 1 on a first run). |
| 237 | */ |
| 238 | static TrID new_trid() |
| 239 | { |
| 240 | DBUG_ENTER("new_trid" ); |
| 241 | DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); |
| 242 | DBUG_PRINT("info" , ("mysql_mutex_assert_owner LOCK_trn_list" )); |
| 243 | mysql_mutex_assert_owner(&LOCK_trn_list); |
| 244 | DBUG_RETURN(++global_trid_generator); |
| 245 | } |
| 246 | |
| 247 | static uint get_short_trid(TRN *trn) |
| 248 | { |
| 249 | int i= (int) ((global_trid_generator + (intptr)trn) * 312089 % |
| 250 | SHORT_TRID_MAX) + 1; |
| 251 | uint res=0; |
| 252 | |
| 253 | for ( ; !res ; i= 1) |
| 254 | { |
| 255 | for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */ |
| 256 | { |
| 257 | void *tmp= NULL; |
| 258 | if (short_trid_to_active_trn[i] == NULL && |
| 259 | my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn)) |
| 260 | { |
| 261 | res= i; |
| 262 | break; |
| 263 | } |
| 264 | } |
| 265 | } |
| 266 | return res; |
| 267 | } |
| 268 | |
| 269 | /** |
| 270 | Allocates and initialzies a new TRN object |
| 271 | |
| 272 | @note the 'wt' parameter can only be 0 in a single-threaded code (or, |
| 273 | generally, where threads cannot block each other), otherwise the |
| 274 | first call to the deadlock detector will sigsegv. |
| 275 | */ |
| 276 | |
| 277 | TRN *trnman_new_trn(WT_THD *wt) |
| 278 | { |
| 279 | int res; |
| 280 | TRN *trn; |
| 281 | union { TRN *trn; void *v; } tmp; |
| 282 | DBUG_ENTER("trnman_new_trn" ); |
| 283 | |
| 284 | /* |
| 285 | we have a mutex, to do simple things under it - allocate a TRN, |
| 286 | increment trnman_active_transactions, set trn->min_read_from. |
| 287 | |
| 288 | Note that all the above is fast. generating short_id may be slow, |
| 289 | as it involves scanning a large array - so it's done outside of the |
| 290 | mutex. |
| 291 | */ |
| 292 | |
| 293 | DBUG_PRINT("info" , ("mysql_mutex_lock LOCK_trn_list" )); |
| 294 | mysql_mutex_lock(&LOCK_trn_list); |
| 295 | |
| 296 | /* Allocating a new TRN structure */ |
| 297 | tmp.trn= pool; |
| 298 | /* |
| 299 | Popping an unused TRN from the pool |
| 300 | (ABA isn't possible, we're behind a mutex |
| 301 | */ |
| 302 | while (tmp.trn && !my_atomic_casptr((void **)(char*) &pool, &tmp.v, |
| 303 | (void *)tmp.trn->next)) |
| 304 | /* no-op */; |
| 305 | |
| 306 | /* Nothing in the pool ? Allocate a new one */ |
| 307 | if (!(trn= tmp.trn)) |
| 308 | { |
| 309 | /* |
| 310 | trn should be completely initalized at create time to allow |
| 311 | one to keep a known state on it. |
| 312 | (Like redo_lns, which is assumed to be 0 at start of row handling |
| 313 | and reset to zero before end of row handling) |
| 314 | */ |
| 315 | trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME | MY_ZEROFILL)); |
| 316 | if (unlikely(!trn)) |
| 317 | { |
| 318 | DBUG_PRINT("info" , ("mysql_mutex_unlock LOCK_trn_list" )); |
| 319 | mysql_mutex_unlock(&LOCK_trn_list); |
| 320 | return 0; |
| 321 | } |
| 322 | trnman_allocated_transactions++; |
| 323 | mysql_mutex_init(key_TRN_state_lock, &trn->state_lock, MY_MUTEX_INIT_FAST); |
| 324 | } |
| 325 | trn->wt= wt; |
| 326 | trn->pins= lf_hash_get_pins(&trid_to_trn); |
| 327 | if (!trn->pins) |
| 328 | { |
| 329 | trnman_free_trn(trn); |
| 330 | mysql_mutex_unlock(&LOCK_trn_list); |
| 331 | return 0; |
| 332 | } |
| 333 | |
| 334 | trnman_active_transactions++; |
| 335 | |
| 336 | trn->min_read_from= active_list_min.next->trid; |
| 337 | |
| 338 | trn->trid= new_trid(); |
| 339 | |
| 340 | trn->next= &active_list_max; |
| 341 | trn->prev= active_list_max.prev; |
| 342 | active_list_max.prev= trn->prev->next= trn; |
| 343 | trid_min_read_from= active_list_min.next->min_read_from; |
| 344 | DBUG_PRINT("info" , ("mysql_mutex_unlock LOCK_trn_list" )); |
| 345 | mysql_mutex_unlock(&LOCK_trn_list); |
| 346 | |
| 347 | if (unlikely(!trn->min_read_from)) |
| 348 | { |
| 349 | /* |
| 350 | We are the only transaction. Set min_read_from so that we can read |
| 351 | our own rows |
| 352 | */ |
| 353 | trn->min_read_from= trn->trid + 1; |
| 354 | } |
| 355 | |
| 356 | /* no other transaction can read changes done by this one */ |
| 357 | trn->commit_trid= MAX_TRID; |
| 358 | trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0; |
| 359 | trn->used_tables= 0; |
| 360 | |
| 361 | trn->locked_tables= 0; |
| 362 | trn->flags= 0; |
| 363 | |
| 364 | /* |
| 365 | only after the following function TRN is considered initialized, |
| 366 | so it must be done the last |
| 367 | */ |
| 368 | mysql_mutex_lock(&trn->state_lock); |
| 369 | trn->short_id= get_short_trid(trn); |
| 370 | mysql_mutex_unlock(&trn->state_lock); |
| 371 | |
| 372 | res= lf_hash_insert(&trid_to_trn, trn->pins, &trn); |
| 373 | DBUG_ASSERT(res <= 0); |
| 374 | if (res) |
| 375 | { |
| 376 | trnman_end_trn(trn, 0); |
| 377 | return 0; |
| 378 | } |
| 379 | |
| 380 | DBUG_PRINT("exit" , ("trn: %p trid: 0x%lu min_read_from: 0x%lu" , |
| 381 | trn, (ulong) trn->trid, (ulong) trn->min_read_from)); |
| 382 | |
| 383 | DBUG_RETURN(trn); |
| 384 | } |
| 385 | |
| 386 | /* |
| 387 | remove a trn from the active list. |
| 388 | if necessary - move to committed list and set commit_trid |
| 389 | |
| 390 | NOTE |
| 391 | Locks are released at the end. In particular, after placing the |
| 392 | transaction in commit list, and after setting commit_trid. It's |
| 393 | important, as commit_trid affects visibility. Locks don't affect |
| 394 | anything they simply delay execution of other threads - they could be |
| 395 | released arbitrarily late. In other words, when locks are released it |
| 396 | serves as a start banner for other threads, they start to run. So |
| 397 | everything they may need must be ready at that point. |
| 398 | |
| 399 | RETURN |
| 400 | 0 ok |
| 401 | 1 error |
| 402 | */ |
| 403 | my_bool trnman_end_trn(TRN *trn, my_bool commit) |
| 404 | { |
| 405 | int res= 1; |
| 406 | uint16 cached_short_id= trn->short_id; /* we have to cache it, see below */ |
| 407 | TRN *free_me= 0; |
| 408 | LF_PINS *pins= trn->pins; |
| 409 | DBUG_ENTER("trnman_end_trn" ); |
| 410 | DBUG_PRINT("enter" , ("trn: %p commit: %d" , trn, commit)); |
| 411 | |
| 412 | /* if a rollback, all UNDO records should have been executed */ |
| 413 | DBUG_ASSERT(commit || trn->undo_lsn == 0); |
| 414 | DBUG_ASSERT(trn != &dummy_transaction_object); |
| 415 | DBUG_PRINT("info" , ("mysql_mutex_lock LOCK_trn_list" )); |
| 416 | |
| 417 | mysql_mutex_lock(&LOCK_trn_list); |
| 418 | |
| 419 | /* remove from active list */ |
| 420 | trn->next->prev= trn->prev; |
| 421 | trn->prev->next= trn->next; |
| 422 | |
| 423 | /* |
| 424 | if trn was the oldest active transaction, now that it goes away there |
| 425 | may be committed transactions in the list which no active transaction |
| 426 | needs to bother about - clean up the committed list |
| 427 | */ |
| 428 | if (trn->prev == &active_list_min) |
| 429 | { |
| 430 | uint free_me_count; |
| 431 | TRN *t; |
| 432 | for (t= committed_list_min.next, free_me_count= 0; |
| 433 | t->commit_trid < active_list_min.next->min_read_from; |
| 434 | t= t->next, free_me_count++) /* no-op */; |
| 435 | |
| 436 | DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) || |
| 437 | (t == committed_list_min.next && free_me_count == 0)); |
| 438 | /* found transactions committed before the oldest active one */ |
| 439 | if (t != committed_list_min.next) |
| 440 | { |
| 441 | free_me= committed_list_min.next; |
| 442 | committed_list_min.next= t; |
| 443 | t->prev->next= 0; |
| 444 | t->prev= &committed_list_min; |
| 445 | trnman_committed_transactions-= free_me_count; |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | mysql_mutex_lock(&trn->state_lock); |
| 450 | if (commit) |
| 451 | trn->commit_trid= global_trid_generator; |
| 452 | wt_thd_release_self(trn); |
| 453 | mysql_mutex_unlock(&trn->state_lock); |
| 454 | |
| 455 | /* |
| 456 | if transaction is committed and it was not the only active transaction - |
| 457 | add it to the committed list |
| 458 | */ |
| 459 | if (commit && active_list_min.next != &active_list_max) |
| 460 | { |
| 461 | trn->next= &committed_list_max; |
| 462 | trn->prev= committed_list_max.prev; |
| 463 | trnman_committed_transactions++; |
| 464 | committed_list_max.prev= trn->prev->next= trn; |
| 465 | } |
| 466 | else |
| 467 | { |
| 468 | trn->next= free_me; |
| 469 | free_me= trn; |
| 470 | } |
| 471 | trid_min_read_from= active_list_min.next->min_read_from; |
| 472 | |
| 473 | if ((*trnman_end_trans_hook)(trn, commit, |
| 474 | active_list_min.next != &active_list_max)) |
| 475 | res= -1; |
| 476 | trnman_active_transactions--; |
| 477 | |
| 478 | DBUG_PRINT("info" , ("mysql_mutex_unlock LOCK_trn_list" )); |
| 479 | mysql_mutex_unlock(&LOCK_trn_list); |
| 480 | |
| 481 | /* |
| 482 | the rest is done outside of a critical section |
| 483 | |
| 484 | note that we don't own trn anymore, it may be in a shared list now. |
| 485 | Thus, we cannot dereference it, and must use cached_short_id below. |
| 486 | */ |
| 487 | my_atomic_storeptr((void **)&short_trid_to_active_trn[cached_short_id], 0); |
| 488 | |
| 489 | /* |
| 490 | we, under the mutex, removed going-in-free_me transactions from the |
| 491 | active and committed lists, thus nobody else may see them when it scans |
| 492 | those lists, and thus nobody may want to free them. Now we don't |
| 493 | need a mutex to access free_me list |
| 494 | */ |
| 495 | /* QQ: send them to the purge thread */ |
| 496 | while (free_me) |
| 497 | { |
| 498 | TRN *t= free_me; |
| 499 | free_me= free_me->next; |
| 500 | |
| 501 | /* ignore OOM. it's harmless, and we can do nothing here anyway */ |
| 502 | (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID)); |
| 503 | |
| 504 | trnman_free_trn(t); |
| 505 | } |
| 506 | |
| 507 | lf_hash_put_pins(pins); |
| 508 | |
| 509 | DBUG_RETURN(res < 0); |
| 510 | } |
| 511 | |
| 512 | /* |
| 513 | free a trn (add to the pool, that is) |
| 514 | note - we can never really free() a TRN if there's at least one other |
| 515 | running transaction - see, e.g., how lock waits are implemented in |
| 516 | lockman.c |
| 517 | The same is true for other lock-free data structures too. We may need some |
| 518 | kind of FLUSH command to reset them all - ensuring that no transactions are |
| 519 | running. It may even be called automatically on checkpoints if no |
| 520 | transactions are running. |
| 521 | */ |
| 522 | static void trnman_free_trn(TRN *trn) |
| 523 | { |
| 524 | /* |
| 525 | union is to solve strict aliasing issue. |
| 526 | without it gcc 3.4.3 doesn't notice that updating *(void **)&tmp |
| 527 | modifies the value of tmp. |
| 528 | */ |
| 529 | union { TRN *trn; void *v; } tmp; |
| 530 | |
| 531 | mysql_mutex_lock(&trn->state_lock); |
| 532 | trn->short_id= 0; |
| 533 | mysql_mutex_unlock(&trn->state_lock); |
| 534 | |
| 535 | tmp.trn= pool; |
| 536 | |
| 537 | do |
| 538 | { |
| 539 | /* |
| 540 | without this volatile cast gcc-3.4.4 moves the assignment |
| 541 | down after the loop at -O2 |
| 542 | */ |
| 543 | *(TRN * volatile *)&(trn->next)= tmp.trn; |
| 544 | } while (!my_atomic_casptr((void **)(char*)&pool, &tmp.v, trn)); |
| 545 | } |
| 546 | |
| 547 | /* |
| 548 | NOTE |
| 549 | here we access the hash in a lock-free manner. |
| 550 | It's safe, a 'found' TRN can never be freed/reused before we access it. |
| 551 | In fact, it cannot be freed before 'trn' ends, because a 'found' TRN |
| 552 | can only be removed from the hash when: |
| 553 | found->commit_trid < ALL (trn->min_read_from) |
| 554 | that is, at least |
| 555 | found->commit_trid < trn->min_read_from |
| 556 | but |
| 557 | found->trid >= trn->min_read_from |
| 558 | and |
| 559 | found->commit_trid > found->trid |
| 560 | |
| 561 | RETURN |
| 562 | 1 can |
| 563 | 0 cannot |
| 564 | -1 error (OOM) |
| 565 | */ |
| 566 | int trnman_can_read_from(TRN *trn, TrID trid) |
| 567 | { |
| 568 | TRN **found; |
| 569 | my_bool can; |
| 570 | |
| 571 | if (trid < trn->min_read_from) |
| 572 | return 1; /* Row is visible by all transactions in the system */ |
| 573 | |
| 574 | if (trid >= trn->trid) |
| 575 | { |
| 576 | /* |
| 577 | We have now two cases |
| 578 | trid > trn->trid, in which case the row is from a new transaction |
| 579 | and not visible, in which case we should return 0. |
| 580 | trid == trn->trid in which case the row is from the current transaction |
| 581 | and we should return 1 |
| 582 | */ |
| 583 | return trid == trn->trid; |
| 584 | } |
| 585 | |
| 586 | found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); |
| 587 | if (found == NULL) |
| 588 | return 0; /* not in the hash of transactions = cannot read */ |
| 589 | if (found == MY_ERRPTR) |
| 590 | return -1; |
| 591 | |
| 592 | can= (*found)->commit_trid < trn->trid; |
| 593 | lf_hash_search_unpin(trn->pins); |
| 594 | return can; |
| 595 | } |
| 596 | |
| 597 | /** |
| 598 | Finds a TRN by its TrID |
| 599 | |
| 600 | @param trn current trn. Needed for pinning pointers (see lf_pin) |
| 601 | @param trid trid to search for |
| 602 | |
| 603 | @return found trn or 0 |
| 604 | |
| 605 | @note that trn is returned with its state locked! |
| 606 | */ |
| 607 | TRN *trnman_trid_to_trn(TRN *trn, TrID trid) |
| 608 | { |
| 609 | TRN **found; |
| 610 | |
| 611 | if (trid < trn->min_read_from) |
| 612 | return 0; /* it's committed eons ago */ |
| 613 | |
| 614 | found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); |
| 615 | if (found == NULL || found == MY_ERRPTR) |
| 616 | return 0; /* no luck */ |
| 617 | |
| 618 | /* we've found something */ |
| 619 | mysql_mutex_lock(&(*found)->state_lock); |
| 620 | |
| 621 | if ((*found)->short_id == 0) |
| 622 | { |
| 623 | mysql_mutex_unlock(&(*found)->state_lock); |
| 624 | lf_hash_search_unpin(trn->pins); |
| 625 | return 0; /* but it was a ghost */ |
| 626 | } |
| 627 | lf_hash_search_unpin(trn->pins); |
| 628 | |
| 629 | /* Gotcha! */ |
| 630 | return *found; |
| 631 | } |
| 632 | |
| 633 | /* TODO: the stubs below are waiting for savepoints to be implemented */ |
| 634 | |
| 635 | void trnman_new_statement(TRN *trn __attribute__ ((unused))) |
| 636 | { |
| 637 | } |
| 638 | |
| 639 | void trnman_rollback_statement(TRN *trn __attribute__ ((unused))) |
| 640 | { |
| 641 | } |
| 642 | |
| 643 | |
| 644 | /** |
| 645 | @brief Allocates buffers and stores in them some info about transactions |
| 646 | |
| 647 | Does the allocation because the caller cannot know the size itself. |
| 648 | Memory freeing is to be done by the caller (if the "str" member of the |
| 649 | LEX_STRING is not NULL). |
| 650 | The caller has the intention of doing checkpoints. |
| 651 | |
| 652 | @param[out] str_act pointer to where the allocated buffer, |
| 653 | and its size, will be put; buffer will be filled |
| 654 | with info about active transactions |
| 655 | @param[out] str_com pointer to where the allocated buffer, |
| 656 | and its size, will be put; buffer will be filled |
| 657 | with info about committed transactions |
| 658 | @param[out] min_first_undo_lsn pointer to where the minimum |
| 659 | first_undo_lsn of all transactions will be put |
| 660 | |
| 661 | @return Operation status |
| 662 | @retval 0 OK |
| 663 | @retval 1 Error |
| 664 | */ |
| 665 | |
| 666 | my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com, |
| 667 | LSN *min_rec_lsn, LSN *min_first_undo_lsn) |
| 668 | { |
| 669 | my_bool error; |
| 670 | TRN *trn; |
| 671 | char *ptr; |
| 672 | uint stored_transactions= 0; |
| 673 | LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX; |
| 674 | DBUG_ENTER("trnman_collect_transactions" ); |
| 675 | |
| 676 | DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str)); |
| 677 | |
| 678 | /* validate the use of read_non_atomic() in general: */ |
| 679 | compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8)); |
| 680 | mysql_mutex_lock(&LOCK_trn_list); |
| 681 | str_act->length= 2 + /* number of active transactions */ |
| 682 | LSN_STORE_SIZE + /* minimum of their rec_lsn */ |
| 683 | TRANSID_SIZE + /* current TrID generator value */ |
| 684 | (2 + /* short id */ |
| 685 | 6 + /* long id */ |
| 686 | LSN_STORE_SIZE + /* undo_lsn */ |
| 687 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
| 688 | LSN_STORE_SIZE + /* undo_purge_lsn */ |
| 689 | #endif |
| 690 | LSN_STORE_SIZE /* first_undo_lsn */ |
| 691 | ) * trnman_active_transactions; |
| 692 | str_com->length= 4 + /* number of committed transactions */ |
| 693 | (6 + /* long id */ |
| 694 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
| 695 | LSN_STORE_SIZE + /* undo_purge_lsn */ |
| 696 | #endif |
| 697 | LSN_STORE_SIZE /* first_undo_lsn */ |
| 698 | ) * trnman_committed_transactions; |
| 699 | if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) || |
| 700 | (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME))))) |
| 701 | goto err; |
| 702 | /* First, the active transactions */ |
| 703 | ptr= str_act->str + 2 + LSN_STORE_SIZE; |
| 704 | transid_store(ptr, global_trid_generator); |
| 705 | ptr+= TRANSID_SIZE; |
| 706 | for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) |
| 707 | { |
| 708 | uint sid; |
| 709 | LSN rec_lsn, undo_lsn, first_undo_lsn; |
| 710 | mysql_mutex_lock(&trn->state_lock); |
| 711 | sid= trn->short_id; |
| 712 | mysql_mutex_unlock(&trn->state_lock); |
| 713 | if (sid == 0) |
| 714 | { |
| 715 | /* |
| 716 | Not even inited, has done nothing. Or it is the |
| 717 | dummy_transaction_object, which does only non-transactional |
| 718 | immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so |
| 719 | can be forgotten for Checkpoint. |
| 720 | */ |
| 721 | continue; |
| 722 | } |
| 723 | /* needed for low-water mark calculation */ |
| 724 | if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) && |
| 725 | (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0)) |
| 726 | minimum_rec_lsn= rec_lsn; |
| 727 | /* |
| 728 | If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be |
| 729 | discovered when seeing that log record which is for sure located after |
| 730 | checkpoint_start_log_horizon. |
| 731 | */ |
| 732 | if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) & |
| 733 | TRANSACTION_LOGGED_LONG_ID) == 0) |
| 734 | continue; |
| 735 | /* |
| 736 | On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log |
| 737 | records; so we must include trn in the checkpoint now, because we cannot |
| 738 | count on LOGREC_LONG_TRANSACTION_ID (as we are already past it). |
| 739 | */ |
| 740 | undo_lsn= trn->undo_lsn; |
| 741 | stored_transactions++; |
| 742 | int2store(ptr, sid); |
| 743 | ptr+= 2; |
| 744 | int6store(ptr, trn->trid); |
| 745 | ptr+= 6; |
| 746 | lsn_store(ptr, undo_lsn); /* needed for rollback */ |
| 747 | ptr+= LSN_STORE_SIZE; |
| 748 | /* needed for low-water mark calculation */ |
| 749 | if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) && |
| 750 | (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)) |
| 751 | minimum_first_undo_lsn= first_undo_lsn; |
| 752 | lsn_store(ptr, first_undo_lsn); |
| 753 | ptr+= LSN_STORE_SIZE; |
| 754 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
| 755 | /* to know where purging should start (last delete of this trn) */ |
| 756 | lsn_store(ptr, trn->undo_purge_lsn); |
| 757 | ptr+= LSN_STORE_SIZE; |
| 758 | #endif |
| 759 | /** |
| 760 | @todo RECOVERY: add a comment explaining why we can dirtily read some |
| 761 | vars, inspired by the text of "assumption 8" in WL#3072 |
| 762 | */ |
| 763 | } |
| 764 | str_act->length= ptr - str_act->str; /* as we maybe over-estimated */ |
| 765 | ptr= str_act->str; |
| 766 | DBUG_PRINT("info" ,("collected %u active transactions" , |
| 767 | (uint)stored_transactions)); |
| 768 | int2store(ptr, stored_transactions); |
| 769 | ptr+= 2; |
| 770 | /* this LSN influences how REDOs for any page can be ignored by Recovery */ |
| 771 | lsn_store(ptr, minimum_rec_lsn); |
| 772 | /* one day there will also be a list of prepared transactions */ |
| 773 | /* do the same for committed ones */ |
| 774 | ptr= str_com->str; |
| 775 | int4store(ptr, trnman_committed_transactions); |
| 776 | ptr+= 4; |
| 777 | DBUG_PRINT("info" ,("collected %u committed transactions" , |
| 778 | (uint)trnman_committed_transactions)); |
| 779 | for (trn= committed_list_min.next; trn != &committed_list_max; |
| 780 | trn= trn->next) |
| 781 | { |
| 782 | LSN first_undo_lsn; |
| 783 | int6store(ptr, trn->trid); |
| 784 | ptr+= 6; |
| 785 | #ifdef MARIA_VERSIONING /* not enabled yet */ |
| 786 | lsn_store(ptr, trn->undo_purge_lsn); |
| 787 | ptr+= LSN_STORE_SIZE; |
| 788 | #endif |
| 789 | first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn); |
| 790 | if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0) |
| 791 | minimum_first_undo_lsn= first_undo_lsn; |
| 792 | lsn_store(ptr, first_undo_lsn); |
| 793 | ptr+= LSN_STORE_SIZE; |
| 794 | } |
| 795 | /* |
| 796 | TODO: if we see there exists no transaction (active and committed) we can |
| 797 | tell the lock-free structures to do some freeing (my_free()). |
| 798 | */ |
| 799 | error= 0; |
| 800 | *min_rec_lsn= minimum_rec_lsn; |
| 801 | *min_first_undo_lsn= minimum_first_undo_lsn; |
| 802 | goto end; |
| 803 | err: |
| 804 | error= 1; |
| 805 | end: |
| 806 | mysql_mutex_unlock(&LOCK_trn_list); |
| 807 | DBUG_RETURN(error); |
| 808 | } |
| 809 | |
| 810 | |
| 811 | TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid) |
| 812 | { |
| 813 | TrID old_trid_generator= global_trid_generator; |
| 814 | TRN *trn; |
| 815 | DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded); |
| 816 | global_trid_generator= longid-1; /* force a correct trid in the new trn */ |
| 817 | if (unlikely((trn= trnman_new_trn(NULL)) == NULL)) |
| 818 | return NULL; |
| 819 | /* deallocate excessive allocations of trnman_new_trn() */ |
| 820 | global_trid_generator= old_trid_generator; |
| 821 | set_if_bigger(global_trid_generator, longid); |
| 822 | short_trid_to_active_trn[trn->short_id]= 0; |
| 823 | DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL); |
| 824 | short_trid_to_active_trn[shortid]= trn; |
| 825 | trn->short_id= shortid; |
| 826 | return trn; |
| 827 | } |
| 828 | |
| 829 | |
| 830 | TRN *trnman_get_any_trn() |
| 831 | { |
| 832 | TRN *trn= active_list_min.next; |
| 833 | return (trn != &active_list_max) ? trn : NULL; |
| 834 | } |
| 835 | |
| 836 | |
| 837 | /** |
| 838 | Returns the minimum existing transaction id. May return a too small |
| 839 | number in race conditions, but this is ok as the value is used to |
| 840 | remove not visible transid from index/rows. |
| 841 | */ |
| 842 | |
| 843 | TrID trnman_get_min_trid() |
| 844 | { |
| 845 | return trid_min_read_from; |
| 846 | } |
| 847 | |
| 848 | |
| 849 | /** |
| 850 | Returns the minimum possible transaction id |
| 851 | |
| 852 | @notes |
| 853 | If there is no transactions running, returns number for next running |
| 854 | transaction. |
| 855 | If one has an active transaction, the returned number will be less or |
| 856 | equal to this. If one is not running in a transaction one will ge the |
| 857 | number for the next started transaction. This is used in create table |
| 858 | to get a safe minimum trid to use. |
| 859 | */ |
| 860 | |
| 861 | TrID trnman_get_min_safe_trid() |
| 862 | { |
| 863 | TrID trid; |
| 864 | mysql_mutex_lock(&LOCK_trn_list); |
| 865 | trid= MY_MIN(active_list_min.next->min_read_from, |
| 866 | global_trid_generator); |
| 867 | mysql_mutex_unlock(&LOCK_trn_list); |
| 868 | return trid; |
| 869 | } |
| 870 | |
| 871 | |
| 872 | /** |
| 873 | Returns maximum transaction id given to a transaction so far. |
| 874 | */ |
| 875 | |
| 876 | TrID trnman_get_max_trid() |
| 877 | { |
| 878 | TrID id; |
| 879 | if (short_trid_to_active_trn == NULL) |
| 880 | return 0; |
| 881 | mysql_mutex_lock(&LOCK_trn_list); |
| 882 | id= global_trid_generator; |
| 883 | mysql_mutex_unlock(&LOCK_trn_list); |
| 884 | return id; |
| 885 | } |
| 886 | |
| 887 | /** |
| 888 | @brief Check if there exist an active transaction between two commit_id's |
| 889 | |
| 890 | @todo |
| 891 | Improve speed of this. |
| 892 | - Store transactions in tree or skip list |
| 893 | - Have function to copying all active transaction id's to b-tree |
| 894 | and use b-tree for checking states. This could be a big win |
| 895 | for checkpoint that will call this function for a lot of objects. |
| 896 | |
| 897 | @return |
| 898 | 0 No transaction exists |
| 899 | 1 There is at least on active transaction in the given range |
| 900 | */ |
| 901 | |
| 902 | my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id, |
| 903 | my_bool trnman_is_locked) |
| 904 | { |
| 905 | TRN *trn; |
| 906 | my_bool ret= 0; |
| 907 | |
| 908 | if (!trnman_is_locked) |
| 909 | mysql_mutex_lock(&LOCK_trn_list); |
| 910 | mysql_mutex_assert_owner(&LOCK_trn_list); |
| 911 | for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) |
| 912 | { |
| 913 | /* |
| 914 | We use <= for max_id as max_id is a commit_trid and trn->trid |
| 915 | is transaction id. When calculating commit_trid we use the |
| 916 | current value of global_trid_generator. global_trid_generator is |
| 917 | incremented for each new transaction. |
| 918 | |
| 919 | For example, assuming we have |
| 920 | min_id = 5 |
| 921 | max_id = 10 |
| 922 | |
| 923 | A trid of value 5 can't see the history event between 5 & 10 |
| 924 | at it vas started before min_id 5 was committed. |
| 925 | A trid of value 10 can't see the next history event (max_id = 10) |
| 926 | as it started before this was committed. In this case it must use |
| 927 | the this event. |
| 928 | */ |
| 929 | if (trn->trid > min_id && trn->trid <= max_id) |
| 930 | { |
| 931 | ret= 1; |
| 932 | break; |
| 933 | } |
| 934 | } |
| 935 | if (!trnman_is_locked) |
| 936 | mysql_mutex_unlock(&LOCK_trn_list); |
| 937 | return ret; |
| 938 | } |
| 939 | |
| 940 | |
| 941 | /** |
| 942 | lock transaction list |
| 943 | */ |
| 944 | |
| 945 | void trnman_lock() |
| 946 | { |
| 947 | mysql_mutex_lock(&LOCK_trn_list); |
| 948 | } |
| 949 | |
| 950 | |
| 951 | /** |
| 952 | unlock transaction list |
| 953 | */ |
| 954 | |
| 955 | void trnman_unlock() |
| 956 | { |
| 957 | mysql_mutex_unlock(&LOCK_trn_list); |
| 958 | } |
| 959 | |
| 960 | |
| 961 | /** |
| 962 | Is trman initialized |
| 963 | */ |
| 964 | |
| 965 | my_bool trman_is_inited() |
| 966 | { |
| 967 | return (short_trid_to_active_trn != NULL); |
| 968 | } |
| 969 | |