| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #include <db.h> |
| 40 | |
| 41 | #include <portability/toku_race_tools.h> |
| 42 | #include <portability/toku_atomic.h> |
| 43 | |
| 44 | #include <ft/cachetable/checkpoint.h> |
| 45 | #include <ft/log_header.h> |
| 46 | #include <ft/txn/txn_manager.h> |
| 47 | |
| 48 | |
| 49 | #include "ydb-internal.h" |
| 50 | #include "ydb_txn.h" |
| 51 | #include "ydb_row_lock.h" |
| 52 | |
| 53 | static uint64_t toku_txn_id64(DB_TXN * txn) { |
| 54 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 55 | return toku_txn_get_root_id(db_txn_struct_i(txn)->tokutxn); |
| 56 | } |
| 57 | |
| 58 | static void toku_txn_release_locks(DB_TXN *txn) { |
| 59 | // Prevent access to the locktree map while releasing. |
| 60 | // It is possible for lock escalation to attempt to |
| 61 | // modify this data structure while the txn commits. |
| 62 | toku_mutex_lock(&db_txn_struct_i(txn)->txn_mutex); |
| 63 | |
| 64 | size_t num_ranges = db_txn_struct_i(txn)->lt_map.size(); |
| 65 | for (size_t i = 0; i < num_ranges; i++) { |
| 66 | txn_lt_key_ranges ranges; |
| 67 | int r = db_txn_struct_i(txn)->lt_map.fetch(i, &ranges); |
| 68 | invariant_zero(r); |
| 69 | toku_db_release_lt_key_ranges(txn, &ranges); |
| 70 | } |
| 71 | |
| 72 | toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex); |
| 73 | } |
| 74 | |
| 75 | static void toku_txn_destroy(DB_TXN *txn) { |
| 76 | db_txn_struct_i(txn)->lt_map.destroy(); |
| 77 | toku_txn_destroy_txn(db_txn_struct_i(txn)->tokutxn); |
| 78 | toku_mutex_destroy(&db_txn_struct_i(txn)->txn_mutex); |
| 79 | toku_free(txn); |
| 80 | } |
| 81 | |
| 82 | static int toku_txn_commit(DB_TXN * txn, uint32_t flags, |
| 83 | TXN_PROGRESS_POLL_FUNCTION poll, void *, |
| 84 | bool release_mo_lock, bool low_priority) { |
| 85 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 86 | //Recursively kill off children |
| 87 | if (db_txn_struct_i(txn)->child) { |
| 88 | //commit of child sets the child pointer to NULL |
| 89 | int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, flags, NULL, NULL, false, false); |
| 90 | if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) { |
| 91 | env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n" ); |
| 92 | } |
| 93 | //In a panicked env, the child may not be removed from the list. |
| 94 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 95 | } |
| 96 | assert(!db_txn_struct_i(txn)->child); |
| 97 | //Remove from parent |
| 98 | if (txn->parent) { |
| 99 | assert(db_txn_struct_i(txn->parent)->child == txn); |
| 100 | db_txn_struct_i(txn->parent)->child=NULL; |
| 101 | } |
| 102 | if (flags & DB_TXN_SYNC) { |
| 103 | toku_txn_force_fsync_on_commit(db_txn_struct_i(txn)->tokutxn); |
| 104 | flags &= ~DB_TXN_SYNC; |
| 105 | } |
| 106 | int nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC); |
| 107 | flags &= ~DB_TXN_NOSYNC; |
| 108 | |
| 109 | int r; |
| 110 | if (flags!=0) { |
| 111 | // frees the tokutxn |
| 112 | r = toku_txn_abort_txn(db_txn_struct_i(txn)->tokutxn, poll, poll_extra); |
| 113 | } else { |
| 114 | // frees the tokutxn |
| 115 | r = toku_txn_commit_txn(db_txn_struct_i(txn)->tokutxn, nosync, |
| 116 | poll, poll_extra); |
| 117 | } |
| 118 | if (r!=0 && !toku_env_is_panicked(txn->mgrp)) { |
| 119 | env_panic(txn->mgrp, r, "Error during commit.\n" ); |
| 120 | } |
| 121 | //If panicked, we're done. |
| 122 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 123 | assert_zero(r); |
| 124 | |
| 125 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
| 126 | TOKULOGGER logger = txn->mgrp->i->logger; |
| 127 | LSN do_fsync_lsn; |
| 128 | bool do_fsync; |
| 129 | toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn); |
| 130 | // remove the txn from the list of live transactions, and then |
| 131 | // release the lock tree locks. MVCC requires that toku_txn_complete_txn |
| 132 | // get called first, otherwise we have bugs, such as #4145 and #4153 |
| 133 | toku_txn_complete_txn(ttxn); |
| 134 | toku_txn_release_locks(txn); |
| 135 | // this lock must be released after toku_txn_complete_txn and toku_txn_release_locks because |
| 136 | // this lock must be held until the references to the open FTs is released |
| 137 | // begin checkpoint logs these associations, so we must be protect |
| 138 | // the changing of these associations with checkpointing |
| 139 | if (release_mo_lock) { |
| 140 | if (low_priority) { |
| 141 | toku_low_priority_multi_operation_client_unlock(); |
| 142 | } else { |
| 143 | toku_multi_operation_client_unlock(); |
| 144 | } |
| 145 | } |
| 146 | toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync); |
| 147 | if (flags!=0) { |
| 148 | r = EINVAL; |
| 149 | goto cleanup; |
| 150 | } |
| 151 | cleanup: |
| 152 | toku_txn_destroy(txn); |
| 153 | return r; |
| 154 | } |
| 155 | |
| 156 | static int toku_txn_abort(DB_TXN * txn, |
| 157 | TXN_PROGRESS_POLL_FUNCTION poll, void *) { |
| 158 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 159 | //Recursively kill off children (abort or commit are both correct, commit is cheaper) |
| 160 | if (db_txn_struct_i(txn)->child) { |
| 161 | //commit of child sets the child pointer to NULL |
| 162 | int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, DB_TXN_NOSYNC, NULL, NULL, false, false); |
| 163 | if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) { |
| 164 | env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent abort.\n" ); |
| 165 | } |
| 166 | //In a panicked env, the child may not be removed from the list. |
| 167 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 168 | } |
| 169 | assert(!db_txn_struct_i(txn)->child); |
| 170 | //Remove from parent |
| 171 | if (txn->parent) { |
| 172 | assert(db_txn_struct_i(txn->parent)->child == txn); |
| 173 | db_txn_struct_i(txn->parent)->child=NULL; |
| 174 | } |
| 175 | |
| 176 | int r = toku_txn_abort_txn(db_txn_struct_i(txn)->tokutxn, poll, poll_extra); |
| 177 | if (r!=0 && !toku_env_is_panicked(txn->mgrp)) { |
| 178 | env_panic(txn->mgrp, r, "Error during abort.\n" ); |
| 179 | } |
| 180 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 181 | assert_zero(r); |
| 182 | toku_txn_complete_txn(db_txn_struct_i(txn)->tokutxn); |
| 183 | toku_txn_release_locks(txn); |
| 184 | toku_txn_destroy(txn); |
| 185 | return r; |
| 186 | } |
| 187 | |
| 188 | static int toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid, uint32_t flags) { |
| 189 | int r = 0; |
| 190 | if (!txn) { |
| 191 | r = EINVAL; |
| 192 | goto exit; |
| 193 | } |
| 194 | if (txn->parent) { |
| 195 | r = 0; // make this a NO-OP, MySQL calls this |
| 196 | goto exit; |
| 197 | } |
| 198 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 199 | // Take the mo lock as soon as a non-readonly txn is found |
| 200 | bool holds_mo_lock; |
| 201 | holds_mo_lock = false; |
| 202 | if (!toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) { |
| 203 | // A readonly transaction does no logging, and therefore does not |
| 204 | // need the MO lock. |
| 205 | toku_multi_operation_client_lock(); |
| 206 | holds_mo_lock = true; |
| 207 | } |
| 208 | //Recursively commit any children. |
| 209 | if (db_txn_struct_i(txn)->child) { |
| 210 | //commit of child sets the child pointer to NULL |
| 211 | |
| 212 | // toku_txn_commit will take the mo_lock if not held and a non-readonly txn is found. |
| 213 | int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, 0, NULL, NULL, false, false); |
| 214 | if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) { |
| 215 | env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n" ); |
| 216 | } |
| 217 | //In a panicked env, the child may not be removed from the list. |
| 218 | HANDLE_PANICKED_ENV(txn->mgrp); |
| 219 | } |
| 220 | assert(!db_txn_struct_i(txn)->child); |
| 221 | int nosync; |
| 222 | nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC); |
| 223 | TOKUTXN ttxn; |
| 224 | ttxn = db_txn_struct_i(txn)->tokutxn; |
| 225 | toku_txn_prepare_txn(ttxn, xid, nosync); |
| 226 | TOKULOGGER logger; |
| 227 | logger = txn->mgrp->i->logger; |
| 228 | LSN do_fsync_lsn; |
| 229 | bool do_fsync; |
| 230 | toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn); |
| 231 | // release the multi operation lock before fsyncing the log |
| 232 | if (holds_mo_lock) { |
| 233 | toku_multi_operation_client_unlock(); |
| 234 | } |
| 235 | toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync); |
| 236 | exit: |
| 237 | return r; |
| 238 | } |
| 239 | |
| 240 | // requires: must hold the multi operation lock. it is |
| 241 | // released in toku_txn_xa_prepare before the fsync. |
| 242 | static int toku_txn_prepare (DB_TXN *txn, uint8_t gid[DB_GID_SIZE], uint32_t flags) { |
| 243 | TOKU_XA_XID xid; |
| 244 | TOKU_ANNOTATE_NEW_MEMORY(&xid, sizeof(xid)); |
| 245 | xid.formatID=0x756b6f54; // "Toku" |
| 246 | xid.gtrid_length=DB_GID_SIZE/2; // The maximum allowed gtrid length is 64. See the XA spec in source:/import/opengroup.org/C193.pdf page 20. |
| 247 | xid.bqual_length=DB_GID_SIZE/2; // The maximum allowed bqual length is 64. |
| 248 | memcpy(xid.data, gid, DB_GID_SIZE); |
| 249 | return toku_txn_xa_prepare(txn, &xid, flags); |
| 250 | } |
| 251 | |
| 252 | static int toku_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) { |
| 253 | XMALLOC(*txn_stat); |
| 254 | return toku_logger_txn_rollback_stats(db_txn_struct_i(txn)->tokutxn, *txn_stat); |
| 255 | } |
| 256 | |
| 257 | static int locked_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) { |
| 258 | int r = toku_txn_txn_stat(txn, txn_stat); |
| 259 | return r; |
| 260 | } |
| 261 | |
| 262 | static int locked_txn_commit_with_progress(DB_TXN *txn, uint32_t flags, |
| 263 | TXN_PROGRESS_POLL_FUNCTION poll, void* ) { |
| 264 | bool holds_mo_lock = false; |
| 265 | bool low_priority = false; |
| 266 | TOKUTXN tokutxn = db_txn_struct_i(txn)->tokutxn; |
| 267 | if (!toku_txn_is_read_only(tokutxn)) { |
| 268 | // A readonly transaction does no logging, and therefore does not need the MO lock. |
| 269 | holds_mo_lock = true; |
| 270 | if (toku_is_big_tokutxn(tokutxn)) { |
| 271 | low_priority = true; |
| 272 | toku_low_priority_multi_operation_client_lock(); |
| 273 | } else { |
| 274 | toku_multi_operation_client_lock(); |
| 275 | } |
| 276 | } |
| 277 | // cannot begin a checkpoint. |
| 278 | // the multi operation lock is taken the first time we |
| 279 | // see a non-readonly txn in the recursive commit. |
| 280 | // But released in the first-level toku_txn_commit (if taken), |
| 281 | // this way, we don't hold it while we fsync the log. |
| 282 | int r = toku_txn_commit(txn, flags, poll, poll_extra, holds_mo_lock, low_priority); |
| 283 | return r; |
| 284 | } |
| 285 | |
| 286 | static int locked_txn_abort_with_progress(DB_TXN *txn, |
| 287 | TXN_PROGRESS_POLL_FUNCTION poll, void* ) { |
| 288 | // cannot begin a checkpoint |
| 289 | // the multi operation lock is taken the first time we |
| 290 | // see a non-readonly txn in the abort (or recursive commit). |
| 291 | // But released here so we don't have to hold additional state. |
| 292 | bool holds_mo_lock = false; |
| 293 | bool low_priority = false; |
| 294 | TOKUTXN tokutxn = db_txn_struct_i(txn)->tokutxn; |
| 295 | if (!toku_txn_is_read_only(tokutxn)) { |
| 296 | // A readonly transaction does no logging, and therefore does not need the MO lock. |
| 297 | holds_mo_lock = true; |
| 298 | if (toku_is_big_tokutxn(tokutxn)) { |
| 299 | low_priority = true; |
| 300 | toku_low_priority_multi_operation_client_lock(); |
| 301 | } else { |
| 302 | toku_multi_operation_client_lock(); |
| 303 | } |
| 304 | } |
| 305 | int r = toku_txn_abort(txn, poll, poll_extra); |
| 306 | if (holds_mo_lock) { |
| 307 | if (low_priority) { |
| 308 | toku_low_priority_multi_operation_client_unlock(); |
| 309 | } else { |
| 310 | toku_multi_operation_client_unlock(); |
| 311 | } |
| 312 | } |
| 313 | return r; |
| 314 | } |
| 315 | |
| 316 | int locked_txn_commit(DB_TXN *txn, uint32_t flags) { |
| 317 | int r = locked_txn_commit_with_progress(txn, flags, NULL, NULL); |
| 318 | return r; |
| 319 | } |
| 320 | |
| 321 | int locked_txn_abort(DB_TXN *txn) { |
| 322 | int r = locked_txn_abort_with_progress(txn, NULL, NULL); |
| 323 | return r; |
| 324 | } |
| 325 | |
| 326 | static void locked_txn_set_client_id(DB_TXN *txn, uint64_t client_id, void *) { |
| 327 | toku_txn_set_client_id(db_txn_struct_i(txn)->tokutxn, client_id, client_extra); |
| 328 | } |
| 329 | |
| 330 | static void locked_txn_get_client_id(DB_TXN *txn, uint64_t *client_id, void **) { |
| 331 | toku_txn_get_client_id(db_txn_struct_i(txn)->tokutxn, client_id, client_extra); |
| 332 | } |
| 333 | |
| 334 | static int toku_txn_discard(DB_TXN *txn, uint32_t flags) { |
| 335 | // check parameters |
| 336 | if (flags != 0) |
| 337 | return EINVAL; |
| 338 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
| 339 | if (toku_txn_get_state(ttxn) != TOKUTXN_PREPARING) |
| 340 | return EINVAL; |
| 341 | |
| 342 | bool low_priority; |
| 343 | if (toku_is_big_tokutxn(ttxn)) { |
| 344 | low_priority = true; |
| 345 | toku_low_priority_multi_operation_client_lock(); |
| 346 | } else { |
| 347 | low_priority = false; |
| 348 | toku_multi_operation_client_lock(); |
| 349 | } |
| 350 | |
| 351 | // discard |
| 352 | toku_txn_discard_txn(ttxn); |
| 353 | |
| 354 | // complete |
| 355 | toku_txn_complete_txn(ttxn); |
| 356 | |
| 357 | // release locks |
| 358 | toku_txn_release_locks(txn); |
| 359 | |
| 360 | if (low_priority) { |
| 361 | toku_low_priority_multi_operation_client_unlock(); |
| 362 | } else { |
| 363 | toku_multi_operation_client_unlock(); |
| 364 | } |
| 365 | |
| 366 | // destroy |
| 367 | toku_txn_destroy(txn); |
| 368 | |
| 369 | return 0; |
| 370 | } |
| 371 | |
| 372 | static bool toku_txn_is_prepared(DB_TXN *txn) { |
| 373 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
| 374 | return toku_txn_get_state(ttxn) == TOKUTXN_PREPARING; |
| 375 | } |
| 376 | |
| 377 | static DB_TXN *toku_txn_get_child(DB_TXN *txn) { |
| 378 | return db_txn_struct_i(txn)->child; |
| 379 | } |
| 380 | |
| 381 | static uint64_t toku_txn_get_start_time(DB_TXN *txn) { |
| 382 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
| 383 | return toku_txn_get_start_time(ttxn); |
| 384 | } |
| 385 | |
| 386 | static inline void txn_func_init(DB_TXN *txn) { |
| 387 | #define STXN(name) txn->name = locked_txn_ ## name |
| 388 | STXN(abort); |
| 389 | STXN(commit); |
| 390 | STXN(abort_with_progress); |
| 391 | STXN(commit_with_progress); |
| 392 | STXN(txn_stat); |
| 393 | STXN(set_client_id); |
| 394 | STXN(get_client_id); |
| 395 | #undef STXN |
| 396 | #define SUTXN(name) txn->name = toku_txn_ ## name |
| 397 | SUTXN(prepare); |
| 398 | SUTXN(xa_prepare); |
| 399 | SUTXN(discard); |
| 400 | #undef SUTXN |
| 401 | txn->id64 = toku_txn_id64; |
| 402 | txn->is_prepared = toku_txn_is_prepared; |
| 403 | txn->get_child = toku_txn_get_child; |
| 404 | txn->get_start_time = toku_txn_get_start_time; |
| 405 | } |
| 406 | |
| 407 | // |
| 408 | // Creates a transaction for the user |
| 409 | // In our system, as far as the user is concerned, the rules are as follows: |
| 410 | // - one cannot operate on a transaction if a child exists, with the exception of commit/abort |
| 411 | // - one cannot operate on a transaction simultaneously in two separate threads |
| 412 | // (the reason for this is that some operations may create a child transaction |
| 413 | // as part of the function, such as env->dbremove and env->dbrename, and if |
| 414 | // transactions could be operated on simulatenously in different threads, the first |
| 415 | // rule above is violated) |
| 416 | // - if a parent transaction is committed/aborted, the child transactions are recursively |
| 417 | // committed |
| 418 | // |
| 419 | int toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) { |
| 420 | HANDLE_PANICKED_ENV(env); |
| 421 | HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, stxn); //Cannot create child while child already exists. |
| 422 | if (!toku_logger_is_open(env->i->logger)) |
| 423 | return toku_ydb_do_error(env, EINVAL, "Environment does not have logging enabled\n" ); |
| 424 | if (!(env->i->open_flags & DB_INIT_TXN)) |
| 425 | return toku_ydb_do_error(env, EINVAL, "Environment does not have transactions enabled\n" ); |
| 426 | |
| 427 | uint32_t txn_flags = 0; |
| 428 | txn_flags |= DB_TXN_NOWAIT; //We do not support blocking locks. RFP remove this? |
| 429 | |
| 430 | // handle whether txn is declared as read only |
| 431 | bool parent_txn_declared_read_only = |
| 432 | stxn && |
| 433 | (db_txn_struct_i(stxn)->flags & DB_TXN_READ_ONLY); |
| 434 | bool txn_declared_read_only = false; |
| 435 | if (flags & DB_TXN_READ_ONLY) { |
| 436 | txn_declared_read_only = true; |
| 437 | txn_flags |= DB_TXN_READ_ONLY; |
| 438 | flags &= ~(DB_TXN_READ_ONLY); |
| 439 | } |
| 440 | if (txn_declared_read_only && stxn && |
| 441 | !parent_txn_declared_read_only |
| 442 | ) |
| 443 | { |
| 444 | return toku_ydb_do_error( |
| 445 | env, |
| 446 | EINVAL, |
| 447 | "Current transaction set as read only, but parent transaction is not\n" |
| 448 | ); |
| 449 | } |
| 450 | if (parent_txn_declared_read_only) |
| 451 | { |
| 452 | // don't require child transaction to also set transaction as read only |
| 453 | // if parent has already done so |
| 454 | txn_flags |= DB_TXN_READ_ONLY; |
| 455 | txn_declared_read_only = true; |
| 456 | } |
| 457 | |
| 458 | |
| 459 | TOKU_ISOLATION child_isolation = TOKU_ISO_SERIALIZABLE; |
| 460 | uint32_t iso_flags = flags & DB_ISOLATION_FLAGS; |
| 461 | if (!(iso_flags == 0 || |
| 462 | iso_flags == DB_TXN_SNAPSHOT || |
| 463 | iso_flags == DB_READ_COMMITTED || |
| 464 | iso_flags == DB_READ_COMMITTED_ALWAYS || |
| 465 | iso_flags == DB_READ_UNCOMMITTED || |
| 466 | iso_flags == DB_SERIALIZABLE || |
| 467 | iso_flags == DB_INHERIT_ISOLATION) |
| 468 | ) |
| 469 | { |
| 470 | return toku_ydb_do_error( |
| 471 | env, |
| 472 | EINVAL, |
| 473 | "Invalid isolation flags set\n" |
| 474 | ); |
| 475 | } |
| 476 | flags &= ~iso_flags; |
| 477 | |
| 478 | switch (iso_flags) { |
| 479 | case (DB_INHERIT_ISOLATION): |
| 480 | if (stxn) { |
| 481 | child_isolation = db_txn_struct_i(stxn)->iso; |
| 482 | } |
| 483 | else { |
| 484 | return toku_ydb_do_error( |
| 485 | env, |
| 486 | EINVAL, |
| 487 | "Cannot set DB_INHERIT_ISOLATION when no parent exists\n" |
| 488 | ); |
| 489 | } |
| 490 | break; |
| 491 | case (DB_READ_COMMITTED): |
| 492 | child_isolation = TOKU_ISO_READ_COMMITTED; |
| 493 | break; |
| 494 | case (DB_READ_COMMITTED_ALWAYS): |
| 495 | child_isolation = TOKU_ISO_READ_COMMITTED_ALWAYS; |
| 496 | break; |
| 497 | case (DB_READ_UNCOMMITTED): |
| 498 | child_isolation = TOKU_ISO_READ_UNCOMMITTED; |
| 499 | break; |
| 500 | case (DB_TXN_SNAPSHOT): |
| 501 | child_isolation = TOKU_ISO_SNAPSHOT; |
| 502 | break; |
| 503 | case (DB_SERIALIZABLE): |
| 504 | child_isolation = TOKU_ISO_SERIALIZABLE; |
| 505 | break; |
| 506 | case (0): |
| 507 | child_isolation = stxn ? db_txn_struct_i(stxn)->iso : TOKU_ISO_SERIALIZABLE; |
| 508 | break; |
| 509 | default: |
| 510 | assert(false); // error path is above, so this should not happen |
| 511 | break; |
| 512 | } |
| 513 | if (stxn && child_isolation != db_txn_struct_i(stxn)->iso) { |
| 514 | return toku_ydb_do_error( |
| 515 | env, |
| 516 | EINVAL, |
| 517 | "Cannot set isolation level of transaction to something different \ |
| 518 | isolation level\n" |
| 519 | ); |
| 520 | } |
| 521 | |
| 522 | if (flags&DB_TXN_NOWAIT) { |
| 523 | txn_flags |= DB_TXN_NOWAIT; |
| 524 | flags &= ~DB_TXN_NOWAIT; |
| 525 | } |
| 526 | if (flags&DB_TXN_NOSYNC) { |
| 527 | txn_flags |= DB_TXN_NOSYNC; |
| 528 | flags &= ~DB_TXN_NOSYNC; |
| 529 | } |
| 530 | if (flags!=0) return toku_ydb_do_error(env, EINVAL, "Invalid flags passed to DB_ENV->txn_begin\n" ); |
| 531 | |
| 532 | struct __toku_db_txn_external *XCALLOC(eresult); // so the internal stuff is stuck on the end. |
| 533 | DB_TXN *result = &eresult->external_part; |
| 534 | |
| 535 | result->mgrp = env; |
| 536 | txn_func_init(result); |
| 537 | |
| 538 | result->parent = stxn; |
| 539 | db_txn_struct_i(result)->flags = txn_flags; |
| 540 | db_txn_struct_i(result)->iso = child_isolation; |
| 541 | db_txn_struct_i(result)->lt_map.create_no_array(); |
| 542 | |
| 543 | toku_mutex_init(*db_txn_struct_i_txn_mutex_key, |
| 544 | &db_txn_struct_i(result)->txn_mutex, |
| 545 | nullptr); |
| 546 | |
| 547 | TXN_SNAPSHOT_TYPE snapshot_type; |
| 548 | switch (db_txn_struct_i(result)->iso) { |
| 549 | case(TOKU_ISO_SNAPSHOT): |
| 550 | { |
| 551 | snapshot_type = TXN_SNAPSHOT_ROOT; |
| 552 | break; |
| 553 | } |
| 554 | case(TOKU_ISO_READ_COMMITTED): |
| 555 | { |
| 556 | snapshot_type = TXN_SNAPSHOT_CHILD; |
| 557 | break; |
| 558 | } |
| 559 | case(TOKU_ISO_READ_COMMITTED_ALWAYS) : |
| 560 | { |
| 561 | snapshot_type = TXN_COPIES_SNAPSHOT; |
| 562 | break; |
| 563 | } |
| 564 | default: |
| 565 | { |
| 566 | snapshot_type = TXN_SNAPSHOT_NONE; |
| 567 | break; |
| 568 | } |
| 569 | } |
| 570 | int r = toku_txn_begin_with_xid( |
| 571 | stxn ? db_txn_struct_i(stxn)->tokutxn : 0, |
| 572 | &db_txn_struct_i(result)->tokutxn, |
| 573 | env->i->logger, |
| 574 | TXNID_PAIR_NONE, |
| 575 | snapshot_type, |
| 576 | result, |
| 577 | false, // for_recovery |
| 578 | txn_declared_read_only // read_only |
| 579 | ); |
| 580 | if (r != 0) { |
| 581 | toku_free(result); |
| 582 | return r; |
| 583 | } |
| 584 | |
| 585 | //Add to the list of children for the parent. |
| 586 | if (result->parent) { |
| 587 | assert(!db_txn_struct_i(result->parent)->child); |
| 588 | db_txn_struct_i(result->parent)->child = result; |
| 589 | } |
| 590 | |
| 591 | *txn = result; |
| 592 | return 0; |
| 593 | } |
| 594 | |
| 595 | void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) { |
| 596 | struct __toku_db_txn_external *XCALLOC(eresult); |
| 597 | DB_TXN *result = &eresult->external_part; |
| 598 | result->mgrp = env; |
| 599 | txn_func_init(result); |
| 600 | |
| 601 | result->parent = NULL; |
| 602 | |
| 603 | db_txn_struct_i(result)->tokutxn = tokutxn; |
| 604 | db_txn_struct_i(result)->lt_map.create(); |
| 605 | |
| 606 | toku_txn_set_container_db_txn(tokutxn, result); |
| 607 | |
| 608 | toku_mutex_init(*db_txn_struct_i_txn_mutex_key, |
| 609 | &db_txn_struct_i(result)->txn_mutex, |
| 610 | nullptr); |
| 611 | } |
| 612 | |
| 613 | // Test-only function |
| 614 | void toku_increase_last_xid(DB_ENV *env, uint64_t increment) { |
| 615 | toku_txn_manager_increase_last_xid(toku_logger_get_txn_manager(env->i->logger), increment); |
| 616 | } |
| 617 | |
| 618 | bool toku_is_big_txn(DB_TXN *txn) { |
| 619 | return toku_is_big_tokutxn(db_txn_struct_i(txn)->tokutxn); |
| 620 | } |
| 621 | |
| 622 | bool toku_is_big_tokutxn(TOKUTXN tokutxn) { |
| 623 | return toku_txn_has_spilled_rollback(tokutxn); |
| 624 | } |
| 625 | |