1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #include <db.h> |
40 | |
41 | #include <portability/toku_race_tools.h> |
42 | #include <portability/toku_atomic.h> |
43 | |
44 | #include <ft/cachetable/checkpoint.h> |
45 | #include <ft/log_header.h> |
46 | #include <ft/txn/txn_manager.h> |
47 | |
48 | |
49 | #include "ydb-internal.h" |
50 | #include "ydb_txn.h" |
51 | #include "ydb_row_lock.h" |
52 | |
53 | static uint64_t toku_txn_id64(DB_TXN * txn) { |
54 | HANDLE_PANICKED_ENV(txn->mgrp); |
55 | return toku_txn_get_root_id(db_txn_struct_i(txn)->tokutxn); |
56 | } |
57 | |
58 | static void toku_txn_release_locks(DB_TXN *txn) { |
59 | // Prevent access to the locktree map while releasing. |
60 | // It is possible for lock escalation to attempt to |
61 | // modify this data structure while the txn commits. |
62 | toku_mutex_lock(&db_txn_struct_i(txn)->txn_mutex); |
63 | |
64 | size_t num_ranges = db_txn_struct_i(txn)->lt_map.size(); |
65 | for (size_t i = 0; i < num_ranges; i++) { |
66 | txn_lt_key_ranges ranges; |
67 | int r = db_txn_struct_i(txn)->lt_map.fetch(i, &ranges); |
68 | invariant_zero(r); |
69 | toku_db_release_lt_key_ranges(txn, &ranges); |
70 | } |
71 | |
72 | toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex); |
73 | } |
74 | |
75 | static void toku_txn_destroy(DB_TXN *txn) { |
76 | db_txn_struct_i(txn)->lt_map.destroy(); |
77 | toku_txn_destroy_txn(db_txn_struct_i(txn)->tokutxn); |
78 | toku_mutex_destroy(&db_txn_struct_i(txn)->txn_mutex); |
79 | toku_free(txn); |
80 | } |
81 | |
82 | static int toku_txn_commit(DB_TXN * txn, uint32_t flags, |
83 | TXN_PROGRESS_POLL_FUNCTION poll, void *, |
84 | bool release_mo_lock, bool low_priority) { |
85 | HANDLE_PANICKED_ENV(txn->mgrp); |
86 | //Recursively kill off children |
87 | if (db_txn_struct_i(txn)->child) { |
88 | //commit of child sets the child pointer to NULL |
89 | int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, flags, NULL, NULL, false, false); |
90 | if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) { |
91 | env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n" ); |
92 | } |
93 | //In a panicked env, the child may not be removed from the list. |
94 | HANDLE_PANICKED_ENV(txn->mgrp); |
95 | } |
96 | assert(!db_txn_struct_i(txn)->child); |
97 | //Remove from parent |
98 | if (txn->parent) { |
99 | assert(db_txn_struct_i(txn->parent)->child == txn); |
100 | db_txn_struct_i(txn->parent)->child=NULL; |
101 | } |
102 | if (flags & DB_TXN_SYNC) { |
103 | toku_txn_force_fsync_on_commit(db_txn_struct_i(txn)->tokutxn); |
104 | flags &= ~DB_TXN_SYNC; |
105 | } |
106 | int nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC); |
107 | flags &= ~DB_TXN_NOSYNC; |
108 | |
109 | int r; |
110 | if (flags!=0) { |
111 | // frees the tokutxn |
112 | r = toku_txn_abort_txn(db_txn_struct_i(txn)->tokutxn, poll, poll_extra); |
113 | } else { |
114 | // frees the tokutxn |
115 | r = toku_txn_commit_txn(db_txn_struct_i(txn)->tokutxn, nosync, |
116 | poll, poll_extra); |
117 | } |
118 | if (r!=0 && !toku_env_is_panicked(txn->mgrp)) { |
119 | env_panic(txn->mgrp, r, "Error during commit.\n" ); |
120 | } |
121 | //If panicked, we're done. |
122 | HANDLE_PANICKED_ENV(txn->mgrp); |
123 | assert_zero(r); |
124 | |
125 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
126 | TOKULOGGER logger = txn->mgrp->i->logger; |
127 | LSN do_fsync_lsn; |
128 | bool do_fsync; |
129 | toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn); |
130 | // remove the txn from the list of live transactions, and then |
131 | // release the lock tree locks. MVCC requires that toku_txn_complete_txn |
132 | // get called first, otherwise we have bugs, such as #4145 and #4153 |
133 | toku_txn_complete_txn(ttxn); |
134 | toku_txn_release_locks(txn); |
135 | // this lock must be released after toku_txn_complete_txn and toku_txn_release_locks because |
136 | // this lock must be held until the references to the open FTs is released |
137 | // begin checkpoint logs these associations, so we must be protect |
138 | // the changing of these associations with checkpointing |
139 | if (release_mo_lock) { |
140 | if (low_priority) { |
141 | toku_low_priority_multi_operation_client_unlock(); |
142 | } else { |
143 | toku_multi_operation_client_unlock(); |
144 | } |
145 | } |
146 | toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync); |
147 | if (flags!=0) { |
148 | r = EINVAL; |
149 | goto cleanup; |
150 | } |
151 | cleanup: |
152 | toku_txn_destroy(txn); |
153 | return r; |
154 | } |
155 | |
156 | static int toku_txn_abort(DB_TXN * txn, |
157 | TXN_PROGRESS_POLL_FUNCTION poll, void *) { |
158 | HANDLE_PANICKED_ENV(txn->mgrp); |
159 | //Recursively kill off children (abort or commit are both correct, commit is cheaper) |
160 | if (db_txn_struct_i(txn)->child) { |
161 | //commit of child sets the child pointer to NULL |
162 | int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, DB_TXN_NOSYNC, NULL, NULL, false, false); |
163 | if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) { |
164 | env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent abort.\n" ); |
165 | } |
166 | //In a panicked env, the child may not be removed from the list. |
167 | HANDLE_PANICKED_ENV(txn->mgrp); |
168 | } |
169 | assert(!db_txn_struct_i(txn)->child); |
170 | //Remove from parent |
171 | if (txn->parent) { |
172 | assert(db_txn_struct_i(txn->parent)->child == txn); |
173 | db_txn_struct_i(txn->parent)->child=NULL; |
174 | } |
175 | |
176 | int r = toku_txn_abort_txn(db_txn_struct_i(txn)->tokutxn, poll, poll_extra); |
177 | if (r!=0 && !toku_env_is_panicked(txn->mgrp)) { |
178 | env_panic(txn->mgrp, r, "Error during abort.\n" ); |
179 | } |
180 | HANDLE_PANICKED_ENV(txn->mgrp); |
181 | assert_zero(r); |
182 | toku_txn_complete_txn(db_txn_struct_i(txn)->tokutxn); |
183 | toku_txn_release_locks(txn); |
184 | toku_txn_destroy(txn); |
185 | return r; |
186 | } |
187 | |
188 | static int toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid, uint32_t flags) { |
189 | int r = 0; |
190 | if (!txn) { |
191 | r = EINVAL; |
192 | goto exit; |
193 | } |
194 | if (txn->parent) { |
195 | r = 0; // make this a NO-OP, MySQL calls this |
196 | goto exit; |
197 | } |
198 | HANDLE_PANICKED_ENV(txn->mgrp); |
199 | // Take the mo lock as soon as a non-readonly txn is found |
200 | bool holds_mo_lock; |
201 | holds_mo_lock = false; |
202 | if (!toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) { |
203 | // A readonly transaction does no logging, and therefore does not |
204 | // need the MO lock. |
205 | toku_multi_operation_client_lock(); |
206 | holds_mo_lock = true; |
207 | } |
208 | //Recursively commit any children. |
209 | if (db_txn_struct_i(txn)->child) { |
210 | //commit of child sets the child pointer to NULL |
211 | |
212 | // toku_txn_commit will take the mo_lock if not held and a non-readonly txn is found. |
213 | int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, 0, NULL, NULL, false, false); |
214 | if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) { |
215 | env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n" ); |
216 | } |
217 | //In a panicked env, the child may not be removed from the list. |
218 | HANDLE_PANICKED_ENV(txn->mgrp); |
219 | } |
220 | assert(!db_txn_struct_i(txn)->child); |
221 | int nosync; |
222 | nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC); |
223 | TOKUTXN ttxn; |
224 | ttxn = db_txn_struct_i(txn)->tokutxn; |
225 | toku_txn_prepare_txn(ttxn, xid, nosync); |
226 | TOKULOGGER logger; |
227 | logger = txn->mgrp->i->logger; |
228 | LSN do_fsync_lsn; |
229 | bool do_fsync; |
230 | toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn); |
231 | // release the multi operation lock before fsyncing the log |
232 | if (holds_mo_lock) { |
233 | toku_multi_operation_client_unlock(); |
234 | } |
235 | toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync); |
236 | exit: |
237 | return r; |
238 | } |
239 | |
240 | // requires: must hold the multi operation lock. it is |
241 | // released in toku_txn_xa_prepare before the fsync. |
242 | static int toku_txn_prepare (DB_TXN *txn, uint8_t gid[DB_GID_SIZE], uint32_t flags) { |
243 | TOKU_XA_XID xid; |
244 | TOKU_ANNOTATE_NEW_MEMORY(&xid, sizeof(xid)); |
245 | xid.formatID=0x756b6f54; // "Toku" |
246 | xid.gtrid_length=DB_GID_SIZE/2; // The maximum allowed gtrid length is 64. See the XA spec in source:/import/opengroup.org/C193.pdf page 20. |
247 | xid.bqual_length=DB_GID_SIZE/2; // The maximum allowed bqual length is 64. |
248 | memcpy(xid.data, gid, DB_GID_SIZE); |
249 | return toku_txn_xa_prepare(txn, &xid, flags); |
250 | } |
251 | |
252 | static int toku_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) { |
253 | XMALLOC(*txn_stat); |
254 | return toku_logger_txn_rollback_stats(db_txn_struct_i(txn)->tokutxn, *txn_stat); |
255 | } |
256 | |
257 | static int locked_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) { |
258 | int r = toku_txn_txn_stat(txn, txn_stat); |
259 | return r; |
260 | } |
261 | |
262 | static int locked_txn_commit_with_progress(DB_TXN *txn, uint32_t flags, |
263 | TXN_PROGRESS_POLL_FUNCTION poll, void* ) { |
264 | bool holds_mo_lock = false; |
265 | bool low_priority = false; |
266 | TOKUTXN tokutxn = db_txn_struct_i(txn)->tokutxn; |
267 | if (!toku_txn_is_read_only(tokutxn)) { |
268 | // A readonly transaction does no logging, and therefore does not need the MO lock. |
269 | holds_mo_lock = true; |
270 | if (toku_is_big_tokutxn(tokutxn)) { |
271 | low_priority = true; |
272 | toku_low_priority_multi_operation_client_lock(); |
273 | } else { |
274 | toku_multi_operation_client_lock(); |
275 | } |
276 | } |
277 | // cannot begin a checkpoint. |
278 | // the multi operation lock is taken the first time we |
279 | // see a non-readonly txn in the recursive commit. |
280 | // But released in the first-level toku_txn_commit (if taken), |
281 | // this way, we don't hold it while we fsync the log. |
282 | int r = toku_txn_commit(txn, flags, poll, poll_extra, holds_mo_lock, low_priority); |
283 | return r; |
284 | } |
285 | |
286 | static int locked_txn_abort_with_progress(DB_TXN *txn, |
287 | TXN_PROGRESS_POLL_FUNCTION poll, void* ) { |
288 | // cannot begin a checkpoint |
289 | // the multi operation lock is taken the first time we |
290 | // see a non-readonly txn in the abort (or recursive commit). |
291 | // But released here so we don't have to hold additional state. |
292 | bool holds_mo_lock = false; |
293 | bool low_priority = false; |
294 | TOKUTXN tokutxn = db_txn_struct_i(txn)->tokutxn; |
295 | if (!toku_txn_is_read_only(tokutxn)) { |
296 | // A readonly transaction does no logging, and therefore does not need the MO lock. |
297 | holds_mo_lock = true; |
298 | if (toku_is_big_tokutxn(tokutxn)) { |
299 | low_priority = true; |
300 | toku_low_priority_multi_operation_client_lock(); |
301 | } else { |
302 | toku_multi_operation_client_lock(); |
303 | } |
304 | } |
305 | int r = toku_txn_abort(txn, poll, poll_extra); |
306 | if (holds_mo_lock) { |
307 | if (low_priority) { |
308 | toku_low_priority_multi_operation_client_unlock(); |
309 | } else { |
310 | toku_multi_operation_client_unlock(); |
311 | } |
312 | } |
313 | return r; |
314 | } |
315 | |
316 | int locked_txn_commit(DB_TXN *txn, uint32_t flags) { |
317 | int r = locked_txn_commit_with_progress(txn, flags, NULL, NULL); |
318 | return r; |
319 | } |
320 | |
321 | int locked_txn_abort(DB_TXN *txn) { |
322 | int r = locked_txn_abort_with_progress(txn, NULL, NULL); |
323 | return r; |
324 | } |
325 | |
326 | static void locked_txn_set_client_id(DB_TXN *txn, uint64_t client_id, void *) { |
327 | toku_txn_set_client_id(db_txn_struct_i(txn)->tokutxn, client_id, client_extra); |
328 | } |
329 | |
330 | static void locked_txn_get_client_id(DB_TXN *txn, uint64_t *client_id, void **) { |
331 | toku_txn_get_client_id(db_txn_struct_i(txn)->tokutxn, client_id, client_extra); |
332 | } |
333 | |
334 | static int toku_txn_discard(DB_TXN *txn, uint32_t flags) { |
335 | // check parameters |
336 | if (flags != 0) |
337 | return EINVAL; |
338 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
339 | if (toku_txn_get_state(ttxn) != TOKUTXN_PREPARING) |
340 | return EINVAL; |
341 | |
342 | bool low_priority; |
343 | if (toku_is_big_tokutxn(ttxn)) { |
344 | low_priority = true; |
345 | toku_low_priority_multi_operation_client_lock(); |
346 | } else { |
347 | low_priority = false; |
348 | toku_multi_operation_client_lock(); |
349 | } |
350 | |
351 | // discard |
352 | toku_txn_discard_txn(ttxn); |
353 | |
354 | // complete |
355 | toku_txn_complete_txn(ttxn); |
356 | |
357 | // release locks |
358 | toku_txn_release_locks(txn); |
359 | |
360 | if (low_priority) { |
361 | toku_low_priority_multi_operation_client_unlock(); |
362 | } else { |
363 | toku_multi_operation_client_unlock(); |
364 | } |
365 | |
366 | // destroy |
367 | toku_txn_destroy(txn); |
368 | |
369 | return 0; |
370 | } |
371 | |
372 | static bool toku_txn_is_prepared(DB_TXN *txn) { |
373 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
374 | return toku_txn_get_state(ttxn) == TOKUTXN_PREPARING; |
375 | } |
376 | |
377 | static DB_TXN *toku_txn_get_child(DB_TXN *txn) { |
378 | return db_txn_struct_i(txn)->child; |
379 | } |
380 | |
381 | static uint64_t toku_txn_get_start_time(DB_TXN *txn) { |
382 | TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; |
383 | return toku_txn_get_start_time(ttxn); |
384 | } |
385 | |
386 | static inline void txn_func_init(DB_TXN *txn) { |
387 | #define STXN(name) txn->name = locked_txn_ ## name |
388 | STXN(abort); |
389 | STXN(commit); |
390 | STXN(abort_with_progress); |
391 | STXN(commit_with_progress); |
392 | STXN(txn_stat); |
393 | STXN(set_client_id); |
394 | STXN(get_client_id); |
395 | #undef STXN |
396 | #define SUTXN(name) txn->name = toku_txn_ ## name |
397 | SUTXN(prepare); |
398 | SUTXN(xa_prepare); |
399 | SUTXN(discard); |
400 | #undef SUTXN |
401 | txn->id64 = toku_txn_id64; |
402 | txn->is_prepared = toku_txn_is_prepared; |
403 | txn->get_child = toku_txn_get_child; |
404 | txn->get_start_time = toku_txn_get_start_time; |
405 | } |
406 | |
407 | // |
408 | // Creates a transaction for the user |
409 | // In our system, as far as the user is concerned, the rules are as follows: |
410 | // - one cannot operate on a transaction if a child exists, with the exception of commit/abort |
411 | // - one cannot operate on a transaction simultaneously in two separate threads |
412 | // (the reason for this is that some operations may create a child transaction |
413 | // as part of the function, such as env->dbremove and env->dbrename, and if |
414 | // transactions could be operated on simulatenously in different threads, the first |
415 | // rule above is violated) |
416 | // - if a parent transaction is committed/aborted, the child transactions are recursively |
417 | // committed |
418 | // |
419 | int toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) { |
420 | HANDLE_PANICKED_ENV(env); |
421 | HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, stxn); //Cannot create child while child already exists. |
422 | if (!toku_logger_is_open(env->i->logger)) |
423 | return toku_ydb_do_error(env, EINVAL, "Environment does not have logging enabled\n" ); |
424 | if (!(env->i->open_flags & DB_INIT_TXN)) |
425 | return toku_ydb_do_error(env, EINVAL, "Environment does not have transactions enabled\n" ); |
426 | |
427 | uint32_t txn_flags = 0; |
428 | txn_flags |= DB_TXN_NOWAIT; //We do not support blocking locks. RFP remove this? |
429 | |
430 | // handle whether txn is declared as read only |
431 | bool parent_txn_declared_read_only = |
432 | stxn && |
433 | (db_txn_struct_i(stxn)->flags & DB_TXN_READ_ONLY); |
434 | bool txn_declared_read_only = false; |
435 | if (flags & DB_TXN_READ_ONLY) { |
436 | txn_declared_read_only = true; |
437 | txn_flags |= DB_TXN_READ_ONLY; |
438 | flags &= ~(DB_TXN_READ_ONLY); |
439 | } |
440 | if (txn_declared_read_only && stxn && |
441 | !parent_txn_declared_read_only |
442 | ) |
443 | { |
444 | return toku_ydb_do_error( |
445 | env, |
446 | EINVAL, |
447 | "Current transaction set as read only, but parent transaction is not\n" |
448 | ); |
449 | } |
450 | if (parent_txn_declared_read_only) |
451 | { |
452 | // don't require child transaction to also set transaction as read only |
453 | // if parent has already done so |
454 | txn_flags |= DB_TXN_READ_ONLY; |
455 | txn_declared_read_only = true; |
456 | } |
457 | |
458 | |
459 | TOKU_ISOLATION child_isolation = TOKU_ISO_SERIALIZABLE; |
460 | uint32_t iso_flags = flags & DB_ISOLATION_FLAGS; |
461 | if (!(iso_flags == 0 || |
462 | iso_flags == DB_TXN_SNAPSHOT || |
463 | iso_flags == DB_READ_COMMITTED || |
464 | iso_flags == DB_READ_COMMITTED_ALWAYS || |
465 | iso_flags == DB_READ_UNCOMMITTED || |
466 | iso_flags == DB_SERIALIZABLE || |
467 | iso_flags == DB_INHERIT_ISOLATION) |
468 | ) |
469 | { |
470 | return toku_ydb_do_error( |
471 | env, |
472 | EINVAL, |
473 | "Invalid isolation flags set\n" |
474 | ); |
475 | } |
476 | flags &= ~iso_flags; |
477 | |
478 | switch (iso_flags) { |
479 | case (DB_INHERIT_ISOLATION): |
480 | if (stxn) { |
481 | child_isolation = db_txn_struct_i(stxn)->iso; |
482 | } |
483 | else { |
484 | return toku_ydb_do_error( |
485 | env, |
486 | EINVAL, |
487 | "Cannot set DB_INHERIT_ISOLATION when no parent exists\n" |
488 | ); |
489 | } |
490 | break; |
491 | case (DB_READ_COMMITTED): |
492 | child_isolation = TOKU_ISO_READ_COMMITTED; |
493 | break; |
494 | case (DB_READ_COMMITTED_ALWAYS): |
495 | child_isolation = TOKU_ISO_READ_COMMITTED_ALWAYS; |
496 | break; |
497 | case (DB_READ_UNCOMMITTED): |
498 | child_isolation = TOKU_ISO_READ_UNCOMMITTED; |
499 | break; |
500 | case (DB_TXN_SNAPSHOT): |
501 | child_isolation = TOKU_ISO_SNAPSHOT; |
502 | break; |
503 | case (DB_SERIALIZABLE): |
504 | child_isolation = TOKU_ISO_SERIALIZABLE; |
505 | break; |
506 | case (0): |
507 | child_isolation = stxn ? db_txn_struct_i(stxn)->iso : TOKU_ISO_SERIALIZABLE; |
508 | break; |
509 | default: |
510 | assert(false); // error path is above, so this should not happen |
511 | break; |
512 | } |
513 | if (stxn && child_isolation != db_txn_struct_i(stxn)->iso) { |
514 | return toku_ydb_do_error( |
515 | env, |
516 | EINVAL, |
517 | "Cannot set isolation level of transaction to something different \ |
518 | isolation level\n" |
519 | ); |
520 | } |
521 | |
522 | if (flags&DB_TXN_NOWAIT) { |
523 | txn_flags |= DB_TXN_NOWAIT; |
524 | flags &= ~DB_TXN_NOWAIT; |
525 | } |
526 | if (flags&DB_TXN_NOSYNC) { |
527 | txn_flags |= DB_TXN_NOSYNC; |
528 | flags &= ~DB_TXN_NOSYNC; |
529 | } |
530 | if (flags!=0) return toku_ydb_do_error(env, EINVAL, "Invalid flags passed to DB_ENV->txn_begin\n" ); |
531 | |
532 | struct __toku_db_txn_external *XCALLOC(eresult); // so the internal stuff is stuck on the end. |
533 | DB_TXN *result = &eresult->external_part; |
534 | |
535 | result->mgrp = env; |
536 | txn_func_init(result); |
537 | |
538 | result->parent = stxn; |
539 | db_txn_struct_i(result)->flags = txn_flags; |
540 | db_txn_struct_i(result)->iso = child_isolation; |
541 | db_txn_struct_i(result)->lt_map.create_no_array(); |
542 | |
543 | toku_mutex_init(*db_txn_struct_i_txn_mutex_key, |
544 | &db_txn_struct_i(result)->txn_mutex, |
545 | nullptr); |
546 | |
547 | TXN_SNAPSHOT_TYPE snapshot_type; |
548 | switch (db_txn_struct_i(result)->iso) { |
549 | case(TOKU_ISO_SNAPSHOT): |
550 | { |
551 | snapshot_type = TXN_SNAPSHOT_ROOT; |
552 | break; |
553 | } |
554 | case(TOKU_ISO_READ_COMMITTED): |
555 | { |
556 | snapshot_type = TXN_SNAPSHOT_CHILD; |
557 | break; |
558 | } |
559 | case(TOKU_ISO_READ_COMMITTED_ALWAYS) : |
560 | { |
561 | snapshot_type = TXN_COPIES_SNAPSHOT; |
562 | break; |
563 | } |
564 | default: |
565 | { |
566 | snapshot_type = TXN_SNAPSHOT_NONE; |
567 | break; |
568 | } |
569 | } |
570 | int r = toku_txn_begin_with_xid( |
571 | stxn ? db_txn_struct_i(stxn)->tokutxn : 0, |
572 | &db_txn_struct_i(result)->tokutxn, |
573 | env->i->logger, |
574 | TXNID_PAIR_NONE, |
575 | snapshot_type, |
576 | result, |
577 | false, // for_recovery |
578 | txn_declared_read_only // read_only |
579 | ); |
580 | if (r != 0) { |
581 | toku_free(result); |
582 | return r; |
583 | } |
584 | |
585 | //Add to the list of children for the parent. |
586 | if (result->parent) { |
587 | assert(!db_txn_struct_i(result->parent)->child); |
588 | db_txn_struct_i(result->parent)->child = result; |
589 | } |
590 | |
591 | *txn = result; |
592 | return 0; |
593 | } |
594 | |
595 | void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) { |
596 | struct __toku_db_txn_external *XCALLOC(eresult); |
597 | DB_TXN *result = &eresult->external_part; |
598 | result->mgrp = env; |
599 | txn_func_init(result); |
600 | |
601 | result->parent = NULL; |
602 | |
603 | db_txn_struct_i(result)->tokutxn = tokutxn; |
604 | db_txn_struct_i(result)->lt_map.create(); |
605 | |
606 | toku_txn_set_container_db_txn(tokutxn, result); |
607 | |
608 | toku_mutex_init(*db_txn_struct_i_txn_mutex_key, |
609 | &db_txn_struct_i(result)->txn_mutex, |
610 | nullptr); |
611 | } |
612 | |
613 | // Test-only function |
614 | void toku_increase_last_xid(DB_ENV *env, uint64_t increment) { |
615 | toku_txn_manager_increase_last_xid(toku_logger_get_txn_manager(env->i->logger), increment); |
616 | } |
617 | |
618 | bool toku_is_big_txn(DB_TXN *txn) { |
619 | return toku_is_big_tokutxn(db_txn_struct_i(txn)->tokutxn); |
620 | } |
621 | |
622 | bool toku_is_big_tokutxn(TOKUTXN tokutxn) { |
623 | return toku_txn_has_spilled_rollback(tokutxn); |
624 | } |
625 | |