| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #pragma once |
| 40 | |
| 41 | #include "portability/toku_stdint.h" |
| 42 | |
| 43 | #include "ft/txn/txn_state.h" |
| 44 | #include "ft/serialize/block_table.h" |
| 45 | #include "ft/ft-status.h" |
| 46 | #include "util/omt.h" |
| 47 | |
// Transaction identifier: an unsigned 64-bit integer.
typedef uint64_t TXNID;

// Handle to a transaction (pointer to struct tokutxn).
typedef struct tokutxn *TOKUTXN;

// Sentinel TXNID values. TXNID_NONE_LIVING and TXNID_NONE are both 0;
// TXNID_MAX is the all-ones (largest) TXNID value.
#define TXNID_NONE_LIVING ((TXNID)0)
#define TXNID_NONE ((TXNID)0)
#define TXNID_MAX ((TXNID)-1)

// A pair of transaction ids: a parent id and a child id.
struct txnid_pair_s {
    TXNID parent_id64;
    TXNID child_id64;
};
typedef struct txnid_pair_s TXNID_PAIR;

// The "none" pair: both ids are TXNID_NONE.
// The initializer is positional: parent_id64 first, then child_id64.
static const TXNID_PAIR TXNID_PAIR_NONE = { TXNID_NONE, TXNID_NONE };
| 62 | |
| 63 | // We include the child manager here because it uses the TXNID / TOKUTXN types |
| 64 | #include "ft/txn/txn_child_manager.h" |
| 65 | |
/* Log Sequence Number (LSN).
 * The 64-bit value is wrapped in a struct, rather than being a bare integer,
 * so that we get better type checking. */
struct __toku_lsn {
    uint64_t lsn;
};
typedef struct __toku_lsn LSN;

// Smallest and largest representable LSN values.
static const LSN ZERO_LSN = { 0 };
static const LSN MAX_LSN = { UINT64_MAX };
| 71 | |
//
// Kinds of snapshot a tokutxn can take:
//  - TXN_SNAPSHOT_NONE:  there is no snapshot; reads are not snapshot reads.
//                        Used for SERIALIZABLE and READ UNCOMMITTED.
//  - TXN_SNAPSHOT_ROOT:  every tokutxn uses its root transaction's snapshot.
//                        Used for REPEATABLE READ.
//  - TXN_SNAPSHOT_CHILD: each child tokutxn creates its own snapshot.
//                        Used for READ COMMITTED.
//  - TXN_COPIES_SNAPSHOT: NOTE(review): not described by the original comment;
//                        presumably the txn copies an existing snapshot - confirm.
//
enum __TXN_SNAPSHOT_TYPE {
    TXN_SNAPSHOT_NONE   = 0,
    TXN_SNAPSHOT_ROOT   = 1,
    TXN_SNAPSHOT_CHILD  = 2,
    TXN_COPIES_SNAPSHOT = 3
};
typedef enum __TXN_SNAPSHOT_TYPE TXN_SNAPSHOT_TYPE;
| 88 | |
// toku::omt (see util/omt.h) instantiations used by the transaction code.
// txn_omt_t holds transaction pointers.
typedef toku::omt<struct tokutxn *> txn_omt_t;
// xid_omt_t holds transaction ids.
typedef toku::omt<TXNID> xid_omt_t;
// rx_omt_t holds referenced_xid_tuple values; the second template argument is
// the pointer form of the element type (its exact role is defined by toku::omt).
typedef toku::omt<struct referenced_xid_tuple, struct referenced_xid_tuple *> rx_omt_t;
| 92 | |
| 93 | inline bool txn_pair_is_none(TXNID_PAIR txnid) { |
| 94 | return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; |
| 95 | } |
| 96 | |
| 97 | struct tokulogger; |
| 98 | |
// Per-transaction bookkeeping for rollback nodes and rollback entries.
struct txn_roll_info {
    // these are number of rollback nodes and rollback entries for this txn.
    //
    // the current rollback node below has sequence number num_rollback_nodes - 1
    // (because they are numbered 0...num-1). often, the current rollback is
    // already set to this block num, which means it exists and is available to
    // log some entries. if the current rollback is NONE and the number of
    // rollback nodes for this transaction is non-zero, then we will use
    // the number of rollback nodes to know which sequence number to assign
    // to a new one we create
    uint64_t num_rollback_nodes;
    uint64_t num_rollentries;
    uint64_t num_rollentries_processed;
    uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children.

    // spilled rollback nodes are rollback nodes that were gorged by this
    // transaction, retired, and saved in a list.

    // the spilled rollback head is the block number of the first rollback node
    // that makes up the rollback log chain
    BLOCKNUM spilled_rollback_head;

    // the spilled rollback tail is the block number of the last rollback node
    // that makes up the rollback log chain.
    BLOCKNUM spilled_rollback_tail;

    // the current rollback node block number we may use. if this is ROLLBACK_NONE,
    // then we need to create one and set it here before using it.
    BLOCKNUM current_rollback;
};
| 129 | |
| 130 | struct tokutxn { |
| 131 | // These don't change after create: |
| 132 | |
| 133 | TXNID_PAIR txnid; |
| 134 | |
| 135 | uint64_t snapshot_txnid64; // this is the lsn of the snapshot |
| 136 | const TXN_SNAPSHOT_TYPE snapshot_type; |
| 137 | const bool for_recovery; |
| 138 | struct tokulogger *const logger; |
| 139 | struct tokutxn *const parent; |
| 140 | // The child txn is protected by the child_txn_manager lock |
| 141 | // and by the user contract. The user contract states (and is |
| 142 | // enforced at the ydb layer) that a child txn should not be created |
| 143 | // while another child exists. The txn_child_manager will protect |
| 144 | // other threads from trying to read this value while another |
| 145 | // thread commits/aborts the child |
| 146 | struct tokutxn *child; |
| 147 | |
| 148 | // statically allocated child manager, if this |
| 149 | // txn is a root txn, this manager will be used and set to |
| 150 | // child_manager for this transaction and all of its children |
| 151 | txn_child_manager child_manager_s; |
| 152 | |
| 153 | // child manager for this transaction, all of its children, |
| 154 | // and all of its ancestors |
| 155 | txn_child_manager* child_manager; |
| 156 | |
| 157 | // These don't change but they're created in a way that's hard to make |
| 158 | // strictly const. |
| 159 | DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn |
| 160 | xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. |
| 161 | struct XIDS_S *xids; // Represents the xid list |
| 162 | |
| 163 | struct tokutxn *snapshot_next; |
| 164 | struct tokutxn *snapshot_prev; |
| 165 | |
| 166 | bool begin_was_logged; |
| 167 | bool declared_read_only; // true if the txn was declared read only when began |
| 168 | |
| 169 | // These are not read until a commit, prepare, or abort starts, and |
| 170 | // they're "monotonic" (only go false->true) during operation: |
| 171 | bool do_fsync; |
| 172 | bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) |
| 173 | |
| 174 | // Not used until commit, prepare, or abort starts: |
| 175 | LSN do_fsync_lsn; |
| 176 | TOKU_XA_XID xa_xid; // for prepared transactions |
| 177 | TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; |
| 178 | void *; |
| 179 | |
| 180 | toku_mutex_t txn_lock; |
| 181 | // Protected by the txn lock: |
| 182 | toku::omt<struct ft*> open_fts; // a collection of the fts that we touched. Indexed by filenum. |
| 183 | struct txn_roll_info roll_info; // Info used to manage rollback entries |
| 184 | |
| 185 | // mutex that protects the transition of the state variable |
| 186 | // the rest of the variables are used by the txn code and |
| 187 | // hot indexing to ensure that when hot indexing is processing a |
| 188 | // leafentry, a TOKUTXN cannot dissappear or change state out from |
| 189 | // underneath it |
| 190 | toku_mutex_t state_lock; |
| 191 | toku_cond_t state_cond; |
| 192 | TOKUTXN_STATE state; |
| 193 | uint32_t num_pin; // number of threads (all hot indexes) that want this |
| 194 | // txn to not transition to commit or abort |
| 195 | uint64_t client_id; |
| 196 | void *; |
| 197 | time_t start_time; |
| 198 | }; |
| 199 | typedef struct tokutxn *TOKUTXN; |
| 200 | |
// Lock / unlock the given transaction.
// NOTE(review): presumably these acquire and release tokutxn::txn_lock - confirm
// against the implementation.
void toku_txn_lock(struct tokutxn *txn);
void toku_txn_unlock(struct tokutxn *txn);

// Returns a 64-bit id for the root of txn.
// NOTE(review): presumably txnid.parent_id64 - confirm.
uint64_t toku_txn_get_root_id(struct tokutxn *txn);
// NOTE(review): presumably reports tokutxn::declared_read_only - confirm.
bool txn_declared_read_only(struct tokutxn *txn);
| 206 | |
// Begin a transaction.
//   container_db_txn: the DB_TXN associated with the new tokutxn
//   parent_tokutxn:   the parent transaction
//                     (NOTE(review): presumably NULL for a root txn - confirm)
//   tokutxn:          out parameter that receives the new transaction
//   logger:           the logger the transaction is tied to
//   snapshot_type:    which kind of snapshot the transaction takes
//   read_only:        whether the transaction is declared read only
// Returns an int status (NOTE(review): presumably 0 on success - confirm).
int toku_txn_begin_txn (
    DB_TXN *container_db_txn,
    struct tokutxn *parent_tokutxn,
    struct tokutxn **tokutxn,
    struct tokulogger *logger,
    TXN_SNAPSHOT_TYPE snapshot_type,
    bool read_only
    );
| 215 | |
// Getter and setter for the DB_TXN associated with a tokutxn.
// NOTE(review): presumably these read / write tokutxn::container_db_txn - confirm.
DB_TXN * toku_txn_get_container_db_txn (struct tokutxn *tokutxn);
void toku_txn_set_container_db_txn(struct tokutxn *txn, DB_TXN *db_txn);
| 218 | |
// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN
// NOTE(review): the signature nevertheless takes a container_db_txn argument,
// so the sentence above may be out of date - confirm with the callers.
// Unlike toku_txn_begin_txn, the caller supplies the transaction id (xid) and
// the for_recovery flag explicitly.
int toku_txn_begin_with_xid (
    struct tokutxn *parent_tokutxn,
    struct tokutxn **tokutxn,
    struct tokulogger *logger,
    TXNID_PAIR xid,
    TXN_SNAPSHOT_TYPE snapshot_type,
    DB_TXN *container_db_txn,
    bool for_recovery,
    bool read_only
    );
| 230 | |
// Update the xids of txn using the given xid.
// NOTE(review): presumably rebuilds tokutxn::xids - confirm in the implementation.
void toku_txn_update_xids_in_txn(struct tokutxn *txn, TXNID xid);

// Load the contents of info into txn; returns an int status.
// NOTE(review): struct txninfo is declared elsewhere; which fields are copied
// cannot be determined from this header.
int toku_txn_load_txninfo (struct tokutxn *txn, struct txninfo *info);
| 234 | |
| 235 | int toku_txn_commit_txn (struct tokutxn *txn, int nosync, |
| 236 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
| 237 | int toku_txn_commit_with_lsn(struct tokutxn *txn, int nosync, LSN oplsn, |
| 238 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
| 239 | |
| 240 | int toku_txn_abort_txn(struct tokutxn *txn, |
| 241 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
| 242 | int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, |
| 243 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
| 244 | |
| 245 | int toku_txn_discard_txn(struct tokutxn *txn); |
| 246 | |
// Note: in this group the "Effect:" comment follows the declaration it describes.
void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid, int nosync);
// Effect: Do the internal work of preparing a transaction (does not log the prepare record).

void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid);
// Effect: Fill in the XID information for a transaction. The caller allocates the XID and the function fills in values.

// NOTE(review): presumably fsyncs the log up to do_fsync_lsn when do_fsync is true - confirm.
void toku_txn_maybe_fsync_log(struct tokulogger *logger, LSN do_fsync_lsn, bool do_fsync);

// Reports the transaction's fsync information through the two out parameters.
// NOTE(review): presumably tokutxn::do_fsync and tokutxn::do_fsync_lsn - confirm.
void toku_txn_get_fsync_info(struct tokutxn *ttxn, bool* do_fsync, LSN* do_fsync_lsn);
| 256 | |
// Complete and destroy a txn
// NOTE(review): presumably equivalent to toku_txn_complete_txn followed by
// toku_txn_destroy_txn - confirm.
void toku_txn_close_txn(struct tokutxn *txn);

// Remove a txn from any live txn lists
void toku_txn_complete_txn(struct tokutxn *txn);

// Free the memory of a txn
void toku_txn_destroy_txn(struct tokutxn *txn);

// Returns the xid list of txn (NOTE(review): presumably tokutxn::xids - confirm).
struct XIDS_S *toku_txn_get_xids(struct tokutxn *txn);

// Force fsync on commit
void toku_txn_force_fsync_on_commit(struct tokutxn *txn);

// Fills in transaction status through s; TXN_STATUS is declared elsewhere.
void toku_txn_get_status(TXN_STATUS s);

// Returns whether xid is present in the given live root txn list.
bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid);

// Returns the oldest TXNID in txn's live root txn list.
// NOTE(review): behavior for an empty list is not visible here - confirm.
TXNID toku_get_oldest_in_live_root_txn_list(struct tokutxn *txn);

// Returns the state of txn (NOTE(review): presumably tokutxn::state - confirm).
TOKUTXN_STATE toku_txn_get_state(struct tokutxn *txn);
| 278 | |
// One entry of a prepared-transaction list: an XA xid together with a DB_TXN.
struct tokulogger_preplist {
    TOKU_XA_XID xid;
    DB_TXN *txn;
};
// Recover prepared transactions from the logger.
//   preplist: caller-provided array with room for `count` entries
//   retp:     out parameter (NOTE(review): presumably the number of entries
//             written to preplist - confirm)
//   flags:    NOTE(review): meaning not visible in this header - confirm.
// Returns an int status.
int toku_logger_recover_txn (struct tokulogger *logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags);
| 284 | |
// NOTE(review): presumably logs the txn's begin record if it has not been logged
// yet (see tokutxn::begin_was_logged) - confirm.
void toku_maybe_log_begin_txn_for_write_operation(struct tokutxn *txn);

// Return whether txn (or its descendants) have done no work.
bool toku_txn_is_read_only(struct tokutxn *txn);

// State-lock and pinning helpers.
// NOTE(review): presumably these operate on tokutxn::state_lock and
// tokutxn::num_pin; the "_unlocked" suffix presumably means the caller already
// holds the state lock - confirm.
void toku_txn_lock_state(struct tokutxn *txn);
void toku_txn_unlock_state(struct tokutxn *txn);
void toku_txn_pin_live_txn_unlocked(struct tokutxn *txn);
void toku_txn_unpin_live_txn(struct tokutxn *txn);

// Returns whether txn has spilled rollback nodes
// (NOTE(review): presumably based on roll_info.spilled_rollback_head - confirm).
bool toku_txn_has_spilled_rollback(struct tokutxn *txn);
| 296 | |
// Get / set the client id of txn together with its accompanying opaque pointer.
//   client_id:    the 64-bit client id (out parameter for the getter)
//   client_extra: opaque pointer stored with the client id (out parameter for
//                 the getter)
// The void* parameters were previously anonymous; they are named here so the
// prototypes document themselves. Parameter names in a prototype do not affect
// callers or definitions.
void toku_txn_get_client_id(struct tokutxn *txn, uint64_t *client_id, void **client_extra);
void toku_txn_set_client_id(struct tokutxn *txn, uint64_t client_id, void *client_extra);

// Returns the start time of txn
// (NOTE(review): presumably tokutxn::start_time - confirm).
time_t toku_txn_get_start_time(struct tokutxn *txn);
| 301 | |
//
// This function is used by the leafentry iterators.
// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value
// that is written by transaction with LSN of id
// live transaction context may read value if either id is the root ancestor of context, or if
// id was committed before context's snapshot was taken.
// For id to be committed before context's snapshot was taken, the following must be true:
// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list
// For the above to NOT be true:
// - id > context->snapshot_txnid64 OR id is in context's live root transaction list
//
// NOTE(review): in the prototype below, "id" is the txnid parameter and "context"
// is the txn parameter. Strictly, the negation of `id < snapshot` is
// `id >= snapshot`; confirm how the implementation treats equality.
// is_provisional is marked unused (UU).
int toku_txn_reads_txnid(TXNID txnid, struct tokutxn *txn, bool is_provisional UU());
| 314 | |
| 315 | // For serialize / deserialize |
| 316 | |
| 317 | #include "ft/serialize/wbuf.h" |
| 318 | |
| 319 | static inline void wbuf_TXNID(struct wbuf *wb, TXNID txnid) { |
| 320 | wbuf_ulonglong(wb, txnid); |
| 321 | } |
| 322 | |
| 323 | static inline void wbuf_nocrc_TXNID(struct wbuf *wb, TXNID txnid) { |
| 324 | wbuf_nocrc_ulonglong(wb, txnid); |
| 325 | } |
| 326 | |
| 327 | static inline void wbuf_nocrc_TXNID_PAIR(struct wbuf *wb, TXNID_PAIR txnid) { |
| 328 | wbuf_nocrc_ulonglong(wb, txnid.parent_id64); |
| 329 | wbuf_nocrc_ulonglong(wb, txnid.child_id64); |
| 330 | } |
| 331 | |
| 332 | static inline void wbuf_nocrc_LSN(struct wbuf *wb, LSN lsn) { |
| 333 | wbuf_nocrc_ulonglong(wb, lsn.lsn); |
| 334 | } |
| 335 | |
| 336 | static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { |
| 337 | wbuf_ulonglong(wb, lsn.lsn); |
| 338 | } |
| 339 | |
| 340 | #include "ft/serialize/rbuf.h" |
| 341 | |
| 342 | static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { |
| 343 | *txnid = rbuf_ulonglong(rb); |
| 344 | } |
| 345 | |
| 346 | static inline void rbuf_TXNID_PAIR(struct rbuf *rb, TXNID_PAIR *txnid) { |
| 347 | txnid->parent_id64 = rbuf_ulonglong(rb); |
| 348 | txnid->child_id64 = rbuf_ulonglong(rb); |
| 349 | } |
| 350 | |
| 351 | static inline void rbuf_ma_TXNID(struct rbuf *rb, memarena *UU(ma), TXNID *txnid) { |
| 352 | rbuf_TXNID(rb, txnid); |
| 353 | } |
| 354 | |
| 355 | static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { |
| 356 | rbuf_TXNID_PAIR(r, txnid); |
| 357 | } |
| 358 | |
| 359 | static inline LSN rbuf_LSN(struct rbuf *rb) { |
| 360 | LSN lsn = { .lsn = rbuf_ulonglong(rb) }; |
| 361 | return lsn; |
| 362 | } |
| 363 | |