| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ | 
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: | 
| 3 | #ident "$Id$" | 
| 4 | /*====== | 
| 5 | This file is part of PerconaFT. | 
| 6 |  | 
| 7 |  | 
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. | 
| 9 |  | 
| 10 |     PerconaFT is free software: you can redistribute it and/or modify | 
| 11 |     it under the terms of the GNU General Public License, version 2, | 
| 12 |     as published by the Free Software Foundation. | 
| 13 |  | 
| 14 |     PerconaFT is distributed in the hope that it will be useful, | 
| 15 |     but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 16 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 17 |     GNU General Public License for more details. | 
| 18 |  | 
| 19 |     You should have received a copy of the GNU General Public License | 
| 20 |     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>. | 
| 21 |  | 
| 22 | ---------------------------------------- | 
| 23 |  | 
| 24 |     PerconaFT is free software: you can redistribute it and/or modify | 
| 25 |     it under the terms of the GNU Affero General Public License, version 3, | 
| 26 |     as published by the Free Software Foundation. | 
| 27 |  | 
| 28 |     PerconaFT is distributed in the hope that it will be useful, | 
| 29 |     but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 30 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 31 |     GNU Affero General Public License for more details. | 
| 32 |  | 
| 33 |     You should have received a copy of the GNU Affero General Public License | 
| 34 |     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>. | 
| 35 | ======= */ | 
| 36 |  | 
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." | 
| 38 |  | 
| 39 | #pragma once | 
| 40 |  | 
| 41 | #include "portability/toku_stdint.h" | 
| 42 |  | 
| 43 | #include "ft/txn/txn_state.h" | 
| 44 | #include "ft/serialize/block_table.h" | 
| 45 | #include "ft/ft-status.h" | 
| 46 | #include "util/omt.h" | 
| 47 |  | 
// A transaction id is a plain 64-bit unsigned integer.
typedef uint64_t TXNID;

// Opaque handle to a transaction; struct tokutxn is defined further down.
typedef struct tokutxn *TOKUTXN;

// Sentinel transaction ids. TXNID_NONE_LIVING and TXNID_NONE share the
// value 0; TXNID_MAX is the largest representable TXNID (all bits set).
#define TXNID_NONE_LIVING ((TXNID)0)
#define TXNID_NONE        ((TXNID)0)
#define TXNID_MAX         ((TXNID)-1)

// A (parent, child) pair of 64-bit transaction ids.
typedef struct txnid_pair_s {
    TXNID parent_id64;
    TXNID child_id64;
} TXNID_PAIR;

// The "no transaction" pair: both halves are TXNID_NONE.
static const TXNID_PAIR TXNID_PAIR_NONE = {
    .parent_id64 = TXNID_NONE,
    .child_id64 = TXNID_NONE
};
| 62 |  | 
// We include the child manager here because it uses the TXNID / TOKUTXN types
| 64 | #include "ft/txn/txn_child_manager.h" | 
| 65 |  | 
/* Log Sequence Number (LSN).
 * The LSN is wrapped in a struct rather than being a bare integer so that
 * the compiler type-checks it against other 64-bit quantities. */
typedef struct __toku_lsn {
    uint64_t lsn;
} LSN;

// Smallest and largest possible LSN values.
static const LSN ZERO_LSN = { .lsn = 0 };
static const LSN MAX_LSN = { .lsn = UINT64_MAX };
| 71 |  | 
//
// Kinds of snapshot a tokutxn can take:
//  - TXN_SNAPSHOT_NONE:  no snapshot is taken; reads are not snapshot reads.
//                        Used for SERIALIZABLE and READ UNCOMMITTED.
//  - TXN_SNAPSHOT_ROOT:  every tokutxn uses its root transaction's snapshot.
//                        Used for REPEATABLE READ.
//  - TXN_SNAPSHOT_CHILD: each child tokutxn creates its own snapshot.
//                        Used for READ COMMITTED.
//  - TXN_COPIES_SNAPSHOT: NOTE(review): not described by the original notes;
//                        its meaning is not visible in this header -- confirm
//                        against the code that consumes it.
//

typedef enum __TXN_SNAPSHOT_TYPE {
    TXN_SNAPSHOT_NONE   = 0,
    TXN_SNAPSHOT_ROOT   = 1,
    TXN_SNAPSHOT_CHILD  = 2,
    TXN_COPIES_SNAPSHOT = 3
} TXN_SNAPSHOT_TYPE;
| 88 |  | 
| 89 | typedef toku::omt<struct tokutxn *> txn_omt_t; | 
| 90 | typedef toku::omt<TXNID> xid_omt_t; | 
| 91 | typedef toku::omt<struct referenced_xid_tuple, struct referenced_xid_tuple *> rx_omt_t; | 
| 92 |  | 
| 93 | inline bool txn_pair_is_none(TXNID_PAIR txnid) { | 
| 94 |     return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; | 
| 95 | } | 
| 96 |  | 
| 97 | struct tokulogger; | 
| 98 |  | 
| 99 | struct txn_roll_info { | 
| 100 |     // these are number of rollback nodes and rollback entries for this txn. | 
| 101 |     // | 
| 102 |     // the current rollback node below has sequence number num_rollback_nodes - 1 | 
| 103 |     // (because they are numbered 0...num-1). often, the current rollback is | 
| 104 |     // already set to this block num, which means it exists and is available to | 
| 105 |     // log some entries. if the current rollback is NONE and the number of | 
| 106 |     // rollback nodes for this transaction is non-zero, then we will use | 
| 107 |     // the number of rollback nodes to know which sequence number to assign | 
| 108 |     // to a new one we create | 
| 109 |     uint64_t num_rollback_nodes; | 
| 110 |     uint64_t num_rollentries; | 
| 111 |     uint64_t num_rollentries_processed; | 
| 112 |     uint64_t rollentry_raw_count;  // the total count of every byte in the transaction and all its children. | 
| 113 |  | 
| 114 |     // spilled rollback nodes are rollback nodes that were gorged by this | 
| 115 |     // transaction, retired, and saved in a list. | 
| 116 |  | 
| 117 |     // the spilled rollback head is the block number of the first rollback node | 
| 118 |     // that makes up the rollback log chain | 
| 119 |     BLOCKNUM spilled_rollback_head; | 
| 120 |  | 
| 121 |     // the spilled rollback is the block number of the last rollback node that | 
| 122 |     // makes up the rollback log chain.  | 
| 123 |     BLOCKNUM spilled_rollback_tail; | 
| 124 |  | 
| 125 |     // the current rollback node block number we may use. if this is ROLLBACK_NONE, | 
| 126 |     // then we need to create one and set it here before using it. | 
| 127 |     BLOCKNUM current_rollback;  | 
| 128 | }; | 
| 129 |  | 
| 130 | struct tokutxn { | 
| 131 |     // These don't change after create: | 
| 132 |  | 
| 133 |     TXNID_PAIR txnid; | 
| 134 |  | 
| 135 |     uint64_t snapshot_txnid64; // this is the lsn of the snapshot | 
| 136 |     const TXN_SNAPSHOT_TYPE snapshot_type; | 
| 137 |     const bool for_recovery; | 
| 138 |     struct tokulogger *const logger; | 
| 139 |     struct tokutxn *const parent; | 
| 140 |     // The child txn is protected by the child_txn_manager lock | 
| 141 |     // and by the user contract. The user contract states (and is | 
| 142 |     // enforced at the ydb layer) that a child txn should not be created | 
| 143 |     // while another child exists. The txn_child_manager will protect | 
| 144 |     // other threads from trying to read this value while another | 
| 145 |     // thread commits/aborts the child | 
| 146 |     struct tokutxn *child; | 
| 147 |  | 
| 148 |     // statically allocated child manager, if this  | 
| 149 |     // txn is a root txn, this manager will be used and set to  | 
| 150 |     // child_manager for this transaction and all of its children | 
| 151 |     txn_child_manager child_manager_s; | 
| 152 |  | 
| 153 |     // child manager for this transaction, all of its children, | 
| 154 |     // and all of its ancestors | 
| 155 |     txn_child_manager* child_manager; | 
| 156 |  | 
| 157 |     // These don't change but they're created in a way that's hard to make | 
| 158 |     // strictly const. | 
| 159 |     DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn | 
| 160 |     xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. | 
| 161 |     struct XIDS_S *xids; // Represents the xid list | 
| 162 |  | 
| 163 |     struct tokutxn *snapshot_next; | 
| 164 |     struct tokutxn *snapshot_prev; | 
| 165 |  | 
| 166 |     bool begin_was_logged; | 
| 167 |     bool declared_read_only; // true if the txn was declared read only when began | 
| 168 |  | 
| 169 |     // These are not read until a commit, prepare, or abort starts, and | 
| 170 |     // they're "monotonic" (only go false->true) during operation: | 
| 171 |     bool do_fsync; | 
| 172 |     bool force_fsync_on_commit;  //This transaction NEEDS an fsync once (if) it commits.  (commit means root txn) | 
| 173 |  | 
| 174 |     // Not used until commit, prepare, or abort starts: | 
| 175 |     LSN do_fsync_lsn; | 
| 176 |     TOKU_XA_XID xa_xid; // for prepared transactions | 
| 177 |     TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; | 
| 178 |     void *; | 
| 179 |  | 
| 180 |     toku_mutex_t txn_lock; | 
| 181 |     // Protected by the txn lock: | 
| 182 |     toku::omt<struct ft*> open_fts; // a collection of the fts that we touched.  Indexed by filenum. | 
| 183 |     struct txn_roll_info roll_info; // Info used to manage rollback entries | 
| 184 |  | 
| 185 |     // mutex that protects the transition of the state variable | 
| 186 |     // the rest of the variables are used by the txn code and  | 
| 187 |     // hot indexing to ensure that when hot indexing is processing a  | 
| 188 |     // leafentry, a TOKUTXN cannot dissappear or change state out from | 
| 189 |     // underneath it | 
| 190 |     toku_mutex_t state_lock; | 
| 191 |     toku_cond_t state_cond; | 
| 192 |     TOKUTXN_STATE state; | 
| 193 |     uint32_t num_pin; // number of threads (all hot indexes) that want this | 
| 194 |                       // txn to not transition to commit or abort | 
| 195 |     uint64_t client_id; | 
| 196 |     void *; | 
| 197 |     time_t start_time; | 
| 198 | }; | 
| 199 | typedef struct tokutxn *TOKUTXN; | 
| 200 |  | 
// Lock / unlock a transaction. The definitions live outside this header;
// presumably they operate on txn->txn_lock -- confirm in the implementation.
void toku_txn_lock(struct tokutxn *txn);
void toku_txn_unlock(struct tokutxn *txn);

// Simple accessors. NOTE(review): judging by the names, these return the id
// of the root ancestor and the declared_read_only flag -- confirm.
uint64_t toku_txn_get_root_id(struct tokutxn *txn);
bool txn_declared_read_only(struct tokutxn *txn);
| 206 |  | 
| 207 | int toku_txn_begin_txn ( | 
| 208 |     DB_TXN  *container_db_txn, | 
| 209 |     struct tokutxn *parent_tokutxn,  | 
| 210 |     struct tokutxn **tokutxn,  | 
| 211 |     struct tokulogger *logger, | 
| 212 |     TXN_SNAPSHOT_TYPE snapshot_type, | 
| 213 |     bool read_only | 
| 214 |     ); | 
| 215 |  | 
| 216 | DB_TXN * toku_txn_get_container_db_txn (struct tokutxn *tokutxn); | 
| 217 | void toku_txn_set_container_db_txn(struct tokutxn *txn, DB_TXN *db_txn); | 
| 218 |  | 
| 219 | // toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN  | 
| 220 | int toku_txn_begin_with_xid ( | 
| 221 |     struct tokutxn *parent_tokutxn,  | 
| 222 |     struct tokutxn **tokutxn,  | 
| 223 |     struct tokulogger *logger,  | 
| 224 |     TXNID_PAIR xid,  | 
| 225 |     TXN_SNAPSHOT_TYPE snapshot_type, | 
| 226 |     DB_TXN *container_db_txn, | 
| 227 |     bool for_recovery, | 
| 228 |     bool read_only | 
| 229 |     ); | 
| 230 |  | 
| 231 | void toku_txn_update_xids_in_txn(struct tokutxn *txn, TXNID xid); | 
| 232 |  | 
| 233 | int toku_txn_load_txninfo (struct tokutxn *txn, struct txninfo *info); | 
| 234 |  | 
| 235 | int toku_txn_commit_txn (struct tokutxn *txn, int nosync, | 
| 236 |                          TXN_PROGRESS_POLL_FUNCTION poll, void *); | 
| 237 | int toku_txn_commit_with_lsn(struct tokutxn *txn, int nosync, LSN oplsn, | 
| 238 |                              TXN_PROGRESS_POLL_FUNCTION poll, void *); | 
| 239 |  | 
| 240 | int toku_txn_abort_txn(struct tokutxn *txn, | 
| 241 |                        TXN_PROGRESS_POLL_FUNCTION poll, void *); | 
| 242 | int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, | 
| 243 |                             TXN_PROGRESS_POLL_FUNCTION poll, void *); | 
| 244 |  | 
| 245 | int toku_txn_discard_txn(struct tokutxn *txn); | 
| 246 |  | 
| 247 | void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid, int nosync); | 
| 248 | // Effect: Do the internal work of preparing a transaction (does not log the prepare record). | 
| 249 |  | 
| 250 | void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid); | 
| 251 | // Effect: Fill in the XID information for a transaction.  The caller allocates the XID and the function fills in values. | 
| 252 |  | 
| 253 | void toku_txn_maybe_fsync_log(struct tokulogger *logger, LSN do_fsync_lsn, bool do_fsync); | 
| 254 |  | 
| 255 | void toku_txn_get_fsync_info(struct tokutxn *ttxn, bool* do_fsync, LSN* do_fsync_lsn); | 
| 256 |  | 
| 257 | // Complete and destroy a txn | 
| 258 | void toku_txn_close_txn(struct tokutxn *txn); | 
| 259 |  | 
| 260 | // Remove a txn from any live txn lists | 
| 261 | void toku_txn_complete_txn(struct tokutxn *txn); | 
| 262 |  | 
| 263 | // Free the memory of a txn | 
| 264 | void toku_txn_destroy_txn(struct tokutxn *txn); | 
| 265 |  | 
| 266 | struct XIDS_S *toku_txn_get_xids(struct tokutxn *txn); | 
| 267 |  | 
| 268 | // Force fsync on commit | 
| 269 | void toku_txn_force_fsync_on_commit(struct tokutxn *txn); | 
| 270 |  | 
| 271 | void toku_txn_get_status(TXN_STATUS s); | 
| 272 |  | 
| 273 | bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid); | 
| 274 |  | 
| 275 | TXNID toku_get_oldest_in_live_root_txn_list(struct tokutxn *txn); | 
| 276 |  | 
| 277 | TOKUTXN_STATE toku_txn_get_state(struct tokutxn *txn); | 
| 278 |  | 
| 279 | struct tokulogger_preplist { | 
| 280 |     TOKU_XA_XID xid; | 
| 281 |     DB_TXN *txn; | 
| 282 | }; | 
| 283 | int toku_logger_recover_txn (struct tokulogger *logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); | 
| 284 |  | 
// NOTE(review): by its name, logs the txn's begin record if a write operation
// needs it and it has not been logged yet (see begin_was_logged) -- confirm.
void toku_maybe_log_begin_txn_for_write_operation(struct tokutxn *txn);

// Return whether txn (or its descendants) have done no work.
bool toku_txn_is_read_only(struct tokutxn *txn);

// State-lock and pinning helpers. NOTE(review): presumably these operate on
// txn->state_lock and txn->num_pin; the "_unlocked" suffix suggests the
// caller already holds the state lock -- confirm in the implementation.
void toku_txn_lock_state(struct tokutxn *txn);
void toku_txn_unlock_state(struct tokutxn *txn);
void toku_txn_pin_live_txn_unlocked(struct tokutxn *txn);
void toku_txn_unpin_live_txn(struct tokutxn *txn);

// NOTE(review): presumably reports whether the txn has spilled rollback
// nodes (see txn_roll_info) -- confirm.
bool toku_txn_has_spilled_rollback(struct tokutxn *txn);
| 296 |  | 
// Get / set the client id associated with a transaction, together with an
// opaque caller-owned pointer (client_extra).
// For the getter, client_id and client_extra are out-parameters.
void toku_txn_get_client_id(struct tokutxn *txn, uint64_t *client_id, void **client_extra);
void toku_txn_set_client_id(struct tokutxn *txn, uint64_t client_id, void *client_extra);

// Returns a time_t for txn; NOTE(review): presumably txn->start_time -- confirm.
time_t toku_txn_get_start_time(struct tokutxn *txn);
| 301 |  | 
| 302 | // | 
| 303 | // This function is used by the leafentry iterators. | 
| 304 | // returns TOKUDB_ACCEPT if live transaction context is allowed to read a value | 
| 305 | // that is written by transaction with LSN of id | 
| 306 | // live transaction context may read value if either id is the root ancestor of context, or if | 
| 307 | // id was committed before context's snapshot was taken. | 
| 308 | // For id to be committed before context's snapshot was taken, the following must be true: | 
| 309 | //  - id < context->snapshot_txnid64 AND id is not in context's live root transaction list | 
| 310 | // For the above to NOT be true: | 
| 311 | //  - id > context->snapshot_txnid64 OR id is in context's live root transaction list | 
| 312 | // | 
| 313 | int toku_txn_reads_txnid(TXNID txnid, struct tokutxn *txn, bool is_provisional UU()); | 
| 314 |  | 
| 315 | // For serialize / deserialize | 
| 316 |  | 
| 317 | #include "ft/serialize/wbuf.h" | 
| 318 |  | 
| 319 | static inline void wbuf_TXNID(struct wbuf *wb, TXNID txnid) { | 
| 320 |     wbuf_ulonglong(wb, txnid); | 
| 321 | } | 
| 322 |  | 
| 323 | static inline void wbuf_nocrc_TXNID(struct wbuf *wb, TXNID txnid) { | 
| 324 |     wbuf_nocrc_ulonglong(wb, txnid); | 
| 325 | } | 
| 326 |  | 
| 327 | static inline void wbuf_nocrc_TXNID_PAIR(struct wbuf *wb, TXNID_PAIR txnid) { | 
| 328 |     wbuf_nocrc_ulonglong(wb, txnid.parent_id64); | 
| 329 |     wbuf_nocrc_ulonglong(wb, txnid.child_id64); | 
| 330 | } | 
| 331 |  | 
| 332 | static inline void wbuf_nocrc_LSN(struct wbuf *wb, LSN lsn) { | 
| 333 |     wbuf_nocrc_ulonglong(wb, lsn.lsn); | 
| 334 | } | 
| 335 |  | 
| 336 | static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { | 
| 337 |     wbuf_ulonglong(wb, lsn.lsn); | 
| 338 | } | 
| 339 |  | 
| 340 | #include "ft/serialize/rbuf.h" | 
| 341 |  | 
| 342 | static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { | 
| 343 |     *txnid = rbuf_ulonglong(rb); | 
| 344 | } | 
| 345 |  | 
| 346 | static inline void rbuf_TXNID_PAIR(struct rbuf *rb, TXNID_PAIR *txnid) { | 
| 347 |     txnid->parent_id64 = rbuf_ulonglong(rb); | 
| 348 |     txnid->child_id64 = rbuf_ulonglong(rb); | 
| 349 | } | 
| 350 |  | 
| 351 | static inline void rbuf_ma_TXNID(struct rbuf *rb, memarena *UU(ma), TXNID *txnid) { | 
| 352 |     rbuf_TXNID(rb, txnid); | 
| 353 | } | 
| 354 |  | 
| 355 | static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { | 
| 356 |     rbuf_TXNID_PAIR(r, txnid); | 
| 357 | } | 
| 358 |  | 
| 359 | static inline LSN rbuf_LSN(struct rbuf *rb) { | 
| 360 |     LSN lsn = { .lsn = rbuf_ulonglong(rb) }; | 
| 361 |     return lsn; | 
| 362 | } | 
| 363 |  |