1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #pragma once |
40 | |
41 | #include "portability/toku_stdint.h" |
42 | |
43 | #include "ft/txn/txn_state.h" |
44 | #include "ft/serialize/block_table.h" |
45 | #include "ft/ft-status.h" |
46 | #include "util/omt.h" |
47 | |
// Transaction identifier: an unsigned 64-bit integer.
typedef uint64_t TXNID;

// Handle type for a transaction: a pointer to struct tokutxn, which is
// defined further down in this header.
typedef struct tokutxn *TOKUTXN;

// Reserved TXNID values.
// NOTE(review): TXNID_NONE_LIVING and TXNID_NONE both expand to 0; the
// intended distinction between the two is not visible in this header.
#define TXNID_NONE_LIVING ((TXNID)0)
#define TXNID_NONE ((TXNID)0)
// -1 converted to the unsigned TXNID type, i.e. the largest representable id.
#define TXNID_MAX ((TXNID)-1)
55 | |
// A pair of transaction ids, stored as parent_id64 followed by child_id64.
// NOTE(review): presumably the id of the root transaction plus the id of a
// nested child within it — confirm against the code that fills these in.
typedef struct txnid_pair_s {
    TXNID parent_id64;
    TXNID child_id64;
} TXNID_PAIR;

// The pair whose two ids are both TXNID_NONE.
static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE };
62 | |
// We include the child manager here because it uses the TXNID / TOKUTXN types
64 | #include "ft/txn/txn_child_manager.h" |
65 | |
/* Log Sequence Number (LSN)
 * The LSN is a struct wrapping a uint64_t, rather than a bare integer, so
 * that we get better type checking. */
typedef struct __toku_lsn { uint64_t lsn; } LSN;
// LSN constants holding the smallest (0) and largest (UINT64_MAX) raw values.
static const LSN ZERO_LSN = { .lsn = 0 };
static const LSN MAX_LSN = { .lsn = UINT64_MAX };
71 | |
//
// Types of snapshots that can be taken by a tokutxn
//  - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads.
//                       used for SERIALIZABLE and READ UNCOMMITTED
//  - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot
//                       used for REPEATABLE READ
//  - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot
//                        used for READ COMMITTED
//  - TXN_COPIES_SNAPSHOT: NOTE(review): not described by the original comment;
//                         by its name the txn copies an existing snapshot
//                         rather than taking its own — confirm.
//

typedef enum __TXN_SNAPSHOT_TYPE {
    TXN_SNAPSHOT_NONE=0,
    TXN_SNAPSHOT_ROOT=1,
    TXN_SNAPSHOT_CHILD=2,
    TXN_COPIES_SNAPSHOT=3
} TXN_SNAPSHOT_TYPE;
88 | |
// Shorthand names for the toku::omt instantiations (see util/omt.h) used by
// the transaction code:
//  - txn_omt_t: elements are transaction pointers
//  - xid_omt_t: elements are TXNIDs
//  - rx_omt_t:  elements are struct referenced_xid_tuple, with
//               struct referenced_xid_tuple * as the second template argument
typedef toku::omt<struct tokutxn *> txn_omt_t;
typedef toku::omt<TXNID> xid_omt_t;
typedef toku::omt<struct referenced_xid_tuple, struct referenced_xid_tuple *> rx_omt_t;
92 | |
93 | inline bool txn_pair_is_none(TXNID_PAIR txnid) { |
94 | return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; |
95 | } |
96 | |
// Forward declaration: this header only uses pointers to struct tokulogger.
struct tokulogger;
98 | |
// Rollback-log bookkeeping for one transaction: counters for its rollback
// nodes and entries, plus the block numbers of its rollback nodes.
struct txn_roll_info {
    // these are number of rollback nodes and rollback entries for this txn.
    //
    // the current rollback node below has sequence number num_rollback_nodes - 1
    // (because they are numbered 0...num-1). often, the current rollback is
    // already set to this block num, which means it exists and is available to
    // log some entries. if the current rollback is NONE and the number of
    // rollback nodes for this transaction is non-zero, then we will use
    // the number of rollback nodes to know which sequence number to assign
    // to a new one we create
    uint64_t num_rollback_nodes;
    uint64_t num_rollentries;
    uint64_t num_rollentries_processed;
    uint64_t rollentry_raw_count;  // the total count of every byte in the transaction and all its children.

    // spilled rollback nodes are rollback nodes that were gorged by this
    // transaction, retired, and saved in a list.

    // the spilled rollback head is the block number of the first rollback node
    // that makes up the rollback log chain
    BLOCKNUM spilled_rollback_head;

    // the spilled rollback tail is the block number of the last rollback node
    // that makes up the rollback log chain.
    BLOCKNUM spilled_rollback_tail;

    // the current rollback node block number we may use. if this is ROLLBACK_NONE,
    // then we need to create one and set it here before using it.
    BLOCKNUM current_rollback;
};
129 | |
130 | struct tokutxn { |
131 | // These don't change after create: |
132 | |
133 | TXNID_PAIR txnid; |
134 | |
135 | uint64_t snapshot_txnid64; // this is the lsn of the snapshot |
136 | const TXN_SNAPSHOT_TYPE snapshot_type; |
137 | const bool for_recovery; |
138 | struct tokulogger *const logger; |
139 | struct tokutxn *const parent; |
140 | // The child txn is protected by the child_txn_manager lock |
141 | // and by the user contract. The user contract states (and is |
142 | // enforced at the ydb layer) that a child txn should not be created |
143 | // while another child exists. The txn_child_manager will protect |
144 | // other threads from trying to read this value while another |
145 | // thread commits/aborts the child |
146 | struct tokutxn *child; |
147 | |
148 | // statically allocated child manager, if this |
149 | // txn is a root txn, this manager will be used and set to |
150 | // child_manager for this transaction and all of its children |
151 | txn_child_manager child_manager_s; |
152 | |
153 | // child manager for this transaction, all of its children, |
154 | // and all of its ancestors |
155 | txn_child_manager* child_manager; |
156 | |
157 | // These don't change but they're created in a way that's hard to make |
158 | // strictly const. |
159 | DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn |
160 | xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. |
161 | struct XIDS_S *xids; // Represents the xid list |
162 | |
163 | struct tokutxn *snapshot_next; |
164 | struct tokutxn *snapshot_prev; |
165 | |
166 | bool begin_was_logged; |
167 | bool declared_read_only; // true if the txn was declared read only when began |
168 | |
169 | // These are not read until a commit, prepare, or abort starts, and |
170 | // they're "monotonic" (only go false->true) during operation: |
171 | bool do_fsync; |
172 | bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) |
173 | |
174 | // Not used until commit, prepare, or abort starts: |
175 | LSN do_fsync_lsn; |
176 | TOKU_XA_XID xa_xid; // for prepared transactions |
177 | TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; |
178 | void *; |
179 | |
180 | toku_mutex_t txn_lock; |
181 | // Protected by the txn lock: |
182 | toku::omt<struct ft*> open_fts; // a collection of the fts that we touched. Indexed by filenum. |
183 | struct txn_roll_info roll_info; // Info used to manage rollback entries |
184 | |
185 | // mutex that protects the transition of the state variable |
186 | // the rest of the variables are used by the txn code and |
187 | // hot indexing to ensure that when hot indexing is processing a |
188 | // leafentry, a TOKUTXN cannot dissappear or change state out from |
189 | // underneath it |
190 | toku_mutex_t state_lock; |
191 | toku_cond_t state_cond; |
192 | TOKUTXN_STATE state; |
193 | uint32_t num_pin; // number of threads (all hot indexes) that want this |
194 | // txn to not transition to commit or abort |
195 | uint64_t client_id; |
196 | void *; |
197 | time_t start_time; |
198 | }; |
199 | typedef struct tokutxn *TOKUTXN; |
200 | |
// NOTE(review): by name these acquire and release the per-transaction lock
// (struct tokutxn has a txn_lock mutex) — confirm in the implementation.
void toku_txn_lock(struct tokutxn *txn);
void toku_txn_unlock(struct tokutxn *txn);

// NOTE(review): by name, returns the id of txn's root ancestor — confirm.
uint64_t toku_txn_get_root_id(struct tokutxn *txn);
// NOTE(review): presumably reports the declared_read_only flag of txn — confirm.
bool txn_declared_read_only(struct tokutxn *txn);
206 | |
// Begin a transaction.
//  container_db_txn: the DB_TXN associated with the new tokutxn
//  parent_tokutxn:   parent transaction (NOTE(review): presumably NULL for a root txn — confirm)
//  tokutxn:          out-parameter that receives the new transaction
//  logger:           logger the transaction is associated with
//  snapshot_type:    one of the TXN_SNAPSHOT_TYPE values
//  read_only:        whether the transaction is declared read only
// Returns an int status (NOTE(review): presumably 0 on success — confirm).
int toku_txn_begin_txn (
    DB_TXN  *container_db_txn,
    struct tokutxn *parent_tokutxn,
    struct tokutxn **tokutxn,
    struct tokulogger *logger,
    TXN_SNAPSHOT_TYPE snapshot_type,
    bool read_only
    );

// Accessors for the DB_TXN associated with a tokutxn (struct tokutxn keeps it
// in its container_db_txn field).
DB_TXN * toku_txn_get_container_db_txn (struct tokutxn *tokutxn);
void toku_txn_set_container_db_txn(struct tokutxn *txn, DB_TXN *db_txn);
218 | |
// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN
// NOTE(review): the signature nevertheless takes a container_db_txn argument,
// so the statement above may only describe the recovery caller — confirm.
// Takes the same arguments as toku_txn_begin_txn plus an explicit xid and a
// for_recovery flag.
int toku_txn_begin_with_xid (
    struct tokutxn *parent_tokutxn,
    struct tokutxn **tokutxn,
    struct tokulogger *logger,
    TXNID_PAIR xid,
    TXN_SNAPSHOT_TYPE snapshot_type,
    DB_TXN *container_db_txn,
    bool for_recovery,
    bool read_only
    );

// NOTE(review): by name, updates the xids member of txn using xid — confirm.
void toku_txn_update_xids_in_txn(struct tokutxn *txn, TXNID xid);

// NOTE(review): by name, loads the contents of *info into txn; struct txninfo
// is not declared in this header — confirm in the implementation.
int toku_txn_load_txninfo (struct tokutxn *txn, struct txninfo *info);
234 | |
235 | int toku_txn_commit_txn (struct tokutxn *txn, int nosync, |
236 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
237 | int toku_txn_commit_with_lsn(struct tokutxn *txn, int nosync, LSN oplsn, |
238 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
239 | |
240 | int toku_txn_abort_txn(struct tokutxn *txn, |
241 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
242 | int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, |
243 | TXN_PROGRESS_POLL_FUNCTION poll, void *); |
244 | |
// NOTE(review): what "discard" does to txn is not visible in this header —
// see the implementation.
int toku_txn_discard_txn(struct tokutxn *txn);

void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid, int nosync);
// Effect: Do the internal work of preparing a transaction (does not log the prepare record).

void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid);
// Effect: Fill in the XID information for a transaction.  The caller allocates the XID and the function fills in values.

// NOTE(review): the parameter names match the do_fsync / do_fsync_lsn fields
// of struct tokutxn; presumably fsyncs the log only when do_fsync is true —
// confirm.
void toku_txn_maybe_fsync_log(struct tokulogger *logger, LSN do_fsync_lsn, bool do_fsync);

// do_fsync and do_fsync_lsn are out-parameters.
// NOTE(review): presumably filled from the same-named fields of ttxn — confirm.
void toku_txn_get_fsync_info(struct tokutxn *ttxn, bool* do_fsync, LSN* do_fsync_lsn);

// Complete and destroy a txn
void toku_txn_close_txn(struct tokutxn *txn);

// Remove a txn from any live txn lists
void toku_txn_complete_txn(struct tokutxn *txn);

// Free the memory of a txn
void toku_txn_destroy_txn(struct tokutxn *txn);

// NOTE(review): presumably returns the xids member of txn — confirm.
struct XIDS_S *toku_txn_get_xids(struct tokutxn *txn);

// Force fsync on commit
void toku_txn_force_fsync_on_commit(struct tokutxn *txn);

// NOTE(review): TXN_STATUS comes from another header (likely ft/ft-status.h);
// presumably s receives the transaction status counters — confirm.
void toku_txn_get_status(TXN_STATUS s);

// NOTE(review): by name, reports whether xid is present in live_root_txn_list — confirm.
bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid);

// NOTE(review): by name, returns the oldest TXNID in txn's live root txn list — confirm.
TXNID toku_get_oldest_in_live_root_txn_list(struct tokutxn *txn);

// NOTE(review): presumably returns the state member of txn — confirm.
TOKUTXN_STATE toku_txn_get_state(struct tokutxn *txn);
278 | |
// One element of the preplist array passed to toku_logger_recover_txn:
// an XA xid together with a DB_TXN pointer.
struct tokulogger_preplist {
    TOKU_XA_XID xid;
    DB_TXN *txn;
};
// preplist is an array of `count` elements and retp is an out-parameter.
// NOTE(review): presumably fills preplist with prepared transactions and
// stores the number written in *retp — confirm; the meaning of flags is not
// visible in this header.
int toku_logger_recover_txn (struct tokulogger *logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags);
284 | |
// NOTE(review): by name, logs the begin record of txn if that has not been
// done yet (struct tokutxn has a begin_was_logged flag), ahead of a write
// operation — confirm.
void toku_maybe_log_begin_txn_for_write_operation(struct tokutxn *txn);

// Return whether txn (or its descendants) have done no work.
bool toku_txn_is_read_only(struct tokutxn *txn);

// NOTE(review): by name these lock and unlock txn's state_lock, and pin/unpin
// adjust its num_pin count; the "_unlocked" suffix on pin is not explained in
// this header — confirm the locking contract in the implementation.
void toku_txn_lock_state(struct tokutxn *txn);
void toku_txn_unlock_state(struct tokutxn *txn);
void toku_txn_pin_live_txn_unlocked(struct tokutxn *txn);
void toku_txn_unpin_live_txn(struct tokutxn *txn);

// NOTE(review): presumably inspects the spilled rollback fields of
// txn->roll_info — confirm.
bool toku_txn_has_spilled_rollback(struct tokutxn *txn);
296 | |
// Accessors for the client identification attached to a txn: an integer
// client_id plus an opaque client_extra pointer.
//  get: client_id and client_extra are out-parameters
//  set: client_id and client_extra are the new values
void toku_txn_get_client_id(struct tokutxn *txn, uint64_t *client_id, void **client_extra);
void toku_txn_set_client_id(struct tokutxn *txn, uint64_t client_id, void *client_extra);
299 | |
// NOTE(review): presumably returns the start_time member of txn — confirm.
time_t toku_txn_get_start_time(struct tokutxn *txn);

//
// This function is used by the leafentry iterators.
// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value
// that is written by transaction with LSN of id
// live transaction context may read value if either id is the root ancestor of context, or if
// id was committed before context's snapshot was taken.
// For id to be committed before context's snapshot was taken, the following must be true:
//  - id < context->snapshot_txnid64 AND id is not in context's live root transaction list
// For the above to NOT be true:
//  - id >= context->snapshot_txnid64 OR id is in context's live root transaction list
//    (the original comment said `>`; the logical negation of `<` is `>=`)
//
// In the signature below, `id` is the txnid parameter and `context` is the
// txn parameter; is_provisional is marked unused via UU().
//
int toku_txn_reads_txnid(TXNID txnid, struct tokutxn *txn, bool is_provisional UU());
314 | |
315 | // For serialize / deserialize |
316 | |
317 | #include "ft/serialize/wbuf.h" |
318 | |
319 | static inline void wbuf_TXNID(struct wbuf *wb, TXNID txnid) { |
320 | wbuf_ulonglong(wb, txnid); |
321 | } |
322 | |
323 | static inline void wbuf_nocrc_TXNID(struct wbuf *wb, TXNID txnid) { |
324 | wbuf_nocrc_ulonglong(wb, txnid); |
325 | } |
326 | |
327 | static inline void wbuf_nocrc_TXNID_PAIR(struct wbuf *wb, TXNID_PAIR txnid) { |
328 | wbuf_nocrc_ulonglong(wb, txnid.parent_id64); |
329 | wbuf_nocrc_ulonglong(wb, txnid.child_id64); |
330 | } |
331 | |
332 | static inline void wbuf_nocrc_LSN(struct wbuf *wb, LSN lsn) { |
333 | wbuf_nocrc_ulonglong(wb, lsn.lsn); |
334 | } |
335 | |
336 | static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { |
337 | wbuf_ulonglong(wb, lsn.lsn); |
338 | } |
339 | |
340 | #include "ft/serialize/rbuf.h" |
341 | |
342 | static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { |
343 | *txnid = rbuf_ulonglong(rb); |
344 | } |
345 | |
346 | static inline void rbuf_TXNID_PAIR(struct rbuf *rb, TXNID_PAIR *txnid) { |
347 | txnid->parent_id64 = rbuf_ulonglong(rb); |
348 | txnid->child_id64 = rbuf_ulonglong(rb); |
349 | } |
350 | |
351 | static inline void rbuf_ma_TXNID(struct rbuf *rb, memarena *UU(ma), TXNID *txnid) { |
352 | rbuf_TXNID(rb, txnid); |
353 | } |
354 | |
355 | static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { |
356 | rbuf_TXNID_PAIR(r, txnid); |
357 | } |
358 | |
359 | static inline LSN rbuf_LSN(struct rbuf *rb) { |
360 | LSN lsn = { .lsn = rbuf_ulonglong(rb) }; |
361 | return lsn; |
362 | } |
363 | |