1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #pragma once |
40 | |
41 | #include <db.h> |
42 | #include <limits.h> |
43 | |
44 | #include <ft/cachetable/cachetable.h> |
45 | #include <ft/cursor.h> |
46 | #include <ft/comparator.h> |
47 | #include <ft/logger/logger.h> |
48 | #include <ft/txn/txn.h> |
49 | |
50 | #include <util/growable_array.h> |
51 | #include <util/minicron.h> |
52 | #include <util/omt.h> |
53 | |
54 | #include <locktree/locktree.h> |
55 | #include <locktree/range_buffer.h> |
56 | |
57 | #include <toku_list.h> |
58 | |
59 | struct __toku_db_internal { |
60 | int opened; |
61 | uint32_t open_flags; |
62 | int open_mode; |
63 | FT_HANDLE ft_handle; |
64 | DICTIONARY_ID dict_id; // unique identifier used by locktree logic |
65 | toku::locktree *lt; |
66 | struct simple_dbt skey, sval; // static key and value |
67 | bool key_compare_was_set; // true if a comparison function was provided before call to db->open() (if false, use environment's comparison function). |
68 | char *dname; // dname is constant for this handle (handle must be closed before file is renamed) |
69 | DB_INDEXER *indexer; |
70 | }; |
71 | |
72 | int toku_db_set_indexer(DB *db, DB_INDEXER *indexer); |
73 | DB_INDEXER *toku_db_get_indexer(DB *db); |
74 | |
75 | #if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1 |
76 | typedef void (*toku_env_errcall_t)(const char *, char *); |
77 | #elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3 |
78 | typedef void (*toku_env_errcall_t)(const DB_ENV *, const char *, const char *); |
79 | #else |
80 | #error |
81 | #endif |
82 | |
83 | struct __toku_db_env_internal { |
84 | int is_panicked; // if nonzero, then its an error number |
85 | char *panic_string; |
86 | uint32_t open_flags; |
87 | int open_mode; |
88 | toku_env_errcall_t errcall; |
89 | void *errfile; |
90 | const char *errpfx; |
91 | char *dir; /* A malloc'd copy of the directory. */ |
92 | char *tmp_dir; |
93 | char *lg_dir; |
94 | char *data_dir; |
95 | int (*bt_compare) (DB *, const DBT *, const DBT *); |
96 | int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *, void (*set_val)(const DBT *new_val, void *), void *); |
97 | generate_row_for_put_func generate_row_for_put; |
98 | generate_row_for_del_func generate_row_for_del; |
99 | |
100 | unsigned long cachetable_size; |
101 | unsigned long client_pool_threads; |
102 | unsigned long cachetable_pool_threads; |
103 | unsigned long checkpoint_pool_threads; |
104 | CACHETABLE cachetable; |
105 | TOKULOGGER logger; |
106 | toku::locktree_manager ltm; |
107 | lock_timeout_callback lock_wait_timeout_callback; // Called when a lock request times out waiting for a lock. |
108 | lock_wait_callback lock_wait_needed_callback; // Called when a lock request requires a wait. |
109 | |
110 | DB *directory; // Maps dnames to inames |
111 | DB *persistent_environment; // Stores environment settings, can be used for upgrade |
112 | toku::omt<DB *> *open_dbs_by_dname; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location) |
113 | toku::omt<DB *> *open_dbs_by_dict_id; // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location) |
114 | toku_pthread_rwlock_t open_dbs_rwlock; // rwlock that protects the OMT of open dbs. |
115 | |
116 | char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /) |
117 | char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /) |
118 | char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /) |
119 | |
120 | fs_redzone_state fs_state; |
121 | uint64_t fs_seq; // how many times has fs_poller run? |
122 | uint64_t last_seq_entered_red; |
123 | uint64_t last_seq_entered_yellow; |
124 | int redzone; // percent of total fs space that marks boundary between yellow and red zones |
125 | int enospc_redzone_ctr; // number of operations rejected by enospc prevention (red zone) |
126 | int fs_poll_time; // Time in seconds between statfs calls |
127 | struct minicron fs_poller; // Poll the file systems |
128 | bool fs_poller_is_init; |
129 | uint32_t fsync_log_period_ms; |
130 | bool fsync_log_cron_is_init; |
131 | struct minicron fsync_log_cron; // fsync recovery log |
132 | int envdir_lockfd; |
133 | int datadir_lockfd; |
134 | int logdir_lockfd; |
135 | int tmpdir_lockfd; |
136 | bool check_thp; // if set check if transparent huge pages are disabled |
137 | bool dir_per_db; |
138 | uint64_t (*get_loader_memory_size_callback)(void); |
139 | uint64_t default_lock_timeout_msec; |
140 | uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec); |
141 | uint64_t default_killed_time_msec; |
142 | uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec); |
143 | int (*killed_callback)(void); |
144 | }; |
145 | |
146 | // test-only environment function for running lock escalation |
147 | static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) { |
148 | toku::locktree_manager *mgr = &env->i->ltm; |
149 | mgr->run_escalation_for_test(); |
150 | } |
151 | |
// Common error handling macros and panic detection.
//
// Each macro expands to an `if` statement that may `return` from the enclosing
// function, so use them only as stand-alone statements.

// Returns `status` from the enclosing function when `cond` is true.
// The expansion already ends with ';'.
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return status;

// When toku_env_is_panicked(env) is nonzero: sleeps for one second, then
// returns EINVAL from the enclosing function.
// NOTE(review): the reason for the one-second sleep is not visible in this header.
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }

// Same check applied to `db`'s environment.  The argument is parenthesized so
// that a compound expression (e.g. `dbs + 1`) is dereferenced as a whole.
#define HANDLE_PANICKED_DB(db) HANDLE_PANICKED_ENV((db)->dbenv)
156 | |
// Only commit/abort/prelock (which are used by handlerton) are allowed when a child exists.
//
// When `txn` is non-null and its internal part has a child, reports EINVAL via
// toku_ydb_do_error() and returns that call's result from the enclosing
// function.  `txn` is passed on wrapped in parentheses because
// db_txn_struct_i() is a cast-based macro: without them a compound argument
// would have only its first operand cast.
#define HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn)                                  \
    MAYBE_RETURN_ERROR(((txn) && db_txn_struct_i((txn))->child),                     \
                       toku_ydb_do_error((env),                                      \
                                         EINVAL,                                     \
                                         "%s: Transaction cannot do work when child exists\n", __FUNCTION__))

// Same check, using the environment that owns `db`.
#define HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn) \
    HANDLE_ILLEGAL_WORKING_PARENT_TXN((db)->dbenv, txn)

// Same check, using the db (`dbp`) and the transaction recorded in cursor `c`.
#define HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c) \
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN((c)->dbp, dbc_struct_i(c)->txn)
169 | |
// Bail out if we get unknown flags.
//
// When `env` is non-null and `flags_to_function` has bits outside
// `allowed_flags`, reports EINVAL via toku_ydb_do_error() and returns that
// call's result from the enclosing function.
//
// __FILE__ is passed as a "%s" argument instead of being concatenated into the
// format string, so a '%' in the source path cannot be misread as a conversion
// specifier.  The offending bits are cast to uint32_t to match PRIu32.
#define HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags)                            \
    MAYBE_RETURN_ERROR((env) && ((flags_to_function) & ~(allowed_flags)),                    \
                       toku_ydb_do_error((env),                                              \
                                         EINVAL,                                             \
                                         "Unknown flags (%" PRIu32 ") in %s:%s(): %d\n",     \
                                         (uint32_t)((flags_to_function) & ~(allowed_flags)), \
                                         __FILE__, __FUNCTION__, __LINE__))
176 | |
177 | int toku_ydb_check_avail_fs_space(DB_ENV *env); |
178 | |
179 | void toku_ydb_error_all_cases(const DB_ENV * env, |
180 | int error, |
181 | bool include_stderrstring, |
182 | bool use_stderr_if_nothing_else, |
183 | const char *fmt, va_list ap) |
184 | __attribute__((format (printf, 5, 0))) |
185 | __attribute__((__visibility__("default" ))); // this is needed by the C++ interface. |
186 | |
187 | int toku_ydb_do_error (const DB_ENV *dbenv, int error, const char *string, ...) |
188 | __attribute__((__format__(__printf__, 3, 4))); |
189 | |
190 | /* Environment related errors */ |
191 | int toku_env_is_panicked(DB_ENV *dbenv); |
192 | void toku_env_err(const DB_ENV * env, int error, const char *fmt, ...) |
193 | __attribute__((__format__(__printf__, 3, 4))); |
194 | |
// Isolation levels.  The numeric values are spelled out explicitly and must
// stay as they are.
typedef enum __toku_isolation_level {
    TOKU_ISO_SERIALIZABLE          = 0,
    TOKU_ISO_SNAPSHOT              = 1,
    TOKU_ISO_READ_COMMITTED        = 2,
    TOKU_ISO_READ_UNCOMMITTED      = 3,
    TOKU_ISO_READ_COMMITTED_ALWAYS = 4
} TOKU_ISOLATION;
202 | |
// Union of all isolation-related flag bits; needed in ydb_db.c.
#define DB_ISOLATION_FLAGS        \
    (DB_READ_COMMITTED          | \
     DB_READ_COMMITTED_ALWAYS   | \
     DB_READ_UNCOMMITTED        | \
     DB_TXN_SNAPSHOT            | \
     DB_SERIALIZABLE            | \
     DB_INHERIT_ISOLATION)
205 | |
206 | struct txn_lock_range { |
207 | DBT left; |
208 | DBT right; |
209 | }; |
210 | |
211 | struct txn_lt_key_ranges { |
212 | toku::locktree *lt; |
213 | toku::range_buffer *buffer; |
214 | }; |
215 | |
216 | struct __toku_db_txn_internal { |
217 | struct tokutxn *tokutxn; |
218 | uint32_t flags; |
219 | TOKU_ISOLATION iso; |
220 | DB_TXN *child; |
221 | toku_mutex_t txn_mutex; |
222 | |
223 | // maps a locktree to a buffer of key ranges that are locked. |
224 | // it is protected by the txn_mutex, so hot indexing and a client |
225 | // thread can concurrently operate on this txn. |
226 | toku::omt<txn_lt_key_ranges> lt_map; |
227 | }; |
228 | |
229 | struct __toku_db_txn_external { |
230 | struct __toku_db_txn external_part; |
231 | struct __toku_db_txn_internal internal_part; |
232 | }; |
233 | #define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)x)->internal_part) |
234 | |
235 | struct __toku_dbc_internal { |
236 | struct ft_cursor ftcursor; |
237 | DB_TXN *txn; |
238 | TOKU_ISOLATION iso; |
239 | struct simple_dbt skey_s,sval_s; |
240 | struct simple_dbt *skey,*sval; |
241 | |
242 | // if the rmw flag is asserted, cursor operations (like set) grab write |
243 | // locks instead of read locks |
244 | // the rmw flag is set when the cursor is created with the DB_RMW flag set |
245 | bool rmw; |
246 | bool locking_read; |
247 | }; |
248 | |
249 | static_assert( |
250 | sizeof(__toku_dbc_internal) <= sizeof(((DBC *)nullptr)->_internal), |
251 | "__toku_dbc_internal doesn't fit in the internal portion of a DBC" ); |
252 | |
253 | static inline __toku_dbc_internal *dbc_struct_i(DBC *c) { |
254 | union dbc_union { |
255 | __toku_dbc_internal *dbc_internal; |
256 | char *buf; |
257 | } u; |
258 | u.buf = c->_internal; |
259 | return u.dbc_internal; |
260 | } |
261 | |
262 | static inline struct ft_cursor *dbc_ftcursor(DBC *c) { |
263 | return &dbc_struct_i(c)->ftcursor; |
264 | } |
265 | |
266 | static inline int |
267 | env_opened(DB_ENV *env) { |
268 | return env->i->cachetable != 0; |
269 | } |
270 | |
271 | static inline bool |
272 | txn_is_read_only(DB_TXN* txn) { |
273 | if (txn && (db_txn_struct_i(txn)->flags & DB_TXN_READ_ONLY)) { |
274 | return true; |
275 | } |
276 | return false; |
277 | } |
278 | |
// Returns EINVAL from the enclosing function when txn_is_read_only(txn) is true.
#define HANDLE_READ_ONLY_TXN(txn) if (txn_is_read_only(txn)) { return EINVAL; }
280 | |
281 | void env_panic(DB_ENV * env, int cause, const char * msg); |
282 | void env_note_db_opened(DB_ENV *env, DB *db); |
283 | void env_note_db_closed(DB_ENV *env, DB *db); |
284 | |