| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #pragma once |
| 40 | |
| 41 | #include <db.h> |
| 42 | #include <limits.h> |
| 43 | |
| 44 | #include <ft/cachetable/cachetable.h> |
| 45 | #include <ft/cursor.h> |
| 46 | #include <ft/comparator.h> |
| 47 | #include <ft/logger/logger.h> |
| 48 | #include <ft/txn/txn.h> |
| 49 | |
| 50 | #include <util/growable_array.h> |
| 51 | #include <util/minicron.h> |
| 52 | #include <util/omt.h> |
| 53 | |
| 54 | #include <locktree/locktree.h> |
| 55 | #include <locktree/range_buffer.h> |
| 56 | |
| 57 | #include <toku_list.h> |
| 58 | |
| 59 | struct __toku_db_internal { |
| 60 | int opened; |
| 61 | uint32_t open_flags; |
| 62 | int open_mode; |
| 63 | FT_HANDLE ft_handle; |
| 64 | DICTIONARY_ID dict_id; // unique identifier used by locktree logic |
| 65 | toku::locktree *lt; |
| 66 | struct simple_dbt skey, sval; // static key and value |
| 67 | bool key_compare_was_set; // true if a comparison function was provided before call to db->open() (if false, use environment's comparison function). |
| 68 | char *dname; // dname is constant for this handle (handle must be closed before file is renamed) |
| 69 | DB_INDEXER *indexer; |
| 70 | }; |
| 71 | |
| 72 | int toku_db_set_indexer(DB *db, DB_INDEXER *indexer); |
| 73 | DB_INDEXER *toku_db_get_indexer(DB *db); |
| 74 | |
| 75 | #if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1 |
| 76 | typedef void (*toku_env_errcall_t)(const char *, char *); |
| 77 | #elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3 |
| 78 | typedef void (*toku_env_errcall_t)(const DB_ENV *, const char *, const char *); |
| 79 | #else |
| 80 | #error |
| 81 | #endif |
| 82 | |
| 83 | struct __toku_db_env_internal { |
| 84 | int is_panicked; // if nonzero, then its an error number |
| 85 | char *panic_string; |
| 86 | uint32_t open_flags; |
| 87 | int open_mode; |
| 88 | toku_env_errcall_t errcall; |
| 89 | void *errfile; |
| 90 | const char *errpfx; |
| 91 | char *dir; /* A malloc'd copy of the directory. */ |
| 92 | char *tmp_dir; |
| 93 | char *lg_dir; |
| 94 | char *data_dir; |
| 95 | int (*bt_compare) (DB *, const DBT *, const DBT *); |
| 96 | int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *, void (*set_val)(const DBT *new_val, void *), void *); |
| 97 | generate_row_for_put_func generate_row_for_put; |
| 98 | generate_row_for_del_func generate_row_for_del; |
| 99 | |
| 100 | unsigned long cachetable_size; |
| 101 | unsigned long client_pool_threads; |
| 102 | unsigned long cachetable_pool_threads; |
| 103 | unsigned long checkpoint_pool_threads; |
| 104 | CACHETABLE cachetable; |
| 105 | TOKULOGGER logger; |
| 106 | toku::locktree_manager ltm; |
| 107 | lock_timeout_callback lock_wait_timeout_callback; // Called when a lock request times out waiting for a lock. |
| 108 | lock_wait_callback lock_wait_needed_callback; // Called when a lock request requires a wait. |
| 109 | |
| 110 | DB *directory; // Maps dnames to inames |
| 111 | DB *persistent_environment; // Stores environment settings, can be used for upgrade |
| 112 | toku::omt<DB *> *open_dbs_by_dname; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location) |
| 113 | toku::omt<DB *> *open_dbs_by_dict_id; // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location) |
| 114 | toku_pthread_rwlock_t open_dbs_rwlock; // rwlock that protects the OMT of open dbs. |
| 115 | |
| 116 | char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /) |
| 117 | char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /) |
| 118 | char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /) |
| 119 | |
| 120 | fs_redzone_state fs_state; |
| 121 | uint64_t fs_seq; // how many times has fs_poller run? |
| 122 | uint64_t last_seq_entered_red; |
| 123 | uint64_t last_seq_entered_yellow; |
| 124 | int redzone; // percent of total fs space that marks boundary between yellow and red zones |
| 125 | int enospc_redzone_ctr; // number of operations rejected by enospc prevention (red zone) |
| 126 | int fs_poll_time; // Time in seconds between statfs calls |
| 127 | struct minicron fs_poller; // Poll the file systems |
| 128 | bool fs_poller_is_init; |
| 129 | uint32_t fsync_log_period_ms; |
| 130 | bool fsync_log_cron_is_init; |
| 131 | struct minicron fsync_log_cron; // fsync recovery log |
| 132 | int envdir_lockfd; |
| 133 | int datadir_lockfd; |
| 134 | int logdir_lockfd; |
| 135 | int tmpdir_lockfd; |
| 136 | bool check_thp; // if set check if transparent huge pages are disabled |
| 137 | bool dir_per_db; |
| 138 | uint64_t (*get_loader_memory_size_callback)(void); |
| 139 | uint64_t default_lock_timeout_msec; |
| 140 | uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec); |
| 141 | uint64_t default_killed_time_msec; |
| 142 | uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec); |
| 143 | int (*killed_callback)(void); |
| 144 | }; |
| 145 | |
| 146 | // test-only environment function for running lock escalation |
| 147 | static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) { |
| 148 | toku::locktree_manager *mgr = &env->i->ltm; |
| 149 | mgr->run_escalation_for_test(); |
| 150 | } |
| 151 | |
// Common error handling macros and panic detection.
//
// Each macro expands to a complete `if` statement that may `return` from the
// enclosing function, so it can only be used inside a function whose return
// type accepts the returned value. A trailing `;` at the call site is just an
// empty statement. Because the expansion is a bare `if`, do not use these as
// the unbraced body of an outer if/else.

// Return `status` from the enclosing function when `cond` is true.
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return status;

// If the environment is panicked, sleep for one second and return EINVAL.
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }

// Same check, applied to the environment that owns `db`. The argument is
// parenthesized so that any pointer expression (e.g. `dbs + 1`) is
// dereferenced as a whole.
#define HANDLE_PANICKED_DB(db) HANDLE_PANICKED_ENV((db)->dbenv)
| 156 | |
// Only commit/abort/prelock (which are used by handlerton) are allowed when a
// child exists. For any other operation: if `txn` is non-null and has a child,
// report the problem through toku_ydb_do_error() and return its result from
// the enclosing function.
#define HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn) \
    MAYBE_RETURN_ERROR( \
        ((txn) && db_txn_struct_i(txn)->child), \
        toku_ydb_do_error((env), EINVAL, \
                          "%s: Transaction cannot do work when child exists\n", \
                          __FUNCTION__))

// Same check, taking the environment from the DB handle.
#define HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn) \
    HANDLE_ILLEGAL_WORKING_PARENT_TXN((db)->dbenv, txn)

// Same check, taking the DB handle and the transaction from the cursor.
#define HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c) \
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN((c)->dbp, dbc_struct_i(c)->txn)
| 169 | |
// Bail out if we get unknown flags.
//
// When `env` is non-null and `flags_to_function` has any bit outside
// `allowed_flags`, report the offending bits via toku_ydb_do_error() and
// return its result from the enclosing function. The check is skipped
// entirely when `env` is null. Both flag arguments are evaluated a second
// time when the error is reported.
//
// The source file name is passed as a "%s" argument rather than being pasted
// into the format string, so a '%' in the build path can never be interpreted
// as a conversion specifier. The emitted message is otherwise unchanged.
#define HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags) \
    MAYBE_RETURN_ERROR((env) && ((flags_to_function) & ~(allowed_flags)), \
                       toku_ydb_do_error((env), \
                                         EINVAL, \
                                         "Unknown flags (%" PRIu32 ") in %s:%s(): %d\n", \
                                         (flags_to_function) & ~(allowed_flags), \
                                         __FILE__, __FUNCTION__, __LINE__))
| 176 | |
| 177 | int toku_ydb_check_avail_fs_space(DB_ENV *env); |
| 178 | |
| 179 | void toku_ydb_error_all_cases(const DB_ENV * env, |
| 180 | int error, |
| 181 | bool include_stderrstring, |
| 182 | bool use_stderr_if_nothing_else, |
| 183 | const char *fmt, va_list ap) |
| 184 | __attribute__((format (printf, 5, 0))) |
| 185 | __attribute__((__visibility__("default" ))); // this is needed by the C++ interface. |
| 186 | |
| 187 | int toku_ydb_do_error (const DB_ENV *dbenv, int error, const char *string, ...) |
| 188 | __attribute__((__format__(__printf__, 3, 4))); |
| 189 | |
| 190 | /* Environment related errors */ |
| 191 | int toku_env_is_panicked(DB_ENV *dbenv); |
| 192 | void toku_env_err(const DB_ENV * env, int error, const char *fmt, ...) |
| 193 | __attribute__((__format__(__printf__, 3, 4))); |
| 194 | |
// Isolation levels. The numeric value of every enumerator is pinned
// explicitly.
typedef enum __toku_isolation_level {
    TOKU_ISO_SERIALIZABLE          = 0,
    TOKU_ISO_SNAPSHOT              = 1,
    TOKU_ISO_READ_COMMITTED        = 2,
    TOKU_ISO_READ_UNCOMMITTED      = 3,
    TOKU_ISO_READ_COMMITTED_ALWAYS = 4
} TOKU_ISOLATION;
| 202 | |
// Union of every isolation-related DB flag bit. Needed in ydb_db.c.
#define DB_ISOLATION_FLAGS \
    (DB_READ_COMMITTED | \
     DB_READ_COMMITTED_ALWAYS | \
     DB_READ_UNCOMMITTED | \
     DB_TXN_SNAPSHOT | \
     DB_SERIALIZABLE | \
     DB_INHERIT_ISOLATION)
| 205 | |
| 206 | struct txn_lock_range { |
| 207 | DBT left; |
| 208 | DBT right; |
| 209 | }; |
| 210 | |
| 211 | struct txn_lt_key_ranges { |
| 212 | toku::locktree *lt; |
| 213 | toku::range_buffer *buffer; |
| 214 | }; |
| 215 | |
| 216 | struct __toku_db_txn_internal { |
| 217 | struct tokutxn *tokutxn; |
| 218 | uint32_t flags; |
| 219 | TOKU_ISOLATION iso; |
| 220 | DB_TXN *child; |
| 221 | toku_mutex_t txn_mutex; |
| 222 | |
| 223 | // maps a locktree to a buffer of key ranges that are locked. |
| 224 | // it is protected by the txn_mutex, so hot indexing and a client |
| 225 | // thread can concurrently operate on this txn. |
| 226 | toku::omt<txn_lt_key_ranges> lt_map; |
| 227 | }; |
| 228 | |
| 229 | struct __toku_db_txn_external { |
| 230 | struct __toku_db_txn external_part; |
| 231 | struct __toku_db_txn_internal internal_part; |
| 232 | }; |
// Given a pointer to a struct __toku_db_txn_external (in practice, the address
// of its leading external_part), yield a pointer to its internal_part.
// The argument is parenthesized so the cast applies to the whole expression;
// without that, `db_txn_struct_i(p + n)` would cast only `p`.
#define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)(x))->internal_part)
| 234 | |
| 235 | struct __toku_dbc_internal { |
| 236 | struct ft_cursor ftcursor; |
| 237 | DB_TXN *txn; |
| 238 | TOKU_ISOLATION iso; |
| 239 | struct simple_dbt skey_s,sval_s; |
| 240 | struct simple_dbt *skey,*sval; |
| 241 | |
| 242 | // if the rmw flag is asserted, cursor operations (like set) grab write |
| 243 | // locks instead of read locks |
| 244 | // the rmw flag is set when the cursor is created with the DB_RMW flag set |
| 245 | bool rmw; |
| 246 | bool locking_read; |
| 247 | }; |
| 248 | |
| 249 | static_assert( |
| 250 | sizeof(__toku_dbc_internal) <= sizeof(((DBC *)nullptr)->_internal), |
| 251 | "__toku_dbc_internal doesn't fit in the internal portion of a DBC" ); |
| 252 | |
| 253 | static inline __toku_dbc_internal *dbc_struct_i(DBC *c) { |
| 254 | union dbc_union { |
| 255 | __toku_dbc_internal *dbc_internal; |
| 256 | char *buf; |
| 257 | } u; |
| 258 | u.buf = c->_internal; |
| 259 | return u.dbc_internal; |
| 260 | } |
| 261 | |
| 262 | static inline struct ft_cursor *dbc_ftcursor(DBC *c) { |
| 263 | return &dbc_struct_i(c)->ftcursor; |
| 264 | } |
| 265 | |
| 266 | static inline int |
| 267 | env_opened(DB_ENV *env) { |
| 268 | return env->i->cachetable != 0; |
| 269 | } |
| 270 | |
| 271 | static inline bool |
| 272 | txn_is_read_only(DB_TXN* txn) { |
| 273 | if (txn && (db_txn_struct_i(txn)->flags & DB_TXN_READ_ONLY)) { |
| 274 | return true; |
| 275 | } |
| 276 | return false; |
| 277 | } |
| 278 | |
| 279 | #define HANDLE_READ_ONLY_TXN(txn) if(txn_is_read_only(txn)) return EINVAL; |
| 280 | |
| 281 | void env_panic(DB_ENV * env, int cause, const char * msg); |
| 282 | void env_note_db_opened(DB_ENV *env, DB *db); |
| 283 | void env_note_db_closed(DB_ENV *env, DB *db); |
| 284 | |