1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39#pragma once
40
41#include <db.h>
42#include <limits.h>
43
44#include <ft/cachetable/cachetable.h>
45#include <ft/cursor.h>
46#include <ft/comparator.h>
47#include <ft/logger/logger.h>
48#include <ft/txn/txn.h>
49
50#include <util/growable_array.h>
51#include <util/minicron.h>
52#include <util/omt.h>
53
54#include <locktree/locktree.h>
55#include <locktree/range_buffer.h>
56
57#include <toku_list.h>
58
59struct __toku_db_internal {
60 int opened;
61 uint32_t open_flags;
62 int open_mode;
63 FT_HANDLE ft_handle;
64 DICTIONARY_ID dict_id; // unique identifier used by locktree logic
65 toku::locktree *lt;
66 struct simple_dbt skey, sval; // static key and value
67 bool key_compare_was_set; // true if a comparison function was provided before call to db->open() (if false, use environment's comparison function).
68 char *dname; // dname is constant for this handle (handle must be closed before file is renamed)
69 DB_INDEXER *indexer;
70};
71
72int toku_db_set_indexer(DB *db, DB_INDEXER *indexer);
73DB_INDEXER *toku_db_get_indexer(DB *db);
74
75#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1
76typedef void (*toku_env_errcall_t)(const char *, char *);
77#elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
78typedef void (*toku_env_errcall_t)(const DB_ENV *, const char *, const char *);
79#else
80#error
81#endif
82
83struct __toku_db_env_internal {
84 int is_panicked; // if nonzero, then its an error number
85 char *panic_string;
86 uint32_t open_flags;
87 int open_mode;
88 toku_env_errcall_t errcall;
89 void *errfile;
90 const char *errpfx;
91 char *dir; /* A malloc'd copy of the directory. */
92 char *tmp_dir;
93 char *lg_dir;
94 char *data_dir;
95 int (*bt_compare) (DB *, const DBT *, const DBT *);
96 int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra);
97 generate_row_for_put_func generate_row_for_put;
98 generate_row_for_del_func generate_row_for_del;
99
100 unsigned long cachetable_size;
101 unsigned long client_pool_threads;
102 unsigned long cachetable_pool_threads;
103 unsigned long checkpoint_pool_threads;
104 CACHETABLE cachetable;
105 TOKULOGGER logger;
106 toku::locktree_manager ltm;
107 lock_timeout_callback lock_wait_timeout_callback; // Called when a lock request times out waiting for a lock.
108 lock_wait_callback lock_wait_needed_callback; // Called when a lock request requires a wait.
109
110 DB *directory; // Maps dnames to inames
111 DB *persistent_environment; // Stores environment settings, can be used for upgrade
112 toku::omt<DB *> *open_dbs_by_dname; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location)
113 toku::omt<DB *> *open_dbs_by_dict_id; // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location)
114 toku_pthread_rwlock_t open_dbs_rwlock; // rwlock that protects the OMT of open dbs.
115
116 char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /)
117 char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /)
118 char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /)
119
120 fs_redzone_state fs_state;
121 uint64_t fs_seq; // how many times has fs_poller run?
122 uint64_t last_seq_entered_red;
123 uint64_t last_seq_entered_yellow;
124 int redzone; // percent of total fs space that marks boundary between yellow and red zones
125 int enospc_redzone_ctr; // number of operations rejected by enospc prevention (red zone)
126 int fs_poll_time; // Time in seconds between statfs calls
127 struct minicron fs_poller; // Poll the file systems
128 bool fs_poller_is_init;
129 uint32_t fsync_log_period_ms;
130 bool fsync_log_cron_is_init;
131 struct minicron fsync_log_cron; // fsync recovery log
132 int envdir_lockfd;
133 int datadir_lockfd;
134 int logdir_lockfd;
135 int tmpdir_lockfd;
136 bool check_thp; // if set check if transparent huge pages are disabled
137 bool dir_per_db;
138 uint64_t (*get_loader_memory_size_callback)(void);
139 uint64_t default_lock_timeout_msec;
140 uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
141 uint64_t default_killed_time_msec;
142 uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec);
143 int (*killed_callback)(void);
144};
145
146// test-only environment function for running lock escalation
147static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) {
148 toku::locktree_manager *mgr = &env->i->ltm;
149 mgr->run_escalation_for_test();
150}
151
// Common error handling macros and panic detection.
//
// NOTE: each macro below expands to a complete, bare `if` statement (it is
// not wrapped in do { } while (0)). Use them only as stand-alone statements,
// never as the un-braced body of an enclosing if/else.

// Returns `status` from the calling function when `cond` is true.
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return status;

// When toku_env_is_panicked(env) is true: sleeps for one second, then
// returns EINVAL from the calling function.
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }

// Same check applied to a DB's environment. The argument is parenthesized so
// that expression arguments (e.g. `*pdb`) bind correctly before `->dbenv`.
#define HANDLE_PANICKED_DB(db) HANDLE_PANICKED_ENV((db)->dbenv)
156
// Only commit/abort/prelock (which are used by handlerton) are allowed when a child exists.
// If `txn` is non-null and its internal `child` pointer is set, the error is
// reported through toku_ydb_do_error() and that call's result is returned
// from the calling function.
#define HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn)                             \
    MAYBE_RETURN_ERROR(                                                         \
        ((txn) && db_txn_struct_i(txn)->child),                                 \
        toku_ydb_do_error((env), EINVAL,                                        \
                          "%s: Transaction cannot do work when child exists\n", \
                          __FUNCTION__))

// Variant taking a DB: checks against the DB's environment.
#define HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn) \
    HANDLE_ILLEGAL_WORKING_PARENT_TXN((db)->dbenv, txn)

// Variant taking a cursor: uses the cursor's DB and the cursor's transaction.
#define HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c) \
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN((c)->dbp, dbc_struct_i(c)->txn)
169
// Bail out if we get unknown flags.
// When `env` is non-null and `flags_to_function` has any bit outside
// `allowed_flags`, the offending bits are reported through
// toku_ydb_do_error() and that call's result is returned from the caller.
#define HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags)                   \
    MAYBE_RETURN_ERROR(                                                             \
        (env) && ((flags_to_function) & ~(allowed_flags)),                          \
        toku_ydb_do_error((env), EINVAL,                                            \
                          "Unknown flags (%" PRIu32 ") in " __FILE__ ":%s(): %d\n", \
                          (flags_to_function) & ~(allowed_flags),                   \
                          __FUNCTION__, __LINE__))
176
177int toku_ydb_check_avail_fs_space(DB_ENV *env);
178
179void toku_ydb_error_all_cases(const DB_ENV * env,
180 int error,
181 bool include_stderrstring,
182 bool use_stderr_if_nothing_else,
183 const char *fmt, va_list ap)
184 __attribute__((format (printf, 5, 0)))
185 __attribute__((__visibility__("default"))); // this is needed by the C++ interface.
186
187int toku_ydb_do_error (const DB_ENV *dbenv, int error, const char *string, ...)
188 __attribute__((__format__(__printf__, 3, 4)));
189
190/* Environment related errors */
191int toku_env_is_panicked(DB_ENV *dbenv);
192void toku_env_err(const DB_ENV * env, int error, const char *fmt, ...)
193 __attribute__((__format__(__printf__, 3, 4)));
194
// Isolation levels stored in the transaction and cursor internals (the `iso`
// members). The numeric values are assigned explicitly.
typedef enum __toku_isolation_level {
    TOKU_ISO_SERIALIZABLE          = 0,
    TOKU_ISO_SNAPSHOT              = 1,
    TOKU_ISO_READ_COMMITTED        = 2,
    TOKU_ISO_READ_UNCOMMITTED      = 3,
    TOKU_ISO_READ_COMMITTED_ALWAYS = 4
} TOKU_ISOLATION;
202
// needed in ydb_db.c
// Bitwise OR of every isolation-related DB flag.
#define DB_ISOLATION_FLAGS      \
    (DB_READ_COMMITTED          \
     | DB_READ_COMMITTED_ALWAYS \
     | DB_READ_UNCOMMITTED      \
     | DB_TXN_SNAPSHOT          \
     | DB_SERIALIZABLE          \
     | DB_INHERIT_ISOLATION)
205
206struct txn_lock_range {
207 DBT left;
208 DBT right;
209};
210
211struct txn_lt_key_ranges {
212 toku::locktree *lt;
213 toku::range_buffer *buffer;
214};
215
216struct __toku_db_txn_internal {
217 struct tokutxn *tokutxn;
218 uint32_t flags;
219 TOKU_ISOLATION iso;
220 DB_TXN *child;
221 toku_mutex_t txn_mutex;
222
223 // maps a locktree to a buffer of key ranges that are locked.
224 // it is protected by the txn_mutex, so hot indexing and a client
225 // thread can concurrently operate on this txn.
226 toku::omt<txn_lt_key_ranges> lt_map;
227};
228
229struct __toku_db_txn_external {
230 struct __toku_db_txn external_part;
231 struct __toku_db_txn_internal internal_part;
232};
// Casts `x` to `struct __toku_db_txn_external *` and yields the address of
// its `internal_part` member. The argument is parenthesized so that an
// expression argument (e.g. pointer arithmetic) is fully evaluated before
// the cast is applied.
#define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)(x))->internal_part)
234
235struct __toku_dbc_internal {
236 struct ft_cursor ftcursor;
237 DB_TXN *txn;
238 TOKU_ISOLATION iso;
239 struct simple_dbt skey_s,sval_s;
240 struct simple_dbt *skey,*sval;
241
242 // if the rmw flag is asserted, cursor operations (like set) grab write
243 // locks instead of read locks
244 // the rmw flag is set when the cursor is created with the DB_RMW flag set
245 bool rmw;
246 bool locking_read;
247};
248
249static_assert(
250 sizeof(__toku_dbc_internal) <= sizeof(((DBC *)nullptr)->_internal),
251 "__toku_dbc_internal doesn't fit in the internal portion of a DBC");
252
253static inline __toku_dbc_internal *dbc_struct_i(DBC *c) {
254 union dbc_union {
255 __toku_dbc_internal *dbc_internal;
256 char *buf;
257 } u;
258 u.buf = c->_internal;
259 return u.dbc_internal;
260}
261
262static inline struct ft_cursor *dbc_ftcursor(DBC *c) {
263 return &dbc_struct_i(c)->ftcursor;
264}
265
266static inline int
267env_opened(DB_ENV *env) {
268 return env->i->cachetable != 0;
269}
270
271static inline bool
272txn_is_read_only(DB_TXN* txn) {
273 if (txn && (db_txn_struct_i(txn)->flags & DB_TXN_READ_ONLY)) {
274 return true;
275 }
276 return false;
277}
278
// Returns EINVAL from the calling function when `txn` is read-only.
// Expands to a bare `if` statement; use it as a stand-alone statement.
#define HANDLE_READ_ONLY_TXN(txn) if (txn_is_read_only(txn)) return EINVAL;
280
281void env_panic(DB_ENV * env, int cause, const char * msg);
282void env_note_db_opened(DB_ENV *env, DB *db);
283void env_note_db_closed(DB_ENV *env, DB *db);
284