| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #pragma once |
| 40 | |
| 41 | #include <stdio.h> |
| 42 | #include <sys/types.h> |
| 43 | #include <string.h> |
| 44 | #include <dirent.h> |
| 45 | |
| 46 | #include "portability/toku_list.h" |
| 47 | #include "portability/toku_pthread.h" |
| 48 | #include "ft/ft-internal.h" |
| 49 | #include "ft/logger/log.h" |
| 50 | #include "ft/logger/logfilemgr.h" |
| 51 | #include "ft/txn/txn.h" |
| 52 | #include "ft/txn/txn_manager.h" |
| 53 | #include "ft/txn/rollback_log_node_cache.h" |
| 54 | |
| 55 | #include "util/memarena.h" |
| 56 | #include "util/omt.h" |
| 57 | |
| 58 | using namespace toku; |
| 59 | // Locking for the logger |
| 60 | // For most purposes we use the big ydb lock. |
| 61 | // To log: grab the buf lock |
| 62 | // If the buf would overflow, then grab the file lock, swap file&buf, release buf lock, write the file, write the entry, release the file lock |
| 63 | // else append to buf & release lock |
| 64 | |
// 1<<24 == 16777216 (16 Mi).
// NOTE(review): presumably the smallest size, in bytes, allowed for a logger buffer -- confirm at the use sites.
#define LOGGER_MIN_BUF_SIZE (1<<24)
| 66 | |
// TODO: Remove mylock, it has no value
// Thin wrapper around a single toku_mutex_t; the ml_init/ml_lock/ml_unlock/ml_destroy
// helpers in this header operate on it.
struct mylock {
    toku_mutex_t lock;  // the wrapped mutex
};
| 71 | |
| 72 | static inline void ml_init(struct mylock *l) { |
| 73 | toku_mutex_init(*log_internal_lock_mutex_key, &l->lock, nullptr); |
| 74 | } |
// TODO: source location info might have to be pulled up one caller
// to be useful
| 77 | static inline void ml_lock(struct mylock *l) { toku_mutex_lock(&l->lock); } |
| 78 | static inline void ml_unlock(struct mylock *l) { |
| 79 | toku_mutex_unlock(&l->lock); |
| 80 | } |
| 81 | static inline void ml_destroy(struct mylock *l) { |
| 82 | toku_mutex_destroy(&l->lock); |
| 83 | } |
| 84 | |
// A character buffer plus bookkeeping; struct tokulogger holds two of these (inbuf and outbuf).
// NOTE(review): the field meanings below are inferred from the names -- confirm against the
// code that fills and drains the buffer.
struct logbuf {
    int n_in_buf;        // presumably how much of buf is currently in use
    int buf_size;        // presumably the capacity of buf
    char *buf;           // the buffer storage
    LSN max_lsn_in_buf;  // presumably the largest LSN among the entries held in buf
};
| 91 | |
// State of the logger. Fields are grouped by the lock that protects them:
// the input lock guards lsn and inbuf; the output condition lock guards the
// written/fsynced bookkeeping, outbuf and the logfilemgr.
struct tokulogger {
    struct mylock  input_lock;  // guards lsn and inbuf (see below)

    toku_mutex_t output_condition_lock; // if you need both this lock and input_lock, acquire the output_lock first, then input_lock. More typical is to get the output_is_available condition to be false, and then acquire the input_lock.
    toku_cond_t  output_condition;      // condition variable; its predicate involves output_is_available
    bool output_is_available;           // this is part of the predicate for the output condition.  It's true if no thread is modifying the output (either doing an fsync or otherwise fiddling with the output).

    bool is_open;
    bool write_log_files;
    bool trim_log_files; // for test purposes
    char *directory;  // file system directory
    DIR *dir; // descriptor for directory
    int fd;   // NOTE(review): presumably the descriptor of the log file currently being written -- confirm
    CACHETABLE ct;
    int lg_max; // The size of the single file in the log.  Default is 100MB.

    // To access these, you must have the input lock
    LSN lsn; // the next available lsn
    struct logbuf inbuf; // data being accumulated for the write

    // To access these, you must have the output condition lock.
    LSN written_lsn; // the last lsn written
    LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry  (accessed only while holding the output lock, and updated only when the output lock and output permission are held)
    LSN last_completed_checkpoint_lsn;     // What is the LSN of the most recent completed checkpoint.
    long long next_log_file_number;
    struct logbuf outbuf; // data being written to the file
    int  n_in_file; // The amount of data in the current file

    // To access the logfilemgr you must have the output condition lock.
    TOKULOGFILEMGR logfilemgr;

    uint32_t write_block_size;       // How big should the blocks be written to various logs?

    // Statistics counters.
    uint64_t num_writes_to_disk;         // how many times did we write to disk?
    uint64_t bytes_written_to_disk;        // how many bytes have been written to disk?
    tokutime_t time_spent_writing_to_disk;  // how much tokutime did we spend writing to disk?
    uint64_t num_wait_buf_long;            // how many times we waited >= 100ms for the in buf

    CACHEFILE rollback_cachefile;
    rollback_log_node_cache rollback_cache;
    TXN_MANAGER txn_manager;
};
| 134 | |
// Log-file discovery helpers; only the declarations appear in this header.
// NOTE(review): the descriptions below are inferred from the names and signatures --
// confirm against the definitions.

// Presumably scans `directory` and stores the next unused log file number in *result.
int toku_logger_find_next_unused_log_file(const char *directory, long long *result);
// Presumably stores an array of log file names found in `directory` in *resultp and its length in *n_logfiles.
int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles);
// Presumably releases an array obtained from toku_logger_find_logfiles.
void toku_logger_free_logfiles (char **logfiles, int n_logfiles);
| 138 | |
| 139 | static inline int |
| 140 | txn_has_current_rollback_log(TOKUTXN txn) { |
| 141 | return txn->roll_info.current_rollback.b != ROLLBACK_NONE.b; |
| 142 | } |
| 143 | |
| 144 | static inline int |
| 145 | txn_has_spilled_rollback_logs(TOKUTXN txn) { |
| 146 | return txn->roll_info.spilled_rollback_tail.b != ROLLBACK_NONE.b; |
| 147 | } |
| 148 | |
// Per-transaction summary: rollback accounting, the FTs involved, and the
// block numbers of the rollback log.
// NOTE(review): comments marked "presumably" are inferred from field names -- confirm.
struct txninfo {
    uint64_t   rollentry_raw_count;  // the total count of every byte in the transaction and all its children.
    uint32_t   num_fts;              // presumably the number of entries in open_fts
    FT *open_fts;
    bool       force_fsync_on_commit;  //This transaction NEEDS an fsync once (if) it commits.  (commit means root txn)
    uint64_t   num_rollback_nodes;
    uint64_t   num_rollentries;
    BLOCKNUM   spilled_rollback_head;
    BLOCKNUM   spilled_rollback_tail;
    BLOCKNUM   current_rollback;
};
| 160 | |
// Returns 1 whatever v is (v is never read); presumably the number of bytes
// a uint8_t field occupies in a log entry.
static inline int toku_logsizeof_uint8_t(uint32_t v __attribute__((__unused__))) {
    return (int) sizeof(uint8_t);
}
| 164 | |
// Returns 4 whatever v is (v is never read); presumably the number of bytes
// a uint32_t field occupies in a log entry.
static inline int toku_logsizeof_uint32_t(uint32_t v __attribute__((__unused__))) {
    return (int) sizeof(uint32_t);
}
| 168 | |
// Returns 8 whatever v is (v is never read); presumably the number of bytes
// a uint64_t field occupies in a log entry.
// The parameter is declared uint64_t so that a 64-bit argument is passed
// through unchanged instead of being implicitly narrowed to 32 bits at the
// call site.
static inline int toku_logsizeof_uint64_t(uint64_t v __attribute__((__unused__))) {
    return 8;
}
| 172 | |
// Returns 1 whatever v is (v is never read); presumably the number of bytes
// a bool field occupies in a log entry.
static inline int toku_logsizeof_bool(uint32_t v __attribute__((__unused__))) {
    enum { logged_bytes = 1 };
    return logged_bytes;
}
| 176 | |
| 177 | static inline int toku_logsizeof_FILENUM (FILENUM v __attribute__((__unused__))) { |
| 178 | return 4; |
| 179 | } |
| 180 | |
| 181 | static inline int toku_logsizeof_DISKOFF (DISKOFF v __attribute__((__unused__))) { |
| 182 | return 8; |
| 183 | } |
| 184 | static inline int toku_logsizeof_BLOCKNUM (BLOCKNUM v __attribute__((__unused__))) { |
| 185 | return 8; |
| 186 | } |
| 187 | |
| 188 | static inline int toku_logsizeof_LSN (LSN lsn __attribute__((__unused__))) { |
| 189 | return 8; |
| 190 | } |
| 191 | |
| 192 | static inline int toku_logsizeof_TXNID (TXNID txnid __attribute__((__unused__))) { |
| 193 | return 8; |
| 194 | } |
| 195 | |
| 196 | static inline int toku_logsizeof_TXNID_PAIR (TXNID_PAIR txnid __attribute__((__unused__))) { |
| 197 | return 16; |
| 198 | } |
| 199 | |
| 200 | static inline int toku_logsizeof_XIDP (XIDP xid) { |
| 201 | assert(0<=xid->gtrid_length && xid->gtrid_length<=64); |
| 202 | assert(0<=xid->bqual_length && xid->bqual_length<=64); |
| 203 | return xid->gtrid_length |
| 204 | + xid->bqual_length |
| 205 | + 4 // formatID |
| 206 | + 1 // gtrid_length |
| 207 | + 1; // bqual_length |
| 208 | } |
| 209 | |
| 210 | static inline int toku_logsizeof_FILENUMS (FILENUMS fs) { |
| 211 | static const FILENUM f = {0}; //fs could have .num==0 and then we cannot dereference |
| 212 | return 4 + fs.num * toku_logsizeof_FILENUM(f); |
| 213 | } |
| 214 | |
| 215 | static inline int toku_logsizeof_BYTESTRING (BYTESTRING bs) { |
| 216 | return 4+bs.len; |
| 217 | } |
| 218 | |
| 219 | static inline char *fixup_fname(BYTESTRING *f) { |
| 220 | assert(f->len>0); |
| 221 | char *fname = (char*)toku_xmalloc(f->len+1); |
| 222 | memcpy(fname, f->data, f->len); |
| 223 | fname[f->len]=0; |
| 224 | return fname; |
| 225 | } |
| 226 | |