| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | Copyright (c) 2017, 2018, MariaDB Corporation. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify it under |
| 7 | the terms of the GNU General Public License as published by the Free Software |
| 8 | Foundation; version 2 of the License. |
| 9 | |
| 10 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License along with |
| 15 | this program; if not, write to the Free Software Foundation, Inc., |
| 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 17 | |
| 18 | *****************************************************************************/ |
| 19 | |
| 20 | /**************************************************//** |
| 21 | @file include/log0recv.h |
| 22 | Recovery |
| 23 | |
| 24 | Created 9/20/1997 Heikki Tuuri |
| 25 | *******************************************************/ |
| 26 | |
| 27 | #ifndef log0recv_h |
| 28 | #define log0recv_h |
| 29 | |
| 30 | #include "univ.i" |
| 31 | #include "ut0byte.h" |
| 32 | #include "buf0types.h" |
| 33 | #include "hash0hash.h" |
| 34 | #include "log0log.h" |
| 35 | #include "mtr0types.h" |
| 36 | #include "ut0new.h" |
| 37 | |
| 38 | #include <list> |
| 39 | #include <vector> |
| 40 | |
/** Whether recv_writer_thread is currently active.
This is only a declaration; the variable is defined elsewhere. */
extern bool recv_writer_thread_active;
| 43 | |
/** Check whether redo log recovery is in progress.
The macro expands to the global flag recv_recovery_on, so every use
reads the flag's value at the point of evaluation.
@return whether recovery is currently running */
#define recv_recovery_is_on() recv_recovery_on
| 46 | |
/** Find the latest checkpoint in the log header.
The declaration is annotated nonnull and warn_unused_result: callers are
expected to pass a valid max_field pointer and to check the returned
error code.
@param[out]	max_field	LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
@return error code or DB_SUCCESS */
dberr_t
recv_find_max_checkpoint(ulint* max_field)
	MY_ATTRIBUTE((nonnull, warn_unused_result));
| 53 | |
/** Apply the hashed log records to the page, if the page lsn is less than the
lsn of a log record.
@param[in]	just_read_in	whether the page recently arrived to the
				I/O handler
@param[in,out]	block		the page in the buffer pool */
void
recv_recover_page(bool just_read_in, buf_block_t* block);
| 60 | |
/** Start recovering from a redo log checkpoint.
The recovery that is started here is completed by
recv_recovery_from_checkpoint_finish().
@see recv_recovery_from_checkpoint_finish
@param[in]	flush_lsn	FIL_PAGE_FILE_FLUSH_LSN
				of first system tablespace page
@return error code or DB_SUCCESS */
dberr_t
recv_recovery_from_checkpoint_start(
	lsn_t		flush_lsn);
/** Complete recovery from a checkpoint.
@see recv_recovery_from_checkpoint_start */
void recv_recovery_from_checkpoint_finish();
/** Initiate the rollback of active transactions. */
void recv_recovery_rollback_active();
/** Reset the logs. The contents of log files will be lost!
@param[in]	lsn	reset to this lsn rounded up to be divisible by
			OS_FILE_LOG_BLOCK_SIZE, after which we add
			LOG_BLOCK_HDR_SIZE */
void
recv_reset_logs(
	lsn_t		lsn);
/** Clean up after recv_sys_init().
@see recv_sys_init */
void
recv_sys_close();
/** Initialize the redo log recovery subsystem.
@see recv_sys_close for the matching cleanup */
void
recv_sys_init();
/** Free the recovery system. */
void recv_sys_debug_free();
| 98 | |
/** Reset the state of the recovery system variables. */
void recv_sys_var_init();
| 104 | |
/** Apply the hash table of stored log records to persistent data pages.
@param[in]	last_batch	whether the change buffer merge will be
				performed as part of the operation */
void
recv_apply_hashed_log_recs(bool last_batch);
| 110 | |
/** Policy that decides whether parsed redo log records are stored
to the hash table */
enum store_t {
	/** Do not store redo log records. */
	STORE_NO = 0,
	/** Store redo log records. */
	STORE_YES = 1,
	/** Store redo log records if the tablespace exists. */
	STORE_IF_EXISTS = 2
};
| 120 | |
| 121 | |
/** Add data from a new log block to the parsing buffer of recv_sys,
provided that recv_sys->parse_start_lsn is non-zero.
@param[in]	log_block	log block to add
@param[in]	scanned_lsn	lsn of how far we were able to find
				data in this log block
@return true if more data added */
bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
| 129 | |
/** Parse log records from a buffer and optionally store them to a
hash table, where they wait to be merged to file pages.
@param[in]	checkpoint_lsn	the LSN of the latest checkpoint
@param[in]	store		whether to store page operations
@param[in]	apply		whether to apply the records
@return whether MLOG_CHECKPOINT record was seen the first time,
or corruption was noticed */
bool recv_parse_log_recs(lsn_t checkpoint_lsn, store_t store, bool apply);
| 138 | |
/** Move the data in the parsing buffer left, to the start of the buffer. */
void recv_sys_justify_left_parsing_buf();
| 141 | |
/** Pointer to a backup function that checks whether the space id belongs to
the skip table list given in the mariabackup option.
@param[in]	space_id	tablespace identifier to check
@return result of the check; NOTE(review): which boolean value means
"included in the backup" is not visible in this header - confirm at the
definition */
extern bool(*check_if_backup_includes)(ulint space_id);
| 145 | |
/** Block of log record data.
The struct itself is only a header: the log record data is stored
physically immediately after this struct, max amount
RECV_DATA_BLOCK_SIZE bytes of it. */
struct recv_data_t{
	/** pointer to the next block or NULL */
	recv_data_t*	next;
};
| 153 | |
/** Stored log record struct */
struct recv_t{
	/** log record type */
	mlog_id_t	type;
	/** log record body length in bytes */
	ulint		len;
	/** chain of blocks containing the log record body */
	recv_data_t*	data;
	/** start lsn of the log segment written by the mtr which generated
	this log record: NOTE that this is not necessarily the start lsn of
	this log record */
	lsn_t		start_lsn;
	/** end lsn of the log segment written by the mtr which generated
	this log record: NOTE that this is not necessarily the end lsn of
	this log record */
	lsn_t		end_lsn;
	/** list node of this record in the list of log records for
	this page */
	UT_LIST_NODE_T(recv_t)
			rec_list;
};
| 171 | |
/** Recovery states that a recv_addr_t can be in */
enum recv_addr_state {
	/** not yet processed */
	RECV_NOT_PROCESSED = 0,
	/** page is being read */
	RECV_BEING_READ = 1,
	/** log records are being applied on the page */
	RECV_BEING_PROCESSED = 2,
	/** log records have been applied on the page */
	RECV_PROCESSED = 3,
	/** log records have been discarded because the tablespace
	does not exist */
	RECV_DISCARDED = 4
};
| 186 | |
| 187 | /** Hashed page file address struct */ |
| 188 | struct recv_addr_t{ |
| 189 | enum recv_addr_state state; |
| 190 | /*!< recovery state of the page */ |
| 191 | unsigned space:32;/*!< space id */ |
| 192 | unsigned page_no:32;/*!< page number */ |
| 193 | UT_LIST_BASE_NODE_T(recv_t) |
| 194 | rec_list;/*!< list of log records for this page */ |
| 195 | hash_node_t addr_hash;/*!< hash node in the hash bucket chain */ |
| 196 | }; |
| 197 | |
| 198 | struct recv_dblwr_t { |
| 199 | /** Add a page frame to the doublewrite recovery buffer. */ |
| 200 | void add(byte* page) { |
| 201 | pages.push_back(page); |
| 202 | } |
| 203 | |
| 204 | /** Find a doublewrite copy of a page. |
| 205 | @param[in] space_id tablespace identifier |
| 206 | @param[in] page_no page number |
| 207 | @return page frame |
| 208 | @retval NULL if no page was found */ |
| 209 | const byte* find_page(ulint space_id, ulint page_no); |
| 210 | |
| 211 | typedef std::list<byte*, ut_allocator<byte*> > list; |
| 212 | |
| 213 | /** Recovered doublewrite buffer page frames */ |
| 214 | list pages; |
| 215 | }; |
| 216 | |
| 217 | /** Recovery system data structure */ |
| 218 | struct recv_sys_t{ |
| 219 | ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs, |
| 220 | n_addrs, and the state field in each recv_addr |
| 221 | struct */ |
| 222 | ib_mutex_t writer_mutex;/*!< mutex coordinating |
| 223 | flushing between recv_writer_thread and |
| 224 | the recovery thread. */ |
| 225 | os_event_t flush_start;/*!< event to activate |
| 226 | page cleaner threads */ |
| 227 | os_event_t flush_end;/*!< event to signal that the page |
| 228 | cleaner has finished the request */ |
| 229 | buf_flush_t flush_type;/*!< type of the flush request. |
| 230 | BUF_FLUSH_LRU: flush end of LRU, keeping free blocks. |
| 231 | BUF_FLUSH_LIST: flush all of blocks. */ |
| 232 | ibool apply_log_recs; |
| 233 | /*!< this is TRUE when log rec application to |
| 234 | pages is allowed; this flag tells the |
| 235 | i/o-handler if it should do log record |
| 236 | application */ |
| 237 | ibool apply_batch_on; |
| 238 | /*!< this is TRUE when a log rec application |
| 239 | batch is running */ |
| 240 | byte* buf; /*!< buffer for parsing log records */ |
| 241 | size_t buf_size; /*!< size of buf */ |
| 242 | ulint len; /*!< amount of data in buf */ |
| 243 | lsn_t parse_start_lsn; |
| 244 | /*!< this is the lsn from which we were able to |
| 245 | start parsing log records and adding them to |
| 246 | the hash table; zero if a suitable |
| 247 | start point not found yet */ |
| 248 | lsn_t scanned_lsn; |
| 249 | /*!< the log data has been scanned up to this |
| 250 | lsn */ |
| 251 | ulint scanned_checkpoint_no; |
| 252 | /*!< the log data has been scanned up to this |
| 253 | checkpoint number (lowest 4 bytes) */ |
| 254 | ulint recovered_offset; |
| 255 | /*!< start offset of non-parsed log records in |
| 256 | buf */ |
| 257 | lsn_t recovered_lsn; |
| 258 | /*!< the log records have been parsed up to |
| 259 | this lsn */ |
| 260 | bool found_corrupt_log; |
| 261 | /*!< set when finding a corrupt log |
| 262 | block or record, or there is a log |
| 263 | parsing buffer overflow */ |
| 264 | bool found_corrupt_fs; |
| 265 | /*!< set when an inconsistency with |
| 266 | the file system contents is detected |
| 267 | during log scan or apply */ |
| 268 | lsn_t mlog_checkpoint_lsn; |
| 269 | /*!< the LSN of a MLOG_CHECKPOINT |
| 270 | record, or 0 if none was parsed */ |
| 271 | /** the time when progress was last reported */ |
| 272 | ib_time_t progress_time; |
| 273 | mem_heap_t* heap; /*!< memory heap of log records and file |
| 274 | addresses*/ |
| 275 | hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ |
| 276 | ulint n_addrs;/*!< number of not processed hashed file |
| 277 | addresses in the hash table */ |
| 278 | |
| 279 | recv_dblwr_t dblwr; |
| 280 | |
| 281 | /** Lastly added LSN to the hash table of log records. */ |
| 282 | lsn_t last_stored_lsn; |
| 283 | |
| 284 | /** Determine whether redo log recovery progress should be reported. |
| 285 | @param[in] time the current time |
| 286 | @return whether progress should be reported |
| 287 | (the last report was at least 15 seconds ago) */ |
| 288 | bool report(ib_time_t time) |
| 289 | { |
| 290 | if (time - progress_time < 15) { |
| 291 | return false; |
| 292 | } |
| 293 | |
| 294 | progress_time = time; |
| 295 | return true; |
| 296 | } |
| 297 | }; |
| 298 | |
/** The recovery system: pointer to the global recv_sys_t instance.
This is only a declaration; the pointer is defined elsewhere. */
extern recv_sys_t* recv_sys;
| 301 | |
/** TRUE when applying redo log records during crash recovery; FALSE
otherwise. Note that this is FALSE while a background thread is
rolling back incomplete transactions.
The recv_recovery_is_on() macro expands to this variable. */
extern volatile bool recv_recovery_on;
/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed. This becomes TRUE if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.

TRUE means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
extern bool recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called.
NOTE(review): recv_init_crash_recovery() is not declared in this header;
presumably it is internal to the recovery implementation - confirm. */
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
Protected by log_sys.mutex.
Declared only when UNIV_DEBUG is defined. */
extern bool recv_no_log_write;
#endif /* UNIV_DEBUG */
| 323 | |
/** TRUE if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
recv_recovery_from_checkpoint_start(). */
extern bool recv_lsn_checks_on;
| 328 | |
/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
times! The value is 2U << 20 = 2097152 (presumably bytes, i.e. 2 MiB -
confirm at the point where the buffer is allocated). */
#define RECV_PARSING_BUF_SIZE (2U << 20)
| 332 | |
/** Size of block reads when the log groups are scanned forward to do a
roll-forward. The value is 4U << srv_page_size_shift, that is, four times
(1U << srv_page_size_shift). */
#define RECV_SCAN_SIZE (4U << srv_page_size_shift)
| 336 | |
/** Number of frames that must be left free in the buffer pool when we
scan the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
extern ulint recv_n_pool_free_frames;
| 342 | |
| 343 | #endif |
| 344 | |