1/*****************************************************************************
2
3Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file include/log0recv.h
22Recovery
23
24Created 9/20/1997 Heikki Tuuri
25*******************************************************/
26
27#ifndef log0recv_h
28#define log0recv_h
29
30#include "univ.i"
31#include "ut0byte.h"
32#include "buf0types.h"
33#include "hash0hash.h"
34#include "log0log.h"
35#include "mtr0types.h"
36#include "ut0new.h"
37
38#include <list>
39#include <vector>
40
/** Whether recv_writer_thread is active */
extern bool		recv_writer_thread_active;

/** @return whether recovery is currently running
(expands to the global flag recv_recovery_on) */
#define recv_recovery_is_on() recv_recovery_on
46
/** Find the latest checkpoint in the log header.
@param[out]	max_field	LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2;
				declared nonnull, so NULL must not be passed
@return error code or DB_SUCCESS; declared warn_unused_result,
so the caller is expected to check it */
dberr_t
recv_find_max_checkpoint(ulint* max_field)
	MY_ATTRIBUTE((nonnull, warn_unused_result));
53
/** Apply the hashed log records to the page, if the page lsn is less than the
lsn of a log record.
@param[in]	just_read_in	whether the page recently arrived to the
				I/O handler
@param[in,out]	block		the page in the buffer pool */
void
recv_recover_page(bool just_read_in, buf_block_t* block);
60
/** Start recovering from a redo log checkpoint.
@see recv_recovery_from_checkpoint_finish
@param[in]	flush_lsn	FIL_PAGE_FILE_FLUSH_LSN of the first
				system tablespace page
@return error code or DB_SUCCESS */
dberr_t
recv_recovery_from_checkpoint_start(
	lsn_t	flush_lsn);
/** Complete recovery from a checkpoint.
@see recv_recovery_from_checkpoint_start */
void
recv_recovery_from_checkpoint_finish(void);
/** Initiate the rollback of active transactions. */
void
recv_recovery_rollback_active(void);
/** Reset the logs. The contents of the log files will be lost!
@param[in]	lsn	reset to this lsn rounded up to be divisible by
			OS_FILE_LOG_BLOCK_SIZE, after which we add
			LOG_BLOCK_HDR_SIZE */
void
recv_reset_logs(
	lsn_t		lsn);
/** Clean up after recv_sys_init().
@see recv_sys_init */
void
recv_sys_close();
/** Initialize the redo log recovery subsystem.
@see recv_sys_close */
void
recv_sys_init();
/** Free the recovery system. */
void
recv_sys_debug_free(void);
98
/** Reset the state of the recovery system variables. */
void
recv_sys_var_init(void);
104
/** Apply the hash table of stored log records to persistent data pages.
@param[in]	last_batch	whether the change buffer merge will be
				performed as part of the operation */
void
recv_apply_hashed_log_recs(bool last_batch);
110
/** Whether to store redo log records to the hash table.
@see recv_parse_log_recs */
enum store_t {
	/** Do not store redo log records. */
	STORE_NO,
	/** Store redo log records. */
	STORE_YES,
	/** Store redo log records if the tablespace exists. */
	STORE_IF_EXISTS
};
120
121
/** Add data from a new log block to the parsing buffer of recv_sys if
recv_sys->parse_start_lsn is non-zero.
@param[in]	log_block	log block to add
@param[in]	scanned_lsn	lsn of how far we were able to find
				data in this log block
@return true if more data was added */
bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
129
/** Parse log records from a buffer and optionally store them in a
hash table, where they wait to be merged to file pages.
@param[in]	checkpoint_lsn	the LSN of the latest checkpoint
@param[in]	store		whether to store page operations
@param[in]	apply		whether to apply the records
@return whether MLOG_CHECKPOINT record was seen the first time,
or corruption was noticed */
bool recv_parse_log_recs(lsn_t checkpoint_lsn, store_t store, bool apply);
138
/** Move the data in the parsing buffer left, to the start of the buffer. */
void recv_sys_justify_left_parsing_buf();
141
/** Callback used by backup to check whether a tablespace id belongs to
the skip table list given in the mariabackup option.
@param[in]	space_id	tablespace identifier
NOTE(review): the polarity of the return value ("included in the backup"
versus "found in the skip list") is not evident from this declaration;
confirm against the function that is assigned to this pointer. */
extern bool(*check_if_backup_includes)(ulint space_id);
145
/** Block of log record data.
The log record data is stored physically immediately after this struct,
at most RECV_DATA_BLOCK_SIZE bytes of it. */
struct recv_data_t{
	/** pointer to the next block or NULL */
	recv_data_t*	next;
};
153
/** Stored log record struct */
struct recv_t{
	/** log record type */
	mlog_id_t	type;
	/** log record body length in bytes */
	ulint		len;
	/** chain of blocks containing the log record body */
	recv_data_t*	data;
	/** start lsn of the log segment written by the mtr which generated
	this log record: NOTE that this is not necessarily the start lsn of
	this log record */
	lsn_t		start_lsn;
	/** end lsn of the log segment written by the mtr which generated
	this log record: NOTE that this is not necessarily the end lsn of
	this log record */
	lsn_t		end_lsn;
	/** list node linking this record into the list of log records
	for this page */
	UT_LIST_NODE_T(recv_t)
			rec_list;
};
171
/** States of recv_addr_t (stored in recv_addr_t::state) */
enum recv_addr_state {
	/** not yet processed */
	RECV_NOT_PROCESSED,
	/** page is being read */
	RECV_BEING_READ,
	/** log records are being applied on the page */
	RECV_BEING_PROCESSED,
	/** log records have been applied on the page */
	RECV_PROCESSED,
	/** log records have been discarded because the tablespace
	does not exist */
	RECV_DISCARDED
};
186
/** Hashed page file address struct */
struct recv_addr_t{
	/** recovery state of the page */
	enum recv_addr_state state;
	/** space id */
	unsigned	space:32;
	/** page number */
	unsigned	page_no:32;
	/** list of log records for this page */
	UT_LIST_BASE_NODE_T(recv_t)
			rec_list;
	/** hash node in the hash bucket chain */
	hash_node_t	addr_hash;
};
197
198struct recv_dblwr_t {
199 /** Add a page frame to the doublewrite recovery buffer. */
200 void add(byte* page) {
201 pages.push_back(page);
202 }
203
204 /** Find a doublewrite copy of a page.
205 @param[in] space_id tablespace identifier
206 @param[in] page_no page number
207 @return page frame
208 @retval NULL if no page was found */
209 const byte* find_page(ulint space_id, ulint page_no);
210
211 typedef std::list<byte*, ut_allocator<byte*> > list;
212
213 /** Recovered doublewrite buffer page frames */
214 list pages;
215};
216
217/** Recovery system data structure */
218struct recv_sys_t{
219 ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
220 n_addrs, and the state field in each recv_addr
221 struct */
222 ib_mutex_t writer_mutex;/*!< mutex coordinating
223 flushing between recv_writer_thread and
224 the recovery thread. */
225 os_event_t flush_start;/*!< event to activate
226 page cleaner threads */
227 os_event_t flush_end;/*!< event to signal that the page
228 cleaner has finished the request */
229 buf_flush_t flush_type;/*!< type of the flush request.
230 BUF_FLUSH_LRU: flush end of LRU, keeping free blocks.
231 BUF_FLUSH_LIST: flush all of blocks. */
232 ibool apply_log_recs;
233 /*!< this is TRUE when log rec application to
234 pages is allowed; this flag tells the
235 i/o-handler if it should do log record
236 application */
237 ibool apply_batch_on;
238 /*!< this is TRUE when a log rec application
239 batch is running */
240 byte* buf; /*!< buffer for parsing log records */
241 size_t buf_size; /*!< size of buf */
242 ulint len; /*!< amount of data in buf */
243 lsn_t parse_start_lsn;
244 /*!< this is the lsn from which we were able to
245 start parsing log records and adding them to
246 the hash table; zero if a suitable
247 start point not found yet */
248 lsn_t scanned_lsn;
249 /*!< the log data has been scanned up to this
250 lsn */
251 ulint scanned_checkpoint_no;
252 /*!< the log data has been scanned up to this
253 checkpoint number (lowest 4 bytes) */
254 ulint recovered_offset;
255 /*!< start offset of non-parsed log records in
256 buf */
257 lsn_t recovered_lsn;
258 /*!< the log records have been parsed up to
259 this lsn */
260 bool found_corrupt_log;
261 /*!< set when finding a corrupt log
262 block or record, or there is a log
263 parsing buffer overflow */
264 bool found_corrupt_fs;
265 /*!< set when an inconsistency with
266 the file system contents is detected
267 during log scan or apply */
268 lsn_t mlog_checkpoint_lsn;
269 /*!< the LSN of a MLOG_CHECKPOINT
270 record, or 0 if none was parsed */
271 /** the time when progress was last reported */
272 ib_time_t progress_time;
273 mem_heap_t* heap; /*!< memory heap of log records and file
274 addresses*/
275 hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
276 ulint n_addrs;/*!< number of not processed hashed file
277 addresses in the hash table */
278
279 recv_dblwr_t dblwr;
280
281 /** Lastly added LSN to the hash table of log records. */
282 lsn_t last_stored_lsn;
283
284 /** Determine whether redo log recovery progress should be reported.
285 @param[in] time the current time
286 @return whether progress should be reported
287 (the last report was at least 15 seconds ago) */
288 bool report(ib_time_t time)
289 {
290 if (time - progress_time < 15) {
291 return false;
292 }
293
294 progress_time = time;
295 return true;
296 }
297};
298
/** The recovery system (see recv_sys_t) */
extern recv_sys_t*	recv_sys;
301
/** true when applying redo log records during crash recovery; false
otherwise. Note that this is false while a background thread is
rolling back incomplete transactions.
Read through the recv_recovery_is_on() macro. */
extern volatile bool	recv_recovery_on;
/** If the following is true, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes true if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.

true means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
extern bool		recv_no_ibuf_operations;
/** true when recv_init_crash_recovery() has been called. */
extern bool		recv_needed_recovery;
#ifdef UNIV_DEBUG
/** true if writing to the redo log (mtr_commit) is forbidden.
Protected by log_sys.mutex. Only declared in UNIV_DEBUG builds. */
extern bool		recv_no_log_write;
#endif /* UNIV_DEBUG */
323
/** true if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future. Initially false, and set by
recv_recovery_from_checkpoint_start(). */
extern bool		recv_lsn_checks_on;
328
/** Size of the parsing buffer in bytes (2U << 20 = 2 MiB); it must
accommodate RECV_SCAN_SIZE many times! */
#define RECV_PARSING_BUF_SIZE	(2U << 20)
332
/** Size of block reads when the log groups are scanned forward to do a
roll-forward: four times (1 << srv_page_size_shift), which is presumably
four pages (assumes srv_page_size_shift is log2 of the page size; confirm
at its definition). Not a compile-time constant, because
srv_page_size_shift is a variable. */
#define RECV_SCAN_SIZE		(4U << srv_page_size_shift)
336
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
extern ulint		recv_n_pool_free_frames;
342
343#endif
344