1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2017, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file include/log0recv.h |
22 | Recovery |
23 | |
24 | Created 9/20/1997 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #ifndef log0recv_h |
28 | #define log0recv_h |
29 | |
30 | #include "univ.i" |
31 | #include "ut0byte.h" |
32 | #include "buf0types.h" |
33 | #include "hash0hash.h" |
34 | #include "log0log.h" |
35 | #include "mtr0types.h" |
36 | #include "ut0new.h" |
37 | |
38 | #include <list> |
39 | #include <vector> |
40 | |
/** Whether recv_writer_thread is active.
Only declared here; the variable is defined outside this header. */
extern bool recv_writer_thread_active;
43 | |
/** Check whether recovery is currently running.
Expands to a plain read of the global flag recv_recovery_on.
@return whether recovery is currently running */
#define recv_recovery_is_on() recv_recovery_on
46 | |
/** Find the latest checkpoint in the log header.
The declaration is annotated nonnull and warn_unused_result, so
max_field must not be NULL and the return value must be checked.
@param[out]	max_field	LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
@return error code or DB_SUCCESS */
dberr_t
recv_find_max_checkpoint(ulint* max_field)
	MY_ATTRIBUTE((nonnull, warn_unused_result));
53 | |
/** Apply the hashed log records to a page, if the page LSN is less than
the LSN of a log record.
@param	just_read_in	whether the page recently arrived to the I/O handler
@param	block		the page in the buffer pool */
void
recv_recover_page(bool just_read_in, buf_block_t* block);
60 | |
/** Start recovering from a redo log checkpoint.
The counterpart that completes the recovery is
recv_recovery_from_checkpoint_finish().
@see recv_recovery_from_checkpoint_finish
@param[in]	flush_lsn	FIL_PAGE_FILE_FLUSH_LSN
				of first system tablespace page
@return error code or DB_SUCCESS */
dberr_t
recv_recovery_from_checkpoint_start(
	lsn_t	flush_lsn);
/** Complete recovery from a checkpoint.
@see recv_recovery_from_checkpoint_start */
void
recv_recovery_from_checkpoint_finish(void);
/** Initiate the rollback of active transactions. */
void
recv_recovery_rollback_active(void);
/** Reset the logs. The contents of log files will be lost!
@param[in]	lsn	reset to this lsn rounded up to be divisible by
			OS_FILE_LOG_BLOCK_SIZE, after which we add
			LOG_BLOCK_HDR_SIZE */
void
recv_reset_logs(
	lsn_t		lsn);
/** Clean up after recv_sys_init().
@see recv_sys_init */
void
recv_sys_close();
/** Initialize the redo log recovery subsystem.
The matching cleanup function is recv_sys_close(). */
void
recv_sys_init();
/** Free the recovery system. */
void
recv_sys_debug_free(void);
98 | |
/** Reset the state of the recovery system variables. */
void
recv_sys_var_init(void);
104 | |
/** Apply the hash table of stored log records to persistent data pages.
@param[in]	last_batch	whether the change buffer merge will be
				performed as part of the operation */
void
recv_apply_hashed_log_recs(bool last_batch);
110 | |
/** Whether to store redo log records to the hash table.
Passed as the "store" argument of recv_parse_log_recs(). */
enum store_t {
	/** Do not store redo log records. */
	STORE_NO,
	/** Store redo log records. */
	STORE_YES,
	/** Store redo log records only if the tablespace exists. */
	STORE_IF_EXISTS
};
120 | |
121 | |
/** Add data from a new log block to the parsing buffer of recv_sys,
if recv_sys->parse_start_lsn is non-zero.
@param[in]	log_block	log block to add
@param[in]	scanned_lsn	lsn of how far we were able to find
				data in this log block
@return true if more data was added */
bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
129 | |
/** Parse log records from a buffer and optionally store them to a
hash table, where they wait to be merged to file pages.
@param[in]	checkpoint_lsn	the LSN of the latest checkpoint
@param[in]	store		whether to store page operations
@param[in]	apply		whether to apply the records
@return whether a MLOG_CHECKPOINT record was seen the first time,
or corruption was noticed */
bool recv_parse_log_recs(lsn_t checkpoint_lsn, store_t store, bool apply);
138 | |
/** Move the data remaining in the parsing buffer left, to the start of
the buffer. */
void recv_sys_justify_left_parsing_buf();
141 | |
/** Backup callback (a function pointer) that checks whether the space id
belongs to the skip table list given in the mariabackup option.
NOTE(review): the exact meaning of the boolean result (included vs.
skipped) is not visible in this header; confirm against the definition.
@param	space_id	tablespace identifier to check */
extern bool(*check_if_backup_includes)(ulint space_id);
145 | |
/** Block of log record data.
The struct is only a header: the log record data is stored physically
immediately after it, at most RECV_DATA_BLOCK_SIZE bytes per block. */
struct recv_data_t{
	recv_data_t*	next;	/*!< pointer to the next block or NULL */
				/*!< the log record data is stored physically
				immediately after this struct, max amount
				RECV_DATA_BLOCK_SIZE bytes of it */
};
153 | |
/** Stored log record struct.
Each record is linked through rec_list into the list of log records
for a page. */
struct recv_t{
	mlog_id_t	type;	/*!< log record type */
	ulint		len;	/*!< log record body length in bytes */
	recv_data_t*	data;	/*!< chain of blocks containing the log record
				body */
	lsn_t		start_lsn;/*!< start lsn of the log segment written by
				the mtr which generated this log record: NOTE
				that this is not necessarily the start lsn of
				this log record */
	lsn_t		end_lsn;/*!< end lsn of the log segment written by
				the mtr which generated this log record: NOTE
				that this is not necessarily the end lsn of
				this log record */
	UT_LIST_NODE_T(recv_t)
			rec_list;/*!< list of log records for this page */
};
171 | |
/** States of recv_addr_t (stored in recv_addr_t::state) */
enum recv_addr_state {
	/** not yet processed */
	RECV_NOT_PROCESSED,
	/** page is being read */
	RECV_BEING_READ,
	/** log records are being applied on the page */
	RECV_BEING_PROCESSED,
	/** log records have been applied on the page */
	RECV_PROCESSED,
	/** log records have been discarded because the tablespace
	does not exist */
	RECV_DISCARDED
};
186 | |
/** Hashed page file address struct.
Identifies one page by (space, page_no) and owns the list of log records
stored for that page. */
struct recv_addr_t{
	enum recv_addr_state state;
				/*!< recovery state of the page */
	unsigned	space:32;/*!< space id */
	unsigned	page_no:32;/*!< page number */
	UT_LIST_BASE_NODE_T(recv_t)
			rec_list;/*!< list of log records for this page */
	hash_node_t	addr_hash;/*!< hash node in the hash bucket chain */
};
197 | |
198 | struct recv_dblwr_t { |
199 | /** Add a page frame to the doublewrite recovery buffer. */ |
200 | void add(byte* page) { |
201 | pages.push_back(page); |
202 | } |
203 | |
204 | /** Find a doublewrite copy of a page. |
205 | @param[in] space_id tablespace identifier |
206 | @param[in] page_no page number |
207 | @return page frame |
208 | @retval NULL if no page was found */ |
209 | const byte* find_page(ulint space_id, ulint page_no); |
210 | |
211 | typedef std::list<byte*, ut_allocator<byte*> > list; |
212 | |
213 | /** Recovered doublewrite buffer page frames */ |
214 | list pages; |
215 | }; |
216 | |
217 | /** Recovery system data structure */ |
218 | struct recv_sys_t{ |
219 | ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs, |
220 | n_addrs, and the state field in each recv_addr |
221 | struct */ |
222 | ib_mutex_t writer_mutex;/*!< mutex coordinating |
223 | flushing between recv_writer_thread and |
224 | the recovery thread. */ |
225 | os_event_t flush_start;/*!< event to activate |
226 | page cleaner threads */ |
227 | os_event_t flush_end;/*!< event to signal that the page |
228 | cleaner has finished the request */ |
229 | buf_flush_t flush_type;/*!< type of the flush request. |
230 | BUF_FLUSH_LRU: flush end of LRU, keeping free blocks. |
231 | BUF_FLUSH_LIST: flush all of blocks. */ |
232 | ibool apply_log_recs; |
233 | /*!< this is TRUE when log rec application to |
234 | pages is allowed; this flag tells the |
235 | i/o-handler if it should do log record |
236 | application */ |
237 | ibool apply_batch_on; |
238 | /*!< this is TRUE when a log rec application |
239 | batch is running */ |
240 | byte* buf; /*!< buffer for parsing log records */ |
241 | size_t buf_size; /*!< size of buf */ |
242 | ulint len; /*!< amount of data in buf */ |
243 | lsn_t parse_start_lsn; |
244 | /*!< this is the lsn from which we were able to |
245 | start parsing log records and adding them to |
246 | the hash table; zero if a suitable |
247 | start point not found yet */ |
248 | lsn_t scanned_lsn; |
249 | /*!< the log data has been scanned up to this |
250 | lsn */ |
251 | ulint scanned_checkpoint_no; |
252 | /*!< the log data has been scanned up to this |
253 | checkpoint number (lowest 4 bytes) */ |
254 | ulint recovered_offset; |
255 | /*!< start offset of non-parsed log records in |
256 | buf */ |
257 | lsn_t recovered_lsn; |
258 | /*!< the log records have been parsed up to |
259 | this lsn */ |
260 | bool found_corrupt_log; |
261 | /*!< set when finding a corrupt log |
262 | block or record, or there is a log |
263 | parsing buffer overflow */ |
264 | bool found_corrupt_fs; |
265 | /*!< set when an inconsistency with |
266 | the file system contents is detected |
267 | during log scan or apply */ |
268 | lsn_t mlog_checkpoint_lsn; |
269 | /*!< the LSN of a MLOG_CHECKPOINT |
270 | record, or 0 if none was parsed */ |
271 | /** the time when progress was last reported */ |
272 | ib_time_t progress_time; |
273 | mem_heap_t* heap; /*!< memory heap of log records and file |
274 | addresses*/ |
275 | hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ |
276 | ulint n_addrs;/*!< number of not processed hashed file |
277 | addresses in the hash table */ |
278 | |
279 | recv_dblwr_t dblwr; |
280 | |
281 | /** Lastly added LSN to the hash table of log records. */ |
282 | lsn_t last_stored_lsn; |
283 | |
284 | /** Determine whether redo log recovery progress should be reported. |
285 | @param[in] time the current time |
286 | @return whether progress should be reported |
287 | (the last report was at least 15 seconds ago) */ |
288 | bool report(ib_time_t time) |
289 | { |
290 | if (time - progress_time < 15) { |
291 | return false; |
292 | } |
293 | |
294 | progress_time = time; |
295 | return true; |
296 | } |
297 | }; |
298 | |
/** The recovery system (pointer to the global recv_sys_t instance;
defined outside this header) */
extern recv_sys_t* recv_sys;
301 | |
/** TRUE when applying redo log records during crash recovery; FALSE
otherwise. Note that this is FALSE while a background thread is
rolling back incomplete transactions.
Read through the recv_recovery_is_on() macro. */
extern volatile bool recv_recovery_on;
/** If this is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed. This becomes TRUE if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.

NOTE: according to the original author's remark, TRUE means that recovery
is running and no operations on the log files are allowed yet, so the
variable name is misleading. */
extern bool recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called. */
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
Protected by log_sys.mutex.
Only declared in UNIV_DEBUG builds. */
extern bool recv_no_log_write;
#endif /* UNIV_DEBUG */
323 | |
/** TRUE if buf_page_is_corrupted() should check whether the log sequence
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
recv_recovery_from_checkpoint_start(). */
extern bool recv_lsn_checks_on;
328 | |
/** Size of the parsing buffer in bytes (2U << 20, i.e. 2 MiB); it must
be able to hold RECV_SCAN_SIZE many times over! */
#define RECV_PARSING_BUF_SIZE (2U << 20)
332 | |
/** Size of block reads when the log groups are scanned forward to do a
roll-forward. Evaluates to 4U << srv_page_size_shift, so the value
depends on srv_page_size_shift at the time the macro is expanded. */
#define RECV_SCAN_SIZE (4U << srv_page_size_shift)
336 | |
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
extern ulint recv_n_pool_free_frames;
342 | |
343 | #endif |
344 | |