1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. |
4 | Copyright (c) 2009, Google Inc. |
5 | Copyright (c) 2017, 2018, MariaDB Corporation. |
6 | |
7 | Portions of this file contain modifications contributed and copyrighted by |
8 | Google, Inc. Those modifications are gratefully acknowledged and are described |
9 | briefly in the InnoDB documentation. The contributions by Google are |
10 | incorporated with their permission, and subject to the conditions contained in |
11 | the file COPYING.Google. |
12 | |
13 | This program is free software; you can redistribute it and/or modify it under |
14 | the terms of the GNU General Public License as published by the Free Software |
15 | Foundation; version 2 of the License. |
16 | |
17 | This program is distributed in the hope that it will be useful, but WITHOUT |
18 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
19 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
20 | |
21 | You should have received a copy of the GNU General Public License along with |
22 | this program; if not, write to the Free Software Foundation, Inc., |
23 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
24 | |
25 | *****************************************************************************/ |
26 | |
27 | /**************************************************//** |
28 | @file include/log0log.h |
29 | Database log |
30 | |
31 | Created 12/9/1995 Heikki Tuuri |
32 | *******************************************************/ |
33 | |
34 | #ifndef log0log_h |
35 | #define log0log_h |
36 | |
37 | #include "univ.i" |
38 | #include "dyn0buf.h" |
39 | #include "sync0rw.h" |
40 | #include "log0types.h" |
41 | #include "os0event.h" |
42 | #include "os0file.h" |
43 | |
44 | /** Maximum number of srv_n_log_files, or innodb_log_files_in_group */ |
45 | #define SRV_N_LOG_FILES_MAX 100 |
46 | |
47 | /** Magic value to use instead of log checksums when they are disabled */ |
48 | #define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL |
49 | |
50 | /* Margin for the free space in the smallest log group, before a new query |
51 | step which modifies the database, is started */ |
52 | |
/** Free-space margin reserved per thread: 4 << srv_page_size_shift bytes
(presumably four pages; confirm against the definition of
srv_page_size_shift) */
#define LOG_CHECKPOINT_FREE_PER_THREAD	(4U << srv_page_size_shift)
/** Additional free-space margin: 8 << srv_page_size_shift bytes */
#define LOG_CHECKPOINT_EXTRA_FREE	(8U << srv_page_size_shift)
55 | |
56 | typedef ulint (*log_checksum_func_t)(const byte* log_block); |
57 | |
58 | /** Pointer to the log checksum calculation function. Protected with |
59 | log_sys.mutex. */ |
60 | extern log_checksum_func_t log_checksum_algorithm_ptr; |
61 | |
62 | /** Append a string to the log. |
63 | @param[in] str string |
64 | @param[in] len string length |
65 | @param[out] start_lsn start LSN of the log record |
66 | @return end lsn of the log record, zero if did not succeed */ |
67 | UNIV_INLINE |
68 | lsn_t |
69 | log_reserve_and_write_fast( |
70 | const void* str, |
71 | ulint len, |
72 | lsn_t* start_lsn); |
73 | /***********************************************************************//** |
74 | Checks if there is need for a log buffer flush or a new checkpoint, and does |
75 | this if yes. Any database operation should call this when it has modified |
76 | more than about 4 pages. NOTE that this function may only be called when the |
77 | OS thread owns no synchronization objects except the dictionary mutex. */ |
78 | UNIV_INLINE |
79 | void |
80 | log_free_check(void); |
81 | /*================*/ |
82 | |
83 | /** Extends the log buffer. |
84 | @param[in] len requested minimum size in bytes */ |
85 | void log_buffer_extend(ulong len); |
86 | |
/** Check the margin so that the transaction log is not overwritten past the
last checkpoint. If the log write is estimated to exceed log_group_capacity,
wait until the checkpoint has advanced far enough.
@param[in]	len	length of the data to be written */
91 | |
92 | void |
93 | log_margin_checkpoint_age( |
94 | ulint len); |
95 | |
96 | /** Open the log for log_write_low. The log must be closed with log_close. |
97 | @param[in] len length of the data to be written |
98 | @return start lsn of the log record */ |
99 | lsn_t |
100 | log_reserve_and_open( |
101 | ulint len); |
102 | /************************************************************//** |
103 | Writes to the log the string given. It is assumed that the caller holds the |
104 | log mutex. */ |
105 | void |
106 | log_write_low( |
107 | /*==========*/ |
108 | const byte* str, /*!< in: string */ |
109 | ulint str_len); /*!< in: string length */ |
110 | /************************************************************//** |
111 | Closes the log. |
112 | @return lsn */ |
113 | lsn_t |
114 | log_close(void); |
115 | /*===========*/ |
116 | /************************************************************//** |
117 | Gets the current lsn. |
118 | @return current lsn */ |
119 | UNIV_INLINE |
120 | lsn_t |
121 | log_get_lsn(void); |
122 | /*=============*/ |
123 | /************************************************************//** |
124 | Gets the current lsn. |
125 | @return current lsn */ |
126 | UNIV_INLINE |
127 | lsn_t |
128 | log_get_lsn_nowait(void); |
129 | /*=============*/ |
130 | /************************************************************//** |
131 | Gets the last lsn that is fully flushed to disk. |
132 | @return last flushed lsn */ |
133 | UNIV_INLINE |
134 | ib_uint64_t |
135 | log_get_flush_lsn(void); |
136 | /*=============*/ |
137 | /**************************************************************** |
138 | Gets the log group capacity. It is OK to read the value without |
139 | holding log_sys.mutex because it is constant. |
140 | @return log group capacity */ |
141 | UNIV_INLINE |
142 | lsn_t |
143 | log_get_capacity(void); |
144 | /*==================*/ |
145 | /**************************************************************** |
146 | Get log_sys::max_modified_age_async. It is OK to read the value without |
147 | holding log_sys::mutex because it is constant. |
148 | @return max_modified_age_async */ |
149 | UNIV_INLINE |
150 | lsn_t |
151 | log_get_max_modified_age_async(void); |
152 | /*================================*/ |
153 | |
154 | /** Calculate the recommended highest values for lsn - last_checkpoint_lsn |
155 | and lsn - buf_get_oldest_modification(). |
156 | @param[in] file_size requested innodb_log_file_size |
157 | @retval true on success |
158 | @retval false if the smallest log group is too small to |
159 | accommodate the number of OS threads in the database server */ |
160 | bool |
161 | log_set_capacity(ulonglong file_size) |
162 | MY_ATTRIBUTE((warn_unused_result)); |
163 | |
164 | /******************************************************//** |
165 | This function is called, e.g., when a transaction wants to commit. It checks |
166 | that the log has been written to the log file up to the last log entry written |
167 | by the transaction. If there is a flush running, it waits and checks if the |
168 | flush flushed enough. If not, starts a new flush. */ |
169 | void |
170 | log_write_up_to( |
171 | /*============*/ |
172 | lsn_t lsn, /*!< in: log sequence number up to which |
173 | the log should be written, LSN_MAX if not specified */ |
174 | bool flush_to_disk); |
175 | /*!< in: true if we want the written log |
176 | also to be flushed to disk */ |
177 | /** write to the log file up to the last log entry. |
178 | @param[in] sync whether we want the written log |
179 | also to be flushed to disk. */ |
180 | void |
181 | log_buffer_flush_to_disk( |
182 | bool sync = true); |
/****************************************************************//**
This function writes the log buffer to the log file and if 'flush'
is set it forces a flush of the log file as well. This is meant to be
called from the background master thread only, as it does not wait for
the write (+ possible flush) to finish. */
188 | void |
189 | log_buffer_sync_in_background( |
190 | /*==========================*/ |
191 | bool flush); /*<! in: flush the logs to disk */ |
192 | /** Make a checkpoint. Note that this function does not flush dirty |
193 | blocks from the buffer pool: it only checks what is lsn of the oldest |
194 | modification in the pool, and writes information about the lsn in |
195 | log files. Use log_make_checkpoint_at() to flush also the pool. |
196 | @param[in] sync whether to wait for the write to complete |
197 | @param[in] write_always force a write even if no log |
198 | has been generated since the latest checkpoint |
199 | @return true if success, false if a checkpoint write was already running */ |
200 | bool |
201 | log_checkpoint( |
202 | bool sync, |
203 | bool write_always); |
204 | |
205 | /** Make a checkpoint at or after a specified LSN. |
206 | @param[in] lsn the log sequence number, or LSN_MAX |
207 | for the latest LSN |
208 | @param[in] write_always force a write even if no log |
209 | has been generated since the latest checkpoint */ |
210 | void |
211 | log_make_checkpoint_at( |
212 | lsn_t lsn, |
213 | bool write_always); |
214 | |
215 | /****************************************************************//** |
216 | Makes a checkpoint at the latest lsn and writes it to first page of each |
217 | data file in the database, so that we know that the file spaces contain |
218 | all modifications up to that lsn. This can only be called at database |
219 | shutdown. This function also writes all log in log files to the log archive. */ |
220 | void |
221 | logs_empty_and_mark_files_at_shutdown(void); |
222 | /*=======================================*/ |
223 | /** Read a log group header page to log_sys.checkpoint_buf. |
224 | @param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 */ |
225 | void (ulint ); |
226 | /** Write checkpoint info to the log header and invoke log_mutex_exit(). |
227 | @param[in] sync whether to wait for the write to complete |
228 | @param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */ |
229 | void |
230 | log_write_checkpoint_info(bool sync, lsn_t end_lsn); |
231 | |
232 | /** Set extra data to be written to the redo log during checkpoint. |
233 | @param[in] buf data to be appended on checkpoint, or NULL |
234 | @return pointer to previous data to be appended on checkpoint */ |
235 | mtr_buf_t* |
236 | log_append_on_checkpoint( |
237 | mtr_buf_t* buf); |
238 | /** |
239 | Checks that there is enough free space in the log to start a new query step. |
240 | Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this |
241 | function may only be called if the calling thread owns no synchronization |
242 | objects! */ |
243 | void |
244 | log_check_margins(void); |
245 | |
246 | /************************************************************//** |
247 | Gets a log block flush bit. |
248 | @return TRUE if this block was the first to be written in a log flush */ |
249 | UNIV_INLINE |
250 | ibool |
251 | log_block_get_flush_bit( |
252 | /*====================*/ |
253 | const byte* log_block); /*!< in: log block */ |
254 | /************************************************************//** |
255 | Gets a log block number stored in the header. |
256 | @return log block number stored in the block header */ |
257 | UNIV_INLINE |
258 | ulint |
259 | log_block_get_hdr_no( |
260 | /*=================*/ |
261 | const byte* log_block); /*!< in: log block */ |
262 | /************************************************************//** |
263 | Gets a log block data length. |
264 | @return log block data length measured as a byte offset from the block start */ |
265 | UNIV_INLINE |
266 | ulint |
267 | log_block_get_data_len( |
268 | /*===================*/ |
269 | const byte* log_block); /*!< in: log block */ |
270 | /************************************************************//** |
271 | Sets the log block data length. */ |
272 | UNIV_INLINE |
273 | void |
274 | log_block_set_data_len( |
275 | /*===================*/ |
276 | byte* log_block, /*!< in/out: log block */ |
277 | ulint len); /*!< in: data length */ |
278 | /************************************************************//** |
279 | Calculates the checksum for a log block. |
280 | @return checksum */ |
281 | UNIV_INLINE |
282 | ulint |
283 | log_block_calc_checksum( |
284 | /*====================*/ |
285 | const byte* block); /*!< in: log block */ |
286 | |
287 | /** Calculates the checksum for a log block using the CRC32 algorithm. |
288 | @param[in] block log block |
289 | @return checksum */ |
290 | UNIV_INLINE |
291 | ulint |
292 | log_block_calc_checksum_crc32( |
293 | const byte* block); |
294 | |
295 | /** Calculates the checksum for a log block using the "no-op" algorithm. |
296 | @return the calculated checksum value */ |
297 | UNIV_INLINE |
298 | ulint |
299 | log_block_calc_checksum_none(const byte*); |
300 | |
301 | /************************************************************//** |
302 | Gets a log block checksum field value. |
303 | @return checksum */ |
304 | UNIV_INLINE |
305 | ulint |
306 | log_block_get_checksum( |
307 | /*===================*/ |
308 | const byte* log_block); /*!< in: log block */ |
309 | /************************************************************//** |
310 | Sets a log block checksum field value. */ |
311 | UNIV_INLINE |
312 | void |
313 | log_block_set_checksum( |
314 | /*===================*/ |
315 | byte* log_block, /*!< in/out: log block */ |
316 | ulint checksum); /*!< in: checksum */ |
317 | /************************************************************//** |
318 | Gets a log block first mtr log record group offset. |
319 | @return first mtr log record group byte offset from the block start, 0 |
320 | if none */ |
321 | UNIV_INLINE |
322 | ulint |
323 | log_block_get_first_rec_group( |
324 | /*==========================*/ |
325 | const byte* log_block); /*!< in: log block */ |
326 | /************************************************************//** |
327 | Sets the log block first mtr log record group offset. */ |
328 | UNIV_INLINE |
329 | void |
330 | log_block_set_first_rec_group( |
331 | /*==========================*/ |
332 | byte* log_block, /*!< in/out: log block */ |
333 | ulint offset); /*!< in: offset, 0 if none */ |
334 | /************************************************************//** |
335 | Gets a log block checkpoint number field (4 lowest bytes). |
336 | @return checkpoint no (4 lowest bytes) */ |
337 | UNIV_INLINE |
338 | ulint |
339 | log_block_get_checkpoint_no( |
340 | /*========================*/ |
341 | const byte* log_block); /*!< in: log block */ |
342 | /************************************************************//** |
343 | Initializes a log block in the log buffer. */ |
344 | UNIV_INLINE |
345 | void |
346 | log_block_init( |
347 | /*===========*/ |
348 | byte* log_block, /*!< in: pointer to the log buffer */ |
349 | lsn_t lsn); /*!< in: lsn within the log block */ |
350 | /************************************************************//** |
351 | Converts a lsn to a log block number. |
352 | @return log block number, it is > 0 and <= 1G */ |
353 | UNIV_INLINE |
354 | ulint |
355 | log_block_convert_lsn_to_no( |
356 | /*========================*/ |
357 | lsn_t lsn); /*!< in: lsn of a byte within the block */ |
358 | /******************************************************//** |
359 | Prints info of the log. */ |
360 | void |
361 | log_print( |
362 | /*======*/ |
363 | FILE* file); /*!< in: file where to print */ |
364 | /******************************************************//** |
365 | Peeks the current lsn. |
366 | @return TRUE if success, FALSE if could not get the log system mutex */ |
367 | ibool |
368 | log_peek_lsn( |
369 | /*=========*/ |
370 | lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */ |
371 | /**********************************************************************//** |
372 | Refreshes the statistics used to print per-second averages. */ |
373 | void |
374 | log_refresh_stats(void); |
375 | /*===================*/ |
376 | |
377 | /** Whether to generate and require checksums on the redo log pages */ |
378 | extern my_bool innodb_log_checksums; |
379 | |
380 | /* Values used as flags */ |
381 | #define LOG_FLUSH 7652559 |
382 | #define LOG_CHECKPOINT 78656949 |
383 | |
384 | /* The counting of lsn's starts from this value: this must be non-zero */ |
385 | #define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) |
386 | |
387 | /* Offsets of a log block header */ |
388 | #define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and |
389 | is allowed to wrap around at 2G; the |
390 | highest bit is set to 1 if this is the |
391 | first log block in a log flush write |
392 | segment */ |
393 | #define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL |
394 | /* mask used to get the highest bit in |
395 | the preceding field */ |
396 | #define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to |
397 | this block */ |
398 | #define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an |
399 | mtr log record group in this log block, |
400 | 0 if none; if the value is the same |
401 | as LOG_BLOCK_HDR_DATA_LEN, it means |
402 | that the first rec group has not yet |
403 | been catenated to this log block, but |
404 | if it will, it will start at this |
405 | offset; an archive recovery can |
406 | start parsing the log records starting |
407 | from this offset in this log block, |
408 | if value not 0 */ |
409 | #define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of |
410 | log_sys.next_checkpoint_no when the |
411 | log block was last written to: if the |
412 | block has not yet been written full, |
413 | this value is only updated before a |
414 | log buffer flush */ |
415 | #define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in |
416 | bytes */ |
417 | |
418 | /* Offsets of a log block trailer from the end of the block */ |
419 | #define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block |
420 | contents; in InnoDB versions |
421 | < 3.23.52 this did not contain the |
422 | checksum but the same value as |
423 | .._HDR_NO */ |
424 | #define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */ |
425 | |
426 | /** Offsets inside the checkpoint pages (redo log format version 1) @{ */ |
427 | /** Checkpoint number */ |
428 | #define LOG_CHECKPOINT_NO 0 |
429 | /** Log sequence number up to which all changes have been flushed */ |
430 | #define LOG_CHECKPOINT_LSN 8 |
431 | /** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */ |
432 | #define LOG_CHECKPOINT_OFFSET 16 |
433 | /** srv_log_buffer_size at the time of the checkpoint (not used) */ |
434 | #define LOG_CHECKPOINT_LOG_BUF_SIZE 24 |
435 | /** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/ |
436 | #define LOG_CHECKPOINT_CRYPT_KEY 32 |
437 | /** MariaDB 10.2.5 encrypted redo log random nonce (32 bits) */ |
438 | #define LOG_CHECKPOINT_CRYPT_NONCE 36 |
439 | /** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */ |
440 | #define LOG_CHECKPOINT_CRYPT_MESSAGE 40 |
/** start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
to this checkpoint, or 0 if the information has not been written.
The expansion is parenthesized so that the macro can be used safely
inside larger expressions (e.g. multiplied or subtracted). */
#define LOG_CHECKPOINT_END_LSN		(OS_FILE_LOG_BLOCK_SIZE - 16)
444 | |
445 | /* @} */ |
446 | |
447 | /** Offsets of a log file header */ |
448 | /* @{ */ |
/** Log file header format identifier (32-bit unsigned big-endian integer).
This used to be called LOG_GROUP_ID and always written as 0,
because InnoDB never supported more than one copy of the redo log. */
#define LOG_HEADER_FORMAT	0
/** 4 unused (zero-initialized) bytes. In format version 0, the
LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
which the LOG_FILE_START_LSN was renamed to. */
#define LOG_HEADER_PAD1		4
/** LSN of the start of data in this log file (with format version 1;
in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
#define LOG_HEADER_START_LSN	8
/** A null-terminated string which will contain either the string 'ibbackup'
and the creation time if the log file was created by mysqlbackup --restore,
or the MySQL version that created the redo log file. */
#define LOG_HEADER_CREATOR	16
/** End of the log file creator field. */
#define LOG_HEADER_CREATOR_END	(LOG_HEADER_CREATOR + 32)
/** Contents of the LOG_HEADER_CREATOR field */
#define LOG_HEADER_CREATOR_CURRENT		\
	"MariaDB "				\
	IB_TO_STR(MYSQL_VERSION_MAJOR) "."	\
	IB_TO_STR(MYSQL_VERSION_MINOR) "."	\
	IB_TO_STR(MYSQL_VERSION_PATCH)

/** The original (not version-tagged) InnoDB redo log format */
#define LOG_HEADER_FORMAT_3_23		0
/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
#define LOG_HEADER_FORMAT_10_2		1
/** The MariaDB 10.3.2 log format */
#define LOG_HEADER_FORMAT_10_3		103
/** The redo log format identifier corresponding to the current format version.
Stored in LOG_HEADER_FORMAT. */
#define LOG_HEADER_FORMAT_CURRENT	LOG_HEADER_FORMAT_10_3
/** Encrypted MariaDB redo log (flag bit combined with the format number) */
#define LOG_HEADER_FORMAT_ENCRYPTED	(1U<<31)
484 | |
485 | /* @} */ |
486 | |
487 | #define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE |
488 | /* first checkpoint field in the log |
489 | header; we write alternately to the |
490 | checkpoint fields when we make new |
491 | checkpoints; this field is only defined |
492 | in the first log file of a log group */ |
493 | #define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE) |
494 | /* second checkpoint field in the log |
495 | header */ |
496 | #define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE) |
497 | |
/** Corruption status of a log group */
enum log_group_state_t {
	LOG_GROUP_OK = 0,	/*!< no corruption has been detected */
	LOG_GROUP_CORRUPTED = 1	/*!< the log group is corrupted */
};
505 | |
506 | typedef ib_mutex_t LogSysMutex; |
507 | typedef ib_mutex_t FlushOrderMutex; |
508 | |
509 | /** Redo log buffer */ |
510 | struct log_t{ |
511 | MY_ALIGNED(CACHE_LINE_SIZE) |
512 | lsn_t lsn; /*!< log sequence number */ |
513 | ulong buf_free; /*!< first free offset within the log |
514 | buffer in use */ |
515 | |
516 | MY_ALIGNED(CACHE_LINE_SIZE) |
517 | LogSysMutex mutex; /*!< mutex protecting the log */ |
518 | MY_ALIGNED(CACHE_LINE_SIZE) |
519 | LogSysMutex write_mutex; /*!< mutex protecting writing to log */ |
520 | MY_ALIGNED(CACHE_LINE_SIZE) |
521 | FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to |
522 | the flush list when we are putting |
523 | dirty blocks in the list. The idea |
524 | behind this mutex is to be able |
525 | to release log_sys.mutex during |
526 | mtr_commit and still ensure that |
527 | insertions in the flush_list happen |
528 | in the LSN order. */ |
529 | byte* buf; /*!< Memory of double the |
530 | srv_log_buffer_size is |
531 | allocated here. This pointer will change |
532 | however to either the first half or the |
533 | second half in turns, so that log |
534 | write/flush to disk don't block |
535 | concurrent mtrs which will write |
536 | log to this buffer. Care to switch back |
537 | to the first half before freeing/resizing |
538 | must be undertaken. */ |
539 | bool first_in_use; /*!< true if buf points to the first |
540 | half of the aligned(buf_ptr), false |
541 | if the second half */ |
542 | ulong max_buf_free; /*!< recommended maximum value of |
543 | buf_free for the buffer in use, after |
544 | which the buffer is flushed */ |
545 | bool check_flush_or_checkpoint; |
546 | /*!< this is set when there may |
547 | be need to flush the log buffer, or |
548 | preflush buffer pool pages, or make |
549 | a checkpoint; this MUST be TRUE when |
550 | lsn - last_checkpoint_lsn > |
551 | max_checkpoint_age; this flag is |
552 | peeked at by log_free_check(), which |
553 | does not reserve the log mutex */ |
554 | |
555 | /** Log files. Protected by mutex or write_mutex. */ |
556 | struct files { |
557 | /** number of files */ |
558 | ulint n_files; |
559 | /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */ |
560 | ulint format; |
561 | /** individual log file size in bytes, including the header */ |
562 | lsn_t file_size; |
563 | /** corruption status */ |
564 | log_group_state_t state; |
565 | /** lsn used to fix coordinates within the log group */ |
566 | lsn_t lsn; |
567 | /** the byte offset of the above lsn */ |
568 | lsn_t lsn_offset; |
569 | |
570 | /** unaligned buffers */ |
571 | byte* ; |
572 | /** buffers for each file header in the group */ |
573 | byte* [SRV_N_LOG_FILES_MAX]; |
574 | |
575 | /** used only in recovery: recovery scan succeeded up to this |
576 | lsn in this log group */ |
577 | lsn_t scanned_lsn; |
578 | |
579 | /** @return whether the redo log is encrypted */ |
580 | bool is_encrypted() const { return format & LOG_HEADER_FORMAT_ENCRYPTED; } |
581 | /** @return capacity in bytes */ |
582 | lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; } |
583 | /** Calculate the offset of a log sequence number. |
584 | @param[in] lsn log sequence number |
585 | @return offset within the log */ |
586 | inline lsn_t calc_lsn_offset(lsn_t lsn) const; |
587 | |
588 | /** Set the field values to correspond to a given lsn. */ |
589 | void set_fields(lsn_t lsn) |
590 | { |
591 | lsn_offset = calc_lsn_offset(lsn); |
592 | this->lsn = lsn; |
593 | } |
594 | |
595 | /** Read a log segment to log_sys.buf. |
596 | @param[in,out] start_lsn in: read area start, |
597 | out: the last read valid lsn |
598 | @param[in] end_lsn read area end |
599 | @return whether no invalid blocks (e.g checksum mismatch) were found */ |
600 | bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn); |
601 | |
602 | /** Initialize the redo log buffer. |
603 | @param[in] n_files number of files */ |
604 | void create(ulint n_files); |
605 | |
606 | /** Close the redo log buffer. */ |
607 | void close() |
608 | { |
609 | ut_free(file_header_bufs_ptr); |
610 | n_files = 0; |
611 | file_header_bufs_ptr = NULL; |
612 | memset(file_header_bufs, 0, sizeof file_header_bufs); |
613 | } |
614 | } log; |
615 | |
616 | /** The fields involved in the log buffer flush @{ */ |
617 | |
618 | ulong buf_next_to_write;/*!< first offset in the log buffer |
619 | where the byte content may not exist |
620 | written to file, e.g., the start |
621 | offset of a log record catenated |
622 | later; this is advanced when a flush |
623 | operation is completed to all the log |
624 | groups */ |
625 | volatile bool is_extending; /*!< this is set to true during extend |
626 | the log buffer size */ |
627 | lsn_t write_lsn; /*!< last written lsn */ |
628 | lsn_t current_flush_lsn;/*!< end lsn for the current running |
629 | write + flush operation */ |
630 | lsn_t flushed_to_disk_lsn; |
631 | /*!< how far we have written the log |
632 | AND flushed to disk */ |
633 | ulint n_pending_flushes;/*!< number of currently |
634 | pending flushes; protected by |
635 | log_sys.mutex */ |
636 | os_event_t flush_event; /*!< this event is in the reset state |
637 | when a flush is running; |
638 | os_event_set() and os_event_reset() |
639 | are protected by log_sys.mutex */ |
640 | ulint n_log_ios; /*!< number of log i/os initiated thus |
641 | far */ |
642 | ulint n_log_ios_old; /*!< number of log i/o's at the |
643 | previous printout */ |
644 | time_t last_printout_time;/*!< when log_print was last time |
645 | called */ |
646 | /* @} */ |
647 | |
648 | /** Fields involved in checkpoints @{ */ |
649 | lsn_t log_group_capacity; /*!< capacity of the log group; if |
650 | the checkpoint age exceeds this, it is |
651 | a serious error because it is possible |
652 | we will then overwrite log and spoil |
653 | crash recovery */ |
654 | lsn_t max_modified_age_async; |
655 | /*!< when this recommended |
656 | value for lsn - |
657 | buf_pool_get_oldest_modification() |
658 | is exceeded, we start an |
659 | asynchronous preflush of pool pages */ |
660 | lsn_t max_modified_age_sync; |
661 | /*!< when this recommended |
662 | value for lsn - |
663 | buf_pool_get_oldest_modification() |
664 | is exceeded, we start a |
665 | synchronous preflush of pool pages */ |
666 | lsn_t max_checkpoint_age_async; |
667 | /*!< when this checkpoint age |
668 | is exceeded we start an |
669 | asynchronous writing of a new |
670 | checkpoint */ |
671 | lsn_t max_checkpoint_age; |
672 | /*!< this is the maximum allowed value |
673 | for lsn - last_checkpoint_lsn when a |
674 | new query step is started */ |
675 | ib_uint64_t next_checkpoint_no; |
676 | /*!< next checkpoint number */ |
677 | lsn_t last_checkpoint_lsn; |
678 | /*!< latest checkpoint lsn */ |
679 | lsn_t next_checkpoint_lsn; |
680 | /*!< next checkpoint lsn */ |
681 | mtr_buf_t* append_on_checkpoint; |
682 | /*!< extra redo log records to write |
683 | during a checkpoint, or NULL if none. |
684 | The pointer is protected by |
685 | log_sys.mutex, and the data must |
686 | remain constant as long as this |
687 | pointer is not NULL. */ |
688 | ulint n_pending_checkpoint_writes; |
689 | /*!< number of currently pending |
690 | checkpoint writes */ |
691 | rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a |
692 | checkpoint write is running; a thread |
693 | should wait for this without owning |
694 | the log mutex */ |
695 | |
696 | /** buffer for checkpoint header */ |
697 | MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) |
698 | byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE]; |
699 | /* @} */ |
700 | |
701 | private: |
702 | bool m_initialised; |
703 | public: |
704 | /** |
705 | Constructor. |
706 | |
707 | Some members may require late initialisation, thus we just mark object as |
708 | uninitialised. Real initialisation happens in create(). |
709 | */ |
710 | log_t(): m_initialised(false) {} |
711 | |
712 | /** @return whether the redo log is encrypted */ |
713 | bool is_encrypted() const { return(log.is_encrypted()); } |
714 | |
715 | bool is_initialised() { return m_initialised; } |
716 | |
717 | /** Complete an asynchronous checkpoint write. */ |
718 | void complete_checkpoint(); |
719 | |
720 | /** Initialise the redo log subsystem. */ |
721 | void create(); |
722 | |
723 | /** Shut down the redo log subsystem. */ |
724 | void close(); |
725 | }; |
726 | |
727 | /** Redo log system */ |
728 | extern log_t log_sys; |
729 | |
730 | /** Calculate the offset of a log sequence number. |
731 | @param[in] lsn log sequence number |
732 | @return offset within the log */ |
733 | inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const |
734 | { |
735 | ut_ad(this == &log_sys.log); |
736 | /* The lsn parameters are updated while holding both the mutexes |
737 | and it is ok to have either of them while reading */ |
738 | ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned()); |
739 | const lsn_t group_size= capacity(); |
740 | lsn_t l= lsn - this->lsn; |
741 | if (longlong(l) < 0) { |
742 | l= lsn_t(-longlong(l)) % group_size; |
743 | l= group_size - l; |
744 | } |
745 | |
746 | l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size); |
747 | l%= group_size; |
748 | return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE)); |
749 | } |
750 | |
/* Convenience wrappers around the three mutexes of log_sys. */

/** Check whether log_sys.mutex is owned. */
#define log_mutex_own()			mutex_own(&log_sys.mutex)
/** Acquire log_sys.mutex. */
#define log_mutex_enter()		mutex_enter(&log_sys.mutex)
/** Release log_sys.mutex. */
#define log_mutex_exit()		mutex_exit(&log_sys.mutex)

/** Check whether log_sys.write_mutex is owned. */
#define log_write_mutex_own()		mutex_own(&log_sys.write_mutex)
/** Acquire log_sys.write_mutex. */
#define log_write_mutex_enter()		mutex_enter(&log_sys.write_mutex)
/** Release log_sys.write_mutex. */
#define log_write_mutex_exit()		mutex_exit(&log_sys.write_mutex)

/** Acquire both log mutexes: write_mutex first, then mutex. */
#define log_mutex_enter_all()		do {	\
	log_write_mutex_enter();		\
	log_mutex_enter();			\
} while (0)

/** Release both log mutexes, in the reverse order of acquisition. */
#define log_mutex_exit_all()		do {	\
	log_mutex_exit();			\
	log_write_mutex_exit();			\
} while (0)

/** Check whether log_sys.log_flush_order_mutex is owned. */
#define log_flush_order_mutex_own()		\
	mutex_own(&log_sys.log_flush_order_mutex)

/** Acquire log_sys.log_flush_order_mutex. */
#define log_flush_order_mutex_enter()	do {		\
	mutex_enter(&log_sys.log_flush_order_mutex);	\
} while (0)

/** Release log_sys.log_flush_order_mutex. */
#define log_flush_order_mutex_exit()	do {		\
	mutex_exit(&log_sys.log_flush_order_mutex);	\
} while (0)
793 | |
794 | /* log scrubbing speed, in bytes/sec */ |
795 | extern ulonglong innodb_scrub_log_speed; |
796 | |
797 | /** Event to wake up log_scrub_thread */ |
798 | extern os_event_t log_scrub_event; |
799 | /** Whether log_scrub_thread is active */ |
800 | extern bool log_scrub_thread_active; |
801 | |
802 | #include "log0log.ic" |
803 | |
804 | #endif |
805 | |