| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. |
| 4 | Copyright (c) 2009, Google Inc. |
| 5 | Copyright (c) 2017, 2018, MariaDB Corporation. |
| 6 | |
| 7 | Portions of this file contain modifications contributed and copyrighted by |
| 8 | Google, Inc. Those modifications are gratefully acknowledged and are described |
| 9 | briefly in the InnoDB documentation. The contributions by Google are |
| 10 | incorporated with their permission, and subject to the conditions contained in |
| 11 | the file COPYING.Google. |
| 12 | |
| 13 | This program is free software; you can redistribute it and/or modify it under |
| 14 | the terms of the GNU General Public License as published by the Free Software |
| 15 | Foundation; version 2 of the License. |
| 16 | |
| 17 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 18 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 19 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 20 | |
| 21 | You should have received a copy of the GNU General Public License along with |
| 22 | this program; if not, write to the Free Software Foundation, Inc., |
| 23 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 24 | |
| 25 | *****************************************************************************/ |
| 26 | |
| 27 | /**************************************************//** |
| 28 | @file include/log0log.h |
| 29 | Database log |
| 30 | |
| 31 | Created 12/9/1995 Heikki Tuuri |
| 32 | *******************************************************/ |
| 33 | |
| 34 | #ifndef log0log_h |
| 35 | #define log0log_h |
| 36 | |
| 37 | #include "univ.i" |
| 38 | #include "dyn0buf.h" |
| 39 | #include "sync0rw.h" |
| 40 | #include "log0types.h" |
| 41 | #include "os0event.h" |
| 42 | #include "os0file.h" |
| 43 | |
/** Upper bound for srv_n_log_files (the innodb_log_files_in_group setting) */
#define SRV_N_LOG_FILES_MAX	100

/** Value stored in place of a log checksum while log checksums are
disabled */
#define LOG_NO_CHECKSUM_MAGIC	0xDEADBEEFUL
| 49 | |
/* Margins for the free space in the smallest log group, before a new query
step which modifies the database is started. Both values are expressed as
a small constant shifted left by srv_page_size_shift. */

/** Margin computed as 4U << srv_page_size_shift */
#define LOG_CHECKPOINT_FREE_PER_THREAD	(4U << srv_page_size_shift)
/** Margin computed as 8U << srv_page_size_shift.
NOTE(review): the macro name was missing from this copy of the header
(a nameless #define does not preprocess); LOG_CHECKPOINT_EXTRA_FREE is
the name this constant is expected to carry -- confirm against its users
in log0log.cc. */
#define LOG_CHECKPOINT_EXTRA_FREE	(8U << srv_page_size_shift)
| 55 | |
| 56 | typedef ulint (*log_checksum_func_t)(const byte* log_block); |
| 57 | |
| 58 | /** Pointer to the log checksum calculation function. Protected with |
| 59 | log_sys.mutex. */ |
| 60 | extern log_checksum_func_t log_checksum_algorithm_ptr; |
| 61 | |
| 62 | /** Append a string to the log. |
| 63 | @param[in] str string |
| 64 | @param[in] len string length |
| 65 | @param[out] start_lsn start LSN of the log record |
| 66 | @return end lsn of the log record, zero if did not succeed */ |
| 67 | UNIV_INLINE |
| 68 | lsn_t |
| 69 | log_reserve_and_write_fast( |
| 70 | const void* str, |
| 71 | ulint len, |
| 72 | lsn_t* start_lsn); |
| 73 | /***********************************************************************//** |
| 74 | Checks if there is need for a log buffer flush or a new checkpoint, and does |
| 75 | this if yes. Any database operation should call this when it has modified |
| 76 | more than about 4 pages. NOTE that this function may only be called when the |
| 77 | OS thread owns no synchronization objects except the dictionary mutex. */ |
| 78 | UNIV_INLINE |
| 79 | void |
| 80 | log_free_check(void); |
| 81 | /*================*/ |
| 82 | |
| 83 | /** Extends the log buffer. |
| 84 | @param[in] len requested minimum size in bytes */ |
| 85 | void log_buffer_extend(ulong len); |
| 86 | |
| 87 | /** Check margin not to overwrite transaction log from the last checkpoint. |
| 88 | If would estimate the log write to exceed the log_group_capacity, |
| 89 | waits for the checkpoint is done enough. |
| 90 | @param[in] len length of the data to be written */ |
| 91 | |
| 92 | void |
| 93 | log_margin_checkpoint_age( |
| 94 | ulint len); |
| 95 | |
| 96 | /** Open the log for log_write_low. The log must be closed with log_close. |
| 97 | @param[in] len length of the data to be written |
| 98 | @return start lsn of the log record */ |
| 99 | lsn_t |
| 100 | log_reserve_and_open( |
| 101 | ulint len); |
| 102 | /************************************************************//** |
| 103 | Writes to the log the string given. It is assumed that the caller holds the |
| 104 | log mutex. */ |
| 105 | void |
| 106 | log_write_low( |
| 107 | /*==========*/ |
| 108 | const byte* str, /*!< in: string */ |
| 109 | ulint str_len); /*!< in: string length */ |
| 110 | /************************************************************//** |
| 111 | Closes the log. |
| 112 | @return lsn */ |
| 113 | lsn_t |
| 114 | log_close(void); |
| 115 | /*===========*/ |
| 116 | /************************************************************//** |
| 117 | Gets the current lsn. |
| 118 | @return current lsn */ |
| 119 | UNIV_INLINE |
| 120 | lsn_t |
| 121 | log_get_lsn(void); |
| 122 | /*=============*/ |
| 123 | /************************************************************//** |
| 124 | Gets the current lsn. |
| 125 | @return current lsn */ |
| 126 | UNIV_INLINE |
| 127 | lsn_t |
| 128 | log_get_lsn_nowait(void); |
| 129 | /*=============*/ |
| 130 | /************************************************************//** |
| 131 | Gets the last lsn that is fully flushed to disk. |
| 132 | @return last flushed lsn */ |
| 133 | UNIV_INLINE |
| 134 | ib_uint64_t |
| 135 | log_get_flush_lsn(void); |
| 136 | /*=============*/ |
| 137 | /**************************************************************** |
| 138 | Gets the log group capacity. It is OK to read the value without |
| 139 | holding log_sys.mutex because it is constant. |
| 140 | @return log group capacity */ |
| 141 | UNIV_INLINE |
| 142 | lsn_t |
| 143 | log_get_capacity(void); |
| 144 | /*==================*/ |
| 145 | /**************************************************************** |
| 146 | Get log_sys::max_modified_age_async. It is OK to read the value without |
| 147 | holding log_sys::mutex because it is constant. |
| 148 | @return max_modified_age_async */ |
| 149 | UNIV_INLINE |
| 150 | lsn_t |
| 151 | log_get_max_modified_age_async(void); |
| 152 | /*================================*/ |
| 153 | |
| 154 | /** Calculate the recommended highest values for lsn - last_checkpoint_lsn |
| 155 | and lsn - buf_get_oldest_modification(). |
| 156 | @param[in] file_size requested innodb_log_file_size |
| 157 | @retval true on success |
| 158 | @retval false if the smallest log group is too small to |
| 159 | accommodate the number of OS threads in the database server */ |
| 160 | bool |
| 161 | log_set_capacity(ulonglong file_size) |
| 162 | MY_ATTRIBUTE((warn_unused_result)); |
| 163 | |
| 164 | /******************************************************//** |
| 165 | This function is called, e.g., when a transaction wants to commit. It checks |
| 166 | that the log has been written to the log file up to the last log entry written |
| 167 | by the transaction. If there is a flush running, it waits and checks if the |
| 168 | flush flushed enough. If not, starts a new flush. */ |
| 169 | void |
| 170 | log_write_up_to( |
| 171 | /*============*/ |
| 172 | lsn_t lsn, /*!< in: log sequence number up to which |
| 173 | the log should be written, LSN_MAX if not specified */ |
| 174 | bool flush_to_disk); |
| 175 | /*!< in: true if we want the written log |
| 176 | also to be flushed to disk */ |
| 177 | /** write to the log file up to the last log entry. |
| 178 | @param[in] sync whether we want the written log |
| 179 | also to be flushed to disk. */ |
| 180 | void |
| 181 | log_buffer_flush_to_disk( |
| 182 | bool sync = true); |
| 183 | /****************************************************************//** |
| 184 | This functions writes the log buffer to the log file and if 'flush' |
| 185 | is set it forces a flush of the log file as well. This is meant to be |
| 186 | called from background master thread only as it does not wait for |
| 187 | the write (+ possible flush) to finish. */ |
| 188 | void |
| 189 | log_buffer_sync_in_background( |
| 190 | /*==========================*/ |
| 191 | bool flush); /*<! in: flush the logs to disk */ |
| 192 | /** Make a checkpoint. Note that this function does not flush dirty |
| 193 | blocks from the buffer pool: it only checks what is lsn of the oldest |
| 194 | modification in the pool, and writes information about the lsn in |
| 195 | log files. Use log_make_checkpoint_at() to flush also the pool. |
| 196 | @param[in] sync whether to wait for the write to complete |
| 197 | @param[in] write_always force a write even if no log |
| 198 | has been generated since the latest checkpoint |
| 199 | @return true if success, false if a checkpoint write was already running */ |
| 200 | bool |
| 201 | log_checkpoint( |
| 202 | bool sync, |
| 203 | bool write_always); |
| 204 | |
| 205 | /** Make a checkpoint at or after a specified LSN. |
| 206 | @param[in] lsn the log sequence number, or LSN_MAX |
| 207 | for the latest LSN |
| 208 | @param[in] write_always force a write even if no log |
| 209 | has been generated since the latest checkpoint */ |
| 210 | void |
| 211 | log_make_checkpoint_at( |
| 212 | lsn_t lsn, |
| 213 | bool write_always); |
| 214 | |
| 215 | /****************************************************************//** |
| 216 | Makes a checkpoint at the latest lsn and writes it to first page of each |
| 217 | data file in the database, so that we know that the file spaces contain |
| 218 | all modifications up to that lsn. This can only be called at database |
| 219 | shutdown. This function also writes all log in log files to the log archive. */ |
| 220 | void |
| 221 | logs_empty_and_mark_files_at_shutdown(void); |
| 222 | /*=======================================*/ |
| 223 | /** Read a log group header page to log_sys.checkpoint_buf. |
| 224 | @param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT2 */ |
| 225 | void (ulint ); |
| 226 | /** Write checkpoint info to the log header and invoke log_mutex_exit(). |
| 227 | @param[in] sync whether to wait for the write to complete |
| 228 | @param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */ |
| 229 | void |
| 230 | log_write_checkpoint_info(bool sync, lsn_t end_lsn); |
| 231 | |
| 232 | /** Set extra data to be written to the redo log during checkpoint. |
| 233 | @param[in] buf data to be appended on checkpoint, or NULL |
| 234 | @return pointer to previous data to be appended on checkpoint */ |
| 235 | mtr_buf_t* |
| 236 | log_append_on_checkpoint( |
| 237 | mtr_buf_t* buf); |
| 238 | /** |
| 239 | Checks that there is enough free space in the log to start a new query step. |
| 240 | Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this |
| 241 | function may only be called if the calling thread owns no synchronization |
| 242 | objects! */ |
| 243 | void |
| 244 | log_check_margins(void); |
| 245 | |
| 246 | /************************************************************//** |
| 247 | Gets a log block flush bit. |
| 248 | @return TRUE if this block was the first to be written in a log flush */ |
| 249 | UNIV_INLINE |
| 250 | ibool |
| 251 | log_block_get_flush_bit( |
| 252 | /*====================*/ |
| 253 | const byte* log_block); /*!< in: log block */ |
| 254 | /************************************************************//** |
| 255 | Gets a log block number stored in the header. |
| 256 | @return log block number stored in the block header */ |
| 257 | UNIV_INLINE |
| 258 | ulint |
| 259 | log_block_get_hdr_no( |
| 260 | /*=================*/ |
| 261 | const byte* log_block); /*!< in: log block */ |
| 262 | /************************************************************//** |
| 263 | Gets a log block data length. |
| 264 | @return log block data length measured as a byte offset from the block start */ |
| 265 | UNIV_INLINE |
| 266 | ulint |
| 267 | log_block_get_data_len( |
| 268 | /*===================*/ |
| 269 | const byte* log_block); /*!< in: log block */ |
| 270 | /************************************************************//** |
| 271 | Sets the log block data length. */ |
| 272 | UNIV_INLINE |
| 273 | void |
| 274 | log_block_set_data_len( |
| 275 | /*===================*/ |
| 276 | byte* log_block, /*!< in/out: log block */ |
| 277 | ulint len); /*!< in: data length */ |
| 278 | /************************************************************//** |
| 279 | Calculates the checksum for a log block. |
| 280 | @return checksum */ |
| 281 | UNIV_INLINE |
| 282 | ulint |
| 283 | log_block_calc_checksum( |
| 284 | /*====================*/ |
| 285 | const byte* block); /*!< in: log block */ |
| 286 | |
| 287 | /** Calculates the checksum for a log block using the CRC32 algorithm. |
| 288 | @param[in] block log block |
| 289 | @return checksum */ |
| 290 | UNIV_INLINE |
| 291 | ulint |
| 292 | log_block_calc_checksum_crc32( |
| 293 | const byte* block); |
| 294 | |
| 295 | /** Calculates the checksum for a log block using the "no-op" algorithm. |
| 296 | @return the calculated checksum value */ |
| 297 | UNIV_INLINE |
| 298 | ulint |
| 299 | log_block_calc_checksum_none(const byte*); |
| 300 | |
| 301 | /************************************************************//** |
| 302 | Gets a log block checksum field value. |
| 303 | @return checksum */ |
| 304 | UNIV_INLINE |
| 305 | ulint |
| 306 | log_block_get_checksum( |
| 307 | /*===================*/ |
| 308 | const byte* log_block); /*!< in: log block */ |
| 309 | /************************************************************//** |
| 310 | Sets a log block checksum field value. */ |
| 311 | UNIV_INLINE |
| 312 | void |
| 313 | log_block_set_checksum( |
| 314 | /*===================*/ |
| 315 | byte* log_block, /*!< in/out: log block */ |
| 316 | ulint checksum); /*!< in: checksum */ |
| 317 | /************************************************************//** |
| 318 | Gets a log block first mtr log record group offset. |
| 319 | @return first mtr log record group byte offset from the block start, 0 |
| 320 | if none */ |
| 321 | UNIV_INLINE |
| 322 | ulint |
| 323 | log_block_get_first_rec_group( |
| 324 | /*==========================*/ |
| 325 | const byte* log_block); /*!< in: log block */ |
| 326 | /************************************************************//** |
| 327 | Sets the log block first mtr log record group offset. */ |
| 328 | UNIV_INLINE |
| 329 | void |
| 330 | log_block_set_first_rec_group( |
| 331 | /*==========================*/ |
| 332 | byte* log_block, /*!< in/out: log block */ |
| 333 | ulint offset); /*!< in: offset, 0 if none */ |
| 334 | /************************************************************//** |
| 335 | Gets a log block checkpoint number field (4 lowest bytes). |
| 336 | @return checkpoint no (4 lowest bytes) */ |
| 337 | UNIV_INLINE |
| 338 | ulint |
| 339 | log_block_get_checkpoint_no( |
| 340 | /*========================*/ |
| 341 | const byte* log_block); /*!< in: log block */ |
| 342 | /************************************************************//** |
| 343 | Initializes a log block in the log buffer. */ |
| 344 | UNIV_INLINE |
| 345 | void |
| 346 | log_block_init( |
| 347 | /*===========*/ |
| 348 | byte* log_block, /*!< in: pointer to the log buffer */ |
| 349 | lsn_t lsn); /*!< in: lsn within the log block */ |
| 350 | /************************************************************//** |
| 351 | Converts a lsn to a log block number. |
| 352 | @return log block number, it is > 0 and <= 1G */ |
| 353 | UNIV_INLINE |
| 354 | ulint |
| 355 | log_block_convert_lsn_to_no( |
| 356 | /*========================*/ |
| 357 | lsn_t lsn); /*!< in: lsn of a byte within the block */ |
| 358 | /******************************************************//** |
| 359 | Prints info of the log. */ |
| 360 | void |
| 361 | log_print( |
| 362 | /*======*/ |
| 363 | FILE* file); /*!< in: file where to print */ |
| 364 | /******************************************************//** |
| 365 | Peeks the current lsn. |
| 366 | @return TRUE if success, FALSE if could not get the log system mutex */ |
| 367 | ibool |
| 368 | log_peek_lsn( |
| 369 | /*=========*/ |
| 370 | lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */ |
| 371 | /**********************************************************************//** |
| 372 | Refreshes the statistics used to print per-second averages. */ |
| 373 | void |
| 374 | log_refresh_stats(void); |
| 375 | /*===================*/ |
| 376 | |
| 377 | /** Whether to generate and require checksums on the redo log pages */ |
| 378 | extern my_bool innodb_log_checksums; |
| 379 | |
/* Values used as flags */
#define LOG_FLUSH	7652559
#define LOG_CHECKPOINT	78656949

/* The lsn counting starts from this value, which must be non-zero */
#define LOG_START_LSN	((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
| 386 | |
/* Offsets of a log block header */

/** Block number, which must be > 0 and is allowed to wrap around at 2G;
the highest bit is set to 1 if this is the first log block in a log flush
write segment */
#define LOG_BLOCK_HDR_NO		0
/** Mask used to get the highest bit in the preceding field */
#define LOG_BLOCK_FLUSH_BIT_MASK	0x80000000UL
/** Number of bytes of log written to this block */
#define LOG_BLOCK_HDR_DATA_LEN		4
/** Offset of the first start of an mtr log record group in this log
block, 0 if none. If the value is the same as LOG_BLOCK_HDR_DATA_LEN, it
means that the first rec group has not yet been catenated to this log
block, but if it will, it will start at this offset. An archive recovery
can start parsing the log records starting from this offset in this log
block, if the value is not 0. */
#define LOG_BLOCK_FIRST_REC_GROUP	6
/** 4 lower bytes of the value of log_sys.next_checkpoint_no when the log
block was last written to. If the block has not yet been written full,
this value is only updated before a log buffer flush. */
#define LOG_BLOCK_CHECKPOINT_NO		8
/** Size of the log block header in bytes */
#define LOG_BLOCK_HDR_SIZE		12

/* Offsets of a log block trailer from the end of the block */

/** 4 byte checksum of the log block contents; in InnoDB versions
< 3.23.52 this did not contain the checksum but the same value as
.._HDR_NO */
#define LOG_BLOCK_CHECKSUM		4
/** Trailer size in bytes */
#define LOG_BLOCK_TRL_SIZE		4
| 425 | |
/** Offsets inside the checkpoint pages (redo log format version 1) @{ */
/** Checkpoint number */
#define LOG_CHECKPOINT_NO		0
/** Log sequence number up to which all changes have been flushed */
#define LOG_CHECKPOINT_LSN		8
/** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
#define LOG_CHECKPOINT_OFFSET		16
/** srv_log_buffer_size at the time of the checkpoint (not used) */
#define LOG_CHECKPOINT_LOG_BUF_SIZE	24
/** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits) */
#define LOG_CHECKPOINT_CRYPT_KEY	32
/** MariaDB 10.2.5 encrypted redo log random nonce (32 bits) */
#define LOG_CHECKPOINT_CRYPT_NONCE	36
/** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */
#define LOG_CHECKPOINT_CRYPT_MESSAGE	40
/** Start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
to this checkpoint, or 0 if the information has not been written.
The expansion is parenthesized so that the macro stays a single operand
inside larger expressions (e.g. 2 * LOG_CHECKPOINT_END_LSN). */
#define LOG_CHECKPOINT_END_LSN		(OS_FILE_LOG_BLOCK_SIZE - 16)

/* @} */
| 446 | |
/** Offsets of a log file header */
/* @{ */
/* NOTE(review): every macro name in this group was missing from this copy
of the header. LOG_HEADER_FORMAT, LOG_HEADER_START_LSN, LOG_HEADER_CREATOR,
LOG_HEADER_FORMAT_10_3, LOG_HEADER_FORMAT_CURRENT and
LOG_HEADER_FORMAT_ENCRYPTED are referenced by name elsewhere in this file.
LOG_HEADER_PAD1, LOG_HEADER_CREATOR_END, LOG_HEADER_CREATOR_CURRENT,
LOG_HEADER_FORMAT_3_23 and LOG_HEADER_FORMAT_10_2 are the names these
constants are expected to carry -- confirm against their users. */

/** Log file header format identifier (32-bit unsigned big-endian integer).
This used to be called LOG_GROUP_ID and always written as 0,
because InnoDB never supported more than one copy of the redo log. */
#define LOG_HEADER_FORMAT		0
/** 4 unused (zero-initialized) bytes. In format version 0, the
LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
which the LOG_FILE_START_LSN was renamed to. */
#define LOG_HEADER_PAD1			4
/** LSN of the start of data in this log file (with format version 1;
in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
#define LOG_HEADER_START_LSN		8
/** A null-terminated string which will contain either the string 'ibbackup'
and the creation time if the log file was created by mysqlbackup --restore,
or the MySQL version that created the redo log file. */
#define LOG_HEADER_CREATOR		16
/** End of the log file creator field. */
#define LOG_HEADER_CREATOR_END		(LOG_HEADER_CREATOR + 32)
/** Contents of the LOG_HEADER_CREATOR field */
#define LOG_HEADER_CREATOR_CURRENT \
	"MariaDB " \
	IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
	IB_TO_STR(MYSQL_VERSION_MINOR) "." \
	IB_TO_STR(MYSQL_VERSION_PATCH)

/** The original (not version-tagged) InnoDB redo log format */
#define LOG_HEADER_FORMAT_3_23		0
/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
#define LOG_HEADER_FORMAT_10_2		1
/** The MariaDB 10.3.2 log format */
#define LOG_HEADER_FORMAT_10_3		103
/** The redo log format identifier corresponding to the current format version.
Stored in LOG_HEADER_FORMAT. */
#define LOG_HEADER_FORMAT_CURRENT	LOG_HEADER_FORMAT_10_3
/** Encrypted MariaDB redo log */
#define LOG_HEADER_FORMAT_ENCRYPTED	(1U<<31)

/* @} */
| 486 | |
/** First checkpoint field in the log header. We write alternately to the
checkpoint fields when we make new checkpoints; this field is only defined
in the first log file of a log group. */
#define LOG_CHECKPOINT_1	OS_FILE_LOG_BLOCK_SIZE
/** Second checkpoint field in the log header */
#define LOG_CHECKPOINT_2	(3 * OS_FILE_LOG_BLOCK_SIZE)
/** Log file header size: four log blocks */
#define LOG_FILE_HDR_SIZE	(4 * OS_FILE_LOG_BLOCK_SIZE)
| 497 | |
/** Corruption status of a log group */
enum log_group_state_t {
	LOG_GROUP_OK,		/*!< no corruption detected */
	LOG_GROUP_CORRUPTED	/*!< corrupted */
};
| 505 | |
| 506 | typedef ib_mutex_t LogSysMutex; |
| 507 | typedef ib_mutex_t FlushOrderMutex; |
| 508 | |
| 509 | /** Redo log buffer */ |
| 510 | struct log_t{ |
| 511 | MY_ALIGNED(CACHE_LINE_SIZE) |
| 512 | lsn_t lsn; /*!< log sequence number */ |
| 513 | ulong buf_free; /*!< first free offset within the log |
| 514 | buffer in use */ |
| 515 | |
| 516 | MY_ALIGNED(CACHE_LINE_SIZE) |
| 517 | LogSysMutex mutex; /*!< mutex protecting the log */ |
| 518 | MY_ALIGNED(CACHE_LINE_SIZE) |
| 519 | LogSysMutex write_mutex; /*!< mutex protecting writing to log */ |
| 520 | MY_ALIGNED(CACHE_LINE_SIZE) |
| 521 | FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to |
| 522 | the flush list when we are putting |
| 523 | dirty blocks in the list. The idea |
| 524 | behind this mutex is to be able |
| 525 | to release log_sys.mutex during |
| 526 | mtr_commit and still ensure that |
| 527 | insertions in the flush_list happen |
| 528 | in the LSN order. */ |
| 529 | byte* buf; /*!< Memory of double the |
| 530 | srv_log_buffer_size is |
| 531 | allocated here. This pointer will change |
| 532 | however to either the first half or the |
| 533 | second half in turns, so that log |
| 534 | write/flush to disk don't block |
| 535 | concurrent mtrs which will write |
| 536 | log to this buffer. Care to switch back |
| 537 | to the first half before freeing/resizing |
| 538 | must be undertaken. */ |
| 539 | bool first_in_use; /*!< true if buf points to the first |
| 540 | half of the aligned(buf_ptr), false |
| 541 | if the second half */ |
| 542 | ulong max_buf_free; /*!< recommended maximum value of |
| 543 | buf_free for the buffer in use, after |
| 544 | which the buffer is flushed */ |
| 545 | bool check_flush_or_checkpoint; |
| 546 | /*!< this is set when there may |
| 547 | be need to flush the log buffer, or |
| 548 | preflush buffer pool pages, or make |
| 549 | a checkpoint; this MUST be TRUE when |
| 550 | lsn - last_checkpoint_lsn > |
| 551 | max_checkpoint_age; this flag is |
| 552 | peeked at by log_free_check(), which |
| 553 | does not reserve the log mutex */ |
| 554 | |
| 555 | /** Log files. Protected by mutex or write_mutex. */ |
| 556 | struct files { |
| 557 | /** number of files */ |
| 558 | ulint n_files; |
| 559 | /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */ |
| 560 | ulint format; |
| 561 | /** individual log file size in bytes, including the header */ |
| 562 | lsn_t file_size; |
| 563 | /** corruption status */ |
| 564 | log_group_state_t state; |
| 565 | /** lsn used to fix coordinates within the log group */ |
| 566 | lsn_t lsn; |
| 567 | /** the byte offset of the above lsn */ |
| 568 | lsn_t lsn_offset; |
| 569 | |
| 570 | /** unaligned buffers */ |
| 571 | byte* ; |
| 572 | /** buffers for each file header in the group */ |
| 573 | byte* [SRV_N_LOG_FILES_MAX]; |
| 574 | |
| 575 | /** used only in recovery: recovery scan succeeded up to this |
| 576 | lsn in this log group */ |
| 577 | lsn_t scanned_lsn; |
| 578 | |
| 579 | /** @return whether the redo log is encrypted */ |
| 580 | bool is_encrypted() const { return format & LOG_HEADER_FORMAT_ENCRYPTED; } |
| 581 | /** @return capacity in bytes */ |
| 582 | lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; } |
| 583 | /** Calculate the offset of a log sequence number. |
| 584 | @param[in] lsn log sequence number |
| 585 | @return offset within the log */ |
| 586 | inline lsn_t calc_lsn_offset(lsn_t lsn) const; |
| 587 | |
| 588 | /** Set the field values to correspond to a given lsn. */ |
| 589 | void set_fields(lsn_t lsn) |
| 590 | { |
| 591 | lsn_offset = calc_lsn_offset(lsn); |
| 592 | this->lsn = lsn; |
| 593 | } |
| 594 | |
| 595 | /** Read a log segment to log_sys.buf. |
| 596 | @param[in,out] start_lsn in: read area start, |
| 597 | out: the last read valid lsn |
| 598 | @param[in] end_lsn read area end |
| 599 | @return whether no invalid blocks (e.g checksum mismatch) were found */ |
| 600 | bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn); |
| 601 | |
| 602 | /** Initialize the redo log buffer. |
| 603 | @param[in] n_files number of files */ |
| 604 | void create(ulint n_files); |
| 605 | |
| 606 | /** Close the redo log buffer. */ |
| 607 | void close() |
| 608 | { |
| 609 | ut_free(file_header_bufs_ptr); |
| 610 | n_files = 0; |
| 611 | file_header_bufs_ptr = NULL; |
| 612 | memset(file_header_bufs, 0, sizeof file_header_bufs); |
| 613 | } |
| 614 | } log; |
| 615 | |
| 616 | /** The fields involved in the log buffer flush @{ */ |
| 617 | |
| 618 | ulong buf_next_to_write;/*!< first offset in the log buffer |
| 619 | where the byte content may not exist |
| 620 | written to file, e.g., the start |
| 621 | offset of a log record catenated |
| 622 | later; this is advanced when a flush |
| 623 | operation is completed to all the log |
| 624 | groups */ |
| 625 | volatile bool is_extending; /*!< this is set to true during extend |
| 626 | the log buffer size */ |
| 627 | lsn_t write_lsn; /*!< last written lsn */ |
| 628 | lsn_t current_flush_lsn;/*!< end lsn for the current running |
| 629 | write + flush operation */ |
| 630 | lsn_t flushed_to_disk_lsn; |
| 631 | /*!< how far we have written the log |
| 632 | AND flushed to disk */ |
| 633 | ulint n_pending_flushes;/*!< number of currently |
| 634 | pending flushes; protected by |
| 635 | log_sys.mutex */ |
| 636 | os_event_t flush_event; /*!< this event is in the reset state |
| 637 | when a flush is running; |
| 638 | os_event_set() and os_event_reset() |
| 639 | are protected by log_sys.mutex */ |
| 640 | ulint n_log_ios; /*!< number of log i/os initiated thus |
| 641 | far */ |
| 642 | ulint n_log_ios_old; /*!< number of log i/o's at the |
| 643 | previous printout */ |
| 644 | time_t last_printout_time;/*!< when log_print was last time |
| 645 | called */ |
| 646 | /* @} */ |
| 647 | |
| 648 | /** Fields involved in checkpoints @{ */ |
| 649 | lsn_t log_group_capacity; /*!< capacity of the log group; if |
| 650 | the checkpoint age exceeds this, it is |
| 651 | a serious error because it is possible |
| 652 | we will then overwrite log and spoil |
| 653 | crash recovery */ |
| 654 | lsn_t max_modified_age_async; |
| 655 | /*!< when this recommended |
| 656 | value for lsn - |
| 657 | buf_pool_get_oldest_modification() |
| 658 | is exceeded, we start an |
| 659 | asynchronous preflush of pool pages */ |
| 660 | lsn_t max_modified_age_sync; |
| 661 | /*!< when this recommended |
| 662 | value for lsn - |
| 663 | buf_pool_get_oldest_modification() |
| 664 | is exceeded, we start a |
| 665 | synchronous preflush of pool pages */ |
| 666 | lsn_t max_checkpoint_age_async; |
| 667 | /*!< when this checkpoint age |
| 668 | is exceeded we start an |
| 669 | asynchronous writing of a new |
| 670 | checkpoint */ |
| 671 | lsn_t max_checkpoint_age; |
| 672 | /*!< this is the maximum allowed value |
| 673 | for lsn - last_checkpoint_lsn when a |
| 674 | new query step is started */ |
| 675 | ib_uint64_t next_checkpoint_no; |
| 676 | /*!< next checkpoint number */ |
| 677 | lsn_t last_checkpoint_lsn; |
| 678 | /*!< latest checkpoint lsn */ |
| 679 | lsn_t next_checkpoint_lsn; |
| 680 | /*!< next checkpoint lsn */ |
| 681 | mtr_buf_t* append_on_checkpoint; |
| 682 | /*!< extra redo log records to write |
| 683 | during a checkpoint, or NULL if none. |
| 684 | The pointer is protected by |
| 685 | log_sys.mutex, and the data must |
| 686 | remain constant as long as this |
| 687 | pointer is not NULL. */ |
| 688 | ulint n_pending_checkpoint_writes; |
| 689 | /*!< number of currently pending |
| 690 | checkpoint writes */ |
| 691 | rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a |
| 692 | checkpoint write is running; a thread |
| 693 | should wait for this without owning |
| 694 | the log mutex */ |
| 695 | |
| 696 | /** buffer for checkpoint header */ |
| 697 | MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) |
| 698 | byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE]; |
| 699 | /* @} */ |
| 700 | |
| 701 | private: |
| 702 | bool m_initialised; |
| 703 | public: |
| 704 | /** |
| 705 | Constructor. |
| 706 | |
| 707 | Some members may require late initialisation, thus we just mark object as |
| 708 | uninitialised. Real initialisation happens in create(). |
| 709 | */ |
| 710 | log_t(): m_initialised(false) {} |
| 711 | |
| 712 | /** @return whether the redo log is encrypted */ |
| 713 | bool is_encrypted() const { return(log.is_encrypted()); } |
| 714 | |
| 715 | bool is_initialised() { return m_initialised; } |
| 716 | |
| 717 | /** Complete an asynchronous checkpoint write. */ |
| 718 | void complete_checkpoint(); |
| 719 | |
| 720 | /** Initialise the redo log subsystem. */ |
| 721 | void create(); |
| 722 | |
| 723 | /** Shut down the redo log subsystem. */ |
| 724 | void close(); |
| 725 | }; |
| 726 | |
| 727 | /** Redo log system */ |
| 728 | extern log_t log_sys; |
| 729 | |
| 730 | /** Calculate the offset of a log sequence number. |
| 731 | @param[in] lsn log sequence number |
| 732 | @return offset within the log */ |
| 733 | inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const |
| 734 | { |
| 735 | ut_ad(this == &log_sys.log); |
| 736 | /* The lsn parameters are updated while holding both the mutexes |
| 737 | and it is ok to have either of them while reading */ |
| 738 | ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned()); |
| 739 | const lsn_t group_size= capacity(); |
| 740 | lsn_t l= lsn - this->lsn; |
| 741 | if (longlong(l) < 0) { |
| 742 | l= lsn_t(-longlong(l)) % group_size; |
| 743 | l= group_size - l; |
| 744 | } |
| 745 | |
| 746 | l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size); |
| 747 | l%= group_size; |
| 748 | return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE)); |
| 749 | } |
| 750 | |
/** Test whether the flush order mutex is owned. */
#define log_flush_order_mutex_own() \
	mutex_own(&log_sys.log_flush_order_mutex)

/** Acquire the flush order mutex. */
#define log_flush_order_mutex_enter() \
	do { \
		mutex_enter(&log_sys.log_flush_order_mutex); \
	} while (0)

/** Release the flush order mutex. */
#define log_flush_order_mutex_exit() \
	do { \
		mutex_exit(&log_sys.log_flush_order_mutex); \
	} while (0)

/** Test whether the log sys mutex is owned. */
#define log_mutex_own()			mutex_own(&log_sys.mutex)

/** Test whether the log sys write mutex is owned. */
#define log_write_mutex_own()		mutex_own(&log_sys.write_mutex)

/** Acquire the log sys mutex. */
#define log_mutex_enter()		mutex_enter(&log_sys.mutex)

/** Acquire the log sys write mutex. */
#define log_write_mutex_enter()		mutex_enter(&log_sys.write_mutex)

/** Acquire all the log sys mutexes: write_mutex first, then mutex. */
#define log_mutex_enter_all() \
	do { \
		mutex_enter(&log_sys.write_mutex); \
		mutex_enter(&log_sys.mutex); \
	} while (0)

/** Release the log sys mutex. */
#define log_mutex_exit()		mutex_exit(&log_sys.mutex)

/** Release the log sys write mutex. */
#define log_write_mutex_exit()		mutex_exit(&log_sys.write_mutex)

/** Release all the log sys mutexes: mutex first, then write_mutex. */
#define log_mutex_exit_all() \
	do { \
		mutex_exit(&log_sys.mutex); \
		mutex_exit(&log_sys.write_mutex); \
	} while (0)
| 793 | |
| 794 | /* log scrubbing speed, in bytes/sec */ |
| 795 | extern ulonglong innodb_scrub_log_speed; |
| 796 | |
| 797 | /** Event to wake up log_scrub_thread */ |
| 798 | extern os_event_t log_scrub_event; |
| 799 | /** Whether log_scrub_thread is active */ |
| 800 | extern bool log_scrub_thread_active; |
| 801 | |
| 802 | #include "log0log.ic" |
| 803 | |
| 804 | #endif |
| 805 | |