1/*****************************************************************************
2
3Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2009, Google Inc.
5Copyright (c) 2014, 2018, MariaDB Corporation.
6
7Portions of this file contain modifications contributed and copyrighted by
8Google, Inc. Those modifications are gratefully acknowledged and are described
9briefly in the InnoDB documentation. The contributions by Google are
10incorporated with their permission, and subject to the conditions contained in
11the file COPYING.Google.
12
13This program is free software; you can redistribute it and/or modify it under
14the terms of the GNU General Public License as published by the Free Software
15Foundation; version 2 of the License.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25*****************************************************************************/
26
27/**************************************************//**
28@file log/log0log.cc
29Database log
30
31Created 12/9/1995 Heikki Tuuri
32*******************************************************/
33
34#include "ha_prototypes.h"
35#include <debug_sync.h>
36#include <my_service_manager.h>
37
38#include "log0log.h"
39#include "log0crypt.h"
40#include "mem0mem.h"
41#include "buf0buf.h"
42#include "buf0flu.h"
43#include "lock0lock.h"
44#include "log0recv.h"
45#include "fil0fil.h"
46#include "dict0boot.h"
47#include "dict0stats_bg.h"
48#include "btr0defragment.h"
49#include "srv0srv.h"
50#include "srv0start.h"
51#include "trx0sys.h"
52#include "trx0trx.h"
53#include "trx0roll.h"
54#include "srv0mon.h"
55#include "sync0sync.h"
56
57/*
58General philosophy of InnoDB redo-logs:
59
601) Every change to a contents of a data page must be done
61through mtr, which in mtr_commit() writes log records
62to the InnoDB redo log.
63
642) Normally these changes are performed using a mlog_write_ulint()
65or similar function.
66
673) In some page level operations only a code number of a
68c-function and its parameters are written to the log to
69reduce the size of the log.
70
71 3a) You should not add parameters to these kind of functions
72 (e.g. trx_undo_header_create())
73
 3b) You should not add functionality that changes the behavior
 relative to the old implementation, or that depends on data
 outside of the page. These kinds of functions should implement a
 self-contained page transformation, and they should be left
 unchanged unless you have very essential reasons to change the log
 semantics or format.
80
81*/
82
/** Redo log system */
log_t log_sys;

/** Whether to generate and require checksums on the redo log pages */
my_bool innodb_log_checksums;

/** Pointer to the log checksum calculation function */
log_checksum_func_t log_checksum_algorithm_ptr;

/* Next log block number to do dummy record filling if no log records written
for a while */
static ulint next_lbn_to_pad = 0;

/* These control how often we print warnings if the last checkpoint is too
old */
static bool log_has_printed_chkp_warning = false;
static time_t log_last_warning_time;

/* Same rate-limiting state for the "single transaction exceeds the log
group capacity" warning in log_margin_checkpoint_age().
NOTE(review): "margine" is a historical misspelling of "margin"; kept
as-is because renaming would touch multiple functions. */
static bool log_has_printed_chkp_margine_warning = false;
static time_t log_last_margine_warning_time;

/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)

/* Margins for free space in the log buffer after a log entry is catenated */
#define LOG_BUF_FLUSH_RATIO 2
#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN \
 + (4U << srv_page_size_shift))

/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */

#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32

/* This parameter controls synchronous preflushing of modified buffer pages */
#define LOG_POOL_PREFLUSH_RATIO_SYNC 16

/* The same ratio for asynchronous preflushing; this value should be less than
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8

/* Codes used in unlocking flush latches */
#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
#define LOG_UNLOCK_FLUSH_LOCK 2

/** Event to wake up log_scrub_thread */
os_event_t log_scrub_event;
/** Whether log_scrub_thread is active */
bool log_scrub_thread_active;

/* Forward declaration of the log scrubbing thread entry point,
started from log_t::create() when srv_scrub_log is enabled. */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(log_scrub_thread)(void*);
136
137/****************************************************************//**
138Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
139exists.
140@return LSN of oldest modification */
141static
142lsn_t
143log_buf_pool_get_oldest_modification(void)
144/*======================================*/
145{
146 lsn_t lsn;
147
148 ut_ad(log_mutex_own());
149
150 lsn = buf_pool_get_oldest_modification();
151
152 if (!lsn) {
153
154 lsn = log_sys.lsn;
155 }
156
157 return(lsn);
158}
159
160/** Extends the log buffer.
161@param[in] len requested minimum size in bytes */
162void log_buffer_extend(ulong len)
163{
164 byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE];
165
166 log_mutex_enter_all();
167
168 while (log_sys.is_extending) {
169 /* Another thread is trying to extend already.
170 Needs to wait for. */
171 log_mutex_exit_all();
172
173 log_buffer_flush_to_disk();
174
175 log_mutex_enter_all();
176
177 if (srv_log_buffer_size > len) {
178 /* Already extended enough by the others */
179 log_mutex_exit_all();
180 return;
181 }
182 }
183
184 if (len >= srv_log_buffer_size / 2) {
185 DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
186 DBUG_SUICIDE(););
187
188 /* log_buffer is too small. try to extend instead of crash. */
189 ib::warn() << "The redo log transaction size " << len <<
190 " exceeds innodb_log_buffer_size="
191 << srv_log_buffer_size << " / 2). Trying to extend it.";
192 }
193
194 log_sys.is_extending = true;
195
196 while (ut_calc_align_down(log_sys.buf_free,
197 OS_FILE_LOG_BLOCK_SIZE)
198 != ut_calc_align_down(log_sys.buf_next_to_write,
199 OS_FILE_LOG_BLOCK_SIZE)) {
200 /* Buffer might have >1 blocks to write still. */
201 log_mutex_exit_all();
202
203 log_buffer_flush_to_disk();
204
205 log_mutex_enter_all();
206 }
207
208 ulong move_start = ut_calc_align_down(
209 log_sys.buf_free,
210 OS_FILE_LOG_BLOCK_SIZE);
211 ulong move_end = log_sys.buf_free;
212
213 /* store the last log block in buffer */
214 ut_memcpy(tmp_buf, log_sys.buf + move_start,
215 move_end - move_start);
216
217 log_sys.buf_free -= move_start;
218 log_sys.buf_next_to_write -= move_start;
219
220 /* free previous after getting the right address */
221 if (!log_sys.first_in_use) {
222 log_sys.buf -= srv_log_buffer_size;
223 }
224 ut_free_dodump(log_sys.buf, srv_log_buffer_size * 2);
225
226 /* reallocate log buffer */
227 srv_log_buffer_size = len;
228
229 log_sys.buf = static_cast<byte*>(
230 ut_malloc_dontdump(srv_log_buffer_size * 2));
231 TRASH_ALLOC(log_sys.buf, srv_log_buffer_size * 2);
232
233 log_sys.first_in_use = true;
234
235 log_sys.max_buf_free = srv_log_buffer_size / LOG_BUF_FLUSH_RATIO
236 - LOG_BUF_FLUSH_MARGIN;
237
238 /* restore the last log block */
239 ut_memcpy(log_sys.buf, tmp_buf, move_end - move_start);
240
241 ut_ad(log_sys.is_extending);
242 log_sys.is_extending = false;
243
244 log_mutex_exit_all();
245
246 ib::info() << "innodb_log_buffer_size was extended to "
247 << srv_log_buffer_size << ".";
248}
249
250/** Calculate actual length in redo buffer and file including
251block header and trailer.
252@param[in] len length to write
253@return actual length to write including header and trailer. */
254static inline
255ulint
256log_calculate_actual_len(
257 ulint len)
258{
259 ut_ad(log_mutex_own());
260
261 /* actual length stored per block */
262 const ulint len_per_blk = OS_FILE_LOG_BLOCK_SIZE
263 - (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
264
265 /* actual data length in last block already written */
266 ulint extra_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE);
267
268 ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
269 extra_len -= LOG_BLOCK_HDR_SIZE;
270
271 /* total extra length for block header and trailer */
272 extra_len = ((len + extra_len) / len_per_blk)
273 * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
274
275 return(len + extra_len);
276}
277
/** Check margin not to overwrite transaction log from the last checkpoint.
If would estimate the log write to exceed the log_group_capacity,
waits for the checkpoint is done enough.
@param[in]	len	length of the data to be written */

void
log_margin_checkpoint_age(
	ulint	len)
{
	/* Total bytes the pending write will occupy, including log
	block headers and trailers. */
	ulint	margin = log_calculate_actual_len(len);

	ut_ad(log_mutex_own());

	if (margin > log_sys.log_group_capacity) {
		/* return with warning output to avoid deadlock */
		/* A single mtr larger than the whole log group can never
		be made safe by checkpointing; warn (rate-limited to one
		message per 15 seconds) and proceed anyway. */
		if (!log_has_printed_chkp_margine_warning
		    || difftime(time(NULL),
				log_last_margine_warning_time) > 15) {
			log_has_printed_chkp_margine_warning = true;
			log_last_margine_warning_time = time(NULL);

			ib::error() << "The transaction log files are too"
				" small for the single transaction log (size="
				<< len << "). So, the last checkpoint age"
				" might exceed the log group capacity "
				<< log_sys.log_group_capacity << ".";
		}

		return;
	}

	/* Our margin check should ensure that we never reach this condition.
	Try to do checkpoint once. We cannot keep waiting here as it might
	result in hang in case the current mtr has latch on oldest lsn */
	if (log_sys.lsn - log_sys.last_checkpoint_lsn + margin
	    > log_sys.log_group_capacity) {
		/* The log write of 'len' might overwrite the transaction log
		after the last checkpoint. Makes checkpoint. */

		bool	flushed_enough = false;

		/* If the oldest dirty page is already recent enough, the
		checkpoint can advance without further page flushing. */
		if (log_sys.lsn - log_buf_pool_get_oldest_modification()
		    + margin
		    <= log_sys.log_group_capacity) {
			flushed_enough = true;
		}

		log_sys.check_flush_or_checkpoint = true;
		/* Release the log mutex before the potentially slow
		checkpoint; it is re-acquired below. */
		log_mutex_exit();

		DEBUG_SYNC_C("margin_checkpoint_age_rescue");

		if (!flushed_enough) {
			/* Give the page cleaner a moment (100 ms) to
			flush more pages before checkpointing. */
			os_thread_sleep(100000);
		}
		log_checkpoint(true, false);

		log_mutex_enter();
	}

	return;
}
340
341/** Open the log for log_write_low. The log must be closed with log_close.
342@param[in] len length of the data to be written
343@return start lsn of the log record */
344lsn_t
345log_reserve_and_open(
346 ulint len)
347{
348 ulint len_upper_limit;
349#ifdef UNIV_DEBUG
350 ulint count = 0;
351#endif /* UNIV_DEBUG */
352
353loop:
354 ut_ad(log_mutex_own());
355
356 if (log_sys.is_extending) {
357 log_mutex_exit();
358
359 /* Log buffer size is extending. Writing up to the next block
360 should wait for the extending finished. */
361
362 os_thread_sleep(100000);
363
364 ut_ad(++count < 50);
365
366 log_mutex_enter();
367 goto loop;
368 }
369
370 /* Calculate an upper limit for the space the string may take in the
371 log buffer */
372
373 len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
374 + (5 * len) / 4;
375
376 if (log_sys.buf_free + len_upper_limit > srv_log_buffer_size) {
377 log_mutex_exit();
378
379 DEBUG_SYNC_C("log_buf_size_exceeded");
380
381 /* Not enough free space, do a write of the log buffer */
382 log_buffer_sync_in_background(false);
383
384 srv_stats.log_waits.inc();
385
386 ut_ad(++count < 50);
387
388 log_mutex_enter();
389 goto loop;
390 }
391
392 return(log_sys.lsn);
393}
394
/************************************************************//**
Writes to the log the string given. It is assumed that the caller holds the
log mutex. The string may span multiple log blocks; each full block gets
its header/trailer accounted for in log_sys.lsn. */
void
log_write_low(
/*==========*/
	const byte*	str,		/*!< in: string */
	ulint		str_len)	/*!< in: string length */
{
	ulint	len;
	ulint	data_len;
	byte*	log_block;

	ut_ad(log_mutex_own());
part_loop:
	/* Calculate a part length */

	/* data_len = bytes of the current block in use if the whole
	remaining string were appended to it. */
	data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;

	if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {

		/* The string fits within the current log block */

		len = str_len;
	} else {
		/* The string spills over: fill the current block up to
		its trailer and handle the rest in the next iteration. */
		data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;

		len = OS_FILE_LOG_BLOCK_SIZE
			- (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
			- LOG_BLOCK_TRL_SIZE;
	}

	memcpy(log_sys.buf + log_sys.buf_free, str, len);

	str_len -= len;
	str = str + len;

	/* Start address of the block the copied bytes landed in. */
	log_block = static_cast<byte*>(
		ut_align_down(log_sys.buf + log_sys.buf_free,
			      OS_FILE_LOG_BLOCK_SIZE));

	log_block_set_data_len(log_block, data_len);

	if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
		/* This block became full */
		log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
		log_block_set_checkpoint_no(log_block,
					    log_sys.next_checkpoint_no);
		/* The lsn also advances over the header and trailer of
		the completed block. */
		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;

		log_sys.lsn += len;

		/* Initialize the next block header */
		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE,
			       log_sys.lsn);
	} else {
		log_sys.lsn += len;
	}

	log_sys.buf_free += ulong(len);

	ut_ad(log_sys.buf_free <= srv_log_buffer_size);

	if (str_len > 0) {
		goto part_loop;
	}

	srv_stats.log_write_requests.inc();
}
464
/************************************************************//**
Closes the log. Finalizes the current block's first-rec-group field,
and requests a flush or checkpoint if the buffer or the checkpoint
age has grown too large.
@return lsn */
lsn_t
log_close(void)
/*===========*/
{
	byte*	log_block;
	ulint	first_rec_group;
	lsn_t	oldest_lsn;
	lsn_t	lsn;
	lsn_t	checkpoint_age;

	ut_ad(log_mutex_own());

	lsn = log_sys.lsn;

	/* Block that the last bytes were written into. */
	log_block = static_cast<byte*>(
		ut_align_down(log_sys.buf + log_sys.buf_free,
			      OS_FILE_LOG_BLOCK_SIZE));

	first_rec_group = log_block_get_first_rec_group(log_block);

	if (first_rec_group == 0) {
		/* We initialized a new log block which was not written
		full by the current mtr: the next mtr log record group
		will start within this block at the offset data_len */

		log_block_set_first_rec_group(
			log_block, log_block_get_data_len(log_block));
	}

	if (log_sys.buf_free > log_sys.max_buf_free) {
		/* Ask background processing to write the buffer out. */
		log_sys.check_flush_or_checkpoint = true;
	}

	checkpoint_age = lsn - log_sys.last_checkpoint_lsn;

	if (checkpoint_age >= log_sys.log_group_capacity) {
		DBUG_EXECUTE_IF(
			"print_all_chkp_warnings",
			log_has_printed_chkp_warning = false;);

		/* Rate-limit the error to one message per 15 seconds. */
		if (!log_has_printed_chkp_warning
		    || difftime(time(NULL), log_last_warning_time) > 15) {

			log_has_printed_chkp_warning = true;
			log_last_warning_time = time(NULL);

			ib::error() << "The age of the last checkpoint is "
				<< checkpoint_age << ", which exceeds the log"
				" group capacity "
				<< log_sys.log_group_capacity
				<< ".";
		}
	}

	if (checkpoint_age <= log_sys.max_modified_age_sync) {
		goto function_exit;
	}

	oldest_lsn = buf_pool_get_oldest_modification();

	/* Request flushing/checkpointing when the age limits are
	exceeded, or when there are no dirty pages at all (the
	checkpoint can then advance cheaply). */
	if (!oldest_lsn
	    || lsn - oldest_lsn > log_sys.max_modified_age_sync
	    || checkpoint_age > log_sys.max_checkpoint_age_async) {
		log_sys.check_flush_or_checkpoint = true;
	}
function_exit:

	return(lsn);
}
537
538/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
539and lsn - buf_get_oldest_modification().
540@param[in] file_size requested innodb_log_file_size
541@retval true on success
542@retval false if the smallest log group is too small to
543accommodate the number of OS threads in the database server */
544bool
545log_set_capacity(ulonglong file_size)
546{
547 lsn_t margin;
548 ulint free;
549
550 lsn_t smallest_capacity = (file_size - LOG_FILE_HDR_SIZE)
551 * srv_n_log_files;
552 /* Add extra safety */
553 smallest_capacity -= smallest_capacity / 10;
554
555 /* For each OS thread we must reserve so much free space in the
556 smallest log group that it can accommodate the log entries produced
557 by single query steps: running out of free log space is a serious
558 system error which requires rebooting the database. */
559
560 free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
561 + LOG_CHECKPOINT_EXTRA_FREE;
562 if (free >= smallest_capacity / 2) {
563 ib::error() << "Cannot continue operation. ib_logfiles are too"
564 " small for innodb_thread_concurrency="
565 << srv_thread_concurrency << ". The combined size of"
566 " ib_logfiles should be bigger than"
567 " 200 kB * innodb_thread_concurrency. "
568 << INNODB_PARAMETERS_MSG;
569 return(false);
570 }
571
572 margin = smallest_capacity - free;
573 margin = margin - margin / 10; /* Add still some extra safety */
574
575 log_mutex_enter();
576
577 log_sys.log_group_capacity = smallest_capacity;
578
579 log_sys.max_modified_age_async = margin
580 - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
581 log_sys.max_modified_age_sync = margin
582 - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
583
584 log_sys.max_checkpoint_age_async = margin - margin
585 / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
586 log_sys.max_checkpoint_age = margin;
587
588 log_mutex_exit();
589
590 return(true);
591}
592
/** Initialize the redo log subsystem. */
void log_t::create()
{
  ut_ad(this == &log_sys);
  ut_ad(!is_initialised());
  m_initialised= true;

  mutex_create(LATCH_ID_LOG_SYS, &mutex);
  mutex_create(LATCH_ID_LOG_WRITE, &write_mutex);
  mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_flush_order_mutex);

  /* Start the lsn from one log block from zero: this way every
  log record has a non-zero start lsn, a fact which we will use */

  lsn= LOG_START_LSN;

  ut_ad(srv_log_buffer_size >= 16 * OS_FILE_LOG_BLOCK_SIZE);
  ut_ad(srv_log_buffer_size >= 4U << srv_page_size_shift);

  /* Two adjacent buffer halves are allocated; log_buffer_switch()
  alternates between them. */
  buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size * 2));
  TRASH_ALLOC(buf, srv_log_buffer_size * 2);

  first_in_use= true;

  max_buf_free= srv_log_buffer_size / LOG_BUF_FLUSH_RATIO -
    LOG_BUF_FLUSH_MARGIN;
  check_flush_or_checkpoint= true;

  n_log_ios_old= n_log_ios;
  last_printout_time= time(NULL);

  buf_next_to_write= 0;
  is_extending= false;
  write_lsn= lsn;
  flushed_to_disk_lsn= 0;
  n_pending_flushes= 0;
  /* flush_event starts signalled: no log flush is in progress. */
  flush_event = os_event_create("log_flush_event");
  os_event_set(flush_event);
  n_log_ios= 0;
  /* NOTE(review): n_log_ios_old was already assigned above; this
  second assignment looks redundant — confirm before removing. */
  n_log_ios_old= 0;
  log_group_capacity= 0;
  max_modified_age_async= 0;
  max_modified_age_sync= 0;
  max_checkpoint_age_async= 0;
  max_checkpoint_age= 0;
  next_checkpoint_no= 0;
  next_checkpoint_lsn= 0;
  append_on_checkpoint= NULL;
  n_pending_checkpoint_writes= 0;

  last_checkpoint_lsn= lsn;
  rw_lock_create(checkpoint_lock_key, &checkpoint_lock, SYNC_NO_ORDER_CHECK);

  /* Format the first log block and account for its header in the
  free-offset and the lsn. */
  log_block_init(buf, lsn);
  log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);

  buf_free= LOG_BLOCK_HDR_SIZE;
  lsn= LOG_START_LSN + LOG_BLOCK_HDR_SIZE;

  MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, lsn - last_checkpoint_lsn);

  log_scrub_thread_active= !srv_read_only_mode && srv_scrub_log;
  if (log_scrub_thread_active) {
    log_scrub_event= os_event_create("log_scrub_event");
    os_thread_create(log_scrub_thread, NULL, NULL);
  }
}
660
661/** Initialize the redo log.
662@param[in] n_files number of files */
663void log_t::files::create(ulint n_files)
664{
665 ut_ad(n_files <= SRV_N_LOG_FILES_MAX);
666 ut_ad(this == &log_sys.log);
667 ut_ad(log_sys.is_initialised());
668
669 this->n_files= n_files;
670 format= srv_encrypt_log
671 ? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED
672 : LOG_HEADER_FORMAT_CURRENT;
673 file_size= srv_log_file_size;
674 state= LOG_GROUP_OK;
675 lsn= LOG_START_LSN;
676 lsn_offset= LOG_FILE_HDR_SIZE;
677
678 byte* ptr= static_cast<byte*>(ut_zalloc_nokey(LOG_FILE_HDR_SIZE * n_files
679 + OS_FILE_LOG_BLOCK_SIZE));
680 file_header_bufs_ptr= ptr;
681 ptr= static_cast<byte*>(ut_align(ptr, OS_FILE_LOG_BLOCK_SIZE));
682
683 memset(file_header_bufs, 0, sizeof file_header_bufs);
684
685 for (ulint i = 0; i < n_files; i++, ptr += LOG_FILE_HDR_SIZE)
686 file_header_bufs[i] = ptr;
687}
688
/******************************************************//**
Writes a log file header to a log file space. The header carries the
log format, the lsn at which the file's data starts, and the creator
string, protected by a CRC-32C checksum. */
static
void
log_file_header_flush(
	ulint	nth_file,	/*!< in: header to the nth file in the
				log file space */
	lsn_t	start_lsn)	/*!< in: log file data starts at this
				lsn */
{
	byte*	buf;
	lsn_t	dest_offset;

	ut_ad(log_write_mutex_own());
	ut_ad(!recv_no_log_write);
	ut_a(nth_file < log_sys.log.n_files);
	ut_ad((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
	      == LOG_HEADER_FORMAT_CURRENT);

	buf = log_sys.log.file_header_bufs[nth_file];

	/* Compose the header block: format, start lsn, creator string. */
	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
	mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
	mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
	strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
	       LOG_HEADER_CREATOR_CURRENT);
	ut_ad(LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR
	      >= sizeof LOG_HEADER_CREATOR_CURRENT);
	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));

	/* The header lives at the very start of the nth file. */
	dest_offset = nth_file * log_sys.log.file_size;

	DBUG_PRINT("ib_log", ("write " LSN_PF
			      " file " ULINTPF " header",
			      start_lsn, nth_file));

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	srv_stats.os_log_pending_writes.inc();

	const ulint	page_no = ulint(dest_offset >> srv_page_size_shift);

	/* Synchronous write of the single header block. */
	fil_io(IORequestLogWrite, true,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
	       univ_page_size,
	       ulint(dest_offset & (srv_page_size - 1)),
	       OS_FILE_LOG_BLOCK_SIZE, buf, NULL);

	srv_stats.os_log_pending_writes.dec();
}
741
742/******************************************************//**
743Stores a 4-byte checksum to the trailer checksum field of a log block
744before writing it to a log file. This checksum is used in recovery to
745check the consistency of a log block. */
746static
747void
748log_block_store_checksum(
749/*=====================*/
750 byte* block) /*!< in/out: pointer to a log block */
751{
752 log_block_set_checksum(block, log_block_calc_checksum(block));
753}
754
/******************************************************//**
Writes a buffer to a log file. The write may wrap over a file boundary
within the log group, in which case it is split into multiple fil_io()
calls and a fresh file header is written at each wrap. */
static
void
log_write_buf(
	byte*	buf,		/*!< in: buffer */
	ulint	len,		/*!< in: buffer len; must be divisible
				by OS_FILE_LOG_BLOCK_SIZE */
#ifdef UNIV_DEBUG
	ulint	pad_len,	/*!< in: pad len in the buffer len */
#endif /* UNIV_DEBUG */
	lsn_t	start_lsn,	/*!< in: start lsn of the buffer; must
				be divisible by
				OS_FILE_LOG_BLOCK_SIZE */
	ulint	new_data_offset)/*!< in: start offset of new data in
				buf: this parameter is used to decide
				if we have to write a new log file
				header */
{
	ulint		write_len;
	bool		write_header = new_data_offset == 0;
	lsn_t		next_offset;
	ulint		i;

	ut_ad(log_write_mutex_own());
	ut_ad(!recv_no_log_write);
	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);

loop:
	if (len == 0) {

		return;
	}

	/* Byte offset within the circular log file group. */
	next_offset = log_sys.log.calc_lsn_offset(start_lsn);

	if (write_header
	    && next_offset % log_sys.log.file_size == LOG_FILE_HDR_SIZE) {
		/* We start to write a new log file instance in the group */

		ut_a(next_offset / log_sys.log.file_size <= ULINT_MAX);

		log_file_header_flush(
			ulint(next_offset / log_sys.log.file_size), start_lsn);
		srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);

		srv_stats.log_writes.inc();
	}

	/* Do not write past the end of the current file; the remainder
	is handled by another loop iteration after the wrap. */
	if ((next_offset % log_sys.log.file_size) + len
	    > log_sys.log.file_size) {
		/* if the above condition holds, then the below expression
		is < len which is ulint, so the typecast is ok */
		write_len = ulint(log_sys.log.file_size
				  - (next_offset % log_sys.log.file_size));
	} else {
		write_len = len;
	}

	DBUG_PRINT("ib_log",
		   ("write " LSN_PF " to " LSN_PF
		    ": len " ULINTPF
		    " blocks " ULINTPF ".." ULINTPF,
		    start_lsn, next_offset,
		    write_len,
		    log_block_get_hdr_no(buf),
		    log_block_get_hdr_no(
			    buf + write_len
			    - OS_FILE_LOG_BLOCK_SIZE)));

	ut_ad(pad_len >= len
	      || log_block_get_hdr_no(buf)
	      == log_block_convert_lsn_to_no(start_lsn));

	/* Calculate the checksums for each log block and write them to
	the trailer fields of the log blocks */

	for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
		/* Consecutive blocks must carry consecutive header
		numbers, except in padded regions. */
		ut_ad(pad_len >= len
		      || i * OS_FILE_LOG_BLOCK_SIZE >= len - pad_len
		      || log_block_get_hdr_no(
			      buf + i * OS_FILE_LOG_BLOCK_SIZE)
		      == log_block_get_hdr_no(buf) + i);
		log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
	}

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	srv_stats.os_log_pending_writes.inc();

	ut_a((next_offset >> srv_page_size_shift) <= ULINT_MAX);

	const ulint	page_no = ulint(next_offset >> srv_page_size_shift);

	/* Synchronous write of this segment of the buffer. */
	fil_io(IORequestLogWrite, true,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
	       univ_page_size,
	       ulint(next_offset & (srv_page_size - 1)), write_len, buf, NULL);

	srv_stats.os_log_pending_writes.dec();

	srv_stats.os_log_written.add(write_len);
	srv_stats.log_writes.inc();

	if (write_len < len) {
		/* The write wrapped at a file boundary; continue with
		the remainder at the start of the next file. */
		start_lsn += write_len;
		len -= write_len;
		buf += write_len;

		write_header = true;

		goto loop;
	}
}
872
/** Flush the recently written changes to the log file.
NOTE: this function acquires log_sys.mutex via log_mutex_enter() and
returns with the mutex still held; the caller must release it. */
static
void
log_write_flush_to_disk_low()
{
	/* FIXME: This is not holding log_sys.mutex while
	calling os_event_set()! */
	ut_a(log_sys.n_pending_flushes == 1); /* No other threads here */

	/* With O_DSYNC, the write itself was already durable and no
	separate fsync is needed. */
	bool	do_flush = srv_file_flush_method != SRV_O_DSYNC;

	if (do_flush) {
		fil_flush(SRV_LOG_SPACE_FIRST_ID);
	}

	MONITOR_DEC(MONITOR_PENDING_LOG_FLUSH);

	log_mutex_enter();
	if (do_flush) {
		log_sys.flushed_to_disk_lsn = log_sys.current_flush_lsn;
	}

	log_sys.n_pending_flushes--;

	/* Wake up any threads waiting in log_write_up_to(). */
	os_event_set(log_sys.flush_event);
}
900
/** Switch the log buffer in use, and copy the content of last block
from old log buffer to the head of the to be used one. Thus, buf_free and
buf_next_to_write would be changed accordingly */
static inline
void
log_buffer_switch()
{
	ut_ad(log_mutex_own());
	ut_ad(log_write_mutex_own());

	const byte*	old_buf = log_sys.buf;
	/* End of the area to be written, rounded up to a full block. */
	ulint		area_end = ut_calc_align(log_sys.buf_free,
						 OS_FILE_LOG_BLOCK_SIZE);

	/* Toggle between the two halves of the doubly-sized allocation
	(see log_t::create() / log_buffer_extend()). */
	if (log_sys.first_in_use) {
		log_sys.first_in_use = false;
		ut_ad(log_sys.buf == ut_align(log_sys.buf,
					      OS_FILE_LOG_BLOCK_SIZE));
		log_sys.buf += srv_log_buffer_size;
	} else {
		log_sys.first_in_use = true;
		log_sys.buf -= srv_log_buffer_size;
		ut_ad(log_sys.buf == ut_align(log_sys.buf,
					      OS_FILE_LOG_BLOCK_SIZE));
	}

	/* Copy the last block to new buf */
	ut_memcpy(log_sys.buf,
		  old_buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
		  OS_FILE_LOG_BLOCK_SIZE);

	/* The new buffer continues from the partial last block;
	everything before it is considered written. */
	log_sys.buf_free %= OS_FILE_LOG_BLOCK_SIZE;
	log_sys.buf_next_to_write = log_sys.buf_free;
}
935
/** Ensure that the log has been written to the log file up to a given
log entry (such as that of a transaction commit). Start a new write, or
wait and check if an already running write is covering the request.
@param[in]	lsn	log sequence number that should be
included in the redo log file write
@param[in]	flush_to_disk	whether the written log should also
be flushed to the file system */
void
log_write_up_to(
	lsn_t	lsn,
	bool	flush_to_disk)
{
#ifdef UNIV_DEBUG
	ulint		loop_count = 0;
#endif /* UNIV_DEBUG */
	byte*           write_buf;
	lsn_t           write_lsn;

	ut_ad(!srv_read_only_mode);

	if (recv_no_ibuf_operations) {
		/* Recovery is running and no operations on the log files are
		allowed yet (the variable name .._no_ibuf_.. is misleading) */

		return;
	}

	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
		/* Keep the service manager from timing us out during a
		potentially long shutdown write. */
		service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
					       "log write up to: " LSN_PF,
					       lsn);
	}

loop:
	ut_ad(++loop_count < 128);

#if UNIV_WORD_SIZE > 7
	/* We can do a dirty read of LSN. */
	/* NOTE: Currently doesn't do dirty read for
	(flush_to_disk == true) case, because the log_mutex
	contention also works as the arbitrator for write-IO
	(fsync) bandwidth between log files and data files. */
	if (!flush_to_disk && log_sys.write_lsn >= lsn) {
		return;
	}
#endif

	log_write_mutex_enter();
	ut_ad(!recv_no_log_write);

	/* The lsn up to which the request is already satisfied. */
	lsn_t	limit_lsn = flush_to_disk
		? log_sys.flushed_to_disk_lsn
		: log_sys.write_lsn;

	if (limit_lsn >= lsn) {
		log_write_mutex_exit();
		return;
	}

	/* If it is a write call we should just go ahead and do it
	as we checked that write_lsn is not where we'd like it to
	be. If we have to flush as well then we check if there is a
	pending flush and based on that we wait for it to finish
	before proceeding further. */
	if (flush_to_disk
	    && (log_sys.n_pending_flushes > 0
		|| !os_event_is_set(log_sys.flush_event))) {
		/* Figure out if the current flush will do the job
		for us. */
		bool work_done = log_sys.current_flush_lsn >= lsn;

		log_write_mutex_exit();

		os_event_wait(log_sys.flush_event);

		if (work_done) {
			return;
		} else {
			goto loop;
		}
	}

	log_mutex_enter();
	if (!flush_to_disk
	    && log_sys.buf_free == log_sys.buf_next_to_write) {
		/* Nothing to write and no flush to disk requested */
		log_mutex_exit_all();
		return;
	}

	ulint		start_offset;
	ulint		end_offset;
	ulint		area_start;
	ulint		area_end;
	ulong		write_ahead_size = srv_log_write_ahead_size;
	ulint		pad_size;

	DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
			      log_sys.write_lsn,
			      log_sys.lsn));
	if (flush_to_disk) {
		/* Claim the flush slot; other flush waiters block on
		flush_event until log_write_flush_to_disk_low() sets
		it again. */
		log_sys.n_pending_flushes++;
		log_sys.current_flush_lsn = log_sys.lsn;
		MONITOR_INC(MONITOR_PENDING_LOG_FLUSH);
		os_event_reset(log_sys.flush_event);

		if (log_sys.buf_free == log_sys.buf_next_to_write) {
			/* Nothing to write, flush only */
			log_mutex_exit_all();
			log_write_flush_to_disk_low();
			/* log_write_flush_to_disk_low() returned
			holding log_sys.mutex; release it. */
			log_mutex_exit();
			return;
		}
	}

	start_offset = log_sys.buf_next_to_write;
	end_offset = log_sys.buf_free;

	/* Round the write area to whole log blocks. */
	area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
	area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);

	ut_ad(area_end - area_start > 0);

	log_block_set_flush_bit(log_sys.buf + area_start, TRUE);
	log_block_set_checkpoint_no(
		log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
		log_sys.next_checkpoint_no);

	write_lsn = log_sys.lsn;
	write_buf = log_sys.buf;

	/* Swap buffer halves so that mtr commits can proceed in the
	other half while this half is being written out. */
	log_buffer_switch();

	log_sys.log.set_fields(log_sys.write_lsn);

	log_mutex_exit();
	/* Erase the end of the last log block. */
	memset(write_buf + end_offset, 0,
	       ~end_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));

	/* Calculate pad_size if needed. */
	pad_size = 0;
	if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
		ulint	end_offset_in_unit;
		lsn_t	end_offset = log_sys.log.calc_lsn_offset(
			ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
		end_offset_in_unit = (ulint) (end_offset % write_ahead_size);

		if (end_offset_in_unit > 0
		    && (area_end - area_start) > end_offset_in_unit) {
			/* The first block in the unit was initialized
			after the last writing.
			Needs to be written padded data once. */
			pad_size = std::min<ulint>(
				ulint(write_ahead_size) - end_offset_in_unit,
				srv_log_buffer_size - area_end);
			::memset(write_buf + area_end, 0, pad_size);
		}
	}

	if (log_sys.is_encrypted()) {
		/* Encrypt the payload in place before writing it out. */
		log_crypt(write_buf + area_start, log_sys.write_lsn,
			  area_end - area_start);
	}

	/* Do the write to the log files */
	log_write_buf(
		write_buf + area_start, area_end - area_start + pad_size,
#ifdef UNIV_DEBUG
		pad_size,
#endif /* UNIV_DEBUG */
		ut_uint64_align_down(log_sys.write_lsn,
				     OS_FILE_LOG_BLOCK_SIZE),
		start_offset - area_start);
	srv_stats.log_padded.add(pad_size);
	log_sys.write_lsn = write_lsn;


	if (srv_file_flush_method == SRV_O_DSYNC) {
		/* O_SYNC means the OS did not buffer the log file at all:
		so we have also flushed to disk what we have written */
		log_sys.flushed_to_disk_lsn = log_sys.write_lsn;
	}

	log_write_mutex_exit();

	if (flush_to_disk) {
		/* Re-acquires log_sys.mutex and returns holding it. */
		log_write_flush_to_disk_low();
		ib_uint64_t flush_lsn = log_sys.flushed_to_disk_lsn;
		log_mutex_exit();

		/* Notify the server layer of the new durable lsn. */
		innobase_mysql_log_notify(flush_lsn);
	}
}
1130
1131/** write to the log file up to the last log entry.
1132@param[in] sync whether we want the written log
1133also to be flushed to disk. */
1134void
1135log_buffer_flush_to_disk(
1136 bool sync)
1137{
1138 ut_ad(!srv_read_only_mode);
1139 log_write_up_to(log_get_lsn(), sync);
1140}
1141
1142/****************************************************************//**
1143This functions writes the log buffer to the log file and if 'flush'
1144is set it forces a flush of the log file as well. This is meant to be
1145called from background master thread only as it does not wait for
1146the write (+ possible flush) to finish. */
1147void
1148log_buffer_sync_in_background(
1149/*==========================*/
1150 bool flush) /*!< in: flush the logs to disk */
1151{
1152 lsn_t lsn;
1153
1154 log_mutex_enter();
1155
1156 lsn = log_sys.lsn;
1157
1158 if (flush
1159 && log_sys.n_pending_flushes > 0
1160 && log_sys.current_flush_lsn >= lsn) {
1161 /* The write + flush will write enough */
1162 log_mutex_exit();
1163 return;
1164 }
1165
1166 log_mutex_exit();
1167
1168 log_write_up_to(lsn, flush);
1169}
1170
1171/********************************************************************
1172
1173Tries to establish a big enough margin of free space in the log buffer, such
1174that a new log entry can be catenated without an immediate need for a flush. */
1175static
1176void
1177log_flush_margin(void)
1178/*==================*/
1179{
1180 lsn_t lsn = 0;
1181
1182 log_mutex_enter();
1183
1184 if (log_sys.buf_free > log_sys.max_buf_free) {
1185 /* We can write during flush */
1186 lsn = log_sys.lsn;
1187 }
1188
1189 log_mutex_exit();
1190
1191 if (lsn) {
1192 log_write_up_to(lsn, false);
1193 }
1194}
1195
/** Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool.
NOTE: this function may only be called if the calling thread owns no
synchronization objects!
@param[in]	new_oldest	try to advance oldest_modified_lsn at least to
this lsn
@return false if there was a flush batch of the same type running,
which means that we could not start this flush batch */
static
bool
log_preflush_pool_modified_pages(
	lsn_t	new_oldest)
{
	bool	success;

	if (recv_recovery_on) {
		/* If the recovery is running, we must first apply all
		log records to their respective file pages to get the
		right modify lsn values to these pages: otherwise, there
		might be pages on disk which are not yet recovered to the
		current lsn, and even after calling this function, we could
		not know how up-to-date the disk version of the database is,
		and we could not make a new checkpoint on the basis of the
		info on the buffer pool only. */
		recv_apply_hashed_log_recs(true);
	}

	if (new_oldest == LSN_MAX
	    || !buf_page_cleaner_is_active
	    || srv_is_being_started) {

		/* Flush synchronously from this thread: either everything
		was requested (LSN_MAX) or the page cleaner is not
		available to do the flushing for us. */
		ulint	n_pages;

		success = buf_flush_lists(ULINT_MAX, new_oldest, &n_pages);

		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);

		if (!success) {
			/* Another flush batch of the same type was already
			running; record the wait for monitoring. */
			MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
		}

		MONITOR_INC_VALUE_CUMULATIVE(
			MONITOR_FLUSH_SYNC_TOTAL_PAGE,
			MONITOR_FLUSH_SYNC_COUNT,
			MONITOR_FLUSH_SYNC_PAGES,
			n_pages);
	} else {
		/* better to wait for flushed by page cleaner */

		if (srv_flush_sync) {
			/* wake page cleaner for IO burst */
			buf_flush_request_force(new_oldest);
		}

		buf_flush_wait_flushed(new_oldest);

		success = true;
	}

	return(success);
}
1257
1258/******************************************************//**
1259Completes a checkpoint. */
1260static
1261void
1262log_complete_checkpoint(void)
1263/*=========================*/
1264{
1265 ut_ad(log_mutex_own());
1266 ut_ad(log_sys.n_pending_checkpoint_writes == 0);
1267
1268 log_sys.next_checkpoint_no++;
1269
1270 log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn;
1271 MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
1272 log_sys.lsn - log_sys.last_checkpoint_lsn);
1273
1274 DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
1275 ", flushed to " LSN_PF,
1276 log_sys.last_checkpoint_lsn,
1277 log_sys.flushed_to_disk_lsn));
1278
1279 rw_lock_x_unlock_gen(&(log_sys.checkpoint_lock), LOG_CHECKPOINT);
1280}
1281
1282/** Complete an asynchronous checkpoint write. */
1283void log_t::complete_checkpoint()
1284{
1285 ut_ad(this == &log_sys);
1286 MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
1287
1288 log_mutex_enter();
1289
1290 ut_ad(n_pending_checkpoint_writes > 0);
1291
1292 if (!--n_pending_checkpoint_writes) {
1293 log_complete_checkpoint();
1294 }
1295
1296 log_mutex_exit();
1297}
1298
/** Write checkpoint info to the log header.
@param[in]	end_lsn	start LSN of the MLOG_CHECKPOINT mini-transaction */
static
void
log_group_checkpoint(lsn_t end_lsn)
{
	lsn_t		lsn_offset;

	ut_ad(!srv_read_only_mode);
	ut_ad(log_mutex_own());
	ut_ad(end_lsn == 0 || end_lsn >= log_sys.next_checkpoint_lsn);
	ut_ad(end_lsn <= log_sys.lsn);
	ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys.lsn
	      || srv_shutdown_state != SRV_SHUTDOWN_NONE);

	DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
			      " written",
			      log_sys.next_checkpoint_no,
			      log_sys.next_checkpoint_lsn));

	/* Build one log block of checkpoint information. */
	byte* buf = log_sys.checkpoint_buf;
	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);

	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);

	if (log_sys.is_encrypted()) {
		/* Also store the redo log encryption information. */
		log_crypt_write_checkpoint_buf(buf);
	}

	lsn_offset = log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
	mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
	mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
			srv_log_buffer_size);
	mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);

	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));

	MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	ut_ad(LOG_CHECKPOINT_1 < srv_page_size);
	ut_ad(LOG_CHECKPOINT_2 < srv_page_size);

	if (log_sys.n_pending_checkpoint_writes++ == 0) {
		/* First pending write: acquire the latch that blocks
		synchronous waiters until log_t::complete_checkpoint()
		releases it. */
		rw_lock_x_lock_gen(&log_sys.checkpoint_lock,
				   LOG_CHECKPOINT);
	}

	/* Note: We alternate the physical place of the checkpoint info.
	See the (next_checkpoint_no & 1) below. */

	/* Start an asynchronous write; the message pointer
	reinterpret_cast<void*>(1) tags the I/O as a checkpoint write. */
	fil_io(IORequestLogWrite, false,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID, 0),
	       univ_page_size,
	       (log_sys.next_checkpoint_no & 1)
	       ? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
	       OS_FILE_LOG_BLOCK_SIZE,
	       buf, reinterpret_cast<void*>(1) /* checkpoint write */);
}
1362
/** Read a log group header page to log_sys.checkpoint_buf.
@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
void log_header_read(ulint header)
{
	ut_ad(log_mutex_own());

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	/* Synchronously read one log block at byte offset 'header'
	within the first redo log file. */
	fil_io(IORequestLogRead, true,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID,
			 header >> srv_page_size_shift),
	       univ_page_size, header & (srv_page_size - 1),
	       OS_FILE_LOG_BLOCK_SIZE, log_sys.checkpoint_buf, NULL);
}
1379
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in]	sync	whether to wait for the write to complete
@param[in]	end_lsn	start LSN of the MLOG_CHECKPOINT mini-transaction */
void
log_write_checkpoint_info(bool sync, lsn_t end_lsn)
{
	ut_ad(log_mutex_own());
	ut_ad(!srv_read_only_mode);

	/* Starts an asynchronous checkpoint write; if this is the
	first pending write, it X-latches log_sys.checkpoint_lock. */
	log_group_checkpoint(end_lsn);

	log_mutex_exit();

	MONITOR_INC(MONITOR_NUM_CHECKPOINT);

	if (sync) {
		/* Wait for the checkpoint write to complete */
		rw_lock_s_lock(&log_sys.checkpoint_lock);
		rw_lock_s_unlock(&log_sys.checkpoint_lock);

		DBUG_EXECUTE_IF(
			"crash_after_checkpoint",
			DBUG_SUICIDE(););
	}
}
1405
1406/** Set extra data to be written to the redo log during checkpoint.
1407@param[in] buf data to be appended on checkpoint, or NULL
1408@return pointer to previous data to be appended on checkpoint */
1409mtr_buf_t*
1410log_append_on_checkpoint(
1411 mtr_buf_t* buf)
1412{
1413 log_mutex_enter();
1414 mtr_buf_t* old = log_sys.append_on_checkpoint;
1415 log_sys.append_on_checkpoint = buf;
1416 log_mutex_exit();
1417 return(old);
1418}
1419
/** Make a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
modification in the pool, and writes information about the lsn in
log files. Use log_make_checkpoint_at() to flush also the pool.
@param[in]	sync		whether to wait for the write to complete
@param[in]	write_always	force a write even if no log
has been generated since the latest checkpoint
@return true if success, false if a checkpoint write was already running */
bool
log_checkpoint(
	bool	sync,
	bool	write_always)
{
	lsn_t	oldest_lsn;

	ut_ad(!srv_read_only_mode);

	DBUG_EXECUTE_IF("no_checkpoint",
			/* We sleep for a long enough time, forcing
			the checkpoint doesn't happen any more. */
			os_thread_sleep(360000000););

	if (recv_recovery_is_on()) {
		/* Apply any buffered redo log records first, so that
		buffer pool pages carry up-to-date modify LSNs. */
		recv_apply_hashed_log_recs(true);
	}

	switch (srv_file_flush_method) {
	case SRV_NOSYNC:
		break;
	case SRV_O_DSYNC:
	case SRV_FSYNC:
	case SRV_LITTLESYNC:
	case SRV_O_DIRECT:
	case SRV_O_DIRECT_NO_FSYNC:
#ifdef _WIN32
	case SRV_ALL_O_DIRECT_FSYNC:
#endif
		/* Durably write any buffered changes of the data
		files before advancing the checkpoint. */
		fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
	}

	log_mutex_enter();

	ut_ad(!recv_no_log_write);
	oldest_lsn = log_buf_pool_get_oldest_modification();

	/* Because log also contains headers and dummy log records,
	log_buf_pool_get_oldest_modification() will return log_sys.lsn
	if the buffer pool contains no dirty buffers.
	We must make sure that the log is flushed up to that lsn.
	If there are dirty buffers in the buffer pool, then our
	write-ahead-logging algorithm ensures that the log has been
	flushed up to oldest_lsn. */

	ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
	if (!write_always
	    && oldest_lsn
	    <= log_sys.last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
		/* Do nothing, because nothing was logged (other than
		a MLOG_CHECKPOINT marker) since the previous checkpoint. */
		log_mutex_exit();
		return(true);
	}
	/* Repeat the MLOG_FILE_NAME records after the checkpoint, in
	case some log records between the checkpoint and log_sys.lsn
	need them. Finally, write a MLOG_CHECKPOINT marker. Redo log
	apply expects to see a MLOG_CHECKPOINT after the checkpoint,
	except on clean shutdown, where the log will be empty after
	the checkpoint.
	It is important that we write out the redo log before any
	further dirty pages are flushed to the tablespace files. At
	this point, because log_mutex_own(), mtr_commit() in other
	threads will be blocked, and no pages can be added to the
	flush lists. */
	lsn_t		flush_lsn	= oldest_lsn;
	const lsn_t	end_lsn		= log_sys.lsn;
	const bool	do_write
		= srv_shutdown_state == SRV_SHUTDOWN_NONE
		|| flush_lsn != end_lsn;

	if (fil_names_clear(flush_lsn, do_write)) {
		ut_ad(log_sys.lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
		flush_lsn = log_sys.lsn;
	}

	log_mutex_exit();

	/* Make the log durable up to flush_lsn before writing the
	checkpoint that refers to it. */
	log_write_up_to(flush_lsn, true);

	DBUG_EXECUTE_IF(
		"using_wa_checkpoint_middle",
		if (write_always) {
			DEBUG_SYNC_C("wa_checkpoint_middle");

			const my_bool b = TRUE;
			buf_flush_page_cleaner_disabled_debug_update(
				NULL, NULL, NULL, &b);
			dict_stats_disabled_debug_update(
				NULL, NULL, NULL, &b);
			srv_master_thread_disabled_debug_update(
				NULL, NULL, NULL, &b);
		});

	log_mutex_enter();

	ut_ad(log_sys.flushed_to_disk_lsn >= flush_lsn);
	ut_ad(flush_lsn >= oldest_lsn);

	if (log_sys.last_checkpoint_lsn >= oldest_lsn) {
		/* A concurrent checkpoint already advanced at least
		this far while we did not hold the mutex. */
		log_mutex_exit();
		return(true);
	}

	if (log_sys.n_pending_checkpoint_writes > 0) {
		/* A checkpoint write is running */
		log_mutex_exit();

		if (sync) {
			/* Wait for the checkpoint write to complete */
			rw_lock_s_lock(&log_sys.checkpoint_lock);
			rw_lock_s_unlock(&log_sys.checkpoint_lock);
		}

		return(false);
	}

	log_sys.next_checkpoint_lsn = oldest_lsn;
	/* This call releases the log mutex. */
	log_write_checkpoint_info(sync, end_lsn);
	ut_ad(!log_mutex_own());

	return(true);
}
1551
1552/** Make a checkpoint at or after a specified LSN.
1553@param[in] lsn the log sequence number, or LSN_MAX
1554for the latest LSN
1555@param[in] write_always force a write even if no log
1556has been generated since the latest checkpoint */
1557void
1558log_make_checkpoint_at(
1559 lsn_t lsn,
1560 bool write_always)
1561{
1562 /* Preflush pages synchronously */
1563
1564 while (!log_preflush_pool_modified_pages(lsn)) {
1565 /* Flush as much as we can */
1566 }
1567
1568 while (!log_checkpoint(true, write_always)) {
1569 /* Force a checkpoint */
1570 }
1571}
1572
1573/****************************************************************//**
1574Tries to establish a big enough margin of free space in the log groups, such
1575that a new log entry can be catenated without an immediate need for a
1576checkpoint. NOTE: this function may only be called if the calling thread
1577owns no synchronization objects! */
1578static
1579void
1580log_checkpoint_margin(void)
1581/*=======================*/
1582{
1583 lsn_t age;
1584 lsn_t checkpoint_age;
1585 ib_uint64_t advance;
1586 lsn_t oldest_lsn;
1587 bool success;
1588loop:
1589 advance = 0;
1590
1591 log_mutex_enter();
1592 ut_ad(!recv_no_log_write);
1593
1594 if (!log_sys.check_flush_or_checkpoint) {
1595 log_mutex_exit();
1596 return;
1597 }
1598
1599 oldest_lsn = log_buf_pool_get_oldest_modification();
1600
1601 age = log_sys.lsn - oldest_lsn;
1602
1603 if (age > log_sys.max_modified_age_sync) {
1604
1605 /* A flush is urgent: we have to do a synchronous preflush */
1606 advance = age - log_sys.max_modified_age_sync;
1607 }
1608
1609 checkpoint_age = log_sys.lsn - log_sys.last_checkpoint_lsn;
1610
1611 bool checkpoint_sync;
1612 bool do_checkpoint;
1613
1614 if (checkpoint_age > log_sys.max_checkpoint_age) {
1615 /* A checkpoint is urgent: we do it synchronously */
1616 checkpoint_sync = true;
1617 do_checkpoint = true;
1618 } else if (checkpoint_age > log_sys.max_checkpoint_age_async) {
1619 /* A checkpoint is not urgent: do it asynchronously */
1620 do_checkpoint = true;
1621 checkpoint_sync = false;
1622 log_sys.check_flush_or_checkpoint = false;
1623 } else {
1624 do_checkpoint = false;
1625 checkpoint_sync = false;
1626 log_sys.check_flush_or_checkpoint = false;
1627 }
1628
1629 log_mutex_exit();
1630
1631 if (advance) {
1632 lsn_t new_oldest = oldest_lsn + advance;
1633
1634 success = log_preflush_pool_modified_pages(new_oldest);
1635
1636 /* If the flush succeeded, this thread has done its part
1637 and can proceed. If it did not succeed, there was another
1638 thread doing a flush at the same time. */
1639 if (!success) {
1640 log_mutex_enter();
1641 log_sys.check_flush_or_checkpoint = true;
1642 log_mutex_exit();
1643 goto loop;
1644 }
1645 }
1646
1647 if (do_checkpoint) {
1648 log_checkpoint(checkpoint_sync, FALSE);
1649
1650 if (checkpoint_sync) {
1651
1652 goto loop;
1653 }
1654 }
1655}
1656
1657/**
1658Checks that there is enough free space in the log to start a new query step.
1659Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
1660function may only be called if the calling thread owns no synchronization
1661objects! */
1662void
1663log_check_margins(void)
1664{
1665 bool check;
1666
1667 do {
1668 log_flush_margin();
1669 log_checkpoint_margin();
1670 log_mutex_enter();
1671 ut_ad(!recv_no_log_write);
1672 check = log_sys.check_flush_or_checkpoint;
1673 log_mutex_exit();
1674 } while (check);
1675}
1676
1677/****************************************************************//**
1678Makes a checkpoint at the latest lsn and writes it to first page of each
1679data file in the database, so that we know that the file spaces contain
1680all modifications up to that lsn. This can only be called at database
1681shutdown. This function also writes all log in log files to the log archive. */
1682void
1683logs_empty_and_mark_files_at_shutdown(void)
1684/*=======================================*/
1685{
1686 lsn_t lsn;
1687 ulint count = 0;
1688
1689 ib::info() << "Starting shutdown...";
1690
1691 /* Wait until the master thread and all other operations are idle: our
1692 algorithm only works if the server is idle at shutdown */
1693
1694 srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
1695loop:
1696 ut_ad(lock_sys.is_initialised() || !srv_was_started);
1697 ut_ad(log_sys.is_initialised() || !srv_was_started);
1698 ut_ad(fil_system.is_initialised() || !srv_was_started);
1699 os_event_set(srv_buf_resize_event);
1700
1701 if (!srv_read_only_mode) {
1702 os_event_set(srv_error_event);
1703 os_event_set(srv_monitor_event);
1704 os_event_set(srv_buf_dump_event);
1705 if (lock_sys.timeout_thread_active) {
1706 os_event_set(lock_sys.timeout_event);
1707 }
1708 if (dict_stats_event) {
1709 os_event_set(dict_stats_event);
1710 } else {
1711 ut_ad(!srv_dict_stats_thread_active);
1712 }
1713 if (recv_sys && recv_sys->flush_start) {
1714 /* This is in case recv_writer_thread was never
1715 started, or buf_flush_page_cleaner_coordinator
1716 failed to notice its termination. */
1717 os_event_set(recv_sys->flush_start);
1718 }
1719 }
1720#define COUNT_INTERVAL 600U
1721#define CHECK_INTERVAL 100000U
1722 os_thread_sleep(CHECK_INTERVAL);
1723
1724 count++;
1725
1726 /* Check that there are no longer transactions, except for
1727 PREPARED ones. We need this wait even for the 'very fast'
1728 shutdown, because the InnoDB layer may have committed or
1729 prepared transactions and we don't want to lose them. */
1730
1731 if (ulint total_trx = srv_was_started && !srv_read_only_mode
1732 && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
1733 ? trx_sys.any_active_transactions() : 0) {
1734
1735 if (srv_print_verbose_log && count > COUNT_INTERVAL) {
1736 service_manager_extend_timeout(
1737 COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1738 "Waiting for %lu active transactions to finish",
1739 (ulong) total_trx);
1740 ib::info() << "Waiting for " << total_trx << " active"
1741 << " transactions to finish";
1742
1743 count = 0;
1744 }
1745
1746 goto loop;
1747 }
1748
1749 /* We need these threads to stop early in shutdown. */
1750 const char* thread_name;
1751
1752 if (srv_error_monitor_active) {
1753 thread_name = "srv_error_monitor_thread";
1754 } else if (srv_monitor_active) {
1755 thread_name = "srv_monitor_thread";
1756 } else if (srv_buf_resize_thread_active) {
1757 thread_name = "buf_resize_thread";
1758 goto wait_suspend_loop;
1759 } else if (srv_dict_stats_thread_active) {
1760 thread_name = "dict_stats_thread";
1761 } else if (lock_sys.timeout_thread_active) {
1762 thread_name = "lock_wait_timeout_thread";
1763 } else if (srv_buf_dump_thread_active) {
1764 thread_name = "buf_dump_thread";
1765 goto wait_suspend_loop;
1766 } else if (btr_defragment_thread_active) {
1767 thread_name = "btr_defragment_thread";
1768 } else if (srv_fast_shutdown != 2 && trx_rollback_is_active) {
1769 thread_name = "rollback of recovered transactions";
1770 } else {
1771 thread_name = NULL;
1772 }
1773
1774 if (thread_name) {
1775 ut_ad(!srv_read_only_mode);
1776wait_suspend_loop:
1777 service_manager_extend_timeout(
1778 COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1779 "Waiting for %s to exit", thread_name);
1780 if (srv_print_verbose_log && count > COUNT_INTERVAL) {
1781 ib::info() << "Waiting for " << thread_name
1782 << "to exit";
1783 count = 0;
1784 }
1785 goto loop;
1786 }
1787
1788 /* Check that the background threads are suspended */
1789
1790 switch (srv_get_active_thread_type()) {
1791 case SRV_NONE:
1792 if (!srv_n_fil_crypt_threads_started) {
1793 srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
1794 break;
1795 }
1796 os_event_set(fil_crypt_threads_event);
1797 thread_name = "fil_crypt_thread";
1798 goto wait_suspend_loop;
1799 case SRV_PURGE:
1800 case SRV_WORKER:
1801 ut_ad(!"purge was not shut down");
1802 srv_purge_wakeup();
1803 thread_name = "purge thread";
1804 goto wait_suspend_loop;
1805 case SRV_MASTER:
1806 thread_name = "master thread";
1807 goto wait_suspend_loop;
1808 }
1809
1810 /* At this point only page_cleaner should be active. We wait
1811 here to let it complete the flushing of the buffer pools
1812 before proceeding further. */
1813
1814 count = 0;
1815 service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1816 "Waiting for page cleaner");
1817 while (buf_page_cleaner_is_active) {
1818 ++count;
1819 os_thread_sleep(CHECK_INTERVAL);
1820 if (srv_print_verbose_log && count > COUNT_INTERVAL) {
1821 service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1822 "Waiting for page cleaner");
1823 ib::info() << "Waiting for page_cleaner to "
1824 "finish flushing of buffer pool";
1825 count = 0;
1826 }
1827 }
1828
1829 if (log_scrub_thread_active) {
1830 ut_ad(!srv_read_only_mode);
1831 os_event_set(log_scrub_event);
1832 }
1833
1834 if (log_sys.is_initialised()) {
1835 log_mutex_enter();
1836 const ulint n_write = log_sys.n_pending_checkpoint_writes;
1837 const ulint n_flush = log_sys.n_pending_flushes;
1838 log_mutex_exit();
1839
1840 if (log_scrub_thread_active || n_write || n_flush) {
1841 if (srv_print_verbose_log && count > 600) {
1842 ib::info() << "Pending checkpoint_writes: "
1843 << n_write
1844 << ". Pending log flush writes: "
1845 << n_flush;
1846 count = 0;
1847 }
1848 goto loop;
1849 }
1850 }
1851
1852 ut_ad(!log_scrub_thread_active);
1853
1854 if (!buf_pool_ptr) {
1855 ut_ad(!srv_was_started);
1856 } else if (ulint pending_io = buf_pool_check_no_pending_io()) {
1857 if (srv_print_verbose_log && count > 600) {
1858 ib::info() << "Waiting for " << pending_io << " buffer"
1859 " page I/Os to complete";
1860 count = 0;
1861 }
1862
1863 goto loop;
1864 }
1865
1866 if (srv_fast_shutdown == 2 || !srv_was_started) {
1867 if (!srv_read_only_mode && srv_was_started) {
1868 ib::info() << "MySQL has requested a very fast"
1869 " shutdown without flushing the InnoDB buffer"
1870 " pool to data files. At the next mysqld"
1871 " startup InnoDB will do a crash recovery!";
1872
1873 /* In this fastest shutdown we do not flush the
1874 buffer pool:
1875
1876 it is essentially a 'crash' of the InnoDB server.
1877 Make sure that the log is all flushed to disk, so
1878 that we can recover all committed transactions in
1879 a crash recovery. We must not write the lsn stamps
1880 to the data files, since at a startup InnoDB deduces
1881 from the stamps if the previous shutdown was clean. */
1882
1883 log_buffer_flush_to_disk();
1884 }
1885
1886 srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
1887
1888 if (fil_system.is_initialised()) {
1889 fil_close_all_files();
1890 }
1891 return;
1892 }
1893
1894 if (!srv_read_only_mode) {
1895 service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
1896 "ensuring dirty buffer pool are written to log");
1897 log_make_checkpoint_at(LSN_MAX, TRUE);
1898
1899 log_mutex_enter();
1900
1901 lsn = log_sys.lsn;
1902
1903 const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn;
1904 ut_ad(lsn >= log_sys.last_checkpoint_lsn);
1905
1906 log_mutex_exit();
1907
1908 if (lsn_changed) {
1909 goto loop;
1910 }
1911
1912 /* Ensure that all buffered changes are written to the
1913 redo log before fil_close_all_files(). */
1914 fil_flush_file_spaces(FIL_TYPE_LOG);
1915 } else {
1916 lsn = srv_start_lsn;
1917 }
1918
1919 srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
1920
1921 /* Make some checks that the server really is quiet */
1922 ut_a(srv_get_active_thread_type() == SRV_NONE);
1923
1924 service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
1925 "Free innodb buffer pool");
1926 buf_all_freed();
1927
1928 ut_a(lsn == log_sys.lsn
1929 || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
1930
1931 if (lsn < srv_start_lsn) {
1932 ib::error() << "Shutdown LSN=" << lsn
1933 << " is less than start LSN=" << srv_start_lsn;
1934 }
1935
1936 srv_shutdown_lsn = lsn;
1937
1938 if (!srv_read_only_mode) {
1939 dberr_t err = fil_write_flushed_lsn(lsn);
1940
1941 if (err != DB_SUCCESS) {
1942 ib::error() << "Writing flushed lsn " << lsn
1943 << " failed; error=" << err;
1944 }
1945 }
1946
1947 fil_close_all_files();
1948
1949 /* Make some checks that the server really is quiet */
1950 ut_a(srv_get_active_thread_type() == SRV_NONE);
1951
1952 ut_a(lsn == log_sys.lsn
1953 || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
1954}
1955
1956/******************************************************//**
1957Peeks the current lsn.
1958@return TRUE if success, FALSE if could not get the log system mutex */
1959ibool
1960log_peek_lsn(
1961/*=========*/
1962 lsn_t* lsn) /*!< out: if returns TRUE, current lsn is here */
1963{
1964 if (0 == mutex_enter_nowait(&(log_sys.mutex))) {
1965 *lsn = log_sys.lsn;
1966
1967 log_mutex_exit();
1968
1969 return(TRUE);
1970 }
1971
1972 return(FALSE);
1973}
1974
1975/******************************************************//**
1976Prints info of the log. */
1977void
1978log_print(
1979/*======*/
1980 FILE* file) /*!< in: file where to print */
1981{
1982 double time_elapsed;
1983 time_t current_time;
1984
1985 log_mutex_enter();
1986
1987 fprintf(file,
1988 "Log sequence number " LSN_PF "\n"
1989 "Log flushed up to " LSN_PF "\n"
1990 "Pages flushed up to " LSN_PF "\n"
1991 "Last checkpoint at " LSN_PF "\n",
1992 log_sys.lsn,
1993 log_sys.flushed_to_disk_lsn,
1994 log_buf_pool_get_oldest_modification(),
1995 log_sys.last_checkpoint_lsn);
1996
1997 current_time = time(NULL);
1998
1999 time_elapsed = difftime(current_time,
2000 log_sys.last_printout_time);
2001
2002 if (time_elapsed <= 0) {
2003 time_elapsed = 1;
2004 }
2005
2006 fprintf(file,
2007 ULINTPF " pending log flushes, "
2008 ULINTPF " pending chkp writes\n"
2009 ULINTPF " log i/o's done, %.2f log i/o's/second\n",
2010 log_sys.n_pending_flushes,
2011 log_sys.n_pending_checkpoint_writes,
2012 log_sys.n_log_ios,
2013 static_cast<double>(
2014 log_sys.n_log_ios - log_sys.n_log_ios_old)
2015 / time_elapsed);
2016
2017 log_sys.n_log_ios_old = log_sys.n_log_ios;
2018 log_sys.last_printout_time = current_time;
2019
2020 log_mutex_exit();
2021}
2022
2023/**********************************************************************//**
2024Refreshes the statistics used to print per-second averages. */
2025void
2026log_refresh_stats(void)
2027/*===================*/
2028{
2029 log_sys.n_log_ios_old = log_sys.n_log_ios;
2030 log_sys.last_printout_time = time(NULL);
2031}
2032
/** Shut down the redo log subsystem. */
void log_t::close()
{
  ut_ad(this == &log_sys);
  if (!is_initialised()) return;
  m_initialised = false;
  log.close();

  /* The log buffer is one allocation of 2 * srv_log_buffer_size;
  when the second half is in use, rewind buf to the start of the
  allocation before freeing. */
  if (!first_in_use)
    buf -= srv_log_buffer_size;
  ut_free_dodump(buf, srv_log_buffer_size * 2);
  buf = NULL;

  os_event_destroy(flush_event);

  rw_lock_free(&checkpoint_lock);
  /* rw_lock_free() already called checkpoint_lock.~rw_lock_t();
  tame the debug assertions when the destructor will be called once more. */
  ut_ad(checkpoint_lock.magic_n == 0);
  ut_d(checkpoint_lock.magic_n = RW_LOCK_MAGIC_N);

  mutex_free(&mutex);
  mutex_free(&write_mutex);
  mutex_free(&log_flush_order_mutex);

  /* log_scrub_event only exists when scrubbing was enabled at startup. */
  if (!srv_read_only_mode && srv_scrub_log)
    os_event_destroy(log_scrub_event);

  recv_sys_close();
}
2063
2064/******************************************************//**
2065Pads the current log block full with dummy log records. Used in producing
2066consistent archived log files and scrubbing redo log. */
2067static
2068void
2069log_pad_current_log_block(void)
2070/*===========================*/
2071{
2072 byte b = MLOG_DUMMY_RECORD;
2073 ulint pad_length;
2074 ulint i;
2075 lsn_t lsn;
2076
2077 ut_ad(!recv_no_log_write);
2078 /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
2079 lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
2080
2081 pad_length = OS_FILE_LOG_BLOCK_SIZE
2082 - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
2083 - LOG_BLOCK_TRL_SIZE;
2084 if (pad_length
2085 == (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
2086 - LOG_BLOCK_TRL_SIZE)) {
2087
2088 pad_length = 0;
2089 }
2090
2091 if (pad_length) {
2092 srv_stats.n_log_scrubs.inc();
2093 }
2094
2095 for (i = 0; i < pad_length; i++) {
2096 log_write_low(&b, 1);
2097 }
2098
2099 lsn = log_sys.lsn;
2100
2101 log_close();
2102
2103 ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
2104}
2105
2106/*****************************************************************//*
2107If no log record has been written for a while, fill current log
2108block with dummy records. */
2109static
2110void
2111log_scrub()
2112/*=========*/
2113{
2114 log_mutex_enter();
2115 ulint cur_lbn = log_block_convert_lsn_to_no(log_sys.lsn);
2116
2117 if (next_lbn_to_pad == cur_lbn)
2118 {
2119 log_pad_current_log_block();
2120 }
2121
2122 next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys.lsn);
2123 log_mutex_exit();
2124}
2125
2126/* log scrubbing speed, in bytes/sec */
2127UNIV_INTERN ulonglong innodb_scrub_log_speed;
2128
2129/*****************************************************************//**
2130This is the main thread for log scrub. It waits for an event and
2131when waked up fills current log block with dummy records and
2132sleeps again.
2133@return this function does not return, it calls os_thread_exit() */
2134extern "C" UNIV_INTERN
2135os_thread_ret_t
2136DECLARE_THREAD(log_scrub_thread)(void*)
2137{
2138 ut_ad(!srv_read_only_mode);
2139
2140 while (srv_shutdown_state < SRV_SHUTDOWN_FLUSH_PHASE) {
2141 /* log scrubbing interval in µs. */
2142 ulonglong interval = 1000*1000*512/innodb_scrub_log_speed;
2143
2144 os_event_wait_time(log_scrub_event, static_cast<ulint>(interval));
2145
2146 log_scrub();
2147
2148 os_event_reset(log_scrub_event);
2149 }
2150
2151 log_scrub_thread_active = false;
2152
2153 /* We count the number of threads in os_thread_exit(). A created
2154 thread should always use that to exit and not use return() to exit. */
2155 os_thread_exit();
2156
2157 OS_THREAD_DUMMY_RETURN;
2158}
2159