1/*****************************************************************************
2
3Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2009, Google Inc.
5Copyright (c) 2014, 2018, MariaDB Corporation.
6
7Portions of this file contain modifications contributed and copyrighted by
8Google, Inc. Those modifications are gratefully acknowledged and are described
9briefly in the InnoDB documentation. The contributions by Google are
10incorporated with their permission, and subject to the conditions contained in
11the file COPYING.Google.
12
13This program is free software; you can redistribute it and/or modify it under
14the terms of the GNU General Public License as published by the Free Software
15Foundation; version 2 of the License.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25*****************************************************************************/
26
27/**************************************************//**
28@file log/log0log.cc
29Database log
30
31Created 12/9/1995 Heikki Tuuri
32*******************************************************/
33
34#include "ha_prototypes.h"
35#include <debug_sync.h>
36#include <my_service_manager.h>
37
38#include "log0log.h"
39#include "log0crypt.h"
40#include "mem0mem.h"
41#include "buf0buf.h"
42#include "buf0flu.h"
43#include "lock0lock.h"
44#include "log0recv.h"
45#include "fil0fil.h"
46#include "dict0boot.h"
47#include "dict0stats_bg.h"
48#include "btr0defragment.h"
49#include "srv0srv.h"
50#include "srv0start.h"
51#include "trx0sys.h"
52#include "trx0trx.h"
53#include "trx0roll.h"
54#include "srv0mon.h"
55#include "sync0sync.h"
56
57/*
58General philosophy of InnoDB redo-logs:
59
601) Every change to a contents of a data page must be done
61through mtr, which in mtr_commit() writes log records
62to the InnoDB redo log.
63
642) Normally these changes are performed using a mlog_write_ulint()
65or similar function.
66
673) In some page level operations only a code number of a
68c-function and its parameters are written to the log to
69reduce the size of the log.
70
71 3a) You should not add parameters to these kind of functions
72 (e.g. trx_undo_header_create())
73
 3b) You should not add functionality that changes the behavior
 relative to the old implementation, or that depends on data
 outside of the page. These kinds of functions should implement a
 self-contained page transformation, and they should be left
 unchanged unless you have very essential reasons to change the log
 semantics or format.
80
81*/
82
/** Redo log system */
log_t log_sys;

/** Whether to generate and require checksums on the redo log pages */
my_bool innodb_log_checksums;

/** Pointer to the log checksum calculation function */
log_checksum_func_t log_checksum_algorithm_ptr;

/* Next log block number to do dummy record filling if no log records written
for a while */
static ulint next_lbn_to_pad = 0;

/* These control how often we print warnings if the last checkpoint is too
old */
static bool log_has_printed_chkp_warning = false;
static time_t log_last_warning_time;

/* Same rate-limiting state for the "single transaction exceeds the log
group capacity" warning in log_margin_checkpoint_age().
NOTE(review): "margine" is a historical misspelling of "margin"; kept
as-is because renaming would touch multiple functions. */
static bool log_has_printed_chkp_margine_warning = false;
static time_t log_last_margine_warning_time;

/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)

/* Margins for free space in the log buffer after a log entry is catenated */
#define LOG_BUF_FLUSH_RATIO 2
#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN \
 + (4U << srv_page_size_shift))

/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */

#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32

/* This parameter controls synchronous preflushing of modified buffer pages */
#define LOG_POOL_PREFLUSH_RATIO_SYNC 16

/* The same ratio for asynchronous preflushing; this value should be less than
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8

/* Codes used in unlocking flush latches */
#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
#define LOG_UNLOCK_FLUSH_LOCK 2

/** Event to wake up log_scrub_thread */
os_event_t log_scrub_event;
/** Whether log_scrub_thread is active */
bool log_scrub_thread_active;

/* Forward declaration of the log scrubbing thread entry point,
started from log_t::create() when srv_scrub_log is enabled. */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(log_scrub_thread)(void*);
136
137/****************************************************************//**
138Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
139exists.
140@return LSN of oldest modification */
141static
142lsn_t
143log_buf_pool_get_oldest_modification(void)
144/*======================================*/
145{
146 lsn_t lsn;
147
148 ut_ad(log_mutex_own());
149
150 lsn = buf_pool_get_oldest_modification();
151
152 if (!lsn) {
153
154 lsn = log_sys.lsn;
155 }
156
157 return(lsn);
158}
159
160/** Extends the log buffer.
161@param[in] len requested minimum size in bytes */
162void log_buffer_extend(ulong len)
163{
164 byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE];
165
166 log_mutex_enter_all();
167
168 while (log_sys.is_extending) {
169 /* Another thread is trying to extend already.
170 Needs to wait for. */
171 log_mutex_exit_all();
172
173 log_buffer_flush_to_disk();
174
175 log_mutex_enter_all();
176
177 if (srv_log_buffer_size > len) {
178 /* Already extended enough by the others */
179 log_mutex_exit_all();
180 return;
181 }
182 }
183
184 if (len >= srv_log_buffer_size / 2) {
185 DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
186 DBUG_SUICIDE(););
187
188 /* log_buffer is too small. try to extend instead of crash. */
189 ib::warn() << "The redo log transaction size " << len <<
190 " exceeds innodb_log_buffer_size="
191 << srv_log_buffer_size << " / 2). Trying to extend it.";
192 }
193
194 log_sys.is_extending = true;
195
196 while (ut_calc_align_down(log_sys.buf_free,
197 OS_FILE_LOG_BLOCK_SIZE)
198 != ut_calc_align_down(log_sys.buf_next_to_write,
199 OS_FILE_LOG_BLOCK_SIZE)) {
200 /* Buffer might have >1 blocks to write still. */
201 log_mutex_exit_all();
202
203 log_buffer_flush_to_disk();
204
205 log_mutex_enter_all();
206 }
207
208 ulong move_start = ut_calc_align_down(
209 log_sys.buf_free,
210 OS_FILE_LOG_BLOCK_SIZE);
211 ulong move_end = log_sys.buf_free;
212
213 /* store the last log block in buffer */
214 ut_memcpy(tmp_buf, log_sys.buf + move_start,
215 move_end - move_start);
216
217 log_sys.buf_free -= move_start;
218 log_sys.buf_next_to_write -= move_start;
219
220 /* free previous after getting the right address */
221 if (!log_sys.first_in_use) {
222 log_sys.buf -= srv_log_buffer_size;
223 }
224 ut_free_dodump(log_sys.buf, srv_log_buffer_size * 2);
225
226 /* reallocate log buffer */
227 srv_log_buffer_size = len;
228
229 log_sys.buf = static_cast<byte*>(
230 ut_malloc_dontdump(srv_log_buffer_size * 2));
231 TRASH_ALLOC(log_sys.buf, srv_log_buffer_size * 2);
232
233 log_sys.first_in_use = true;
234
235 log_sys.max_buf_free = srv_log_buffer_size / LOG_BUF_FLUSH_RATIO
236 - LOG_BUF_FLUSH_MARGIN;
237
238 /* restore the last log block */
239 ut_memcpy(log_sys.buf, tmp_buf, move_end - move_start);
240
241 ut_ad(log_sys.is_extending);
242 log_sys.is_extending = false;
243
244 log_mutex_exit_all();
245
246 ib::info() << "innodb_log_buffer_size was extended to "
247 << srv_log_buffer_size << ".";
248}
249
250/** Calculate actual length in redo buffer and file including
251block header and trailer.
252@param[in] len length to write
253@return actual length to write including header and trailer. */
254static inline
255ulint
256log_calculate_actual_len(
257 ulint len)
258{
259 ut_ad(log_mutex_own());
260
261 /* actual length stored per block */
262 const ulint len_per_blk = OS_FILE_LOG_BLOCK_SIZE
263 - (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
264
265 /* actual data length in last block already written */
266 ulint extra_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE);
267
268 ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
269 extra_len -= LOG_BLOCK_HDR_SIZE;
270
271 /* total extra length for block header and trailer */
272 extra_len = ((len + extra_len) / len_per_blk)
273 * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
274
275 return(len + extra_len);
276}
277
/** Check margin not to overwrite transaction log from the last checkpoint.
If would estimate the log write to exceed the log_group_capacity,
waits for the checkpoint is done enough.
@param[in]	len	length of the data to be written */

void
log_margin_checkpoint_age(
	ulint	len)
{
	/* Total bytes the pending write will occupy, including log
	block headers and trailers. */
	ulint	margin = log_calculate_actual_len(len);

	ut_ad(log_mutex_own());

	if (margin > log_sys.log_group_capacity) {
		/* return with warning output to avoid deadlock */
		/* A single mtr larger than the whole log group can never
		be made safe by checkpointing; warn (rate-limited to one
		message per 15 seconds) and proceed anyway. */
		if (!log_has_printed_chkp_margine_warning
		    || difftime(time(NULL),
				log_last_margine_warning_time) > 15) {
			log_has_printed_chkp_margine_warning = true;
			log_last_margine_warning_time = time(NULL);

			ib::error() << "The transaction log files are too"
				" small for the single transaction log (size="
				<< len << "). So, the last checkpoint age"
				" might exceed the log group capacity "
				<< log_sys.log_group_capacity << ".";
		}

		return;
	}

	/* Our margin check should ensure that we never reach this condition.
	Try to do checkpoint once. We cannot keep waiting here as it might
	result in hang in case the current mtr has latch on oldest lsn */
	if (log_sys.lsn - log_sys.last_checkpoint_lsn + margin
	    > log_sys.log_group_capacity) {
		/* The log write of 'len' might overwrite the transaction log
		after the last checkpoint. Makes checkpoint. */

		bool	flushed_enough = false;

		/* If the oldest dirty page is already recent enough, the
		checkpoint can advance without further page flushing. */
		if (log_sys.lsn - log_buf_pool_get_oldest_modification()
		    + margin
		    <= log_sys.log_group_capacity) {
			flushed_enough = true;
		}

		log_sys.check_flush_or_checkpoint = true;
		/* Release the log mutex before the potentially slow
		checkpoint; it is re-acquired below. */
		log_mutex_exit();

		DEBUG_SYNC_C("margin_checkpoint_age_rescue");

		if (!flushed_enough) {
			/* Give the page cleaner a moment (100 ms) to
			flush more pages before checkpointing. */
			os_thread_sleep(100000);
		}
		log_checkpoint(true, false);

		log_mutex_enter();
	}

	return;
}
340
341/** Open the log for log_write_low. The log must be closed with log_close.
342@param[in] len length of the data to be written
343@return start lsn of the log record */
344lsn_t
345log_reserve_and_open(
346 ulint len)
347{
348 ulint len_upper_limit;
349#ifdef UNIV_DEBUG
350 ulint count = 0;
351#endif /* UNIV_DEBUG */
352
353loop:
354 ut_ad(log_mutex_own());
355
356 if (log_sys.is_extending) {
357 log_mutex_exit();
358
359 /* Log buffer size is extending. Writing up to the next block
360 should wait for the extending finished. */
361
362 os_thread_sleep(100000);
363
364 ut_ad(++count < 50);
365
366 log_mutex_enter();
367 goto loop;
368 }
369
370 /* Calculate an upper limit for the space the string may take in the
371 log buffer */
372
373 len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
374 + (5 * len) / 4;
375
376 if (log_sys.buf_free + len_upper_limit > srv_log_buffer_size) {
377 log_mutex_exit();
378
379 DEBUG_SYNC_C("log_buf_size_exceeded");
380
381 /* Not enough free space, do a write of the log buffer */
382 log_buffer_sync_in_background(false);
383
384 srv_stats.log_waits.inc();
385
386 ut_ad(++count < 50);
387
388 log_mutex_enter();
389 goto loop;
390 }
391
392 return(log_sys.lsn);
393}
394
/************************************************************//**
Writes to the log the string given. It is assumed that the caller holds the
log mutex. The string may span multiple log blocks; each full block gets
its header/trailer accounted for in log_sys.lsn. */
void
log_write_low(
/*==========*/
	const byte*	str,		/*!< in: string */
	ulint		str_len)	/*!< in: string length */
{
	ulint	len;
	ulint	data_len;
	byte*	log_block;

	ut_ad(log_mutex_own());
part_loop:
	/* Calculate a part length */

	/* data_len = bytes of the current block in use if the whole
	remaining string were appended to it. */
	data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;

	if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {

		/* The string fits within the current log block */

		len = str_len;
	} else {
		/* The string spills over: fill the current block up to
		its trailer and handle the rest in the next iteration. */
		data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;

		len = OS_FILE_LOG_BLOCK_SIZE
			- (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
			- LOG_BLOCK_TRL_SIZE;
	}

	memcpy(log_sys.buf + log_sys.buf_free, str, len);

	str_len -= len;
	str = str + len;

	/* Start address of the block the copied bytes landed in. */
	log_block = static_cast<byte*>(
		ut_align_down(log_sys.buf + log_sys.buf_free,
			      OS_FILE_LOG_BLOCK_SIZE));

	log_block_set_data_len(log_block, data_len);

	if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
		/* This block became full */
		log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
		log_block_set_checkpoint_no(log_block,
					    log_sys.next_checkpoint_no);
		/* The lsn also advances over the header and trailer of
		the completed block. */
		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;

		log_sys.lsn += len;

		/* Initialize the next block header */
		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE,
			       log_sys.lsn);
	} else {
		log_sys.lsn += len;
	}

	log_sys.buf_free += ulong(len);

	ut_ad(log_sys.buf_free <= srv_log_buffer_size);

	if (str_len > 0) {
		goto part_loop;
	}

	srv_stats.log_write_requests.inc();
}
464
/************************************************************//**
Closes the log. Finalizes the current block's first-rec-group field,
and requests a flush or checkpoint if the buffer or the checkpoint
age has grown too large.
@return lsn */
lsn_t
log_close(void)
/*===========*/
{
	byte*	log_block;
	ulint	first_rec_group;
	lsn_t	oldest_lsn;
	lsn_t	lsn;
	lsn_t	checkpoint_age;

	ut_ad(log_mutex_own());

	lsn = log_sys.lsn;

	/* Block that the last bytes were written into. */
	log_block = static_cast<byte*>(
		ut_align_down(log_sys.buf + log_sys.buf_free,
			      OS_FILE_LOG_BLOCK_SIZE));

	first_rec_group = log_block_get_first_rec_group(log_block);

	if (first_rec_group == 0) {
		/* We initialized a new log block which was not written
		full by the current mtr: the next mtr log record group
		will start within this block at the offset data_len */

		log_block_set_first_rec_group(
			log_block, log_block_get_data_len(log_block));
	}

	if (log_sys.buf_free > log_sys.max_buf_free) {
		/* Ask background processing to write the buffer out. */
		log_sys.check_flush_or_checkpoint = true;
	}

	checkpoint_age = lsn - log_sys.last_checkpoint_lsn;

	if (checkpoint_age >= log_sys.log_group_capacity) {
		DBUG_EXECUTE_IF(
			"print_all_chkp_warnings",
			log_has_printed_chkp_warning = false;);

		/* Rate-limit the error to one message per 15 seconds. */
		if (!log_has_printed_chkp_warning
		    || difftime(time(NULL), log_last_warning_time) > 15) {

			log_has_printed_chkp_warning = true;
			log_last_warning_time = time(NULL);

			ib::error() << "The age of the last checkpoint is "
				<< checkpoint_age << ", which exceeds the log"
				" group capacity "
				<< log_sys.log_group_capacity
				<< ".";
		}
	}

	if (checkpoint_age <= log_sys.max_modified_age_sync) {
		goto function_exit;
	}

	oldest_lsn = buf_pool_get_oldest_modification();

	/* Request flushing/checkpointing when the age limits are
	exceeded, or when there are no dirty pages at all (the
	checkpoint can then advance cheaply). */
	if (!oldest_lsn
	    || lsn - oldest_lsn > log_sys.max_modified_age_sync
	    || checkpoint_age > log_sys.max_checkpoint_age_async) {
		log_sys.check_flush_or_checkpoint = true;
	}
function_exit:

	return(lsn);
}
537
538/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
539and lsn - buf_get_oldest_modification().
540@param[in] file_size requested innodb_log_file_size
541@retval true on success
542@retval false if the smallest log group is too small to
543accommodate the number of OS threads in the database server */
544bool
545log_set_capacity(ulonglong file_size)
546{
547 lsn_t margin;
548 ulint free;
549
550 lsn_t smallest_capacity = (file_size - LOG_FILE_HDR_SIZE)
551 * srv_n_log_files;
552 /* Add extra safety */
553 smallest_capacity -= smallest_capacity / 10;
554
555 /* For each OS thread we must reserve so much free space in the
556 smallest log group that it can accommodate the log entries produced
557 by single query steps: running out of free log space is a serious
558 system error which requires rebooting the database. */
559
560 free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
561 + LOG_CHECKPOINT_EXTRA_FREE;
562 if (free >= smallest_capacity / 2) {
563 ib::error() << "Cannot continue operation. ib_logfiles are too"
564 " small for innodb_thread_concurrency="
565 << srv_thread_concurrency << ". The combined size of"
566 " ib_logfiles should be bigger than"
567 " 200 kB * innodb_thread_concurrency. "
568 << INNODB_PARAMETERS_MSG;
569 return(false);
570 }
571
572 margin = smallest_capacity - free;
573 margin = margin - margin / 10; /* Add still some extra safety */
574
575 log_mutex_enter();
576
577 log_sys.log_group_capacity = smallest_capacity;
578
579 log_sys.max_modified_age_async = margin
580 - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
581 log_sys.max_modified_age_sync = margin
582 - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
583
584 log_sys.max_checkpoint_age_async = margin - margin
585 / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
586 log_sys.max_checkpoint_age = margin;
587
588 log_mutex_exit();
589
590 return(true);
591}
592
/** Initialize the redo log subsystem. */
void log_t::create()
{
  ut_ad(this == &log_sys);
  ut_ad(!is_initialised());
  m_initialised= true;

  mutex_create(LATCH_ID_LOG_SYS, &mutex);
  mutex_create(LATCH_ID_LOG_WRITE, &write_mutex);
  mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_flush_order_mutex);

  /* Start the lsn from one log block from zero: this way every
  log record has a non-zero start lsn, a fact which we will use */

  lsn= LOG_START_LSN;

  ut_ad(srv_log_buffer_size >= 16 * OS_FILE_LOG_BLOCK_SIZE);
  ut_ad(srv_log_buffer_size >= 4U << srv_page_size_shift);

  /* Two adjacent buffer halves are allocated; log_buffer_switch()
  alternates between them. */
  buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size * 2));
  TRASH_ALLOC(buf, srv_log_buffer_size * 2);

  first_in_use= true;

  max_buf_free= srv_log_buffer_size / LOG_BUF_FLUSH_RATIO -
    LOG_BUF_FLUSH_MARGIN;
  check_flush_or_checkpoint= true;

  n_log_ios_old= n_log_ios;
  last_printout_time= time(NULL);

  buf_next_to_write= 0;
  is_extending= false;
  write_lsn= lsn;
  flushed_to_disk_lsn= 0;
  n_pending_flushes= 0;
  /* flush_event starts signalled: no log flush is in progress. */
  flush_event = os_event_create("log_flush_event");
  os_event_set(flush_event);
  n_log_ios= 0;
  /* NOTE(review): n_log_ios_old was already assigned above; this
  second assignment looks redundant — confirm before removing. */
  n_log_ios_old= 0;
  log_group_capacity= 0;
  max_modified_age_async= 0;
  max_modified_age_sync= 0;
  max_checkpoint_age_async= 0;
  max_checkpoint_age= 0;
  next_checkpoint_no= 0;
  next_checkpoint_lsn= 0;
  append_on_checkpoint= NULL;
  n_pending_checkpoint_writes= 0;

  last_checkpoint_lsn= lsn;
  rw_lock_create(checkpoint_lock_key, &checkpoint_lock, SYNC_NO_ORDER_CHECK);

  /* Format the first log block and account for its header in the
  free-offset and the lsn. */
  log_block_init(buf, lsn);
  log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);

  buf_free= LOG_BLOCK_HDR_SIZE;
  lsn= LOG_START_LSN + LOG_BLOCK_HDR_SIZE;

  MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, lsn - last_checkpoint_lsn);

  log_scrub_thread_active= !srv_read_only_mode && srv_scrub_log;
  if (log_scrub_thread_active) {
    log_scrub_event= os_event_create("log_scrub_event");
    os_thread_create(log_scrub_thread, NULL, NULL);
  }
}
660
661/** Initialize the redo log.
662@param[in] n_files number of files */
663void log_t::files::create(ulint n_files)
664{
665 ut_ad(n_files <= SRV_N_LOG_FILES_MAX);
666 ut_ad(this == &log_sys.log);
667 ut_ad(log_sys.is_initialised());
668
669 this->n_files= n_files;
670 format= srv_encrypt_log
671 ? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED
672 : LOG_HEADER_FORMAT_CURRENT;
673 file_size= srv_log_file_size;
674 state= LOG_GROUP_OK;
675 lsn= LOG_START_LSN;
676 lsn_offset= LOG_FILE_HDR_SIZE;
677
678 byte* ptr= static_cast<byte*>(ut_zalloc_nokey(LOG_FILE_HDR_SIZE * n_files
679 + OS_FILE_LOG_BLOCK_SIZE));
680 file_header_bufs_ptr= ptr;
681 ptr= static_cast<byte*>(ut_align(ptr, OS_FILE_LOG_BLOCK_SIZE));
682
683 memset(file_header_bufs, 0, sizeof file_header_bufs);
684
685 for (ulint i = 0; i < n_files; i++, ptr += LOG_FILE_HDR_SIZE)
686 file_header_bufs[i] = ptr;
687}
688
/******************************************************//**
Writes a log file header to a log file space. The header carries the
log format, the lsn at which the file's data starts, and the creator
string, protected by a CRC-32C checksum. */
static
void
log_file_header_flush(
	ulint	nth_file,	/*!< in: header to the nth file in the
				log file space */
	lsn_t	start_lsn)	/*!< in: log file data starts at this
				lsn */
{
	byte*	buf;
	lsn_t	dest_offset;

	ut_ad(log_write_mutex_own());
	ut_ad(!recv_no_log_write);
	ut_a(nth_file < log_sys.log.n_files);
	ut_ad((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
	      == LOG_HEADER_FORMAT_CURRENT);

	buf = log_sys.log.file_header_bufs[nth_file];

	/* Compose the header block: format, start lsn, creator string. */
	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
	mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
	mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
	strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
	       LOG_HEADER_CREATOR_CURRENT);
	ut_ad(LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR
	      >= sizeof LOG_HEADER_CREATOR_CURRENT);
	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));

	/* The header lives at the very start of the nth file. */
	dest_offset = nth_file * log_sys.log.file_size;

	DBUG_PRINT("ib_log", ("write " LSN_PF
			      " file " ULINTPF " header",
			      start_lsn, nth_file));

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	srv_stats.os_log_pending_writes.inc();

	const ulint	page_no = ulint(dest_offset >> srv_page_size_shift);

	/* Synchronous write of the single header block. */
	fil_io(IORequestLogWrite, true,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
	       univ_page_size,
	       ulint(dest_offset & (srv_page_size - 1)),
	       OS_FILE_LOG_BLOCK_SIZE, buf, NULL);

	srv_stats.os_log_pending_writes.dec();
}
741
742/******************************************************//**
743Stores a 4-byte checksum to the trailer checksum field of a log block
744before writing it to a log file. This checksum is used in recovery to
745check the consistency of a log block. */
746static
747void
748log_block_store_checksum(
749/*=====================*/
750 byte* block) /*!< in/out: pointer to a log block */
751{
752 log_block_set_checksum(block, log_block_calc_checksum(block));
753}
754
/******************************************************//**
Writes a buffer to a log file. The write may wrap over a file boundary
within the log group, in which case it is split into multiple fil_io()
calls and a fresh file header is written at each wrap. */
static
void
log_write_buf(
	byte*	buf,		/*!< in: buffer */
	ulint	len,		/*!< in: buffer len; must be divisible
				by OS_FILE_LOG_BLOCK_SIZE */
#ifdef UNIV_DEBUG
	ulint	pad_len,	/*!< in: pad len in the buffer len */
#endif /* UNIV_DEBUG */
	lsn_t	start_lsn,	/*!< in: start lsn of the buffer; must
				be divisible by
				OS_FILE_LOG_BLOCK_SIZE */
	ulint	new_data_offset)/*!< in: start offset of new data in
				buf: this parameter is used to decide
				if we have to write a new log file
				header */
{
	ulint		write_len;
	bool		write_header = new_data_offset == 0;
	lsn_t		next_offset;
	ulint		i;

	ut_ad(log_write_mutex_own());
	ut_ad(!recv_no_log_write);
	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);

loop:
	if (len == 0) {

		return;
	}

	/* Byte offset within the circular log file group. */
	next_offset = log_sys.log.calc_lsn_offset(start_lsn);

	if (write_header
	    && next_offset % log_sys.log.file_size == LOG_FILE_HDR_SIZE) {
		/* We start to write a new log file instance in the group */

		ut_a(next_offset / log_sys.log.file_size <= ULINT_MAX);

		log_file_header_flush(
			ulint(next_offset / log_sys.log.file_size), start_lsn);
		srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);

		srv_stats.log_writes.inc();
	}

	/* Do not write past the end of the current file; the remainder
	is handled by another loop iteration after the wrap. */
	if ((next_offset % log_sys.log.file_size) + len
	    > log_sys.log.file_size) {
		/* if the above condition holds, then the below expression
		is < len which is ulint, so the typecast is ok */
		write_len = ulint(log_sys.log.file_size
				  - (next_offset % log_sys.log.file_size));
	} else {
		write_len = len;
	}

	DBUG_PRINT("ib_log",
		   ("write " LSN_PF " to " LSN_PF
		    ": len " ULINTPF
		    " blocks " ULINTPF ".." ULINTPF,
		    start_lsn, next_offset,
		    write_len,
		    log_block_get_hdr_no(buf),
		    log_block_get_hdr_no(
			    buf + write_len
			    - OS_FILE_LOG_BLOCK_SIZE)));

	ut_ad(pad_len >= len
	      || log_block_get_hdr_no(buf)
	      == log_block_convert_lsn_to_no(start_lsn));

	/* Calculate the checksums for each log block and write them to
	the trailer fields of the log blocks */

	for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
		/* Consecutive blocks must carry consecutive header
		numbers, except in padded regions. */
		ut_ad(pad_len >= len
		      || i * OS_FILE_LOG_BLOCK_SIZE >= len - pad_len
		      || log_block_get_hdr_no(
			      buf + i * OS_FILE_LOG_BLOCK_SIZE)
		      == log_block_get_hdr_no(buf) + i);
		log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
	}

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	srv_stats.os_log_pending_writes.inc();

	ut_a((next_offset >> srv_page_size_shift) <= ULINT_MAX);

	const ulint	page_no = ulint(next_offset >> srv_page_size_shift);

	/* Synchronous write of this segment of the buffer. */
	fil_io(IORequestLogWrite, true,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
	       univ_page_size,
	       ulint(next_offset & (srv_page_size - 1)), write_len, buf, NULL);

	srv_stats.os_log_pending_writes.dec();

	srv_stats.os_log_written.add(write_len);
	srv_stats.log_writes.inc();

	if (write_len < len) {
		/* The write wrapped at a file boundary; continue with
		the remainder at the start of the next file. */
		start_lsn += write_len;
		len -= write_len;
		buf += write_len;

		write_header = true;

		goto loop;
	}
}
872
/** Flush the recently written changes to the log file.
NOTE: this function acquires log_sys.mutex via log_mutex_enter() and
returns with the mutex still held; the caller must release it. */
static
void
log_write_flush_to_disk_low()
{
	/* FIXME: This is not holding log_sys.mutex while
	calling os_event_set()! */
	ut_a(log_sys.n_pending_flushes == 1); /* No other threads here */

	/* With O_DSYNC, the write itself was already durable and no
	separate fsync is needed. */
	bool	do_flush = srv_file_flush_method != SRV_O_DSYNC;

	if (do_flush) {
		fil_flush(SRV_LOG_SPACE_FIRST_ID);
	}

	MONITOR_DEC(MONITOR_PENDING_LOG_FLUSH);

	log_mutex_enter();
	if (do_flush) {
		log_sys.flushed_to_disk_lsn = log_sys.current_flush_lsn;
	}

	log_sys.n_pending_flushes--;

	/* Wake up any threads waiting in log_write_up_to(). */
	os_event_set(log_sys.flush_event);
}
900
/** Switch the log buffer in use, and copy the content of last block
from old log buffer to the head of the to be used one. Thus, buf_free and
buf_next_to_write would be changed accordingly */
static inline
void
log_buffer_switch()
{
	ut_ad(log_mutex_own());
	ut_ad(log_write_mutex_own());

	const byte*	old_buf = log_sys.buf;
	/* End of the area to be written, rounded up to a full block. */
	ulint		area_end = ut_calc_align(log_sys.buf_free,
						 OS_FILE_LOG_BLOCK_SIZE);

	/* Toggle between the two halves of the doubly-sized allocation
	(see log_t::create() / log_buffer_extend()). */
	if (log_sys.first_in_use) {
		log_sys.first_in_use = false;
		ut_ad(log_sys.buf == ut_align(log_sys.buf,
					      OS_FILE_LOG_BLOCK_SIZE));
		log_sys.buf += srv_log_buffer_size;
	} else {
		log_sys.first_in_use = true;
		log_sys.buf -= srv_log_buffer_size;
		ut_ad(log_sys.buf == ut_align(log_sys.buf,
					      OS_FILE_LOG_BLOCK_SIZE));
	}

	/* Copy the last block to new buf */
	ut_memcpy(log_sys.buf,
		  old_buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
		  OS_FILE_LOG_BLOCK_SIZE);

	/* The new buffer continues from the partial last block;
	everything before it is considered written. */
	log_sys.buf_free %= OS_FILE_LOG_BLOCK_SIZE;
	log_sys.buf_next_to_write = log_sys.buf_free;
}
935
/** Ensure that the log has been written to the log file up to a given
log entry (such as that of a transaction commit). Start a new write, or
wait and check if an already running write is covering the request.
@param[in]	lsn	log sequence number that should be
included in the redo log file write
@param[in]	flush_to_disk	whether the written log should also
be flushed to the file system */
void
log_write_up_to(
	lsn_t	lsn,
	bool	flush_to_disk)
{
#ifdef UNIV_DEBUG
	ulint		loop_count = 0;
#endif /* UNIV_DEBUG */
	byte*           write_buf;
	lsn_t           write_lsn;

	ut_ad(!srv_read_only_mode);

	if (recv_no_ibuf_operations) {
		/* Recovery is running and no operations on the log files are
		allowed yet (the variable name .._no_ibuf_.. is misleading) */

		return;
	}

	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
		/* Keep the service manager from timing us out during a
		potentially long shutdown write. */
		service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
					       "log write up to: " LSN_PF,
					       lsn);
	}

loop:
	ut_ad(++loop_count < 128);

#if UNIV_WORD_SIZE > 7
	/* We can do a dirty read of LSN. */
	/* NOTE: Currently doesn't do dirty read for
	(flush_to_disk == true) case, because the log_mutex
	contention also works as the arbitrator for write-IO
	(fsync) bandwidth between log files and data files. */
	if (!flush_to_disk && log_sys.write_lsn >= lsn) {
		return;
	}
#endif

	log_write_mutex_enter();
	ut_ad(!recv_no_log_write);

	/* The lsn up to which the request is already satisfied. */
	lsn_t	limit_lsn = flush_to_disk
		? log_sys.flushed_to_disk_lsn
		: log_sys.write_lsn;

	if (limit_lsn >= lsn) {
		log_write_mutex_exit();
		return;
	}

	/* If it is a write call we should just go ahead and do it
	as we checked that write_lsn is not where we'd like it to
	be. If we have to flush as well then we check if there is a
	pending flush and based on that we wait for it to finish
	before proceeding further. */
	if (flush_to_disk
	    && (log_sys.n_pending_flushes > 0
		|| !os_event_is_set(log_sys.flush_event))) {
		/* Figure out if the current flush will do the job
		for us. */
		bool work_done = log_sys.current_flush_lsn >= lsn;

		log_write_mutex_exit();

		os_event_wait(log_sys.flush_event);

		if (work_done) {
			return;
		} else {
			goto loop;
		}
	}

	log_mutex_enter();
	if (!flush_to_disk
	    && log_sys.buf_free == log_sys.buf_next_to_write) {
		/* Nothing to write and no flush to disk requested */
		log_mutex_exit_all();
		return;
	}

	ulint		start_offset;
	ulint		end_offset;
	ulint		area_start;
	ulint		area_end;
	ulong		write_ahead_size = srv_log_write_ahead_size;
	ulint		pad_size;

	DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
			      log_sys.write_lsn,
			      log_sys.lsn));
	if (flush_to_disk) {
		/* Claim the flush slot; other flush waiters block on
		flush_event until log_write_flush_to_disk_low() sets
		it again. */
		log_sys.n_pending_flushes++;
		log_sys.current_flush_lsn = log_sys.lsn;
		MONITOR_INC(MONITOR_PENDING_LOG_FLUSH);
		os_event_reset(log_sys.flush_event);

		if (log_sys.buf_free == log_sys.buf_next_to_write) {
			/* Nothing to write, flush only */
			log_mutex_exit_all();
			log_write_flush_to_disk_low();
			/* log_write_flush_to_disk_low() returned
			holding log_sys.mutex; release it. */
			log_mutex_exit();
			return;
		}
	}

	start_offset = log_sys.buf_next_to_write;
	end_offset = log_sys.buf_free;

	/* Round the write area to whole log blocks. */
	area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
	area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);

	ut_ad(area_end - area_start > 0);

	log_block_set_flush_bit(log_sys.buf + area_start, TRUE);
	log_block_set_checkpoint_no(
		log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
		log_sys.next_checkpoint_no);

	write_lsn = log_sys.lsn;
	write_buf = log_sys.buf;

	/* Swap buffer halves so that mtr commits can proceed in the
	other half while this half is being written out. */
	log_buffer_switch();

	log_sys.log.set_fields(log_sys.write_lsn);

	log_mutex_exit();
	/* Erase the end of the last log block. */
	memset(write_buf + end_offset, 0,
	       ~end_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));

	/* Calculate pad_size if needed. */
	pad_size = 0;
	if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
		ulint	end_offset_in_unit;
		lsn_t	end_offset = log_sys.log.calc_lsn_offset(
			ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
		end_offset_in_unit = (ulint) (end_offset % write_ahead_size);

		if (end_offset_in_unit > 0
		    && (area_end - area_start) > end_offset_in_unit) {
			/* The first block in the unit was initialized
			after the last writing.
			Needs to be written padded data once. */
			pad_size = std::min<ulint>(
				ulint(write_ahead_size) - end_offset_in_unit,
				srv_log_buffer_size - area_end);
			::memset(write_buf + area_end, 0, pad_size);
		}
	}

	if (log_sys.is_encrypted()) {
		/* Encrypt the payload in place before writing it out. */
		log_crypt(write_buf + area_start, log_sys.write_lsn,
			  area_end - area_start);
	}

	/* Do the write to the log files */
	log_write_buf(
		write_buf + area_start, area_end - area_start + pad_size,
#ifdef UNIV_DEBUG
		pad_size,
#endif /* UNIV_DEBUG */
		ut_uint64_align_down(log_sys.write_lsn,
				     OS_FILE_LOG_BLOCK_SIZE),
		start_offset - area_start);
	srv_stats.log_padded.add(pad_size);
	log_sys.write_lsn = write_lsn;


	if (srv_file_flush_method == SRV_O_DSYNC) {
		/* O_SYNC means the OS did not buffer the log file at all:
		so we have also flushed to disk what we have written */
		log_sys.flushed_to_disk_lsn = log_sys.write_lsn;
	}

	log_write_mutex_exit();

	if (flush_to_disk) {
		/* Re-acquires log_sys.mutex and returns holding it. */
		log_write_flush_to_disk_low();
		ib_uint64_t flush_lsn = log_sys.flushed_to_disk_lsn;
		log_mutex_exit();

		/* Notify the server layer of the new durable lsn. */
		innobase_mysql_log_notify(flush_lsn);
	}
}
1130
1131/** write to the log file up to the last log entry.
1132@param[in] sync whether we want the written log
1133also to be flushed to disk. */
1134void
1135log_buffer_flush_to_disk(
1136 bool sync)
1137{
1138 ut_ad(!srv_read_only_mode);
1139 log_write_up_to(log_get_lsn(), sync);
1140}
1141
1142/****************************************************************//**
1143This functions writes the log buffer to the log file and if 'flush'
1144is set it forces a flush of the log file as well. This is meant to be
1145called from background master thread only as it does not wait for
1146the write (+ possible flush) to finish. */
1147void
1148log_buffer_sync_in_background(
1149/*==========================*/
1150 bool flush) /*!< in: flush the logs to disk */
1151{
1152 lsn_t lsn;
1153
1154 log_mutex_enter();
1155
1156 lsn = log_sys.lsn;
1157
1158 if (flush
1159 && log_sys.n_pending_flushes > 0
1160 && log_sys.current_flush_lsn >= lsn) {
1161 /* The write + flush will write enough */
1162 log_mutex_exit();
1163 return;
1164 }
1165
1166 log_mutex_exit();
1167
1168 log_write_up_to(lsn, flush);
1169}
1170
1171/********************************************************************
1172
1173Tries to establish a big enough margin of free space in the log buffer, such
1174that a new log entry can be catenated without an immediate need for a flush. */
1175static
1176void
1177log_flush_margin(void)
1178/*==================*/
1179{
1180 lsn_t lsn = 0;
1181
1182 log_mutex_enter();
1183
1184 if (log_sys.buf_free > log_sys.max_buf_free) {
1185 /* We can write during flush */
1186 lsn = log_sys.lsn;
1187 }
1188
1189 log_mutex_exit();
1190
1191 if (lsn) {
1192 log_write_up_to(lsn, false);
1193 }
1194}
1195
/** Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool.
NOTE: this function may only be called if the calling thread owns no
synchronization objects!
@param[in]	new_oldest	try to advance oldest_modified_lsn at least to
this lsn
@return false if there was a flush batch of the same type running,
which means that we could not start this flush batch */
static
bool
log_preflush_pool_modified_pages(
	lsn_t	new_oldest)
{
	bool	success;

	if (recv_recovery_on) {
		/* If the recovery is running, we must first apply all
		log records to their respective file pages to get the
		right modify lsn values to these pages: otherwise, there
		might be pages on disk which are not yet recovered to the
		current lsn, and even after calling this function, we could
		not know how up-to-date the disk version of the database is,
		and we could not make a new checkpoint on the basis of the
		info on the buffer pool only. */
		recv_apply_hashed_log_recs(true);
	}

	if (new_oldest == LSN_MAX
	    || !buf_page_cleaner_is_active
	    || srv_is_being_started) {

		/* Flush synchronously from this thread: either everything
		was requested (LSN_MAX) or the page cleaner is not
		available to do the flushing for us. */
		ulint	n_pages;

		success = buf_flush_lists(ULINT_MAX, new_oldest, &n_pages);

		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);

		if (!success) {
			/* Another flush batch of the same type was already
			running; record the wait for monitoring. */
			MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
		}

		MONITOR_INC_VALUE_CUMULATIVE(
			MONITOR_FLUSH_SYNC_TOTAL_PAGE,
			MONITOR_FLUSH_SYNC_COUNT,
			MONITOR_FLUSH_SYNC_PAGES,
			n_pages);
	} else {
		/* better to wait for flushed by page cleaner */

		if (srv_flush_sync) {
			/* wake page cleaner for IO burst */
			buf_flush_request_force(new_oldest);
		}

		buf_flush_wait_flushed(new_oldest);

		success = true;
	}

	return(success);
}
1257
1258/******************************************************//**
1259Completes a checkpoint. */
1260static
1261void
1262log_complete_checkpoint(void)
1263/*=========================*/
1264{
1265 ut_ad(log_mutex_own());
1266 ut_ad(log_sys.n_pending_checkpoint_writes == 0);
1267
1268 log_sys.next_checkpoint_no++;
1269
1270 log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn;
1271 MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
1272 log_sys.lsn - log_sys.last_checkpoint_lsn);
1273
1274 DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
1275 ", flushed to " LSN_PF,
1276 log_sys.last_checkpoint_lsn,
1277 log_sys.flushed_to_disk_lsn));
1278
1279 rw_lock_x_unlock_gen(&(log_sys.checkpoint_lock), LOG_CHECKPOINT);
1280}
1281
1282/** Complete an asynchronous checkpoint write. */
1283void log_t::complete_checkpoint()
1284{
1285 ut_ad(this == &log_sys);
1286 MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
1287
1288 log_mutex_enter();
1289
1290 ut_ad(n_pending_checkpoint_writes > 0);
1291
1292 if (!--n_pending_checkpoint_writes) {
1293 log_complete_checkpoint();
1294 }
1295
1296 log_mutex_exit();
1297}
1298
/** Write checkpoint info to the log header.
@param[in]	end_lsn	start LSN of the MLOG_CHECKPOINT mini-transaction */
static
void
log_group_checkpoint(lsn_t end_lsn)
{
	lsn_t		lsn_offset;

	ut_ad(!srv_read_only_mode);
	ut_ad(log_mutex_own());
	ut_ad(end_lsn == 0 || end_lsn >= log_sys.next_checkpoint_lsn);
	ut_ad(end_lsn <= log_sys.lsn);
	ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys.lsn
	      || srv_shutdown_state != SRV_SHUTDOWN_NONE);

	DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
			      " written",
			      log_sys.next_checkpoint_no,
			      log_sys.next_checkpoint_lsn));

	/* Build one log block of checkpoint information. */
	byte* buf = log_sys.checkpoint_buf;
	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);

	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);

	if (log_sys.is_encrypted()) {
		/* Also store the redo log encryption information. */
		log_crypt_write_checkpoint_buf(buf);
	}

	lsn_offset = log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
	mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
	mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
			srv_log_buffer_size);
	mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);

	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));

	MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	ut_ad(LOG_CHECKPOINT_1 < srv_page_size);
	ut_ad(LOG_CHECKPOINT_2 < srv_page_size);

	if (log_sys.n_pending_checkpoint_writes++ == 0) {
		/* First pending write: acquire the latch that blocks
		synchronous waiters until log_t::complete_checkpoint()
		releases it. */
		rw_lock_x_lock_gen(&log_sys.checkpoint_lock,
				   LOG_CHECKPOINT);
	}

	/* Note: We alternate the physical place of the checkpoint info.
	See the (next_checkpoint_no & 1) below. */

	/* Start an asynchronous write; the message pointer
	reinterpret_cast<void*>(1) tags the I/O as a checkpoint write. */
	fil_io(IORequestLogWrite, false,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID, 0),
	       univ_page_size,
	       (log_sys.next_checkpoint_no & 1)
	       ? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
	       OS_FILE_LOG_BLOCK_SIZE,
	       buf, reinterpret_cast<void*>(1) /* checkpoint write */);
}
1362
/** Read a log group header page to log_sys.checkpoint_buf.
@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
void log_header_read(ulint header)
{
	ut_ad(log_mutex_own());

	log_sys.n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	/* Synchronously read one log block at byte offset 'header'
	within the first redo log file. */
	fil_io(IORequestLogRead, true,
	       page_id_t(SRV_LOG_SPACE_FIRST_ID,
			 header >> srv_page_size_shift),
	       univ_page_size, header & (srv_page_size - 1),
	       OS_FILE_LOG_BLOCK_SIZE, log_sys.checkpoint_buf, NULL);
}
1379
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in]	sync	whether to wait for the write to complete
@param[in]	end_lsn	start LSN of the MLOG_CHECKPOINT mini-transaction */
void
log_write_checkpoint_info(bool sync, lsn_t end_lsn)
{
	ut_ad(log_mutex_own());
	ut_ad(!srv_read_only_mode);

	/* Starts an asynchronous checkpoint write; if this is the
	first pending write, it X-latches log_sys.checkpoint_lock. */
	log_group_checkpoint(end_lsn);

	log_mutex_exit();

	MONITOR_INC(MONITOR_NUM_CHECKPOINT);

	if (sync) {
		/* Wait for the checkpoint write to complete */
		rw_lock_s_lock(&log_sys.checkpoint_lock);
		rw_lock_s_unlock(&log_sys.checkpoint_lock);

		DBUG_EXECUTE_IF(
			"crash_after_checkpoint",
			DBUG_SUICIDE(););
	}
}
1405
1406/** Set extra data to be written to the redo log during checkpoint.
1407@param[in] buf data to be appended on checkpoint, or NULL
1408@return pointer to previous data to be appended on checkpoint */
1409mtr_buf_t*
1410log_append_on_checkpoint(
1411 mtr_buf_t* buf)
1412{
1413 log_mutex_enter();
1414 mtr_buf_t* old = log_sys.append_on_checkpoint;
1415 log_sys.append_on_checkpoint = buf;
1416 log_mutex_exit();
1417 return(old);
1418}
1419
/** Make a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
modification in the pool, and writes information about the lsn in
log files. Use log_make_checkpoint_at() to flush also the pool.
@param[in]	sync		whether to wait for the write to complete
@param[in]	write_always	force a write even if no log
has been generated since the latest checkpoint
@return true if success, false if a checkpoint write was already running */
bool
log_checkpoint(
	bool	sync,
	bool	write_always)
{
	lsn_t	oldest_lsn;

	ut_ad(!srv_read_only_mode);

	DBUG_EXECUTE_IF("no_checkpoint",
			/* We sleep for a long enough time, forcing
			the checkpoint doesn't happen any more. */
			os_thread_sleep(360000000););

	if (recv_recovery_is_on()) {
		/* Apply any buffered redo log records first, so that
		buffer pool pages carry up-to-date modify LSNs. */
		recv_apply_hashed_log_recs(true);
	}

	switch (srv_file_flush_method) {
	case SRV_NOSYNC:
		break;
	case SRV_O_DSYNC:
	case SRV_FSYNC:
	case SRV_LITTLESYNC:
	case SRV_O_DIRECT:
	case SRV_O_DIRECT_NO_FSYNC:
#ifdef _WIN32
	case SRV_ALL_O_DIRECT_FSYNC:
#endif
		/* Durably write any buffered changes of the data
		files before advancing the checkpoint. */
		fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
	}

	log_mutex_enter();

	ut_ad(!recv_no_log_write);
	oldest_lsn = log_buf_pool_get_oldest_modification();

	/* Because log also contains headers and dummy log records,
	log_buf_pool_get_oldest_modification() will return log_sys.lsn
	if the buffer pool contains no dirty buffers.
	We must make sure that the log is flushed up to that lsn.
	If there are dirty buffers in the buffer pool, then our
	write-ahead-logging algorithm ensures that the log has been
	flushed up to oldest_lsn. */

	ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
	if (!write_always
	    && oldest_lsn
	    <= log_sys.last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
		/* Do nothing, because nothing was logged (other than
		a MLOG_CHECKPOINT marker) since the previous checkpoint. */
		log_mutex_exit();
		return(true);
	}
	/* Repeat the MLOG_FILE_NAME records after the checkpoint, in
	case some log records between the checkpoint and log_sys.lsn
	need them. Finally, write a MLOG_CHECKPOINT marker. Redo log
	apply expects to see a MLOG_CHECKPOINT after the checkpoint,
	except on clean shutdown, where the log will be empty after
	the checkpoint.
	It is important that we write out the redo log before any
	further dirty pages are flushed to the tablespace files. At
	this point, because log_mutex_own(), mtr_commit() in other
	threads will be blocked, and no pages can be added to the
	flush lists. */
	lsn_t		flush_lsn	= oldest_lsn;
	const lsn_t	end_lsn		= log_sys.lsn;
	const bool	do_write
		= srv_shutdown_state == SRV_SHUTDOWN_NONE
		|| flush_lsn != end_lsn;

	if (fil_names_clear(flush_lsn, do_write)) {
		ut_ad(log_sys.lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
		flush_lsn = log_sys.lsn;
	}

	log_mutex_exit();

	/* Make the log durable up to flush_lsn before writing the
	checkpoint that refers to it. */
	log_write_up_to(flush_lsn, true);

	DBUG_EXECUTE_IF(
		"using_wa_checkpoint_middle",
		if (write_always) {
			DEBUG_SYNC_C("wa_checkpoint_middle");

			const my_bool b = TRUE;
			buf_flush_page_cleaner_disabled_debug_update(
				NULL, NULL, NULL, &b);
			dict_stats_disabled_debug_update(
				NULL, NULL, NULL, &b);
			srv_master_thread_disabled_debug_update(
				NULL, NULL, NULL, &b);
		});

	log_mutex_enter();

	ut_ad(log_sys.flushed_to_disk_lsn >= flush_lsn);
	ut_ad(flush_lsn >= oldest_lsn);

	if (log_sys.last_checkpoint_lsn >= oldest_lsn) {
		/* A concurrent checkpoint already advanced at least
		this far while we did not hold the mutex. */
		log_mutex_exit();
		return(true);
	}

	if (log_sys.n_pending_checkpoint_writes > 0) {
		/* A checkpoint write is running */
		log_mutex_exit();

		if (sync) {
			/* Wait for the checkpoint write to complete */
			rw_lock_s_lock(&log_sys.checkpoint_lock);
			rw_lock_s_unlock(&log_sys.checkpoint_lock);
		}

		return(false);
	}

	log_sys.next_checkpoint_lsn = oldest_lsn;
	/* This call releases the log mutex. */
	log_write_checkpoint_info(sync, end_lsn);
	ut_ad(!log_mutex_own());

	return(true);
}
1551
1552/** Make a checkpoint at or after a specified LSN.
1553@param[in] lsn the log sequence number, or LSN_MAX
1554for the latest LSN
1555@param[in] write_always force a write even if no log
1556has been generated since the latest checkpoint */
1557void
1558log_make_checkpoint_at(
1559 lsn_t lsn,
1560 bool write_always)
1561{
1562 /* Preflush pages synchronously */
1563
1564 while (!log_preflush_pool_modified_pages(lsn)) {
1565 /* Flush as much as we can */
1566 }
1567
1568 while (!log_checkpoint(true, write_always)) {
1569 /* Force a checkpoint */
1570 }
1571}
1572
1573/****************************************************************//**
1574Tries to establish a big enough margin of free space in the log groups, such
1575that a new log entry can be catenated without an immediate need for a
1576checkpoint. NOTE: this function may only be called if the calling thread
1577owns no synchronization objects! */
1578static
1579void
1580log_checkpoint_margin(void)
1581/*=======================*/
1582{
1583 lsn_t age;
1584 lsn_t checkpoint_age;
1585 ib_uint64_t advance;
1586 lsn_t oldest_lsn;
1587 bool success;
1588loop:
1589 advance = 0;
1590
1591 log_mutex_enter();
1592 ut_ad(!recv_no_log_write);
1593
1594 if (!log_sys.check_flush_or_checkpoint) {
1595 log_mutex_exit();
1596 return;
1597 }
1598
1599 oldest_lsn = log_buf_pool_get_oldest_modification();
1600
1601 age = log_sys.lsn - oldest_lsn;
1602
1603 if (age > log_sys.max_modified_age_sync) {
1604
1605 /* A flush is urgent: we have to do a synchronous preflush */
1606 advance = age - log_sys.max_modified_age_sync;
1607 }
1608
1609 checkpoint_age = log_sys.lsn - log_sys.last_checkpoint_lsn;
1610
1611 bool checkpoint_sync;
1612 bool do_checkpoint;
1613
1614 if (checkpoint_age > log_sys.max_checkpoint_age) {
1615 /* A checkpoint is urgent: we do it synchronously */
1616 checkpoint_sync = true;
1617 do_checkpoint = true;
1618 } else if (checkpoint_age > log_sys.max_checkpoint_age_async) {
1619 /* A checkpoint is not urgent: do it asynchronously */
1620 do_checkpoint = true;
1621 checkpoint_sync = false;
1622 log_sys.check_flush_or_checkpoint = false;
1623 } else {
1624 do_checkpoint = false;
1625 checkpoint_sync = false;
1626 log_sys.check_flush_or_checkpoint = false;
1627 }
1628
1629 log_mutex_exit();
1630
1631 if (advance) {
1632 lsn_t new_oldest = oldest_lsn + advance;
1633
1634 success = log_preflush_pool_modified_pages(new_oldest);
1635
1636 /* If the flush succeeded, this thread has done its part
1637 and can proceed. If it did not succeed, there was another
1638 thread doing a flush at the same time. */
1639 if (!success) {
1640 log_mutex_enter();
1641 log_sys.check_flush_or_checkpoint = true;
1642 log_mutex_exit();
1643 goto loop;
1644 }
1645 }
1646
1647 if (do_checkpoint) {
1648 log_checkpoint(checkpoint_sync, FALSE);
1649
1650 if (checkpoint_sync) {
1651
1652 goto loop;
1653 }
1654 }
1655}
1656
1657/**
1658Checks that there is enough free space in the log to start a new query step.
1659Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
1660function may only be called if the calling thread owns no synchronization
1661objects! */
1662void
1663log_check_margins(void)
1664{
1665 bool check;
1666
1667 do {
1668 log_flush_margin();
1669 log_checkpoint_margin();
1670 log_mutex_enter();
1671 ut_ad(!recv_no_log_write);
1672 check = log_sys.check_flush_or_checkpoint;
1673 log_mutex_exit();
1674 } while (check);
1675}
1676
1677/****************************************************************//**
1678Makes a checkpoint at the latest lsn and writes it to first page of each
1679data file in the database, so that we know that the file spaces contain
1680all modifications up to that lsn. This can only be called at database
1681shutdown. This function also writes all log in log files to the log archive. */
1682void
1683logs_empty_and_mark_files_at_shutdown(void)
1684/*=======================================*/
1685{
1686 lsn_t lsn;
1687 ulint count = 0;
1688
1689 ib::info() << "Starting shutdown...";
1690
1691 /* Wait until the master thread and all other operations are idle: our
1692 algorithm only works if the server is idle at shutdown */
1693
1694 srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
1695loop:
1696 ut_ad(lock_sys.is_initialised() || !srv_was_started);
1697 ut_ad(log_sys.is_initialised() || !srv_was_started);
1698 ut_ad(fil_system.is_initialised() || !srv_was_started);
1699 os_event_set(srv_buf_resize_event);
1700
1701 if (!srv_read_only_mode) {
1702 os_event_set(srv_error_event);
1703 os_event_set(srv_monitor_event);
1704 os_event_set(srv_buf_dump_event);
1705 if (lock_sys.timeout_thread_active) {
1706 os_event_set(lock_sys.timeout_event);
1707 }
1708 if (dict_stats_event) {
1709 os_event_set(dict_stats_event);
1710 } else {
1711 ut_ad(!srv_dict_stats_thread_active);
1712 }
1713 if (recv_sys && recv_sys->flush_start) {
1714 /* This is in case recv_writer_thread was never
1715 started, or buf_flush_page_cleaner_coordinator
1716 failed to notice its termination. */
1717 os_event_set(recv_sys->flush_start);
1718 }
1719 }
1720#define COUNT_INTERVAL 600U
1721#define CHECK_INTERVAL 100000U
1722 os_thread_sleep(CHECK_INTERVAL);
1723
1724 count++;
1725
1726 /* Check that there are no longer transactions, except for
1727 PREPARED ones. We need this wait even for the 'very fast'
1728 shutdown, because the InnoDB layer may have committed or
1729 prepared transactions and we don't want to lose them. */
1730
1731 if (ulint total_trx = srv_was_started && !srv_read_only_mode
1732 && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
1733 ? trx_sys.any_active_transactions() : 0) {
1734
1735 if (srv_print_verbose_log && count > COUNT_INTERVAL) {
1736 service_manager_extend_timeout(
1737 COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1738 "Waiting for %lu active transactions to finish",
1739 (ulong) total_trx);
1740 ib::info() << "Waiting for " << total_trx << " active"
1741 << " transactions to finish";
1742
1743 count = 0;
1744 }
1745
1746 goto loop;
1747 }
1748
1749 /* We need these threads to stop early in shutdown. */
1750 const char* thread_name;
1751
1752 if (srv_error_monitor_active) {
1753 thread_name = "srv_error_monitor_thread";
1754 } else if (srv_monitor_active) {
1755 thread_name = "srv_monitor_thread";
1756 } else if (srv_buf_resize_thread_active) {
1757 thread_name = "buf_resize_thread";
1758 goto wait_suspend_loop;
1759 } else if (srv_dict_stats_thread_active) {
1760 thread_name = "dict_stats_thread";
1761 } else if (lock_sys.timeout_thread_active) {
1762 thread_name = "lock_wait_timeout_thread";
1763 } else if (srv_buf_dump_thread_active) {
1764 thread_name = "buf_dump_thread";
1765 goto wait_suspend_loop;
1766 } else if (btr_defragment_thread_active) {
1767 thread_name = "btr_defragment_thread";
1768 } else if (srv_fast_shutdown != 2 && trx_rollback_is_active) {
1769 thread_name = "rollback of recovered transactions";
1770 } else {
1771 thread_name = NULL;
1772 }
1773
1774 if (thread_name) {
1775 ut_ad(!srv_read_only_mode);
1776wait_suspend_loop:
1777 service_manager_extend_timeout(
1778 COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1779 "Waiting for %s to exit", thread_name);
1780 if (srv_print_verbose_log && count > COUNT_INTERVAL) {
1781 ib::info() << "Waiting for " << thread_name
1782 << "to exit";
1783 count = 0;
1784 }
1785 goto loop;
1786 }
1787
1788 /* Check that the background threads are suspended */
1789
1790 switch (srv_get_active_thread_type()) {
1791 case SRV_NONE:
1792 if (!srv_n_fil_crypt_threads_started) {
1793 srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
1794 break;
1795 }
1796 os_event_set(fil_crypt_threads_event);
1797 thread_name = "fil_crypt_thread";
1798 goto wait_suspend_loop;
1799 case SRV_PURGE:
1800 case SRV_WORKER:
1801 ut_ad(!"purge was not shut down");
1802 srv_purge_wakeup();
1803 thread_name = "purge thread";
1804 goto wait_suspend_loop;
1805 case SRV_MASTER:
1806 thread_name = "master thread";
1807 goto wait_suspend_loop;
1808 }
1809
1810 /* At this point only page_cleaner should be active. We wait
1811 here to let it complete the flushing of the buffer pools
1812 before proceeding further. */
1813
1814 count = 0;
1815 service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1816 "Waiting for page cleaner");
1817 while (buf_page_cleaner_is_active) {
1818 ++count;
1819 os_thread_sleep(CHECK_INTERVAL);
1820 if (srv_print_verbose_log && count > COUNT_INTERVAL) {
1821 service_manager_extend_timeout(COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
1822 "Waiting for page cleaner");
1823 ib::info() << "Waiting for page_cleaner to "
1824 "finish flushing of buffer pool";
1825 count = 0;
1826 }
1827 }
1828
1829 if (log_scrub_thread_active) {
1830 ut_ad(!srv_read_only_mode);
1831 os_event_set(log_scrub_event);
1832 }
1833
1834 if (log_sys.is_initialised()) {
1835 log_mutex_enter();
1836 const ulint n_write = log_sys.n_pending_checkpoint_writes;
1837 const ulint n_flush = log_sys.n_pending_flushes;
1838 log_mutex_exit();
1839
1840 if (log_scrub_thread_active || n_write || n_flush) {
1841 if (srv_print_verbose_log && count > 600) {
1842 ib::info() << "Pending checkpoint_writes: "
1843 << n_write
1844 << ". Pending log flush writes: "
1845 << n_flush;
1846 count = 0;
1847 }
1848 goto loop;
1849 }
1850 }
1851
1852 ut_ad(!log_scrub_thread_active);
1853
1854 if (!buf_pool_ptr) {
1855 ut_ad(!srv_was_started);
1856 } else if (ulint pending_io = buf_pool_check_no_pending_io()) {
1857 if (srv_print_verbose_log && count > 600) {
1858 ib::info() << "Waiting for " << pending_io << " buffer"
1859 " page I/Os to complete";
1860 count = 0;
1861 }
1862
1863 goto loop;
1864 }
1865
1866 if (srv_fast_shutdown == 2 || !srv_was_started) {
1867 if (!srv_read_only_mode && srv_was_started) {
1868 ib::info() << "MySQL has requested a very fast"
1869 " shutdown without flushing the InnoDB buffer"
1870 " pool to data files. At the next mysqld"
1871 " startup InnoDB will do a crash recovery!";
1872
1873 /* In this fastest shutdown we do not flush the
1874 buffer pool:
1875
1876 it is essentially a 'crash' of the InnoDB server.
1877 Make sure that the log is all flushed to disk, so
1878 that we can recover all committed transactions in
1879 a crash recovery. We must not write the lsn stamps
1880 to the data files, since at a startup InnoDB deduces
1881 from the stamps if the previous shutdown was clean. */
1882
1883 log_buffer_flush_to_disk();
1884 }
1885
1886 srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
1887
1888 if (fil_system.is_initialised()) {
1889 fil_close_all_files();
1890 }
1891 return;
1892 }
1893
1894 if (!srv_read_only_mode) {
1895 service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
1896 "ensuring dirty buffer pool are written to log");
1897 log_make_checkpoint_at(LSN_MAX, TRUE);
1898
1899 log_mutex_enter();
1900
1901 lsn = log_sys.lsn;
1902
1903 const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn;
1904 ut_ad(lsn >= log_sys.last_checkpoint_lsn);
1905
1906 log_mutex_exit();
1907
1908 if (lsn_changed) {
1909 goto loop;
1910 }
1911
1912 /* Ensure that all buffered changes are written to the
1913 redo log before fil_close_all_files(). */
1914 fil_flush_file_spaces(FIL_TYPE_LOG);
1915 } else {
1916 lsn = srv_start_lsn;
1917 }
1918
1919 srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
1920
1921 /* Make some checks that the server really is quiet */
1922 ut_a(srv_get_active_thread_type() == SRV_NONE);
1923
1924 service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
1925 "Free innodb buffer pool");
1926 buf_all_freed();
1927
1928 ut_a(lsn == log_sys.lsn
1929 || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
1930
1931 if (lsn < srv_start_lsn) {
1932 ib::error() << "Shutdown LSN=" << lsn
1933 << " is less than start LSN=" << srv_start_lsn;
1934 }
1935
1936 srv_shutdown_lsn = lsn;
1937
1938 if (!srv_read_only_mode) {
1939 dberr_t err = fil_write_flushed_lsn(lsn);
1940
1941 if (err != DB_SUCCESS) {
1942 ib::error() << "Writing flushed lsn " << lsn
1943 << " failed; error=" << err;
1944 }
1945 }
1946
1947 fil_close_all_files();
1948
1949 /* Make some checks that the server really is quiet */
1950 ut_a(srv_get_active_thread_type() == SRV_NONE);
1951
1952 ut_a(lsn == log_sys.lsn
1953 || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
1954}
1955
1956/******************************************************//**
1957Peeks the current lsn.
1958@return TRUE if success, FALSE if could not get the log system mutex */
1959ibool
1960log_peek_lsn(
1961/*=========*/
1962 lsn_t* lsn) /*!< out: if returns TRUE, current lsn is here */
1963{
1964 if (0 == mutex_enter_nowait(&(log_sys.mutex))) {
1965 *lsn = log_sys.lsn;
1966
1967 log_mutex_exit();
1968
1969 return(TRUE);
1970 }
1971
1972 return(FALSE);
1973}
1974
1975/******************************************************//**
1976Prints info of the log. */
1977void
1978log_print(
1979/*======*/
1980 FILE* file) /*!< in: file where to print */
1981{
1982 double time_elapsed;
1983 time_t current_time;
1984
1985 log_mutex_enter();
1986
1987 fprintf(file,
1988 "Log sequence number " LSN_PF "\n"
1989 "Log flushed up to " LSN_PF "\n"
1990 "Pages flushed up to " LSN_PF "\n"
1991 "Last checkpoint at " LSN_PF "\n",
1992 log_sys.lsn,
1993 log_sys.flushed_to_disk_lsn,
1994 log_buf_pool_get_oldest_modification(),
1995 log_sys.last_checkpoint_lsn);
1996
1997 current_time = time(NULL);
1998
1999 time_elapsed = difftime(current_time,
2000 log_sys.last_printout_time);
2001
2002 if (time_elapsed <= 0) {
2003 time_elapsed = 1;
2004 }
2005
2006 fprintf(file,
2007 ULINTPF " pending log flushes, "
2008 ULINTPF " pending chkp writes\n"
2009 ULINTPF " log i/o's done, %.2f log i/o's/second\n",
2010 log_sys.n_pending_flushes,
2011 log_sys.n_pending_checkpoint_writes,
2012 log_sys.n_log_ios,
2013 static_cast<double>(
2014 log_sys.n_log_ios - log_sys.n_log_ios_old)
2015 / time_elapsed);
2016
2017 log_sys.n_log_ios_old = log_sys.n_log_ios;
2018 log_sys.last_printout_time = current_time;
2019
2020 log_mutex_exit();
2021}
2022
2023/**********************************************************************//**
2024Refreshes the statistics used to print per-second averages. */
2025void
2026log_refresh_stats(void)
2027/*===================*/
2028{
2029 log_sys.n_log_ios_old = log_sys.n_log_ios;
2030 log_sys.last_printout_time = time(NULL);
2031}
2032
/** Shut down the redo log subsystem. */
void log_t::close()
{
  ut_ad(this == &log_sys);
  if (!is_initialised()) return;
  m_initialised = false;
  log.close();

  /* The log buffer is one allocation of 2 * srv_log_buffer_size;
  when the second half is in use, rewind buf to the start of the
  allocation before freeing. */
  if (!first_in_use)
    buf -= srv_log_buffer_size;
  ut_free_dodump(buf, srv_log_buffer_size * 2);
  buf = NULL;

  os_event_destroy(flush_event);

  rw_lock_free(&checkpoint_lock);
  /* rw_lock_free() already called checkpoint_lock.~rw_lock_t();
  tame the debug assertions when the destructor will be called once more. */
  ut_ad(checkpoint_lock.magic_n == 0);
  ut_d(checkpoint_lock.magic_n = RW_LOCK_MAGIC_N);

  mutex_free(&mutex);
  mutex_free(&write_mutex);
  mutex_free(&log_flush_order_mutex);

  /* log_scrub_event only exists when scrubbing was enabled at startup. */
  if (!srv_read_only_mode && srv_scrub_log)
    os_event_destroy(log_scrub_event);

  recv_sys_close();
}
2063
2064/******************************************************//**
2065Pads the current log block full with dummy log records. Used in producing
2066consistent archived log files and scrubbing redo log. */
2067static
2068void
2069log_pad_current_log_block(void)
2070/*===========================*/
2071{
2072 byte b = MLOG_DUMMY_RECORD;
2073 ulint pad_length;
2074 ulint i;
2075 lsn_t lsn;
2076
2077 ut_ad(!recv_no_log_write);
2078 /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
2079 lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
2080
2081 pad_length = OS_FILE_LOG_BLOCK_SIZE
2082 - (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE)
2083 - LOG_BLOCK_TRL_SIZE;
2084 if (pad_length
2085 == (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
2086 - LOG_BLOCK_TRL_SIZE)) {
2087
2088 pad_length = 0;
2089 }
2090
2091 if (pad_length) {
2092 srv_stats.n_log_scrubs.inc();
2093 }
2094
2095 for (i = 0; i < pad_length; i++) {
2096 log_write_low(&b, 1);
2097 }
2098
2099 lsn = log_sys.lsn;
2100
2101 log_close();
2102
2103 ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
2104}
2105
2106/*****************************************************************//*
2107If no log record has been written for a while, fill current log
2108block with dummy records. */
2109static
2110void
2111log_scrub()
2112/*=========*/
2113{
2114 log_mutex_enter();
2115 ulint cur_lbn = log_block_convert_lsn_to_no(log_sys.lsn);
2116
2117 if (next_lbn_to_pad == cur_lbn)
2118 {
2119 log_pad_current_log_block();
2120 }
2121
2122 next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys.lsn);
2123 log_mutex_exit();
2124}
2125
2126/* log scrubbing speed, in bytes/sec */
2127UNIV_INTERN ulonglong innodb_scrub_log_speed;
2128
2129/*****************************************************************//**
2130This is the main thread for log scrub. It waits for an event and
2131when waked up fills current log block with dummy records and
2132sleeps again.
2133@return this function does not return, it calls os_thread_exit() */
2134extern "C" UNIV_INTERN
2135os_thread_ret_t
2136DECLARE_THREAD(log_scrub_thread)(void*)
2137{
2138 ut_ad(!srv_read_only_mode);
2139
2140 while (srv_shutdown_state < SRV_SHUTDOWN_FLUSH_PHASE) {
2141 /* log scrubbing interval in µs. */
2142 ulonglong interval = 1000*1000*512/innodb_scrub_log_speed;
2143
2144 os_event_wait_time(log_scrub_event, static_cast<ulint>(interval));
2145
2146 log_scrub();
2147
2148 os_event_reset(log_scrub_event);
2149 }
2150
2151 log_scrub_thread_active = false;
2152
2153 /* We count the number of threads in os_thread_exit(). A created
2154 thread should always use that to exit and not use return() to exit. */
2155 os_thread_exit();
2156
2157 OS_THREAD_DUMMY_RETURN;
2158}
2159