/*****************************************************************************

Copyright (c) 1995, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2013, 2018, MariaDB Corporation.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file buf/buf0buf.cc
The database buffer buf_pool

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#include "univ.i"
#include "mtr0types.h"
#include "mach0data.h"
#include "page0size.h"
#include "buf0buf.h"
#include <string.h>

#ifdef UNIV_NONINL
#include "buf0buf.ic"
#endif

#ifndef UNIV_INNOCHECKSUM
#include "mem0mem.h"
#include "btr0btr.h"
#include "fil0fil.h"
#include "fil0crypt.h"
#include "fsp0sysspace.h"
#include "buf0buddy.h"
#include "lock0lock.h"
#include "sync0rw.h"
#include "btr0sea.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "log0log.h"
#include "dict0stats_bg.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "dict0dict.h"
#include "log0recv.h"
#include "srv0mon.h"
65#include "fsp0sysspace.h"
#endif /* !UNIV_INNOCHECKSUM */
#include "page0zip.h"
#include "sync0sync.h"
#include "buf0dump.h"
#include "ut0new.h"
#include <new>
#include <map>
#include <sstream>
#ifndef UNIV_INNOCHECKSUM
#include "fil0pagecompress.h"
#include "fsp0pagecompress.h"
#endif
#include "ha_prototypes.h"
#include "ut0byte.h"

#ifdef UNIV_LINUX
#include <stdlib.h>
#endif

#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#include <numaif.h>
struct set_numa_interleave_t
{
	set_numa_interleave_t()
	{
		if (srv_numa_interleave) {

			struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
			ib::info() << "Setting NUMA memory policy to"
				" MPOL_INTERLEAVE";
			if (set_mempolicy(MPOL_INTERLEAVE,
					  numa_mems_allowed->maskp,
					  numa_mems_allowed->size) != 0) {

				ib::warn() << "Failed to set NUMA memory"
					" policy to MPOL_INTERLEAVE: "
					<< strerror(errno);
			}
		}
	}

	~set_numa_interleave_t()
	{
		if (srv_numa_interleave) {

			ib::info() << "Setting NUMA memory policy to"
				" MPOL_DEFAULT";
			if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) {
				ib::warn() << "Failed to set NUMA memory"
					" policy to MPOL_DEFAULT: "
					<< strerror(errno);
			}
		}
	}
};

#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE set_numa_interleave_t scoped_numa
#else
#define NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE
#endif /* HAVE_LIBNUMA */

#ifdef HAVE_SNAPPY
#include "snappy-c.h"
#endif

inline void* aligned_malloc(size_t size, size_t align) {
	void *result;
#ifdef _MSC_VER
	result = _aligned_malloc(size, align);
#elif defined (HAVE_POSIX_MEMALIGN)
	if (posix_memalign(&result, align, size)) {
		result = 0;
	}
#else
	/* Use unaligned malloc as fallback */
	result = malloc(size);
#endif
	return result;
}

inline void aligned_free(void *ptr) {
#ifdef _MSC_VER
	_aligned_free(ptr);
#else
	free(ptr);
#endif
}
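
/* A minimal usage sketch for the two helpers above (illustrative
only; the size and alignment are made-up values):

	void*	buf = aligned_malloc(16384, 4096);
	if (buf != NULL) {
		memset(buf, 0, 16384);
		aligned_free(buf);
	}

Note that the fallback branch above returns plain malloc() memory,
so callers must not depend on the alignment being honoured on every
platform. */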

/*
		IMPLEMENTATION OF THE BUFFER POOL
		=================================

Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.

A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.

SQL Server 7.0 can be configured to use 'fibers' which are lightweight
threads in NT. These should be studied.

		Buffer frames and blocks
		------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or shortly, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page, resides in the control block.

		Buffer pool struct
		------------------
The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool->mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool->mutex.

The buf_pool->mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
complicated.

A solution to reduce mutex contention of the buf_pool->mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool->mutex hold time.

		Control blocks
		--------------

The control block contains, for instance, the bufferfix count
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.

The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of two.

We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.

The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.
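
For example, with a power-of-2 table size the hash cell can be picked
with a bitwise AND instead of a modulo operation (an illustrative
sketch only, not the actual fold function):

	fold = (space_id << 20) + space_id + page_no;
	cell = fold & (table_size - 1);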

		Lists of blocks
		---------------

There are several lists of control blocks.

The free list (buf_pool->free) contains blocks which are currently not
used.

The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for the read-ahead mechanism
of pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays
in the main memory, undisturbed.
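
In pseudocode, inserting a block at the aging pointer looks roughly
like this (an illustrative sketch; the real logic lives in
buf_LRU_add_block() in buf0lru.cc):

	if (!old) {
		UT_LIST_ADD_FIRST(buf_pool->LRU, bpage);
	} else {
		insert bpage just behind buf_pool->LRU_old, so that it
		must prove itself useful before reaching the young end;
	}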

The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.

The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by buf_pool->flush_list_mutex.

The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].

The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.cc) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..srv_page_size / 2. These
blocks are inside the srv_page_size-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.

		Loading a file page
		-------------------

First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.

A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.

		Read-ahead
		----------

The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information if a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.

Let us first explain the read-ahead mechanism when the leaves
of a B-tree are scanned in an ascending or descending order.
When a page is referenced in the buf_pool for the first time,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area.
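
In pseudocode, the border test looks roughly like this (an
illustrative sketch; the real implementation is
buf_read_ahead_linear() in buf0rea.cc):

	low  = (page_no / 64) * 64;	the start of the area
	high = low + 64;		one past the end of the area
	if (page_no == low || page_no == high - 1) {
		if all pages in [low, high) were accessed in order,
		issue asynchronous reads for the next area;
	}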

A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
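
Again in pseudocode (an illustrative sketch; see
buf_read_ahead_random() in buf0rea.cc for the real implementation):

	low = (page_no / 32) * 32;
	if (enough of the 32 pages in [low, low + 32) have been
	    accessed recently) {
		issue asynchronous reads for all pages in the area;
	}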
*/

#ifndef UNIV_INNOCHECKSUM
/** Value in microseconds */
static const int	WAIT_FOR_READ = 100;
static const int	WAIT_FOR_WRITE = 100;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint	BUF_PAGE_READ_MAX_RETRIES = 100;
/** Number of pages to read ahead */
static const ulint	BUF_READ_AHEAD_PAGES = 64;
/** The maximum portion of the buffer pool that can be used for the
read-ahead buffer. (Divide buf_pool size by this amount) */
static const ulint	BUF_READ_AHEAD_PORTION = 32;

/** The buffer pools of the database */
buf_pool_t*	buf_pool_ptr;

/** true when resizing buffer pool is in the critical path. */
volatile bool	buf_pool_resizing;

/** true when withdrawing buffer pool pages might cause page relocation */
volatile bool	buf_pool_withdrawing;

/** The clock is incremented every time a pointer to a page may become
obsolete; if the withdraw clock has not changed, the pointer is still
valid in the buffer pool. If it has changed, the pointer might not be
in the buffer pool any more. */
volatile ulint	buf_withdraw_clock;
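
/* A typical (illustrative) use of the clock: remember its value when
caching a pointer to a block, and before dereferencing the pointer
later, compare the remembered value with the current one (see
buf_pool_is_obsolete()):

	ulint	clock = buf_withdraw_clock;
	...
	if (clock != buf_withdraw_clock) {
		the block may have been withdrawn or relocated;
		look up the page again instead of using the pointer
	}
*/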

/** Map of buffer pool chunks by their first frame address.
This is rebuilt on buffer pool initialization and by buf_resize_thread.
Currently, updates do not need mutex protection. */
typedef std::map<
	const byte*,
	buf_chunk_t*,
	std::less<const byte*>,
	ut_allocator<std::pair<const byte* const, buf_chunk_t*> > >
	buf_pool_chunk_map_t;

static buf_pool_chunk_map_t*	buf_chunk_map_reg;

/** Chunk map to be used for lookups.
The map pointed to by this must not be updated. */
static buf_pool_chunk_map_t*	buf_chunk_map_ref = NULL;

#ifdef UNIV_DEBUG
/** Disable resizing of the buffer pool so that assertion code does not
become expensive. */
my_bool	buf_disable_resize_buffer_pool_debug = TRUE;
#endif /* UNIV_DEBUG */

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** This is used to insert validation operations in execution
in the debug version */
static ulint	buf_dbg_counter	= 0;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK

/* Buffer block mutexes and rwlocks can be registered
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
is defined, register buffer block mutex and rwlock
in one group after their initialization. */
#  define PFS_GROUP_BUFFER_SYNC

/* This define caps the number of mutexes/rwlocks that can
be registered with performance schema. Developers can
modify this define if necessary. Please note, this would
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
#  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER	ULINT_MAX

# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */

/** Macro to determine whether the read or write counter is used, depending
on the io_type */
#define MONITOR_RW_COUNTER(io_type, counter)		\
	((io_type == BUF_IO_READ)			\
	 ? (counter##_READ)				\
	 : (counter##_WRITTEN))
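
/* For example (illustrative only): MONITOR_RW_COUNTER(io_type,
MONITOR_INDEX_IBUF) expands to a run-time choice between
MONITOR_INDEX_IBUF_READ and MONITOR_INDEX_IBUF_WRITTEN, depending on
whether io_type is BUF_IO_READ; the token pasting happens at compile
time, the selection at run time. */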

/** Registers a chunk to buf_pool_chunk_map
@param[in]	chunk	chunk of buffers */
static
void
buf_pool_register_chunk(
	buf_chunk_t*	chunk)
{
	buf_chunk_map_reg->insert(buf_pool_chunk_map_t::value_type(
		chunk->blocks->frame, chunk));
}

/** Decrypt a page.
@param[in,out]	bpage	Page control block
@param[in,out]	space	tablespace
@return whether the operation was successful */
static
bool
buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
	MY_ATTRIBUTE((nonnull));

/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
lsn_t
buf_pool_get_oldest_modification(void)
/*==================================*/
{
	lsn_t	lsn = 0;
	lsn_t	oldest_lsn = 0;

	/* When we traverse all the flush lists we don't want another
	thread to add a dirty page to any flush list. */
	log_flush_order_mutex_enter();

	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_flush_list_mutex_enter(buf_pool);

		buf_page_t*	bpage;

		/* We don't let a log checkpoint stall just because pages
		from the system temporary tablespace have not been flushed
		to disk yet: objects residing in the system temporary
		tablespace do not generate redo log anyway. */
		for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
		     bpage != NULL
			     && fsp_is_system_temporary(bpage->id.space());
		     bpage = UT_LIST_GET_PREV(list, bpage)) {
			/* Do nothing. */
		}

		if (bpage != NULL) {
			ut_ad(bpage->in_flush_list);
			lsn = bpage->oldest_modification;
		}

		buf_flush_list_mutex_exit(buf_pool);

		if (!oldest_lsn || oldest_lsn > lsn) {
			oldest_lsn = lsn;
		}
	}

	log_flush_order_mutex_exit();

	/* The returned answer may be out of date: the flush_list can
	change after the mutex has been released. */

	return(oldest_lsn);
}

/********************************************************************//**
Get total buffer pool statistics. */
void
buf_get_total_list_len(
/*===================*/
	ulint*	LRU_len,	/*!< out: length of all LRU lists */
	ulint*	free_len,	/*!< out: length of all free lists */
	ulint*	flush_list_len)	/*!< out: length of all flush lists */
{
	ulint	i;

	*LRU_len = 0;
	*free_len = 0;
	*flush_list_len = 0;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
		*free_len += UT_LIST_GET_LEN(buf_pool->free);
		*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
	}
}

/********************************************************************//**
Get total list size in bytes from all buffer pools. */
void
buf_get_total_list_size_in_bytes(
/*=============================*/
	buf_pools_list_size_t*	buf_pools_list_size)	/*!< out: list sizes
							in all buffer pools */
{
	ut_ad(buf_pools_list_size);
	memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));

	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		/* We don't need mutex protection since this is
		for statistics purposes only */
		buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
		buf_pools_list_size->unzip_LRU_bytes +=
			UT_LIST_GET_LEN(buf_pool->unzip_LRU)
			<< srv_page_size_shift;
		buf_pools_list_size->flush_list_bytes +=
			buf_pool->stat.flush_list_bytes;
	}
}

/********************************************************************//**
Get total buffer pool statistics. */
void
buf_get_total_stat(
/*===============*/
	buf_pool_stat_t*	tot_stat)	/*!< out: buffer pool stats */
{
	ulint	i;

	memset(tot_stat, 0, sizeof(*tot_stat));

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_stat_t* buf_stat;
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);

		buf_stat = &buf_pool->stat;
		tot_stat->n_page_gets += buf_stat->n_page_gets;
		tot_stat->n_pages_read += buf_stat->n_pages_read;
		tot_stat->n_pages_written += buf_stat->n_pages_written;
		tot_stat->n_pages_created += buf_stat->n_pages_created;
		tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd;
		tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
		tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
		tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;

		tot_stat->n_pages_not_made_young +=
			buf_stat->n_pages_not_made_young;
	}
}

/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
buf_block_t*
buf_block_alloc(
/*============*/
	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance,
					or NULL for round-robin selection
					of the buffer pool */
{
	buf_block_t*	block;
	ulint		index;
	static ulint	buf_pool_index;

	if (buf_pool == NULL) {
		/* We are allocating memory from any buffer pool; ensure
		we spread the load across all buffer pool instances. */
		index = buf_pool_index++ % srv_buf_pool_instances;
		buf_pool = buf_pool_from_array(index);
	}

	block = buf_LRU_get_free_block(buf_pool);

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	return(block);
}
#endif /* !UNIV_INNOCHECKSUM */

/** Checks if a page contains only zeroes.
@param[in]	read_buf	database page
@param[in]	page_size	page size
@return true if page is filled with zeroes */
bool
buf_page_is_zeroes(
	const byte*		read_buf,
	const page_size_t&	page_size)
{
	for (ulint i = 0; i < page_size.logical(); i++) {
		if (read_buf[i] != 0) {
			return(false);
		}
	}
	return(true);
}

/** Checks if the page is in crc32 checksum format.
@param[in]	read_buf		database page
@param[in]	checksum_field1		new checksum field
@param[in]	checksum_field2		old checksum field
@param[in]	use_legacy_big_endian	use legacy big endian algorithm
@return true if the page is in crc32 checksum format. */
bool
buf_page_is_checksum_valid_crc32(
	const byte*	read_buf,
	ulint		checksum_field1,
	ulint		checksum_field2,
	bool		use_legacy_big_endian)
{
	const uint32_t	crc32 = buf_calc_page_crc32(read_buf,
						    use_legacy_big_endian);

#ifdef UNIV_INNOCHECKSUM
	if (log_file
	    && srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
		fprintf(log_file, "page::%llu;"
			" crc32 calculated = %u;"
			" recorded checksum field1 = " ULINTPF " recorded"
			" checksum field2 = " ULINTPF "\n", cur_page_num,
			crc32, checksum_field1, checksum_field2);
	}
#endif /* UNIV_INNOCHECKSUM */

	if (checksum_field1 != checksum_field2) {
		goto invalid;
	}

	if (checksum_field1 == crc32) {
		return(true);
	} else {
		const uint32_t	crc32_legacy = buf_calc_page_crc32(read_buf, true);

		if (checksum_field1 == crc32_legacy) {
			return(true);
		}
	}

invalid:
	DBUG_LOG("checksum", "Page checksum crc32 not valid"
		 << " field1 " << checksum_field1
		 << " field2 " << checksum_field2
		 << " crc32 " << crc32);
	return(false);
}

/** Checks if the page is in innodb checksum format.
@param[in]	read_buf	database page
@param[in]	checksum_field1	new checksum field
@param[in]	checksum_field2	old checksum field
@return true if the page is in innodb checksum format. */
bool
buf_page_is_checksum_valid_innodb(
	const byte*	read_buf,
	ulint		checksum_field1,
	ulint		checksum_field2)
{
	/* There are 2 valid formulas for
	checksum_field2 (old checksum field) which algo=innodb could have
	written to the page:

	1. Very old versions of InnoDB only stored 8 byte lsn to the
	start and the end of the page.

	2. Newer InnoDB versions store the old formula checksum
	(buf_calc_page_old_checksum()). */

	ulint	old_checksum = buf_calc_page_old_checksum(read_buf);
	ulint	new_checksum = buf_calc_page_new_checksum(read_buf);

#ifdef UNIV_INNOCHECKSUM
	if (log_file
	    && srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB) {
		fprintf(log_file, "page::%llu;"
			" old style: calculated ="
			" " ULINTPF "; recorded = " ULINTPF "\n",
			cur_page_num, old_checksum,
			checksum_field2);
		fprintf(log_file, "page::%llu;"
			" new style: calculated ="
			" " ULINTPF "; crc32 = %u; recorded = " ULINTPF "\n",
			cur_page_num, new_checksum,
			buf_calc_page_crc32(read_buf), checksum_field1);
	}

	if (log_file
	    && srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
		fprintf(log_file, "page::%llu;"
			" old style: calculated ="
			" " ULINTPF "; recorded checksum = " ULINTPF "\n",
			cur_page_num, old_checksum,
			checksum_field2);
		fprintf(log_file, "page::%llu;"
			" new style: calculated ="
			" " ULINTPF "; recorded checksum = " ULINTPF "\n",
			cur_page_num, new_checksum,
			checksum_field1);
	}
#endif /* UNIV_INNOCHECKSUM */

	if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN)
	    && checksum_field2 != old_checksum) {
		DBUG_LOG("checksum",
			 "Page checksum innodb (old style) not valid"
			 << " field1 " << checksum_field1
			 << " field2 " << checksum_field2
			 << " calculated " << old_checksum
			 << " lsn " << mach_read_from_4(
				 read_buf + FIL_PAGE_LSN));
		return(false);
	}

	/* The old field is fine; check the new field. */

	/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
	(which was always 0) in FIL_PAGE_SPACE_OR_CHKSUM. */

	if (checksum_field1 != 0 && checksum_field1 != new_checksum) {
		DBUG_LOG("checksum",
			 "Page checksum innodb (new style) not valid"
			 << " field1 " << checksum_field1
			 << " field2 " << checksum_field2
			 << " calculated " << new_checksum
			 << " lsn " << mach_read_from_4(
				 read_buf + FIL_PAGE_LSN));
		return(false);
	}

	return(true);
}

/** Checks if the page is in none checksum format.
@param[in]	read_buf	database page
@param[in]	checksum_field1	new checksum field
@param[in]	checksum_field2	old checksum field
@return true if the page is in none checksum format. */
bool
buf_page_is_checksum_valid_none(
	const byte*	read_buf,
	ulint		checksum_field1,
	ulint		checksum_field2)
{
#ifndef DBUG_OFF
	if (checksum_field1 != checksum_field2
	    && checksum_field1 != BUF_NO_CHECKSUM_MAGIC) {
		DBUG_LOG("checksum",
			 "Page checksum none not valid"
			 << " field1 " << checksum_field1
			 << " field2 " << checksum_field2
			 << " magic " << BUF_NO_CHECKSUM_MAGIC
			 << " lsn " << mach_read_from_4(read_buf
							+ FIL_PAGE_LSN));
	}
#endif /* DBUG_OFF */

#ifdef UNIV_INNOCHECKSUM
	if (log_file
	    && srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) {
		fprintf(log_file,
			"page::%llu; none checksum: calculated"
			" = %lu; recorded checksum_field1 = " ULINTPF
			" recorded checksum_field2 = " ULINTPF "\n",
			cur_page_num, BUF_NO_CHECKSUM_MAGIC,
			checksum_field1, checksum_field2);
	}
#endif /* UNIV_INNOCHECKSUM */

	return(checksum_field1 == checksum_field2
	       && checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
}

/** Check if a page is corrupt.
@param[in]	check_lsn	whether the LSN should be checked
@param[in]	read_buf	database page
@param[in]	page_size	page size
@param[in]	space		tablespace
@return whether the page is corrupted */
bool
buf_page_is_corrupted(
	bool			check_lsn,
	const byte*		read_buf,
	const page_size_t&	page_size,
#ifndef UNIV_INNOCHECKSUM
	const fil_space_t*	space)
#else
	const void*		space)
#endif
{
	size_t		checksum_field1 = 0;
	size_t		checksum_field2 = 0;
#ifndef UNIV_INNOCHECKSUM
	DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(true); );
#endif
	ulint		page_type = mach_read_from_2(read_buf + FIL_PAGE_TYPE);

	/* We can trust the page type if page compression is set in the
	tablespace flags, because the page compression flag means the
	file must have been created with 10.1 (later than the 5.5 code
	base). In 10.1, page compressed tables do not contain a
	post-compression checksum, and the FIL_PAGE_END_LSN_OLD_CHKSUM
	field is not stored. Note that space can be null if we are in
	fil_check_first_page() and the first page is not compressed or
	encrypted. The page checksum is verified after decompression
	(i.e. normally pages are already decompressed at this stage). */
	if ((page_type == FIL_PAGE_PAGE_COMPRESSED ||
	     page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
#ifndef UNIV_INNOCHECKSUM
	    && space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)
#endif
	    ) {
		return(false);
	}

	if (!page_size.is_compressed()
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + page_size.logical()
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */
#ifndef UNIV_INNOCHECKSUM
		ib::info() << "Log sequence number at the start "
			<< mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
			<< " and the end "
			<< mach_read_from_4(read_buf + srv_page_size
					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)
			<< " do not match";
#endif /* UNIV_INNOCHECKSUM */
		return(true);
	}

#ifndef UNIV_INNOCHECKSUM
	if (check_lsn && recv_lsn_checks_on) {
		lsn_t		current_lsn;
		const lsn_t	page_lsn
			= mach_read_from_8(read_buf + FIL_PAGE_LSN);

		/* Since we are going to reset the page LSN during the import
		phase it makes no sense to spam the log with error messages. */

		if (log_peek_lsn(&current_lsn) && current_lsn < page_lsn) {

			const ulint	space_id = mach_read_from_4(
				read_buf + FIL_PAGE_SPACE_ID);
			const ulint	page_no = mach_read_from_4(
				read_buf + FIL_PAGE_OFFSET);

			ib::error() << "Page " << page_id_t(space_id, page_no)
				<< " log sequence number " << page_lsn
				<< " is in the future! Current system"
				<< " log sequence number "
				<< current_lsn << ".";

			ib::error() << "Your database may be corrupt or"
				" you may have copied the InnoDB"
				" tablespace but not the InnoDB"
				" log files. "
				<< FORCE_RECOVERY_MSG;

		}
	}
#endif /* !UNIV_INNOCHECKSUM */

	/* Check whether the checksum fields have correct values */

	if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
		return(false);
	}

	if (page_size.is_compressed()) {
		return(!page_zip_verify_checksum(read_buf,
						 page_size.physical()));
	}

	checksum_field1 = mach_read_from_4(
		read_buf + FIL_PAGE_SPACE_OR_CHKSUM);

	checksum_field2 = mach_read_from_4(
		read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM);

	compile_time_assert(!(FIL_PAGE_LSN % 8));

	/* declare empty pages non-corrupted */
	if (checksum_field1 == 0
	    && checksum_field2 == 0
	    && *reinterpret_cast<const ib_uint64_t*>(
		    read_buf + FIL_PAGE_LSN) == 0) {

		ulint	i;

		/* make sure that the page is really empty */
		for (i = 0; i < page_size.logical(); ++i) {

			/* The FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID has been
			repurposed for page compression. It can be
			set for uncompressed empty pages. */

			if ((i < FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
			     || i >= FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)
			    && read_buf[i] != 0) {

#ifndef UNIV_INNOCHECKSUM
				ib::info() << "Checksum fields zero but"
					" page is not empty.";
#endif

				break;
			}
		}
#ifdef UNIV_INNOCHECKSUM
		if (i >= page_size.logical()) {
			if (log_file) {
				fprintf(log_file, "Page::%llu"
					" is empty and uncorrupted\n",
					cur_page_num);
			}
			return(false);
		}
#else
		return(i < page_size.logical());
#endif /* UNIV_INNOCHECKSUM */
	}

#ifndef UNIV_INNOCHECKSUM
	const page_id_t	page_id(mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_ID),
				mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET));
#endif /* UNIV_INNOCHECKSUM */

	const srv_checksum_algorithm_t	curr_algo =
		static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);

	bool	legacy_checksum_checked = false;

	switch (curr_algo) {
	case SRV_CHECKSUM_ALGORITHM_CRC32:
	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:

		if (buf_page_is_checksum_valid_crc32(read_buf,
			checksum_field1, checksum_field2, false)) {
			return(false);
		}

		if (buf_page_is_checksum_valid_none(read_buf,
			checksum_field1, checksum_field2)) {
			if (curr_algo
			    == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
#ifndef UNIV_INNOCHECKSUM
				page_warn_strict_checksum(
					curr_algo,
					SRV_CHECKSUM_ALGORITHM_NONE,
					page_id);
#endif /* !UNIV_INNOCHECKSUM */
			}

#ifdef UNIV_INNOCHECKSUM
			if (log_file) {
				fprintf(log_file, "page::%llu;"
					" old style: calculated = %u;"
					" recorded = " ULINTPF ";\n",
					cur_page_num,
					buf_calc_page_old_checksum(read_buf),
					checksum_field2);
				fprintf(log_file, "page::%llu;"
					" new style: calculated = %u;"
					" crc32 = %u; recorded = " ULINTPF ";\n",
					cur_page_num,
					buf_calc_page_new_checksum(read_buf),
					buf_calc_page_crc32(read_buf),
					checksum_field1);
			}
#endif /* UNIV_INNOCHECKSUM */

			return(false);
		}

		/* We need to check whether the stored checksum matches the
		legacy big-endian checksum or the InnoDB checksum. We
		optimize the order based on earlier results: if we have
		previously found pages matching the legacy big-endian
		checksum, we try to match it first; otherwise we check
		the InnoDB checksum first. */
		if (legacy_big_endian_checksum) {
			if (buf_page_is_checksum_valid_crc32(read_buf,
				checksum_field1, checksum_field2, true)) {

				return(false);
			}
			legacy_checksum_checked = true;
		}

		if (buf_page_is_checksum_valid_innodb(read_buf,
			checksum_field1, checksum_field2)) {
			if (curr_algo
			    == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
#ifndef UNIV_INNOCHECKSUM
				page_warn_strict_checksum(
					curr_algo,
					SRV_CHECKSUM_ALGORITHM_INNODB,
					page_id);
#endif
			}

			return(false);
		}

		/* If legacy checksum is not checked, do it now. */
		if (!legacy_checksum_checked && buf_page_is_checksum_valid_crc32(
			read_buf, checksum_field1, checksum_field2, true)) {

			legacy_big_endian_checksum = true;
			return(false);
		}

#ifdef UNIV_INNOCHECKSUM
		if (log_file) {
			fprintf(log_file, "Fail; page::%llu;"
				" invalid (fails crc32 checksum)\n",
				cur_page_num);
		}
#endif /* UNIV_INNOCHECKSUM */
		return(true);

	case SRV_CHECKSUM_ALGORITHM_INNODB:
	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:

		if (buf_page_is_checksum_valid_innodb(read_buf,
			checksum_field1, checksum_field2)) {
			return(false);
		}

		if (buf_page_is_checksum_valid_none(read_buf,
			checksum_field1, checksum_field2)) {
			if (curr_algo
			    == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
#ifndef UNIV_INNOCHECKSUM
				page_warn_strict_checksum(
					curr_algo,
					SRV_CHECKSUM_ALGORITHM_NONE,
					page_id);
#endif
			}
#ifdef UNIV_INNOCHECKSUM
			if (log_file) {
				fprintf(log_file, "page::%llu;"
					" old style: calculated = %u;"
					" recorded = %zu;\n", cur_page_num,
					buf_calc_page_old_checksum(read_buf),
					checksum_field2);
				fprintf(log_file, "page::%llu;"
					" new style: calculated = %u;"
					" crc32 = %u; recorded = %zu;\n",
					cur_page_num,
					buf_calc_page_new_checksum(read_buf),
					buf_calc_page_crc32(read_buf),
					checksum_field1);
			}
#endif /* UNIV_INNOCHECKSUM */

			return(false);
		}

		if (buf_page_is_checksum_valid_crc32(read_buf,
			checksum_field1, checksum_field2, false)
		    || buf_page_is_checksum_valid_crc32(read_buf,
			checksum_field1, checksum_field2, true)) {

			if (curr_algo
			    == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
#ifndef UNIV_INNOCHECKSUM
				page_warn_strict_checksum(
					curr_algo,
					SRV_CHECKSUM_ALGORITHM_CRC32,
					page_id);
#endif
			}

			return(false);
		}

#ifdef UNIV_INNOCHECKSUM
		if (log_file) {
			fprintf(log_file, "Fail; page::%llu;"
				" invalid (fails innodb checksum)\n",
				cur_page_num);
		}
#endif /* UNIV_INNOCHECKSUM */

		return(true);

	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:

		if (buf_page_is_checksum_valid_none(read_buf,
			checksum_field1, checksum_field2)) {
			return(false);
		}

		if (buf_page_is_checksum_valid_crc32(read_buf,
			checksum_field1, checksum_field2, false)
		    || buf_page_is_checksum_valid_crc32(read_buf,
			checksum_field1, checksum_field2, true)) {
#ifndef UNIV_INNOCHECKSUM
			page_warn_strict_checksum(
				curr_algo,
				SRV_CHECKSUM_ALGORITHM_CRC32,
				page_id);
#endif /* !UNIV_INNOCHECKSUM */
			return(false);
		}

		if (buf_page_is_checksum_valid_innodb(read_buf,
			checksum_field1, checksum_field2)) {
#ifndef UNIV_INNOCHECKSUM
			page_warn_strict_checksum(
				curr_algo,
				SRV_CHECKSUM_ALGORITHM_INNODB,
				page_id);
#endif /* !UNIV_INNOCHECKSUM */
			return(false);
		}

#ifdef UNIV_INNOCHECKSUM
		if (log_file) {
			fprintf(log_file, "Fail; page::%llu;"
				" invalid (fails none checksum)\n",
				cur_page_num);
		}
#endif /* UNIV_INNOCHECKSUM */

		return(true);

	case SRV_CHECKSUM_ALGORITHM_NONE:
		/* should have returned false earlier */
		break;
	/* no default so the compiler will emit a warning if new enum
	is added and not handled here */
	}

	ut_error;
	return(false);
}

#ifndef UNIV_INNOCHECKSUM

#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
/** Enable buffers to be dumped to core files.

A convenience function, not called anywhere directly, however
it is left available for gdb or any debugger to call
in the event that you want all of the memory to be dumped
to a core file.

Returns the number of errors found in madvise() calls. */
int
buf_madvise_do_dump()
{
	int		ret = 0;
	buf_pool_t*	buf_pool;
	buf_chunk_t*	chunk;

	/* mirrors allocation in log_t::create() */
	if (log_sys.buf) {
		ret += madvise(log_sys.first_in_use
			       ? log_sys.buf
			       : log_sys.buf - srv_log_buffer_size,
			       srv_log_buffer_size * 2,
			       MADV_DODUMP);
	}
	/* mirrors recv_sys_init() */
	if (recv_sys->buf) {
		ret += madvise(recv_sys->buf, recv_sys->len, MADV_DODUMP);
	}

	buf_pool_mutex_enter_all();

	for (ulong i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool = buf_pool_from_array(i);
		chunk = buf_pool->chunks;

		for (int n = buf_pool->n_chunks; n--; chunk++) {
			ret += madvise(chunk->mem, chunk->mem_size(),
				       MADV_DODUMP);
		}
	}

	buf_pool_mutex_exit_all();

	return ret;
}
#endif

/** Dump a page to stderr.
@param[in]	read_buf	database page
@param[in]	page_size	page size */
UNIV_INTERN
void
buf_page_print(const byte* read_buf, const page_size_t& page_size)
{
	dict_index_t*	index;

	ib::info() << "Page dump in ascii and hex ("
		<< page_size.physical() << " bytes):";

	ut_print_buf(stderr, read_buf, page_size.physical());
	fputs("\nInnoDB: End of page dump\n", stderr);

	if (page_size.is_compressed()) {
		/* Print compressed page. */
		ib::info() << "Compressed page type ("
			<< fil_page_get_type(read_buf)
			<< "); stored checksum in field1 "
			<< mach_read_from_4(
				read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
			<< "; calculated checksums for field1: "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_CRC32)
			<< " "
			<< page_zip_calc_checksum(
				read_buf, page_size.physical(),
				SRV_CHECKSUM_ALGORITHM_CRC32)
			<< "/"
			<< page_zip_calc_checksum(
				read_buf, page_size.physical(),
				SRV_CHECKSUM_ALGORITHM_CRC32, true)
			<< ", "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_INNODB)
			<< " "
			<< page_zip_calc_checksum(
				read_buf, page_size.physical(),
				SRV_CHECKSUM_ALGORITHM_INNODB)
			<< ", "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_NONE)
			<< " "
			<< page_zip_calc_checksum(
				read_buf, page_size.physical(),
				SRV_CHECKSUM_ALGORITHM_NONE)
			<< "; page LSN "
			<< mach_read_from_8(read_buf + FIL_PAGE_LSN)
			<< "; page number (if stored to page"
			<< " already) "
			<< mach_read_from_4(read_buf + FIL_PAGE_OFFSET)
			<< "; space id (if stored to page already) "
			<< mach_read_from_4(
				read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

	} else {
		const uint32_t	crc32 = buf_calc_page_crc32(read_buf);

		const uint32_t	crc32_legacy = buf_calc_page_crc32(read_buf,
								   true);
		ulint		page_type = fil_page_get_type(read_buf);

		ib::info() << "Uncompressed page, stored checksum in field1 "
			<< mach_read_from_4(
				read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
			<< ", calculated checksums for field1: "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_CRC32) << " "
			<< crc32 << "/" << crc32_legacy
			<< ", "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_INNODB) << " "
			<< buf_calc_page_new_checksum(read_buf)
			<< ", "
			<< " page type " << page_type << " == "
			<< fil_get_page_type_name(page_type) << "."
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_NONE) << " "
			<< BUF_NO_CHECKSUM_MAGIC
			<< ", stored checksum in field2 "
			<< mach_read_from_4(read_buf + page_size.logical()
					    - FIL_PAGE_END_LSN_OLD_CHKSUM)
			<< ", calculated checksums for field2: "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_CRC32) << " "
			<< crc32 << "/" << crc32_legacy
			<< ", "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_INNODB) << " "
			<< buf_calc_page_old_checksum(read_buf)
			<< ", "
			<< buf_checksum_algorithm_name(
				SRV_CHECKSUM_ALGORITHM_NONE) << " "
			<< BUF_NO_CHECKSUM_MAGIC
			<< ", page LSN "
			<< mach_read_from_4(read_buf + FIL_PAGE_LSN)
			<< " "
			<< mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
			<< ", low 4 bytes of LSN at page end "
			<< mach_read_from_4(read_buf + page_size.logical()
					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)
			<< ", page number (if stored to page already) "
			<< mach_read_from_4(read_buf + FIL_PAGE_OFFSET)
			<< ", space id (if created with >= MySQL-4.1.1"
			   " and stored already) "
			<< mach_read_from_4(
				read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
	}

	switch (fil_page_get_type(read_buf)) {
		index_id_t	index_id;
	case FIL_PAGE_INDEX:
	case FIL_PAGE_TYPE_INSTANT:
	case FIL_PAGE_RTREE:
		index_id = btr_page_get_index_id(read_buf);
		ib::info() << "Page may be an index page where"
			" index id is " << index_id;

		index = dict_index_find_on_id_low(index_id);
		if (index) {
			ib::info()
				<< "Index " << index_id
				<< " is " << index->name
				<< " in table " << index->table->name;
		}
		break;
	case FIL_PAGE_UNDO_LOG:
		fputs("InnoDB: Page may be an undo log page\n", stderr);
		break;
	case FIL_PAGE_INODE:
		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
		break;
	case FIL_PAGE_IBUF_FREE_LIST:
		fputs("InnoDB: Page may be an insert buffer free list page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ALLOCATED:
		fputs("InnoDB: Page may be a freshly allocated page\n",
		      stderr);
		break;
	case FIL_PAGE_IBUF_BITMAP:
		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_SYS:
		fputs("InnoDB: Page may be a system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_TRX_SYS:
		fputs("InnoDB: Page may be a transaction system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_FSP_HDR:
		fputs("InnoDB: Page may be a file space header page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_XDES:
		fputs("InnoDB: Page may be an extent descriptor page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_BLOB:
		fputs("InnoDB: Page may be a BLOB page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		fputs("InnoDB: Page may be a compressed BLOB page\n",
		      stderr);
		break;
	}
}

# ifdef PFS_GROUP_BUFFER_SYNC
extern mysql_pfs_key_t	buffer_block_mutex_key;

/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
static
void
pfs_register_buffer_block(
/*======================*/
	buf_chunk_t*	chunk)		/*!< in/out: chunk of buffers */
{
	buf_block_t*	block;
	ulint		num_to_register;

	block = chunk->blocks;

	num_to_register = ut_min(
		chunk->size, PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

	for (ulint i = 0; i < num_to_register; i++) {
# ifdef UNIV_PFS_MUTEX
		BPageMutex*	mutex;

		mutex = &block->mutex;
		mutex->pfs_add(buffer_block_mutex_key);
# endif /* UNIV_PFS_MUTEX */

		rw_lock_t*	rwlock;

# ifdef UNIV_PFS_RWLOCK
		rwlock = &block->lock;
		ut_a(!rwlock->pfs_psi);
		rwlock->pfs_psi = (PSI_server)
			? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
			: NULL;

#  ifdef UNIV_DEBUG
		rwlock = &block->debug_latch;
		ut_a(!rwlock->pfs_psi);
		rwlock->pfs_psi = (PSI_server)
			? PSI_server->init_rwlock(buf_block_debug_latch_key,
						  rwlock)
			: NULL;
#  endif /* UNIV_DEBUG */

# endif /* UNIV_PFS_RWLOCK */
		block++;
	}
}
# endif /* PFS_GROUP_BUFFER_SYNC */

/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_block_t*	block,		/*!< in: pointer to control block */
	byte*		frame)		/*!< in: pointer to buffer frame */
{
	UNIV_MEM_DESC(frame, srv_page_size);

	/* This function should only be executed at database startup or by
	buf_pool_resize(). Either way, adaptive hash index must not exist. */
	assert_block_ahi_empty_on_init(block);

	block->frame = frame;

	block->page.buf_pool_index = buf_pool_index(buf_pool);
	block->page.flush_type = BUF_FLUSH_LRU;
	block->page.state = BUF_BLOCK_NOT_USED;
	block->page.buf_fix_count = 0;
	block->page.io_fix = BUF_IO_NONE;
	block->page.flush_observer = NULL;
	block->page.encrypted = false;
	block->page.real_size = 0;
	block->page.write_size = 0;
	block->modify_clock = 0;
	block->page.slot = NULL;

	ut_d(block->page.file_page_was_freed = FALSE);

#ifdef BTR_CUR_HASH_ADAPT
	block->index = NULL;
#endif /* BTR_CUR_HASH_ADAPT */
	block->skip_flush_check = false;

	ut_d(block->page.in_page_hash = FALSE);
	ut_d(block->page.in_zip_hash = FALSE);
	ut_d(block->page.in_flush_list = FALSE);
	ut_d(block->page.in_free_list = FALSE);
	ut_d(block->page.in_LRU_list = FALSE);
	ut_d(block->in_unzip_LRU_list = FALSE);
	ut_d(block->in_withdraw_list = FALSE);

	page_zip_des_init(&block->page.zip);

	mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
	of buffer block rwlock with performance schema.

	If PFS_GROUP_BUFFER_SYNC is defined, skip the registration
	since buffer block rwlock will be registered later in
	pfs_register_buffer_block(). */

	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);

	ut_d(rw_lock_create(PFS_NOT_INSTRUMENTED, &block->debug_latch,
			    SYNC_LEVEL_VARYING));

#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);

	ut_d(rw_lock_create(buf_block_debug_latch_key,
			    &block->debug_latch, SYNC_LEVEL_VARYING));

#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

	block->lock.is_block_lock = 1;

	ut_ad(rw_lock_validate(&(block->lock)));
}

/********************************************************************//**
Allocates a chunk of buffer frames.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
	ulint		mem_size)	/*!< in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, ulint(srv_page_size));
	/* Reserve space for the block descriptors. */
	mem_size += ut_2pow_round((mem_size >> srv_page_size_shift)
				  * (sizeof *block)
				  + (srv_page_size - 1),
				  ulint(srv_page_size));
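
	/* An illustrative example with made-up numbers: with 16 KiB
	pages, a 128 MiB chunk covers 8192 frames, so the statement
	above reserves room for 8192 block descriptors, rounded up to
	a whole page; at a few hundred bytes per descriptor this adds
	a few MiB on top of the requested size. */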

	DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL););

	chunk->mem = buf_pool->allocator.allocate_large(mem_size,
							&chunk->mem_pfx, true);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

#ifdef HAVE_LIBNUMA
	if (srv_numa_interleave) {
		struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
		int	st = mbind(chunk->mem, chunk->mem_size(),
				   MPOL_INTERLEAVE,
				   numa_mems_allowed->maskp,
				   numa_mems_allowed->size,
				   MPOL_MF_MOVE);
		if (st != 0) {
			ib::warn() << "Failed to set NUMA memory policy of"
				" buffer pool page frames to MPOL_INTERLEAVE"
				" (error: " << strerror(errno) << ").";
		}
	}
#endif /* HAVE_LIBNUMA */

	/* Allocate the block descriptors from
	the start of the memory block. */
	chunk->blocks = (buf_block_t*) chunk->mem;

	/* Align a pointer to the first frame. Note that when
	os_large_page_size is smaller than srv_page_size,
	we may allocate one fewer block than requested. When
	it is bigger, we may allocate more blocks than requested. */

	frame = (byte*) ut_align(chunk->mem, srv_page_size);
	chunk->size = (chunk->mem_pfx.m_size >> srv_page_size_shift)
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors. */
	{
		ulint	size = chunk->size;

		while (frame < (byte*) (chunk->blocks + size)) {
			frame += srv_page_size;
			size--;
		}

		chunk->size = size;
	}

	/* Init block structs and assign frames for them. Then we
	assign the frames to the first blocks (we already mapped the
	memory above). */

	block = chunk->blocks;

	for (i = chunk->size; i--; ) {

		buf_block_init(buf_pool, block, frame);
		UNIV_MEM_INVALID(block->frame, srv_page_size);

		/* Add the block to the free list */
		UT_LIST_ADD_LAST(buf_pool->free, &block->page);

		ut_d(block->page.in_free_list = TRUE);
		ut_ad(buf_pool_from_block(block) == buf_pool);

		block++;
		frame += srv_page_size;
	}

	buf_pool_register_chunk(chunk);

#ifdef PFS_GROUP_BUFFER_SYNC
	pfs_register_buffer_block(chunk);
#endif /* PFS_GROUP_BUFFER_SYNC */
	return(chunk);
}

#ifdef UNIV_DEBUG
/*********************************************************************//**
Finds a block in the given buffer chunk that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
	buf_chunk_t*	chunk,	/*!< in: chunk being checked */
	const void*	data)	/*!< in: pointer to compressed page */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		if (block->page.zip.data == data) {

			return(block);
		}
	}

	return(NULL);
}

/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
buf_block_t*
buf_pool_contains_zip(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	const void*	data)		/*!< in: pointer to compressed page */
{
	ulint		n;
	buf_chunk_t*	chunk = buf_pool->chunks;

	ut_ad(buf_pool);
	ut_ad(buf_pool_mutex_own(buf_pool));
	for (n = buf_pool->n_chunks; n--; chunk++) {

		buf_block_t*	block = buf_chunk_contains_zip(chunk, data);

		if (block) {
			return(block);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */

/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
{
	buf_block_t*	block;
	ulint		i;

	block = chunk->blocks;

	for (i = chunk->size; i--; block++) {
		ibool	ready;

		switch (buf_block_get_state(block)) {
		case BUF_BLOCK_POOL_WATCH:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_ZIP_DIRTY:
			/* The uncompressed buffer pool should never
			contain compressed block descriptors. */
			ut_error;
			break;
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* Skip blocks that are not being used for
			file pages. */
			break;
		case BUF_BLOCK_FILE_PAGE:
			if (srv_read_only_mode) {
				/* The page cleaner is disabled in
				read-only mode. No pages can be
				dirtied, so all of them must be clean. */
				ut_ad(block->page.oldest_modification
				      == block->page.newest_modification);
				ut_ad(block->page.oldest_modification == 0
				      || block->page.oldest_modification
				      == recv_sys->recovered_lsn
				      || srv_force_recovery
				      == SRV_FORCE_NO_LOG_REDO);
				ut_ad(block->page.buf_fix_count == 0);
				ut_ad(block->page.io_fix == BUF_IO_NONE);
				break;
			}

			buf_page_mutex_enter(block);
			ready = buf_flush_ready_for_replace(&block->page);
			buf_page_mutex_exit(block);

			if (!ready) {
				return(block);
			}

			break;
		}
	}

	return(NULL);
}

/********************************************************************//**
Set buffer pool size variables after resizing it */
static
void
buf_pool_set_sizes(void)
/*====================*/
{
	ulint	i;
	ulint	curr_size = 0;

	buf_pool_mutex_enter_all();

	for (i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool_t*	buf_pool;

		buf_pool = buf_pool_from_array(i);
		curr_size += buf_pool->curr_pool_size;
	}

	srv_buf_pool_curr_size = curr_size;
	srv_buf_pool_old_size = srv_buf_pool_size;
	srv_buf_pool_base_size = srv_buf_pool_size;

	buf_pool_mutex_exit_all();
}
1778
1779/********************************************************************//**
1780Initialize a buffer pool instance.
1781@return DB_SUCCESS if all goes well. */
1782static
1783ulint
1784buf_pool_init_instance(
1785/*===================*/
1786 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1787 ulint buf_pool_size, /*!< in: size in bytes */
1788 ulint instance_no) /*!< in: id of the instance */
1789{
1790 ulint i;
1791 ulint chunk_size;
1792 buf_chunk_t* chunk;
1793
1794 ut_ad(buf_pool_size % srv_buf_pool_chunk_unit == 0);
1795
1796 /* 1. Initialize general fields
1797 ------------------------------- */
1798 mutex_create(LATCH_ID_BUF_POOL, &buf_pool->mutex);
1799
1800 mutex_create(LATCH_ID_BUF_POOL_ZIP, &buf_pool->zip_mutex);
1801
1802 new(&buf_pool->allocator)
1803 ut_allocator<unsigned char>(mem_key_buf_buf_pool);
1804
1805 buf_pool_mutex_enter(buf_pool);
1806
1807 if (buf_pool_size > 0) {
1808 buf_pool->n_chunks
1809 = buf_pool_size / srv_buf_pool_chunk_unit;
1810 chunk_size = srv_buf_pool_chunk_unit;
1811
1812 buf_pool->chunks =
1813 reinterpret_cast<buf_chunk_t*>(ut_zalloc_nokey(
1814 buf_pool->n_chunks * sizeof(*chunk)));
1815 buf_pool->chunks_old = NULL;
1816
1817 UT_LIST_INIT(buf_pool->LRU, &buf_page_t::LRU);
1818 UT_LIST_INIT(buf_pool->free, &buf_page_t::list);
1819 UT_LIST_INIT(buf_pool->withdraw, &buf_page_t::list);
1820 buf_pool->withdraw_target = 0;
1821 UT_LIST_INIT(buf_pool->flush_list, &buf_page_t::list);
1822 UT_LIST_INIT(buf_pool->unzip_LRU, &buf_block_t::unzip_LRU);
1823
1824#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1825 UT_LIST_INIT(buf_pool->zip_clean, &buf_page_t::list);
1826#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1827
1828 for (i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) {
1829 UT_LIST_INIT(
1830 buf_pool->zip_free[i], &buf_buddy_free_t::list);
1831 }
1832
1833 buf_pool->curr_size = 0;
1834 chunk = buf_pool->chunks;
1835
1836 do {
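			/* Allocate the chunks one at a time. If an
			allocation fails, undo everything allocated so
			far: free each block's mutex, rw-lock and debug
			latch, release the chunk memory, and return
			DB_ERROR. */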
			if (!buf_chunk_init(buf_pool, chunk, chunk_size)) {
				while (--chunk >= buf_pool->chunks) {
					buf_block_t*	block = chunk->blocks;

					for (i = chunk->size; i--; block++) {
						mutex_free(&block->mutex);
						rw_lock_free(&block->lock);

						ut_d(rw_lock_free(
							&block->debug_latch));
					}

					buf_pool->allocator.deallocate_large(
						chunk->mem, &chunk->mem_pfx,
						true);
				}
				ut_free(buf_pool->chunks);
				buf_pool_mutex_exit(buf_pool);

				return(DB_ERROR);
			}

			buf_pool->curr_size += chunk->size;
		} while (++chunk < buf_pool->chunks + buf_pool->n_chunks);

		buf_pool->instance_no = instance_no;
		buf_pool->read_ahead_area =
			ut_min(BUF_READ_AHEAD_PAGES,
			       ut_2_power_up(buf_pool->curr_size /
					     BUF_READ_AHEAD_PORTION));
		buf_pool->curr_pool_size = buf_pool->curr_size
			<< srv_page_size_shift;

		buf_pool->old_size = buf_pool->curr_size;
		buf_pool->n_chunks_new = buf_pool->n_chunks;

		/* Number of locks protecting page_hash must be a
		power of two */
		srv_n_page_hash_locks = static_cast<ulong>(
			ut_2_power_up(srv_n_page_hash_locks));
		ut_a(srv_n_page_hash_locks != 0);
		ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS);

		buf_pool->page_hash = ib_create(
			2 * buf_pool->curr_size,
			LATCH_ID_HASH_TABLE_RW_LOCK,
			srv_n_page_hash_locks, MEM_HEAP_FOR_PAGE_HASH);

		buf_pool->page_hash_old = NULL;

		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

		buf_pool->last_printout_time = ut_time();
	}
	/* 2. Initialize flushing fields
	-------------------------------- */

	mutex_create(LATCH_ID_FLUSH_LIST, &buf_pool->flush_list_mutex);

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(0);
	}

	buf_pool->watch = (buf_page_t*) ut_zalloc_nokey(
		sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE);
	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		buf_pool->watch[i].buf_pool_index
			= unsigned(buf_pool->instance_no);
	}

	/* All fields are initialized by ut_zalloc_nokey(). */

	buf_pool->try_LRU_scan = TRUE;

	/* Initialize the hazard pointer for flush_list batches */
	new(&buf_pool->flush_hp)
		FlushHp(buf_pool, &buf_pool->flush_list_mutex);

	/* Initialize the hazard pointer for LRU batches */
	new(&buf_pool->lru_hp) LRUHp(buf_pool, &buf_pool->mutex);

	/* Initialize the iterator for LRU scan search */
	new(&buf_pool->lru_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);

	/* Initialize the iterator for single page scan search */
	new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);

	/* Initialize the temporary memory array and slots */
	buf_pool->tmp_arr = (buf_tmp_array_t*) ut_malloc_nokey(
		sizeof(buf_tmp_array_t));
	memset(buf_pool->tmp_arr, 0, sizeof(buf_tmp_array_t));
	ulint n_slots = (srv_n_read_io_threads + srv_n_write_io_threads)
		* (8 * OS_AIO_N_PENDING_IOS_PER_THREAD);
	buf_pool->tmp_arr->n_slots = n_slots;
	buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*) ut_malloc_nokey(
		sizeof(buf_tmp_buffer_t) * n_slots);
	memset(buf_pool->tmp_arr->slots, 0,
	       sizeof(buf_tmp_buffer_t) * n_slots);
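
	/* The slot count gives each read and write I/O thread room for
	8 * OS_AIO_N_PENDING_IOS_PER_THREAD buffers, so a free slot
	should always be available for the encryption or page
	compression of a pending I/O. */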

	buf_pool_mutex_exit(buf_pool);

	DBUG_EXECUTE_IF("buf_pool_init_instance_force_oom",
		return(DB_ERROR); );

	return(DB_SUCCESS);
}

/********************************************************************//**
Free one buffer pool instance. */
static
void
buf_pool_free_instance(
/*===================*/
	buf_pool_t*	buf_pool)	/*!< in, own: buffer pool instance
					to free */
{
	buf_chunk_t*	chunk;
	buf_chunk_t*	chunks;
	buf_page_t*	bpage;
	buf_page_t*	prev_bpage = 0;

	mutex_free(&buf_pool->mutex);
	mutex_free(&buf_pool->zip_mutex);
	mutex_free(&buf_pool->flush_list_mutex);

	if (buf_pool->flush_rbt) {
		rbt_free(buf_pool->flush_rbt);
		buf_pool->flush_rbt = NULL;
	}
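
	/* Walk the LRU list and free the descriptors of compressed-only
	pages; the buf_block_t descriptors are freed together with the
	chunks below. */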

	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
	     bpage != NULL;
	     bpage = prev_bpage) {

		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
		buf_page_state	state = buf_page_get_state(bpage);

		ut_ad(buf_page_in_file(bpage));
		ut_ad(bpage->in_LRU_list);

		if (state != BUF_BLOCK_FILE_PAGE) {
			/* We must not have any dirty block except
			when doing a fast shutdown. */
			ut_ad(state == BUF_BLOCK_ZIP_PAGE
			      || srv_fast_shutdown == 2);
			buf_page_free_descriptor(bpage);
		}
	}

	ut_free(buf_pool->watch);
	buf_pool->watch = NULL;

	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;

	while (--chunk >= chunks) {
		buf_block_t*	block = chunk->blocks;

		for (ulint i = chunk->size; i--; block++) {
			mutex_free(&block->mutex);
			rw_lock_free(&block->lock);

			ut_d(rw_lock_free(&block->debug_latch));
		}

		buf_pool->allocator.deallocate_large(
			chunk->mem, &chunk->mem_pfx, true);
	}

	for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) {
		os_event_destroy(buf_pool->no_flush[i]);
	}

	ut_free(buf_pool->chunks);
	ha_clear(buf_pool->page_hash);
	hash_table_free(buf_pool->page_hash);
	hash_table_free(buf_pool->zip_hash);

	/* Free all used temporary slots */
	if (buf_pool->tmp_arr) {
		for (ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) {
			buf_tmp_buffer_t* slot = &(buf_pool->tmp_arr->slots[i]);
			if (slot && slot->crypt_buf) {
				aligned_free(slot->crypt_buf);
				slot->crypt_buf = NULL;
			}

			if (slot && slot->comp_buf) {
				aligned_free(slot->comp_buf);
				slot->comp_buf = NULL;
			}
		}

		ut_free(buf_pool->tmp_arr->slots);
		ut_free(buf_pool->tmp_arr);
		buf_pool->tmp_arr = NULL;
	}

	buf_pool->allocator.~ut_allocator();
}

/********************************************************************//**
Creates the buffer pool.
@return DB_SUCCESS on success, DB_ERROR if out of memory or on failure */
dberr_t
buf_pool_init(
/*==========*/
	ulint	total_size,	/*!< in: size of the total pool in bytes */
	ulint	n_instances)	/*!< in: number of instances */
{
	ulint		i;
	const ulint	size	= total_size / n_instances;

	ut_ad(n_instances > 0);
	ut_ad(n_instances <= MAX_BUFFER_POOLS);
	ut_ad(n_instances == srv_buf_pool_instances);

	NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;

	buf_pool_resizing = false;
	buf_pool_withdrawing = false;
	buf_withdraw_clock = 0;

	buf_pool_ptr = (buf_pool_t*) ut_zalloc_nokey(
		n_instances * sizeof *buf_pool_ptr);

	buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t());
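
	/* buf_chunk_map_reg maps frame addresses to their chunks;
	buf_chunk_map_ref is the copy that lookups use, and it is
	republished once resizing has completed. */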

	for (i = 0; i < n_instances; i++) {
		buf_pool_t*	ptr	= &buf_pool_ptr[i];

		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {

			/* Free all the instances created so far. */
			buf_pool_free(i);

			return(DB_ERROR);
		}
	}

	buf_chunk_map_ref = buf_chunk_map_reg;

	buf_pool_set_sizes();
	buf_LRU_old_ratio_update(100 * 3 / 8, FALSE);

	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);

	return(DB_SUCCESS);
}

/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
void
buf_pool_free(
/*==========*/
	ulint	n_instances)	/*!< in: number of instances to free */
{
	for (ulint i = 0; i < n_instances; i++) {
		buf_pool_free_instance(buf_pool_from_array(i));
	}

	UT_DELETE(buf_chunk_map_reg);
	buf_chunk_map_reg = buf_chunk_map_ref = NULL;

	ut_free(buf_pool_ptr);
	buf_pool_ptr = NULL;
}

/** Reallocate a control block.
@param[in]	buf_pool	buffer pool instance
@param[in]	block		pointer to control block
@retval true if a free block was available and relocation was attempted
@retval false if failed because of no free blocks */
static
bool
buf_page_realloc(
	buf_pool_t*	buf_pool,
	buf_block_t*	block)
{
	buf_block_t*	new_block;

	ut_ad(buf_pool_withdrawing);
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	new_block = buf_LRU_get_free_only(buf_pool);

	if (new_block == NULL) {
		return(false); /* free_list was not enough */
	}

	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, block->page.id);

	rw_lock_x_lock(hash_lock);
	mutex_enter(&block->mutex);

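	/* Relocation is possible only if nobody has buffer-fixed the
	block and no I/O is pending on it; buf_page_can_relocate()
	checks this while we hold the page_hash X-latch and the
	block mutex. */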
	if (buf_page_can_relocate(&block->page)) {
		mutex_enter(&new_block->mutex);

		memcpy(new_block->frame, block->frame, srv_page_size);
		memcpy(&new_block->page, &block->page, sizeof block->page);

		/* relocate LRU list */
		ut_ad(block->page.in_LRU_list);
		ut_ad(!block->page.in_zip_hash);
		ut_d(block->page.in_LRU_list = FALSE);

		buf_LRU_adjust_hp(buf_pool, &block->page);

		buf_page_t*	prev_b = UT_LIST_GET_PREV(LRU, &block->page);
		UT_LIST_REMOVE(buf_pool->LRU, &block->page);

		if (prev_b != NULL) {
			UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, &new_block->page);
		} else {
			UT_LIST_ADD_FIRST(buf_pool->LRU, &new_block->page);
		}

		if (buf_pool->LRU_old == &block->page) {
			buf_pool->LRU_old = &new_block->page;
		}

		ut_ad(new_block->page.in_LRU_list);

		/* relocate unzip_LRU list */
		if (block->page.zip.data != NULL) {
			ut_ad(block->in_unzip_LRU_list);
			ut_d(new_block->in_unzip_LRU_list = TRUE);
			UNIV_MEM_DESC(&new_block->page.zip.data,
				      page_zip_get_size(&new_block->page.zip));

			buf_block_t*	prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
			UT_LIST_REMOVE(buf_pool->unzip_LRU, block);

			ut_d(block->in_unzip_LRU_list = FALSE);
			block->page.zip.data = NULL;
			page_zip_set_size(&block->page.zip, 0);

			if (prev_block != NULL) {
				UT_LIST_INSERT_AFTER(buf_pool->unzip_LRU, prev_block, new_block);
			} else {
				UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, new_block);
			}
		} else {
			ut_ad(!block->in_unzip_LRU_list);
			ut_d(new_block->in_unzip_LRU_list = FALSE);
		}

		/* relocate buf_pool->page_hash */
		ut_ad(block->page.in_page_hash);
		ut_ad(&block->page == buf_page_hash_get_low(buf_pool,
							    block->page.id));
		ut_d(block->page.in_page_hash = FALSE);
		ulint	fold = block->page.id.fold();
		ut_ad(fold == new_block->page.id.fold());
		HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, (&block->page));
		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, (&new_block->page));

		ut_ad(new_block->page.in_page_hash);

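		/* Invalidate the old copy of the page: bump its modify
		clock and poison the page number and space id fields in
		the old frame so that any stale access is detectable. */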
		buf_block_modify_clock_inc(block);
		memset(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
		memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
		UNIV_MEM_INVALID(block->frame, srv_page_size);
		buf_block_set_state(block, BUF_BLOCK_REMOVE_HASH);
		block->page.id.reset();

		/* Relocate buf_pool->flush_list. */
		if (block->page.oldest_modification) {
			buf_flush_relocate_on_flush_list(
				&block->page, &new_block->page);
		}

		/* set other flags of buf_block_t */

#ifdef BTR_CUR_HASH_ADAPT
		/* This code should only be executed by buf_pool_resize(),
		while the adaptive hash index is disabled. */
		assert_block_ahi_empty(block);
		assert_block_ahi_empty_on_init(new_block);
		ut_ad(!block->index);
		new_block->index	= NULL;
		new_block->n_hash_helps	= 0;
		new_block->n_fields	= 1;
		new_block->left_side	= TRUE;
#endif /* BTR_CUR_HASH_ADAPT */

		new_block->lock_hash_val = block->lock_hash_val;
		ut_ad(new_block->lock_hash_val == lock_rec_hash(
			new_block->page.id.space(),
			new_block->page.id.page_no()));

		rw_lock_x_unlock(hash_lock);
		mutex_exit(&new_block->mutex);

		/* free block */
		buf_block_set_state(block, BUF_BLOCK_MEMORY);
		buf_LRU_block_free_non_file_page(block);

		mutex_exit(&block->mutex);
	} else {
		rw_lock_x_unlock(hash_lock);
		mutex_exit(&block->mutex);

		/* free new_block */
		mutex_enter(&new_block->mutex);
		buf_LRU_block_free_non_file_page(new_block);
		mutex_exit(&new_block->mutex);
	}

	return(true); /* free_list was enough */
}

/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
to the specified string. The format and the following parameters are the
same as the ones used for printf(3).
@param[in]	fmt	format
@param[in]	...	extra parameters according to fmt */
static
void
buf_resize_status(
	const char*	fmt,
	...)
{
	va_list	ap;

	va_start(ap, fmt);

	vsnprintf(
		export_vars.innodb_buffer_pool_resize_status,
		sizeof(export_vars.innodb_buffer_pool_resize_status),
		fmt, ap);

	va_end(ap);

	ib::info() << export_vars.innodb_buffer_pool_resize_status;
}

/** Determines if a block is intended to be withdrawn.
@param[in]	buf_pool	buffer pool instance
@param[in]	block		pointer to control block
@retval true if the block will be withdrawn */
bool
buf_block_will_withdrawn(
	buf_pool_t*		buf_pool,
	const buf_block_t*	block)
{
	ut_ad(buf_pool->curr_size < buf_pool->old_size);
	ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool));

	const buf_chunk_t*	chunk
		= buf_pool->chunks + buf_pool->n_chunks_new;
	const buf_chunk_t*	echunk
		= buf_pool->chunks + buf_pool->n_chunks;

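	/* The chunks at indexes [n_chunks_new, n_chunks) form the area
	being withdrawn; check whether the block lies in any of them. */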
	while (chunk < echunk) {
		if (block >= chunk->blocks
		    && block < chunk->blocks + chunk->size) {
			return(true);
		}
		++chunk;
	}

	return(false);
}

/** Determines if a frame is intended to be withdrawn.
@param[in]	buf_pool	buffer pool instance
@param[in]	ptr		pointer to a frame
@retval true if the frame will be withdrawn */
bool
buf_frame_will_withdrawn(
	buf_pool_t*	buf_pool,
	const byte*	ptr)
{
	ut_ad(buf_pool->curr_size < buf_pool->old_size);
	ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool));

	const buf_chunk_t*	chunk
		= buf_pool->chunks + buf_pool->n_chunks_new;
	const buf_chunk_t*	echunk
		= buf_pool->chunks + buf_pool->n_chunks;

	while (chunk < echunk) {
		if (ptr >= chunk->blocks->frame
		    && ptr < (chunk->blocks + chunk->size - 1)->frame
		    + srv_page_size) {
			return(true);
		}
		++chunk;
	}

	return(false);
}

/** Withdraw blocks from the end of the buffer pool instance until
buf_pool->withdraw_target blocks have been withdrawn.
@param[in]	buf_pool	buffer pool instance
@retval true if retry is needed */
static
bool
buf_pool_withdraw_blocks(
	buf_pool_t*	buf_pool)
{
	buf_block_t*	block;
	ulint		loop_count = 0;
	ulint		i = buf_pool_index(buf_pool);

	ib::info() << "buffer pool " << i
		<< " : start to withdraw the last "
		<< buf_pool->withdraw_target << " blocks.";

	/* Minimize buf_pool->zip_free[i] lists */
	buf_pool_mutex_enter(buf_pool);
	buf_buddy_condense_free(buf_pool);
	buf_pool_mutex_exit(buf_pool);

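	/* Each iteration of the loop below does three things:
	(1) move free blocks that reside in the withdrawn area to the
	withdraw list, (2) flush LRU pages to replenish the free list,
	and (3) relocate in-use blocks and compressed pages out of the
	withdrawn area. After 10 iterations we give up and let the
	caller retry once the user threads have been paused. */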
	while (UT_LIST_GET_LEN(buf_pool->withdraw)
	       < buf_pool->withdraw_target) {

		/* try to withdraw from free_list */
		ulint	count1 = 0;

		buf_pool_mutex_enter(buf_pool);
		block = reinterpret_cast<buf_block_t*>(
			UT_LIST_GET_FIRST(buf_pool->free));
		while (block != NULL
		       && UT_LIST_GET_LEN(buf_pool->withdraw)
		       < buf_pool->withdraw_target) {
			ut_ad(block->page.in_free_list);
			ut_ad(!block->page.in_flush_list);
			ut_ad(!block->page.in_LRU_list);
			ut_a(!buf_page_in_file(&block->page));

			buf_block_t*	next_block;
			next_block = reinterpret_cast<buf_block_t*>(
				UT_LIST_GET_NEXT(
					list, &block->page));

			if (buf_block_will_withdrawn(buf_pool, block)) {
				/* This should be withdrawn */
				UT_LIST_REMOVE(
					buf_pool->free,
					&block->page);
				UT_LIST_ADD_LAST(
					buf_pool->withdraw,
					&block->page);
				ut_d(block->in_withdraw_list = TRUE);
				count1++;
			}

			block = next_block;
		}
		buf_pool_mutex_exit(buf_pool);

		/* reserve free_list length by flushing LRU pages */
		if (UT_LIST_GET_LEN(buf_pool->withdraw)
		    < buf_pool->withdraw_target) {
			ulint		scan_depth;
			flush_counters_t n;

			/* cap scan_depth with current LRU size. */
			buf_pool_mutex_enter(buf_pool);
			scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
			buf_pool_mutex_exit(buf_pool);

			scan_depth = ut_min(
				ut_max(buf_pool->withdraw_target
				       - UT_LIST_GET_LEN(buf_pool->withdraw),
				       static_cast<ulint>(srv_LRU_scan_depth)),
				scan_depth);

			buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU,
				scan_depth, 0, &n);
			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);

			if (n.flushed) {
				MONITOR_INC_VALUE_CUMULATIVE(
					MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
					MONITOR_LRU_BATCH_FLUSH_COUNT,
					MONITOR_LRU_BATCH_FLUSH_PAGES,
					n.flushed);
			}
		}

		/* relocate blocks/buddies in withdrawn area */
		ulint	count2 = 0;

		buf_pool_mutex_enter(buf_pool);
		buf_page_t*	bpage;
		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
		while (bpage != NULL) {
			BPageMutex*	block_mutex;
			buf_page_t*	next_bpage;

			block_mutex = buf_page_get_mutex(bpage);
			mutex_enter(block_mutex);

			next_bpage = UT_LIST_GET_NEXT(LRU, bpage);

			if (bpage->zip.data != NULL
			    && buf_frame_will_withdrawn(
				    buf_pool,
				    static_cast<byte*>(bpage->zip.data))) {

				if (buf_page_can_relocate(bpage)) {
					mutex_exit(block_mutex);
					buf_pool_mutex_exit_forbid(buf_pool);
					if (!buf_buddy_realloc(
						    buf_pool, bpage->zip.data,
						    page_zip_get_size(
							    &bpage->zip))) {

						/* failed to allocate block */
						buf_pool_mutex_exit_allow(
							buf_pool);
						break;
					}
					buf_pool_mutex_exit_allow(buf_pool);
					mutex_enter(block_mutex);
					count2++;
				}
				/* NOTE: if the page is in use,
				it is not relocated yet */
			}

			if (buf_page_get_state(bpage)
			    == BUF_BLOCK_FILE_PAGE
			    && buf_block_will_withdrawn(
				    buf_pool,
				    reinterpret_cast<buf_block_t*>(bpage))) {

				if (buf_page_can_relocate(bpage)) {
					mutex_exit(block_mutex);
					buf_pool_mutex_exit_forbid(buf_pool);
					if (!buf_page_realloc(
						    buf_pool,
						    reinterpret_cast<buf_block_t*>(
							    bpage))) {
						/* failed to allocate block */
						buf_pool_mutex_exit_allow(
							buf_pool);
						break;
					}
					buf_pool_mutex_exit_allow(buf_pool);
					count2++;
				} else {
					mutex_exit(block_mutex);
				}
				/* NOTE: if the page is in use,
				it is not relocated yet */
			} else {
				mutex_exit(block_mutex);
			}

			bpage = next_bpage;
		}
		buf_pool_mutex_exit(buf_pool);

		buf_resize_status(
			"buffer pool %lu : withdrawing blocks. (%lu/%lu)",
			i, UT_LIST_GET_LEN(buf_pool->withdraw),
			buf_pool->withdraw_target);

		ib::info() << "buffer pool " << i << " : withdrew "
			<< count1 << " blocks from free list."
			<< " Tried to relocate " << count2 << " pages ("
			<< UT_LIST_GET_LEN(buf_pool->withdraw) << "/"
			<< buf_pool->withdraw_target << ").";

		if (++loop_count >= 10) {
			/* Give up for now; this will be retried
			after the user threads have been paused. */

			ib::info() << "buffer pool " << i
				<< " : will retry to withdraw later.";

			/* need retry later */
			return(true);
		}
	}

	/* confirm withdrawn enough */
	const buf_chunk_t*	chunk
		= buf_pool->chunks + buf_pool->n_chunks_new;
	const buf_chunk_t*	echunk
		= buf_pool->chunks + buf_pool->n_chunks;

	while (chunk < echunk) {
		block = chunk->blocks;
		for (ulint j = chunk->size; j--; block++) {
			/* If a block in the withdrawn area is not in
			the BUF_BLOCK_NOT_USED state, something is
			corrupted. */
			ut_a(buf_block_get_state(block)
			     == BUF_BLOCK_NOT_USED);
			ut_ad(block->in_withdraw_list);
		}
		++chunk;
	}

	ib::info() << "buffer pool " << i << " : withdrawn target "
		<< UT_LIST_GET_LEN(buf_pool->withdraw) << " blocks.";

	/* retry is not needed */
	++buf_withdraw_clock;

	return(false);
}

/** Resize page_hash and zip_hash for a buffer pool instance.
@param[in]	buf_pool	buffer pool instance */
static
void
buf_pool_resize_hash(
	buf_pool_t*	buf_pool)
{
	hash_table_t*	new_hash_table;

	ut_ad(buf_pool->page_hash_old == NULL);

	/* recreate page_hash */
	new_hash_table = ib_recreate(
		buf_pool->page_hash, 2 * buf_pool->curr_size);

	for (ulint i = 0; i < hash_get_n_cells(buf_pool->page_hash); i++) {
		buf_page_t*	bpage;

		bpage = static_cast<buf_page_t*>(
			HASH_GET_FIRST(
				buf_pool->page_hash, i));

		while (bpage) {
			buf_page_t*	prev_bpage = bpage;
			ulint		fold;

			bpage = static_cast<buf_page_t*>(
				HASH_GET_NEXT(
					hash, prev_bpage));

			fold = prev_bpage->id.fold();

			HASH_DELETE(buf_page_t, hash,
				buf_pool->page_hash, fold,
				prev_bpage);

			HASH_INSERT(buf_page_t, hash,
				new_hash_table, fold,
				prev_bpage);
		}
	}

	buf_pool->page_hash_old = buf_pool->page_hash;
	buf_pool->page_hash = new_hash_table;
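
	/* The old page_hash cannot be freed here: readers may still
	hold its rw-locks. It is parked in page_hash_old and freed by
	buf_pool_resize() after all latches have been released. */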

	/* recreate zip_hash */
	new_hash_table = hash_create(2 * buf_pool->curr_size);

	for (ulint i = 0; i < hash_get_n_cells(buf_pool->zip_hash); i++) {
		buf_page_t*	bpage;

		bpage = static_cast<buf_page_t*>(
			HASH_GET_FIRST(buf_pool->zip_hash, i));

		while (bpage) {
			buf_page_t*	prev_bpage = bpage;
			ulint		fold;

			bpage = static_cast<buf_page_t*>(
				HASH_GET_NEXT(
					hash, prev_bpage));

			fold = BUF_POOL_ZIP_FOLD(
				reinterpret_cast<buf_block_t*>(
					prev_bpage));

			HASH_DELETE(buf_page_t, hash,
				buf_pool->zip_hash, fold,
				prev_bpage);

			HASH_INSERT(buf_page_t, hash,
				new_hash_table, fold,
				prev_bpage);
		}
	}

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = new_hash_table;
}

#ifndef DBUG_OFF
/** This is a debug routine to inject a memory allocation failure. */
static
void
buf_pool_resize_chunk_make_null(buf_chunk_t** new_chunks)
{
	static int count = 0;

	if (count == 1) {
		ut_free(*new_chunks);
		*new_chunks = NULL;
	}

	count++;
}
#endif // DBUG_OFF

/** Resize the buffer pool from srv_buf_pool_old_size to
srv_buf_pool_size. */
static
void
buf_pool_resize()
{
	buf_pool_t*	buf_pool;
	ulint		new_instance_size;
	bool		warning = false;

	NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;

	ut_ad(!buf_pool_resizing);
	ut_ad(!buf_pool_withdrawing);
	ut_ad(srv_buf_pool_chunk_unit > 0);

	new_instance_size = srv_buf_pool_size / srv_buf_pool_instances;
	new_instance_size >>= srv_page_size_shift;

	buf_resize_status("Resizing buffer pool from " ULINTPF " to "
			  ULINTPF " (unit=" ULINTPF ").",
			  srv_buf_pool_old_size, srv_buf_pool_size,
			  srv_buf_pool_chunk_unit);

	/* set the new size limit for all buffer pool instances */
	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool = buf_pool_from_array(i);
		buf_pool_mutex_enter(buf_pool);

		ut_ad(buf_pool->curr_size == buf_pool->old_size);
		ut_ad(buf_pool->n_chunks_new == buf_pool->n_chunks);
		ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);
		ut_ad(buf_pool->flush_rbt == NULL);

		buf_pool->curr_size = new_instance_size;

		buf_pool->n_chunks_new =
			(new_instance_size << srv_page_size_shift)
			/ srv_buf_pool_chunk_unit;

		buf_pool_mutex_exit(buf_pool);
	}
#ifdef BTR_CUR_HASH_ADAPT
	/* disable AHI if needed */
	bool	btr_search_disabled = false;

	buf_resize_status("Disabling adaptive hash index.");

	btr_search_s_lock_all();
	if (btr_search_enabled) {
		btr_search_s_unlock_all();
		btr_search_disabled = true;
	} else {
		btr_search_s_unlock_all();
	}

	btr_search_disable(true);

	if (btr_search_disabled) {
		ib::info() << "disabled adaptive hash index.";
	}
#endif /* BTR_CUR_HASH_ADAPT */

	/* set withdraw target */
	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool = buf_pool_from_array(i);
		if (buf_pool->curr_size < buf_pool->old_size) {
			ulint	withdraw_target = 0;

			const buf_chunk_t*	chunk
				= buf_pool->chunks + buf_pool->n_chunks_new;
			const buf_chunk_t*	echunk
				= buf_pool->chunks + buf_pool->n_chunks;

			while (chunk < echunk) {
				withdraw_target += chunk->size;
				++chunk;
			}

			ut_ad(buf_pool->withdraw_target == 0);
			buf_pool->withdraw_target = withdraw_target;
			buf_pool_withdrawing = true;
		}
	}

	buf_resize_status("Withdrawing blocks to be shrunken.");

	ib_time_t	withdraw_started = ut_time();
	ulint		message_interval = 60;
	ulint		retry_interval = 1;
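
	/* Progress and blocking transactions are reported when a
	withdraw pass is still incomplete after message_interval
	seconds (starting at 60 and doubling up to a cap of 1800);
	each retry is delayed by retry_interval seconds (starting at
	1 and doubling up to a cap of 10). */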

withdraw_retry:
	bool	should_retry_withdraw = false;

	/* withdraw blocks until the number of blocks fits the new size
	(if needed) */
	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_pool = buf_pool_from_array(i);
		if (buf_pool->curr_size < buf_pool->old_size) {

			should_retry_withdraw |=
				buf_pool_withdraw_blocks(buf_pool);
		}
	}

	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
		/* abort to resize for shutdown. */
		buf_pool_withdrawing = false;
		return;
	}

	/* abort buffer pool load */
	buf_load_abort();

	if (should_retry_withdraw
	    && ut_difftime(ut_time(), withdraw_started) >= message_interval) {

		if (message_interval > 900) {
			message_interval = 1800;
		} else {
			message_interval *= 2;
		}

		lock_mutex_enter();
		mutex_enter(&trx_sys.mutex);
		bool	found = false;
		for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys.trx_list);
		     trx != NULL;
		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
			if (trx->state != TRX_STATE_NOT_STARTED
			    && trx->mysql_thd != NULL
			    && ut_difftime(withdraw_started,
					   trx->start_time) > 0) {
				if (!found) {
					ib::warn() <<
						"The following trx might hold"
						" the blocks in buffer pool to"
						" be withdrawn. Buffer pool"
						" resizing can complete only"
						" after all the transactions"
						" below release the blocks.";
					found = true;
				}

				lock_trx_print_wait_and_mvcc_state(
					stderr, trx);
			}
		}
		mutex_exit(&trx_sys.mutex);
		lock_mutex_exit();

		withdraw_started = ut_time();
	}

	if (should_retry_withdraw) {
		ib::info() << "Will retry to withdraw " << retry_interval
			<< " seconds later.";
		os_thread_sleep(retry_interval * 1000000);

		if (retry_interval > 5) {
			retry_interval = 10;
		} else {
			retry_interval *= 2;
		}

		goto withdraw_retry;
	}

	buf_pool_withdrawing = false;

	buf_resize_status("Latching the whole buffer pool.");

#ifndef DBUG_OFF
	{
		bool	should_wait = true;

		while (should_wait) {
			should_wait = false;
			DBUG_EXECUTE_IF(
				"ib_buf_pool_resize_wait_before_resize",
				should_wait = true; os_thread_sleep(10000););
		}
	}
#endif /* !DBUG_OFF */

	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
		return;
	}

	/* Indicate critical path */
	buf_pool_resizing = true;

	/* Acquire all buf_pool_mutex/hash_lock */
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		buf_pool_mutex_enter(buf_pool);
	}
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		hash_lock_x_all(buf_pool->page_hash);
	}

	buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t());

	/* add/delete chunks */
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);
		buf_chunk_t*	chunk;
		buf_chunk_t*	echunk;

		buf_resize_status("buffer pool %lu :"
			" resizing with chunks %lu to %lu.",
			i, buf_pool->n_chunks, buf_pool->n_chunks_new);

		if (buf_pool->n_chunks_new < buf_pool->n_chunks) {
			/* delete chunks */
			chunk = buf_pool->chunks
				+ buf_pool->n_chunks_new;
			echunk = buf_pool->chunks + buf_pool->n_chunks;

			ulint	sum_freed = 0;

			while (chunk < echunk) {
				buf_block_t*	block = chunk->blocks;

				for (ulint j = chunk->size;
				     j--; block++) {
					mutex_free(&block->mutex);
					rw_lock_free(&block->lock);

					ut_d(rw_lock_free(
						&block->debug_latch));
				}

				buf_pool->allocator.deallocate_large(
					chunk->mem, &chunk->mem_pfx, true);

				sum_freed += chunk->size;

				++chunk;
			}

			/* discard withdraw list */
			UT_LIST_INIT(buf_pool->withdraw,
				     &buf_page_t::list);
			buf_pool->withdraw_target = 0;

			ib::info() << "buffer pool " << i << " : "
				<< buf_pool->n_chunks - buf_pool->n_chunks_new
				<< " chunks (" << sum_freed
				<< " blocks) were freed.";

			buf_pool->n_chunks = buf_pool->n_chunks_new;
		}

		{
			/* reallocate buf_pool->chunks */
			const ulint	new_chunks_size
				= buf_pool->n_chunks_new * sizeof(*chunk);

			buf_chunk_t*	new_chunks
				= reinterpret_cast<buf_chunk_t*>(
					ut_zalloc_nokey_nofatal(new_chunks_size));

			DBUG_EXECUTE_IF("buf_pool_resize_chunk_null",
				buf_pool_resize_chunk_make_null(&new_chunks););

			if (new_chunks == NULL) {
				ib::error() << "buffer pool " << i
					<< " : failed to allocate"
					" the chunk array.";
				buf_pool->n_chunks_new
					= buf_pool->n_chunks;
				warning = true;
				buf_pool->chunks_old = NULL;
				for (ulint j = 0; j < buf_pool->n_chunks_new; j++) {
					buf_pool_register_chunk(&(buf_pool->chunks[j]));
				}
				goto calc_buf_pool_size;
			}

			ulint	n_chunks_copy = ut_min(buf_pool->n_chunks_new,
						       buf_pool->n_chunks);

			memcpy(new_chunks, buf_pool->chunks,
			       n_chunks_copy * sizeof(*chunk));

			for (ulint j = 0; j < n_chunks_copy; j++) {
				buf_pool_register_chunk(&new_chunks[j]);
			}

			buf_pool->chunks_old = buf_pool->chunks;
			buf_pool->chunks = new_chunks;
		}

		if (buf_pool->n_chunks_new > buf_pool->n_chunks) {
			/* add chunks */
			chunk = buf_pool->chunks + buf_pool->n_chunks;
			echunk = buf_pool->chunks
				+ buf_pool->n_chunks_new;

			ulint	sum_added = 0;
			ulint	n_chunks = buf_pool->n_chunks;

			while (chunk < echunk) {
				ulong	unit = srv_buf_pool_chunk_unit;

				if (!buf_chunk_init(buf_pool, chunk, unit)) {

					ib::error() << "buffer pool " << i
						<< " : failed to allocate"
						" new memory.";

					warning = true;

					buf_pool->n_chunks_new
						= n_chunks;

					break;
				}

				sum_added += chunk->size;

				++n_chunks;
				++chunk;
			}

			ib::info() << "buffer pool " << i << " : "
				<< buf_pool->n_chunks_new - buf_pool->n_chunks
				<< " chunks (" << sum_added
				<< " blocks) were added.";

			buf_pool->n_chunks = n_chunks;
		}
calc_buf_pool_size:

		/* recalc buf_pool->curr_size */
		ulint	new_size = 0;

		chunk = buf_pool->chunks;
		do {
			new_size += chunk->size;
		} while (++chunk < buf_pool->chunks
			 + buf_pool->n_chunks);

		buf_pool->curr_size = new_size;
		buf_pool->n_chunks_new = buf_pool->n_chunks;

		if (buf_pool->chunks_old) {
			ut_free(buf_pool->chunks_old);
			buf_pool->chunks_old = NULL;
		}
	}

	buf_pool_chunk_map_t*	chunk_map_old = buf_chunk_map_ref;
	buf_chunk_map_ref = buf_chunk_map_reg;

	/* set instance sizes */
	{
		ulint	curr_size = 0;

		for (ulint i = 0; i < srv_buf_pool_instances; i++) {
			buf_pool = buf_pool_from_array(i);

			ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);

			buf_pool->read_ahead_area =
				ut_min(BUF_READ_AHEAD_PAGES,
				       ut_2_power_up(buf_pool->curr_size /
						     BUF_READ_AHEAD_PORTION));
			buf_pool->curr_pool_size
				= buf_pool->curr_size << srv_page_size_shift;
			curr_size += buf_pool->curr_pool_size;
			buf_pool->old_size = buf_pool->curr_size;
		}
		srv_buf_pool_curr_size = curr_size;
		innodb_set_buf_pool_size(buf_pool_size_align(curr_size));
	}

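	/* Only rebuild the hash tables when the pool has grown or
	shrunk by more than a factor of two since they were last
	sized. */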
	const bool	new_size_too_diff
		= srv_buf_pool_base_size > srv_buf_pool_size * 2
		|| srv_buf_pool_base_size * 2 < srv_buf_pool_size;

	/* Normalize page_hash and zip_hash,
	if the new size is too different */
	if (!warning && new_size_too_diff) {

		buf_resize_status("Resizing hash tables.");

		for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
			buf_pool_t*	buf_pool = buf_pool_from_array(i);

			buf_pool_resize_hash(buf_pool);

			ib::info() << "buffer pool " << i
				<< " : hash tables were resized.";
		}
	}

	/* Release all buf_pool_mutex/page_hash */
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		hash_unlock_x_all(buf_pool->page_hash);
		buf_pool_mutex_exit(buf_pool);

		if (buf_pool->page_hash_old != NULL) {
			hash_table_free(buf_pool->page_hash_old);
			buf_pool->page_hash_old = NULL;
		}
	}

	UT_DELETE(chunk_map_old);

	buf_pool_resizing = false;

	/* Normalize other components, if the new size is too different */
	if (!warning && new_size_too_diff) {
		srv_buf_pool_base_size = srv_buf_pool_size;

		buf_resize_status("Resizing also other hash tables.");

		/* normalize lock_sys */
		srv_lock_table_size = 5
			* (srv_buf_pool_size >> srv_page_size_shift);
		lock_sys.resize(srv_lock_table_size);

		/* normalize btr_search_sys */
		btr_search_sys_resize(
			buf_pool_get_curr_size() / sizeof(void*) / 64);

		/* normalize dict_sys */
		dict_resize();

		ib::info() << "Resized hash tables at lock_sys,"
#ifdef BTR_CUR_HASH_ADAPT
			" adaptive hash index,"
#endif /* BTR_CUR_HASH_ADAPT */
			" dictionary.";
	}

	/* normalize ibuf->max_size */
	ibuf_max_size_update(srv_change_buffer_max_size);

	if (srv_buf_pool_old_size != srv_buf_pool_size) {

		ib::info() << "Completed resizing buffer pool from "
			<< srv_buf_pool_old_size
			<< " to " << srv_buf_pool_size << ".";
		srv_buf_pool_old_size = srv_buf_pool_size;
	}

#ifdef BTR_CUR_HASH_ADAPT
	/* enable AHI if needed */
	if (btr_search_disabled) {
		btr_search_enable();
		ib::info() << "Re-enabled adaptive hash index.";
	}
#endif /* BTR_CUR_HASH_ADAPT */

	char	now[32];

	ut_sprintf_timestamp(now);
	if (!warning) {
		buf_resize_status("Completed resizing buffer pool at %s.",
			now);
	} else {
		buf_resize_status("Resizing buffer pool failed,"
			" finished resizing at %s.", now);
	}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	return;
}

/** This is the thread for resizing the buffer pool. It waits for an
event and, when woken up, performs the resize and sleeps again.
@return this function does not return; it calls os_thread_exit() */
extern "C"
os_thread_ret_t
DECLARE_THREAD(buf_resize_thread)(void*)
{
	my_thread_init();

	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
		os_event_wait(srv_buf_resize_event);
		os_event_reset(srv_buf_resize_event);

		if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
			break;
		}

		buf_pool_mutex_enter_all();
		if (srv_buf_pool_old_size == srv_buf_pool_size) {
			buf_pool_mutex_exit_all();
			std::ostringstream sout;
			sout << "Size did not change (old size = new size = "
				<< srv_buf_pool_size << "). Nothing to do.";
			buf_resize_status(sout.str().c_str());

			/* nothing to do */
			continue;
		}
		buf_pool_mutex_exit_all();

		buf_pool_resize();
	}

	srv_buf_resize_thread_active = false;

	my_thread_end();
	os_thread_exit();

	OS_THREAD_DUMMY_RETURN;
}

#ifdef BTR_CUR_HASH_ADAPT
/** Clear the adaptive hash index on all pages in the buffer pool. */
void
buf_pool_clear_hash_index()
{
	ulint	p;

	ut_ad(btr_search_own_all(RW_LOCK_X));
	ut_ad(!buf_pool_resizing);
	ut_ad(!btr_search_enabled);

	for (p = 0; p < srv_buf_pool_instances; p++) {
		buf_pool_t*	buf_pool = buf_pool_from_array(p);
		buf_chunk_t*	chunks = buf_pool->chunks;
		buf_chunk_t*	chunk = chunks + buf_pool->n_chunks;

		while (--chunk >= chunks) {
			buf_block_t*	block = chunk->blocks;
			ulint		i = chunk->size;

			for (; i--; block++) {
				dict_index_t*	index = block->index;
				assert_block_ahi_valid(block);

				/* We can set block->index = NULL
				and block->n_pointers = 0
				when btr_search_own_all(RW_LOCK_X);
				see the comments in buf0buf.h */

				if (!index) {
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
					ut_a(!block->n_pointers);
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
					continue;
				}

				ut_d(buf_page_state	state
					= buf_block_get_state(block));
				/* Another thread may have set the
				state to BUF_BLOCK_REMOVE_HASH in
				buf_LRU_block_remove_hashed().

				The state change in buf_page_realloc()
				is not observable here, because in
				that case we would have !block->index.

				In the end, the entire adaptive hash
				index will be removed. */
				ut_ad(state == BUF_BLOCK_FILE_PAGE
				      || state == BUF_BLOCK_REMOVE_HASH);
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
				block->n_pointers = 0;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
				block->index = NULL;
			}
		}
	}
}
#endif /* BTR_CUR_HASH_ADAPT */

/********************************************************************//**
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
static
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage));
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get_low(buf_pool, bpage->id));

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_POOL_WATCH:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */
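
	/* Copy the whole control block, then repair what the shallow
	copy cannot preserve: the LRU list linkage and the page_hash
	entry. */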

	memcpy(dpage, bpage, sizeof *dpage);

	/* Important that we adjust the hazard pointer before
	removing bpage from LRU list. */
	buf_LRU_adjust_hp(buf_pool, bpage);

	ut_d(bpage->in_LRU_list = FALSE);
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(buf_pool->LRU, bpage);

	if (b != NULL) {
		UT_LIST_INSERT_AFTER(buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(CheckInLRUList::validate(buf_pool));

	/* relocate buf_pool->page_hash */
	ulint	fold = bpage->id.fold();
	ut_ad(fold == dpage->id.fold());
	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}

/** Hazard Pointer implementation. */

/** Set current value
@param bpage	buffer block to be set as hp */
void
HazardPointer::set(buf_page_t* bpage)
{
	ut_ad(mutex_own(m_mutex));
	ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
	ut_ad(!bpage || buf_page_in_file(bpage));

	m_hp = bpage;
}

/** Checks if a bpage is the hp
@param bpage	buffer block to be compared
@return true if it is hp */

bool
HazardPointer::is_hp(const buf_page_t* bpage)
{
	ut_ad(mutex_own(m_mutex));
	ut_ad(!m_hp || buf_pool_from_bpage(m_hp) == m_buf_pool);
	ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);

	return(bpage == m_hp);
}

/** Adjust the value of hp. This happens when some other thread working
on the same list attempts to remove the hp from the list.
@param bpage	buffer block to be compared */

void
FlushHp::adjust(const buf_page_t* bpage)
{
	ut_ad(bpage != NULL);

	/** We only support reverse traversal for now. */
	if (is_hp(bpage)) {
		m_hp = UT_LIST_GET_PREV(list, m_hp);
	}

	ut_ad(!m_hp || m_hp->in_flush_list);
}

/** Adjust the value of hp. This happens when some other thread working
on the same list attempts to remove the hp from the list.
@param bpage	buffer block to be compared */

void
LRUHp::adjust(const buf_page_t* bpage)
{
	ut_ad(bpage);

	/** We only support reverse traversal for now. */
	if (is_hp(bpage)) {
		m_hp = UT_LIST_GET_PREV(LRU, m_hp);
	}

	ut_ad(!m_hp || m_hp->in_LRU_list);
}

/** Selects from where to start a scan. If we have scanned too deep into
the LRU list, the scan is restarted from the tail of the LRU list.
@return buf_page_t from where to start the scan */

buf_page_t*
LRUItr::start()
{
	ut_ad(mutex_own(m_mutex));

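	/* Restart from the tail of the LRU list if the hazard pointer
	is unset or has moved into the old sublist. */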
	if (!m_hp || m_hp->old) {
		m_hp = UT_LIST_GET_LAST(m_buf_pool->LRU);
	}

	return(m_hp);
}

/** Determine if a block is a sentinel for a buffer pool watch.
@param[in]	buf_pool	buffer pool instance
@param[in]	bpage		block
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
ibool
buf_pool_watch_is_sentinel(
	const buf_pool_t*	buf_pool,
	const buf_page_t*	bpage)
{
	/* We must also own the appropriate hash lock. */
	ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage));
	ut_ad(buf_page_in_file(bpage));

	if (bpage < &buf_pool->watch[0]
	    || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {

		ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
		      || bpage->zip.data != NULL);

		return(FALSE);
	}

	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage->zip.data == NULL);
	return(TRUE);
}

/** Add watch for the given page to be read in. Caller must have
appropriate hash_lock for the bpage. This function may release the
hash_lock and reacquire it.
@param[in]	page_id		page id
@param[in,out]	hash_lock	hash_lock currently latched
@return NULL if watch set, block if the page is in the buffer pool */
static
buf_page_t*
buf_pool_watch_set(
	const page_id_t&	page_id,
	rw_lock_t**		hash_lock)
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);

	ut_ad(*hash_lock == buf_page_hash_lock_get(buf_pool, page_id));

	ut_ad(rw_lock_own(*hash_lock, RW_LOCK_X));

	bpage = buf_page_hash_get_low(buf_pool, page_id);

	if (bpage != NULL) {
page_found:
		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			return(bpage);
		}

		/* Add to an existing watch. */
		buf_block_fix(bpage);
		return(NULL);
	}

	/* From this point this function becomes fairly heavy in terms
	of latching. We acquire the buf_pool mutex as well as all the
	hash_locks. buf_pool mutex is needed because any changes to
	the page_hash must be covered by it and hash_locks are needed
	because we don't want to read any stale information in
	buf_pool->watch[]. However, it is not in the critical code path
	as this function will be called only by the purge thread. */

	/* To obey latching order first release the hash_lock. */
	rw_lock_x_unlock(*hash_lock);

	buf_pool_mutex_enter(buf_pool);
	hash_lock_x_all(buf_pool->page_hash);

	/* page_hash could have been modified while we did not hold
	buf_pool->mutex: look up the hash_lock again. */
	*hash_lock = buf_page_hash_lock_get(buf_pool, page_id);

	/* We have to recheck that the page was not loaded and that a
	watch was not set by some other purge thread, because of the
	small time window between releasing the hash_lock and
	acquiring the buf_pool mutex above. */

	bpage = buf_page_hash_get_low(buf_pool, page_id);
	if (UNIV_LIKELY_NULL(bpage)) {
		buf_pool_mutex_exit(buf_pool);
		hash_unlock_x_all_but(buf_pool->page_hash, *hash_lock);
		goto page_found;
	}

	/* The maximum number of purge threads should never exceed
	BUF_POOL_WATCH_SIZE. So there is no way for purge thread
	instance to hold a watch when setting another watch. */
	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool->watch[],
			which is protected by buf_pool->mutex.
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->id.copy_from(page_id);
			bpage->buf_fix_count = 1;

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    page_id.fold(), bpage);

			buf_pool_mutex_exit(buf_pool);
			/* Once the sentinel is in the page_hash we can
			safely release all locks except the relevant
			hash_lock */
			hash_unlock_x_all_but(buf_pool->page_hash,
					      *hash_lock);

			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed. Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	return(NULL);
}

/** Remove the sentinel block for the watch before replacing it with a
real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice
that the block has been replaced with the real block.
@param[in,out]	buf_pool	buffer pool instance
@param[in,out]	watch		sentinel for watch */
static
void
buf_pool_watch_remove(
	buf_pool_t*	buf_pool,
	buf_page_t*	watch)
{
#ifdef UNIV_DEBUG
	/* We must also own the appropriate hash_bucket mutex. */
	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, watch->id);
	ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
#endif /* UNIV_DEBUG */

	ut_ad(buf_pool_mutex_own(buf_pool));

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, watch->id.fold(),
		    watch);
	ut_d(watch->in_page_hash = FALSE);
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}

/** Stop watching if the page has been read in.
buf_pool_watch_set(same_page_id) must have returned NULL before.
@param[in]	page_id	page id */
void
buf_pool_watch_unset(
	const page_id_t&	page_id)
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);

	/* We only need the buf_pool mutex in the case where we end up
	calling buf_pool_watch_remove, but to obey the latching order
	we acquire it here before acquiring the hash_lock. This should
	not cause too much grief as this function is only ever called
	from the purge thread. */
	buf_pool_mutex_enter(buf_pool);

	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
	rw_lock_x_lock(hash_lock);

	/* The page must exist because buf_pool_watch_set()
	increments buf_fix_count. */
	bpage = buf_page_hash_get_low(buf_pool, page_id);

	if (buf_block_unfix(bpage) == 0
	    && buf_pool_watch_is_sentinel(buf_pool, bpage)) {
		buf_pool_watch_remove(buf_pool, bpage);
	}

	buf_pool_mutex_exit(buf_pool);
	rw_lock_x_unlock(hash_lock);
}

/** Check if the page has been read in.
This may only be called after buf_pool_watch_set(same_page_id)
has returned NULL and before invoking buf_pool_watch_unset(same_page_id).
@param[in]	page_id	page id
@return FALSE if the given page was not read in, TRUE if it was */
ibool
buf_pool_watch_occurred(
	const page_id_t&	page_id)
{
	ibool		ret;
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);

	rw_lock_s_lock(hash_lock);

	/* page_hash can be modified when buf_pool->mutex is not held:
	confirm that the hash_lock still matches the page id. */
	hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);

	/* The page must exist because buf_pool_watch_set()
	increments buf_fix_count. */
	bpage = buf_page_hash_get_low(buf_pool, page_id);

	ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
	rw_lock_s_unlock(hash_lock);

	return(ret);
}

/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
void
buf_page_make_young(
/*================*/
	buf_page_t*	bpage)	/*!< in: buffer block of a file page */
{
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	buf_pool_mutex_enter(buf_pool);

	ut_a(buf_page_in_file(bpage));

	buf_LRU_make_block_young(bpage);

	buf_pool_mutex_exit(buf_pool);
}

/********************************************************************//**
Moves a page to the start of the buffer pool LRU list if it is too old.
This high-level function can be used to prevent an important page from
slipping out of the buffer pool. */
static
void
buf_page_make_young_if_needed(
/*==========================*/
	buf_page_t*	bpage)		/*!< in/out: buffer block of a
					file page */
{
#ifdef UNIV_DEBUG
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
	ut_ad(!buf_pool_mutex_own(buf_pool));
#endif /* UNIV_DEBUG */
	ut_a(buf_page_in_file(bpage));

	if (buf_page_peek_if_too_old(bpage)) {
		buf_page_make_young(bpage);
	}
}

#ifdef UNIV_DEBUG

/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@param[in]	page_id	page id
@return control block if found in page hash table, otherwise NULL */
buf_page_t*
buf_page_set_file_page_was_freed(
	const page_id_t&	page_id)
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	rw_lock_t*	hash_lock;

	bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);

	if (bpage) {
		BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
		mutex_enter(block_mutex);
		rw_lock_s_unlock(hash_lock);
		/* bpage->file_page_was_freed may already be set
		when this code is invoked from dict_drop_index_tree() */
3693 bpage->file_page_was_freed = TRUE;
3694 mutex_exit(block_mutex);
3695 }
3696
3697 return(bpage);
3698}
3699
/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we reallocate a previously freed
file page, so that the debug version no longer flags accesses to it.
@param[in]	page_id	page id
@return control block if found in page hash table, otherwise NULL */
buf_page_t*
buf_page_reset_file_page_was_freed(
	const page_id_t&	page_id)
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	rw_lock_t*	hash_lock;

	bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);
	if (bpage) {
		BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
		mutex_enter(block_mutex);
		rw_lock_s_unlock(hash_lock);
		bpage->file_page_was_freed = FALSE;
		mutex_exit(block_mutex);
	}

	return(bpage);
}
#endif /* UNIV_DEBUG */

/** Attempts to discard the uncompressed frame of a compressed page.
The caller should not be holding any mutexes when this function is called.
Note that this function returns nothing; it merely attempts the discard.
@param[in]	page_id	page id */
static
void
buf_block_try_discard_uncompressed(
	const page_id_t&	page_id)
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);

	/* Since we need to acquire the buf_pool mutex to discard
	the uncompressed frame, and because the page_hash mutex
	resides below the buf_pool mutex in the sync ordering, we
	must first release the page_hash mutex. This means that the
	block in question can move out of page_hash. Therefore
	we need to check again if the block is still in page_hash. */
	buf_pool_mutex_enter(buf_pool);

	bpage = buf_page_hash_get(buf_pool, page_id);

	if (bpage) {
		buf_LRU_free_page(bpage, false);
	}

	buf_pool_mutex_exit(buf_pool);
}
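
/* A minimal sketch of the latching-order constraint described above:
a caller holding the page_hash lock must release it before taking
buf_pool->mutex, and then re-validate its lookup, because the block
may have been relocated in the window:

	rw_lock_s_unlock(hash_lock);		// drop page_hash lock first
	buf_pool_mutex_enter(buf_pool);		// then take buf_pool->mutex
	bpage = buf_page_hash_get(buf_pool, page_id);	// re-validate
*/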

/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@param[in]	page_id		page id
@param[in]	page_size	page size
@return pointer to the block */
buf_page_t*
buf_page_get_zip(
	const page_id_t&	page_id,
	const page_size_t&	page_size)
{
	buf_page_t*	bpage;
	BPageMutex*	block_mutex;
	rw_lock_t*	hash_lock;
	ibool		discard_attempted = FALSE;
	ibool		must_read;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);

	buf_pool->stat.n_page_gets++;

	for (;;) {
lookup:

		/* The following call will also grab the page_hash
		mutex if the page is found. */
		bpage = buf_page_hash_get_s_locked(buf_pool, page_id,
						   &hash_lock);
		if (bpage) {
			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		ut_ad(!hash_lock);
		dberr_t	err = buf_read_page(page_id, page_size);

		if (err != DB_SUCCESS) {
			ib::error() << "Reading compressed page " << page_id
				<< " failed with error: " << ut_strerr(err);

			goto err_exit;
		}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	ut_ad(buf_page_hash_lock_held_s(buf_pool, bpage));

	if (!bpage->zip.data) {
		/* There is no compressed page. */
err_exit:
		rw_lock_s_unlock(hash_lock);
		return(NULL);
	}

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_POOL_WATCH:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;

	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		buf_block_fix(bpage);
		block_mutex = &buf_pool->zip_mutex;
		mutex_enter(block_mutex);
		goto got_block;
	case BUF_BLOCK_FILE_PAGE:
		/* Discard the uncompressed page frame if possible. */
		if (!discard_attempted) {
			rw_lock_s_unlock(hash_lock);
			buf_block_try_discard_uncompressed(page_id);
			discard_attempted = TRUE;
			goto lookup;
		}

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);

		block_mutex = &((buf_block_t*) bpage)->mutex;

		mutex_enter(block_mutex);

		goto got_block;
	}

	ut_error;
	goto err_exit;

got_block:
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;

	rw_lock_s_unlock(hash_lock);

	ut_ad(!bpage->file_page_was_freed);

	buf_page_set_accessed(bpage);

	mutex_exit(block_mutex);

	buf_page_make_young_if_needed(bpage);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		for (;;) {
			enum buf_io_fix	io_fix;

			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(page_id) == 0);
#endif /* UNIV_IBUF_COUNT_DEBUG */

	return(bpage);
}
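
/* A minimal usage sketch for buf_page_get_zip(), assuming the caller
provides its own mutual exclusion as the contract above requires; the
space id, page number and ROW_FORMAT=COMPRESSED page size are
illustrative only:

	const page_id_t		id(space_id, page_no);
	const page_size_t	size(zip_size, srv_page_size, true);

	if (buf_page_t* bpage = buf_page_get_zip(id, size)) {
		// ... read from bpage->zip.data ...
		buf_page_release_zip(bpage);	// mandatory release
	}
*/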

/********************************************************************//**
Initialize some fields of a control block. */
UNIV_INLINE
void
buf_block_init_low(
/*===============*/
	buf_block_t*	block)	/*!< in: block to init */
{
	block->skip_flush_check = false;
#ifdef BTR_CUR_HASH_ADAPT
	/* No adaptive hash index entries may point to a previously
	unused (and now freshly allocated) block. */
	assert_block_ahi_empty_on_init(block);
	block->index		= NULL;

	block->n_hash_helps	= 0;
	block->n_fields		= 1;
	block->n_bytes		= 0;
	block->left_side	= TRUE;
#endif /* BTR_CUR_HASH_ADAPT */
}

/********************************************************************//**
Decompress a block.
@return TRUE if successful */
ibool
buf_zip_decompress(
/*===============*/
	buf_block_t*	block,	/*!< in/out: block */
	ibool		check)	/*!< in: TRUE=verify the page checksum */
{
	const byte*	frame = block->page.zip.data;
	ulint		size = page_zip_get_size(&block->page.zip);
	/* The tablespace will not be found if this function is called
	during IMPORT. */
	fil_space_t* space = fil_space_acquire_for_io(block->page.id.space());
	const unsigned key_version = mach_read_from_4(
		frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
	fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL;
	const bool encrypted = crypt_data
		&& crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
		&& (!crypt_data->is_default_encryption()
		    || srv_encrypt_tables);

	ut_ad(block->page.size.is_compressed());
	ut_a(block->page.id.space() != 0);

	if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) {

		ib::error() << "Compressed page checksum mismatch for "
			<< (space ? space->chain.start->name : "")
			<< block->page.id << ": stored: "
			<< mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
			<< ", crc32: "
			<< page_zip_calc_checksum(
				frame, size, SRV_CHECKSUM_ALGORITHM_CRC32)
			<< "/"
			<< page_zip_calc_checksum(
				frame, size, SRV_CHECKSUM_ALGORITHM_CRC32,
				true)
			<< " innodb: "
			<< page_zip_calc_checksum(
				frame, size, SRV_CHECKSUM_ALGORITHM_INNODB)
			<< ", none: "
			<< page_zip_calc_checksum(
				frame, size, SRV_CHECKSUM_ALGORITHM_NONE);

		goto err_exit;
	}

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
	case FIL_PAGE_RTREE:
		if (page_zip_decompress(&block->page.zip,
					block->frame, TRUE)) {
			if (space) {
				space->release_for_io();
			}
			return(TRUE);
		}

		ib::error() << "Unable to decompress "
			<< (space ? space->chain.start->name : "")
			<< block->page.id;
		goto err_exit;
	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		/* Copy to uncompressed storage. */
		memcpy(block->frame, frame, block->page.size.physical());
		if (space) {
			space->release_for_io();
		}

		return(TRUE);
	}

	ib::error() << "Unknown compressed page type "
		<< fil_page_get_type(frame)
		<< " in " << (space ? space->chain.start->name : "")
		<< block->page.id;

err_exit:
	if (encrypted) {
		ib::info() << "Row compressed page could be encrypted"
			" with key_version " << key_version;
		block->page.encrypted = true;
	}

	if (space) {
		if (encrypted) {
			dict_set_encrypted_by_space(space);
		} else {
			dict_set_corrupted_by_space(space);
		}

		space->release_for_io();
	}

	return(FALSE);
}
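
/* A minimal sketch of how buf_zip_decompress() is driven by
buf_page_get_gen(), assuming the caller owns the block exclusively
(buffer-fixed, I/O-fixed and X-latched) for the duration of the call:

	if (!buf_zip_decompress(block, TRUE)) {
		// checksum mismatch, unknown page type, or a page that
		// is probably still encrypted: the space has been
		// flagged, and the caller must fail the page request
	}
*/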

#ifdef BTR_CUR_HASH_ADAPT
/** Get a buffer block from an adaptive hash index pointer.
This function does not return if the block is not identified.
@param[in]	ptr	pointer to within a page frame
@return pointer to block, never NULL */
buf_block_t*
buf_block_from_ahi(const byte* ptr)
{
	buf_pool_chunk_map_t::iterator it;

	buf_pool_chunk_map_t*	chunk_map = buf_chunk_map_ref;
	ut_ad(buf_chunk_map_ref == buf_chunk_map_reg);
	ut_ad(!buf_pool_resizing);

	buf_chunk_t*	chunk;
	it = chunk_map->upper_bound(ptr);

	ut_a(it != chunk_map->begin());

	if (it == chunk_map->end()) {
		chunk = chunk_map->rbegin()->second;
	} else {
		chunk = (--it)->second;
	}

	ulint		offs = ulint(ptr - chunk->blocks->frame);

	offs >>= srv_page_size_shift;

	ut_a(offs < chunk->size);

	buf_block_t*	block = &chunk->blocks[offs];

	/* The function buf_chunk_init() invokes buf_block_init() so that
	block[n].frame == block->frame + n * srv_page_size. Check it. */
	ut_ad(block->frame == page_align(ptr));
	/* Read the state of the block without holding a mutex.
	A state transition from BUF_BLOCK_FILE_PAGE to
	BUF_BLOCK_REMOVE_HASH is possible during this execution. */
	ut_d(const buf_page_state state = buf_block_get_state(block));
	ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_REMOVE_HASH);
	return(block);
}
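
/* A small worked example of the chunk arithmetic above, assuming
16KiB pages (srv_page_size_shift == 14): for a pointer that lies
3 pages plus 100 bytes past chunk->blocks->frame,
offs = (3 * 16384 + 100) >> 14 == 3, so the block is
&chunk->blocks[3], whose frame contains the pointer. */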
#endif /* BTR_CUR_HASH_ADAPT */

/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it. This function checks one of
the buffer pool instances.
@return TRUE if ptr belongs to a buf_block_t struct */
static
ibool
buf_pointer_is_block_field_instance(
/*================================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	const void*	ptr)		/*!< in: pointer not dereferenced */
{
	const buf_chunk_t*		chunk	= buf_pool->chunks;
	const buf_chunk_t* const	echunk	= chunk + ut_min(
		buf_pool->n_chunks, buf_pool->n_chunks_new);

	/* TODO: protect buf_pool->chunks with a mutex (the older pointer
	will currently remain valid during buf_pool_resize()) */
	while (chunk < echunk) {
		if (ptr >= (void*) chunk->blocks
		    && ptr < (void*) (chunk->blocks + chunk->size)) {

			return(TRUE);
		}

		chunk++;
	}

	return(FALSE);
}

/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it
@return TRUE if ptr belongs to a buf_block_t struct */
ibool
buf_pointer_is_block_field(
/*=======================*/
	const void*	ptr)	/*!< in: pointer not dereferenced */
{
	ulint	i;

	for (i = 0; i < srv_buf_pool_instances; i++) {
		ibool	found;

		found = buf_pointer_is_block_field_instance(
			buf_pool_from_array(i), ptr);
		if (found) {
			return(TRUE);
		}
	}

	return(FALSE);
}

/********************************************************************//**
Find out if a buffer block was created by buf_chunk_init().
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
static
ibool
buf_block_is_uncompressed(
/*======================*/
	buf_pool_t*		buf_pool,	/*!< in: buffer pool instance */
	const buf_block_t*	block)		/*!< in: pointer to block,
						not dereferenced */
{
	if ((((ulint) block) % sizeof *block) != 0) {
		/* The pointer should be aligned. */
		return(FALSE);
	}

	return(buf_pointer_is_block_field_instance(buf_pool, (void*) block));
}

#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/********************************************************************//**
Return true if probe is enabled.
@return true if probe enabled. */
static
bool
buf_debug_execute_is_force_flush()
/*==============================*/
{
	DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); );

	/* This is used during quiesce testing; we want to ensure maximum
	buffering by the change buffer. */

	if (srv_ibuf_disable_background_merge) {
		return(true);
	}

	return(false);
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */

/** Wait for the block to be read in.
@param[in]	block	The block to check */
static
void
buf_wait_for_read(
	buf_block_t*	block)
{
	/* Note:

	We are using the block->lock to check for IO state (and a dirty read).
	We set the IO_READ state under the protection of the hash_lock
	(and block->mutex). This is safe because another thread can only
	access the block (and check for IO state) after the block has been
	added to the page hashtable. */

	if (buf_block_get_io_fix(block) == BUF_IO_READ) {

		/* Wait until the read operation completes */

		BPageMutex*	mutex = buf_page_get_mutex(&block->page);

		for (;;) {
			buf_io_fix	io_fix;

			mutex_enter(mutex);

			io_fix = buf_block_get_io_fix(block);

			mutex_exit(mutex);

			if (io_fix == BUF_IO_READ) {
				/* Wait by temporarily s-latching */
				rw_lock_s_lock(&block->lock);
				rw_lock_s_unlock(&block->lock);
			} else {
				break;
			}
		}
	}
}
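
/* The s-latch wait above works because the read path keeps block->lock
X-latched for the whole I/O: buf_page_init_for_read() takes
rw_lock_x_lock_gen(&block->lock, BUF_IO_READ), and the io-handler
releases it on completion. A minimal sketch of the waiting side:

	rw_lock_s_lock(&block->lock);	// blocks until the io-handler
	rw_lock_s_unlock(&block->lock);	// has released the X-latch
*/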

/** This is the general function used to get access to a database page.
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH,
				or RW_NO_LATCH
@param[in]	guess		guessed block or NULL
@param[in]	mode		BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, BUF_GET_IF_IN_POOL_OR_WATCH,
BUF_GET_POSSIBLY_FREED, or BUF_EVICT_IF_IN_POOL
@param[in]	file		file name
@param[in]	line		line where called
@param[in]	mtr		mini-transaction
@param[out]	err		DB_SUCCESS or error code; may be NULL
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	ulint			rw_latch,
	buf_block_t*		guess,
	ulint			mode,
	const char*		file,
	unsigned		line,
	mtr_t*			mtr,
	dberr_t*		err)
{
	buf_block_t*	block;
	unsigned	access_time;
	rw_lock_t*	hash_lock;
	buf_block_t*	fix_block;
	ulint		retries = 0;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);

	ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
	ut_ad(!mtr || mtr->is_active());
	ut_ad((rw_latch == RW_S_LATCH)
	      || (rw_latch == RW_X_LATCH)
	      || (rw_latch == RW_SX_LATCH)
	      || (rw_latch == RW_NO_LATCH));

	if (err) {
		*err = DB_SUCCESS;
	}

#ifdef UNIV_DEBUG
	switch (mode) {
	case BUF_EVICT_IF_IN_POOL:
		/* After DISCARD TABLESPACE, the tablespace would not exist,
		but in IMPORT TABLESPACE, PageConverter::operator() must
		replace any old pages, which were not evicted during DISCARD.
		Skip the assertion on space_page_size. */
		break;
	case BUF_PEEK_IF_IN_POOL:
		/* In this mode, the caller may pass a dummy page size,
		because it does not really matter. */
		break;
	default:
		ut_error;
	case BUF_GET_NO_LATCH:
		ut_ad(rw_latch == RW_NO_LATCH);
		/* fall through */
	case BUF_GET:
	case BUF_GET_IF_IN_POOL:
	case BUF_GET_IF_IN_POOL_OR_WATCH:
	case BUF_GET_POSSIBLY_FREED:
		bool	found;
		const page_size_t&	space_page_size
			= fil_space_get_page_size(page_id.space(), &found);
		ut_ad(found);
		ut_ad(page_size.equals_to(space_page_size));
	}
#endif /* UNIV_DEBUG */

	ut_ad(!mtr || !ibuf_inside(mtr)
	      || ibuf_page_low(page_id, page_size, FALSE, file, line, NULL));

	buf_pool->stat.n_page_gets++;
	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
loop:
	block = guess;

	rw_lock_s_lock(hash_lock);

	/* If not own buf_pool_mutex, page_hash can be changed. */
	hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);

	if (block != NULL) {

		/* If the guess is a compressed page descriptor that
		has been allocated by buf_page_alloc_descriptor(),
		it may have been freed by buf_relocate(). */

		if (!buf_block_is_uncompressed(buf_pool, block)
		    || !page_id.equals_to(block->page.id)
		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

			/* Our guess was bogus or things have changed
			since. */
			block = guess = NULL;
		} else {
			ut_ad(!block->page.in_zip_hash);
		}
	}

	if (block == NULL) {
		block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id);
	}

	if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
		rw_lock_s_unlock(hash_lock);
		block = NULL;
	}

	if (block == NULL) {

		/* Page not in buf_pool: needs to be read from file */

		if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
			rw_lock_x_lock(hash_lock);

			/* If not own buf_pool_mutex,
			page_hash can be changed. */
			hash_lock = buf_page_hash_lock_x_confirm(
				hash_lock, buf_pool, page_id);

			block = (buf_block_t*) buf_pool_watch_set(
				page_id, &hash_lock);

			if (block) {
				/* We can release hash_lock after we
				increment the fix count to make
				sure that no state change takes place. */
				fix_block = block;

				if (fsp_is_system_temporary(page_id.space())) {
					/* For temporary tablespace,
					the mutex is being used for
					synchronization between user
					thread and flush thread,
					instead of block->lock. See
					buf_flush_page() for the flush
					thread counterpart. */

					BPageMutex*	fix_mutex
						= buf_page_get_mutex(
							&fix_block->page);
					mutex_enter(fix_mutex);
					buf_block_fix(fix_block);
					mutex_exit(fix_mutex);
				} else {
					buf_block_fix(fix_block);
				}

				/* Now safe to release page_hash mutex */
				rw_lock_x_unlock(hash_lock);
				goto got_block;
			}

			rw_lock_x_unlock(hash_lock);
		}

		switch (mode) {
		case BUF_GET_IF_IN_POOL:
		case BUF_GET_IF_IN_POOL_OR_WATCH:
		case BUF_PEEK_IF_IN_POOL:
		case BUF_EVICT_IF_IN_POOL:
			ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
			ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));
			return(NULL);
		}

		/* The call path is buf_read_page() ->
		buf_read_page_low() (fil_io()) ->
		buf_page_io_complete() ->
		buf_decrypt_after_read(), which uses the fil_space_t*
		to decrypt, and then buf_page_check_corrupt(), where
		page checksums are compared. Decryption, decompression
		as well as error handling take place at a lower level.
		Here we only need to know whether the page really is
		corrupted, or if an encrypted page with a valid
		checksum cannot be decrypted. */

		dberr_t local_err = buf_read_page(page_id, page_size);

		if (local_err == DB_SUCCESS) {
			buf_read_ahead_random(page_id, page_size,
					      ibuf_inside(mtr));

			retries = 0;
		} else if (mode == BUF_GET_POSSIBLY_FREED) {
			if (err) {
				*err = local_err;
			}
			return NULL;
		} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
			++retries;

			DBUG_EXECUTE_IF(
				"innodb_page_corruption_retries",
				retries = BUF_PAGE_READ_MAX_RETRIES;
			);
		} else {
			if (err) {
				*err = local_err;
			}

			/* Pages whose encryption key is unavailable, or
			whose key, encryption algorithm or encryption
			method is incorrect, are marked as encrypted in
			buf_page_check_corrupt(). An unencrypted page
			could be corrupted in a way where the key_id
			field is nonzero. There is no checksum on the
			field FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */
			if (local_err == DB_DECRYPTION_FAILED) {
				return (NULL);
			}

			/* Try to set table as corrupted instead of
			asserting. */
			if (page_id.space() == TRX_SYS_SPACE) {
			} else if (page_id.space() == SRV_TMP_SPACE_ID) {
			} else if (fil_space_t* space
				   = fil_space_acquire_for_io(
					   page_id.space())) {
				bool set = dict_set_corrupted_by_space(space);
				space->release_for_io();
				if (set) {
					return NULL;
				}
			}

			ib::fatal() << "Unable to read page " << page_id
				<< " into the buffer pool after "
				<< BUF_PAGE_READ_MAX_RETRIES
				<< " attempts. The most probable cause"
				" of this error may be that the"
				" table has been corrupted."
				" See https://mariadb.com/kb/en/library/xtradbinnodb-recovery-modes/";
		}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		goto loop;
	} else {
		fix_block = block;
	}

	if (fsp_is_system_temporary(page_id.space())) {
		/* For temporary tablespace, the mutex is being used
		for synchronization between user thread and flush
		thread, instead of block->lock. See buf_flush_page()
		for the flush thread counterpart. */
		BPageMutex*	fix_mutex = buf_page_get_mutex(
			&fix_block->page);
		mutex_enter(fix_mutex);
		buf_block_fix(fix_block);
		mutex_exit(fix_mutex);
	} else {
		buf_block_fix(fix_block);
	}

	/* Now safe to release page_hash mutex */
	rw_lock_s_unlock(hash_lock);

got_block:

	switch (mode) {
	case BUF_GET_IF_IN_POOL:
	case BUF_PEEK_IF_IN_POOL:
	case BUF_EVICT_IF_IN_POOL:
		buf_page_t*	fix_page = &fix_block->page;
		BPageMutex*	fix_mutex = buf_page_get_mutex(fix_page);
		mutex_enter(fix_mutex);
		const bool	must_read
			= (buf_page_get_io_fix(fix_page) == BUF_IO_READ);
		mutex_exit(fix_mutex);

		if (must_read) {
			/* The page is being read to buffer pool,
			but we cannot wait around for the read to
			complete. */
			buf_block_unfix(fix_block);

			return(NULL);
		}
	}

	switch (buf_block_get_state(fix_block)) {
		buf_page_t*	bpage;

	case BUF_BLOCK_FILE_PAGE:
		bpage = &block->page;
		if (fsp_is_system_temporary(page_id.space())
		    && buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
			/* This suggests that the page is being flushed.
			Avoid returning reference to this page.
			Instead wait for the flush action to complete. */
			buf_block_unfix(fix_block);
			os_thread_sleep(WAIT_FOR_WRITE);
			goto loop;
		}

		if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
evict_from_pool:
			ut_ad(!fix_block->page.oldest_modification);
			buf_pool_mutex_enter(buf_pool);
			buf_block_unfix(fix_block);

			if (!buf_LRU_free_page(&fix_block->page, true)) {
				ut_ad(0);
			}

			buf_pool_mutex_exit(buf_pool);
			return(NULL);
		}
		break;

	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		if (mode == BUF_PEEK_IF_IN_POOL) {
			/* This mode is only used for dropping an
			adaptive hash index. There cannot be an
			adaptive hash index for a compressed-only
			page, so do not bother decompressing the page. */
			buf_block_unfix(fix_block);

			return(NULL);
		}

		bpage = &block->page;

		/* Note: We have already buffer fixed this block. */
		if (bpage->buf_fix_count > 1
		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {

			/* This condition often occurs when the buffer
			is not buffer-fixed, but I/O-fixed by
			buf_page_init_for_read(). */
			buf_block_unfix(fix_block);

			/* The block is buffer-fixed or I/O-fixed.
			Try again later. */
			os_thread_sleep(WAIT_FOR_READ);

			goto loop;
		}

		if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
			goto evict_from_pool;
		}

		/* Buffer-fix the block so that it cannot be evicted
		or relocated while we are attempting to allocate an
		uncompressed page. */

		block = buf_LRU_get_free_block(buf_pool);

		buf_pool_mutex_enter(buf_pool);

		/* If not own buf_pool_mutex, page_hash can be changed. */
		hash_lock = buf_page_hash_lock_get(buf_pool, page_id);

		rw_lock_x_lock(hash_lock);

		/* Buffer-fixing prevents the page_hash from changing. */
		ut_ad(bpage == buf_page_hash_get_low(buf_pool, page_id));

		buf_block_unfix(fix_block);

		buf_page_mutex_enter(block);
		mutex_enter(&buf_pool->zip_mutex);

		fix_block = block;

		if (bpage->buf_fix_count > 0
		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {

			mutex_exit(&buf_pool->zip_mutex);
			/* The block was buffer-fixed or I/O-fixed while
			buf_pool->mutex was not held by this thread.
			Free the block that was allocated and retry.
			This should be extremely unlikely, for example,
			if buf_page_get_zip() was invoked. */

			buf_LRU_block_free_non_file_page(block);
			buf_pool_mutex_exit(buf_pool);
			rw_lock_x_unlock(hash_lock);
			buf_page_mutex_exit(block);

			/* Try again */
			goto loop;
		}

		/* Move the compressed page from bpage to block,
		and uncompress it. */

		/* Note: this is the uncompressed block and it is not
		accessible by other threads yet because it is not in
		any list or hash table */
		buf_relocate(bpage, &block->page);

		buf_block_init_low(block);

		/* Set after buf_relocate(). */
		block->page.buf_fix_count = 1;

		block->lock_hash_val = lock_rec_hash(page_id.space(),
						     page_id.page_no());

		UNIV_MEM_DESC(&block->page.zip.data,
			      page_zip_get_size(&block->page.zip));

		if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) {
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
			UT_LIST_REMOVE(buf_pool->zip_clean, &block->page);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
			ut_ad(!block->page.in_flush_list);
		} else {
			/* Relocate buf_pool->flush_list. */
			buf_flush_relocate_on_flush_list(bpage, &block->page);
		}

		/* Buffer-fix, I/O-fix, and X-latch the block
		for the duration of the decompression.
		Also add the block to the unzip_LRU list. */
		block->page.state = BUF_BLOCK_FILE_PAGE;

		/* Insert at the front of unzip_LRU list */
		buf_unzip_LRU_add_block(block, FALSE);

		buf_block_set_io_fix(block, BUF_IO_READ);
		rw_lock_x_lock_inline(&block->lock, 0, file, line);

		UNIV_MEM_INVALID(bpage, sizeof *bpage);

		rw_lock_x_unlock(hash_lock);
		buf_pool->n_pend_unzip++;
		mutex_exit(&buf_pool->zip_mutex);
		buf_pool_mutex_exit(buf_pool);

		access_time = buf_page_is_accessed(&block->page);

		buf_page_mutex_exit(block);

		buf_page_free_descriptor(bpage);

		/* Decompress the page while not holding
		buf_pool->mutex or block->mutex. */

		{
			bool	success = buf_zip_decompress(block, TRUE);

			if (!success) {
				buf_pool_mutex_enter(buf_pool);
				buf_page_mutex_enter(fix_block);
				buf_block_set_io_fix(fix_block, BUF_IO_NONE);
				buf_page_mutex_exit(fix_block);

				--buf_pool->n_pend_unzip;
				buf_block_unfix(fix_block);
				buf_pool_mutex_exit(buf_pool);
				rw_lock_x_unlock(&fix_block->lock);

				if (err) {
					*err = DB_PAGE_CORRUPTED;
				}
				return NULL;
			}
		}

		if (!recv_no_ibuf_operations) {
			if (access_time) {
#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(ibuf_count_get(page_id) == 0);
#endif /* UNIV_IBUF_COUNT_DEBUG */
			} else {
				ibuf_merge_or_delete_for_page(
					block, page_id, &page_size, TRUE);
			}
		}

		buf_pool_mutex_enter(buf_pool);

		buf_page_mutex_enter(fix_block);

		buf_block_set_io_fix(fix_block, BUF_IO_NONE);

		buf_page_mutex_exit(fix_block);

		--buf_pool->n_pend_unzip;

		buf_pool_mutex_exit(buf_pool);

		rw_lock_x_unlock(&block->lock);

		break;

	case BUF_BLOCK_POOL_WATCH:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		break;
	}

	ut_ad(block == fix_block);
	ut_ad(fix_block->page.buf_fix_count > 0);

	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));

	ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);

#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG

	if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
	    && (ibuf_debug || buf_debug_execute_is_force_flush())) {

		/* Try to evict the block from the buffer pool, to use the
		insert buffer (change buffer) as much as possible. */

		buf_pool_mutex_enter(buf_pool);

		buf_block_unfix(fix_block);

		/* Now we are only holding the buf_pool->mutex,
		not block->mutex or hash_lock. Blocks cannot be
		relocated or enter or exit the buf_pool while we
		are holding the buf_pool->mutex. */

		if (buf_LRU_free_page(&fix_block->page, true)) {

			buf_pool_mutex_exit(buf_pool);

			/* If not own buf_pool_mutex,
			page_hash can be changed. */
			hash_lock = buf_page_hash_lock_get(buf_pool, page_id);

			rw_lock_x_lock(hash_lock);

			/* If not own buf_pool_mutex,
			page_hash can be changed. */
			hash_lock = buf_page_hash_lock_x_confirm(
				hash_lock, buf_pool, page_id);

			if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
				/* Set the watch, as it would have
				been set if the page were not in the
				buffer pool in the first place. */
				block = (buf_block_t*) buf_pool_watch_set(
					page_id, &hash_lock);
			} else {
				block = (buf_block_t*) buf_page_hash_get_low(
					buf_pool, page_id);
			}

			rw_lock_x_unlock(hash_lock);

			if (block != NULL) {
				/* Either the page has been read in, or
				a watch was set on it in the window
				after we released the buf_pool->mutex
				and before we acquired the hash_lock
				above. Try again. */
				guess = block;

				goto loop;
			}

			return(NULL);
		}

		buf_page_mutex_enter(fix_block);

		if (buf_flush_page_try(buf_pool, fix_block)) {
			guess = fix_block;

			goto loop;
		}

		buf_page_mutex_exit(fix_block);

		buf_block_fix(fix_block);

		/* Failed to evict the page; change it directly */

		buf_pool_mutex_exit(buf_pool);
	}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */

	ut_ad(fix_block->page.buf_fix_count > 0);

#ifdef UNIV_DEBUG
	/* We have already buffer fixed the page, and we are committed to
	returning this page to the caller. Register for debugging.
	Avoid debug latching if page/block belongs to system temporary
	tablespace (Not much needed for table with single threaded access.). */
	if (!fsp_is_system_temporary(page_id.space())) {
		ibool	ret;
		ret = rw_lock_s_lock_nowait(
			&fix_block->debug_latch, file, line);
		ut_a(ret);
	}
#endif /* UNIV_DEBUG */

	/* While a tablespace is being reinitialized, its indexes have
	already been freed, but blocks belonging to them may still reside
	in the buffer pool. Trying to remove such blocks from the buffer
	pool would trigger removal of the AHI entries associated with
	them, and the logic removing an AHI entry would try to load a
	block that is already in the freed state. Handle this case with
	mode = BUF_PEEK_IF_IN_POOL, which is used from
	btr_search_drop_page_hash_when_freed(). */
	ut_ad(mode == BUF_GET_POSSIBLY_FREED
	      || mode == BUF_PEEK_IF_IN_POOL
	      || !fix_block->page.file_page_was_freed);

	/* Check if this is the first access to the page */
	access_time = buf_page_is_accessed(&fix_block->page);

	/* This is a heuristic and we don't care about ordering issues. */
	if (access_time == 0) {
		buf_page_mutex_enter(fix_block);

		buf_page_set_accessed(&fix_block->page);

		buf_page_mutex_exit(fix_block);
	}

	if (mode != BUF_PEEK_IF_IN_POOL) {
		buf_page_make_young_if_needed(&fix_block->page);
	}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	/* We have to wait here because the IO_READ state was set
	under the protection of the hash_lock and not the block->mutex
	and block->lock. */
	buf_wait_for_read(fix_block);

	mtr_memo_type_t	fix_type;

	switch (rw_latch) {
	case RW_NO_LATCH:

		fix_type = MTR_MEMO_BUF_FIX;
		break;

	case RW_S_LATCH:
		rw_lock_s_lock_inline(&fix_block->lock, 0, file, line);

		fix_type = MTR_MEMO_PAGE_S_FIX;
		break;

	case RW_SX_LATCH:
		rw_lock_sx_lock_inline(&fix_block->lock, 0, file, line);

		fix_type = MTR_MEMO_PAGE_SX_FIX;
		break;

	default:
		ut_ad(rw_latch == RW_X_LATCH);
		rw_lock_x_lock_inline(&fix_block->lock, 0, file, line);

		fix_type = MTR_MEMO_PAGE_X_FIX;
		break;
	}

	mtr_memo_push(mtr, fix_block, fix_type);

	if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(page_id, page_size, ibuf_inside(mtr));
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(fix_block->page.id) == 0);
#endif

	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));

	return(fix_block);
}

/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
ibool
buf_page_optimistic_get(
/*====================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: guessed buffer block */
	ib_uint64_t	modify_clock,/*!< in: modify clock value */
	const char*	file,	/*!< in: file name */
	unsigned	line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool;
	unsigned	access_time;
	ibool		success;

	ut_ad(block);
	ut_ad(mtr);
	ut_ad(mtr->is_active());
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	buf_page_mutex_enter(block);

	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

		buf_page_mutex_exit(block);

		return(FALSE);
	}

	buf_block_buf_fix_inc(block, file, line);

	access_time = buf_page_is_accessed(&block->page);

	buf_page_set_accessed(&block->page);

	buf_page_mutex_exit(block);

	buf_page_make_young_if_needed(&block->page);

	ut_ad(!ibuf_inside(mtr)
	      || ibuf_page(block->page.id, block->page.size, NULL));

	mtr_memo_type_t	fix_type;

	switch (rw_latch) {
	case RW_S_LATCH:
		success = rw_lock_s_lock_nowait(&block->lock, file, line);

		fix_type = MTR_MEMO_PAGE_S_FIX;
		break;
	case RW_X_LATCH:
		success = rw_lock_x_lock_func_nowait_inline(
			&block->lock, file, line);

		fix_type = MTR_MEMO_PAGE_X_FIX;
		break;
	default:
		ut_error; /* RW_SX_LATCH is not implemented yet */
	}

	if (!success) {
		buf_page_mutex_enter(block);
		buf_block_buf_fix_dec(block);
		buf_page_mutex_exit(block);

		return(FALSE);
	}

	if (modify_clock != block->modify_clock) {

		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

		if (rw_latch == RW_S_LATCH) {
			rw_lock_s_unlock(&block->lock);
		} else {
			rw_lock_x_unlock(&block->lock);
		}

		buf_page_mutex_enter(block);
		buf_block_buf_fix_dec(block);
		buf_page_mutex_exit(block);

		return(FALSE);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	ut_d(buf_page_mutex_enter(block));
	ut_ad(!block->page.file_page_was_freed);
	ut_d(buf_page_mutex_exit(block));

	if (!access_time) {
		/* In the case of a first access, try to apply linear
		read-ahead */
		buf_read_ahead_linear(block->page.id, block->page.size,
				      ibuf_inside(mtr));
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(block->page.id) == 0);
#endif /* UNIV_IBUF_COUNT_DEBUG */

	buf_pool = buf_pool_from_block(block);
	buf_pool->stat.n_page_gets++;

	return(TRUE);
}
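
/* A minimal sketch of the optimistic pattern served by the function
above: remember the block and its modify clock while latched, release,
and revalidate later; the intervening work is elided:

	ib_uint64_t	clock = block->modify_clock;	// under latch
	//	... release the latch, do other work ...
	mtr_t	mtr;
	mtr.start();
	if (buf_page_optimistic_get(RW_S_LATCH, block, clock,
				    __FILE__, __LINE__, &mtr)) {
		// guess still valid: the page is resident and was not
		// modified since the clock value was read
	}
	mtr.commit();
*/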

/********************************************************************//**
This is used to get access to a known database page, when no waiting can be
done. For example, if a search in an adaptive hash index leads us to this
frame.
@return TRUE if success */
ibool
buf_page_get_known_nowait(
/*======================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: the known page */
	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
	const char*	file,	/*!< in: file name */
	unsigned	line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_pool_t*	buf_pool;
	ibool		success;

	ut_ad(mtr->is_active());
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	buf_page_mutex_enter(block);

	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
		/* Another thread is just freeing the block from the LRU list
		of the buffer pool: do not try to access this page; this
		attempt to access the page can only come through the hash
		index because when the buffer block state is ..._REMOVE_HASH,
		we have already removed it from the page address hash table
		of the buffer pool. */

		buf_page_mutex_exit(block);

		return(FALSE);
	}

	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	buf_block_buf_fix_inc(block, file, line);

	buf_page_set_accessed(&block->page);

	buf_page_mutex_exit(block);

	buf_pool = buf_pool_from_block(block);

	if (mode == BUF_MAKE_YOUNG) {
		buf_page_make_young_if_needed(&block->page);
	}

	ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);

	mtr_memo_type_t	fix_type;

	switch (rw_latch) {
	case RW_S_LATCH:
		success = rw_lock_s_lock_nowait(&block->lock, file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
		break;
	case RW_X_LATCH:
		success = rw_lock_x_lock_func_nowait_inline(
			&block->lock, file, line);

		fix_type = MTR_MEMO_PAGE_X_FIX;
		break;
	default:
		ut_error; /* RW_SX_LATCH is not implemented yet */
	}

	if (!success) {
		buf_page_mutex_enter(block);
		buf_block_buf_fix_dec(block);
		buf_page_mutex_exit(block);

		return(FALSE);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG
	if (mode != BUF_KEEP_OLD) {
		/* If mode == BUF_KEEP_OLD, we are executing an I/O
		completion routine. Avoid a bogus assertion failure
		when ibuf_merge_or_delete_for_page() is processing a
		page that was just freed due to DROP INDEX, or
		deleting a record from SYS_INDEXES. This check will be
		skipped in recv_recover_page() as well. */

		buf_page_mutex_enter(block);
		ut_a(!block->page.file_page_was_freed);
		buf_page_mutex_exit(block);
	}
#endif /* UNIV_DEBUG */

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD) || ibuf_count_get(block->page.id) == 0);
#endif
	buf_pool->stat.n_page_gets++;

	return(TRUE);
}

/** Given a tablespace id and page number, tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for use when holding the lock_sys_t::mutex.
@param[in]	page_id	page id
@param[in]	file	file name
@param[in]	line	line where called
@param[in]	mtr	mini-transaction
@return pointer to a page or NULL */
buf_block_t*
buf_page_try_get_func(
	const page_id_t&	page_id,
	const char*		file,
	unsigned		line,
	mtr_t*			mtr)
{
	buf_block_t*	block;
	ibool		success;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	rw_lock_t*	hash_lock;

	ut_ad(mtr);
	ut_ad(mtr->is_active());

	block = buf_block_hash_get_s_locked(buf_pool, page_id, &hash_lock);

	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
		if (block) {
			rw_lock_s_unlock(hash_lock);
		}
		return(NULL);
	}

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));

	buf_page_mutex_enter(block);
	rw_lock_s_unlock(hash_lock);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(page_id.equals_to(block->page.id));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	buf_page_mutex_exit(block);

	mtr_memo_type_t	fix_type = MTR_MEMO_PAGE_S_FIX;
	success = rw_lock_s_lock_nowait(&block->lock, file, line);

	if (!success) {
		/* Let us try to get an X-latch. If the current thread
		is holding an X-latch on the page, we cannot get an
		S-latch. */

		fix_type = MTR_MEMO_PAGE_X_FIX;
		success = rw_lock_x_lock_func_nowait_inline(&block->lock,
							    file, line);
	}

	if (!success) {
		buf_page_mutex_enter(block);
		buf_block_buf_fix_dec(block);
		buf_page_mutex_exit(block);

		return(NULL);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	ut_d(buf_page_mutex_enter(block));
	ut_d(ut_a(!block->page.file_page_was_freed));
	ut_d(buf_page_mutex_exit(block));

	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

	buf_pool->stat.n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(block->page.id) == 0);
#endif /* UNIV_IBUF_COUNT_DEBUG */

	return(block);
}

/********************************************************************//**
Initialize some fields of a control block. */
UNIV_INLINE
void
buf_page_init_low(
/*==============*/
	buf_page_t*	bpage)	/*!< in: block to init */
{
	bpage->flush_type = BUF_FLUSH_LRU;
	bpage->io_fix = BUF_IO_NONE;
	bpage->buf_fix_count = 0;
	bpage->old = 0;
	bpage->freed_page_clock = 0;
	bpage->access_time = 0;
	bpage->newest_modification = 0;
	bpage->oldest_modification = 0;
	bpage->write_size = 0;
	bpage->encrypted = false;
	bpage->real_size = 0;
	bpage->slot = NULL;

	HASH_INVALIDATE(bpage, hash);

	ut_d(bpage->file_page_was_freed = FALSE);
}

/** Initializes a page in the buffer pool.
@param[in,out]	buf_pool	buffer pool
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in,out]	block		block to init */
static
void
buf_page_init(
	buf_pool_t*		buf_pool,
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	buf_block_t*		block)
{
	buf_page_t*	hash_page;

	ut_ad(buf_pool == buf_pool_get(page_id));
	ut_ad(buf_pool_mutex_own(buf_pool));

	ut_ad(buf_page_mutex_own(block));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, page_id),
			  RW_LOCK_X));

	/* Set the state of the block */
	buf_block_set_file_page(block, page_id);

#ifdef UNIV_DEBUG_VALGRIND
	if (is_system_tablespace(page_id.space())) {
		/* Silence valid Valgrind warnings about uninitialized
		data being written to data files. There are some unused
		bytes on some pages that InnoDB does not initialize. */
		UNIV_MEM_VALID(block->frame, srv_page_size);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val = lock_rec_hash(page_id.space(),
					     page_id.page_no());

	buf_page_init_low(&block->page);

	/* Insert into the hash table of file pages */

	hash_page = buf_page_hash_get_low(buf_pool, page_id);

	if (hash_page == NULL) {
		/* Block not found in hash table */
	} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
		/* Preserve the reference count. */
		ib_uint32_t	buf_fix_count = hash_page->buf_fix_count;

		ut_a(buf_fix_count > 0);

		my_atomic_add32((int32*) &block->page.buf_fix_count, buf_fix_count);

		buf_pool_watch_remove(buf_pool, hash_page);
	} else {

		ib::error() << "Page " << page_id
			<< " already found in the hash table: "
			<< hash_page << ", " << block;

		ut_d(buf_page_mutex_exit(block));
		ut_d(buf_pool_mutex_exit(buf_pool));
		ut_d(buf_print());
		ut_d(buf_LRU_print());
		ut_d(buf_validate());
		ut_d(buf_LRU_validate());
		ut_error;
	}

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);

	block->page.id.copy_from(page_id);
	block->page.size.copy_from(page_size);

	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    page_id.fold(), &block->page);

	if (page_size.is_compressed()) {
		page_zip_set_size(&block->page.zip, page_size.physical());
	}
}

/** Initialize a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@param[out]	err		DB_SUCCESS or DB_TABLESPACE_DELETED
@param[in]	mode		BUF_READ_IBUF_PAGES_ONLY, ...
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	unzip		whether the uncompressed page is
				requested (for ROW_FORMAT=COMPRESSED)
@return pointer to the block
@retval	NULL	in case of an error */
buf_page_t*
buf_page_init_for_read(
	dberr_t*		err,
	ulint			mode,
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	bool			unzip)
{
	buf_block_t*	block;
	buf_page_t*	bpage	= NULL;
	buf_page_t*	watch_page;
	rw_lock_t*	hash_lock;
	mtr_t		mtr;
	bool		lru	= false;
	void*		data;
	buf_pool_t*	buf_pool = buf_pool_get(page_id);

	ut_ad(buf_pool);

	*err = DB_SUCCESS;

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
		/* It is a read-ahead within an ibuf routine */

		ut_ad(!ibuf_bitmap_page(page_id, page_size));

		ibuf_mtr_start(&mtr);

		if (!recv_no_ibuf_operations &&
		    !ibuf_page(page_id, page_size, &mtr)) {

			ibuf_mtr_commit(&mtr);

			return(NULL);
		}
	} else {
		ut_ad(mode == BUF_READ_ANY_PAGE);
	}

	if (page_size.is_compressed() && !unzip && !recv_recovery_is_on()) {
		block = NULL;
	} else {
		block = buf_LRU_get_free_block(buf_pool);
		ut_ad(block);
		ut_ad(buf_pool_from_block(block) == buf_pool);
	}

	buf_pool_mutex_enter(buf_pool);

	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
	rw_lock_x_lock(hash_lock);

	watch_page = buf_page_hash_get_low(buf_pool, page_id);
	if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
		/* The page is already in the buffer pool. */
		watch_page = NULL;
		rw_lock_x_unlock(hash_lock);
		if (block) {
			buf_page_mutex_enter(block);
			buf_LRU_block_free_non_file_page(block);
			buf_page_mutex_exit(block);
		}

		bpage = NULL;
		goto func_exit;
	}

	if (block) {
		bpage = &block->page;

		buf_page_mutex_enter(block);

		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

		buf_page_init(buf_pool, page_id, page_size, block);

		/* Note: We are using the hash_lock for protection. This is
		safe because no other thread can lookup the block from the
		page hashtable yet. */

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		rw_lock_x_unlock(hash_lock);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);

		/* We set a pass-type x-lock on the frame because then
		the same thread which called for the read operation
		(and is running now at this point of code) can wait
		for the read to complete by waiting for the x-lock on
		the frame; if the x-lock were recursive, the same
		thread would illegally get the x-lock before the page
		read is completed. The x-lock is cleared by the
		io-handler thread. */

		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);

		if (page_size.is_compressed()) {
			/* buf_pool->mutex may be released and
			reacquired by buf_buddy_alloc(). Thus, we
			must release block->mutex in order not to
			break the latching order in the reacquisition
			of buf_pool->mutex. We also must defer this
			operation until after the block descriptor has
			been added to buf_pool->LRU and
			buf_pool->page_hash. */
			buf_page_mutex_exit(block);
			data = buf_buddy_alloc(buf_pool, page_size.physical(),
					       &lru);
			buf_page_mutex_enter(block);
			block->page.zip.data = (page_zip_t*) data;

			/* To maintain the invariant
			block->in_unzip_LRU_list
			== buf_page_belongs_to_unzip_LRU(&block->page)
			we have to add this block to unzip_LRU
			after block->page.zip.data is set. */
			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
			buf_unzip_LRU_add_block(block, TRUE);
		}

		buf_page_mutex_exit(block);
	} else {
		rw_lock_x_unlock(hash_lock);

		/* The compressed page must be allocated before the
		control block (bpage), in order to avoid the
		invocation of buf_buddy_relocate_block() on
		uninitialized data. */
		data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru);

		rw_lock_x_lock(hash_lock);

		/* If buf_buddy_alloc() allocated storage from the LRU list,
		it released and reacquired buf_pool->mutex. Thus, we must
		check the page_hash again, as it may have been modified. */
		if (UNIV_UNLIKELY(lru)) {

			watch_page = buf_page_hash_get_low(buf_pool, page_id);

			if (UNIV_UNLIKELY(watch_page
					  && !buf_pool_watch_is_sentinel(buf_pool,
									 watch_page))) {

				/* The block was added by some other thread. */
				rw_lock_x_unlock(hash_lock);
				watch_page = NULL;
				buf_buddy_free(buf_pool, data,
					       page_size.physical());

				bpage = NULL;
				goto func_exit;
			}
		}

		bpage = buf_page_alloc_descriptor();

		/* Initialize the buf_pool pointer. */
		bpage->buf_pool_index = buf_pool_index(buf_pool);

		page_zip_des_init(&bpage->zip);
		page_zip_set_size(&bpage->zip, page_size.physical());
		bpage->zip.data = (page_zip_t*) data;

		bpage->size.copy_from(page_size);

		mutex_enter(&buf_pool->zip_mutex);
		UNIV_MEM_DESC(bpage->zip.data, bpage->size.physical());

		buf_page_init_low(bpage);

		bpage->state = BUF_BLOCK_ZIP_PAGE;
		bpage->id.copy_from(page_id);
		bpage->flush_observer = NULL;

		ut_d(bpage->in_page_hash = FALSE);
		ut_d(bpage->in_zip_hash = FALSE);
		ut_d(bpage->in_flush_list = FALSE);
		ut_d(bpage->in_free_list = FALSE);
		ut_d(bpage->in_LRU_list = FALSE);

		ut_d(bpage->in_page_hash = TRUE);

		if (watch_page != NULL) {

			/* Preserve the reference count. */
			ib_uint32_t	buf_fix_count;

			buf_fix_count = watch_page->buf_fix_count;

			ut_a(buf_fix_count > 0);

			my_atomic_add32((int32*) &bpage->buf_fix_count, buf_fix_count);

			ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
			buf_pool_watch_remove(buf_pool, watch_page);
		}

		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
			    bpage->id.fold(), bpage);

		rw_lock_x_unlock(hash_lock);

		/* The block must be put to the LRU list, to the old blocks.
		The zip size is already set into the page zip */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		buf_LRU_insert_zip_clean(bpage);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		mutex_exit(&buf_pool->zip_mutex);
	}

	buf_pool->n_pend_reads++;
func_exit:
	buf_pool_mutex_exit(buf_pool);

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {

		ibuf_mtr_commit(&mtr);
	}

	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));
	ut_ad(!bpage || buf_page_in_file(bpage));

	return(bpage);
}

/** Initializes a page in the buffer pool. The page is usually not read
from a file, even when it cannot be found in the buffer pool. This is one
of the functions which perform the state transition NOT_USED =>
FILE_PAGE on a block (the other is buf_page_get_gen).
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	mtr		mini-transaction
@return pointer to the block, page buffer-fixed */
5578buf_block_t*
5579buf_page_create(
5580 const page_id_t& page_id,
5581 const page_size_t& page_size,
5582 mtr_t* mtr)
5583{
5584 buf_frame_t* frame;
5585 buf_block_t* block;
5586 buf_block_t* free_block = NULL;
5587 buf_pool_t* buf_pool = buf_pool_get(page_id);
5588 rw_lock_t* hash_lock;
5589
5590 ut_ad(mtr->is_active());
5591 ut_ad(page_id.space() != 0 || !page_size.is_compressed());
5592
5593 free_block = buf_LRU_get_free_block(buf_pool);
5594
5595 buf_pool_mutex_enter(buf_pool);
5596
5597 hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
5598 rw_lock_x_lock(hash_lock);
5599
5600 block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id);
5601
5602 if (block
5603 && buf_page_in_file(&block->page)
5604 && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
5605
5606#ifdef UNIV_IBUF_COUNT_DEBUG
5607 ut_a(ibuf_count_get(page_id) == 0);
5608#endif /* UNIV_IBUF_COUNT_DEBUG */
5609
5610 ut_d(block->page.file_page_was_freed = FALSE);
5611
5612 /* Page can be found in buf_pool */
5613 buf_pool_mutex_exit(buf_pool);
5614 rw_lock_x_unlock(hash_lock);
5615
5616 buf_block_free(free_block);
5617
5618 return(buf_page_get_with_no_latch(page_id, page_size, mtr));
5619 }
5620
5621 /* If we get here, the page was not in buf_pool: init it there */
5622
5623 DBUG_PRINT("ib_buf", ("create page %u:%u",
5624 page_id.space(), page_id.page_no()));
5625
5626 block = free_block;
5627
5628 buf_page_mutex_enter(block);
5629
5630 buf_page_init(buf_pool, page_id, page_size, block);
5631
5632 rw_lock_x_unlock(hash_lock);
5633
5634 /* The block must be put to the LRU list */
5635 buf_LRU_add_block(&block->page, FALSE);
5636
5637 buf_block_buf_fix_inc(block, __FILE__, __LINE__);
5638 buf_pool->stat.n_pages_created++;
5639
5640 if (page_size.is_compressed()) {
5641 void* data;
5642 bool lru;
5643
5644 /* Prevent race conditions during buf_buddy_alloc(),
5645 which may release and reacquire buf_pool->mutex,
5646 by IO-fixing and X-latching the block. */
5647
5648 buf_page_set_io_fix(&block->page, BUF_IO_READ);
5649 rw_lock_x_lock(&block->lock);
5650
5651 buf_page_mutex_exit(block);
5652 /* buf_pool->mutex may be released and reacquired by
5653 buf_buddy_alloc(). Thus, we must release block->mutex
5654 in order not to break the latching order in
5655 the reacquisition of buf_pool->mutex. We also must
5656 defer this operation until after the block descriptor
5657 has been added to buf_pool->LRU and buf_pool->page_hash. */
5658 data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru);
5659 buf_page_mutex_enter(block);
5660 block->page.zip.data = (page_zip_t*) data;
5661
5662 /* To maintain the invariant
5663 block->in_unzip_LRU_list
5664 == buf_page_belongs_to_unzip_LRU(&block->page)
5665 we have to add this block to unzip_LRU after
5666 block->page.zip.data is set. */
5667 ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
5668 buf_unzip_LRU_add_block(block, FALSE);
5669
5670 buf_page_set_io_fix(&block->page, BUF_IO_NONE);
5671 rw_lock_x_unlock(&block->lock);
5672 }
5673
5674 buf_pool_mutex_exit(buf_pool);
5675
5676 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
5677
5678 buf_page_set_accessed(&block->page);
5679
5680 buf_page_mutex_exit(block);
5681
5682 /* Delete possible entries for the page from the insert buffer:
5683 such can exist if the page belonged to an index which was dropped */
5684 ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE);
5685
5686 frame = block->frame;
5687
5688 memset(frame + FIL_PAGE_PREV, 0xff, 4);
5689 memset(frame + FIL_PAGE_NEXT, 0xff, 4);
5690 mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
5691
5692 /* FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is only used on the
5693 following pages:
5694 (1) The first page of the InnoDB system tablespace (page 0:0)
5695 (2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages
5696 (3) key_version on encrypted pages (not page 0:0) */
5697
5698 memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
5699
5700#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
5701 ut_a(++buf_dbg_counter % 5771 || buf_validate());
5702#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
5703#ifdef UNIV_IBUF_COUNT_DEBUG
5704 ut_a(ibuf_count_get(block->page.id) == 0);
5705#endif
5706 return(block);
5707}
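
/* A minimal usage sketch of buf_page_create() (illustrative only, not
code from this file):

	mtr_t	mtr;
	mtr.start();
	buf_block_t*	block = buf_page_create(page_id, page_size, &mtr);
	// ... format block->frame as the desired page type ...
	mtr.commit();

The function only buffer-fixes the returned block; any latch that the
subsequent formatting needs must be acquired by the caller. */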
5708
5709/********************************************************************//**
5710Monitor the buffer page read/write activity, and increment the
5711corresponding counter if the MONITOR_MODULE_BUF_PAGE (module_buf_page)
5712module is enabled. */
5713static
5714void
5715buf_page_monitor(
5716/*=============*/
5717 const buf_page_t* bpage, /*!< in: pointer to the block */
5718 enum buf_io_fix io_type)/*!< in: io_fix types */
5719{
5720 const byte* frame;
5721 monitor_id_t counter;
5722
5723 /* If the counter module is not turned on, just return */
5724 if (!MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)) {
5725 return;
5726 }
5727
5728 ut_a(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
5729
5730 frame = bpage->zip.data
5731 ? bpage->zip.data
5732 : ((buf_block_t*) bpage)->frame;
5733
5734 switch (fil_page_get_type(frame)) {
5735 ulint level;
5736 case FIL_PAGE_TYPE_INSTANT:
5737 case FIL_PAGE_INDEX:
5738 case FIL_PAGE_RTREE:
5739 level = btr_page_get_level(frame);
5740
5741 /* Check if it is an index page for insert buffer */
5742 if (fil_page_get_type(frame) == FIL_PAGE_INDEX
5743 && btr_page_get_index_id(frame)
5744 == (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) {
5745 if (level == 0) {
5746 counter = MONITOR_RW_COUNTER(
5747 io_type, MONITOR_INDEX_IBUF_LEAF_PAGE);
5748 } else {
5749 counter = MONITOR_RW_COUNTER(
5750 io_type,
5751 MONITOR_INDEX_IBUF_NON_LEAF_PAGE);
5752 }
5753 } else {
5754 if (level == 0) {
5755 counter = MONITOR_RW_COUNTER(
5756 io_type, MONITOR_INDEX_LEAF_PAGE);
5757 } else {
5758 counter = MONITOR_RW_COUNTER(
5759 io_type, MONITOR_INDEX_NON_LEAF_PAGE);
5760 }
5761 }
5762 break;
5763
5764 case FIL_PAGE_UNDO_LOG:
5765 counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE);
5766 break;
5767
5768 case FIL_PAGE_INODE:
5769 counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE);
5770 break;
5771
5772 case FIL_PAGE_IBUF_FREE_LIST:
5773 counter = MONITOR_RW_COUNTER(io_type,
5774 MONITOR_IBUF_FREELIST_PAGE);
5775 break;
5776
5777 case FIL_PAGE_IBUF_BITMAP:
5778 counter = MONITOR_RW_COUNTER(io_type,
5779 MONITOR_IBUF_BITMAP_PAGE);
5780 break;
5781
5782 case FIL_PAGE_TYPE_SYS:
5783 counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE);
5784 break;
5785
5786 case FIL_PAGE_TYPE_TRX_SYS:
5787 counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE);
5788 break;
5789
5790 case FIL_PAGE_TYPE_FSP_HDR:
5791 counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE);
5792 break;
5793
5794 case FIL_PAGE_TYPE_XDES:
5795 counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE);
5796 break;
5797
5798 case FIL_PAGE_TYPE_BLOB:
5799 counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE);
5800 break;
5801
5802 case FIL_PAGE_TYPE_ZBLOB:
5803 counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE);
5804 break;
5805
5806 case FIL_PAGE_TYPE_ZBLOB2:
5807 counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE);
5808 break;
5809
5810 default:
5811 counter = MONITOR_RW_COUNTER(io_type, MONITOR_OTHER_PAGE);
5812 }
5813
5814 MONITOR_INC_NOCHECK(counter);
5815}
5816
5817/** Mark a table corrupted.
5818Also remove the bpage from the LRU list. */
5819static
5820void
5821buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t* space)
5822{
5823 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
5824 const ibool uncompressed = (buf_page_get_state(bpage)
5825 == BUF_BLOCK_FILE_PAGE);
5826
5827 /* First unfix and release lock on the bpage */
5828 buf_pool_mutex_enter(buf_pool);
5829 mutex_enter(buf_page_get_mutex(bpage));
5830 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
5831 ut_ad(bpage->buf_fix_count == 0);
5832 ut_ad(bpage->id.space() == space->id);
5833
5834 /* Set BUF_IO_NONE before we remove the block from LRU list */
5835 buf_page_set_io_fix(bpage, BUF_IO_NONE);
5836
5837 if (uncompressed) {
5838 rw_lock_x_unlock_gen(
5839 &((buf_block_t*) bpage)->lock,
5840 BUF_IO_READ);
5841 }
5842
5843 mutex_exit(buf_page_get_mutex(bpage));
5844
5845	/* If the block is not encrypted, find the table with the
5846	specified space id and mark it corrupted. Encrypted tables
5847	are marked unusable later, e.g. in ::open(). */
5848 if (!bpage->encrypted) {
5849 dict_set_corrupted_by_space(space);
5850 } else {
5851 dict_set_encrypted_by_space(space);
5852 }
5853
5854 /* After this point bpage can't be referenced. */
5855 buf_LRU_free_one_page(bpage);
5856
5857 ut_ad(buf_pool->n_pend_reads > 0);
5858 buf_pool->n_pend_reads--;
5859
5860 buf_pool_mutex_exit(buf_pool);
5861}
5862
5863/** Check whether a page is maybe compressed, encrypted or both when we
5864encounter an apparently corrupted page. Note that we cannot be 100% sure
5865whether the page is corrupted or decryption/decompression just failed.
5866@param[in,out] bpage page
5867@param[in,out] space tablespace from fil_space_acquire_for_io()
5868@return whether the operation succeeded
5869@retval DB_SUCCESS if page has been read and is not corrupted
5870@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
5871@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
5872after decryption normal page checksum does not match.
5873@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */
5874static
5875dberr_t
5876buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
5877{
5878 ut_ad(space->pending_io());
5879
5880 byte* dst_frame = (bpage->zip.data) ? bpage->zip.data :
5881 ((buf_block_t*) bpage)->frame;
5882 bool still_encrypted = false;
5883 dberr_t err = DB_SUCCESS;
5884 bool corrupted = false;
5885 fil_space_crypt_t* crypt_data = space->crypt_data;
5886
5887	/* In buf_page_decrypt_after_read() the page was decrypted if the
5888	post-encryption checksum matched and the used key_id was found
5889	by the encryption plugin. If the checksum did not match, the page
5890	was left undecrypted; it may be encrypted and corrupted, merely
5891	corrupted, or a good page. Even if we decrypted, the page could
5892	still be corrupted if the key used does not match. */
5893 still_encrypted = crypt_data
5894 && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
5895 && !bpage->encrypted
5896 && fil_space_verify_crypt_checksum(
5897 dst_frame, bpage->size,
5898 bpage->id.space(), bpage->id.page_no());
5899
5900 if (!still_encrypted) {
5901		/* If the traditional checksums match, we assume that the
5902		page is no longer encrypted. */
5903 corrupted = buf_page_is_corrupted(
5904 true, dst_frame, bpage->size, space);
5905
5906 if (!corrupted) {
5907 bpage->encrypted = false;
5908 } else {
5909 err = DB_PAGE_CORRUPTED;
5910 }
5911 }
5912
5913 /* Pages that we think are unencrypted but do not match the checksum
5914 checks could be corrupted or encrypted or both. */
5915 if (corrupted && !bpage->encrypted) {
5916 /* An error will be reported by
5917 buf_page_io_complete(). */
5918 } else if (still_encrypted || (bpage->encrypted && corrupted)) {
5919 bpage->encrypted = true;
5920 err = DB_DECRYPTION_FAILED;
5921
5922 ib::error()
5923 << "The page " << bpage->id << " in file '"
5924 << space->chain.start->name
5925 << "' cannot be decrypted.";
5926
5927		ib::info()
5928			<< "However, the key management plugin or the used key_version "
5929			<< mach_read_from_4(dst_frame
5930					    + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
5931			<< " is not found, or the used encryption"
5932			" algorithm or method does not match.";
5933
5934 if (bpage->id.space() != TRX_SYS_SPACE) {
5935 ib::info()
5936 << "Marking tablespace as missing."
5937 " You may drop this table or"
5938 " install correct key management plugin"
5939 " and key file.";
5940 }
5941 }
5942
5943 return (err);
5944}
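
/* Summary of the outcomes above, as a reading aid:
	still_encrypted == true                 -> DB_DECRYPTION_FAILED
	!still_encrypted, checksums match       -> DB_SUCCESS
	!still_encrypted, mismatch, !encrypted  -> DB_PAGE_CORRUPTED
	!still_encrypted, mismatch, encrypted   -> DB_DECRYPTION_FAILED
(DB_TABLESPACE_DELETED is returned by the caller, not here.) */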
5945
5946/** Complete a read or write request of a file page to or from the buffer pool.
5947@param[in,out] bpage page to complete
5948@param[in] dblwr whether the doublewrite buffer was used (on write)
5949@param[in] evict whether or not to evict the page from LRU list
5950@return whether the operation succeeded
5951@retval DB_SUCCESS always when writing, or if a read page was OK
5952@retval DB_TABLESPACE_DELETED if the tablespace does not exist
5953@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
5954@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
5955 after decryption normal page checksum does
5956 not match */
5957UNIV_INTERN
5958dberr_t
5959buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
5960{
5961 enum buf_io_fix io_type;
5962 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
5963 const bool uncompressed = (buf_page_get_state(bpage)
5964 == BUF_BLOCK_FILE_PAGE);
5965 ut_a(buf_page_in_file(bpage));
5966
5967	/* We do not need to protect io_fix here with a mutex to read
5968	it, because this is the only function where we can change the value
5969 from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
5970 ensures that this is the only thread that handles the i/o for this
5971 block. */
5972
5973 io_type = buf_page_get_io_fix(bpage);
5974 ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
5975 ut_ad(bpage->size.is_compressed() == (bpage->zip.data != NULL));
5976 ut_ad(uncompressed || bpage->zip.data);
5977
5978 if (io_type == BUF_IO_READ) {
5979 ulint read_page_no = 0;
5980 ulint read_space_id = 0;
5981 uint key_version = 0;
5982
5983 ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL);
5984 fil_space_t* space = fil_space_acquire_for_io(
5985 bpage->id.space());
5986 if (!space) {
5987 return DB_TABLESPACE_DELETED;
5988 }
5989
5990 buf_page_decrypt_after_read(bpage, space);
5991
5992 byte* frame = bpage->zip.data
5993 ? bpage->zip.data
5994 : reinterpret_cast<buf_block_t*>(bpage)->frame;
5995 dberr_t err;
5996
5997 if (bpage->zip.data && uncompressed) {
5998 my_atomic_addlint(&buf_pool->n_pend_unzip, 1);
5999 ibool ok = buf_zip_decompress((buf_block_t*) bpage,
6000 FALSE);
6001 my_atomic_addlint(&buf_pool->n_pend_unzip, ulint(-1));
6002
6003 if (!ok) {
6004 ib::info() << "Page "
6005 << bpage->id
6006 << " zip_decompress failure.";
6007
6008 err = DB_PAGE_CORRUPTED;
6009 goto database_corrupted;
6010 }
6011 }
6012
6013		/* If this page is initialized and not in the
6014		doublewrite buffer, then the page number and space id
6015		should be the same as in the block. */
6016 read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
6017 read_space_id = mach_read_from_4(
6018 frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
6019 key_version = mach_read_from_4(
6020 frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
6021
6022 if (bpage->id.space() == TRX_SYS_SPACE
6023 && buf_dblwr_page_inside(bpage->id.page_no())) {
6024
6025 ib::error() << "Reading page " << bpage->id
6026 << ", which is in the doublewrite buffer!";
6027
6028 } else if (read_space_id == 0 && read_page_no == 0) {
6029 /* This is likely an uninitialized page. */
6030 } else if ((bpage->id.space() != TRX_SYS_SPACE
6031 && bpage->id.space() != read_space_id)
6032 || bpage->id.page_no() != read_page_no) {
6033 /* We did not compare space_id to read_space_id
6034 in the system tablespace, because the field
6035 was written as garbage before MySQL 4.1.1,
6036 which did not support innodb_file_per_table. */
6037
6038			ib::error() << "Space id and page number stored in"
6039				" the page read in are "
6040				<< page_id_t(read_space_id, read_page_no)
6041				<< ", should be " << bpage->id;
6042 }
6043
6044 err = buf_page_check_corrupt(bpage, space);
6045
6046database_corrupted:
6047
6048 if (err != DB_SUCCESS) {
6049 /* Not a real corruption if it was triggered by
6050 error injection */
6051 DBUG_EXECUTE_IF(
6052 "buf_page_import_corrupt_failure",
6053 if (!is_predefined_tablespace(
6054 bpage->id.space())) {
6055 buf_mark_space_corrupt(bpage, space);
6056 ib::info() << "Simulated IMPORT "
6057 "corruption";
6058 space->release_for_io();
6059 return(err);
6060 }
6061 err = DB_SUCCESS;
6062 goto page_not_corrupt;
6063 );
6064
6065 if (err == DB_PAGE_CORRUPTED) {
6066 ib::error()
6067 << "Database page corruption on disk"
6068 " or a failed file read of tablespace "
6069 << space->name << " page " << bpage->id
6070 << ". You may have to recover from "
6071 << "a backup.";
6072
6073 buf_page_print(frame, bpage->size);
6074
6075 ib::info()
6076 << "It is also possible that your"
6077 " operating system has corrupted"
6078 " its own file cache and rebooting"
6079 " your computer removes the error."
6080				" If the corrupt page is an index page,"
6081				" you can also try to fix the"
6082 " corruption by dumping, dropping,"
6083 " and reimporting the corrupt table."
6084 " You can use CHECK TABLE to scan"
6085 " your table for corruption. "
6086 << FORCE_RECOVERY_MSG;
6087 }
6088
6089 if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
6090
6091 /* If page space id is larger than TRX_SYS_SPACE
6092 (0), we will attempt to mark the corresponding
6093 table as corrupted instead of crashing server */
6094 if (bpage->id.space() == TRX_SYS_SPACE) {
6095 ib::fatal() << "Aborting because of"
6096 " a corrupt database page.";
6097 }
6098
6099 buf_mark_space_corrupt(bpage, space);
6100 space->release_for_io();
6101 return(err);
6102 }
6103 }
6104
6105 DBUG_EXECUTE_IF("buf_page_import_corrupt_failure",
6106 page_not_corrupt: bpage = bpage; );
6107
6108 if (recv_recovery_is_on()) {
6109 /* Pages must be uncompressed for crash recovery. */
6110 ut_a(uncompressed);
6111 recv_recover_page(TRUE, (buf_block_t*) bpage);
6112 }
6113
6114 /* If space is being truncated then avoid ibuf operation.
6115 During re-init we have already freed ibuf entries. */
6116 if (uncompressed
6117 && !recv_no_ibuf_operations
6118 && (bpage->id.space() == 0
6119 || !is_predefined_tablespace(bpage->id.space()))
6120 && !srv_is_tablespace_truncated(bpage->id.space())
6121 && fil_page_get_type(frame) == FIL_PAGE_INDEX
6122 && page_is_leaf(frame)) {
6123
6124 if (bpage->encrypted) {
6125				ib::warn()
6126					<< "Table in tablespace "
6127					<< bpage->id.space()
6128					<< " is encrypted. However, the key"
6129					" management plugin or the used"
6130					<< " key_version " << key_version
6131					<< " is not found, or the used encryption"
6132					" algorithm or method does not match."
6133					" Cannot continue opening the table.";
6134 } else {
6135
6136 ibuf_merge_or_delete_for_page(
6137 (buf_block_t*) bpage, bpage->id,
6138 &bpage->size, TRUE);
6139 }
6140
6141 }
6142
6143 space->release_for_io();
6144 } else {
6145 /* io_type == BUF_IO_WRITE */
6146 if (bpage->slot) {
6147 /* Mark slot free */
6148 bpage->slot->reserved = false;
6149 bpage->slot = NULL;
6150 }
6151 }
6152
6153 BPageMutex* block_mutex = buf_page_get_mutex(bpage);
6154 buf_pool_mutex_enter(buf_pool);
6155 mutex_enter(block_mutex);
6156
6157#ifdef UNIV_IBUF_COUNT_DEBUG
6158 if (io_type == BUF_IO_WRITE || uncompressed) {
6159 /* For BUF_IO_READ of compressed-only blocks, the
6160 buffered operations will be merged by buf_page_get_gen()
6161 after the block has been uncompressed. */
6162 ut_a(ibuf_count_get(bpage->id) == 0);
6163 }
6164#endif
6165	/* Because the thread that does the unlocking is not the same one
6166	that did the locking, we use a pass value != 0 in unlock, which
6167	simply removes the newest lock debug record, without checking the
6168	thread id. */
6169
6170 buf_page_set_io_fix(bpage, BUF_IO_NONE);
6171 buf_page_monitor(bpage, io_type);
6172
6173 if (io_type == BUF_IO_READ) {
6174 /* NOTE that the call to ibuf may have moved the ownership of
6175 the x-latch to this OS thread: do not let this confuse you in
6176 debugging! */
6177
6178 ut_ad(buf_pool->n_pend_reads > 0);
6179 buf_pool->n_pend_reads--;
6180 buf_pool->stat.n_pages_read++;
6181
6182 if (uncompressed) {
6183 rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
6184 BUF_IO_READ);
6185 }
6186
6187 mutex_exit(block_mutex);
6188 } else {
6189 /* Write means a flush operation: call the completion
6190 routine in the flush system */
6191
6192 buf_flush_write_complete(bpage, dblwr);
6193
6194 if (uncompressed) {
6195 rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
6196 BUF_IO_WRITE);
6197 }
6198
6199 buf_pool->stat.n_pages_written++;
6200
6201 /* We decide whether or not to evict the page from the
6202 LRU list based on the flush_type.
6203 * BUF_FLUSH_LIST: don't evict
6204 * BUF_FLUSH_LRU: always evict
6205 * BUF_FLUSH_SINGLE_PAGE: eviction preference is passed
6206 by the caller explicitly. */
6207 if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) {
6208 evict = true;
6209 }
6210
6211 mutex_exit(block_mutex);
6212
6213 if (evict) {
6214 buf_LRU_free_page(bpage, true);
6215 }
6216 }
6217
6218 DBUG_PRINT("ib_buf", ("%s page %u:%u",
6219 io_type == BUF_IO_READ ? "read" : "wrote",
6220 bpage->id.space(), bpage->id.page_no()));
6221
6222 buf_pool_mutex_exit(buf_pool);
6223
6224 return DB_SUCCESS;
6225}
6226
6227/*********************************************************************//**
6228Asserts that all file pages in the buffer are in a replaceable state.
6229@return TRUE */
6230static
6231ibool
6232buf_all_freed_instance(
6233/*===================*/
6234	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
6235{
6236 ulint i;
6237 buf_chunk_t* chunk;
6238
6239 ut_ad(buf_pool);
6240
6241 buf_pool_mutex_enter(buf_pool);
6242
6243 chunk = buf_pool->chunks;
6244
6245 for (i = buf_pool->n_chunks; i--; chunk++) {
6246
6247 if (const buf_block_t* block = buf_chunk_not_freed(chunk)) {
6248 ib::fatal() << "Page " << block->page.id
6249 << " still fixed or dirty";
6250 }
6251 }
6252
6253 buf_pool_mutex_exit(buf_pool);
6254
6255 return(TRUE);
6256}
6257
6258/** Refreshes the statistics used to print per-second averages.
6259@param[in,out] buf_pool buffer pool instance */
6260static
6261void
6262buf_refresh_io_stats(
6263 buf_pool_t* buf_pool)
6264{
6265 buf_pool->last_printout_time = ut_time();
6266 buf_pool->old_stat = buf_pool->stat;
6267}
6268
6269/*********************************************************************//**
6270Invalidates file pages in one buffer pool instance */
6271static
6272void
6273buf_pool_invalidate_instance(
6274/*=========================*/
6275 buf_pool_t* buf_pool) /*!< in: buffer pool instance */
6276{
6277 ulint i;
6278
6279 buf_pool_mutex_enter(buf_pool);
6280
6281 for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
6282
6283		/* As this function is called during startup and
6284		during the redo log application phase of recovery,
6285		InnoDB is single-threaded (apart from I/O helper
6286		threads) at this stage. No new write batch can be in
6287		the initialization stage at this point. */
6288 ut_ad(buf_pool->init_flush[i] == FALSE);
6289
6290 /* However, it is possible that a write batch that has
6291 been posted earlier is still not complete. For buffer
6292 pool invalidation to proceed we must ensure there is NO
6293 write activity happening. */
6294 if (buf_pool->n_flush[i] > 0) {
6295 buf_flush_t type = static_cast<buf_flush_t>(i);
6296
6297 buf_pool_mutex_exit(buf_pool);
6298 buf_flush_wait_batch_end(buf_pool, type);
6299 buf_pool_mutex_enter(buf_pool);
6300 }
6301 }
6302
6303 buf_pool_mutex_exit(buf_pool);
6304
6305 ut_ad(buf_all_freed_instance(buf_pool));
6306
6307 buf_pool_mutex_enter(buf_pool);
6308
6309 while (buf_LRU_scan_and_free_block(buf_pool, true)) {
6310 }
6311
6312 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
6313 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
6314
6315 buf_pool->freed_page_clock = 0;
6316 buf_pool->LRU_old = NULL;
6317 buf_pool->LRU_old_len = 0;
6318
6319 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
6320 buf_refresh_io_stats(buf_pool);
6321
6322 buf_pool_mutex_exit(buf_pool);
6323}
6324
6325/*********************************************************************//**
6326Invalidates the file pages in the buffer pool when an archive recovery is
6327completed. All the file pages buffered must be in a replaceable state when
6328this function is called: not latched and not modified. */
6329void
6330buf_pool_invalidate(void)
6331/*=====================*/
6332{
6333 ulint i;
6334
6335 for (i = 0; i < srv_buf_pool_instances; i++) {
6336 buf_pool_invalidate_instance(buf_pool_from_array(i));
6337 }
6338}
6339
6340#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
6341/*********************************************************************//**
6342Validates data in one buffer pool instance
6343@return TRUE */
6344static
6345ibool
6346buf_pool_validate_instance(
6347/*=======================*/
6348 buf_pool_t* buf_pool) /*!< in: buffer pool instance */
6349{
6350 buf_page_t* b;
6351 buf_chunk_t* chunk;
6352 ulint i;
6353 ulint n_lru_flush = 0;
6354 ulint n_page_flush = 0;
6355 ulint n_list_flush = 0;
6356 ulint n_lru = 0;
6357 ulint n_flush = 0;
6358 ulint n_free = 0;
6359 ulint n_zip = 0;
6360
6361 ut_ad(buf_pool);
6362
6363 buf_pool_mutex_enter(buf_pool);
6364 hash_lock_x_all(buf_pool->page_hash);
6365
6366 chunk = buf_pool->chunks;
6367
6368 /* Check the uncompressed blocks. */
6369
6370 for (i = buf_pool->n_chunks; i--; chunk++) {
6371
6372 ulint j;
6373 buf_block_t* block = chunk->blocks;
6374
6375 for (j = chunk->size; j--; block++) {
6376
6377 buf_page_mutex_enter(block);
6378
6379 switch (buf_block_get_state(block)) {
6380 case BUF_BLOCK_POOL_WATCH:
6381 case BUF_BLOCK_ZIP_PAGE:
6382 case BUF_BLOCK_ZIP_DIRTY:
6383 /* These should only occur on
6384 zip_clean, zip_free[], or flush_list. */
6385 ut_error;
6386 break;
6387
6388 case BUF_BLOCK_FILE_PAGE:
6389 ut_a(buf_page_hash_get_low(
6390 buf_pool, block->page.id)
6391 == &block->page);
6392
6393#ifdef UNIV_IBUF_COUNT_DEBUG
6394 ut_a(buf_page_get_io_fix(&block->page)
6395 == BUF_IO_READ
6396 || !ibuf_count_get(block->page.id));
6397#endif
6398 switch (buf_page_get_io_fix(&block->page)) {
6399 case BUF_IO_NONE:
6400 break;
6401
6402 case BUF_IO_WRITE:
6403 switch (buf_page_get_flush_type(
6404 &block->page)) {
6405 case BUF_FLUSH_LRU:
6406 n_lru_flush++;
6407 goto assert_s_latched;
6408 case BUF_FLUSH_SINGLE_PAGE:
6409 n_page_flush++;
6410assert_s_latched:
6411 ut_a(rw_lock_is_locked(
6412 &block->lock,
6413 RW_LOCK_S)
6414 || rw_lock_is_locked(
6415 &block->lock,
6416 RW_LOCK_SX));
6417 break;
6418 case BUF_FLUSH_LIST:
6419 n_list_flush++;
6420 break;
6421 default:
6422 ut_error;
6423 }
6424
6425 break;
6426
6427 case BUF_IO_READ:
6428
6429 ut_a(rw_lock_is_locked(&block->lock,
6430 RW_LOCK_X));
6431 break;
6432
6433 case BUF_IO_PIN:
6434 break;
6435 }
6436
6437 n_lru++;
6438 break;
6439
6440 case BUF_BLOCK_NOT_USED:
6441 n_free++;
6442 break;
6443
6444 case BUF_BLOCK_READY_FOR_USE:
6445 case BUF_BLOCK_MEMORY:
6446 case BUF_BLOCK_REMOVE_HASH:
6447 /* do nothing */
6448 break;
6449 }
6450
6451 buf_page_mutex_exit(block);
6452 }
6453 }
6454
6455 mutex_enter(&buf_pool->zip_mutex);
6456
6457 /* Check clean compressed-only blocks. */
6458
6459 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
6460 b = UT_LIST_GET_NEXT(list, b)) {
6461 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
6462 switch (buf_page_get_io_fix(b)) {
6463 case BUF_IO_NONE:
6464 case BUF_IO_PIN:
6465 /* All clean blocks should be I/O-unfixed. */
6466 break;
6467 case BUF_IO_READ:
6468 /* In buf_LRU_free_page(), we temporarily set
6469 b->io_fix = BUF_IO_READ for a newly allocated
6470 control block in order to prevent
6471 buf_page_get_gen() from decompressing the block. */
6472 break;
6473 default:
6474 ut_error;
6475 break;
6476 }
6477
6478 /* It is OK to read oldest_modification here because
6479 we have acquired buf_pool->zip_mutex above which acts
6480 as the 'block->mutex' for these bpages. */
6481 ut_a(!b->oldest_modification);
6482 ut_a(buf_page_hash_get_low(buf_pool, b->id) == b);
6483 n_lru++;
6484 n_zip++;
6485 }
6486
6487 /* Check dirty blocks. */
6488
6489 buf_flush_list_mutex_enter(buf_pool);
6490 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
6491 b = UT_LIST_GET_NEXT(list, b)) {
6492 ut_ad(b->in_flush_list);
6493 ut_a(b->oldest_modification);
6494 n_flush++;
6495
6496 switch (buf_page_get_state(b)) {
6497 case BUF_BLOCK_ZIP_DIRTY:
6498 n_lru++;
6499 n_zip++;
6500 switch (buf_page_get_io_fix(b)) {
6501 case BUF_IO_NONE:
6502 case BUF_IO_READ:
6503 case BUF_IO_PIN:
6504 break;
6505 case BUF_IO_WRITE:
6506 switch (buf_page_get_flush_type(b)) {
6507 case BUF_FLUSH_LRU:
6508 n_lru_flush++;
6509 break;
6510 case BUF_FLUSH_SINGLE_PAGE:
6511 n_page_flush++;
6512 break;
6513 case BUF_FLUSH_LIST:
6514 n_list_flush++;
6515 break;
6516 default:
6517 ut_error;
6518 }
6519 break;
6520 }
6521 break;
6522 case BUF_BLOCK_FILE_PAGE:
6523 /* uncompressed page */
6524 break;
6525 case BUF_BLOCK_POOL_WATCH:
6526 case BUF_BLOCK_ZIP_PAGE:
6527 case BUF_BLOCK_NOT_USED:
6528 case BUF_BLOCK_READY_FOR_USE:
6529 case BUF_BLOCK_MEMORY:
6530 case BUF_BLOCK_REMOVE_HASH:
6531 ut_error;
6532 break;
6533 }
6534 ut_a(buf_page_hash_get_low(buf_pool, b->id) == b);
6535 }
6536
6537 ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
6538
6539 hash_unlock_x_all(buf_pool->page_hash);
6540 buf_flush_list_mutex_exit(buf_pool);
6541
6542 mutex_exit(&buf_pool->zip_mutex);
6543
6544 if (buf_pool->curr_size == buf_pool->old_size
6545 && n_lru + n_free > buf_pool->curr_size + n_zip) {
6546
6547 ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free
6548 << ", pool " << buf_pool->curr_size
6549 << " zip " << n_zip << ". Aborting...";
6550 }
6551
6552 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
6553 if (buf_pool->curr_size == buf_pool->old_size
6554 && UT_LIST_GET_LEN(buf_pool->free) != n_free) {
6555
6556 ib::fatal() << "Free list len "
6557 << UT_LIST_GET_LEN(buf_pool->free)
6558 << ", free blocks " << n_free << ". Aborting...";
6559 }
6560
6561 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
6562 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
6563 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush);
6564
6565 buf_pool_mutex_exit(buf_pool);
6566
6567 ut_a(buf_LRU_validate());
6568 ut_a(buf_flush_validate(buf_pool));
6569
6570 return(TRUE);
6571}
6572
6573/*********************************************************************//**
6574Validates the buffer buf_pool data structure.
6575@return TRUE */
6576ibool
6577buf_validate(void)
6578/*==============*/
6579{
6580 ulint i;
6581
6582 for (i = 0; i < srv_buf_pool_instances; i++) {
6583 buf_pool_t* buf_pool;
6584
6585 buf_pool = buf_pool_from_array(i);
6586
6587 buf_pool_validate_instance(buf_pool);
6588 }
6589 return(TRUE);
6590}
6591
6592#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
6593
6594#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
6595/*********************************************************************//**
6596Prints info of the buffer buf_pool data structure for one instance. */
6597static
6598void
6599buf_print_instance(
6600/*===============*/
6601 buf_pool_t* buf_pool)
6602{
6603 index_id_t* index_ids;
6604 ulint* counts;
6605 ulint size;
6606 ulint i;
6607 ulint j;
6608 index_id_t id;
6609 ulint n_found;
6610 buf_chunk_t* chunk;
6611 dict_index_t* index;
6612
6613 ut_ad(buf_pool);
6614
6615 size = buf_pool->curr_size;
6616
6617 index_ids = static_cast<index_id_t*>(
6618 ut_malloc_nokey(size * sizeof *index_ids));
6619
6620 counts = static_cast<ulint*>(ut_malloc_nokey(sizeof(ulint) * size));
6621
6622 buf_pool_mutex_enter(buf_pool);
6623 buf_flush_list_mutex_enter(buf_pool);
6624
6625 ib::info() << *buf_pool;
6626
6627 buf_flush_list_mutex_exit(buf_pool);
6628
6629 /* Count the number of blocks belonging to each index in the buffer */
6630
6631 n_found = 0;
6632
6633 chunk = buf_pool->chunks;
6634
6635 for (i = buf_pool->n_chunks; i--; chunk++) {
6636 buf_block_t* block = chunk->blocks;
6637 ulint n_blocks = chunk->size;
6638
6639 for (; n_blocks--; block++) {
6640 const buf_frame_t* frame = block->frame;
6641
6642 if (fil_page_index_page_check(frame)) {
6643
6644 id = btr_page_get_index_id(frame);
6645
6646 /* Look for the id in the index_ids array */
6647 j = 0;
6648
6649 while (j < n_found) {
6650
6651 if (index_ids[j] == id) {
6652 counts[j]++;
6653
6654 break;
6655 }
6656 j++;
6657 }
6658
6659 if (j == n_found) {
6660 n_found++;
6661 index_ids[j] = id;
6662 counts[j] = 1;
6663 }
6664 }
6665 }
6666 }
6667
6668 buf_pool_mutex_exit(buf_pool);
6669
6670 for (i = 0; i < n_found; i++) {
6671 index = dict_index_get_if_in_cache(index_ids[i]);
6672
6673 if (!index) {
6674 ib::info() << "Block count for index "
6675 << index_ids[i] << " in buffer is about "
6676 << counts[i];
6677 } else {
6678 ib::info() << "Block count for index " << index_ids[i]
6679 << " in buffer is about " << counts[i]
6680 << ", index " << index->name
6681 << " of table " << index->table->name;
6682 }
6683 }
6684
6685 ut_free(index_ids);
6686 ut_free(counts);
6687
6688 ut_a(buf_pool_validate_instance(buf_pool));
6689}
6690
6691/*********************************************************************//**
6692Prints info of the buffer buf_pool data structure. */
6693void
6694buf_print(void)
6695/*===========*/
6696{
6697 ulint i;
6698
6699 for (i = 0; i < srv_buf_pool_instances; i++) {
6700 buf_pool_t* buf_pool;
6701
6702 buf_pool = buf_pool_from_array(i);
6703 buf_print_instance(buf_pool);
6704 }
6705}
6706#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
6707
6708#ifdef UNIV_DEBUG
6709/*********************************************************************//**
6710Returns the number of latched pages in the buffer pool.
6711@return number of latched pages */
6712static
6713ulint
6714buf_get_latched_pages_number_instance(
6715/*==================================*/
6716 buf_pool_t* buf_pool) /*!< in: buffer pool instance */
6717{
6718 buf_page_t* b;
6719 ulint i;
6720 buf_chunk_t* chunk;
6721 ulint fixed_pages_number = 0;
6722
6723 buf_pool_mutex_enter(buf_pool);
6724
6725 chunk = buf_pool->chunks;
6726
6727 for (i = buf_pool->n_chunks; i--; chunk++) {
6728 buf_block_t* block;
6729 ulint j;
6730
6731 block = chunk->blocks;
6732
6733 for (j = chunk->size; j--; block++) {
6734 if (buf_block_get_state(block)
6735 != BUF_BLOCK_FILE_PAGE) {
6736
6737 continue;
6738 }
6739
6740 buf_page_mutex_enter(block);
6741
6742 if (block->page.buf_fix_count != 0
6743 || buf_page_get_io_fix(&block->page)
6744 != BUF_IO_NONE) {
6745 fixed_pages_number++;
6746 }
6747
6748 buf_page_mutex_exit(block);
6749 }
6750 }
6751
6752 mutex_enter(&buf_pool->zip_mutex);
6753
6754 /* Traverse the lists of clean and dirty compressed-only blocks. */
6755
6756 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
6757 b = UT_LIST_GET_NEXT(list, b)) {
6758 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
6759 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
6760
6761 if (b->buf_fix_count != 0
6762 || buf_page_get_io_fix(b) != BUF_IO_NONE) {
6763 fixed_pages_number++;
6764 }
6765 }
6766
6767 buf_flush_list_mutex_enter(buf_pool);
6768 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
6769 b = UT_LIST_GET_NEXT(list, b)) {
6770 ut_ad(b->in_flush_list);
6771
6772 switch (buf_page_get_state(b)) {
6773 case BUF_BLOCK_ZIP_DIRTY:
6774 if (b->buf_fix_count != 0
6775 || buf_page_get_io_fix(b) != BUF_IO_NONE) {
6776 fixed_pages_number++;
6777 }
6778 break;
6779 case BUF_BLOCK_FILE_PAGE:
6780 /* uncompressed page */
6781 break;
6782 case BUF_BLOCK_POOL_WATCH:
6783 case BUF_BLOCK_ZIP_PAGE:
6784 case BUF_BLOCK_NOT_USED:
6785 case BUF_BLOCK_READY_FOR_USE:
6786 case BUF_BLOCK_MEMORY:
6787 case BUF_BLOCK_REMOVE_HASH:
6788 ut_error;
6789 break;
6790 }
6791 }
6792
6793 buf_flush_list_mutex_exit(buf_pool);
6794 mutex_exit(&buf_pool->zip_mutex);
6795 buf_pool_mutex_exit(buf_pool);
6796
6797 return(fixed_pages_number);
6798}
6799
6800/*********************************************************************//**
6801Returns the number of latched pages in all the buffer pools.
6802@return number of latched pages */
6803ulint
6804buf_get_latched_pages_number(void)
6805/*==============================*/
6806{
6807 ulint i;
6808 ulint total_latched_pages = 0;
6809
6810 for (i = 0; i < srv_buf_pool_instances; i++) {
6811 buf_pool_t* buf_pool;
6812
6813 buf_pool = buf_pool_from_array(i);
6814
6815 total_latched_pages += buf_get_latched_pages_number_instance(
6816 buf_pool);
6817 }
6818
6819 return(total_latched_pages);
6820}
6821
6822#endif /* UNIV_DEBUG */
6823
6824/*********************************************************************//**
6825Returns the number of pending buffer pool read I/Os.
6826@return number of pending read I/O operations */
6827ulint
6828buf_get_n_pending_read_ios(void)
6829/*============================*/
6830{
6831 ulint pend_ios = 0;
6832
6833 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
6834 pend_ios += buf_pool_from_array(i)->n_pend_reads;
6835 }
6836
6837 return(pend_ios);
6838}
6839
6840/*********************************************************************//**
6841Returns the ratio, as a percentage, of modified pages to all database
6842pages in the buffer pool.
6843@return modified page percentage ratio */
6844double
6845buf_get_modified_ratio_pct(void)
6846/*============================*/
6847{
6848 double ratio;
6849 ulint lru_len = 0;
6850 ulint free_len = 0;
6851 ulint flush_list_len = 0;
6852
6853 buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
6854
6855 ratio = static_cast<double>(100 * flush_list_len)
6856 / (1 + lru_len + free_len);
6857
6858 /* 1 + is there to avoid division by zero */
6859
6860 return(ratio);
6861}
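
/* Worked example with hypothetical list lengths: with 200 pages on the
flush list, 900 on the LRU list and 99 on the free list, the function
returns 100 * 200 / (1 + 900 + 99) = 20.0 per cent. */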
6862
6863/*******************************************************************//**
6864Aggregates one buffer pool's stats information into the total buffer pool stats */
6865static
6866void
6867buf_stats_aggregate_pool_info(
6868/*==========================*/
6869 buf_pool_info_t* total_info, /*!< in/out: the buffer pool
6870 info to store aggregated
6871 result */
6872 const buf_pool_info_t* pool_info) /*!< in: individual buffer pool
6873 stats info */
6874{
6875 ut_a(total_info && pool_info);
6876
6877 /* Nothing to copy if total_info is the same as pool_info */
6878 if (total_info == pool_info) {
6879 return;
6880 }
6881
6882 total_info->pool_size += pool_info->pool_size;
6883 total_info->lru_len += pool_info->lru_len;
6884 total_info->old_lru_len += pool_info->old_lru_len;
6885 total_info->free_list_len += pool_info->free_list_len;
6886 total_info->flush_list_len += pool_info->flush_list_len;
6887 total_info->n_pend_unzip += pool_info->n_pend_unzip;
6888 total_info->n_pend_reads += pool_info->n_pend_reads;
6889 total_info->n_pending_flush_lru += pool_info->n_pending_flush_lru;
6890 total_info->n_pending_flush_list += pool_info->n_pending_flush_list;
6891 total_info->n_pages_made_young += pool_info->n_pages_made_young;
6892 total_info->n_pages_not_made_young += pool_info->n_pages_not_made_young;
6893 total_info->n_pages_read += pool_info->n_pages_read;
6894 total_info->n_pages_created += pool_info->n_pages_created;
6895 total_info->n_pages_written += pool_info->n_pages_written;
6896 total_info->n_page_gets += pool_info->n_page_gets;
6897 total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd;
6898 total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
6899 total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
6900 total_info->page_made_young_rate += pool_info->page_made_young_rate;
6901 total_info->page_not_made_young_rate +=
6902 pool_info->page_not_made_young_rate;
6903 total_info->pages_read_rate += pool_info->pages_read_rate;
6904 total_info->pages_created_rate += pool_info->pages_created_rate;
6905 total_info->pages_written_rate += pool_info->pages_written_rate;
6906 total_info->n_page_get_delta += pool_info->n_page_get_delta;
6907 total_info->page_read_delta += pool_info->page_read_delta;
6908 total_info->young_making_delta += pool_info->young_making_delta;
6909 total_info->not_young_making_delta += pool_info->not_young_making_delta;
6910 total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate;
6911 total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
6912 total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
6913 total_info->unzip_lru_len += pool_info->unzip_lru_len;
6914 total_info->io_sum += pool_info->io_sum;
6915 total_info->io_cur += pool_info->io_cur;
6916 total_info->unzip_sum += pool_info->unzip_sum;
6917 total_info->unzip_cur += pool_info->unzip_cur;
6918}
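
/* Note that the per-second *_rate fields are summed as well, so in the
aggregated entry they are server-wide totals across all buffer pool
instances, not averages per instance. */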
6919/*******************************************************************//**
6920Collect buffer pool stats information for a buffer pool. Also
6921record aggregated stats if there is more than one buffer pool
6922in the server */
6923void
6924buf_stats_get_pool_info(
6925/*====================*/
6926 buf_pool_t* buf_pool, /*!< in: buffer pool */
6927 ulint pool_id, /*!< in: buffer pool ID */
6928 buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info
6929 to fill */
6930{
6931 buf_pool_info_t* pool_info;
6932 time_t current_time;
6933 double time_elapsed;
6934
6935 /* Find appropriate pool_info to store stats for this buffer pool */
6936 pool_info = &all_pool_info[pool_id];
6937
6938 buf_pool_mutex_enter(buf_pool);
6939 buf_flush_list_mutex_enter(buf_pool);
6940
6941 pool_info->pool_unique_id = pool_id;
6942
6943 pool_info->pool_size = buf_pool->curr_size;
6944
6945 pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
6946
6947 pool_info->old_lru_len = buf_pool->LRU_old_len;
6948
6949 pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free);
6950
6951 pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list);
6952
6953 pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
6954
6955 pool_info->n_pend_reads = buf_pool->n_pend_reads;
6956
6957 pool_info->n_pending_flush_lru =
6958 (buf_pool->n_flush[BUF_FLUSH_LRU]
6959 + buf_pool->init_flush[BUF_FLUSH_LRU]);
6960
6961 pool_info->n_pending_flush_list =
6962 (buf_pool->n_flush[BUF_FLUSH_LIST]
6963 + buf_pool->init_flush[BUF_FLUSH_LIST]);
6964
6965 pool_info->n_pending_flush_single_page =
6966 (buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]
6967 + buf_pool->init_flush[BUF_FLUSH_SINGLE_PAGE]);
6968
6969 buf_flush_list_mutex_exit(buf_pool);
6970
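	/* The 0.001 below guards against division by zero in the
	per-second rate calculations when this function is called twice
	within the same wall-clock second. */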
6971 current_time = time(NULL);
6972 time_elapsed = 0.001 + difftime(current_time,
6973 buf_pool->last_printout_time);
6974
6975 pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young;
6976
6977 pool_info->n_pages_not_made_young =
6978 buf_pool->stat.n_pages_not_made_young;
6979
6980 pool_info->n_pages_read = buf_pool->stat.n_pages_read;
6981
6982 pool_info->n_pages_created = buf_pool->stat.n_pages_created;
6983
6984 pool_info->n_pages_written = buf_pool->stat.n_pages_written;
6985
6986 pool_info->n_page_gets = buf_pool->stat.n_page_gets;
6987
6988 pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd;
6989 pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
6990
6991 pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
6992
6993 pool_info->page_made_young_rate =
6994 (buf_pool->stat.n_pages_made_young
6995 - buf_pool->old_stat.n_pages_made_young) / time_elapsed;
6996
6997 pool_info->page_not_made_young_rate =
6998 (buf_pool->stat.n_pages_not_made_young
6999 - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed;
7000
7001 pool_info->pages_read_rate =
7002 (buf_pool->stat.n_pages_read
7003 - buf_pool->old_stat.n_pages_read) / time_elapsed;
7004
7005 pool_info->pages_created_rate =
7006 (buf_pool->stat.n_pages_created
7007 - buf_pool->old_stat.n_pages_created) / time_elapsed;
7008
7009 pool_info->pages_written_rate =
7010 (buf_pool->stat.n_pages_written
7011 - buf_pool->old_stat.n_pages_written) / time_elapsed;
7012
7013 pool_info->n_page_get_delta = buf_pool->stat.n_page_gets
7014 - buf_pool->old_stat.n_page_gets;
7015
7016 if (pool_info->n_page_get_delta) {
7017 pool_info->page_read_delta = buf_pool->stat.n_pages_read
7018 - buf_pool->old_stat.n_pages_read;
7019
7020 pool_info->young_making_delta =
7021 buf_pool->stat.n_pages_made_young
7022 - buf_pool->old_stat.n_pages_made_young;
7023
7024 pool_info->not_young_making_delta =
7025 buf_pool->stat.n_pages_not_made_young
7026 - buf_pool->old_stat.n_pages_not_made_young;
7027 }
7028 pool_info->pages_readahead_rnd_rate =
7029 (buf_pool->stat.n_ra_pages_read_rnd
7030 - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed;
7031
7032
7033 pool_info->pages_readahead_rate =
7034 (buf_pool->stat.n_ra_pages_read
7035 - buf_pool->old_stat.n_ra_pages_read) / time_elapsed;
7036
7037 pool_info->pages_evicted_rate =
7038 (buf_pool->stat.n_ra_pages_evicted
7039 - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed;
7040
7041 pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
7042
7043 pool_info->io_sum = buf_LRU_stat_sum.io;
7044
7045 pool_info->io_cur = buf_LRU_stat_cur.io;
7046
7047 pool_info->unzip_sum = buf_LRU_stat_sum.unzip;
7048
7049 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
7050
7051 buf_refresh_io_stats(buf_pool);
7052 buf_pool_mutex_exit(buf_pool);
7053}
7054
7055/*********************************************************************//**
7056Prints info of the buffer i/o. */
7057static
7058void
7059buf_print_io_instance(
7060/*==================*/
7061 buf_pool_info_t*pool_info, /*!< in: buffer pool info */
7062 FILE* file) /*!< in/out: buffer where to print */
7063{
7064 ut_ad(pool_info);
7065
7066 fprintf(file,
7067 "Buffer pool size " ULINTPF "\n"
7068 "Free buffers " ULINTPF "\n"
7069 "Database pages " ULINTPF "\n"
7070 "Old database pages " ULINTPF "\n"
7071 "Modified db pages " ULINTPF "\n"
7072		"Percent of dirty pages (LRU & free pages): %.3f\n"
7073 "Max dirty pages percent: %.3f\n"
7074 "Pending reads " ULINTPF "\n"
7075 "Pending writes: LRU " ULINTPF ", flush list " ULINTPF
7076 ", single page " ULINTPF "\n",
7077 pool_info->pool_size,
7078 pool_info->free_list_len,
7079 pool_info->lru_len,
7080 pool_info->old_lru_len,
7081 pool_info->flush_list_len,
7082 (((double) pool_info->flush_list_len) /
7083 (pool_info->lru_len + pool_info->free_list_len + 1.0)) * 100.0,
7084 srv_max_buf_pool_modified_pct,
7085 pool_info->n_pend_reads,
7086 pool_info->n_pending_flush_lru,
7087 pool_info->n_pending_flush_list,
7088 pool_info->n_pending_flush_single_page);
7089
7090 fprintf(file,
7091 "Pages made young " ULINTPF ", not young " ULINTPF "\n"
7092 "%.2f youngs/s, %.2f non-youngs/s\n"
7093 "Pages read " ULINTPF ", created " ULINTPF
7094 ", written " ULINTPF "\n"
7095 "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
7096 pool_info->n_pages_made_young,
7097 pool_info->n_pages_not_made_young,
7098 pool_info->page_made_young_rate,
7099 pool_info->page_not_made_young_rate,
7100 pool_info->n_pages_read,
7101 pool_info->n_pages_created,
7102 pool_info->n_pages_written,
7103 pool_info->pages_read_rate,
7104 pool_info->pages_created_rate,
7105 pool_info->pages_written_rate);
7106
7107 if (pool_info->n_page_get_delta) {
7108 double hit_rate = double(pool_info->page_read_delta)
7109 / pool_info->n_page_get_delta;
7110
7111 if (hit_rate > 1) {
7112 hit_rate = 1;
7113 }
7114
7115 fprintf(file,
7116 "Buffer pool hit rate " ULINTPF " / 1000,"
7117 " young-making rate " ULINTPF " / 1000 not "
7118 ULINTPF " / 1000\n",
7119 ulint(1000 * (1 - hit_rate)),
7120 ulint(1000 * double(pool_info->young_making_delta)
7121 / pool_info->n_page_get_delta),
7122 ulint(1000 * double(pool_info->not_young_making_delta)
7123 / pool_info->n_page_get_delta));
7124 } else {
7125 fputs("No buffer pool page gets since the last printout\n",
7126 file);
7127 }
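
	/* Worked example with hypothetical deltas: with 1000 page gets
	and 50 physical page reads since the last printout, hit_rate is
	50 / 1000 = 0.05 and the printed hit rate is
	1000 * (1 - 0.05) = 950 / 1000. */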
7128
7129 /* Statistics about read ahead algorithm */
7130 fprintf(file, "Pages read ahead %.2f/s,"
7131 " evicted without access %.2f/s,"
7132 " Random read ahead %.2f/s\n",
7133
7134 pool_info->pages_readahead_rate,
7135 pool_info->pages_evicted_rate,
7136 pool_info->pages_readahead_rnd_rate);
7137
7138	/* Print some values to help us visualize what is
7139	happening with LRU eviction. */
7140 fprintf(file,
7141 "LRU len: " ULINTPF ", unzip_LRU len: " ULINTPF "\n"
7142 "I/O sum[" ULINTPF "]:cur[" ULINTPF "], "
7143 "unzip sum[" ULINTPF "]:cur[" ULINTPF "]\n",
7144 pool_info->lru_len, pool_info->unzip_lru_len,
7145 pool_info->io_sum, pool_info->io_cur,
7146 pool_info->unzip_sum, pool_info->unzip_cur);
7147}
7148
7149/*********************************************************************//**
7150Prints info of the buffer i/o. */
7151void
7152buf_print_io(
7153/*=========*/
7154 FILE* file) /*!< in/out: buffer where to print */
7155{
7156 ulint i;
7157 buf_pool_info_t* pool_info;
7158 buf_pool_info_t* pool_info_total;
7159
7160	/* If srv_buf_pool_instances is greater than 1, allocate
7161	one extra buf_pool_info_t; the last one stores
7162	aggregated/total values from all pools. */
7163 if (srv_buf_pool_instances > 1) {
7164 pool_info = (buf_pool_info_t*) ut_zalloc_nokey((
7165 srv_buf_pool_instances + 1) * sizeof *pool_info);
7166
7167 pool_info_total = &pool_info[srv_buf_pool_instances];
7168 } else {
7169 ut_a(srv_buf_pool_instances == 1);
7170
7171 pool_info_total = pool_info =
7172 static_cast<buf_pool_info_t*>(
7173 ut_zalloc_nokey(sizeof *pool_info));
7174 }
7175
7176 for (i = 0; i < srv_buf_pool_instances; i++) {
7177 buf_pool_t* buf_pool;
7178
7179 buf_pool = buf_pool_from_array(i);
7180
7181 /* Fetch individual buffer pool info and calculate
7182 aggregated stats along the way */
7183 buf_stats_get_pool_info(buf_pool, i, pool_info);
7184
7185 /* If we have more than one buffer pool, store
7186 the aggregated stats */
7187 if (srv_buf_pool_instances > 1) {
7188 buf_stats_aggregate_pool_info(pool_info_total,
7189 &pool_info[i]);
7190 }
7191 }
7192
7193	/* Print the aggregate buffer pool info */
7194 buf_print_io_instance(pool_info_total, file);
7195
7196	/* If there is more than one buffer pool, print each individual
7197	pool's info */
7198 if (srv_buf_pool_instances > 1) {
7199 fputs("----------------------\n"
7200 "INDIVIDUAL BUFFER POOL INFO\n"
7201 "----------------------\n", file);
7202
7203 for (i = 0; i < srv_buf_pool_instances; i++) {
7204 fprintf(file, "---BUFFER POOL " ULINTPF "\n", i);
7205 buf_print_io_instance(&pool_info[i], file);
7206 }
7207 }
7208
7209 ut_free(pool_info);
7210}
7211
7212/**********************************************************************//**
7213Refreshes the statistics used to print per-second averages. */
7214void
7215buf_refresh_io_stats_all(void)
7216/*==========================*/
7217{
7218 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
7219 buf_pool_t* buf_pool;
7220
7221 buf_pool = buf_pool_from_array(i);
7222
7223 buf_refresh_io_stats(buf_pool);
7224 }
7225}
7226
7227/**********************************************************************//**
7228Check if all pages in all buffer pools are in a replaceable state.
7229@return FALSE if not */
7230ibool
7231buf_all_freed(void)
7232/*===============*/
7233{
7234 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
7235 buf_pool_t* buf_pool;
7236
7237 buf_pool = buf_pool_from_array(i);
7238
7239 if (!buf_all_freed_instance(buf_pool)) {
7240 return(FALSE);
7241 }
7242 }
7243
7244 return(TRUE);
7245}
7246
7247/*********************************************************************//**
7248Counts the i/o operations that are currently pending for the buffer
7249pool.
7250@return number of pending i/o */
7251ulint
7252buf_pool_check_no_pending_io(void)
7253/*==============================*/
7254{
7255 ulint i;
7256 ulint pending_io = 0;
7257
7258 buf_pool_mutex_enter_all();
7259
7260 for (i = 0; i < srv_buf_pool_instances; i++) {
7261 const buf_pool_t* buf_pool;
7262
7263 buf_pool = buf_pool_from_array(i);
7264
7265 pending_io += buf_pool->n_pend_reads
7266 + buf_pool->n_flush[BUF_FLUSH_LRU]
7267 + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]
7268 + buf_pool->n_flush[BUF_FLUSH_LIST];
7269
7270 }
7271
7272 buf_pool_mutex_exit_all();
7273
7274 return(pending_io);
7275}
7276
7277/** Print the given page_id_t object.
7278@param[in,out] out the output stream
7279@param[in] page_id the page_id_t object to be printed
7280@return the output stream */
7281std::ostream&
7282operator<<(
7283 std::ostream& out,
7284 const page_id_t& page_id)
7285{
7286 out << "[page id: space=" << page_id.m_space
7287 << ", page number=" << page_id.m_page_no << "]";
7288 return(out);
7289}
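
/* Example output (hypothetical values): page 42 of tablespace 5 prints
as "[page id: space=5, page number=42]". */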
7290
7291/** Print the given buf_pool_t object.
7292@param[in,out] out the output stream
7293@param[in] buf_pool the buf_pool_t object to be printed
7294@return the output stream */
7295std::ostream&
7296operator<<(
7297 std::ostream& out,
7298 const buf_pool_t& buf_pool)
7299{
7300 out << "[buffer pool instance: "
7301 << "buf_pool size=" << buf_pool.curr_size
7302 << ", database pages=" << UT_LIST_GET_LEN(buf_pool.LRU)
7303 << ", free pages=" << UT_LIST_GET_LEN(buf_pool.free)
7304 << ", modified database pages="
7305 << UT_LIST_GET_LEN(buf_pool.flush_list)
7306 << ", n pending decompressions=" << buf_pool.n_pend_unzip
7307 << ", n pending reads=" << buf_pool.n_pend_reads
7308 << ", n pending flush LRU=" << buf_pool.n_flush[BUF_FLUSH_LRU]
7309 << " list=" << buf_pool.n_flush[BUF_FLUSH_LIST]
7310 << " single page=" << buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE]
7311 << ", pages made young=" << buf_pool.stat.n_pages_made_young
7312 << ", not young=" << buf_pool.stat.n_pages_not_made_young
7313 << ", pages read=" << buf_pool.stat.n_pages_read
7314 << ", created=" << buf_pool.stat.n_pages_created
7315 << ", written=" << buf_pool.stat.n_pages_written << "]";
7316 return(out);
7317}
7318
7319/********************************************************************//**
7320Reserve an unused slot from the temporary memory array, and allocate the
7321necessary temporary memory if it has not yet been allocated.
7322@return reserved slot */
7323UNIV_INTERN
7324buf_tmp_buffer_t*
7325buf_pool_reserve_tmp_slot(
7326/*======================*/
7327 buf_pool_t* buf_pool, /*!< in: buffer pool where to
7328 reserve */
7329 bool compressed) /*!< in: is file space compressed */
7330{
7331 buf_tmp_buffer_t *free_slot=NULL;
7332
7333 /* Array is protected by buf_pool mutex */
7334 buf_pool_mutex_enter(buf_pool);
7335
7336 for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) {
7337 buf_tmp_buffer_t *slot = &buf_pool->tmp_arr->slots[i];
7338
7339 if(slot->reserved == false) {
7340 free_slot = slot;
7341 break;
7342 }
7343 }
7344
7345	/* We assume that a free slot is always found */
7346 ut_a(free_slot != NULL);
7347 free_slot->reserved = true;
7348 /* Now that we have reserved this slot we can release
7349 buf_pool mutex */
7350 buf_pool_mutex_exit(buf_pool);
7351
7352 /* Allocate temporary memory for encryption/decryption */
7353 if (free_slot->crypt_buf == NULL) {
7354 free_slot->crypt_buf = static_cast<byte*>(aligned_malloc(srv_page_size, srv_page_size));
7355 memset(free_slot->crypt_buf, 0, srv_page_size);
7356 }
7357
7358 /* For page compressed tables allocate temporary memory for
7359 compression/decompression */
7360 if (compressed && free_slot->comp_buf == NULL) {
7361 ulint size = srv_page_size;
7362
7363		/* Both the snappy and lzo compression methods require that
7364		the output buffer used for compression be larger than the
7365		input buffer. Increase the allocated buffer size accordingly. */
7366#if defined(HAVE_SNAPPY)
7367 size = snappy_max_compressed_length(size);
7368#endif
7369#if defined(HAVE_LZO)
7370 size += LZO1X_1_15_MEM_COMPRESS;
7371#endif
7372 free_slot->comp_buf = static_cast<byte*>(aligned_malloc(size, srv_page_size));
7373 memset(free_slot->comp_buf, 0, size);
7374 }
7375
7376 return (free_slot);
7377}
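
/* A minimal sketch of the reserve/release protocol (illustrative only):

	buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, false);
	// ... use slot->crypt_buf (or slot->comp_buf) for one I/O ...
	slot->reserved = false;

Once reserved, the slot is owned by a single I/O, so releasing it is a
plain store of the reserved flag (as done in buf_page_io_complete() and
buf_page_decrypt_after_read()). */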
7378
7379/** Encryption and page_compression hook that is called just before
7380a page is written to disk.
7381@param[in,out] space tablespace
7382@param[in,out] bpage buffer page
7383@param[in] src_frame physical page frame that is being encrypted
7384@return page frame to be written to file
7385(may be src_frame or an encrypted/compressed copy of it) */
7386UNIV_INTERN
7387byte*
7388buf_page_encrypt_before_write(
7389 fil_space_t* space,
7390 buf_page_t* bpage,
7391 byte* src_frame)
7392{
7393 ut_ad(space->id == bpage->id.space());
7394 bpage->real_size = srv_page_size;
7395
7396 fil_page_type_validate(src_frame);
7397
7398 switch (bpage->id.page_no()) {
7399 case 0:
7400 /* Page 0 of a tablespace is not encrypted/compressed */
7401 return src_frame;
7402 case TRX_SYS_PAGE_NO:
7403 if (bpage->id.space() == TRX_SYS_SPACE) {
7404			/* do not encrypt/compress the page, as it contains
7405			the address of the doublewrite buffer */
7406 return src_frame;
7407 }
7408 }
7409
7410 fil_space_crypt_t* crypt_data = space->crypt_data;
7411
7412 const bool encrypted = crypt_data
7413 && !crypt_data->not_encrypted()
7414 && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
7415 && (!crypt_data->is_default_encryption()
7416 || srv_encrypt_tables);
7417
7418 bool page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
7419
7420 if (!encrypted && !page_compressed) {
7421 /* No need to encrypt or page compress the page.
7422 Clear key-version & crypt-checksum. */
7423 memset(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
7424 return src_frame;
7425 }
7426
7427 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
7428 /* Find free slot from temporary memory array */
7429 buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
7430 slot->out_buf = NULL;
7431 bpage->slot = slot;

	byte*	dst_frame = slot->crypt_buf;

	if (!page_compressed) {
		/* Encrypt the page content. */
		byte*	tmp = fil_space_encrypt(space,
						bpage->id.page_no(),
						bpage->newest_modification,
						src_frame,
						dst_frame);

		slot->out_buf = dst_frame = tmp;

		ut_d(fil_page_type_validate(tmp));
	} else {
		/* First compress the page content. */
		ulint	out_len = 0;

		byte*	tmp = fil_compress_page(
			space,
			(byte*) src_frame,
			slot->comp_buf,
			srv_page_size,
			fsp_flags_get_page_compression_level(space->flags),
			fil_space_get_block_size(space, bpage->id.page_no()),
			encrypted,
			&out_len);

		bpage->real_size = out_len;

		/* Workaround for MDEV-15527: zero out the unused tail
		of the compressed page. */
		memset(tmp + out_len, 0, srv_page_size - out_len);
		ut_d(fil_page_type_validate(tmp));

		if (encrypted) {
			/* Then encrypt the compressed page content. */
			tmp = fil_space_encrypt(space,
						bpage->id.page_no(),
						bpage->newest_modification,
						tmp,
						dst_frame);
		}

		slot->out_buf = dst_frame = tmp;
	}

	ut_d(fil_page_type_validate(dst_frame));

	/* Return the frame that will be written to the data file. */
	return dst_frame;
}
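
/* Illustrative call flow (a sketch, not part of the build): the page
flush code is expected to use this hook roughly as

	byte*	frame = buf_page_encrypt_before_write(space, bpage,
						       src_frame);

where frame is either src_frame itself (page 0, the TRX_SYS page, or
nothing to do), or an encrypted and/or compressed copy held in the
reserved buf_tmp_buffer_t slot, with bpage->real_size giving the
payload length actually produced. */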

/** Decrypt a page.
@param[in,out]	bpage	Page control block
@param[in,out]	space	tablespace
@return whether the operation was successful */
static
bool
buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
{
	ut_ad(space->pending_io());
	ut_ad(space->id == bpage->id.space());

	bool compressed = bpage->size.is_compressed();
	const page_size_t& size = bpage->size;
	byte* dst_frame = compressed ? bpage->zip.data :
		((buf_block_t*) bpage)->frame;
	unsigned key_version = mach_read_from_4(
		dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
	bool page_compressed = fil_page_is_compressed(dst_frame);
	bool page_compressed_encrypted =
		fil_page_is_compressed_encrypted(dst_frame);
	buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
	bool success = true;

	if (bpage->id.page_no() == 0) {
		/* File header pages are not encrypted/compressed. */
		return (true);
	}

	/* The page is encrypted if encryption information is present
	in the tablespace and the page contains a nonzero key_version.
	This also holds for pages that were first compressed and then
	encrypted. */
	if (!space->crypt_data) {
		key_version = 0;
	}
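
	/* Three cases remain below: a page that is only page
	compressed, a page that is only encrypted, and a page that was
	compressed first and then encrypted. The first case is
	decompressed directly; the other two are decrypted first, and
	the last one is decompressed afterwards. */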

	if (page_compressed) {
		/* The page we read is unencrypted. Find a free slot
		from the temporary memory array. */
		buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(
			buf_pool, page_compressed);

		ut_d(fil_page_type_validate(dst_frame));

		/* Decompress using comp_buf to dst_frame. */
		fil_decompress_page(slot->comp_buf,
				    dst_frame,
				    ulong(size.logical()),
				    &bpage->write_size);

		/* Mark this slot as free. */
		slot->reserved = false;
		key_version = 0;

		ut_d(fil_page_type_validate(dst_frame));
	} else {
		buf_tmp_buffer_t* slot = NULL;

		if (key_version) {
			/* Verify the encryption checksum before we
			even try to decrypt. */
			if (!fil_space_verify_crypt_checksum(
				    dst_frame, size,
				    bpage->id.space(), bpage->id.page_no())) {
				if (space->crypt_data->type
				    != CRYPT_SCHEME_UNENCRYPTED) {
					bpage->encrypted = true;
				}
				return (false);
			}

			/* Find a free slot from the temporary memory
			array. */
			slot = buf_pool_reserve_tmp_slot(
				buf_pool, page_compressed);

			ut_d(fil_page_type_validate(dst_frame));

			/* Decrypt using crypt_buf to dst_frame. */
			if (!fil_space_decrypt(space, slot->crypt_buf,
					       dst_frame, &bpage->encrypted)) {
				success = false;
			}

			ut_d(fil_page_type_validate(dst_frame));
		}

		if (page_compressed_encrypted && success) {
			if (!slot) {
				slot = buf_pool_reserve_tmp_slot(
					buf_pool, page_compressed);
			}

			ut_d(fil_page_type_validate(dst_frame));
			/* Decompress using comp_buf to dst_frame. */
			fil_decompress_page(slot->comp_buf,
					    dst_frame,
					    ulong(size.logical()),
					    &bpage->write_size);
			ut_d(fil_page_type_validate(dst_frame));
		}

		/* Mark this slot as free. */
		if (slot) {
			slot->reserved = false;
		}
	}

	ut_ad(space->pending_io());
	return success;
}
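
/* Illustrative read-side flow (a sketch, not part of the build): the
I/O completion code is expected to call this roughly as

	if (!buf_page_decrypt_after_read(bpage, space)) {
		// bpage->encrypted may have been set above to flag a
		// page that looked encrypted but whose crypt checksum
		// did not match, or that failed to decrypt
	}

and report a decryption failure to the user in the error branch. */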

/**
Should we punch a hole to deallocate the unused portion of the page?
@param[in]	bpage	Page control block
@return true if a punch hole should be used, false if not */
bool
buf_page_should_punch_hole(
	const buf_page_t*	bpage)
{
	return (bpage->real_size != bpage->size.physical());
}

/**
Calculate the length of the trim (punch_hole) operation.
@param[in]	bpage		Page control block
@param[in]	write_length	Write length
@return length of the trim, or zero */
ulint
buf_page_get_trim_length(
	const buf_page_t*	bpage,
	ulint			write_length)
{
	return (bpage->size.physical() - write_length);
}
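
/* Worked example (a sketch with made-up numbers): on a tablespace
with a 16KiB physical page size, if page compression produced only
4096 bytes of payload (bpage->real_size == 4096), then
buf_page_should_punch_hole() returns true and
buf_page_get_trim_length(bpage, 4096) == 12288, i.e. the tail of the
page that the I/O layer may deallocate with a punch-hole (trim)
call. */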


#endif /* !UNIV_INNOCHECKSUM */