1/*****************************************************************************
2
3Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2013, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file include/buf0buf.h
22The database buffer pool high-level routines
23
24Created 11/5/1995 Heikki Tuuri
25*******************************************************/
26
27#ifndef buf0buf_h
28#define buf0buf_h
29
30/** Magic value to use instead of checksums when they are disabled */
31#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
32
33#include "fil0fil.h"
34#include "mtr0types.h"
35#include "buf0types.h"
36#ifndef UNIV_INNOCHECKSUM
37#include "hash0hash.h"
38#include "ut0byte.h"
39#include "page0types.h"
40#include "ut0rbt.h"
41#include "os0proc.h"
42#include "log0log.h"
43#include "srv0srv.h"
44#include <ostream>
45
46// Forward declaration
47struct fil_addr_t;
48
49/** @name Modes for buf_page_get_gen */
50/* @{ */
51#define BUF_GET 10 /*!< get always */
52#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
53#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
54 the block young in the LRU list */
55#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
56 set no latch; we have
57 separated this case, because
58 it is error-prone programming
59 not to set a latch, and it
60 should be used with care */
61#define BUF_GET_IF_IN_POOL_OR_WATCH 15
62 /*!< Get the page only if it's in the
63 buffer pool, if not then set a watch
64 on the page. */
65#define BUF_GET_POSSIBLY_FREED 16
66 /*!< Like BUF_GET, but do not mind
67 if the file page has been freed. */
68#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */
69/* @} */
70/** @name Modes for buf_page_get_known_nowait */
71/* @{ */
72#define BUF_MAKE_YOUNG 51 /*!< Move the block to the
73 start of the LRU list if there
74 is a danger that the block
75 would drift out of the buffer
76 pool*/
77#define BUF_KEEP_OLD 52 /*!< Preserve the current LRU
78 position of the block. */
79/* @} */
80
/** Number of bits used to represent a buffer pool ID */
#define MAX_BUFFER_POOLS_BITS	6

/** The maximum number of buffer pools that can be defined */
#define MAX_BUFFER_POOLS	(1 << MAX_BUFFER_POOLS_BITS)
87
88#define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1)
89 /*!< Maximum number of concurrent
90 buffer pool watches */
91#define MAX_PAGE_HASH_LOCKS 1024 /*!< The maximum number of
92 page_hash locks */
93
94extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools
95 of the database */
96
97extern volatile bool buf_pool_withdrawing; /*!< true when withdrawing buffer
98 pool pages might cause page relocation */
99
100extern volatile ulint buf_withdraw_clock; /*!< the clock is incremented
101 every time a pointer to a page may
102 become obsolete */
103
104# ifdef UNIV_DEBUG
105extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
106 buffer pool is not allowed. */
107# endif /* UNIV_DEBUG */
108
/** @brief States of a control block
@see buf_page_t

The enumeration values must be 0..7; they are spelled out explicitly
below so that the numbering is visible at a glance. */
enum buf_page_state {
	/** a sentinel for the buffer pool watch,
	element of buf_pool->watch[] */
	BUF_BLOCK_POOL_WATCH = 0,

	/** contains a clean compressed page */
	BUF_BLOCK_ZIP_PAGE = 1,

	/** contains a compressed page that is in the
	buf_pool->flush_list */
	BUF_BLOCK_ZIP_DIRTY = 2,

	/** is in the free list; must be after the BUF_BLOCK_ZIP_
	constants for compressed-only pages
	@see buf_block_state_valid() */
	BUF_BLOCK_NOT_USED = 3,

	/** when buf_LRU_get_free_block returns a block,
	it is in this state */
	BUF_BLOCK_READY_FOR_USE = 4,

	/** contains a buffered file page */
	BUF_BLOCK_FILE_PAGE = 5,

	/** contains some main memory object */
	BUF_BLOCK_MEMORY = 6,

	/** hash index should be removed before putting
	to the free list */
	BUF_BLOCK_REMOVE_HASH = 7
};
134
135
136/** This structure defines information we will fetch from each buffer pool. It
137will be used to print table IO stats */
138struct buf_pool_info_t{
139 /* General buffer pool info */
140 ulint pool_unique_id; /*!< Buffer Pool ID */
141 ulint pool_size; /*!< Buffer Pool size in pages */
142 ulint lru_len; /*!< Length of buf_pool->LRU */
143 ulint old_lru_len; /*!< buf_pool->LRU_old_len */
144 ulint free_list_len; /*!< Length of buf_pool->free list */
145 ulint flush_list_len; /*!< Length of buf_pool->flush_list */
146 ulint n_pend_unzip; /*!< buf_pool->n_pend_unzip, pages
147 pending decompress */
148 ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages
149 pending read */
150 ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */
151 ulint n_pending_flush_single_page;/*!< Pages pending to be
152 flushed as part of single page
153 flushes issued by various user
154 threads */
155 ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH
156 LIST */
157 ulint n_pages_made_young; /*!< number of pages made young */
158 ulint n_pages_not_made_young; /*!< number of pages not made young */
159 ulint n_pages_read; /*!< buf_pool->n_pages_read */
160 ulint n_pages_created; /*!< buf_pool->n_pages_created */
161 ulint n_pages_written; /*!< buf_pool->n_pages_written */
162 ulint n_page_gets; /*!< buf_pool->n_page_gets */
163 ulint n_ra_pages_read_rnd; /*!< buf_pool->n_ra_pages_read_rnd,
164 number of pages readahead */
165 ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number
166 of pages readahead */
167 ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted,
168 number of readahead pages evicted
169 without access */
170 ulint n_page_get_delta; /*!< num of buffer pool page gets since
171 last printout */
172
173 /* Buffer pool access stats */
174 double page_made_young_rate; /*!< page made young rate in pages
175 per second */
176 double page_not_made_young_rate;/*!< page not made young rate
177 in pages per second */
178 double pages_read_rate; /*!< num of pages read per second */
179 double pages_created_rate; /*!< num of pages create per second */
180 double pages_written_rate; /*!< num of pages written per second */
181 ulint page_read_delta; /*!< num of pages read since last
182 printout */
183 ulint young_making_delta; /*!< num of pages made young since
184 last printout */
185 ulint not_young_making_delta; /*!< num of pages not make young since
186 last printout */
187
188 /* Statistics about read ahead algorithm. */
189 double pages_readahead_rnd_rate;/*!< random readahead rate in pages per
190 second */
191 double pages_readahead_rate; /*!< readahead rate in pages per
192 second */
193 double pages_evicted_rate; /*!< rate of readahead page evicted
194 without access, in pages per second */
195
196 /* Stats about LRU eviction */
197 ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU
198 list */
199 /* Counters for LRU policy */
200 ulint io_sum; /*!< buf_LRU_stat_sum.io */
201 ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO
202 for current interval */
203 ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */
204 ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num
205 pages decompressed in current
206 interval */
207};
208
209/** The occupied bytes of lists in all buffer pools */
210struct buf_pools_list_size_t {
211 ulint LRU_bytes; /*!< LRU size in bytes */
212 ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
213 ulint flush_list_bytes; /*!< flush_list size in bytes */
214};
215#endif /* !UNIV_INNOCHECKSUM */
216
217/** Page identifier. */
218class page_id_t {
219public:
220
221 /** Constructor from (space, page_no).
222 @param[in] space tablespace id
223 @param[in] page_no page number */
224 page_id_t(ulint space, ulint page_no)
225 :
226 m_space(static_cast<ib_uint32_t>(space)),
227 m_page_no(static_cast<ib_uint32_t>(page_no)),
228 m_fold(ULINT_UNDEFINED)
229 {
230 ut_ad(space <= 0xFFFFFFFFU);
231 ut_ad(page_no <= 0xFFFFFFFFU);
232 }
233
234 /** Retrieve the tablespace id.
235 @return tablespace id */
236 inline ib_uint32_t space() const
237 {
238 return(m_space);
239 }
240
241 /** Retrieve the page number.
242 @return page number */
243 inline ib_uint32_t page_no() const
244 {
245 return(m_page_no);
246 }
247
248 /** Retrieve the fold value.
249 @return fold value */
250 inline ulint fold() const
251 {
252 /* Initialize m_fold if it has not been initialized yet. */
253 if (m_fold == ULINT_UNDEFINED) {
254 m_fold = (m_space << 20) + m_space + m_page_no;
255 ut_ad(m_fold != ULINT_UNDEFINED);
256 }
257
258 return(m_fold);
259 }
260
261 /** Copy the values from a given page_id_t object.
262 @param[in] src page id object whose values to fetch */
263 inline void copy_from(const page_id_t& src)
264 {
265 m_space = src.space();
266 m_page_no = src.page_no();
267 m_fold = src.fold();
268 }
269
270 /** Reset the object. */
271 void reset() { m_space= ~0U; m_page_no= ~0U; m_fold= ULINT_UNDEFINED; }
272
273 /** Reset the page number only.
274 @param[in] page_no page number */
275 inline void set_page_no(ulint page_no)
276 {
277 m_page_no = static_cast<ib_uint32_t>(page_no);
278 m_fold = ULINT_UNDEFINED;
279
280 ut_ad(page_no <= 0xFFFFFFFFU);
281 }
282
283 /** Check if a given page_id_t object is equal to the current one.
284 @param[in] a page_id_t object to compare
285 @return true if equal */
286 inline bool equals_to(const page_id_t& a) const
287 {
288 return(a.space() == m_space && a.page_no() == m_page_no);
289 }
290
291private:
292
293 /** Tablespace id. */
294 ib_uint32_t m_space;
295
296 /** Page number. */
297 ib_uint32_t m_page_no;
298
299 /** A fold value derived from m_space and m_page_no,
300 used in hashing. */
301 mutable ulint m_fold;
302
303 /* Disable implicit copying. */
304 void operator=(const page_id_t&);
305
306 /** Declare the overloaded global operator<< as a friend of this
307 class. Refer to the global declaration for further details. Print
308 the given page_id_t object.
309 @param[in,out] out the output stream
310 @param[in] page_id the page_id_t object to be printed
311 @return the output stream */
312 friend
313 std::ostream&
314 operator<<(
315 std::ostream& out,
316 const page_id_t& page_id);
317};
318
319/** Print the given page_id_t object.
320@param[in,out] out the output stream
321@param[in] page_id the page_id_t object to be printed
322@return the output stream */
323std::ostream&
324operator<<(
325 std::ostream& out,
326 const page_id_t& page_id);
327
328#ifndef UNIV_INNOCHECKSUM
329/********************************************************************//**
330Acquire mutex on all buffer pool instances */
331UNIV_INLINE
332void
333buf_pool_mutex_enter_all(void);
334/*===========================*/
335
336/********************************************************************//**
337Release mutex on all buffer pool instances */
338UNIV_INLINE
339void
340buf_pool_mutex_exit_all(void);
341/*==========================*/
342
343/********************************************************************//**
344Creates the buffer pool.
345@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
346dberr_t
347buf_pool_init(
348/*=========*/
349 ulint size, /*!< in: Size of the total pool in bytes */
350 ulint n_instances); /*!< in: Number of instances */
351/********************************************************************//**
352Frees the buffer pool at shutdown. This must not be invoked before
353freeing all mutexes. */
354void
355buf_pool_free(
356/*==========*/
357 ulint n_instances); /*!< in: numbere of instances to free */
358
359/** Determines if a block is intended to be withdrawn.
360@param[in] buf_pool buffer pool instance
361@param[in] block pointer to control block
362@retval true if will be withdrawn */
363bool
364buf_block_will_withdrawn(
365 buf_pool_t* buf_pool,
366 const buf_block_t* block);
367
368/** Determines if a frame is intended to be withdrawn.
369@param[in] buf_pool buffer pool instance
370@param[in] ptr pointer to a frame
371@retval true if will be withdrawn */
372bool
373buf_frame_will_withdrawn(
374 buf_pool_t* buf_pool,
375 const byte* ptr);
376
/** This is the thread for resizing the buffer pool. It waits for an event
and, when woken up, performs a resize and then sleeps again.
@return this function does not return, calls os_thread_exit()
*/
381extern "C"
382os_thread_ret_t
383DECLARE_THREAD(buf_resize_thread)(void*);
384
385#ifdef BTR_CUR_HASH_ADAPT
386/** Clear the adaptive hash index on all pages in the buffer pool. */
387void
388buf_pool_clear_hash_index();
389#endif /* BTR_CUR_HASH_ADAPT */
390
391/*********************************************************************//**
392Gets the current size of buffer buf_pool in bytes.
393@return size in bytes */
394UNIV_INLINE
395ulint
396buf_pool_get_curr_size(void);
397/*========================*/
398/*********************************************************************//**
399Gets the current size of buffer buf_pool in frames.
400@return size in pages */
401UNIV_INLINE
402ulint
403buf_pool_get_n_pages(void);
404/*=======================*/
405/********************************************************************//**
406Gets the smallest oldest_modification lsn for any page in the pool. Returns
407zero if all modified pages have been flushed to disk.
408@return oldest modification in pool, zero if none */
409lsn_t
410buf_pool_get_oldest_modification(void);
411/*==================================*/
412
413/********************************************************************//**
414Allocates a buf_page_t descriptor. This function must succeed. In case
415of failure we assert in this function. */
416UNIV_INLINE
417buf_page_t*
418buf_page_alloc_descriptor(void)
419/*===========================*/
420 MY_ATTRIBUTE((malloc));
421/********************************************************************//**
422Free a buf_page_t descriptor. */
423UNIV_INLINE
424void
425buf_page_free_descriptor(
426/*=====================*/
427 buf_page_t* bpage) /*!< in: bpage descriptor to free. */
428 MY_ATTRIBUTE((nonnull));
429
430/********************************************************************//**
431Allocates a buffer block.
432@return own: the allocated block, in state BUF_BLOCK_MEMORY */
433buf_block_t*
434buf_block_alloc(
435/*============*/
436 buf_pool_t* buf_pool); /*!< in: buffer pool instance,
437 or NULL for round-robin selection
438 of the buffer pool */
439/********************************************************************//**
440Frees a buffer block which does not contain a file page. */
441UNIV_INLINE
442void
443buf_block_free(
444/*===========*/
445 buf_block_t* block); /*!< in, own: block to be freed */
446
447/*********************************************************************//**
448Copies contents of a buffer frame to a given buffer.
449@return buf */
450UNIV_INLINE
451byte*
452buf_frame_copy(
453/*===========*/
454 byte* buf, /*!< in: buffer to copy to */
455 const buf_frame_t* frame); /*!< in: buffer frame */
456
457/**************************************************************//**
458NOTE! The following macros should be used instead of buf_page_get_gen,
459to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
460in LA! */
461#define buf_page_get(ID, SIZE, LA, MTR) \
462 buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR, NULL)
463/**************************************************************//**
464Use these macros to bufferfix a page with no latching. Remember not to
465read the contents of the page unless you know it is safe. Do not modify
466the contents of the page! We have separated this case, because it is
467error-prone programming not to set a latch, and it should be used
468with care. */
469#define buf_page_get_with_no_latch(ID, SIZE, MTR) \
470 buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \
471 __FILE__, __LINE__, MTR, NULL)
472/********************************************************************//**
473This is the general function used to get optimistic access to a database
474page.
475@return TRUE if success */
476ibool
477buf_page_optimistic_get(
478/*====================*/
479 ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
480 buf_block_t* block, /*!< in: guessed block */
481 ib_uint64_t modify_clock,/*!< in: modify clock value */
482 const char* file, /*!< in: file name */
483 unsigned line, /*!< in: line where called */
484 mtr_t* mtr); /*!< in: mini-transaction */
485/********************************************************************//**
486This is used to get access to a known database page, when no waiting can be
487done.
488@return TRUE if success */
489ibool
490buf_page_get_known_nowait(
491/*======================*/
492 ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
493 buf_block_t* block, /*!< in: the known page */
494 ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
495 const char* file, /*!< in: file name */
496 unsigned line, /*!< in: line where called */
497 mtr_t* mtr); /*!< in: mini-transaction */
498
499/** Given a tablespace id and page number tries to get that page. If the
500page is not in the buffer pool it is not loaded and NULL is returned.
501Suitable for using when holding the lock_sys_t::mutex.
502@param[in] page_id page id
503@param[in] file file name
504@param[in] line line where called
505@param[in] mtr mini-transaction
506@return pointer to a page or NULL */
507buf_block_t*
508buf_page_try_get_func(
509 const page_id_t& page_id,
510 const char* file,
511 unsigned line,
512 mtr_t* mtr);
513
/** Tries to get a page.
If the page is not in the buffer pool it is not loaded. Suitable for using
when holding the lock_sys_t::mutex.
The macro expands to a single call expression WITHOUT a trailing semicolon,
so it can be used anywhere a function call can (in a condition, as an
argument, on the right-hand side of an assignment); the caller supplies
the terminating ';' as for any other call.
@param[in]	page_id	page identifier
@param[in]	mtr	mini-transaction
@return the page if in buffer pool, NULL if not */
#define buf_page_try_get(page_id, mtr)	\
	buf_page_try_get_func((page_id), __FILE__, __LINE__, (mtr))
522
523/** Get read access to a compressed page (usually of type
524FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
525The page must be released with buf_page_release_zip().
526NOTE: the page is not protected by any latch. Mutual exclusion has to
527be implemented at a higher level. In other words, all possible
528accesses to a given page through this function must be protected by
529the same set of mutexes or latches.
530@param[in] page_id page id
531@param[in] page_size page size
532@return pointer to the block */
533buf_page_t*
534buf_page_get_zip(
535 const page_id_t& page_id,
536 const page_size_t& page_size);
537
/** This is the general function used to get access to a database page.
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in]	guess		guessed block or NULL
@param[in]	mode		BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, BUF_GET_IF_IN_POOL_OR_WATCH,
BUF_GET_POSSIBLY_FREED, or BUF_EVICT_IF_IN_POOL
@param[in]	file		file name
@param[in]	line		line where called
@param[in]	mtr		mini-transaction
@param[out]	err		DB_SUCCESS or error code
@return pointer to the block or NULL */
549buf_block_t*
550buf_page_get_gen(
551 const page_id_t& page_id,
552 const page_size_t& page_size,
553 ulint rw_latch,
554 buf_block_t* guess,
555 ulint mode,
556 const char* file,
557 unsigned line,
558 mtr_t* mtr,
559 dberr_t* err);
560
/** Initializes a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
of the functions which perform the state transition NOT_USED =>
FILE_PAGE on a block (the other is buf_page_get_gen).
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	mtr		mini-transaction
@return pointer to the block, page bufferfixed */
569buf_block_t*
570buf_page_create(
571 const page_id_t& page_id,
572 const page_size_t& page_size,
573 mtr_t* mtr);
574
575/********************************************************************//**
576Releases a compressed-only page acquired with buf_page_get_zip(). */
577UNIV_INLINE
578void
579buf_page_release_zip(
580/*=================*/
581 buf_page_t* bpage); /*!< in: buffer block */
582/********************************************************************//**
583Releases a latch, if specified. */
584UNIV_INLINE
585void
586buf_page_release_latch(
587/*=====================*/
588 buf_block_t* block, /*!< in: buffer block */
589 ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
590 RW_NO_LATCH */
591/********************************************************************//**
592Moves a page to the start of the buffer pool LRU list. This high-level
593function can be used to prevent an important page from slipping out of
594the buffer pool. */
595void
596buf_page_make_young(
597/*================*/
598 buf_page_t* bpage); /*!< in: buffer block of a file page */
599
600/** Returns TRUE if the page can be found in the buffer pool hash table.
601NOTE that it is possible that the page is not yet read from disk,
602though.
603@param[in] page_id page id
604@return TRUE if found in the page hash table */
605UNIV_INLINE
606ibool
607buf_page_peek(
608 const page_id_t& page_id);
609
610#ifdef UNIV_DEBUG
611
612/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
613This function should be called when we free a file page and want the
614debug version to check that it is not accessed any more unless
615reallocated.
616@param[in] page_id page id
617@return control block if found in page hash table, otherwise NULL */
618buf_page_t*
619buf_page_set_file_page_was_freed(
620 const page_id_t& page_id);
621
/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This is the counterpart of buf_page_set_file_page_was_freed(). Presumably
it is meant to be called when a previously freed file page is allocated
again, so that the debug version no longer treats accesses to it as
accesses to a freed page (TODO: confirm against the callers).
@param[in]	page_id	page id
@return control block if found in page hash table, otherwise NULL */
628buf_page_t*
629buf_page_reset_file_page_was_freed(
630 const page_id_t& page_id);
631
632#endif /* UNIV_DEBUG */
633/********************************************************************//**
634Reads the freed_page_clock of a buffer block.
635@return freed_page_clock */
636UNIV_INLINE
637unsigned
638buf_page_get_freed_page_clock(
639/*==========================*/
640 const buf_page_t* bpage) /*!< in: block */
641 MY_ATTRIBUTE((warn_unused_result));
642/********************************************************************//**
643Reads the freed_page_clock of a buffer block.
644@return freed_page_clock */
645UNIV_INLINE
646unsigned
647buf_block_get_freed_page_clock(
648/*===========================*/
649 const buf_block_t* block) /*!< in: block */
650 MY_ATTRIBUTE((warn_unused_result));
651
652/********************************************************************//**
653Tells if a block is still close enough to the MRU end of the LRU list
654meaning that it is not in danger of getting evicted and also implying
655that it has been accessed recently.
656Note that this is for heuristics only and does not reserve buffer pool
657mutex.
658@return TRUE if block is close to MRU end of LRU */
659UNIV_INLINE
660ibool
661buf_page_peek_if_young(
662/*===================*/
663 const buf_page_t* bpage); /*!< in: block */
664/********************************************************************//**
665Recommends a move of a block to the start of the LRU list if there is danger
666of dropping from the buffer pool. NOTE: does not reserve the buffer pool
667mutex.
668@return TRUE if should be made younger */
669UNIV_INLINE
670ibool
671buf_page_peek_if_too_old(
672/*=====================*/
673 const buf_page_t* bpage); /*!< in: block to make younger */
674/********************************************************************//**
675Gets the youngest modification log sequence number for a frame.
676Returns zero if not file page or no modification occurred yet.
677@return newest modification to page */
678UNIV_INLINE
679lsn_t
680buf_page_get_newest_modification(
681/*=============================*/
682 const buf_page_t* bpage); /*!< in: block containing the
683 page frame */
684/********************************************************************//**
685Increments the modify clock of a frame by 1. The caller must (1) own the
686buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock
687on the block. */
688UNIV_INLINE
689void
690buf_block_modify_clock_inc(
691/*=======================*/
692 buf_block_t* block); /*!< in: block */
693/********************************************************************//**
694Returns the value of the modify clock. The caller must have an s-lock
695or x-lock on the block.
696@return value */
697UNIV_INLINE
698ib_uint64_t
699buf_block_get_modify_clock(
700/*=======================*/
701 buf_block_t* block); /*!< in: block */
702/*******************************************************************//**
703Increments the bufferfix count. */
704UNIV_INLINE
705void
706buf_block_buf_fix_inc_func(
707/*=======================*/
708# ifdef UNIV_DEBUG
709 const char* file, /*!< in: file name */
710 unsigned line, /*!< in: line */
711# endif /* UNIV_DEBUG */
712 buf_block_t* block) /*!< in/out: block to bufferfix */
713 MY_ATTRIBUTE((nonnull));
714
715/** Increments the bufferfix count.
716@param[in,out] bpage block to bufferfix
717@return the count */
718UNIV_INLINE
719ulint
720buf_block_fix(
721 buf_page_t* bpage);
722
723/** Increments the bufferfix count.
724@param[in,out] block block to bufferfix
725@return the count */
726UNIV_INLINE
727ulint
728buf_block_fix(
729 buf_block_t* block);
730
731/** Decrements the bufferfix count.
732@param[in,out] bpage block to bufferunfix
733@return the remaining buffer-fix count */
734UNIV_INLINE
735ulint
736buf_block_unfix(
737 buf_page_t* bpage);
738/** Decrements the bufferfix count.
739@param[in,out] block block to bufferunfix
740@return the remaining buffer-fix count */
741UNIV_INLINE
742ulint
743buf_block_unfix(
744 buf_block_t* block);
745
746# ifdef UNIV_DEBUG
747/** Increments the bufferfix count.
748@param[in,out] b block to bufferfix
749@param[in] f file name where requested
750@param[in] l line number where requested */
751# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
752# else /* UNIV_DEBUG */
753/** Increments the bufferfix count.
754@param[in,out] b block to bufferfix
755@param[in] f file name where requested
756@param[in] l line number where requested */
757# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
758# endif /* UNIV_DEBUG */
759#endif /* !UNIV_INNOCHECKSUM */
760
761/** Checks if the page is in crc32 checksum format.
762@param[in] read_buf database page
763@param[in] checksum_field1 new checksum field
764@param[in] checksum_field2 old checksum field
765@param[in] use_legacy_big_endian use legacy big endian algorithm
766@return true if the page is in crc32 checksum format. */
767bool
768buf_page_is_checksum_valid_crc32(
769 const byte* read_buf,
770 ulint checksum_field1,
771 ulint checksum_field2,
772 bool use_legacy_big_endian)
773 MY_ATTRIBUTE((nonnull(1), warn_unused_result));
774
775/** Checks if the page is in innodb checksum format.
776@param[in] read_buf database page
777@param[in] checksum_field1 new checksum field
778@param[in] checksum_field2 old checksum field
779@return true if the page is in innodb checksum format. */
780bool
781buf_page_is_checksum_valid_innodb(
782 const byte* read_buf,
783 ulint checksum_field1,
784 ulint checksum_field2)
785 MY_ATTRIBUTE((nonnull(1), warn_unused_result));
786
787/** Checks if the page is in none checksum format.
788@param[in] read_buf database page
789@param[in] checksum_field1 new checksum field
790@param[in] checksum_field2 old checksum field
791@return true if the page is in none checksum format. */
792bool
793buf_page_is_checksum_valid_none(
794 const byte* read_buf,
795 ulint checksum_field1,
796 ulint checksum_field2)
797 MY_ATTRIBUTE((nonnull(1), warn_unused_result));
798
799/** Checks if a page contains only zeroes.
800@param[in] read_buf database page
801@param[in] page_size page size
802@return true if page is filled with zeroes */
803bool
804buf_page_is_zeroes(
805 const byte* read_buf,
806 const page_size_t& page_size);
807
808/** Check if a page is corrupt.
809@param[in] check_lsn whether the LSN should be checked
810@param[in] read_buf database page
811@param[in] page_size page size
812@param[in] space tablespace
813@return whether the page is corrupted */
814bool
815buf_page_is_corrupted(
816 bool check_lsn,
817 const byte* read_buf,
818 const page_size_t& page_size,
819#ifndef UNIV_INNOCHECKSUM
820 const fil_space_t* space = NULL)
821#else
822 const void* space = NULL)
823#endif
824 MY_ATTRIBUTE((warn_unused_result));
825
826
827#ifndef UNIV_INNOCHECKSUM
828
829/**********************************************************************//**
830Gets the space id, page offset, and byte offset within page of a
831pointer pointing to a buffer frame containing a file page. */
832UNIV_INLINE
833void
834buf_ptr_get_fsp_addr(
835/*=================*/
836 const void* ptr, /*!< in: pointer to a buffer frame */
837 ulint* space, /*!< out: space id */
838 fil_addr_t* addr); /*!< out: page offset and byte offset */
839/**********************************************************************//**
840Gets the hash value of a block. This can be used in searches in the
841lock hash table.
842@return lock hash value */
843UNIV_INLINE
844unsigned
845buf_block_get_lock_hash_val(
846/*========================*/
847 const buf_block_t* block) /*!< in: block */
848 MY_ATTRIBUTE((warn_unused_result));
849#ifdef UNIV_DEBUG
850/*********************************************************************//**
851Finds a block in the buffer pool that points to a
852given compressed page.
853@return buffer block pointing to the compressed page, or NULL */
854buf_block_t*
855buf_pool_contains_zip(
856/*==================*/
857 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
858 const void* data); /*!< in: pointer to compressed page */
859#endif /* UNIV_DEBUG */
860
861/***********************************************************************
862FIXME_FTS: Gets the frame the pointer is pointing to. */
863UNIV_INLINE
864buf_frame_t*
865buf_frame_align(
866/*============*/
867 /* out: pointer to frame */
868 byte* ptr); /* in: pointer to a frame */
869
870
871#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
872/*********************************************************************//**
873Validates the buffer pool data structure.
874@return TRUE */
875ibool
876buf_validate(void);
877/*==============*/
878#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
879#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
880/*********************************************************************//**
881Prints info of the buffer pool data structure. */
882void
883buf_print(void);
884/*============*/
885#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
886
887/** Dump a page to stderr.
888@param[in] read_buf database page
889@param[in] page_size page size */
890UNIV_INTERN
891void
892buf_page_print(const byte* read_buf, const page_size_t& page_size)
893 ATTRIBUTE_COLD __attribute__((nonnull));
894/********************************************************************//**
895Decompress a block.
896@return TRUE if successful */
897ibool
898buf_zip_decompress(
899/*===============*/
900 buf_block_t* block, /*!< in/out: block */
901 ibool check); /*!< in: TRUE=verify the page checksum */
902
#ifdef UNIV_DEBUG
/** Return the number of latched pages in the buffer pool.
Declared only in UNIV_DEBUG builds.
@return number of latched pages */
ulint
buf_get_latched_pages_number(void);
#endif /* UNIV_DEBUG */
911/*********************************************************************//**
912Returns the number of pending buf pool read ios.
913@return number of pending read I/O operations */
914ulint
915buf_get_n_pending_read_ios(void);
916/*============================*/
917/*********************************************************************//**
918Prints info of the buffer i/o. */
919void
920buf_print_io(
921/*=========*/
922 FILE* file); /*!< in: file where to print */
923/*******************************************************************//**
924Collect buffer pool stats information for a buffer pool. Also
925record aggregated stats if there are more than one buffer pool
926in the server */
927void
928buf_stats_get_pool_info(
929/*====================*/
930 buf_pool_t* buf_pool, /*!< in: buffer pool */
931 ulint pool_id, /*!< in: buffer pool ID */
932 buf_pool_info_t* all_pool_info); /*!< in/out: buffer pool info
933 to fill */
/** Return the ratio, in percent, of modified pages in the buffer pool
to database pages in the buffer pool.
@return modified page percentage ratio */
double
buf_get_modified_ratio_pct(void);
939/** Refresh the statistics used to print per-second averages. */
940void
941buf_refresh_io_stats_all(void);
942/** Assert that all file pages in the buffer are in a replaceable state.
943@return TRUE */
944ibool
945buf_all_freed(void);
946/*********************************************************************//**
947Checks that there currently are no pending i/o-operations for the buffer
948pool.
949@return number of pending i/o operations */
950ulint
951buf_pool_check_no_pending_io(void);
952/*==============================*/
953/*********************************************************************//**
954Invalidates the file pages in the buffer pool when an archive recovery is
955completed. All the file pages buffered must be in a replaceable state when
956this function is called: not latched and not modified. */
957void
958buf_pool_invalidate(void);
959/*=====================*/
960
961/*========================================================================
962--------------------------- LOWER LEVEL ROUTINES -------------------------
963=========================================================================*/
964
#ifdef UNIV_DEBUG
/** Add latch level info for the rw-lock protecting the buffer frame.
This should be called in the debug version after a successful latching
of a page if we know the latching order level of the acquired latch.
@param[in] block buffer page where we have acquired latch
@param[in] level latching order level */
UNIV_INLINE
void
buf_block_dbg_add_level(
 buf_block_t* block,
 latch_level_t level);
#else /* UNIV_DEBUG */
/* In non-debug builds the call expands to nothing, so its arguments
are not evaluated. */
# define buf_block_dbg_add_level(block, level) /* nothing */
#endif /* UNIV_DEBUG */
980/*********************************************************************//**
981Gets the state of a block.
982@return state */
983UNIV_INLINE
984enum buf_page_state
985buf_page_get_state(
986/*===============*/
987 const buf_page_t* bpage); /*!< in: pointer to the control
988 block */
989/*********************************************************************//**
990Gets the state name for state of a block
991@return name or "CORRUPTED" */
992UNIV_INLINE
993const char*
994buf_get_state_name(
995/*===============*/
996 const buf_block_t* block); /*!< in: pointer to the control
997 block */
998/*********************************************************************//**
999Gets the state of a block.
1000@return state */
1001UNIV_INLINE
1002enum buf_page_state
1003buf_block_get_state(
1004/*================*/
1005 const buf_block_t* block) /*!< in: pointer to the control block */
1006 MY_ATTRIBUTE((warn_unused_result));
1007/*********************************************************************//**
1008Sets the state of a block. */
1009UNIV_INLINE
1010void
1011buf_page_set_state(
1012/*===============*/
1013 buf_page_t* bpage, /*!< in/out: pointer to control block */
1014 enum buf_page_state state); /*!< in: state */
1015/*********************************************************************//**
1016Sets the state of a block. */
1017UNIV_INLINE
1018void
1019buf_block_set_state(
1020/*================*/
1021 buf_block_t* block, /*!< in/out: pointer to control block */
1022 enum buf_page_state state); /*!< in: state */
1023/*********************************************************************//**
1024Determines if a block is mapped to a tablespace.
1025@return TRUE if mapped */
1026UNIV_INLINE
1027ibool
1028buf_page_in_file(
1029/*=============*/
1030 const buf_page_t* bpage) /*!< in: pointer to control block */
1031 MY_ATTRIBUTE((warn_unused_result));
1032
1033/*********************************************************************//**
1034Determines if a block should be on unzip_LRU list.
1035@return TRUE if block belongs to unzip_LRU */
1036UNIV_INLINE
1037ibool
1038buf_page_belongs_to_unzip_LRU(
1039/*==========================*/
1040 const buf_page_t* bpage) /*!< in: pointer to control block */
1041 MY_ATTRIBUTE((warn_unused_result));
1042
1043/*********************************************************************//**
1044Gets the mutex of a block.
1045@return pointer to mutex protecting bpage */
1046UNIV_INLINE
1047BPageMutex*
1048buf_page_get_mutex(
1049/*===============*/
1050 const buf_page_t* bpage) /*!< in: pointer to control block */
1051 MY_ATTRIBUTE((warn_unused_result));
1052
1053/*********************************************************************//**
1054Get the flush type of a page.
1055@return flush type */
1056UNIV_INLINE
1057buf_flush_t
1058buf_page_get_flush_type(
1059/*====================*/
1060 const buf_page_t* bpage) /*!< in: buffer page */
1061 MY_ATTRIBUTE((warn_unused_result));
1062/*********************************************************************//**
1063Set the flush type of a page. */
1064UNIV_INLINE
1065void
1066buf_page_set_flush_type(
1067/*====================*/
1068 buf_page_t* bpage, /*!< in: buffer page */
1069 buf_flush_t flush_type); /*!< in: flush type */
1070
1071/** Map a block to a file page.
1072@param[in,out] block pointer to control block
1073@param[in] page_id page id */
1074UNIV_INLINE
1075void
1076buf_block_set_file_page(
1077 buf_block_t* block,
1078 const page_id_t& page_id);
1079
1080/*********************************************************************//**
1081Gets the io_fix state of a block.
1082@return io_fix state */
1083UNIV_INLINE
1084enum buf_io_fix
1085buf_page_get_io_fix(
1086/*================*/
1087 const buf_page_t* bpage) /*!< in: pointer to the control block */
1088 MY_ATTRIBUTE((warn_unused_result));
1089/*********************************************************************//**
1090Gets the io_fix state of a block.
1091@return io_fix state */
1092UNIV_INLINE
1093enum buf_io_fix
1094buf_block_get_io_fix(
1095/*================*/
1096 const buf_block_t* block) /*!< in: pointer to the control block */
1097 MY_ATTRIBUTE((warn_unused_result));
1098/*********************************************************************//**
1099Sets the io_fix state of a block. */
1100UNIV_INLINE
1101void
1102buf_page_set_io_fix(
1103/*================*/
1104 buf_page_t* bpage, /*!< in/out: control block */
1105 enum buf_io_fix io_fix);/*!< in: io_fix state */
1106/*********************************************************************//**
1107Sets the io_fix state of a block. */
1108UNIV_INLINE
1109void
1110buf_block_set_io_fix(
1111/*=================*/
1112 buf_block_t* block, /*!< in/out: control block */
1113 enum buf_io_fix io_fix);/*!< in: io_fix state */
1114/*********************************************************************//**
1115Makes a block sticky. A sticky block implies that even after we release
1116the buf_pool->mutex and the block->mutex:
1117* it cannot be removed from the flush_list
1118* the block descriptor cannot be relocated
1119* it cannot be removed from the LRU list
1120Note that:
1121* the block can still change its position in the LRU list
1122* the next and previous pointers can change. */
1123UNIV_INLINE
1124void
1125buf_page_set_sticky(
1126/*================*/
1127 buf_page_t* bpage); /*!< in/out: control block */
1128/*********************************************************************//**
1129Removes stickiness of a block. */
1130UNIV_INLINE
1131void
1132buf_page_unset_sticky(
1133/*==================*/
1134 buf_page_t* bpage); /*!< in/out: control block */
1135/********************************************************************//**
1136Determine if a buffer block can be relocated in memory. The block
1137can be dirty, but it must not be I/O-fixed or bufferfixed. */
1138UNIV_INLINE
1139ibool
1140buf_page_can_relocate(
1141/*==================*/
1142 const buf_page_t* bpage) /*!< control block being relocated */
1143 MY_ATTRIBUTE((warn_unused_result));
1144
1145/*********************************************************************//**
1146Determine if a block has been flagged old.
1147@return TRUE if old */
1148UNIV_INLINE
1149ibool
1150buf_page_is_old(
1151/*============*/
1152 const buf_page_t* bpage) /*!< in: control block */
1153 MY_ATTRIBUTE((warn_unused_result));
1154/*********************************************************************//**
1155Flag a block old. */
1156UNIV_INLINE
1157void
1158buf_page_set_old(
1159/*=============*/
1160 buf_page_t* bpage, /*!< in/out: control block */
1161 bool old); /*!< in: old */
1162/*********************************************************************//**
1163Determine the time of first access of a block in the buffer pool.
1164@return ut_time_ms() at the time of first access, 0 if not accessed */
1165UNIV_INLINE
1166unsigned
1167buf_page_is_accessed(
1168/*=================*/
1169 const buf_page_t* bpage) /*!< in: control block */
1170 MY_ATTRIBUTE((warn_unused_result));
1171/*********************************************************************//**
1172Flag a block accessed. */
1173UNIV_INLINE
1174void
1175buf_page_set_accessed(
1176/*==================*/
1177 buf_page_t* bpage) /*!< in/out: control block */
1178 MY_ATTRIBUTE((nonnull));
1179/*********************************************************************//**
1180Gets the buf_block_t handle of a buffered file block if an uncompressed
1181page frame exists, or NULL. Note: even though bpage is not declared a
1182const we don't update its value.
1183@return control block, or NULL */
1184UNIV_INLINE
1185buf_block_t*
1186buf_page_get_block(
1187/*===============*/
1188 buf_page_t* bpage) /*!< in: control block, or NULL */
1189 MY_ATTRIBUTE((warn_unused_result));
1190
#ifdef UNIV_DEBUG
/** Get a pointer to the memory frame of a block.
@param[in] block pointer to the control block
@return pointer to the frame */
UNIV_INLINE
buf_frame_t*
buf_block_get_frame(
 const buf_block_t* block)
 MY_ATTRIBUTE((warn_unused_result));
#else /* UNIV_DEBUG */
/** In non-debug builds the frame pointer is read directly from the block.
@param block pointer to the control block
@return the frame member of the block */
# define buf_block_get_frame(block) ((block)->frame)
#endif /* UNIV_DEBUG */
1204
1205/*********************************************************************//**
1206Gets the compressed page descriptor corresponding to an uncompressed page
1207if applicable. */
1208#define buf_block_get_page_zip(block) \
1209 ((block)->page.zip.data ? &(block)->page.zip : NULL)
1210
1211#ifdef BTR_CUR_HASH_ADAPT
1212/** Get a buffer block from an adaptive hash index pointer.
1213This function does not return if the block is not identified.
1214@param[in] ptr pointer to within a page frame
1215@return pointer to block, never NULL */
1216buf_block_t*
1217buf_block_from_ahi(const byte* ptr);
1218#endif /* BTR_CUR_HASH_ADAPT */
1219
1220/********************************************************************//**
1221Find out if a pointer belongs to a buf_block_t. It can be a pointer to
1222the buf_block_t itself or a member of it
1223@return TRUE if ptr belongs to a buf_block_t struct */
1224ibool
1225buf_pointer_is_block_field(
1226/*=======================*/
1227 const void* ptr); /*!< in: pointer not
1228 dereferenced */
/** Find out if a pointer corresponds to a buf_block_t::mutex.
This is a plain wrapper around buf_pointer_is_block_field(), so it only
checks that the pointer belongs to some buf_block_t; it does not check
which member the pointer refers to.
@param m in: mutex candidate
@return TRUE if m points into a buf_block_t */
#define buf_pool_is_block_mutex(m) \
 buf_pointer_is_block_field((const void*)(m))
/** Find out if a pointer corresponds to a buf_block_t::lock.
This is a plain wrapper around buf_pointer_is_block_field(), so it only
checks that the pointer belongs to some buf_block_t; it does not check
which member the pointer refers to.
@param l in: rw-lock candidate
@return TRUE if l points into a buf_block_t */
#define buf_pool_is_block_lock(l) \
 buf_pointer_is_block_field((const void*)(l))
1239
/** Initialize a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED
@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...
@param[in] page_id page id
@param[in] page_size page size
@param[in] unzip whether the uncompressed page is
 requested (for ROW_FORMAT=COMPRESSED)
@return pointer to the block
@retval NULL in case of an error */
buf_page_t*
buf_page_init_for_read(
 dberr_t* err,
 ulint mode,
 const page_id_t& page_id,
 const page_size_t& page_size,
 bool unzip);
1262
1263/** Complete a read or write request of a file page to or from the buffer pool.
1264@param[in,out] bpage page to complete
1265@param[in] dblwr whether the doublewrite buffer was used (on write)
1266@param[in] evict whether or not to evict the page from LRU list
1267@return whether the operation succeeded
1268@retval DB_SUCCESS always when writing, or if a read page was OK
1269@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
1270@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
1271 after decryption normal page checksum does
1272 not match */
1273UNIV_INTERN
1274dberr_t
1275buf_page_io_complete(buf_page_t* bpage, bool dblwr = false, bool evict = false)
1276 MY_ATTRIBUTE((nonnull));
1277
1278/********************************************************************//**
1279Calculates the index of a buffer pool to the buf_pool[] array.
1280@return the position of the buffer pool in buf_pool[] */
1281UNIV_INLINE
1282unsigned
1283buf_pool_index(
1284/*===========*/
1285 const buf_pool_t* buf_pool) /*!< in: buffer pool */
1286 MY_ATTRIBUTE((warn_unused_result));
1287/******************************************************************//**
1288Returns the buffer pool instance given a page instance
1289@return buf_pool */
1290UNIV_INLINE
1291buf_pool_t*
1292buf_pool_from_bpage(
1293/*================*/
1294 const buf_page_t* bpage); /*!< in: buffer pool page */
1295/******************************************************************//**
1296Returns the buffer pool instance given a block instance
1297@return buf_pool */
1298UNIV_INLINE
1299buf_pool_t*
1300buf_pool_from_block(
1301/*================*/
1302 const buf_block_t* block); /*!< in: block */
1303
1304/** Returns the buffer pool instance given a page id.
1305@param[in] page_id page id
1306@return buffer pool */
1307UNIV_INLINE
1308buf_pool_t*
1309buf_pool_get(
1310 const page_id_t& page_id);
1311
1312/******************************************************************//**
1313Returns the buffer pool instance given its array index
1314@return buffer pool */
1315UNIV_INLINE
1316buf_pool_t*
1317buf_pool_from_array(
1318/*================*/
1319 ulint index); /*!< in: array index to get
1320 buffer pool instance from */
1321
1322/** Returns the control block of a file page, NULL if not found.
1323@param[in] buf_pool buffer pool instance
1324@param[in] page_id page id
1325@return block, NULL if not found */
1326UNIV_INLINE
1327buf_page_t*
1328buf_page_hash_get_low(
1329 buf_pool_t* buf_pool,
1330 const page_id_t& page_id);
1331
1332/** Returns the control block of a file page, NULL if not found.
1333If the block is found and lock is not NULL then the appropriate
1334page_hash lock is acquired in the specified lock mode. Otherwise,
1335mode value is ignored. It is up to the caller to release the
1336lock. If the block is found and the lock is NULL then the page_hash
1337lock is released by this function.
1338@param[in] buf_pool buffer pool instance
1339@param[in] page_id page id
1340@param[in,out] lock lock of the page hash acquired if bpage is
1341found, NULL otherwise. If NULL is passed then the hash_lock is released by
1342this function.
1343@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if
1344lock == NULL
1345@param[in] watch if true, return watch sentinel also.
1346@return pointer to the bpage or NULL; if NULL, lock is also NULL or
1347a watch sentinel. */
1348UNIV_INLINE
1349buf_page_t*
1350buf_page_hash_get_locked(
1351 buf_pool_t* buf_pool,
1352 const page_id_t& page_id,
1353 rw_lock_t** lock,
1354 ulint lock_mode,
1355 bool watch = false);
1356
1357/** Returns the control block of a file page, NULL if not found.
1358If the block is found and lock is not NULL then the appropriate
1359page_hash lock is acquired in the specified lock mode. Otherwise,
1360mode value is ignored. It is up to the caller to release the
1361lock. If the block is found and the lock is NULL then the page_hash
1362lock is released by this function.
1363@param[in] buf_pool buffer pool instance
1364@param[in] page_id page id
1365@param[in,out] lock lock of the page hash acquired if bpage is
1366found, NULL otherwise. If NULL is passed then the hash_lock is released by
1367this function.
1368@param[in] lock_mode RW_LOCK_X or RW_LOCK_S. Ignored if
1369lock == NULL
1370@return pointer to the block or NULL; if NULL, lock is also NULL. */
1371UNIV_INLINE
1372buf_block_t*
1373buf_block_hash_get_locked(
1374 buf_pool_t* buf_pool,
1375 const page_id_t& page_id,
1376 rw_lock_t** lock,
1377 ulint lock_mode);
1378
/* There are four different ways we can try to get a bpage or block
from the page hash:
1) Caller already holds the appropriate page hash lock: in that case call
the buf_page_hash_get_low() function.
2) Caller wants to hold page hash lock in x-mode: use the
*_hash_get_x_locked() macros, which pass RW_LOCK_X.
3) Caller wants to hold page hash lock in s-mode: use the
*_hash_get_s_locked() macros, which pass RW_LOCK_S.
4) Caller doesn't want to hold page hash lock: use buf_page_hash_get()
or buf_block_hash_get(), which pass lock == NULL.
buf_page_get_also_watch() is like buf_page_hash_get(), but it passes
watch == true. */
#define buf_page_hash_get_s_locked(b, page_id, l) \
 buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S)
#define buf_page_hash_get_x_locked(b, page_id, l) \
 buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X)
#define buf_page_hash_get(b, page_id) \
 buf_page_hash_get_locked(b, page_id, NULL, 0)
#define buf_page_get_also_watch(b, page_id) \
 buf_page_hash_get_locked(b, page_id, NULL, 0, true)

/* The same variants for buf_block_t lookups. */
#define buf_block_hash_get_s_locked(b, page_id, l) \
 buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S)
#define buf_block_hash_get_x_locked(b, page_id, l) \
 buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X)
#define buf_block_hash_get(b, page_id) \
 buf_block_hash_get_locked(b, page_id, NULL, 0)
1401
1402/********************************************************************//**
1403Determine if a block is a sentinel for a buffer pool watch.
1404@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1405ibool
1406buf_pool_watch_is_sentinel(
1407/*=======================*/
1408 const buf_pool_t* buf_pool, /*!< buffer pool instance */
1409 const buf_page_t* bpage) /*!< in: block */
1410 MY_ATTRIBUTE((nonnull, warn_unused_result));
1411
1412/** Stop watching if the page has been read in.
1413buf_pool_watch_set(space,offset) must have returned NULL before.
1414@param[in] page_id page id */
1415void
1416buf_pool_watch_unset(
1417 const page_id_t& page_id);
1418
1419/** Check if the page has been read in.
1420This may only be called after buf_pool_watch_set(space,offset)
1421has returned NULL and before invoking buf_pool_watch_unset(space,offset).
1422@param[in] page_id page id
1423@return FALSE if the given page was not read in, TRUE if it was */
1424ibool
1425buf_pool_watch_occurred(
1426 const page_id_t& page_id)
1427MY_ATTRIBUTE((warn_unused_result));
1428
1429/********************************************************************//**
1430Get total buffer pool statistics. */
1431void
1432buf_get_total_list_len(
1433/*===================*/
1434 ulint* LRU_len, /*!< out: length of all LRU lists */
1435 ulint* free_len, /*!< out: length of all free lists */
1436 ulint* flush_list_len);/*!< out: length of all flush lists */
1437/********************************************************************//**
1438Get total list size in bytes from all buffer pools. */
1439void
1440buf_get_total_list_size_in_bytes(
1441/*=============================*/
1442 buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes
1443 in all buffer pools */
1444/********************************************************************//**
1445Get total buffer pool statistics. */
1446void
1447buf_get_total_stat(
1448/*===============*/
1449 buf_pool_stat_t*tot_stat); /*!< out: buffer pool stats */
1450/*********************************************************************//**
1451Get the nth chunk's buffer block in the specified buffer pool.
1452@return the nth chunk's buffer block. */
1453UNIV_INLINE
1454buf_block_t*
1455buf_get_nth_chunk_block(
1456/*====================*/
1457 const buf_pool_t* buf_pool, /*!< in: buffer pool instance */
1458 ulint n, /*!< in: nth chunk in the buffer pool */
1459 ulint* chunk_size); /*!< in: chunk size */
1460
1461/** Verify the possibility that a stored page is not in buffer pool.
1462@param[in] withdraw_clock withdraw clock when stored the page
1463@retval true if the page might be relocated */
1464UNIV_INLINE
1465bool
1466buf_pool_is_obsolete(
1467 ulint withdraw_clock);
1468
1469/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
1470if needed.
1471@param[in] size size in bytes
1472@return aligned size */
1473UNIV_INLINE
1474ulint
1475buf_pool_size_align(
1476 ulint size);
1477
1478/** Calculate the checksum of a page from compressed table and update the
1479page.
1480@param[in,out] page page to update
1481@param[in] size compressed page size
1482@param[in] lsn LSN to stamp on the page */
1483void
1484buf_flush_update_zip_checksum(
1485 buf_frame_t* page,
1486 ulint size,
1487 lsn_t lsn);
1488
1489/** Encryption and page_compression hook that is called just before
1490a page is written to disk.
1491@param[in,out] space tablespace
1492@param[in,out] bpage buffer page
1493@param[in] src_frame physical page frame that is being encrypted
1494@return page frame to be written to file
1495(may be src_frame or an encrypted/compressed copy of it) */
1496UNIV_INTERN
1497byte*
1498buf_page_encrypt_before_write(
1499 fil_space_t* space,
1500 buf_page_t* bpage,
1501 byte* src_frame);
1502
/** @brief The temporary memory structure: one slot of temporary
buffers used for encryption/compression.

NOTE! The definition appears here only for other modules of this
directory (buf) to see it. Do not use from outside! */

typedef struct {
 bool reserved; /*!< true if this slot is reserved */
 byte* crypt_buf; /*!< for encryption, the data needs to be
 copied to a separate buffer before it is
 encrypted and written, because a page can
 be read while it is being flushed */
 byte* comp_buf; /*!< for compression, a temporary buffer
 is needed because a page can be read
 while it is being flushed */
 byte* out_buf; /*!< resulting buffer after
 encryption/compression. This is a
 pointer and not allocated. */
} buf_tmp_buffer_t;
1522
1523/** The common buffer control block structure
1524for compressed and uncompressed frames */
1525
1526/** Number of bits used for buffer page states. */
1527#define BUF_PAGE_STATE_BITS 3
1528
/** Page control block: the common buffer control block structure for
compressed and uncompressed frames. The members are grouped into general
fields, page flushing fields and LRU replacement fields. */
class buf_page_t {
public:
 /** @name General fields
 These bit-fields must not be modified without holding
 buf_page_get_mutex() [buf_block_t::mutex or
 buf_pool->zip_mutex], since they can be stored in the same
 machine word. Some of these fields are additionally protected
 by buf_pool->mutex.
 NOTE(review): only flush_type and buf_pool_index are declared as
 bit-fields in this group; confirm whether the rule is meant to
 cover the other members as well. */
 /* @{ */

 /** Page id. Protected by buf_pool mutex. */
 page_id_t id;

 /** Page size. Protected by buf_pool mutex. */
 page_size_t size;

 /** Number of times this block is currently buffer-fixed. */
 ib_uint32_t buf_fix_count;

 /** Type of pending I/O operation; also protected by
 buf_pool->mutex for writes only */
 buf_io_fix io_fix;

 /** Block state. @see buf_page_in_file */
 buf_page_state state;

 unsigned flush_type:2; /*!< if this block is currently being
 flushed to disk, this tells the
 flush_type.
 @see buf_flush_t */
 unsigned buf_pool_index:6;/*!< index number of the buffer pool
 that this block belongs to */
 /* A 6-bit index can represent at most 64 buffer pool instances. */
# if MAX_BUFFER_POOLS > 64
# error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
# endif
 /* @} */
 page_zip_des_t zip; /*!< compressed page; zip.data
 (but not the data it points to) is
 also protected by buf_pool->mutex;
 state == BUF_BLOCK_ZIP_PAGE and
 zip.data == NULL means an active
 buf_pool->watch */

 ulint write_size; /*!< set when this page is written
 for the first time; when the page is
 written again, it is used to check
 whether a TRIM operation is needed */

 bool encrypted; /*!< page is still encrypted */

 ulint real_size; /*!< real size of the page:
 for normal pages, srv_page_size;
 for page compressed pages, the
 payload size aligned to a sector
 boundary */

 buf_tmp_buffer_t* slot; /*!< Slot for temporary memory
 used for encryption/compression
 or NULL */
 buf_page_t* hash; /*!< node used in chaining to
 buf_pool->page_hash or
 buf_pool->zip_hash */
#ifdef UNIV_DEBUG
 ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */
 ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
#endif /* UNIV_DEBUG */

 /** @name Page flushing fields
 All these are protected by buf_pool->mutex. */
 /* @{ */

 UT_LIST_NODE_T(buf_page_t) list;
 /*!< based on state, this is a
 list node, protected either by
 buf_pool->mutex or by
 buf_pool->flush_list_mutex,
 in one of the following lists in
 buf_pool:

 - BUF_BLOCK_NOT_USED: free, withdraw
 - BUF_BLOCK_FILE_PAGE: flush_list
 - BUF_BLOCK_ZIP_DIRTY: flush_list
 - BUF_BLOCK_ZIP_PAGE: zip_clean

 If bpage is part of flush_list
 then the node pointers are
 covered by buf_pool->flush_list_mutex.
 Otherwise these pointers are
 protected by buf_pool->mutex.

 The contents of the list node
 are undefined if !in_flush_list
 && state == BUF_BLOCK_FILE_PAGE,
 or if state is one of
 BUF_BLOCK_MEMORY,
 BUF_BLOCK_REMOVE_HASH or
 BUF_BLOCK_READY_IN_USE. */

#ifdef UNIV_DEBUG
 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
 when buf_pool->flush_list_mutex is
 free, the following should hold:
 in_flush_list
 == (state == BUF_BLOCK_FILE_PAGE
 || state == BUF_BLOCK_ZIP_DIRTY)
 Writes to this field must be
 covered by both block->mutex
 and buf_pool->flush_list_mutex. Hence
 reads can happen while holding
 any one of the two mutexes */
 ibool in_free_list; /*!< TRUE if in buf_pool->free; when
 buf_pool->mutex is free, the following
 should hold: in_free_list
 == (state == BUF_BLOCK_NOT_USED) */
#endif /* UNIV_DEBUG */

 FlushObserver* flush_observer; /*!< flush observer */

 lsn_t newest_modification;
 /*!< log sequence number of
 the youngest modification to
 this block, zero if not
 modified. Protected by block
 mutex */
 lsn_t oldest_modification;
 /*!< log sequence number of
 the START of the log entry
 written of the oldest
 modification to this block
 which has not yet been flushed
 on disk; zero if all
 modifications are on disk.
 Writes to this field must be
 covered by both block->mutex
 and buf_pool->flush_list_mutex. Hence
 reads can happen while holding
 any one of the two mutexes */
 /* @} */
 /** @name LRU replacement algorithm fields
 These fields are protected by buf_pool->mutex only (not
 buf_pool->zip_mutex or buf_block_t::mutex). */
 /* @{ */

 UT_LIST_NODE_T(buf_page_t) LRU;
 /*!< node of the LRU list */
#ifdef UNIV_DEBUG
 ibool in_LRU_list; /*!< TRUE if the page is in
 the LRU list; used in
 debugging */
#endif /* UNIV_DEBUG */
 unsigned old:1; /*!< TRUE if the block is in the old
 blocks in buf_pool->LRU_old */
 unsigned freed_page_clock:31;/*!< the value of
 buf_pool->freed_page_clock
 when this block was last
 put to the head of the
 LRU list; a thread is allowed
 to read this for heuristic
 purposes without holding any
 mutex or latch */
 /* @} */
 unsigned access_time; /*!< time of first access, or
 0 if the block was never accessed
 in the buffer pool. Protected by
 block mutex */
# ifdef UNIV_DEBUG
 ibool file_page_was_freed;
 /*!< this is set to TRUE when
 fsp frees a page in buffer pool;
 protected by buf_pool->zip_mutex
 or buf_block_t::mutex. */
# endif /* UNIV_DEBUG */
};
1702
1703/** The buffer control block structure */
1704
1705struct buf_block_t{
1706
1707 /** @name General fields */
1708 /* @{ */
1709
1710 buf_page_t page; /*!< page information; this must
1711 be the first field, so that
1712 buf_pool->page_hash can point
1713 to buf_page_t or buf_block_t */
1714 byte* frame; /*!< pointer to buffer frame which
1715 is of size srv_page_size, and
1716 aligned to an address divisible by
1717 srv_page_size */
1718 BPageLock lock; /*!< read-write lock of the buffer
1719 frame */
1720 UT_LIST_NODE_T(buf_block_t) unzip_LRU;
1721 /*!< node of the decompressed LRU list;
1722 a block is in the unzip_LRU list
1723 if page.state == BUF_BLOCK_FILE_PAGE
1724 and page.zip.data != NULL */
1725#ifdef UNIV_DEBUG
1726 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
1727 decompressed LRU list;
1728 used in debugging */
1729 ibool in_withdraw_list;
1730#endif /* UNIV_DEBUG */
1731 uint32_t lock_hash_val; /*!< hashed value of the page address
1732 in the record lock hash table;
1733 protected by buf_block_t::lock
1734 (or buf_block_t::mutex, buf_pool->mutex
1735 in buf_page_get_gen(),
1736 buf_page_init_for_read()
1737 and buf_page_create()) */
1738 /* @} */
1739 /** @name Optimistic search field */
1740 /* @{ */
1741
1742 ib_uint64_t modify_clock; /*!< this clock is incremented every
1743 time a pointer to a record on the
1744 page may become obsolete; this is
1745 used in the optimistic cursor
1746 positioning: if the modify clock has
1747 not changed, we know that the pointer
1748 is still valid; this field may be
1749 changed if the thread (1) owns the
1750 pool mutex and the page is not
1751 bufferfixed, or (2) the thread has an
1752 x-latch on the block */
1753 /* @} */
1754#ifdef BTR_CUR_HASH_ADAPT
1755 /** @name Hash search fields (unprotected)
1756 NOTE that these fields are NOT protected by any semaphore! */
1757 /* @{ */
1758
1759 ulint n_hash_helps; /*!< counter which controls building
1760 of a new hash index for the page */
1761 volatile ulint n_bytes; /*!< recommended prefix length for hash
1762 search: number of bytes in
1763 an incomplete last field */
1764 volatile ulint n_fields; /*!< recommended prefix length for hash
1765 search: number of full fields */
1766 volatile bool left_side; /*!< true or false, depending on
1767 whether the leftmost record of several
1768 records with the same prefix should be
1769 indexed in the hash index */
1770 /* @} */
1771
1772 /** @name Hash search fields
1773 These 5 fields may only be modified when:
1774 we are holding the appropriate x-latch in btr_search_latches[], and
1775 one of the following holds:
1776 (1) the block state is BUF_BLOCK_FILE_PAGE, and
1777 we are holding an s-latch or x-latch on buf_block_t::lock, or
1778 (2) buf_block_t::buf_fix_count == 0, or
1779 (3) the block state is BUF_BLOCK_REMOVE_HASH.
1780
1781 An exception to this is when we init or create a page
1782 in the buffer pool in buf0buf.cc.
1783
1784 Another exception for buf_pool_clear_hash_index() is that
1785 assigning block->index = NULL (and block->n_pointers = 0)
1786 is allowed whenever btr_search_own_all(RW_LOCK_X).
1787
1788 Another exception is that ha_insert_for_fold_func() may
1789 decrement n_pointers without holding the appropriate latch
1790 in btr_search_latches[]. Thus, n_pointers must be
1791 protected by atomic memory access.
1792
1793 This implies that the fields may be read without race
1794 condition whenever any of the following hold:
1795 - the btr_search_latches[] s-latch or x-latch is being held, or
1796 - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
1797 and holding some latch prevents the state from changing to that.
1798
1799 Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
1800 is prone to race conditions while buf_pool_clear_hash_index() is
1801 executing (the adaptive hash index is being disabled). Such use
1802 is explicitly commented. */
1803
1804 /* @{ */
1805
1806# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
1807 ulint n_pointers; /*!< used in debugging: the number of
1808 pointers in the adaptive hash index
1809 pointing to this frame;
1810 protected by atomic memory access
1811 or btr_search_own_all(). */
1812# define assert_block_ahi_empty(block) \
1813 ut_a(my_atomic_addlint(&(block)->n_pointers, 0) == 0)
1814# define assert_block_ahi_empty_on_init(block) do { \
1815 UNIV_MEM_VALID(&(block)->n_pointers, sizeof (block)->n_pointers); \
1816 assert_block_ahi_empty(block); \
1817} while (0)
1818# define assert_block_ahi_valid(block) \
1819 ut_a((block)->index \
1820 || my_atomic_loadlint(&(block)->n_pointers) == 0)
1821# else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1822# define assert_block_ahi_empty(block) /* nothing */
1823# define assert_block_ahi_empty_on_init(block) /* nothing */
1824# define assert_block_ahi_valid(block) /* nothing */
1825# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1826 unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
1827 number of full fields */
1828 unsigned curr_n_bytes:15;/*!< number of bytes in hash
1829 indexing */
1830 unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
1831 dict_index_t* index; /*!< Index for which the
1832 adaptive hash index has been
1833 created, or NULL if the page
1834 does not exist in the
1835 index. Note that it does not
1836 guarantee that the index is
1837 complete, though: there may
1838 have been hash collisions,
1839 record deletions, etc. */
1840 /* @} */
1841#else /* BTR_CUR_HASH_ADAPT */
1842# define assert_block_ahi_empty(block) /* nothing */
1843# define assert_block_ahi_empty_on_init(block) /* nothing */
1844# define assert_block_ahi_valid(block) /* nothing */
1845#endif /* BTR_CUR_HASH_ADAPT */
1846 bool skip_flush_check;
1847 /*!< Skip check in buf_dblwr_check_block
1848 during bulk load, protected by lock.*/
1849# ifdef UNIV_DEBUG
1850 /** @name Debug fields */
1851 /* @{ */
1852 rw_lock_t debug_latch; /*!< in the debug version, each thread
1853 which bufferfixes the block acquires
1854 an s-latch here; so we can use the
1855 debug utilities in sync0rw */
1856 /* @} */
1857# endif
1858 BPageMutex mutex; /*!< mutex protecting this block:
1859 state (also protected by the buffer
1860 pool mutex), io_fix, buf_fix_count,
1861 and accessed; we introduce this new
1862 mutex in InnoDB-5.1 to relieve
1863 contention on the buffer pool mutex */
1864};
1865
/** Check that the state of a buf_block_t lies within the closed range
[BUF_BLOCK_NOT_USED, BUF_BLOCK_REMOVE_HASH].
Note that the argument is evaluated twice.
@param block buffer block
@return TRUE if valid */
#define buf_block_state_valid(block) \
	(BUF_BLOCK_NOT_USED <= buf_block_get_state(block) \
	 && buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH)
1872
1873
/** @name Hash fold values for blocks in buf_pool->zip_hash.
The fold is the frame address shifted right by srv_page_size_shift. */
/* @{ */
/** Fold value of a frame pointer */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) \
	((ulint) (ptr) >> srv_page_size_shift)
/** Fold value of a buf_block_t, computed from its frame */
#define BUF_POOL_ZIP_FOLD(b) \
	BUF_POOL_ZIP_FOLD_PTR((b)->frame)
/** Fold value of a pointer that is cast to buf_block_t* first */
#define BUF_POOL_ZIP_FOLD_BPAGE(b) \
	BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
1881
1882/** A "Hazard Pointer" class used to iterate over page lists
1883inside the buffer pool. A hazard pointer is a buf_page_t pointer
1884which we intend to iterate over next and we want it remain valid
1885even after we release the buffer pool mutex. */
1886class HazardPointer {
1887
1888public:
1889 /** Constructor
1890 @param buf_pool buffer pool instance
1891 @param mutex mutex that is protecting the hp. */
1892 HazardPointer(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1893 :
1894 m_buf_pool(buf_pool)
1895#ifdef UNIV_DEBUG
1896 , m_mutex(mutex)
1897#endif /* UNIV_DEBUG */
1898 , m_hp() {}
1899
1900 /** Destructor */
1901 virtual ~HazardPointer() {}
1902
1903 /** Get current value */
1904 buf_page_t* get() const
1905 {
1906 ut_ad(mutex_own(m_mutex));
1907 return(m_hp);
1908 }
1909
1910 /** Set current value
1911 @param bpage buffer block to be set as hp */
1912 void set(buf_page_t* bpage);
1913
1914 /** Checks if a bpage is the hp
1915 @param bpage buffer block to be compared
1916 @return true if it is hp */
1917 bool is_hp(const buf_page_t* bpage);
1918
1919 /** Adjust the value of hp. This happens when some
1920 other thread working on the same list attempts to
1921 remove the hp from the list. Must be implemented
1922 by the derived classes.
1923 @param bpage buffer block to be compared */
1924 virtual void adjust(const buf_page_t*) = 0;
1925
1926protected:
1927 /** Disable copying */
1928 HazardPointer(const HazardPointer&);
1929 HazardPointer& operator=(const HazardPointer&);
1930
1931 /** Buffer pool instance */
1932 const buf_pool_t* m_buf_pool;
1933
1934#ifdef UNIV_DEBUG
1935 /** mutex that protects access to the m_hp. */
1936 const ib_mutex_t* m_mutex;
1937#endif /* UNIV_DEBUG */
1938
1939 /** hazard pointer. */
1940 buf_page_t* m_hp;
1941};
1942
1943/** Class implementing buf_pool->flush_list hazard pointer */
1944class FlushHp: public HazardPointer {
1945
1946public:
1947 /** Constructor
1948 @param buf_pool buffer pool instance
1949 @param mutex mutex that is protecting the hp. */
1950 FlushHp(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1951 :
1952 HazardPointer(buf_pool, mutex) {}
1953
1954 /** Destructor */
1955 virtual ~FlushHp() {}
1956
1957 /** Adjust the value of hp. This happens when some
1958 other thread working on the same list attempts to
1959 remove the hp from the list.
1960 @param bpage buffer block to be compared */
1961 void adjust(const buf_page_t* bpage);
1962};
1963
1964/** Class implementing buf_pool->LRU hazard pointer */
1965class LRUHp: public HazardPointer {
1966
1967public:
1968 /** Constructor
1969 @param buf_pool buffer pool instance
1970 @param mutex mutex that is protecting the hp. */
1971 LRUHp(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1972 :
1973 HazardPointer(buf_pool, mutex) {}
1974
1975 /** Destructor */
1976 virtual ~LRUHp() {}
1977
1978 /** Adjust the value of hp. This happens when some
1979 other thread working on the same list attempts to
1980 remove the hp from the list.
1981 @param bpage buffer block to be compared */
1982 void adjust(const buf_page_t* bpage);
1983};
1984
1985/** Special purpose iterators to be used when scanning the LRU list.
1986The idea is that when one thread finishes the scan it leaves the
1987itr in that position and the other thread can start scan from
1988there */
1989class LRUItr: public LRUHp {
1990
1991public:
1992 /** Constructor
1993 @param buf_pool buffer pool instance
1994 @param mutex mutex that is protecting the hp. */
1995 LRUItr(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1996 :
1997 LRUHp(buf_pool, mutex) {}
1998
1999 /** Destructor */
2000 virtual ~LRUItr() {}
2001
2002 /** Selects from where to start a scan. If we have scanned
2003 too deep into the LRU list it resets the value to the tail
2004 of the LRU list.
2005 @return buf_page_t from where to start scan. */
2006 buf_page_t* start();
2007};
2008
2009/** Struct that is embedded in the free zip blocks */
2010struct buf_buddy_free_t {
2011 union {
2012 ulint size; /*!< size of the block */
2013 byte bytes[FIL_PAGE_DATA];
2014 /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
2015 == BUF_BUDDY_FREE_STAMP denotes a free
2016 block. If the space_id field of buddy
2017 block != BUF_BUDDY_FREE_STAMP, the block
2018 is not in any zip_free list. If the
2019 space_id is BUF_BUDDY_FREE_STAMP then
2020 stamp[0] will contain the
2021 buddy block size. */
2022 } stamp;
2023
2024 buf_page_t bpage; /*!< Embedded bpage descriptor */
2025 UT_LIST_NODE_T(buf_buddy_free_t) list;
2026 /*!< Node of zip_free list */
2027};
2028
2029/** @brief The buffer pool statistics structure. */
2030struct buf_pool_stat_t{
2031 ulint n_page_gets; /*!< number of page gets performed;
2032 also successful searches through
2033 the adaptive hash index are
2034 counted as page gets; this field
2035 is NOT protected by the buffer
2036 pool mutex */
2037 ulint n_pages_read; /*!< number read operations */
2038 ulint n_pages_written;/*!< number write operations */
2039 ulint n_pages_created;/*!< number of pages created
2040 in the pool with no read */
2041 ulint n_ra_pages_read_rnd;/*!< number of pages read in
2042 as part of random read ahead */
2043 ulint n_ra_pages_read;/*!< number of pages read in
2044 as part of read ahead */
2045 ulint n_ra_pages_evicted;/*!< number of read ahead
2046 pages that are evicted without
2047 being accessed */
2048 ulint n_pages_made_young; /*!< number of pages made young, in
2049 calls to buf_LRU_make_block_young() */
2050 ulint n_pages_not_made_young; /*!< number of pages not made
2051 young because the first access
2052 was not long enough ago, in
2053 buf_page_peek_if_too_old() */
2054 ulint LRU_bytes; /*!< LRU size in bytes */
2055 ulint flush_list_bytes;/*!< flush_list size in bytes */
2056};
2057
2058/** Statistics of buddy blocks of a given size. */
2059struct buf_buddy_stat_t {
2060 /** Number of blocks allocated from the buddy system. */
2061 ulint used;
2062 /** Number of blocks relocated by the buddy system. */
2063 ib_uint64_t relocated;
2064 /** Total duration of block relocations, in microseconds. */
2065 ib_uint64_t relocated_usec;
2066};
2067
2068/** @brief The temporary memory array structure.
2069
2070NOTE! The definition appears here only for other modules of this
2071directory (buf) to see it. Do not use from outside! */
2072
2073typedef struct {
2074 ulint n_slots; /*!< Total number of slots */
2075 buf_tmp_buffer_t *slots; /*!< Pointer to the slots in the
2076 array */
2077} buf_tmp_array_t;
2078
2079/** @brief The buffer pool structure.
2080
2081NOTE! The definition appears here only for other modules of this
2082directory (buf) to see it. Do not use from outside! */
2083
2084struct buf_pool_t{
2085
2086 /** @name General fields */
2087 /* @{ */
2088 BufPoolMutex mutex; /*!< Buffer pool mutex of this
2089 instance */
2090 BufPoolZipMutex zip_mutex; /*!< Zip mutex of this buffer
2091 pool instance, protects compressed
2092 only pages (of type buf_page_t, not
2093 buf_block_t */
2094 ulint instance_no; /*!< Array index of this buffer
2095 pool instance */
2096 ulint curr_pool_size; /*!< Current pool size in bytes */
2097 ulint LRU_old_ratio; /*!< Reserve this much of the buffer
2098 pool for "old" blocks */
2099#ifdef UNIV_DEBUG
2100 ulint buddy_n_frames; /*!< Number of frames allocated from
2101 the buffer pool to the buddy system */
2102#endif
2103#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2104 ulint mutex_exit_forbidden; /*!< Forbid release mutex */
2105#endif
2106 ut_allocator<unsigned char> allocator; /*!< Allocator used for
2107 allocating memory for the the "chunks"
2108 member. */
2109 volatile ulint n_chunks; /*!< number of buffer pool chunks */
2110 volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
2111 buf_chunk_t* chunks; /*!< buffer pool chunks */
2112 buf_chunk_t* chunks_old; /*!< old buffer pool chunks to be freed
2113 after resizing buffer pool */
2114 ulint curr_size; /*!< current pool size in pages */
2115 ulint old_size; /*!< previous pool size in pages */
2116 ulint read_ahead_area;/*!< size in pages of the area which
2117 the read-ahead algorithms read if
2118 invoked */
2119 hash_table_t* page_hash; /*!< hash table of buf_page_t or
2120 buf_block_t file pages,
2121 buf_page_in_file() == TRUE,
2122 indexed by (space_id, offset).
2123 page_hash is protected by an
2124 array of mutexes.
2125 Changes in page_hash are protected
2126 by buf_pool->mutex and the relevant
2127 page_hash mutex. Lookups can happen
2128 while holding the buf_pool->mutex or
2129 the relevant page_hash mutex. */
2130 hash_table_t* page_hash_old; /*!< old pointer to page_hash to be
2131 freed after resizing buffer pool */
2132 hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
2133 whose frames are allocated to the
2134 zip buddy system,
2135 indexed by block->frame */
2136 ulint n_pend_reads; /*!< number of pending read
2137 operations */
2138 ulint n_pend_unzip; /*!< number of pending decompressions */
2139
2140 time_t last_printout_time;
2141 /*!< when buf_print_io was last time
2142 called */
2143 buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
2144 /*!< Statistics of buddy system,
2145 indexed by block size */
2146 buf_pool_stat_t stat; /*!< current statistics */
2147 buf_pool_stat_t old_stat; /*!< old statistics */
2148
2149 /* @} */
2150
2151 /** @name Page flushing algorithm fields */
2152
2153 /* @{ */
2154
2155 FlushListMutex flush_list_mutex;/*!< mutex protecting the
2156 flush list access. This mutex
2157 protects flush_list, flush_rbt
2158 and bpage::list pointers when
2159 the bpage is on flush_list. It
2160 also protects writes to
2161 bpage::oldest_modification and
2162 flush_list_hp */
2163 FlushHp flush_hp;/*!< "hazard pointer"
2164 used during scan of flush_list
2165 while doing flush list batch.
2166 Protected by flush_list_mutex */
2167 UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
2168 /*!< base node of the modified block
2169 list */
2170 ibool init_flush[BUF_FLUSH_N_TYPES];
2171 /*!< this is TRUE when a flush of the
2172 given type is being initialized */
2173 ulint n_flush[BUF_FLUSH_N_TYPES];
2174 /*!< this is the number of pending
2175 writes in the given flush type */
2176 os_event_t no_flush[BUF_FLUSH_N_TYPES];
2177 /*!< this is in the set state
2178 when there is no flush batch
2179 of the given type running;
2180 os_event_set() and os_event_reset()
2181 are protected by buf_pool_t::mutex */
2182 ib_rbt_t* flush_rbt; /*!< a red-black tree is used
2183 exclusively during recovery to
2184 speed up insertions in the
2185 flush_list. This tree contains
2186 blocks in order of
2187 oldest_modification LSN and is
2188 kept in sync with the
2189 flush_list.
2190 Each member of the tree MUST
2191 also be on the flush_list.
2192 This tree is relevant only in
2193 recovery and is set to NULL
2194 once the recovery is over.
2195 Protected by flush_list_mutex */
2196 unsigned freed_page_clock;/*!< a sequence number used
2197 to count the number of buffer
2198 blocks removed from the end of
2199 the LRU list; NOTE that this
2200 counter may wrap around at 4
2201 billion! A thread is allowed
2202 to read this for heuristic
2203 purposes without holding any
2204 mutex or latch */
2205 ibool try_LRU_scan; /*!< Set to FALSE when an LRU
2206 scan for free block fails. This
2207 flag is used to avoid repeated
2208 scans of LRU list when we know
2209 that there is no free block
2210 available in the scan depth for
2211 eviction. Set to TRUE whenever
2212 we flush a batch from the
2213 buffer pool. Protected by the
2214 buf_pool->mutex */
2215 /* @} */
2216
2217 /** @name LRU replacement algorithm fields */
2218 /* @{ */
2219
2220 UT_LIST_BASE_NODE_T(buf_page_t) free;
2221 /*!< base node of the free
2222 block list */
2223
2224 UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
2225 /*!< base node of the withdraw
2226 block list. It is only used during
2227 shrinking buffer pool size, not to
2228 reuse the blocks will be removed */
2229
2230 ulint withdraw_target;/*!< target length of withdraw
2231 block list, when withdrawing */
2232
2233 /** "hazard pointer" used during scan of LRU while doing
2234 LRU list batch. Protected by buf_pool::mutex */
2235 LRUHp lru_hp;
2236
2237 /** Iterator used to scan the LRU list when searching for
2238 replacable victim. Protected by buf_pool::mutex. */
2239 LRUItr lru_scan_itr;
2240
2241 /** Iterator used to scan the LRU list when searching for
2242 single page flushing victim. Protected by buf_pool::mutex. */
2243 LRUItr single_scan_itr;
2244
2245 UT_LIST_BASE_NODE_T(buf_page_t) LRU;
2246 /*!< base node of the LRU list */
2247
2248 buf_page_t* LRU_old; /*!< pointer to the about
2249 LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
2250 oldest blocks in the LRU list;
2251 NULL if LRU length less than
2252 BUF_LRU_OLD_MIN_LEN;
2253 NOTE: when LRU_old != NULL, its length
2254 should always equal LRU_old_len */
2255 ulint LRU_old_len; /*!< length of the LRU list from
2256 the block to which LRU_old points
2257 onward, including that block;
2258 see buf0lru.cc for the restrictions
2259 on this value; 0 if LRU_old == NULL;
2260 NOTE: LRU_old_len must be adjusted
2261 whenever LRU_old shrinks or grows! */
2262
2263 UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
2264 /*!< base node of the
2265 unzip_LRU list */
2266
2267 /* @} */
2268 /** @name Buddy allocator fields
2269 The buddy allocator is used for allocating compressed page
2270 frames and buf_page_t descriptors of blocks that exist
2271 in the buffer pool only in compressed form. */
2272 /* @{ */
2273#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2274 UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
2275 /*!< unmodified compressed pages */
2276#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2277 UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
2278 /*!< buddy free lists */
2279
2280 buf_page_t* watch;
2281 /*!< Sentinel records for buffer
2282 pool watches. Protected by
2283 buf_pool->mutex. */
2284
2285 buf_tmp_array_t* tmp_arr;
2286 /*!< Array for temporal memory
2287 used in compression and encryption */
2288
2289#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
2290# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
2291#endif
2292 /* @} */
2293};
2294
2295/** Print the given buf_pool_t object.
2296@param[in,out] out the output stream
2297@param[in] buf_pool the buf_pool_t object to be printed
2298@return the output stream */
2299std::ostream&
2300operator<<(
2301 std::ostream& out,
2302 const buf_pool_t& buf_pool);
2303
2304/** @name Accessors for buf_pool->mutex.
2305Use these instead of accessing buf_pool->mutex directly. */
2306/* @{ */
2307
/** Test if a buffer pool mutex is owned.
The argument is parenthesized so that an expression such as
(pool + 1) expands correctly.
@param b	buffer pool instance pointer */
#define buf_pool_mutex_own(b) mutex_own(&(b)->mutex)
/** Acquire a buffer pool mutex.
In debug builds this first asserts that the zip_mutex of the same
instance is not owned by the caller.
@param b	buffer pool instance pointer */
#define buf_pool_mutex_enter(b) do { \
	ut_ad(!(b)->zip_mutex.is_owned()); \
	mutex_enter(&(b)->mutex); \
} while (0)
2315
/** Test if flush list mutex is owned.
@param b	buffer pool instance pointer */
#define buf_flush_list_mutex_own(b) \
	mutex_own(&(b)->flush_list_mutex)

/** Acquire the flush list mutex.
@param b	buffer pool instance pointer */
#define buf_flush_list_mutex_enter(b) \
	do { mutex_enter(&(b)->flush_list_mutex); } while (0)

/** Release the flush list mutex.
@param b	buffer pool instance pointer */
#define buf_flush_list_mutex_exit(b) \
	do { mutex_exit(&(b)->flush_list_mutex); } while (0)
2327
2328
/** Test if block->mutex is owned.
The expansion is wrapped in parentheses so that it stays a single
operand wherever the macro is used.
@param b	block pointer */
#define buf_page_mutex_own(b) ((b)->mutex.is_owned())

/** Acquire the block->mutex.
@param b	block pointer */
#define buf_page_mutex_enter(b) do { \
	mutex_enter(&(b)->mutex); \
} while (0)

/** Release the block->mutex.
@param b	block pointer */
#define buf_page_mutex_exit(b) do { \
	(b)->mutex.exit(); \
} while (0)
2341
2342
2343/** Get appropriate page_hash_lock. */
2344UNIV_INLINE
2345rw_lock_t*
2346buf_page_hash_lock_get(const buf_pool_t* buf_pool, const page_id_t& page_id)
2347{
2348 return hash_get_lock(buf_pool->page_hash, page_id.fold());
2349}
2350
/** If hash_lock is not the appropriate page_hash lock for page_id,
relock until it is (s-latch variant). Forwards to
hash_lock_s_confirm() with buf_pool->page_hash and page_id.fold(). */
#define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id) \
	hash_lock_s_confirm( \
		hash_lock, (buf_pool)->page_hash, (page_id).fold())

/** Same as buf_page_hash_lock_s_confirm(), but forwards to
hash_lock_x_confirm() (x-latch variant). */
#define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id) \
	hash_lock_x_confirm( \
		hash_lock, (buf_pool)->page_hash, (page_id).fold())
2357
#ifdef UNIV_DEBUG
/** Test if the page_hash lock covering bpage->id is held in s-mode. */
# define buf_page_hash_lock_held_s(buf_pool, bpage) \
	rw_lock_own( \
		buf_page_hash_lock_get((buf_pool), (bpage)->id), \
		RW_LOCK_S)

/** Test if the page_hash lock covering bpage->id is held in x-mode. */
# define buf_page_hash_lock_held_x(buf_pool, bpage) \
	rw_lock_own( \
		buf_page_hash_lock_get((buf_pool), (bpage)->id), \
		RW_LOCK_X)

/** Test if the page_hash lock is held in s-mode or in x-mode. */
# define buf_page_hash_lock_held_s_or_x(buf_pool, bpage) \
	(buf_page_hash_lock_held_s((buf_pool), (bpage)) \
	 || buf_page_hash_lock_held_x((buf_pool), (bpage)))

/** Block variants: the same tests applied to &block->page. */
# define buf_block_hash_lock_held_s(buf_pool, block) \
	buf_page_hash_lock_held_s((buf_pool), &(block)->page)

# define buf_block_hash_lock_held_x(buf_pool, block) \
	buf_page_hash_lock_held_x((buf_pool), &(block)->page)

# define buf_block_hash_lock_held_s_or_x(buf_pool, block) \
	buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page)
#else /* UNIV_DEBUG */
/* Without UNIV_DEBUG every test is the constant TRUE and the
arguments are not evaluated. */
# define buf_page_hash_lock_held_s(b, p) (TRUE)
# define buf_page_hash_lock_held_x(b, p) (TRUE)
# define buf_page_hash_lock_held_s_or_x(b, p) (TRUE)
# define buf_block_hash_lock_held_s(b, p) (TRUE)
# define buf_block_hash_lock_held_x(b, p) (TRUE)
# define buf_block_hash_lock_held_s_or_x(b, p) (TRUE)
#endif /* UNIV_DEBUG */
2388
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Forbid the release of the buffer pool mutex.
Increments the mutex_exit_forbidden counter of the instance.
@param b	buffer pool instance pointer */
# define buf_pool_mutex_exit_forbid(b) do { \
	ut_ad(buf_pool_mutex_own(b)); \
	(b)->mutex_exit_forbidden++; \
} while (0)
/** Allow the release of the buffer pool mutex.
Decrements the mutex_exit_forbidden counter, which must be nonzero.
@param b	buffer pool instance pointer */
# define buf_pool_mutex_exit_allow(b) do { \
	ut_ad(buf_pool_mutex_own(b)); \
	ut_a((b)->mutex_exit_forbidden); \
	(b)->mutex_exit_forbidden--; \
} while (0)
/** Release the buffer pool mutex.
Asserts that the release is not currently forbidden.
@param b	buffer pool instance pointer */
# define buf_pool_mutex_exit(b) do { \
	ut_a(!(b)->mutex_exit_forbidden); \
	mutex_exit(&(b)->mutex); \
} while (0)
#else
/** Forbid the release of the buffer pool mutex (no-op in this build). */
# define buf_pool_mutex_exit_forbid(b) ((void) 0)
/** Allow the release of the buffer pool mutex (no-op in this build). */
# define buf_pool_mutex_exit_allow(b) ((void) 0)
/** Release the buffer pool mutex.
@param b	buffer pool instance pointer */
# define buf_pool_mutex_exit(b) mutex_exit(&(b)->mutex)
#endif
2414/* @} */
2415
2416/**********************************************************************
2417Let us list the consistency conditions for different control block states.
2418
2419NOT_USED: is in free list, not in LRU list, not in flush list, nor
2420 page hash table
2421READY_FOR_USE: is not in free list, LRU list, or flush list, nor page
2422 hash table
2423MEMORY: is not in free list, LRU list, or flush list, nor page
2424 hash table
2425FILE_PAGE: space and offset are defined, is in page hash table
2426 if io_fix == BUF_IO_WRITE,
2427 pool: no_flush[flush_type] is in reset state,
2428 pool: n_flush[flush_type] > 0
2429
2430 (1) if buf_fix_count == 0, then
2431 is in LRU list, not in free list
2432 is in flush list,
2433 if and only if oldest_modification > 0
2434 is x-locked,
2435 if and only if io_fix == BUF_IO_READ
2436 is s-locked,
2437 if and only if io_fix == BUF_IO_WRITE
2438
2439 (2) if buf_fix_count > 0, then
2440 is not in LRU list, not in free list
2441 is in flush list,
2442 if and only if oldest_modification > 0
2443 if io_fix == BUF_IO_READ,
2444 is x-locked
2445 if io_fix == BUF_IO_WRITE,
2446 is s-locked
2447
2448State transitions:
2449
2450NOT_USED => READY_FOR_USE
2451READY_FOR_USE => MEMORY
2452READY_FOR_USE => FILE_PAGE
2453MEMORY => NOT_USED
2454FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
2455 (1) buf_fix_count == 0,
2456 (2) oldest_modification == 0, and
2457 (3) io_fix == 0.
2458*/
2459
2460#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2461/** Functor to validate the LRU list. */
2462struct CheckInLRUList {
2463 void operator()(const buf_page_t* elem) const
2464 {
2465 ut_a(elem->in_LRU_list);
2466 }
2467
2468 static void validate(const buf_pool_t* buf_pool)
2469 {
2470 CheckInLRUList check;
2471 ut_list_validate(buf_pool->LRU, check);
2472 }
2473};
2474
2475/** Functor to validate the LRU list. */
2476struct CheckInFreeList {
2477 void operator()(const buf_page_t* elem) const
2478 {
2479 ut_a(elem->in_free_list);
2480 }
2481
2482 static void validate(const buf_pool_t* buf_pool)
2483 {
2484 CheckInFreeList check;
2485 ut_list_validate(buf_pool->free, check);
2486 }
2487};
2488
2489struct CheckUnzipLRUAndLRUList {
2490 void operator()(const buf_block_t* elem) const
2491 {
2492 ut_a(elem->page.in_LRU_list);
2493 ut_a(elem->in_unzip_LRU_list);
2494 }
2495
2496 static void validate(const buf_pool_t* buf_pool)
2497 {
2498 CheckUnzipLRUAndLRUList check;
2499 ut_list_validate(buf_pool->unzip_LRU, check);
2500 }
2501};
2502#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
2503
2504#include "buf0buf.ic"
2505
2506#endif /* !UNIV_INNOCHECKSUM */
2507
2508#endif
2509