| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | Copyright (c) 2014, 2018, MariaDB Corporation. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify it under |
| 7 | the terms of the GNU General Public License as published by the Free Software |
| 8 | Foundation; version 2 of the License. |
| 9 | |
| 10 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License along with |
| 15 | this program; if not, write to the Free Software Foundation, Inc., |
| 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 17 | |
| 18 | *****************************************************************************/ |
| 19 | |
| 20 | /**************************************************//** |
| 21 | @file include/buf0flu.h |
| 22 | The database buffer pool flush algorithm |
| 23 | |
| 24 | Created 11/5/1995 Heikki Tuuri |
| 25 | *******************************************************/ |
| 26 | |
| 27 | #ifndef buf0flu_h |
| 28 | #define buf0flu_h |
| 29 | |
| 30 | #include "univ.i" |
| 31 | #include "ut0byte.h" |
| 32 | #include "log0log.h" |
| 33 | #include "buf0types.h" |
| 34 | |
| 35 | /** Flag indicating if the page_cleaner is in active state. */ |
| 36 | extern bool buf_page_cleaner_is_active; |
| 37 | |
| 38 | #ifdef UNIV_DEBUG |
| 39 | |
| 40 | /** Value of MySQL global variable used to disable page cleaner. */ |
| 41 | extern my_bool innodb_page_cleaner_disabled_debug; |
| 42 | |
| 43 | #endif /* UNIV_DEBUG */ |
| 44 | |
| 45 | /** Event to synchronise with the flushing. */ |
| 46 | extern os_event_t buf_flush_event; |
| 47 | |
| 48 | class ut_stage_alter_t; |
| 49 | |
| 50 | /** Handled page counters for a single flush */ |
| 51 | struct flush_counters_t { |
| 52 | ulint flushed; /*!< number of dirty pages flushed */ |
| 53 | ulint evicted; /*!< number of clean pages evicted */ |
| 54 | ulint unzip_LRU_evicted;/*!< number of uncompressed page images |
| 55 | evicted */ |
| 56 | }; |
| 57 | |
| 58 | /********************************************************************//** |
| 59 | Remove a block from the flush list of modified blocks. */ |
| 60 | void |
| 61 | buf_flush_remove( |
| 62 | /*=============*/ |
| 63 | buf_page_t* bpage); /*!< in: pointer to the block in question */ |
| 64 | /*******************************************************************//** |
| 65 | Relocates a buffer control block on the flush_list. |
| 66 | Note that it is assumed that the contents of bpage has already been |
| 67 | copied to dpage. */ |
| 68 | void |
| 69 | buf_flush_relocate_on_flush_list( |
| 70 | /*=============================*/ |
| 71 | buf_page_t* bpage, /*!< in/out: control block being moved */ |
| 72 | buf_page_t* dpage); /*!< in/out: destination block */ |
| 73 | /** Update the flush system data structures when a write is completed. |
| 74 | @param[in,out] bpage flushed page |
| 75 | @param[in] dblwr whether the doublewrite buffer was used */ |
| 76 | void buf_flush_write_complete(buf_page_t* bpage, bool dblwr); |
| 77 | /** Initialize a page for writing to the tablespace. |
| 78 | @param[in] block buffer block; NULL if bypassing the buffer pool |
| 79 | @param[in,out] page page frame |
| 80 | @param[in,out] page_zip_ compressed page, or NULL if uncompressed |
| 81 | @param[in] newest_lsn newest modification LSN to the page */ |
| 82 | void |
| 83 | buf_flush_init_for_writing( |
| 84 | const buf_block_t* block, |
| 85 | byte* page, |
| 86 | void* page_zip_, |
| 87 | lsn_t newest_lsn); |
| 88 | |
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Write a flushable page asynchronously from the buffer pool to a file.
NOTE: both buf_pool->mutex and block->mutex must be held when this
function is entered; the function releases them after flushing.
This is loosely based on buf_flush_batch() and buf_flush_page().
@param[in,out]	buf_pool	buffer pool instance
@param[in,out]	block		buffer control block
@return TRUE if the page was flushed and the mutexes released */
ibool buf_flush_page_try(buf_pool_t* buf_pool, buf_block_t* block)
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
| 103 | /** Do flushing batch of a given type. |
| 104 | NOTE: The calling thread is not allowed to own any latches on pages! |
| 105 | @param[in,out] buf_pool buffer pool instance |
| 106 | @param[in] type flush type |
| 107 | @param[in] min_n wished minimum mumber of blocks flushed |
| 108 | (it is not guaranteed that the actual number is that big, though) |
| 109 | @param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose |
| 110 | oldest_modification is smaller than this should be flushed (if their number |
| 111 | does not exceed min_n), otherwise ignored |
| 112 | @param[out] n the number of pages which were processed is |
| 113 | passed back to caller. Ignored if NULL |
| 114 | @retval true if a batch was queued successfully. |
| 115 | @retval false if another batch of same type was already running. */ |
| 116 | bool |
| 117 | buf_flush_do_batch( |
| 118 | buf_pool_t* buf_pool, |
| 119 | buf_flush_t type, |
| 120 | ulint min_n, |
| 121 | lsn_t lsn_limit, |
| 122 | flush_counters_t* n); |
| 123 | |
| 124 | /** This utility flushes dirty blocks from the end of the flush list of all |
| 125 | buffer pool instances. |
| 126 | NOTE: The calling thread is not allowed to own any latches on pages! |
| 127 | @param[in] min_n wished minimum mumber of blocks flushed (it is |
| 128 | not guaranteed that the actual number is that big, though) |
| 129 | @param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose |
| 130 | oldest_modification is smaller than this should be flushed (if their number |
| 131 | does not exceed min_n), otherwise ignored |
| 132 | @param[out] n_processed the number of pages which were processed is |
| 133 | passed back to caller. Ignored if NULL. |
| 134 | @return true if a batch was queued successfully for each buffer pool |
| 135 | instance. false if another batch of same type was already running in |
| 136 | at least one of the buffer pool instance */ |
| 137 | bool |
| 138 | buf_flush_lists( |
| 139 | ulint min_n, |
| 140 | lsn_t lsn_limit, |
| 141 | ulint* n_processed); |
| 142 | |
| 143 | /******************************************************************//** |
| 144 | This function picks up a single page from the tail of the LRU |
| 145 | list, flushes it (if it is dirty), removes it from page_hash and LRU |
| 146 | list and puts it on the free list. It is called from user threads when |
| 147 | they are unable to find a replaceable page at the tail of the LRU |
| 148 | list i.e.: when the background LRU flushing in the page_cleaner thread |
| 149 | is not fast enough to keep pace with the workload. |
| 150 | @return true if success. */ |
| 151 | bool |
| 152 | buf_flush_single_page_from_LRU( |
| 153 | /*===========================*/ |
| 154 | buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */ |
| 155 | /******************************************************************//** |
| 156 | Waits until a flush batch of the given type ends */ |
| 157 | void |
| 158 | buf_flush_wait_batch_end( |
| 159 | /*=====================*/ |
| 160 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
| 161 | buf_flush_t type); /*!< in: BUF_FLUSH_LRU |
| 162 | or BUF_FLUSH_LIST */ |
| 163 | /** |
| 164 | Waits until a flush batch of the given lsn ends |
| 165 | @param[in] new_oldest target oldest_modified_lsn to wait for */ |
| 166 | |
| 167 | void |
| 168 | buf_flush_wait_flushed( |
| 169 | lsn_t new_oldest); |
| 170 | |
| 171 | /******************************************************************//** |
| 172 | Waits until a flush batch of the given type ends. This is called by |
| 173 | a thread that only wants to wait for a flush to end but doesn't do |
| 174 | any flushing itself. */ |
| 175 | void |
| 176 | buf_flush_wait_batch_end_wait_only( |
| 177 | /*===============================*/ |
| 178 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
| 179 | buf_flush_t type); /*!< in: BUF_FLUSH_LRU |
| 180 | or BUF_FLUSH_LIST */ |
| 181 | /********************************************************************//** |
| 182 | This function should be called at a mini-transaction commit, if a page was |
| 183 | modified in it. Puts the block to the list of modified blocks, if it not |
| 184 | already in it. */ |
| 185 | UNIV_INLINE |
| 186 | void |
| 187 | buf_flush_note_modification( |
| 188 | /*========================*/ |
| 189 | buf_block_t* block, /*!< in: block which is modified */ |
| 190 | lsn_t start_lsn, /*!< in: start lsn of the first mtr in a |
| 191 | set of mtr's */ |
| 192 | lsn_t end_lsn, /*!< in: end lsn of the last mtr in the |
| 193 | set of mtr's */ |
| 194 | FlushObserver* observer); /*!< in: flush observer */ |
| 195 | |
| 196 | /********************************************************************//** |
| 197 | This function should be called when recovery has modified a buffer page. */ |
| 198 | UNIV_INLINE |
| 199 | void |
| 200 | buf_flush_recv_note_modification( |
| 201 | /*=============================*/ |
| 202 | buf_block_t* block, /*!< in: block which is modified */ |
| 203 | lsn_t start_lsn, /*!< in: start lsn of the first mtr in a |
| 204 | set of mtr's */ |
| 205 | lsn_t end_lsn); /*!< in: end lsn of the last mtr in the |
| 206 | set of mtr's */ |
| 207 | /********************************************************************//** |
| 208 | Returns TRUE if the file page block is immediately suitable for replacement, |
| 209 | i.e., transition FILE_PAGE => NOT_USED allowed. |
| 210 | @return TRUE if can replace immediately */ |
| 211 | ibool |
| 212 | buf_flush_ready_for_replace( |
| 213 | /*========================*/ |
| 214 | buf_page_t* bpage); /*!< in: buffer control block, must be |
| 215 | buf_page_in_file(bpage) and in the LRU list */ |
| 216 | |
#ifdef UNIV_DEBUG
/** Disable the page cleaner threads (coordinator and workers).
Used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
The first three parameters are unnamed in this declaration.
@param[in]	save	immediate result from check function */
void
buf_flush_page_cleaner_disabled_debug_update(
	THD*,
	st_mysql_sys_var*,
	void*,
	const void*	save);
#endif /* UNIV_DEBUG */
| 225 | |
| 226 | /******************************************************************//** |
| 227 | page_cleaner thread tasked with flushing dirty pages from the buffer |
| 228 | pools. As of now we'll have only one coordinator of this thread. |
| 229 | @return a dummy parameter */ |
| 230 | extern "C" |
| 231 | os_thread_ret_t |
| 232 | DECLARE_THREAD(buf_flush_page_cleaner_coordinator)( |
| 233 | /*===============================================*/ |
| 234 | void* arg); /*!< in: a dummy parameter required by |
| 235 | os_thread_create */ |
| 236 | |
| 237 | /** Adjust thread count for page cleaner workers. |
| 238 | @param[in] new_cnt Number of threads to be used */ |
| 239 | void |
| 240 | buf_flush_set_page_cleaner_thread_cnt(ulong new_cnt); |
| 241 | |
| 242 | /******************************************************************//** |
| 243 | Worker thread of page_cleaner. |
| 244 | @return a dummy parameter */ |
| 245 | extern "C" |
| 246 | os_thread_ret_t |
| 247 | DECLARE_THREAD(buf_flush_page_cleaner_worker)( |
| 248 | /*==========================================*/ |
| 249 | void* arg); /*!< in: a dummy parameter required by |
| 250 | os_thread_create */ |
/** Initialize the page_cleaner. */
void buf_flush_page_cleaner_init(void);
| 254 | |
/** Wait for the end of any LRU flushes that may be in progress. */
void buf_flush_wait_LRU_batch_end(void);
| 258 | |
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Validate the flush list.
@param[in]	buf_pool	buffer pool whose flush list is checked
				(NOTE(review): parameter was undocumented;
				meaning taken from its name and type - confirm)
@return TRUE if ok */
ibool buf_flush_validate(buf_pool_t* buf_pool);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
| 268 | |
/** Initialize the red-black tree that speeds up insertions into the
flush_list during the recovery process. Should be called at the start of
the recovery process, before any page has been read/written. */
void buf_flush_init_flush_rbt(void);
| 276 | |
/** Free up the red-black tree. */
void buf_flush_free_flush_rbt(void);
| 282 | |
| 283 | /********************************************************************//** |
| 284 | Writes a flushable page asynchronously from the buffer pool to a file. |
| 285 | NOTE: in simulated aio we must call |
| 286 | os_aio_simulated_wake_handler_threads after we have posted a batch of |
| 287 | writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be |
| 288 | held upon entering this function, and they will be released by this |
| 289 | function. |
| 290 | @return TRUE if page was flushed */ |
| 291 | ibool |
| 292 | buf_flush_page( |
| 293 | /*===========*/ |
| 294 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
| 295 | buf_page_t* bpage, /*!< in: buffer control block */ |
| 296 | buf_flush_t flush_type, /*!< in: type of flush */ |
| 297 | bool sync); /*!< in: true if sync IO request */ |
| 298 | /********************************************************************//** |
| 299 | Returns true if the block is modified and ready for flushing. |
| 300 | @return true if can flush immediately */ |
| 301 | bool |
| 302 | buf_flush_ready_for_flush( |
| 303 | /*======================*/ |
| 304 | buf_page_t* bpage, /*!< in: buffer control block, must be |
| 305 | buf_page_in_file(bpage) */ |
| 306 | buf_flush_t flush_type)/*!< in: type of flush */ |
| 307 | MY_ATTRIBUTE((warn_unused_result)); |
| 308 | |
| 309 | /******************************************************************//** |
| 310 | Check if there are any dirty pages that belong to a space id in the flush |
| 311 | list in a particular buffer pool. |
| 312 | @return number of dirty pages present in a single buffer pool */ |
| 313 | ulint |
| 314 | buf_pool_get_dirty_pages_count( |
| 315 | /*===========================*/ |
| 316 | buf_pool_t* buf_pool, /*!< in: buffer pool */ |
| 317 | ulint id, /*!< in: space id to check */ |
| 318 | FlushObserver* observer); /*!< in: flush observer to check */ |
| 319 | |
/** Synchronously flush dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages! */
void buf_flush_sync_all_buf_pools(void);
| 327 | |
| 328 | /** Request IO burst and wake page_cleaner up. |
| 329 | @param[in] lsn_limit upper limit of LSN to be flushed */ |
| 330 | void |
| 331 | buf_flush_request_force( |
| 332 | lsn_t lsn_limit); |
| 333 | |
| 334 | /** We use FlushObserver to track flushing of non-redo logged pages in bulk |
| 335 | create index(BtrBulk.cc).Since we disable redo logging during a index build, |
| 336 | we need to make sure that all dirty pages modifed by the index build are |
| 337 | flushed to disk before any redo logged operations go to the index. */ |
| 338 | |
| 339 | class FlushObserver { |
| 340 | public: |
| 341 | /** Constructor |
| 342 | @param[in,out] space tablespace |
| 343 | @param[in] trx trx instance |
| 344 | @param[in] stage performance schema accounting object, |
| 345 | used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages() |
| 346 | for accounting. */ |
| 347 | FlushObserver(fil_space_t* space, trx_t* trx, ut_stage_alter_t* stage); |
| 348 | |
| 349 | /** Deconstructor */ |
| 350 | ~FlushObserver(); |
| 351 | |
| 352 | /** Check pages have been flushed and removed from the flush list |
| 353 | in a buffer pool instance. |
| 354 | @param[in] instance_no buffer pool instance no |
| 355 | @return true if the pages were removed from the flush list */ |
| 356 | bool is_complete(ulint instance_no) |
| 357 | { |
| 358 | return(m_flushed->at(instance_no) == m_removed->at(instance_no) |
| 359 | || m_interrupted); |
| 360 | } |
| 361 | |
| 362 | /** @return whether to flush only some pages of the tablespace */ |
| 363 | bool is_partial_flush() const { return m_stage != NULL; } |
| 364 | |
| 365 | /** @return whether the operation was interrupted */ |
| 366 | bool is_interrupted() const { return m_interrupted; } |
| 367 | |
| 368 | /** Interrupt observer not to wait. */ |
| 369 | void interrupted() |
| 370 | { |
| 371 | m_interrupted = true; |
| 372 | } |
| 373 | |
| 374 | /** Check whether trx is interrupted |
| 375 | @return true if trx is interrupted */ |
| 376 | bool check_interrupted(); |
| 377 | |
| 378 | /** Flush dirty pages. */ |
| 379 | void flush(); |
| 380 | /** Notify observer of flushing a page |
| 381 | @param[in] buf_pool buffer pool instance |
| 382 | @param[in] bpage buffer page to flush */ |
| 383 | void notify_flush( |
| 384 | buf_pool_t* buf_pool, |
| 385 | buf_page_t* bpage); |
| 386 | |
| 387 | /** Notify observer of removing a page from flush list |
| 388 | @param[in] buf_pool buffer pool instance |
| 389 | @param[in] bpage buffer page flushed */ |
| 390 | void notify_remove( |
| 391 | buf_pool_t* buf_pool, |
| 392 | buf_page_t* bpage); |
| 393 | private: |
| 394 | /** Tablespace */ |
| 395 | fil_space_t* m_space; |
| 396 | |
| 397 | /** Trx instance */ |
| 398 | trx_t* const m_trx; |
| 399 | |
| 400 | /** Performance schema accounting object, used by ALTER TABLE. |
| 401 | If not NULL, then stage->begin_phase_flush() will be called initially, |
| 402 | specifying the number of pages to be attempted to be flushed and |
| 403 | subsequently, stage->inc() will be called for each page we attempt to |
| 404 | flush. */ |
| 405 | ut_stage_alter_t* m_stage; |
| 406 | |
| 407 | /* Flush request sent */ |
| 408 | std::vector<ulint>* m_flushed; |
| 409 | |
| 410 | /* Flush request finished */ |
| 411 | std::vector<ulint>* m_removed; |
| 412 | |
| 413 | /* True if the operation was interrupted. */ |
| 414 | bool m_interrupted; |
| 415 | }; |
| 416 | |
| 417 | #include "buf0flu.ic" |
| 418 | |
| 419 | #endif |
| 420 | |