1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2014, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file include/buf0flu.h |
22 | The database buffer pool flush algorithm |
23 | |
24 | Created 11/5/1995 Heikki Tuuri |
25 | *******************************************************/ |
26 | |
27 | #ifndef buf0flu_h |
28 | #define buf0flu_h |
29 | |
30 | #include "univ.i" |
31 | #include "ut0byte.h" |
32 | #include "log0log.h" |
33 | #include "buf0types.h" |
34 | |
35 | /** Flag indicating if the page_cleaner is in active state. */ |
36 | extern bool buf_page_cleaner_is_active; |
37 | |
38 | #ifdef UNIV_DEBUG |
39 | |
40 | /** Value of MySQL global variable used to disable page cleaner. */ |
41 | extern my_bool innodb_page_cleaner_disabled_debug; |
42 | |
43 | #endif /* UNIV_DEBUG */ |
44 | |
45 | /** Event to synchronise with the flushing. */ |
46 | extern os_event_t buf_flush_event; |
47 | |
48 | class ut_stage_alter_t; |
49 | |
50 | /** Handled page counters for a single flush */ |
51 | struct flush_counters_t { |
52 | ulint flushed; /*!< number of dirty pages flushed */ |
53 | ulint evicted; /*!< number of clean pages evicted */ |
54 | ulint unzip_LRU_evicted;/*!< number of uncompressed page images |
55 | evicted */ |
56 | }; |
57 | |
58 | /********************************************************************//** |
59 | Remove a block from the flush list of modified blocks. */ |
60 | void |
61 | buf_flush_remove( |
62 | /*=============*/ |
63 | buf_page_t* bpage); /*!< in: pointer to the block in question */ |
64 | /*******************************************************************//** |
65 | Relocates a buffer control block on the flush_list. |
66 | Note that it is assumed that the contents of bpage has already been |
67 | copied to dpage. */ |
68 | void |
69 | buf_flush_relocate_on_flush_list( |
70 | /*=============================*/ |
71 | buf_page_t* bpage, /*!< in/out: control block being moved */ |
72 | buf_page_t* dpage); /*!< in/out: destination block */ |
73 | /** Update the flush system data structures when a write is completed. |
74 | @param[in,out] bpage flushed page |
75 | @param[in] dblwr whether the doublewrite buffer was used */ |
76 | void buf_flush_write_complete(buf_page_t* bpage, bool dblwr); |
77 | /** Initialize a page for writing to the tablespace. |
78 | @param[in] block buffer block; NULL if bypassing the buffer pool |
79 | @param[in,out] page page frame |
80 | @param[in,out] page_zip_ compressed page, or NULL if uncompressed |
81 | @param[in] newest_lsn newest modification LSN to the page */ |
82 | void |
83 | buf_flush_init_for_writing( |
84 | const buf_block_t* block, |
85 | byte* page, |
86 | void* page_zip_, |
87 | lsn_t newest_lsn); |
88 | |
#if defined(UNIV_DEBUG) || defined(UNIV_IBUF_DEBUG)
/** Write a flushable page asynchronously from the buffer pool to a file.
NOTE: the caller must hold both buf_pool->mutex and block->mutex when
entering this function; the function releases them after flushing.
The implementation is loosely based on buf_flush_batch() and
buf_flush_page().
@param[in,out]	buf_pool	buffer pool instance
@param[in,out]	block		buffer control block
@return TRUE if the page was flushed and the mutexes released */
ibool buf_flush_page_try(buf_pool_t* buf_pool, buf_block_t* block)
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
103 | /** Do flushing batch of a given type. |
104 | NOTE: The calling thread is not allowed to own any latches on pages! |
105 | @param[in,out] buf_pool buffer pool instance |
106 | @param[in] type flush type |
107 | @param[in] min_n wished minimum mumber of blocks flushed |
108 | (it is not guaranteed that the actual number is that big, though) |
109 | @param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose |
110 | oldest_modification is smaller than this should be flushed (if their number |
111 | does not exceed min_n), otherwise ignored |
112 | @param[out] n the number of pages which were processed is |
113 | passed back to caller. Ignored if NULL |
114 | @retval true if a batch was queued successfully. |
115 | @retval false if another batch of same type was already running. */ |
116 | bool |
117 | buf_flush_do_batch( |
118 | buf_pool_t* buf_pool, |
119 | buf_flush_t type, |
120 | ulint min_n, |
121 | lsn_t lsn_limit, |
122 | flush_counters_t* n); |
123 | |
124 | /** This utility flushes dirty blocks from the end of the flush list of all |
125 | buffer pool instances. |
126 | NOTE: The calling thread is not allowed to own any latches on pages! |
127 | @param[in] min_n wished minimum mumber of blocks flushed (it is |
128 | not guaranteed that the actual number is that big, though) |
129 | @param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose |
130 | oldest_modification is smaller than this should be flushed (if their number |
131 | does not exceed min_n), otherwise ignored |
132 | @param[out] n_processed the number of pages which were processed is |
133 | passed back to caller. Ignored if NULL. |
134 | @return true if a batch was queued successfully for each buffer pool |
135 | instance. false if another batch of same type was already running in |
136 | at least one of the buffer pool instance */ |
137 | bool |
138 | buf_flush_lists( |
139 | ulint min_n, |
140 | lsn_t lsn_limit, |
141 | ulint* n_processed); |
142 | |
143 | /******************************************************************//** |
144 | This function picks up a single page from the tail of the LRU |
145 | list, flushes it (if it is dirty), removes it from page_hash and LRU |
146 | list and puts it on the free list. It is called from user threads when |
147 | they are unable to find a replaceable page at the tail of the LRU |
148 | list i.e.: when the background LRU flushing in the page_cleaner thread |
149 | is not fast enough to keep pace with the workload. |
150 | @return true if success. */ |
151 | bool |
152 | buf_flush_single_page_from_LRU( |
153 | /*===========================*/ |
154 | buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */ |
155 | /******************************************************************//** |
156 | Waits until a flush batch of the given type ends */ |
157 | void |
158 | buf_flush_wait_batch_end( |
159 | /*=====================*/ |
160 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
161 | buf_flush_t type); /*!< in: BUF_FLUSH_LRU |
162 | or BUF_FLUSH_LIST */ |
163 | /** |
164 | Waits until a flush batch of the given lsn ends |
165 | @param[in] new_oldest target oldest_modified_lsn to wait for */ |
166 | |
167 | void |
168 | buf_flush_wait_flushed( |
169 | lsn_t new_oldest); |
170 | |
171 | /******************************************************************//** |
172 | Waits until a flush batch of the given type ends. This is called by |
173 | a thread that only wants to wait for a flush to end but doesn't do |
174 | any flushing itself. */ |
175 | void |
176 | buf_flush_wait_batch_end_wait_only( |
177 | /*===============================*/ |
178 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
179 | buf_flush_t type); /*!< in: BUF_FLUSH_LRU |
180 | or BUF_FLUSH_LIST */ |
181 | /********************************************************************//** |
182 | This function should be called at a mini-transaction commit, if a page was |
183 | modified in it. Puts the block to the list of modified blocks, if it not |
184 | already in it. */ |
185 | UNIV_INLINE |
186 | void |
187 | buf_flush_note_modification( |
188 | /*========================*/ |
189 | buf_block_t* block, /*!< in: block which is modified */ |
190 | lsn_t start_lsn, /*!< in: start lsn of the first mtr in a |
191 | set of mtr's */ |
192 | lsn_t end_lsn, /*!< in: end lsn of the last mtr in the |
193 | set of mtr's */ |
194 | FlushObserver* observer); /*!< in: flush observer */ |
195 | |
196 | /********************************************************************//** |
197 | This function should be called when recovery has modified a buffer page. */ |
198 | UNIV_INLINE |
199 | void |
200 | buf_flush_recv_note_modification( |
201 | /*=============================*/ |
202 | buf_block_t* block, /*!< in: block which is modified */ |
203 | lsn_t start_lsn, /*!< in: start lsn of the first mtr in a |
204 | set of mtr's */ |
205 | lsn_t end_lsn); /*!< in: end lsn of the last mtr in the |
206 | set of mtr's */ |
207 | /********************************************************************//** |
208 | Returns TRUE if the file page block is immediately suitable for replacement, |
209 | i.e., transition FILE_PAGE => NOT_USED allowed. |
210 | @return TRUE if can replace immediately */ |
211 | ibool |
212 | buf_flush_ready_for_replace( |
213 | /*========================*/ |
214 | buf_page_t* bpage); /*!< in: buffer control block, must be |
215 | buf_page_in_file(bpage) and in the LRU list */ |
216 | |
#ifdef UNIV_DEBUG
/** Disable the page cleaner threads (coordinator and workers).
Used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
The first three parameters are unnamed in this declaration.
@param[in]	save	immediate result from check function */
void
buf_flush_page_cleaner_disabled_debug_update(
	THD*,
	st_mysql_sys_var*,
	void*,
	const void*	save);
#endif /* UNIV_DEBUG */
225 | |
226 | /******************************************************************//** |
227 | page_cleaner thread tasked with flushing dirty pages from the buffer |
228 | pools. As of now we'll have only one coordinator of this thread. |
229 | @return a dummy parameter */ |
230 | extern "C" |
231 | os_thread_ret_t |
232 | DECLARE_THREAD(buf_flush_page_cleaner_coordinator)( |
233 | /*===============================================*/ |
234 | void* arg); /*!< in: a dummy parameter required by |
235 | os_thread_create */ |
236 | |
237 | /** Adjust thread count for page cleaner workers. |
238 | @param[in] new_cnt Number of threads to be used */ |
239 | void |
240 | buf_flush_set_page_cleaner_thread_cnt(ulong new_cnt); |
241 | |
242 | /******************************************************************//** |
243 | Worker thread of page_cleaner. |
244 | @return a dummy parameter */ |
245 | extern "C" |
246 | os_thread_ret_t |
247 | DECLARE_THREAD(buf_flush_page_cleaner_worker)( |
248 | /*==========================================*/ |
249 | void* arg); /*!< in: a dummy parameter required by |
250 | os_thread_create */ |
/** Initialize the page_cleaner. */
void buf_flush_page_cleaner_init();
254 | |
/** Wait for the end of any LRU flushes that may be in progress. */
void buf_flush_wait_LRU_batch_end();
258 | |
#if defined(UNIV_DEBUG) || defined(UNIV_BUF_DEBUG)
/** Validate the flush list.
@param	buf_pool	buffer pool instance
@return TRUE if ok */
ibool buf_flush_validate(buf_pool_t* buf_pool);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
268 | |
/** Initialize the red-black tree that speeds up insertions into the
flush_list during the recovery process.
Should be called at the start of the recovery process, before any page
has been read/written. */
void buf_flush_init_flush_rbt();
276 | |
/** Free the red-black tree. */
void buf_flush_free_flush_rbt();
282 | |
283 | /********************************************************************//** |
284 | Writes a flushable page asynchronously from the buffer pool to a file. |
285 | NOTE: in simulated aio we must call |
286 | os_aio_simulated_wake_handler_threads after we have posted a batch of |
287 | writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be |
288 | held upon entering this function, and they will be released by this |
289 | function. |
290 | @return TRUE if page was flushed */ |
291 | ibool |
292 | buf_flush_page( |
293 | /*===========*/ |
294 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
295 | buf_page_t* bpage, /*!< in: buffer control block */ |
296 | buf_flush_t flush_type, /*!< in: type of flush */ |
297 | bool sync); /*!< in: true if sync IO request */ |
298 | /********************************************************************//** |
299 | Returns true if the block is modified and ready for flushing. |
300 | @return true if can flush immediately */ |
301 | bool |
302 | buf_flush_ready_for_flush( |
303 | /*======================*/ |
304 | buf_page_t* bpage, /*!< in: buffer control block, must be |
305 | buf_page_in_file(bpage) */ |
306 | buf_flush_t flush_type)/*!< in: type of flush */ |
307 | MY_ATTRIBUTE((warn_unused_result)); |
308 | |
309 | /******************************************************************//** |
310 | Check if there are any dirty pages that belong to a space id in the flush |
311 | list in a particular buffer pool. |
312 | @return number of dirty pages present in a single buffer pool */ |
313 | ulint |
314 | buf_pool_get_dirty_pages_count( |
315 | /*===========================*/ |
316 | buf_pool_t* buf_pool, /*!< in: buffer pool */ |
317 | ulint id, /*!< in: space id to check */ |
318 | FlushObserver* observer); /*!< in: flush observer to check */ |
319 | |
/** Synchronously flush dirty blocks from the end of the flush list of all
buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages! */
void buf_flush_sync_all_buf_pools();
327 | |
328 | /** Request IO burst and wake page_cleaner up. |
329 | @param[in] lsn_limit upper limit of LSN to be flushed */ |
330 | void |
331 | buf_flush_request_force( |
332 | lsn_t lsn_limit); |
333 | |
334 | /** We use FlushObserver to track flushing of non-redo logged pages in bulk |
335 | create index(BtrBulk.cc).Since we disable redo logging during a index build, |
336 | we need to make sure that all dirty pages modifed by the index build are |
337 | flushed to disk before any redo logged operations go to the index. */ |
338 | |
339 | class FlushObserver { |
340 | public: |
341 | /** Constructor |
342 | @param[in,out] space tablespace |
343 | @param[in] trx trx instance |
344 | @param[in] stage performance schema accounting object, |
345 | used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages() |
346 | for accounting. */ |
347 | FlushObserver(fil_space_t* space, trx_t* trx, ut_stage_alter_t* stage); |
348 | |
349 | /** Deconstructor */ |
350 | ~FlushObserver(); |
351 | |
352 | /** Check pages have been flushed and removed from the flush list |
353 | in a buffer pool instance. |
354 | @param[in] instance_no buffer pool instance no |
355 | @return true if the pages were removed from the flush list */ |
356 | bool is_complete(ulint instance_no) |
357 | { |
358 | return(m_flushed->at(instance_no) == m_removed->at(instance_no) |
359 | || m_interrupted); |
360 | } |
361 | |
362 | /** @return whether to flush only some pages of the tablespace */ |
363 | bool is_partial_flush() const { return m_stage != NULL; } |
364 | |
365 | /** @return whether the operation was interrupted */ |
366 | bool is_interrupted() const { return m_interrupted; } |
367 | |
368 | /** Interrupt observer not to wait. */ |
369 | void interrupted() |
370 | { |
371 | m_interrupted = true; |
372 | } |
373 | |
374 | /** Check whether trx is interrupted |
375 | @return true if trx is interrupted */ |
376 | bool check_interrupted(); |
377 | |
378 | /** Flush dirty pages. */ |
379 | void flush(); |
380 | /** Notify observer of flushing a page |
381 | @param[in] buf_pool buffer pool instance |
382 | @param[in] bpage buffer page to flush */ |
383 | void notify_flush( |
384 | buf_pool_t* buf_pool, |
385 | buf_page_t* bpage); |
386 | |
387 | /** Notify observer of removing a page from flush list |
388 | @param[in] buf_pool buffer pool instance |
389 | @param[in] bpage buffer page flushed */ |
390 | void notify_remove( |
391 | buf_pool_t* buf_pool, |
392 | buf_page_t* bpage); |
393 | private: |
394 | /** Tablespace */ |
395 | fil_space_t* m_space; |
396 | |
397 | /** Trx instance */ |
398 | trx_t* const m_trx; |
399 | |
400 | /** Performance schema accounting object, used by ALTER TABLE. |
401 | If not NULL, then stage->begin_phase_flush() will be called initially, |
402 | specifying the number of pages to be attempted to be flushed and |
403 | subsequently, stage->inc() will be called for each page we attempt to |
404 | flush. */ |
405 | ut_stage_alter_t* m_stage; |
406 | |
407 | /* Flush request sent */ |
408 | std::vector<ulint>* m_flushed; |
409 | |
410 | /* Flush request finished */ |
411 | std::vector<ulint>* m_removed; |
412 | |
413 | /* True if the operation was interrupted. */ |
414 | bool m_interrupted; |
415 | }; |
416 | |
417 | #include "buf0flu.ic" |
418 | |
419 | #endif |
420 | |