1/*****************************************************************************
2
3Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2015, 2017, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file buf/buf0rea.cc
22The database buffer read
23
24Created 11/5/1995 Heikki Tuuri
25*******************************************************/
26
27#include "ha_prototypes.h"
28#include <mysql/service_thd_wait.h>
29
30#include "buf0rea.h"
31#include "fil0fil.h"
32#include "mtr0mtr.h"
33#include "buf0buf.h"
34#include "buf0flu.h"
35#include "buf0lru.h"
36#include "buf0dblwr.h"
37#include "ibuf0ibuf.h"
38#include "log0recv.h"
39#include "trx0sys.h"
40#include "os0file.h"
41#include "srv0start.h"
42#include "srv0srv.h"
43
/** Minimum number of recently accessed pages that must be found in the
read-ahead area of buffer pool b before a random read-ahead is started */
#define BUF_READ_AHEAD_RANDOM_THRESHOLD(b)	\
	(BUF_READ_AHEAD_AREA(b) / 8 + 5)
48
/** Read-ahead is not done when the number of pending reads in a buffer
pool instance exceeds buf_pool->curr_size divided by this number: this is
to prevent flooding the buffer pool with i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
53
54/********************************************************************//**
55Unfixes the pages, unlatches the page,
56removes it from page_hash and removes it from LRU. */
57static
58void
59buf_read_page_handle_error(
60/*=======================*/
61 buf_page_t* bpage) /*!< in: pointer to the block */
62{
63 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
64 const bool uncompressed = (buf_page_get_state(bpage)
65 == BUF_BLOCK_FILE_PAGE);
66
67 /* First unfix and release lock on the bpage */
68 buf_pool_mutex_enter(buf_pool);
69 mutex_enter(buf_page_get_mutex(bpage));
70 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
71 ut_ad(bpage->buf_fix_count == 0);
72
73 /* Set BUF_IO_NONE before we remove the block from LRU list */
74 buf_page_set_io_fix(bpage, BUF_IO_NONE);
75
76 if (uncompressed) {
77 rw_lock_x_unlock_gen(
78 &((buf_block_t*) bpage)->lock,
79 BUF_IO_READ);
80 }
81
82 mutex_exit(buf_page_get_mutex(bpage));
83
84 /* remove the block from LRU list */
85 buf_LRU_free_one_page(bpage);
86
87 ut_ad(buf_pool->n_pend_reads > 0);
88 buf_pool->n_pend_reads--;
89
90 buf_pool_mutex_exit(buf_pool);
91}
92
93/** Low-level function which reads a page asynchronously from a file to the
94buffer buf_pool if it is not already there, in which case does nothing.
95Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
96flag is cleared and the x-lock released by an i/o-handler thread.
97
98@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED or
99 DB_TABLESPACE_TRUNCATED if we are trying
100 to read from a non-existent tablespace, a
101 tablespace which is just now being dropped,
102 or a tablespace which is truncated
103@param[in] sync true if synchronous aio is desired
104@param[in] type IO type, SIMULATED, IGNORE_MISSING
105@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
106@param[in] page_id page id
107@param[in] unzip true=request uncompressed page
108@param[in] ignore_missing_space true=ignore missing space when reading
109@return 1 if a read request was queued, 0 if the page already resided
110in buf_pool, or if the page is in the doublewrite buffer blocks in
111which case it is never read into the pool, or if the tablespace does
112not exist or is being dropped */
113static
114ulint
115buf_read_page_low(
116 dberr_t* err,
117 bool sync,
118 ulint type,
119 ulint mode,
120 const page_id_t& page_id,
121 const page_size_t& page_size,
122 bool unzip,
123 bool ignore_missing_space = false)
124{
125 buf_page_t* bpage;
126
127 *err = DB_SUCCESS;
128
129 if (page_id.space() == TRX_SYS_SPACE
130 && buf_dblwr_page_inside(page_id.page_no())) {
131
132 ib::error() << "Trying to read doublewrite buffer page "
133 << page_id;
134 return(0);
135 }
136
137 if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {
138
139 /* Trx sys header is so low in the latching order that we play
140 safe and do not leave the i/o-completion to an asynchronous
141 i/o-thread. Ibuf bitmap pages must always be read with
142 syncronous i/o, to make sure they do not get involved in
143 thread deadlocks. */
144
145 sync = true;
146 }
147
148 /* The following call will also check if the tablespace does not exist
149 or is being dropped; if we succeed in initing the page in the buffer
150 pool for read, then DISCARD cannot proceed until the read has
151 completed */
152 bpage = buf_page_init_for_read(err, mode, page_id, page_size, unzip);
153
154 if (bpage == NULL) {
155
156 return(0);
157 }
158
159 DBUG_LOG("ib_buf",
160 "read page " << page_id << " size=" << page_size.physical()
161 << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
162
163 ut_ad(buf_page_in_file(bpage));
164
165 if (sync) {
166 thd_wait_begin(NULL, THD_WAIT_DISKIO);
167 }
168
169 void* dst;
170
171 if (page_size.is_compressed()) {
172 dst = bpage->zip.data;
173 } else {
174 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
175
176 dst = ((buf_block_t*) bpage)->frame;
177 }
178
179 DBUG_EXECUTE_IF(
180 "innodb_invalid_read_after_truncate",
181 if (fil_space_t* space = fil_space_acquire(page_id.space())) {
182 if (!strcmp(space->name, "test/t1")
183 && page_id.page_no() == space->size - 1) {
184 type = 0;
185 sync = true;
186 }
187 space->release();
188 });
189
190 IORequest request(type | IORequest::READ);
191
192 *err = fil_io(
193 request, sync, page_id, page_size, 0, page_size.physical(),
194 dst, bpage, ignore_missing_space);
195
196 if (sync) {
197 thd_wait_end(NULL);
198 }
199
200 if (*err != DB_SUCCESS) {
201 if (*err == DB_TABLESPACE_TRUNCATED) {
202 /* Remove the page which is outside the
203 truncated tablespace bounds when recovering
204 from a crash happened during a truncation */
205 buf_read_page_handle_error(bpage);
206 if (recv_recovery_on) {
207 mutex_enter(&recv_sys->mutex);
208 ut_ad(recv_sys->n_addrs > 0);
209 recv_sys->n_addrs--;
210 mutex_exit(&recv_sys->mutex);
211 }
212 return(0);
213 } else if (IORequest::ignore_missing(type)
214 || *err == DB_TABLESPACE_DELETED) {
215 buf_read_page_handle_error(bpage);
216 return(0);
217 }
218
219 ut_error;
220 }
221
222 if (sync) {
223 /* The i/o is already completed when we arrive from
224 fil_read */
225 *err = buf_page_io_complete(bpage);
226
227 if (*err != DB_SUCCESS) {
228 return(0);
229 }
230 }
231
232 return(1);
233}
234
235/** Applies a random read-ahead in buf_pool if there are at least a threshold
236value of accessed pages from the random read-ahead area. Does not read any
237page, not even the one at the position (space, offset), if the read-ahead
238mechanism is not activated. NOTE 1: the calling thread may own latches on
239pages: to avoid deadlocks this function must be written such that it cannot
240end up waiting for these latches! NOTE 2: the calling thread must want
241access to the page given: this rule is set to prevent unintended read-aheads
242performed by ibuf routines, a situation which could result in a deadlock if
243the OS does not support asynchronous i/o.
244@param[in] page_id page id of a page which the current thread
245wants to access
246@param[in] page_size page size
247@param[in] inside_ibuf TRUE if we are inside ibuf routine
248@return number of page read requests issued; NOTE that if we read ibuf
249pages, it may happen that the page at the given page number does not
250get read even if we return a positive value! */
251ulint
252buf_read_ahead_random(
253 const page_id_t& page_id,
254 const page_size_t& page_size,
255 ibool inside_ibuf)
256{
257 buf_pool_t* buf_pool = buf_pool_get(page_id);
258 ulint recent_blocks = 0;
259 ulint ibuf_mode;
260 ulint count;
261 ulint low, high;
262 dberr_t err = DB_SUCCESS;
263 ulint i;
264 const ulint buf_read_ahead_random_area
265 = BUF_READ_AHEAD_AREA(buf_pool);
266
267 if (!srv_random_read_ahead) {
268 /* Disabled by user */
269 return(0);
270 }
271
272 if (srv_startup_is_before_trx_rollback_phase) {
273 /* No read-ahead to avoid thread deadlocks */
274 return(0);
275 }
276
277 if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {
278
279 /* If it is an ibuf bitmap page or trx sys hdr, we do
280 no read-ahead, as that could break the ibuf page access
281 order */
282
283 return(0);
284 }
285
286 low = (page_id.page_no() / buf_read_ahead_random_area)
287 * buf_read_ahead_random_area;
288
289 high = (page_id.page_no() / buf_read_ahead_random_area + 1)
290 * buf_read_ahead_random_area;
291
292 /* Remember the tablespace version before we ask the tablespace size
293 below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
294 do not try to read outside the bounds of the tablespace! */
295 if (fil_space_t* space = fil_space_acquire(page_id.space())) {
296
297#ifdef UNIV_DEBUG
298 if (srv_file_per_table) {
299 ulint size = 0;
300
301 for (const fil_node_t* node =
302 UT_LIST_GET_FIRST(space->chain);
303 node != NULL;
304 node = UT_LIST_GET_NEXT(chain, node)) {
305
306 size += ulint(os_file_get_size(node->handle)
307 / page_size.physical());
308 }
309
310 ut_ad(size == space->size);
311 }
312#endif /* UNIV_DEBUG */
313
314 if (high > space->size) {
315 high = space->size;
316 }
317 space->release();
318 } else {
319 return(0);
320 }
321
322 buf_pool_mutex_enter(buf_pool);
323
324 if (buf_pool->n_pend_reads
325 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
326 buf_pool_mutex_exit(buf_pool);
327
328 return(0);
329 }
330
331 /* Count how many blocks in the area have been recently accessed,
332 that is, reside near the start of the LRU list. */
333
334 for (i = low; i < high; i++) {
335 DBUG_EXECUTE_IF(
336 "innodb_invalid_read_after_truncate",
337 if (fil_space_t* space = fil_space_acquire(
338 page_id.space())) {
339 bool skip = !strcmp(space->name, "test/t1");
340 space->release();
341 if (skip) {
342 high = space->size;
343 buf_pool_mutex_exit(buf_pool);
344 goto read_ahead;
345 }
346 });
347
348 const buf_page_t* bpage = buf_page_hash_get(
349 buf_pool, page_id_t(page_id.space(), i));
350
351 if (bpage != NULL
352 && buf_page_is_accessed(bpage)
353 && buf_page_peek_if_young(bpage)) {
354
355 recent_blocks++;
356
357 if (recent_blocks
358 >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) {
359
360 buf_pool_mutex_exit(buf_pool);
361 goto read_ahead;
362 }
363 }
364 }
365
366 buf_pool_mutex_exit(buf_pool);
367 /* Do nothing */
368 return(0);
369
370read_ahead:
371 /* Read all the suitable blocks within the area */
372
373 if (inside_ibuf) {
374 ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
375 } else {
376 ibuf_mode = BUF_READ_ANY_PAGE;
377 }
378
379 count = 0;
380
381 for (i = low; i < high; i++) {
382 /* It is only sensible to do read-ahead in the non-sync aio
383 mode: hence FALSE as the first parameter */
384
385 const page_id_t cur_page_id(page_id.space(), i);
386
387 if (!ibuf_bitmap_page(cur_page_id, page_size)) {
388 count += buf_read_page_low(
389 &err, false,
390 IORequest::DO_NOT_WAKE,
391 ibuf_mode,
392 cur_page_id, page_size, false);
393
394 switch (err) {
395 case DB_SUCCESS:
396 case DB_TABLESPACE_TRUNCATED:
397 case DB_ERROR:
398 break;
399 case DB_TABLESPACE_DELETED:
400 ib::info() << "Random readahead trying to"
401 " access page " << cur_page_id
402 << " in nonexisting or"
403 " being-dropped tablespace";
404 break;
405 default:
406 ut_error;
407 }
408 }
409 }
410
411 /* In simulated aio we wake the aio handler threads only after
412 queuing all aio requests, in native aio the following call does
413 nothing: */
414
415 os_aio_simulated_wake_handler_threads();
416
417 if (count) {
418 DBUG_PRINT("ib_buf", ("random read-ahead %u pages, %u:%u",
419 (unsigned) count,
420 (unsigned) page_id.space(),
421 (unsigned) page_id.page_no()));
422 }
423
424 /* Read ahead is considered one I/O operation for the purpose of
425 LRU policy decision. */
426 buf_LRU_stat_inc_io();
427
428 buf_pool->stat.n_ra_pages_read_rnd += count;
429 srv_stats.buf_pool_reads.add(count);
430 return(count);
431}
432
433/** High-level function which reads a page asynchronously from a file to the
434buffer buf_pool if it is not already there. Sets the io_fix flag and sets
435an exclusive lock on the buffer frame. The flag is cleared and the x-lock
436released by the i/o-handler thread.
437@param[in] page_id page id
438@param[in] page_size page size
439@retval DB_SUCCESS if the page was read and is not corrupted,
440@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
441@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
442after decryption normal page checksum does not match.
443@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
444dberr_t
445buf_read_page(
446 const page_id_t& page_id,
447 const page_size_t& page_size)
448{
449 ulint count;
450 dberr_t err = DB_SUCCESS;
451
452 /* We do synchronous IO because our AIO completion code
453 is sub-optimal. See buf_page_io_complete(), we have to
454 acquire the buffer pool mutex before acquiring the block
455 mutex, required for updating the page state. The acquire
456 of the buffer pool mutex becomes an expensive bottleneck. */
457
458 count = buf_read_page_low(
459 &err, true,
460 0, BUF_READ_ANY_PAGE, page_id, page_size, false);
461
462 srv_stats.buf_pool_reads.add(count);
463
464 if (err == DB_TABLESPACE_DELETED) {
465 ib::info() << "trying to read page " << page_id
466 << " in nonexisting or being-dropped tablespace";
467 }
468
469 /* Increment number of I/O operations used for LRU policy. */
470 buf_LRU_stat_inc_io();
471
472 return(err);
473}
474
475/** High-level function which reads a page asynchronously from a file to the
476buffer buf_pool if it is not already there. Sets the io_fix flag and sets
477an exclusive lock on the buffer frame. The flag is cleared and the x-lock
478released by the i/o-handler thread.
479@param[in] page_id page id
480@param[in] page_size page size
481@param[in] sync true if synchronous aio is desired */
482void
483buf_read_page_background(
484 const page_id_t& page_id,
485 const page_size_t& page_size,
486 bool sync)
487{
488 ulint count;
489 dberr_t err;
490
491 count = buf_read_page_low(
492 &err, sync,
493 IORequest::DO_NOT_WAKE | IORequest::IGNORE_MISSING,
494 BUF_READ_ANY_PAGE,
495 page_id, page_size, false);
496
497 switch (err) {
498 case DB_SUCCESS:
499 case DB_TABLESPACE_TRUNCATED:
500 case DB_ERROR:
501 break;
502 case DB_TABLESPACE_DELETED:
503 ib::info() << "trying to read page " << page_id
504 << " in the background"
505 " in a non-existing or being-dropped tablespace";
506 break;
507 case DB_PAGE_CORRUPTED:
508 case DB_DECRYPTION_FAILED:
509 ib::error()
510 << "Background Page read failed to "
511 "read or decrypt " << page_id;
512 break;
513 default:
514 ib::fatal() << "Error " << err << " in background read of "
515 << page_id;
516 }
517
518 srv_stats.buf_pool_reads.add(count);
519
520 /* We do not increment number of I/O operations used for LRU policy
521 here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
522 about evicting uncompressed version of compressed pages from the
523 buffer pool. Since this function is called from buffer pool load
524 these IOs are deliberate and are not part of normal workload we can
525 ignore these in our heuristics. */
526}
527
528/** Applies linear read-ahead if in the buf_pool the page is a border page of
529a linear read-ahead area and all the pages in the area have been accessed.
530Does not read any page if the read-ahead mechanism is not activated. Note
531that the algorithm looks at the 'natural' adjacent successor and
532predecessor of the page, which on the leaf level of a B-tree are the next
533and previous page in the chain of leaves. To know these, the page specified
534in (space, offset) must already be present in the buf_pool. Thus, the
535natural way to use this function is to call it when a page in the buf_pool
536is accessed the first time, calling this function just after it has been
537bufferfixed.
538NOTE 1: as this function looks at the natural predecessor and successor
539fields on the page, what happens, if these are not initialized to any
540sensible value? No problem, before applying read-ahead we check that the
541area to read is within the span of the space, if not, read-ahead is not
542applied. An uninitialized value may result in a useless read operation, but
543only very improbably.
544NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
545function must be written such that it cannot end up waiting for these
546latches!
547NOTE 3: the calling thread must want access to the page given: this rule is
548set to prevent unintended read-aheads performed by ibuf routines, a situation
549which could result in a deadlock if the OS does not support asynchronous io.
550@param[in] page_id page id; see NOTE 3 above
551@param[in] page_size page size
552@param[in] inside_ibuf TRUE if we are inside ibuf routine
553@return number of page read requests issued */
554ulint
555buf_read_ahead_linear(
556 const page_id_t& page_id,
557 const page_size_t& page_size,
558 ibool inside_ibuf)
559{
560 buf_pool_t* buf_pool = buf_pool_get(page_id);
561 buf_page_t* bpage;
562 buf_frame_t* frame;
563 buf_page_t* pred_bpage = NULL;
564 ulint pred_offset;
565 ulint succ_offset;
566 int asc_or_desc;
567 ulint new_offset;
568 ulint fail_count;
569 ulint low, high;
570 dberr_t err = DB_SUCCESS;
571 ulint i;
572 const ulint buf_read_ahead_linear_area
573 = BUF_READ_AHEAD_AREA(buf_pool);
574 ulint threshold;
575
576 /* check if readahead is disabled */
577 if (!srv_read_ahead_threshold) {
578 return(0);
579 }
580
581 if (srv_startup_is_before_trx_rollback_phase) {
582 /* No read-ahead to avoid thread deadlocks */
583 return(0);
584 }
585
586 low = (page_id.page_no() / buf_read_ahead_linear_area)
587 * buf_read_ahead_linear_area;
588 high = (page_id.page_no() / buf_read_ahead_linear_area + 1)
589 * buf_read_ahead_linear_area;
590
591 if ((page_id.page_no() != low) && (page_id.page_no() != high - 1)) {
592 /* This is not a border page of the area: return */
593
594 return(0);
595 }
596
597 if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {
598
599 /* If it is an ibuf bitmap page or trx sys hdr, we do
600 no read-ahead, as that could break the ibuf page access
601 order */
602
603 return(0);
604 }
605
606 /* Remember the tablespace version before we ask te tablespace size
607 below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
608 do not try to read outside the bounds of the tablespace! */
609 ulint space_size;
610
611 if (fil_space_t* space = fil_space_acquire(page_id.space())) {
612 space_size = space->size;
613 space->release();
614
615 if (high > space_size) {
616 /* The area is not whole */
617 return(0);
618 }
619 } else {
620 return(0);
621 }
622
623 buf_pool_mutex_enter(buf_pool);
624
625 if (buf_pool->n_pend_reads
626 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
627 buf_pool_mutex_exit(buf_pool);
628
629 return(0);
630 }
631
632 /* Check that almost all pages in the area have been accessed; if
633 offset == low, the accesses must be in a descending order, otherwise,
634 in an ascending order. */
635
636 asc_or_desc = 1;
637
638 if (page_id.page_no() == low) {
639 asc_or_desc = -1;
640 }
641
642 /* How many out of order accessed pages can we ignore
643 when working out the access pattern for linear readahead */
644 threshold = ut_min(static_cast<ulint>(64 - srv_read_ahead_threshold),
645 BUF_READ_AHEAD_AREA(buf_pool));
646
647 fail_count = 0;
648
649 for (i = low; i < high; i++) {
650 bpage = buf_page_hash_get(buf_pool,
651 page_id_t(page_id.space(), i));
652
653 if (bpage == NULL || !buf_page_is_accessed(bpage)) {
654 /* Not accessed */
655 fail_count++;
656
657 } else if (pred_bpage) {
658 /* Note that buf_page_is_accessed() returns
659 the time of the first access. If some blocks
660 of the extent existed in the buffer pool at
661 the time of a linear access pattern, the first
662 access times may be nonmonotonic, even though
663 the latest access times were linear. The
664 threshold (srv_read_ahead_factor) should help
665 a little against this. */
666 int res = ut_ulint_cmp(
667 buf_page_is_accessed(bpage),
668 buf_page_is_accessed(pred_bpage));
669 /* Accesses not in the right order */
670 if (res != 0 && res != asc_or_desc) {
671 fail_count++;
672 }
673 }
674
675 if (fail_count > threshold) {
676 /* Too many failures: return */
677 buf_pool_mutex_exit(buf_pool);
678 return(0);
679 }
680
681 if (bpage && buf_page_is_accessed(bpage)) {
682 pred_bpage = bpage;
683 }
684 }
685
686 /* If we got this far, we know that enough pages in the area have
687 been accessed in the right order: linear read-ahead can be sensible */
688
689 bpage = buf_page_hash_get(buf_pool, page_id);
690
691 if (bpage == NULL) {
692 buf_pool_mutex_exit(buf_pool);
693
694 return(0);
695 }
696
697 switch (buf_page_get_state(bpage)) {
698 case BUF_BLOCK_ZIP_PAGE:
699 frame = bpage->zip.data;
700 break;
701 case BUF_BLOCK_FILE_PAGE:
702 frame = ((buf_block_t*) bpage)->frame;
703 break;
704 default:
705 ut_error;
706 break;
707 }
708
709 /* Read the natural predecessor and successor page addresses from
710 the page; NOTE that because the calling thread may have an x-latch
711 on the page, we do not acquire an s-latch on the page, this is to
712 prevent deadlocks. Even if we read values which are nonsense, the
713 algorithm will work. */
714
715 pred_offset = fil_page_get_prev(frame);
716 succ_offset = fil_page_get_next(frame);
717
718 buf_pool_mutex_exit(buf_pool);
719
720 if ((page_id.page_no() == low)
721 && (succ_offset == page_id.page_no() + 1)) {
722
723 /* This is ok, we can continue */
724 new_offset = pred_offset;
725
726 } else if ((page_id.page_no() == high - 1)
727 && (pred_offset == page_id.page_no() - 1)) {
728
729 /* This is ok, we can continue */
730 new_offset = succ_offset;
731 } else {
732 /* Successor or predecessor not in the right order */
733
734 return(0);
735 }
736
737 low = (new_offset / buf_read_ahead_linear_area)
738 * buf_read_ahead_linear_area;
739 high = (new_offset / buf_read_ahead_linear_area + 1)
740 * buf_read_ahead_linear_area;
741
742 if ((new_offset != low) && (new_offset != high - 1)) {
743 /* This is not a border page of the area: return */
744
745 return(0);
746 }
747
748 if (high > space_size) {
749 /* The area is not whole, return */
750
751 return(0);
752 }
753
754 ulint count = 0;
755
756 /* If we got this far, read-ahead can be sensible: do it */
757
758 ulint ibuf_mode;
759
760 ibuf_mode = inside_ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
761
762 /* Since Windows XP seems to schedule the i/o handler thread
763 very eagerly, and consequently it does not wait for the
764 full read batch to be posted, we use special heuristics here */
765
766 os_aio_simulated_put_read_threads_to_sleep();
767
768 for (i = low; i < high; i++) {
769 /* It is only sensible to do read-ahead in the non-sync
770 aio mode: hence FALSE as the first parameter */
771
772 const page_id_t cur_page_id(page_id.space(), i);
773
774 if (!ibuf_bitmap_page(cur_page_id, page_size)) {
775 count += buf_read_page_low(
776 &err, false,
777 IORequest::DO_NOT_WAKE,
778 ibuf_mode, cur_page_id, page_size, false);
779
780 switch (err) {
781 case DB_SUCCESS:
782 case DB_TABLESPACE_TRUNCATED:
783 case DB_TABLESPACE_DELETED:
784 case DB_ERROR:
785 break;
786 case DB_PAGE_CORRUPTED:
787 case DB_DECRYPTION_FAILED:
788 ib::error() << "linear readahead failed to"
789 " read or decrypt "
790 << page_id_t(page_id.space(), i);
791 break;
792 default:
793 ut_error;
794 }
795 }
796 }
797
798 /* In simulated aio we wake the aio handler threads only after
799 queuing all aio requests, in native aio the following call does
800 nothing: */
801
802 os_aio_simulated_wake_handler_threads();
803
804 if (count) {
805 DBUG_PRINT("ib_buf", ("linear read-ahead " ULINTPF " pages, "
806 "%u:%u",
807 count,
808 page_id.space(),
809 page_id.page_no()));
810 }
811
812 /* Read ahead is considered one I/O operation for the purpose of
813 LRU policy decision. */
814 buf_LRU_stat_inc_io();
815
816 buf_pool->stat.n_ra_pages_read += count;
817 return(count);
818}
819
820/********************************************************************//**
821Issues read requests for pages which the ibuf module wants to read in, in
822order to contract the insert buffer tree. Technically, this function is like
823a read-ahead function. */
824void
825buf_read_ibuf_merge_pages(
826/*======================*/
827 bool sync, /*!< in: true if the caller
828 wants this function to wait
829 for the highest address page
830 to get read in, before this
831 function returns */
832 const ulint* space_ids, /*!< in: array of space ids */
833 const ulint* page_nos, /*!< in: array of page numbers
834 to read, with the highest page
835 number the last in the
836 array */
837 ulint n_stored) /*!< in: number of elements
838 in the arrays */
839{
840#ifdef UNIV_IBUF_DEBUG
841 ut_a(n_stored < srv_page_size);
842#endif
843
844 for (ulint i = 0; i < n_stored; i++) {
845 bool found;
846 const page_size_t page_size(fil_space_get_page_size(
847 space_ids[i], &found));
848
849 if (!found) {
850tablespace_deleted:
851 /* The tablespace was not found: remove all
852 entries for it */
853 ibuf_delete_for_discarded_space(space_ids[i]);
854 while (i + 1 < n_stored
855 && space_ids[i + 1] == space_ids[i]) {
856 i++;
857 }
858 continue;
859 }
860
861 const page_id_t page_id(space_ids[i], page_nos[i]);
862
863 buf_pool_t* buf_pool = buf_pool_get(page_id);
864
865 while (buf_pool->n_pend_reads
866 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
867 os_thread_sleep(500000);
868 }
869
870 dberr_t err;
871
872 buf_read_page_low(&err,
873 sync && (i + 1 == n_stored),
874 0,
875 BUF_READ_ANY_PAGE, page_id, page_size,
876 true, true /* ignore_missing_space */);
877
878 switch(err) {
879 case DB_SUCCESS:
880 case DB_TABLESPACE_TRUNCATED:
881 case DB_ERROR:
882 break;
883 case DB_TABLESPACE_DELETED:
884 goto tablespace_deleted;
885 case DB_PAGE_CORRUPTED:
886 case DB_DECRYPTION_FAILED:
887 ib::error() << "Failed to read or decrypt " << page_id
888 << " for change buffer merge";
889 break;
890 default:
891 ut_error;
892 }
893 }
894
895 os_aio_simulated_wake_handler_threads();
896
897 if (n_stored) {
898 DBUG_PRINT("ib_buf",
899 ("ibuf merge read-ahead %u pages, space %u",
900 unsigned(n_stored), unsigned(space_ids[0])));
901 }
902}
903
904/** Issues read requests for pages which recovery wants to read in.
905@param[in] sync true if the caller wants this function to wait
906for the highest address page to get read in, before this function returns
907@param[in] space_id tablespace id
908@param[in] page_nos array of page numbers to read, with the
909highest page number the last in the array
910@param[in] n_stored number of page numbers in the array */
911void
912buf_read_recv_pages(
913 bool sync,
914 ulint space_id,
915 const ulint* page_nos,
916 ulint n_stored)
917{
918 fil_space_t* space = fil_space_get(space_id);
919
920 if (space == NULL) {
921 /* The tablespace is missing: do nothing */
922 return;
923 }
924
925 fil_space_open_if_needed(space);
926
927 const page_size_t page_size(space->flags);
928
929 for (ulint i = 0; i < n_stored; i++) {
930 buf_pool_t* buf_pool;
931 const page_id_t cur_page_id(space_id, page_nos[i]);
932
933 ulint count = 0;
934
935 buf_pool = buf_pool_get(cur_page_id);
936 while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
937
938 os_aio_simulated_wake_handler_threads();
939 os_thread_sleep(10000);
940
941 count++;
942
943 if (!(count % 1000)) {
944
945 ib::error()
946 << "Waited for " << count / 100
947 << " seconds for "
948 << buf_pool->n_pend_reads
949 << " pending reads";
950 }
951 }
952
953 dberr_t err;
954
955 if (sync && i + 1 == n_stored) {
956 buf_read_page_low(
957 &err, true,
958 0,
959 BUF_READ_ANY_PAGE,
960 cur_page_id, page_size, true);
961 } else {
962 buf_read_page_low(
963 &err, false,
964 IORequest::DO_NOT_WAKE,
965 BUF_READ_ANY_PAGE,
966 cur_page_id, page_size, true);
967 }
968
969 if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) {
970 ib::error() << "Recovery failed to read or decrypt "
971 << cur_page_id;
972 }
973 }
974
975 os_aio_simulated_wake_handler_threads();
976
977 DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)",
978 unsigned(n_stored)));
979}
980