1/* Copyright (C) 2000-2008 MySQL AB, 2008-2011 Monty Program Ab
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16/*
17 These functions handle page caching for Maria tables.
18
19 One cache can handle many files.
20 It must contain buffers of the same blocksize.
21 init_pagecache() should be used to init cache handler.
22
  The free list (free_block_list) is a stack-like structure.
  When a block is freed by free_block(), it is pushed onto the stack.
  When a new block is required, we first try to pop one from the stack.
  If the stack is empty, we try to get a never-used block from the pool.
  If the pool is empty too, a block is taken from the LRU ring and flushed
  to disk if necessary. This is handled in find_block().
29 With the new free list, the blocks can have three temperatures:
30 hot, warm and cold (which is free). This is remembered in the block header
31 by the enum PCBLOCK_TEMPERATURE temperature variable. Remembering the
32 temperature is necessary to correctly count the number of warm blocks,
33 which is required to decide when blocks are allowed to become hot. Whenever
34 a block is inserted to another (sub-)chain, we take the old and new
35 temperature into account to decide if we got one more or less warm block.
36 blocks_unused is the sum of never used blocks in the pool and of currently
37 free blocks. blocks_used is the number of blocks fetched from the pool and
38 as such gives the maximum number of in-use blocks at any time.
39
40 TODO: Write operation locks whole cache till the end of the operation.
41 Should be fixed.
42*/
43
44#include "maria_def.h"
45#include <m_string.h>
46#include "ma_pagecache.h"
47#include "ma_blockrec.h"
48#include <my_bit.h>
49#include <errno.h>
50
51/*
52 Some compilation flags have been added specifically for this module
53 to control the following:
54 - not to let a thread to yield the control when reading directly
55 from page cache, which might improve performance in many cases;
56 to enable this add:
57 #define SERIALIZED_READ_FROM_CACHE
58 - to set an upper bound for number of threads simultaneously
59 using the page cache; this setting helps to determine an optimal
60 size for hash table and improve performance when the number of
61 blocks in the page cache much less than the number of threads
62 accessing it;
63 to set this number equal to <N> add
64 #define MAX_THREADS <N>
65 - to substitute calls of mysql_cond_wait for calls of
66 mysql_cond_timedwait (wait with timeout set up);
67 this setting should be used only when you want to trap a deadlock
68 situation, which theoretically should not happen;
69 to set timeout equal to <T> seconds add
70 #define PAGECACHE_TIMEOUT <T>
71 - to enable the module traps and to send debug information from
72 page cache module to a special debug log add:
73 #define PAGECACHE_DEBUG
74 the name of this debug log file <LOG NAME> can be set through:
75 #define PAGECACHE_DEBUG_LOG <LOG NAME>
76 if the name is not defined, it's set by default;
77 if the PAGECACHE_DEBUG flag is not set up and we are in a debug
78 mode, i.e. when ! defined(DBUG_OFF), the debug information from the
79 module is sent to the regular debug log.
80
81 Example of the settings:
82 #define SERIALIZED_READ_FROM_CACHE
83 #define MAX_THREADS 100
84 #define PAGECACHE_TIMEOUT 1
85 #define PAGECACHE_DEBUG
86 #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log"
87*/
88
/*
  The key cache relies on external locking: its functions (key_cache_read(),
  key_cache_insert() and key_cache_write()) are protected by the external
  MyISAM lock. The page cache has no such external lock, so here we use
  SERIALIZED_READ_FROM_CACHE to avoid reading inconsistent data from a page.
*/
96#define SERIALIZED_READ_FROM_CACHE yes
97
/*
  Debug helper: print the state of block B through DBUG_PRINT() under the
  "info" keyword. The file descriptor, the page number and the hash link
  request counter are printed as 0 when the block has no hash link.
*/
#define PCBLOCK_INFO(B)                                                 \
  DBUG_PRINT("info",                                                    \
             ("block: %p fd: %lu page: %lu status: 0x%x "               \
              "hshL: %p requests: %u/%u wrlocks: %u rdlocks: %u "       \
              "rdlocks_q: %u pins: %u type: %s",                        \
              (B),                                                      \
              (ulong) ((B)->hash_link ? (B)->hash_link->file.file : 0), \
              (ulong) ((B)->hash_link ? (B)->hash_link->pageno : 0),    \
              (uint) (B)->status,                                       \
              (B)->hash_link,                                           \
              (uint) (B)->requests,                                     \
              (uint) ((B)->hash_link ? (B)->hash_link->requests : 0),   \
              (B)->wlocks, (B)->rlocks, (B)->rlocks_queue,              \
              (uint) (B)->pins,                                         \
              page_cache_page_type_str[(B)->type]))
119
120/* TODO: put it to my_static.c */
121my_bool my_disable_flush_pagecache_blocks= 0;
122
/*
  Get a pointer to the enclosing structure of type TYPE from 'a', a pointer
  to its member MEMBER.

  The whole expansion is parenthesized so that the result can be used
  directly in a postfix expression such as STRUCT_PTR(T, m, p)->field;
  without the outer parentheses the cast would apply to the result of '->'.
*/
#define STRUCT_PTR(TYPE, MEMBER, a) \
  ((TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)))
125
/* types of condition variables (one wait queue per type) */
#define COND_FOR_REQUESTED 0    /* threads waiting for a read operation */
#define COND_FOR_SAVED     1    /* threads waiting for a flush */
#define COND_FOR_WRLOCK    2    /* queue for the write lock */
#define COND_SIZE          3    /* number of COND_* queues */
131
132typedef mysql_cond_t KEYCACHE_CONDVAR;
133
134/* descriptor of the page in the page cache block buffer */
135struct st_pagecache_page
136{
137 PAGECACHE_FILE file; /* file to which the page belongs to */
138 pgcache_page_no_t pageno; /* number of the page in the file */
139};
140
141/* element in the chain of a hash table bucket */
142struct st_pagecache_hash_link
143{
144 struct st_pagecache_hash_link
145 *next, **prev; /* to connect links in the same bucket */
146 struct st_pagecache_block_link
147 *block; /* reference to the block for the page: */
148 PAGECACHE_FILE file; /* from such a file */
149 pgcache_page_no_t pageno; /* this page */
150 uint requests; /* number of requests for the page */
151};
152
/* simple states of a block; every state has its own bit */
#define PCBLOCK_ERROR 0x01 /* an error occurred when performing disk i/o */
#define PCBLOCK_READ  0x02 /* the page is in the block buffer */

/*
  A thread is reading the data to the page.
  If the page contained old changed data, it will be written out with
  this state set on the block.
  The page is not yet ready to be used for reading.
*/
#define PCBLOCK_IN_SWITCH 0x04
/*
  Block does not accept new requests for the old page that would cause
  the page to be pinned or written to.
  (Reads that copy the block can still continue).
  This state happens when another thread is waiting for readers to finish
  so that it can read data into the block (after the block, if it was
  changed, has been flushed out to disk).
*/
#define PCBLOCK_REASSIGNED 0x08
#define PCBLOCK_IN_FLUSH   0x10 /* block is in flush operation */
#define PCBLOCK_CHANGED    0x20 /* block buffer contains a dirty page */
#define PCBLOCK_DIRECT_W   0x40 /* possible direct write to the block */
#define PCBLOCK_DEL_WRITE  0x80 /* should be written on delete */
177
/* page status, returned by find_block */
#define PAGE_READ               0
#define PAGE_TO_BE_READ         1
#define PAGE_WAIT_TO_BE_READ    2

/* block temperature determines in which (sub-)chain the block currently is */
enum PCBLOCK_TEMPERATURE
{
  PCBLOCK_COLD,                 /* free */
  PCBLOCK_WARM,
  PCBLOCK_HOT
};
185
186/* debug info */
187#ifndef DBUG_OFF
/* Page type names; used only for control page type changing during debugging */
static const char *page_cache_page_type_str[]=
{
  "EMPTY",                      /* 0 */
  "PLAIN",                      /* 1 */
  "LSN",                        /* 2 */
  "READ_UNKNOWN"                /* 3 */
};

/* Write mode names for debug output */
static const char *page_cache_page_write_mode_str[]=
{
  "DELAY",                      /* 0 */
  "DONE"                        /* 1 */
};

/* Lock change names ("state before -> state after") for debug output */
static const char *page_cache_page_lock_str[]=
{
  "free -> free",               /* 0 */
  "read -> read",               /* 1 */
  "write -> write",             /* 2 */
  "free -> read",               /* 3 */
  "free -> write",              /* 4 */
  "read -> free",               /* 5 */
  "write -> free",              /* 6 */
  "write -> read"               /* 7 */
};

/* Pin change names ("state before -> state after") for debug output */
static const char *page_cache_page_pin_str[]=
{
  "pinned -> pinned",           /* 0 */
  "unpinned -> unpinned",       /* 1 */
  "unpinned -> pinned",         /* 2 */
  "pinned -> unpinned"          /* 3 */
};
222
223
/* Debug list node: a thread that pinned the block */
typedef struct st_pagecache_pin_info
{
  struct st_pagecache_pin_info *next;   /* next node of the list */
  struct st_pagecache_pin_info **prev;  /* address of the pointer to this node */
  struct st_my_thread_var *thread;
} PAGECACHE_PIN_INFO;
229
230/*
231 st_pagecache_lock_info structure should be kept in next, prev, thread part
232 compatible with st_pagecache_pin_info to be compatible in functions.
233*/
234
235typedef struct st_pagecache_lock_info
236{
237 struct st_pagecache_lock_info *next, **prev;
238 struct st_my_thread_var *thread;
239 my_bool write_lock;
240} PAGECACHE_LOCK_INFO;
241
242
243/* service functions maintain debugging info about pin & lock */
244
245
246/*
247 Links information about thread pinned/locked the block to the list
248
249 SYNOPSIS
250 info_link()
251 list the list to link in
252 node the node which should be linked
253*/
254
255static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
256{
257 if ((node->next= *list))
258 node->next->prev= &(node->next);
259 *list= node;
260 node->prev= list;
261}
262
263
264/*
265 Unlinks information about thread pinned/locked the block from the list
266
267 SYNOPSIS
268 info_unlink()
269 node the node which should be unlinked
270*/
271
272static void info_unlink(PAGECACHE_PIN_INFO *node)
273{
274 if ((*node->prev= node->next))
275 node->next->prev= node->prev;
276}
277
278
279/*
280 Finds information about given thread in the list of threads which
281 pinned/locked this block.
282
283 SYNOPSIS
284 info_find()
285 list the list where to find the thread
286 thread thread ID (reference to the st_my_thread_var
287 of the thread)
288 any return any thread of the list
289
290 RETURN
291 0 - the thread was not found
292 pointer to the information node of the thread in the list, or, if 'any',
293 to any thread of the list.
294*/
295
296static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
297 struct st_my_thread_var *thread,
298 my_bool any)
299{
300 register PAGECACHE_PIN_INFO *i= list;
301 if (any)
302 return i;
303 for(; i != 0; i= i->next)
304 if (i->thread == thread)
305 return i;
306 return 0;
307}
308
309#endif /* !DBUG_OFF */
310
311/* page cache block */
312struct st_pagecache_block_link
313{
314 struct st_pagecache_block_link
315 *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
316 struct st_pagecache_block_link
317 *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
318 struct st_pagecache_hash_link
319 *hash_link; /* backward ptr to referring hash_link */
320#ifndef DBUG_OFF
321 PAGECACHE_PIN_INFO *pin_list;
322 PAGECACHE_LOCK_INFO *lock_list;
323#endif
324 KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
325 uchar *buffer; /* buffer for the block page */
326 pthread_t write_locker;
327
328 ulonglong last_hit_time; /* timestamp of the last hit */
329 WQUEUE
330 wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */
331 uint32 requests; /* number of requests for the block */
332 uint32 pins; /* pin counter */
333 uint32 wlocks; /* write locks counter */
334 uint32 rlocks; /* read locks counter */
335 uint32 rlocks_queue; /* rd. locks waiting wr. lock of this thread */
336 uint16 status; /* state of the block */
337 int16 error; /* error code for block in case of error */
338 enum PCBLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot*/
339 enum pagecache_page_type type; /* type of the block */
340 uint hits_left; /* number of hits left until promotion */
341 /** @brief LSN when first became dirty; LSN_MAX means "not yet set" */
342 LSN rec_lsn;
343};
344
345/** @brief information describing a run of flush_pagecache_blocks_int() */
346struct st_file_in_flush
347{
348 File file;
349 /**
350 @brief threads waiting for the thread currently flushing this file to be
351 done
352 */
353 WQUEUE flush_queue;
354 /**
355 @brief if the thread currently flushing the file has a non-empty
356 first_in_switch list.
357 */
358 my_bool first_in_switch;
359};
360
361#ifndef DBUG_OFF
362/* debug checks */
363
364#ifdef NOT_USED
365static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
366 enum pagecache_page_pin mode
367 __attribute__((unused)))
368{
369 struct st_my_thread_var *thread= my_thread_var;
370 PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread);
371 DBUG_ENTER("info_check_pin");
372 DBUG_PRINT("enter", ("thread: 0x%lx pin: %s",
373 (ulong) thread, page_cache_page_pin_str[mode]));
374 if (info)
375 {
376 if (mode == PAGECACHE_PIN_LEFT_UNPINNED)
377 {
378 DBUG_PRINT("info",
379 ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_UNPINNED!!!",
380 (ulong)thread, (ulong)block));
381 DBUG_RETURN(1);
382 }
383 else if (mode == PAGECACHE_PIN)
384 {
385 DBUG_PRINT("info",
386 ("info_check_pin: thread: 0x%lx block: 0x%lx ; PIN!!!",
387 (ulong)thread, (ulong)block));
388 DBUG_RETURN(1);
389 }
390 }
391 else
392 {
393 if (mode == PAGECACHE_PIN_LEFT_PINNED)
394 {
395 DBUG_PRINT("info",
396 ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_PINNED!!!",
397 (ulong)thread, (ulong)block));
398 DBUG_RETURN(1);
399 }
400 else if (mode == PAGECACHE_UNPIN)
401 {
402 DBUG_PRINT("info",
403 ("info_check_pin: thread: 0x%lx block: 0x%lx ; UNPIN!!!",
404 (ulong)thread, (ulong)block));
405 DBUG_RETURN(1);
406 }
407 }
408 DBUG_RETURN(0);
409}
410
411
412/*
413 Debug function which checks current lock/pin state and requested changes
414
415 SYNOPSIS
416 info_check_lock()
417 lock requested lock changes
418 pin requested pin changes
419
420 RETURN
421 0 - OK
422 1 - Error
423*/
424
425static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
426 enum pagecache_page_lock lock,
427 enum pagecache_page_pin pin)
428{
429 struct st_my_thread_var *thread= my_thread_var;
430 PAGECACHE_LOCK_INFO *info=
431 (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list,
432 thread);
433 DBUG_ENTER("info_check_lock");
434 switch(lock) {
435 case PAGECACHE_LOCK_LEFT_UNLOCKED:
436 if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
437 info)
438 goto error;
439 break;
440 case PAGECACHE_LOCK_LEFT_READLOCKED:
441 if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
442 pin != PAGECACHE_PIN_LEFT_PINNED) ||
443 info == 0 || info->write_lock)
444 goto error;
445 break;
446 case PAGECACHE_LOCK_LEFT_WRITELOCKED:
447 if (pin != PAGECACHE_PIN_LEFT_PINNED ||
448 info == 0 || !info->write_lock)
449 goto error;
450 break;
451 case PAGECACHE_LOCK_READ:
452 if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
453 pin != PAGECACHE_PIN) ||
454 info != 0)
455 goto error;
456 break;
457 case PAGECACHE_LOCK_WRITE:
458 if (pin != PAGECACHE_PIN ||
459 info != 0)
460 goto error;
461 break;
462 case PAGECACHE_LOCK_READ_UNLOCK:
463 if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
464 pin != PAGECACHE_UNPIN) ||
465 info == 0 || info->write_lock)
466 goto error;
467 break;
468 case PAGECACHE_LOCK_WRITE_UNLOCK:
469 if (pin != PAGECACHE_UNPIN ||
470 info == 0 || !info->write_lock)
471 goto error;
472 break;
473 case PAGECACHE_LOCK_WRITE_TO_READ:
474 if ((pin != PAGECACHE_PIN_LEFT_PINNED &&
475 pin != PAGECACHE_UNPIN) ||
476 info == 0 || !info->write_lock)
477 goto error;
478 break;
479 }
480 DBUG_RETURN(0);
481error:
482 DBUG_PRINT("info",
483 ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d,"
484 "to lock: %s, to pin: %s",
485 (ulong) thread, (ulong) block, MY_TEST(info),
486 (info ? info->write_lock : 0),
487 page_cache_page_lock_str[lock],
488 page_cache_page_pin_str[pin]));
489 DBUG_RETURN(1);
490}
491#endif /* NOT_USED */
492#endif /* !DBUG_OFF */
493
494#define FLUSH_CACHE 2000 /* sort this many blocks at once */
495
496static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
497 my_bool abort_if_pinned);
498static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link);
499#ifndef DBUG_OFF
500static void test_key_cache(PAGECACHE *pagecache,
501 const char *where, my_bool lock);
502#endif
503
/*
  Hash bucket for page 'pos' of file 'f' in page cache 'p'.
  The sum of the page number and the file descriptor is masked with
  hash_entries - 1. Every macro parameter is parenthesized so that any
  expression (e.g. &cache_object) can be passed.
*/
#define PAGECACHE_HASH(p, f, pos) (((size_t) (pos) +                    \
                                    (size_t) (f).file) &                \
                                   ((p)->hash_entries - 1))
/*
  Bucket for file 'f' in the changed/file blocks hashes of 'cache': the file
  descriptor masked with changed_blocks_hash_size - 1.
*/
#define FILE_HASH(f,cache) ((uint) (f).file &                           \
                            ((cache)->changed_blocks_hash_size - 1))
507
#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log"

/* With PAGECACHE_DEBUG set and no log name given, use the default name */
#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG)
#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG
#endif

#if defined(PAGECACHE_DEBUG_LOG)
static FILE *pagecache_debug_log= NULL;
static void pagecache_debug_print _VARARGS((const char *fmt, ...));
/*
  Open the debug log (once) and make it line buffered.
  fopen() may fail; in that case pagecache_debug_log stays NULL and
  setvbuf() must not be called on it.
*/
#define PAGECACHE_DEBUG_OPEN                                            \
  if (!pagecache_debug_log)                                             \
  {                                                                     \
    pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w");               \
    if (pagecache_debug_log)                                            \
      (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ);        \
  }

/* Close the debug log if it is open */
#define PAGECACHE_DEBUG_CLOSE                                           \
  if (pagecache_debug_log)                                              \
  {                                                                     \
    fclose(pagecache_debug_log);                                        \
    pagecache_debug_log= 0;                                             \
  }
#else
#define PAGECACHE_DEBUG_OPEN
#define PAGECACHE_DEBUG_CLOSE
#endif /* defined(PAGECACHE_DEBUG_LOG) */
534
#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
/* Debug output of the module goes to the page cache debug log */
#define KEYCACHE_PRINT(l, m) KEYCACHE_DBUG_PRINT(l,m)
#define KEYCACHE_DBUG_PRINT(l, m)                       \
  {                                                     \
    if (pagecache_debug_log)                            \
      fprintf(pagecache_debug_log, "%s: ", l);          \
    pagecache_debug_print m;                            \
  }

/* Close the debug log before the failed assertion is reported */
#define KEYCACHE_DBUG_ASSERT(a)                         \
  {                                                     \
    if (! (a) && pagecache_debug_log)                   \
      fclose(pagecache_debug_log);                      \
    DBUG_ASSERT(a);                                     \
  }
#else
/* Debug output of the module goes to the regular debug log */
#define KEYCACHE_PRINT(l, m)
#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
551
#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF)
/* Thread id stored by the last KEYCACHE_THREAD_TRACE_BEGIN() */
static long pagecache_thread_id;
#define KEYCACHE_THREAD_TRACE(l)                                        \
  KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id))

#define KEYCACHE_THREAD_TRACE_BEGIN(l)                                  \
  {                                                                     \
    struct st_my_thread_var *thread_var= my_thread_var;                 \
    pagecache_thread_id= thread_var->id;                                \
    KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id))          \
  }

#define KEYCACHE_THREAD_TRACE_END(l)                                    \
  KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
#else
/* No tracing: all the trace macros expand to nothing */
#define KEYCACHE_PRINT(l,m)
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
#define KEYCACHE_THREAD_TRACE_END(l)
#define KEYCACHE_THREAD_TRACE(l)
#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */
570
/*
  Index of block 'b' in the block array of page cache 'p' (p->block_root),
  and index of hash link 'h' in its hash link array (p->hash_link_root).
  The parameter 'p' is parenthesized so that any pointer expression
  (e.g. &cache_object) can be passed.
*/
#define PCBLOCK_NUMBER(p, b)                                            \
  ((uint) (((char*)(b)-(char *) (p)->block_root)/                       \
           sizeof(PAGECACHE_BLOCK_LINK)))
#define PAGECACHE_HASH_LINK_NUMBER(p, h)                                \
  ((uint) (((char*)(h)-(char *) (p)->hash_link_root)/                   \
           sizeof(PAGECACHE_HASH_LINK)))
576
/*
  Waiting on a condition goes through a function of this module when a
  timeout (not on Windows) or PAGECACHE_DEBUG is configured; otherwise it
  is plain mysql_cond_wait.
*/
#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG)
static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
                                       mysql_mutex_t *mutex);
#else
#define pagecache_pthread_cond_wait mysql_cond_wait
#endif

#if defined(PAGECACHE_DEBUG)
static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex);
static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex);
static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond);
/* Trace every mutex/condition operation together with its source line */
#define pagecache_pthread_mutex_lock(M)                                 \
{                                                                       \
  DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__));    \
  ___pagecache_pthread_mutex_lock(M);                                   \
}
#define pagecache_pthread_mutex_unlock(M)                               \
{                                                                       \
  DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__));  \
  ___pagecache_pthread_mutex_unlock(M);                                 \
}
#define pagecache_pthread_cond_signal(M)                                \
{                                                                       \
  DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__));        \
  ___pagecache_pthread_cond_signal(M);                                  \
}
#else
#define pagecache_pthread_mutex_lock mysql_mutex_lock
#define pagecache_pthread_mutex_unlock mysql_mutex_unlock
#define pagecache_pthread_cond_signal mysql_cond_signal
#endif /* defined(PAGECACHE_DEBUG) */
602
603extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
604
605/*
606 Write page to the disk
607
608 SYNOPSIS
609 pagecache_fwrite()
610 pagecache - page cache pointer
611 filedesc - pagecache file descriptor structure
612 buffer - buffer which we will write
613 type - page type (plain or with LSN)
614 flags - MYF() flags
615
616 RETURN
617 0 - OK
618 1 - Error
619*/
620
621static my_bool pagecache_fwrite(PAGECACHE *pagecache,
622 PAGECACHE_FILE *filedesc,
623 uchar *buffer,
624 pgcache_page_no_t pageno,
625 enum pagecache_page_type type
626 __attribute__((unused)),
627 myf flags)
628{
629 int res;
630 PAGECACHE_IO_HOOK_ARGS args;
631 DBUG_ENTER("pagecache_fwrite");
632 DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
633
634#ifdef EXTRA_DEBUG_BITMAP
635 /*
636 This code is very good when debugging changes in bitmaps or dirty lists
637 The above define should be defined for all Aria files if you want to
638 debug either of the above issues.
639 */
640
641 if (pagecache->extra_debug)
642 {
643 char buff[80];
644 uint len= my_sprintf(buff,
645 (buff, "fwrite: fd: %d id: %u page: %llu",
646 filedesc->file,
647 _ma_file_callback_to_id(filedesc->callback_data),
648 pageno));
649 (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
650 (uchar*) buff, len);
651 }
652#endif
653
654 /* initialize hooks args */
655 args.page= buffer;
656 args.pageno= pageno;
657 args.data= filedesc->callback_data;
658
659 /* Todo: Integrate this with write_callback so we have only one callback */
660 if ((*filedesc->flush_log_callback)(&args))
661 DBUG_RETURN(1);
662 DBUG_PRINT("info", ("pre_write_hook:%p data: %p",
663 filedesc->pre_write_hook,
664 filedesc->callback_data));
665 if ((*filedesc->pre_write_hook)(&args))
666 {
667 DBUG_PRINT("error", ("write callback problem"));
668 DBUG_RETURN(1);
669 }
670 res= (int)my_pwrite(filedesc->file, args.page, pagecache->block_size,
671 ((my_off_t) pageno << pagecache->shift), flags);
672 (*filedesc->post_write_hook)(res, &args);
673 DBUG_RETURN(res);
674}
675
676
/*
  Read page from the disk

  SYNOPSIS
    pagecache_fread()
    pagecache - page cache pointer
    filedesc  - pagecache file descriptor structure
    buffer    - buffer in which we will read
    pageno    - page number; the file offset is pageno << pagecache->shift
    flags     - MYF() flags

  NOTES
    Expands to a mysql_file_pread() call reading pagecache->block_size
    bytes; the value of that call is the value of the macro.
    All arguments are parenthesized in the expansion, so any expression may
    be passed (e.g. a page number computed with '&' or '?:').
*/
#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags)       \
  mysql_file_pread((filedesc)->file, (buffer), (pagecache)->block_size,   \
                   ((my_off_t) (pageno) << (pagecache)->shift), (flags))
691
692
693/**
694 @brief set rec_lsn of pagecache block (if it is needed)
695
696 @param block block where to set rec_lsn
697 @param first_REDO_LSN_for_page the LSN to set
698*/
699
700static inline void pagecache_set_block_rec_lsn(PAGECACHE_BLOCK_LINK *block,
701 LSN first_REDO_LSN_for_page)
702{
703 if (block->rec_lsn == LSN_MAX)
704 block->rec_lsn= first_REDO_LSN_for_page;
705 else
706 DBUG_ASSERT(cmp_translog_addr(block->rec_lsn,
707 first_REDO_LSN_for_page) <= 0);
708}
709
710
711/*
712 next_power(value) is 2 at the power of (1+floor(log2(value)));
713 e.g. next_power(2)=4, next_power(3)=4.
714*/
715static inline uint next_power(uint value)
716{
717 return (uint) my_round_up_to_next_power((uint32) value) << 1;
718}
719
720
721/*
722 Initialize a page cache
723
724 SYNOPSIS
725 init_pagecache()
726 pagecache pointer to a page cache data structure
727 key_cache_block_size size of blocks to keep cached data
728 use_mem total memory to use for the key cache
729 division_limit division limit (may be zero)
730 age_threshold age threshold (may be zero)
731 block_size size of block (should be power of 2)
732 my_read_flags Flags used for all pread/pwrite calls
733 Usually MY_WME in case of recovery
734
735 RETURN VALUE
736 number of blocks in the key cache, if successful,
737 0 - otherwise.
738
739 NOTES.
740 if pagecache->inited != 0 we assume that the key cache
741 is already initialized. This is for now used by myisamchk, but shouldn't
742 be something that a program should rely on!
743
744 It's assumed that no two threads call this function simultaneously
745 referring to the same key cache handle.
746
747*/
748
749size_t init_pagecache(PAGECACHE *pagecache, size_t use_mem,
750 uint division_limit, uint age_threshold,
751 uint block_size, uint changed_blocks_hash_size,
752 myf my_readwrite_flags)
753{
754 size_t blocks, hash_links, length;
755 int error;
756 DBUG_ENTER("init_pagecache");
757 DBUG_ASSERT(block_size >= 512);
758
759 PAGECACHE_DEBUG_OPEN;
760 if (pagecache->inited && pagecache->disk_blocks > 0)
761 {
762 DBUG_PRINT("warning",("key cache already in use"));
763 DBUG_RETURN(0);
764 }
765
766 pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0;
767 pagecache->global_cache_read= pagecache->global_cache_write= 0;
768 pagecache->disk_blocks= -1;
769 if (! pagecache->inited)
770 {
771 if (mysql_mutex_init(key_PAGECACHE_cache_lock,
772 &pagecache->cache_lock, MY_MUTEX_INIT_FAST) ||
773 my_hash_init(&pagecache->files_in_flush, &my_charset_bin, 32,
774 offsetof(struct st_file_in_flush, file),
775 sizeof(((struct st_file_in_flush *)NULL)->file),
776 NULL, NULL, 0))
777 goto err;
778 pagecache->inited= 1;
779 pagecache->in_init= 0;
780 pagecache->resize_queue.last_thread= NULL;
781 }
782
783 pagecache->mem_size= use_mem;
784 pagecache->block_size= block_size;
785 pagecache->shift= my_bit_log2(block_size);
786 pagecache->readwrite_flags= my_readwrite_flags | MY_NABP | MY_WAIT_IF_FULL;
787 pagecache->org_readwrite_flags= pagecache->readwrite_flags;
788 DBUG_PRINT("info", ("block_size: %u", block_size));
789 DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size);
790
791 blocks= use_mem / (sizeof(PAGECACHE_BLOCK_LINK) +
792 2 * sizeof(PAGECACHE_HASH_LINK) +
793 sizeof(PAGECACHE_HASH_LINK*) *
794 5/4 + block_size);
795 /* Changed blocks hash needs to be a power of 2 */
796 changed_blocks_hash_size= my_round_up_to_next_power(MY_MAX(changed_blocks_hash_size,
797 MIN_PAGECACHE_CHANGED_BLOCKS_HASH_SIZE));
798
799 /*
800 We need to support page cache with just one block to be able to do
801 scanning of rows-in-block files
802 */
803 for ( ; ; )
804 {
805 if (blocks < 8)
806 {
807 my_message(ENOMEM, "Not enough memory to allocate 8 pagecache pages",
808 MYF(0));
809 my_errno= ENOMEM;
810 goto err;
811 }
812 /* Set my_hash_entries to the next bigger 2 power */
813 if ((pagecache->hash_entries= next_power((uint)blocks)) <
814 (blocks) * 5/4)
815 pagecache->hash_entries<<= 1;
816 hash_links= 2 * blocks;
817#if defined(MAX_THREADS)
818 if (hash_links < MAX_THREADS + blocks - 1)
819 hash_links= MAX_THREADS + blocks - 1;
820#endif
821 while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) +
822 ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) *
823 pagecache->hash_entries) +
824 ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) +
825 sizeof(PAGECACHE_BLOCK_LINK*)* (changed_blocks_hash_size*2))) +
826 (blocks << pagecache->shift) > use_mem && blocks > 8)
827 blocks--;
828 /* Allocate memory for cache page buffers */
829 if ((pagecache->block_mem=
830 my_large_malloc(blocks * pagecache->block_size,
831 MYF(MY_WME))))
832 {
833 /*
834 Allocate memory for blocks, hash_links and hash entries;
835 For each block 2 hash links are allocated
836 */
837 if (my_multi_malloc_large(MYF(MY_ZEROFILL),
838 &pagecache->block_root,
839 (ulonglong) (blocks *
840 sizeof(PAGECACHE_BLOCK_LINK)),
841 &pagecache->hash_root,
842 (ulonglong) (sizeof(PAGECACHE_HASH_LINK*) *
843 pagecache->hash_entries),
844 &pagecache->hash_link_root,
845 (ulonglong) (hash_links *
846 sizeof(PAGECACHE_HASH_LINK)),
847 &pagecache->changed_blocks,
848 (ulonglong) (sizeof(PAGECACHE_BLOCK_LINK*) *
849 changed_blocks_hash_size),
850 &pagecache->file_blocks,
851 (ulonglong) (sizeof(PAGECACHE_BLOCK_LINK*) *
852 changed_blocks_hash_size),
853 NullS))
854 break;
855 my_large_free(pagecache->block_mem);
856 pagecache->block_mem= 0;
857 }
858 blocks= blocks / 4*3;
859 }
860 pagecache->blocks_unused= blocks;
861 pagecache->disk_blocks= blocks;
862 pagecache->hash_links= hash_links;
863 pagecache->hash_links_used= 0;
864 pagecache->free_hash_list= NULL;
865 pagecache->blocks_used= pagecache->blocks_changed= 0;
866
867 pagecache->global_blocks_changed= 0;
868 pagecache->blocks_available=0; /* For debugging */
869
870 /* The LRU chain is empty after initialization */
871 pagecache->used_last= NULL;
872 pagecache->used_ins= NULL;
873 pagecache->free_block_list= NULL;
874 pagecache->time= 0;
875 pagecache->warm_blocks= 0;
876 pagecache->min_warm_blocks= (division_limit ?
877 blocks * division_limit / 100 + 1 :
878 blocks);
879 pagecache->age_threshold= (age_threshold ?
880 blocks * age_threshold / 100 :
881 blocks);
882 pagecache->changed_blocks_hash_size= changed_blocks_hash_size;
883
884 pagecache->cnt_for_resize_op= 0;
885 pagecache->resize_in_flush= 0;
886 pagecache->can_be_used= 1;
887
888 pagecache->waiting_for_hash_link.last_thread= NULL;
889 pagecache->waiting_for_block.last_thread= NULL;
890 DBUG_PRINT("exit",
891 ("disk_blocks: %zu block_root: %p hash_entries: %zu\
892 hash_root: %p hash_links: %zu hash_link_root: %p",
893 (size_t)pagecache->disk_blocks, pagecache->block_root,
894 pagecache->hash_entries, pagecache->hash_root,
895 (size_t)pagecache->hash_links, pagecache->hash_link_root));
896
897 pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0;
898 DBUG_RETURN((size_t)pagecache->disk_blocks);
899
900err:
901 error= my_errno;
902 pagecache->disk_blocks= 0;
903 pagecache->blocks= 0;
904 if (pagecache->block_mem)
905 {
906 my_large_free(pagecache->block_mem);
907 pagecache->block_mem= NULL;
908 }
909 if (pagecache->block_root)
910 {
911 my_free(pagecache->block_root);
912 pagecache->block_root= NULL;
913 }
914 my_errno= error;
915 pagecache->can_be_used= 0;
916 DBUG_RETURN(0);
917}
918
919
920/*
921 Flush all blocks in the key cache to disk
922*/
923
924#ifdef NOT_USED
925static int flush_all_key_blocks(PAGECACHE *pagecache)
926{
927#if defined(PAGECACHE_DEBUG)
928 uint cnt=0;
929#endif
930 while (pagecache->blocks_changed > 0)
931 {
932 PAGECACHE_BLOCK_LINK *block;
933 for (block= pagecache->used_last->next_used ; ; block=block->next_used)
934 {
935 if (block->hash_link)
936 {
937#if defined(PAGECACHE_DEBUG)
938 cnt++;
939 KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
940#endif
941 if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file,
942 FLUSH_RELEASE, NULL, NULL))
943 return 1;
944 break;
945 }
946 if (block == pagecache->used_last)
947 break;
948 }
949 }
950 return 0;
951}
952#endif /* NOT_USED */
953
954/*
955 Resize a key cache
956
957 SYNOPSIS
958 resize_pagecache()
959 pagecache pointer to a page cache data structure
960 use_mem total memory to use for the new key cache
961 division_limit new division limit (if not zero)
962 age_threshold new age threshold (if not zero)
963
964 RETURN VALUE
965 number of blocks in the key cache, if successful,
966 0 - otherwise.
967
968 NOTES.
969 The function first compares the memory size parameter
970 with the key cache value.
971
972 If they differ the function free the the memory allocated for the
973 old key cache blocks by calling the end_pagecache function and
974 then rebuilds the key cache with new blocks by calling
975 init_key_cache.
976
977 The function starts the operation only when all other threads
978 performing operations with the key cache let her to proceed
979 (when cnt_for_resize=0).
980
981 Before being usable, this function needs:
982 - to receive fixes for BUG#17332 "changing key_buffer_size on a running
983 server can crash under load" similar to those done to the key cache
984 - to have us (Sanja) look at the additional constraints placed on
985 resizing, due to the page locking specific to this page cache.
986 So we disable it for now.
987*/
#ifdef NOT_USED /* keep disabled until code is fixed see above !! */
/*
  Resize the page cache: flush all blocks, free the old cache and build a
  new one using use_mem bytes. Concurrent resize requests are serialized
  through pagecache->resize_queue.

  Returns the number of blocks in the new cache, or 0 on failure. If only
  the parameters differ (use_mem is unchanged), the parameters are changed
  and the current number of blocks is returned.
*/
size_t resize_pagecache(PAGECACHE *pagecache,
                        size_t use_mem, uint division_limit,
                        uint age_threshold, uint changed_blocks_hash_size)
{
  size_t blocks;
  struct st_my_thread_var *thread;
  WQUEUE *wqueue;
  DBUG_ENTER("resize_pagecache");

  if (!pagecache->inited)
    DBUG_RETURN(pagecache->disk_blocks);

  if(use_mem == pagecache->mem_size)
  {
    change_pagecache_param(pagecache, division_limit, age_threshold);
    DBUG_RETURN(pagecache->disk_blocks);
  }

  pagecache_pthread_mutex_lock(&pagecache->cache_lock);

  /* Queue up and wait until this thread is the first in the resize queue */
  wqueue= &pagecache->resize_queue;
  thread= my_thread_var;
  wqueue_link_into_queue(wqueue, thread);

  while (wqueue->last_thread->next != thread)
  {
    pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
  }

  pagecache->resize_in_flush= 1;
  if (flush_all_key_blocks(pagecache))
  {
    /* TODO: if this happens, we should write a warning in the log file ! */
    pagecache->resize_in_flush= 0;
    blocks= 0;
    pagecache->can_be_used= 0;
    goto finish;
  }
  pagecache->resize_in_flush= 0;
  pagecache->can_be_used= 0;
  /* Wait until no operation that blocks the resize is running */
  while (pagecache->cnt_for_resize_op)
  {
    DBUG_PRINT("wait", ("suspend thread %s %ld", thread->name, thread->id));
    pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
  }

  end_pagecache(pagecache, 0);			/* Don't free mutex */
  /*
    The following will work even if use_mem is 0.
    The arguments follow the parameter order of init_pagecache():
    (pagecache, use_mem, division_limit, age_threshold, block_size,
     changed_blocks_hash_size, my_readwrite_flags).
  */
  blocks= init_pagecache(pagecache, use_mem, division_limit, age_threshold,
                         pagecache->block_size, changed_blocks_hash_size,
                         pagecache->readwrite_flags);

finish:
  wqueue_unlink_from_queue(wqueue, thread);
  /* Signal for the next resize request to proceed if any */
  if (wqueue->last_thread)
  {
    DBUG_PRINT("signal",
               ("thread %s %ld", wqueue->last_thread->next->name,
                wqueue->last_thread->next->id));
    pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
  }
  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
  DBUG_RETURN(blocks);
}
#endif /* NOT_USED */
1055
1056
1057/*
1058 Increment counter blocking resize key cache operation
1059*/
1060static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
1061{
1062 mysql_mutex_assert_owner(&pagecache->cache_lock);
1063 pagecache->cnt_for_resize_op++;
1064}
1065
1066
1067/*
1068 Decrement counter blocking resize key cache operation;
1069 Signal the operation to proceed when counter becomes equal zero
1070*/
1071
1072static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
1073{
1074 struct st_my_thread_var *last_thread;
1075 mysql_mutex_assert_owner(&pagecache->cache_lock);
1076 if (!--pagecache->cnt_for_resize_op &&
1077 (last_thread= pagecache->resize_queue.last_thread))
1078 {
1079 DBUG_PRINT("signal",
1080 ("thread %s %ld", last_thread->next->name,
1081 (ulong) last_thread->next->id));
1082 pagecache_pthread_cond_signal(&last_thread->next->suspend);
1083 }
1084}
1085
1086/*
1087 Change the page cache parameters
1088
1089 SYNOPSIS
1090 change_pagecache_param()
1091 pagecache pointer to a page cache data structure
1092 division_limit new division limit (if not zero)
1093 age_threshold new age threshold (if not zero)
1094
1095 RETURN VALUE
1096 none
1097
1098 NOTES.
1099 Presently the function resets the key cache parameters
1100 concerning midpoint insertion strategy - division_limit and
1101 age_threshold.
1102*/
1103
1104void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
1105 uint age_threshold)
1106{
1107 DBUG_ENTER("change_pagecache_param");
1108
1109 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
1110 if (division_limit)
1111 pagecache->min_warm_blocks= (pagecache->disk_blocks *
1112 division_limit / 100 + 1);
1113 if (age_threshold)
1114 pagecache->age_threshold= (pagecache->disk_blocks *
1115 age_threshold / 100);
1116 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
1117 DBUG_VOID_RETURN;
1118}
1119
1120
1121/*
1122 Check that pagecache was used and cleaned up properly.
1123*/
1124
1125#ifndef DBUG_OFF
1126void check_pagecache_is_cleaned_up(PAGECACHE *pagecache)
1127{
1128 DBUG_ENTER("check_pagecache_is_cleaned_up");
1129 /*
1130 Ensure we called inc_counter_for_resize_op and dec_counter_for_resize_op
1131 the same number of times. (If not, a resize() could never happen.
1132 */
1133 DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
1134
1135 if (pagecache->disk_blocks > 0)
1136 {
1137 if (pagecache->block_mem)
1138 {
1139 uint i;
1140 for (i=0 ; i < pagecache->blocks_used ; i++)
1141 {
1142 DBUG_ASSERT(pagecache->block_root[i].status == 0);
1143 DBUG_ASSERT(pagecache->block_root[i].type == PAGECACHE_EMPTY_PAGE);
1144 }
1145 }
1146 }
1147 DBUG_VOID_RETURN;
1148}
1149#endif
1150
1151
1152/*
1153 Removes page cache from memory. Does NOT flush pages to disk.
1154
1155 SYNOPSIS
1156 end_pagecache()
1157 pagecache page cache handle
1158 cleanup Complete free (Free also mutex for key cache)
1159
1160 RETURN VALUE
1161 none
1162*/
1163
1164void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
1165{
1166 DBUG_ENTER("end_pagecache");
1167 DBUG_PRINT("enter", ("key_cache: %p", pagecache));
1168
1169 if (!pagecache->inited)
1170 DBUG_VOID_RETURN;
1171
1172 if (pagecache->disk_blocks > 0)
1173 {
1174#ifndef DBUG_OFF
1175 check_pagecache_is_cleaned_up(pagecache);
1176#endif
1177
1178 if (pagecache->block_mem)
1179 {
1180 my_large_free(pagecache->block_mem);
1181 pagecache->block_mem= NULL;
1182 my_free(pagecache->block_root);
1183 pagecache->block_root= NULL;
1184 }
1185 pagecache->disk_blocks= -1;
1186 /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
1187 pagecache->blocks_changed= 0;
1188 }
1189
1190 DBUG_PRINT("status", ("used: %zu changed: %zu w_requests: %llu "
1191 "writes: %llu r_requests: %llu reads: %llu",
1192 pagecache->blocks_used,
1193 pagecache->global_blocks_changed,
1194 pagecache->global_cache_w_requests,
1195 pagecache->global_cache_write,
1196 pagecache->global_cache_r_requests,
1197 pagecache->global_cache_read));
1198
1199 if (cleanup)
1200 {
1201 my_hash_free(&pagecache->files_in_flush);
1202 mysql_mutex_destroy(&pagecache->cache_lock);
1203 pagecache->inited= pagecache->can_be_used= 0;
1204 PAGECACHE_DEBUG_CLOSE;
1205 }
1206 DBUG_VOID_RETURN;
1207} /* end_pagecache */
1208
1209
1210/*
1211 Unlink a block from the chain of dirty/clean blocks
1212*/
1213
1214static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block)
1215{
1216 if (block->next_changed)
1217 block->next_changed->prev_changed= block->prev_changed;
1218 *block->prev_changed= block->next_changed;
1219}
1220
1221
1222/*
1223 Link a block into the chain of dirty/clean blocks
1224*/
1225
1226static inline void link_changed(PAGECACHE_BLOCK_LINK *block,
1227 PAGECACHE_BLOCK_LINK **phead)
1228{
1229 block->prev_changed= phead;
1230 if ((block->next_changed= *phead))
1231 (*phead)->prev_changed= &block->next_changed;
1232 *phead= block;
1233}
1234
1235
1236/*
1237 Unlink a block from the chain of dirty/clean blocks, if it's asked for,
1238 and link it to the chain of clean blocks for the specified file
1239*/
1240
1241static void link_to_file_list(PAGECACHE *pagecache,
1242 PAGECACHE_BLOCK_LINK *block,
1243 PAGECACHE_FILE *file, my_bool unlink_flag)
1244{
1245 if (unlink_flag)
1246 unlink_changed(block);
1247 link_changed(block, &pagecache->file_blocks[FILE_HASH(*file, pagecache)]);
1248 if (block->status & PCBLOCK_CHANGED)
1249 {
1250 block->status&= ~(PCBLOCK_CHANGED | PCBLOCK_DEL_WRITE);
1251 block->rec_lsn= LSN_MAX;
1252 pagecache->blocks_changed--;
1253 pagecache->global_blocks_changed--;
1254 }
1255}
1256
1257
1258/*
1259 Unlink a block from the chain of clean blocks for the specified
1260 file and link it to the chain of dirty blocks for this file
1261*/
1262
1263static inline void link_to_changed_list(PAGECACHE *pagecache,
1264 PAGECACHE_BLOCK_LINK *block)
1265{
1266 unlink_changed(block);
1267 link_changed(block,
1268 &pagecache->changed_blocks[FILE_HASH(block->hash_link->file, pagecache)]);
1269 block->status|=PCBLOCK_CHANGED;
1270 pagecache->blocks_changed++;
1271 pagecache->global_blocks_changed++;
1272}
1273
1274
1275/*
1276 Link a block to the LRU chain at the beginning or at the end of
1277 one of two parts.
1278
1279 SYNOPSIS
1280 link_block()
1281 pagecache pointer to a page cache data structure
1282 block pointer to the block to link to the LRU chain
1283 hot <-> to link the block into the hot subchain
1284 at_end <-> to link the block at the end of the subchain
1285
1286 RETURN VALUE
1287 none
1288
1289 NOTES.
1290 The LRU chain is represented by a circular list of block structures.
1291 The list is double-linked of the type (**prev,*next) type.
1292 The LRU chain is divided into two parts - hot and warm.
1293 There are two pointers to access the last blocks of these two
1294 parts. The beginning of the warm part follows right after the
1295 end of the hot part.
1296 Only blocks of the warm part can be used for replacement.
1297 The first block from the beginning of this subchain is always
1298 taken for eviction (pagecache->last_used->next)
1299
1300 LRU chain: +------+ H O T +------+
1301 +----| end |----...<----| beg |----+
1302 | +------+last +------+ |
1303 v<-link in latest hot (new end) |
1304 | link in latest warm (new end)->^
1305 | +------+ W A R M +------+ |
1306 +----| beg |---->...----| end |----+
1307 +------+ +------+ins
1308 first for eviction
1309*/
1310
1311static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
1312 my_bool hot, my_bool at_end)
1313{
1314 PAGECACHE_BLOCK_LINK *ins;
1315 PAGECACHE_BLOCK_LINK **ptr_ins;
1316 DBUG_ENTER("link_block");
1317
1318 PCBLOCK_INFO(block);
1319 KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
1320 if (!hot && pagecache->waiting_for_block.last_thread)
1321 {
1322 /* Signal that in the LRU warm sub-chain an available block has appeared */
1323 struct st_my_thread_var *last_thread=
1324 pagecache->waiting_for_block.last_thread;
1325 struct st_my_thread_var *first_thread= last_thread->next;
1326 struct st_my_thread_var *next_thread= first_thread;
1327 PAGECACHE_HASH_LINK *hash_link=
1328 (PAGECACHE_HASH_LINK *) first_thread->keycache_link;
1329 struct st_my_thread_var *thread;
1330
1331 DBUG_ASSERT(block->requests + block->wlocks + block->rlocks +
1332 block->pins == 0);
1333 DBUG_ASSERT(block->next_used == NULL);
1334
1335 do
1336 {
1337 thread= next_thread;
1338 next_thread= thread->next;
1339 /*
1340 We notify about the event all threads that ask
1341 for the same page as the first thread in the queue
1342 */
1343 if ((PAGECACHE_HASH_LINK *) thread->keycache_link == hash_link)
1344 {
1345 DBUG_PRINT("signal", ("thread: %s %ld", thread->name,
1346 (ulong) thread->id));
1347 pagecache_pthread_cond_signal(&thread->suspend);
1348 wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
1349 block->requests++;
1350 }
1351 }
1352 while (thread != last_thread);
1353 hash_link->block= block;
1354 /* Ensure that no other thread tries to use this block */
1355 block->status|= PCBLOCK_REASSIGNED;
1356
1357 DBUG_PRINT("signal", ("after signal"));
1358#if defined(PAGECACHE_DEBUG)
1359 KEYCACHE_DBUG_PRINT("link_block",
1360 ("linked,unlinked block: %u status: %x #requests: %u #available: %u",
1361 PCBLOCK_NUMBER(pagecache, block), block->status,
1362 block->requests, pagecache->blocks_available));
1363#endif
1364 DBUG_VOID_RETURN;
1365 }
1366 ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
1367 ins= *ptr_ins;
1368 if (ins)
1369 {
1370 ins->next_used->prev_used= &block->next_used;
1371 block->next_used= ins->next_used;
1372 block->prev_used= &ins->next_used;
1373 ins->next_used= block;
1374 if (at_end)
1375 *ptr_ins= block;
1376 }
1377 else
1378 {
1379 /* The LRU chain is empty */
1380 pagecache->used_last= pagecache->used_ins= block->next_used= block;
1381 block->prev_used= &block->next_used;
1382 }
1383 KEYCACHE_THREAD_TRACE("link_block");
1384#if defined(PAGECACHE_DEBUG)
1385 pagecache->blocks_available++;
1386 KEYCACHE_DBUG_PRINT("link_block",
1387 ("linked block: %u:%1u status: %x #requests: %u #available: %u",
1388 PCBLOCK_NUMBER(pagecache, block), at_end, block->status,
1389 block->requests, pagecache->blocks_available));
1390 KEYCACHE_DBUG_ASSERT(pagecache->blocks_available <=
1391 pagecache->blocks_used);
1392#endif
1393 DBUG_VOID_RETURN;
1394}
1395
1396
1397/*
1398 Unlink a block from the LRU chain
1399
1400 SYNOPSIS
1401 unlink_block()
1402 pagecache pointer to a page cache data structure
1403 block pointer to the block to unlink from the LRU chain
1404
1405 RETURN VALUE
1406 none
1407
1408 NOTES.
1409 See NOTES for link_block
1410*/
1411
1412static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
1413{
1414 DBUG_ENTER("unlink_block");
1415 DBUG_PRINT("pagecache", ("unlink %p", block));
1416 DBUG_ASSERT(block->next_used != NULL);
1417 if (block->next_used == block)
1418 {
1419 /* The list contains only one member */
1420 pagecache->used_last= pagecache->used_ins= NULL;
1421 }
1422 else
1423 {
1424 block->next_used->prev_used= block->prev_used;
1425 *block->prev_used= block->next_used;
1426 if (pagecache->used_last == block)
1427 pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
1428 next_used, block->prev_used);
1429 if (pagecache->used_ins == block)
1430 pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
1431 next_used, block->prev_used);
1432 }
1433 block->next_used= NULL;
1434
1435 KEYCACHE_THREAD_TRACE("unlink_block");
1436#if defined(PAGECACHE_DEBUG)
1437 KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
1438 pagecache->blocks_available--;
1439 KEYCACHE_DBUG_PRINT("pagecache",
1440 ("unlinked block: %p (%u) status: %x #requests: %u #available: %u",
1441 block, PCBLOCK_NUMBER(pagecache, block),
1442 block->status,
1443 block->requests, pagecache->blocks_available));
1444 PCBLOCK_INFO(block);
1445#endif
1446 DBUG_VOID_RETURN;
1447}
1448
1449
1450/*
1451 Register requests for a block
1452
1453 SYNOPSIS
1454 reg_requests()
1455 pagecache this page cache reference
1456 block the block we request reference
1457 count how many requests we register (it is 1 everywhere)
1458
1459 NOTE
1460 Registration of request means we are going to use this block so we exclude
1461 it from the LRU if it is first request
1462*/
1463static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
1464 int count)
1465{
1466 DBUG_ENTER("reg_requests");
1467 PCBLOCK_INFO(block);
1468 if (! block->requests)
1469 /* First request for the block unlinks it */
1470 unlink_block(pagecache, block);
1471 block->requests+= count;
1472 DBUG_VOID_RETURN;
1473}
1474
1475
1476/*
1477 Unregister request for a block
1478 linking it to the LRU chain if it's the last request
1479
1480 SYNOPSIS
1481 unreg_request()
1482 pagecache pointer to a page cache data structure
1483 block pointer to the block to link to the LRU chain
1484 at_end <-> to link the block at the end of the LRU chain
1485
1486 RETURN VALUE
1487 none
1488
1489 NOTES.
1490 Every linking to the LRU chain decrements by one a special block
1491 counter (if it's positive). If the at_end parameter is TRUE the block is
1492 added either at the end of warm sub-chain or at the end of hot sub-chain.
1493 It is added to the hot subchain if its counter is zero and number of
1494 blocks in warm sub-chain is not less than some low limit (determined by
1495 the division_limit parameter). Otherwise the block is added to the warm
1496 sub-chain. If the at_end parameter is FALSE the block is always added
1497 at beginning of the warm sub-chain.
1498 Thus a warm block can be promoted to the hot sub-chain when its counter
1499 becomes zero for the first time.
1500 At the same time the block at the very beginning of the hot subchain
1501 might be moved to the beginning of the warm subchain if it stays untouched
1502 for a too long time (this time is determined by parameter age_threshold).
1503*/
1504
1505static void unreg_request(PAGECACHE *pagecache,
1506 PAGECACHE_BLOCK_LINK *block, int at_end)
1507{
1508 DBUG_ENTER("unreg_request");
1509 DBUG_PRINT("enter", ("block %p (%u) status: %x requests: %u",
1510 block, PCBLOCK_NUMBER(pagecache, block),
1511 block->status, block->requests));
1512 PCBLOCK_INFO(block);
1513 DBUG_ASSERT(block->requests > 0);
1514 if (! --block->requests)
1515 {
1516 my_bool hot;
1517 if (block->hits_left)
1518 block->hits_left--;
1519 hot= !block->hits_left && at_end &&
1520 pagecache->warm_blocks > pagecache->min_warm_blocks;
1521 if (hot)
1522 {
1523 if (block->temperature == PCBLOCK_WARM)
1524 pagecache->warm_blocks--;
1525 block->temperature= PCBLOCK_HOT;
1526 KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %zu",
1527 pagecache->warm_blocks));
1528 }
1529 link_block(pagecache, block, hot, (my_bool)at_end);
1530 block->last_hit_time= pagecache->time;
1531 pagecache->time++;
1532
1533 block= pagecache->used_ins;
1534 /* Check if we should link a hot block to the warm block */
1535 if (block && pagecache->time - block->last_hit_time >
1536 pagecache->age_threshold)
1537 {
1538 unlink_block(pagecache, block);
1539 link_block(pagecache, block, 0, 0);
1540 if (block->temperature != PCBLOCK_WARM)
1541 {
1542 pagecache->warm_blocks++;
1543 block->temperature= PCBLOCK_WARM;
1544 }
1545 KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %zu",
1546 pagecache->warm_blocks));
1547 }
1548 }
1549 DBUG_VOID_RETURN;
1550}
1551
1552/*
1553 Remove a reader of the page in block
1554*/
1555
1556static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
1557{
1558 DBUG_ENTER("remove_reader");
1559 PCBLOCK_INFO(block);
1560 DBUG_ASSERT(block->hash_link->requests > 0);
1561 if (! --block->hash_link->requests && block->condvar)
1562 pagecache_pthread_cond_signal(block->condvar);
1563 DBUG_VOID_RETURN;
1564}
1565
1566
1567/*
1568 Wait until the last reader of the page in block
1569 signals on its termination
1570*/
1571
1572static inline void wait_for_readers(PAGECACHE *pagecache
1573 __attribute__((unused)),
1574 PAGECACHE_BLOCK_LINK *block
1575 __attribute__((unused)))
1576{
1577 struct st_my_thread_var *thread= my_thread_var;
1578 DBUG_ASSERT(block->condvar == NULL);
1579 while (block->hash_link->requests)
1580 {
1581 DBUG_ENTER("wait_for_readers");
1582 DBUG_PRINT("wait",
1583 ("suspend thread: %s %ld block: %u",
1584 thread->name, (ulong) thread->id,
1585 PCBLOCK_NUMBER(pagecache, block)));
1586 block->condvar= &thread->suspend;
1587 pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
1588 block->condvar= NULL;
1589 DBUG_VOID_RETURN;
1590 }
1591}
1592
1593
1594/*
1595 Wait until the flush of the page is done.
1596*/
1597
1598static void wait_for_flush(PAGECACHE *pagecache
1599 __attribute__((unused)),
1600 PAGECACHE_BLOCK_LINK *block
1601 __attribute__((unused)))
1602{
1603 struct st_my_thread_var *thread= my_thread_var;
1604 DBUG_ENTER("wait_for_flush");
1605 wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
1606 do
1607 {
1608 DBUG_PRINT("wait",
1609 ("suspend thread %s %ld", thread->name, (ulong) thread->id));
1610 pagecache_pthread_cond_wait(&thread->suspend,
1611 &pagecache->cache_lock);
1612 }
1613 while(thread->next);
1614 DBUG_VOID_RETURN;
1615}
1616
1617
1618/*
1619 Add a hash link to a bucket in the hash_table
1620*/
1621
1622static inline void link_hash(PAGECACHE_HASH_LINK **start,
1623 PAGECACHE_HASH_LINK *hash_link)
1624{
1625 if (*start)
1626 (*start)->prev= &hash_link->next;
1627 hash_link->next= *start;
1628 hash_link->prev= start;
1629 *start= hash_link;
1630}
1631
1632
1633/*
1634 Remove a hash link from the hash table
1635*/
1636
1637static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
1638{
1639 DBUG_ENTER("unlink_hash");
1640 DBUG_PRINT("enter", ("hash_link: %p fd: %u pos: %lu requests: %u",
1641 hash_link, (uint) hash_link->file.file,
1642 (ulong) hash_link->pageno,
1643 hash_link->requests));
1644 DBUG_ASSERT(hash_link->requests == 0);
1645 DBUG_ASSERT(!hash_link->block || hash_link->block->pins == 0);
1646
1647 if ((*hash_link->prev= hash_link->next))
1648 hash_link->next->prev= hash_link->prev;
1649 hash_link->block= NULL;
1650 if (pagecache->waiting_for_hash_link.last_thread)
1651 {
1652 /* Signal that a free hash link has appeared */
1653 struct st_my_thread_var *last_thread=
1654 pagecache->waiting_for_hash_link.last_thread;
1655 struct st_my_thread_var *first_thread= last_thread->next;
1656 struct st_my_thread_var *next_thread= first_thread;
1657 PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->keycache_link);
1658 struct st_my_thread_var *thread;
1659
1660 hash_link->file= first_page->file;
1661 DBUG_ASSERT(first_page->pageno < ((1ULL) << 40));
1662 hash_link->pageno= first_page->pageno;
1663 do
1664 {
1665 PAGECACHE_PAGE *page;
1666 thread= next_thread;
1667 page= (PAGECACHE_PAGE *) thread->keycache_link;
1668 next_thread= thread->next;
1669 /*
1670 We notify about the event all threads that ask
1671 for the same page as the first thread in the queue
1672 */
1673 if (page->file.file == hash_link->file.file &&
1674 page->pageno == hash_link->pageno)
1675 {
1676 DBUG_PRINT("signal", ("thread %s %ld", thread->name,
1677 (ulong) thread->id));
1678 pagecache_pthread_cond_signal(&thread->suspend);
1679 wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
1680 }
1681 }
1682 while (thread != last_thread);
1683
1684 /*
1685 Add this to the hash, so that the waiting threads can find it
1686 when they retry the call to get_hash_link(). This entry is special
1687 in that it has no associated block.
1688 */
1689 link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
1690 hash_link->file,
1691 hash_link->pageno)],
1692 hash_link);
1693 DBUG_VOID_RETURN;
1694 }
1695
1696 /* Add hash to free hash list */
1697 hash_link->next= pagecache->free_hash_list;
1698 pagecache->free_hash_list= hash_link;
1699 DBUG_VOID_RETURN;
1700}
1701
1702
1703/*
1704 Get the hash link for the page if it is in the cache (do not put the
1705 page in the cache if it is absent there)
1706
1707 SYNOPSIS
1708 get_present_hash_link()
1709 pagecache Pagecache reference
1710 file file ID
1711 pageno page number in the file
1712 start where to put pointer to found hash bucket (for
1713 direct referring it)
1714
1715 RETURN
1716 found hashlink pointer
1717*/
1718
1719static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
1720 PAGECACHE_FILE *file,
1721 pgcache_page_no_t pageno,
1722 PAGECACHE_HASH_LINK ***start)
1723{
1724 reg1 PAGECACHE_HASH_LINK *hash_link;
1725#if defined(PAGECACHE_DEBUG)
1726 int cnt;
1727#endif
1728 DBUG_ENTER("get_present_hash_link");
1729 DBUG_PRINT("enter", ("fd: %u pos: %lu", (uint) file->file, (ulong) pageno));
1730
1731 /*
1732 Find the bucket in the hash table for the pair (file, pageno);
1733 start contains the head of the bucket list,
1734 hash_link points to the first member of the list
1735 */
1736 hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache,
1737 *file, pageno)]);
1738#if defined(PAGECACHE_DEBUG)
1739 cnt= 0;
1740#endif
1741 /* Look for an element for the pair (file, pageno) in the bucket chain */
1742 while (hash_link &&
1743 (hash_link->pageno != pageno ||
1744 hash_link->file.file != file->file))
1745 {
1746 hash_link= hash_link->next;
1747#if defined(PAGECACHE_DEBUG)
1748 cnt++;
1749 if (! (cnt <= pagecache->hash_links_used))
1750 {
1751 int i;
1752 for (i=0, hash_link= **start ;
1753 i < cnt ; i++, hash_link= hash_link->next)
1754 {
1755 KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
1756 (uint) hash_link->file.file, (ulong) hash_link->pageno));
1757 }
1758 }
1759 KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
1760#endif
1761 }
1762 if (hash_link)
1763 {
1764 DBUG_PRINT("exit", ("hash_link: %p", hash_link));
1765 /* Register the request for the page */
1766 hash_link->requests++;
1767 }
1768 /*
1769 As soon as the caller will release the page cache's lock, "hash_link"
1770 will be potentially obsolete (unusable) information.
1771 */
1772 DBUG_RETURN(hash_link);
1773}
1774
1775
1776/*
1777 Get the hash link for a page
1778*/
1779
1780static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
1781 PAGECACHE_FILE *file,
1782 pgcache_page_no_t pageno)
1783{
1784 reg1 PAGECACHE_HASH_LINK *hash_link;
1785 PAGECACHE_HASH_LINK **start;
1786 DBUG_ENTER("get_hash_link");
1787
1788restart:
1789 /* try to find the page in the cache */
1790 hash_link= get_present_hash_link(pagecache, file, pageno,
1791 &start);
1792 if (!hash_link)
1793 {
1794 /* There is no hash link in the hash table for the pair (file, pageno) */
1795 if (pagecache->free_hash_list)
1796 {
1797 DBUG_PRINT("info", ("free_hash_list: %p free_hash_list->next: %p",
1798 pagecache->free_hash_list,
1799 pagecache->free_hash_list->next));
1800 hash_link= pagecache->free_hash_list;
1801 pagecache->free_hash_list= hash_link->next;
1802 }
1803 else if (pagecache->hash_links_used < pagecache->hash_links)
1804 {
1805 hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++];
1806 }
1807 else
1808 {
1809 /* Wait for a free hash link */
1810 struct st_my_thread_var *thread= my_thread_var;
1811 PAGECACHE_PAGE page;
1812 page.file= *file;
1813 page.pageno= pageno;
1814 thread->keycache_link= (void *) &page;
1815 wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
1816 DBUG_PRINT("wait",
1817 ("suspend thread %s %ld", thread->name, (ulong) thread->id));
1818 pagecache_pthread_cond_wait(&thread->suspend,
1819 &pagecache->cache_lock);
1820 thread->keycache_link= NULL;
1821 DBUG_PRINT("thread", ("restarting..."));
1822 goto restart;
1823 }
1824 hash_link->file= *file;
1825 DBUG_ASSERT(pageno < ((1ULL) << 40));
1826 hash_link->pageno= pageno;
1827 link_hash(start, hash_link);
1828 /* Register the request for the page */
1829 hash_link->requests++;
1830 DBUG_ASSERT(hash_link->block == 0);
1831 DBUG_ASSERT(hash_link->requests == 1);
1832 }
1833 else
1834 {
1835 /*
1836 We have to copy the flush_log callback, as it may change if the table
1837 goes from non_transactional to transactional during recovery
1838 */
1839 hash_link->file.flush_log_callback= file->flush_log_callback;
1840 }
1841 DBUG_PRINT("exit", ("hash_link: %p block: %p", hash_link,
1842 hash_link->block));
1843 DBUG_RETURN(hash_link);
1844}
1845
1846
1847/*
1848 Get a block for the file page requested by a pagecache read/write operation;
1849 If the page is not in the cache return a free block, if there is none
1850 return the lru block after saving its buffer if the page is dirty.
1851
1852 SYNOPSIS
1853
1854 find_block()
1855 pagecache pointer to a page cache data structure
1856 file handler for the file to read page from
1857 pageno number of the page in the file
1858 init_hits_left how initialize the block counter for the page
1859 wrmode <-> get for writing
1860 block_is_copied 1 if block will be copied from page cache under
1861 the pagelock mutex.
1862 reg_req Register request to the page. Normally all pages
1863 should be registered; The only time it's ok to
1864 not register a page is when the page is already
1865 pinned (and thus registered) by the same thread.
1866 page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1867
1868 RETURN VALUE
1869 Pointer to the found block if successful, 0 - otherwise
1870
1871 NOTES.
1872 For the page from file positioned at pageno the function checks whether
1873 the page is in the key cache specified by the first parameter.
1874 If this is the case it immediately returns the block.
    If not, the function first chooses a block for this page. If there are
    no unused blocks left in the page cache, the function takes the block
    at the very beginning of the warm sub-chain. It saves the page in that
    block if it's dirty before returning the pointer to it.
1879 The function returns in the page_st parameter the following values:
1880 PAGE_READ - if page already in the block,
1881 PAGE_TO_BE_READ - if it is to be read yet by the current thread
1882 WAIT_TO_BE_READ - if it is to be read by another thread
    If an error occurs, the PCBLOCK_ERROR bit is set in the block status.
    It might happen that there are no blocks in the LRU chain (in the warm
    part) - all blocks are unlinked for some read/write operations. Then the
    function waits until the first of these operations links a block back.
1887*/
1888
1889static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
1890 PAGECACHE_FILE *file,
1891 pgcache_page_no_t pageno,
1892 int init_hits_left,
1893 my_bool wrmode,
1894 my_bool block_is_copied,
1895 my_bool reg_req,
1896 int *page_st)
1897{
1898 PAGECACHE_HASH_LINK *hash_link;
1899 PAGECACHE_BLOCK_LINK *block;
1900 int error= 0;
1901 int page_status;
1902 DBUG_ENTER("find_block");
1903 DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d block_is_copied: %d",
1904 file->file, (ulong) pageno, wrmode, block_is_copied));
1905 KEYCACHE_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d",
1906 file->file, (ulong) pageno,
1907 wrmode));
1908#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1909 DBUG_EXECUTE("check_pagecache",
1910 test_key_cache(pagecache, "start of find_block", 0););
1911#endif
1912
1913restart:
1914 /* Find the hash link for the requested page (file, pageno) */
1915 hash_link= get_hash_link(pagecache, file, pageno);
1916
1917 page_status= -1;
1918 if ((block= hash_link->block) &&
1919 block->hash_link == hash_link && (block->status & PCBLOCK_READ))
1920 page_status= PAGE_READ;
1921
1922 if (wrmode && pagecache->resize_in_flush)
1923 {
1924 /* This is a write request during the flush phase of a resize operation */
1925
1926 if (page_status != PAGE_READ)
1927 {
1928 /* We don't need the page in the cache: we are going to write on disk */
1929 DBUG_ASSERT(hash_link->requests > 0);
1930 hash_link->requests--;
1931 unlink_hash(pagecache, hash_link);
1932 return 0;
1933 }
1934 if (!(block->status & PCBLOCK_IN_FLUSH))
1935 {
1936 DBUG_ASSERT(hash_link->requests > 0);
1937 hash_link->requests--;
1938 /*
1939 Remove block to invalidate the page in the block buffer
1940 as we are going to write directly on disk.
1941 Although we have an exclusive lock for the updated key part
1942 the control can be yielded by the current thread as we might
1943 have unfinished readers of other key parts in the block
1944 buffer. Still we are guaranteed not to have any readers
1945 of the key part we are writing into until the block is
1946 removed from the cache as we set the PCBLOCK_REASSIGNED
1947 flag (see the code below that handles reading requests).
1948 */
1949 free_block(pagecache, block, 0);
1950 return 0;
1951 }
1952 /* Wait until the page is flushed on disk */
1953 DBUG_ASSERT(hash_link->requests > 0);
1954 hash_link->requests--;
1955 wait_for_flush(pagecache, block);
1956
1957 /* Invalidate page in the block if it has not been done yet */
1958 DBUG_ASSERT(block->status); /* Should always be true */
1959 if (block->status)
1960 free_block(pagecache, block, 0);
1961 return 0;
1962 }
1963
1964 if (page_status == PAGE_READ &&
1965 (block->status & (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)))
1966 {
1967 /* This is a request for a page to be removed from cache */
1968
1969 KEYCACHE_DBUG_PRINT("find_block",
1970 ("request for old page in block: %u "
1971 "wrmode: %d block->status: %d",
1972 PCBLOCK_NUMBER(pagecache, block), wrmode,
1973 block->status));
1974 /*
1975 Only reading requests can proceed until the old dirty page is flushed,
1976 all others are to be suspended, then resubmitted
1977 */
1978 if (!wrmode && block_is_copied && !(block->status & PCBLOCK_REASSIGNED))
1979 {
1980 if (reg_req)
1981 reg_requests(pagecache, block, 1);
1982 }
1983 else
1984 {
1985 /*
1986 When we come here either PCBLOCK_REASSIGNED or PCBLOCK_IN_SWITCH are
1987 active. In both cases wqueue_release_queue() is called when the
1988 state changes.
1989 */
1990 DBUG_ASSERT(block->hash_link == hash_link);
1991 remove_reader(block);
1992 KEYCACHE_DBUG_PRINT("find_block",
1993 ("request waiting for old page to be saved"));
1994 {
1995 struct st_my_thread_var *thread= my_thread_var;
1996 /* Put the request into the queue of those waiting for the old page */
1997 wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
1998 /* Wait until the request can be resubmitted */
1999 do
2000 {
2001 DBUG_PRINT("wait",
2002 ("suspend thread %s %ld", thread->name,
2003 (ulong) thread->id));
2004 pagecache_pthread_cond_wait(&thread->suspend,
2005 &pagecache->cache_lock);
2006 }
2007 while(thread->next);
2008 }
2009 KEYCACHE_DBUG_PRINT("find_block",
2010 ("request for old page resubmitted"));
2011 DBUG_PRINT("info", ("restarting..."));
2012 /* Resubmit the request */
2013 goto restart;
2014 }
2015 }
2016 else
2017 {
2018 /* This is a request for a new page or for a page not to be removed */
2019 if (! block)
2020 {
2021 /* No block is assigned for the page yet */
2022 if (pagecache->blocks_unused)
2023 {
2024 if (pagecache->free_block_list)
2025 {
2026 /* There is a block in the free list. */
2027 block= pagecache->free_block_list;
2028 pagecache->free_block_list= block->next_used;
2029 block->next_used= NULL;
2030 }
2031 else
2032 {
2033 /* There are some never used blocks, take first of them */
2034 block= &pagecache->block_root[pagecache->blocks_used];
2035 block->buffer= ADD_TO_PTR(pagecache->block_mem,
2036 (pagecache->blocks_used*
2037 pagecache->block_size),
2038 uchar*);
2039 pagecache->blocks_used++;
2040 }
2041 pagecache->blocks_unused--;
2042 DBUG_ASSERT(block->wlocks == 0);
2043 DBUG_ASSERT(block->rlocks == 0);
2044 DBUG_ASSERT(block->rlocks_queue == 0);
2045 DBUG_ASSERT(block->pins == 0);
2046 block->status= 0;
2047#ifdef DBUG_ASSERT_EXISTS
2048 block->type= PAGECACHE_EMPTY_PAGE;
2049#endif
2050 DBUG_ASSERT(reg_req);
2051 block->requests= 1;
2052 block->temperature= PCBLOCK_COLD;
2053 block->hits_left= init_hits_left;
2054 block->last_hit_time= 0;
2055 block->rec_lsn= LSN_MAX;
2056 link_to_file_list(pagecache, block, file, 0);
2057 block->hash_link= hash_link;
2058 hash_link->block= block;
2059 page_status= PAGE_TO_BE_READ;
2060 DBUG_PRINT("info", ("page to be read set for page %p (%u)",
2061 block, PCBLOCK_NUMBER(pagecache, block)));
2062 KEYCACHE_PRINT("find_block",
2063 ("got free or never used block %u",
2064 PCBLOCK_NUMBER(pagecache, block)));
2065 }
2066 else
2067 {
2068 /* There are no never used blocks, use a block from the LRU chain */
2069
2070 /*
2071 Ensure that we are going to register the block.
2072 (This should be true as a new block could not have been
2073 pinned by caller).
2074 */
2075 DBUG_ASSERT(reg_req);
2076
2077 if (! pagecache->used_last)
2078 {
2079 /*
2080 Wait until a new block is added to the LRU chain;
2081 several threads might wait here for the same page,
2082 all of them must get the same block.
2083
2084 The block is given to us by the next thread executing
2085 link_block().
2086 */
2087
2088 struct st_my_thread_var *thread= my_thread_var;
2089 thread->keycache_link= (void *) hash_link;
2090 wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
2091 do
2092 {
2093 DBUG_PRINT("wait",
2094 ("suspend thread %s %ld", thread->name,
2095 (ulong) thread->id));
2096 pagecache_pthread_cond_wait(&thread->suspend,
2097 &pagecache->cache_lock);
2098 }
2099 while (thread->next);
2100 thread->keycache_link= NULL;
2101 block= hash_link->block;
2102 /* Ensure that the block is registered */
2103 DBUG_ASSERT(block->requests >= 1);
2104 }
2105 else
2106 {
2107 /*
2108 Take the first block from the LRU chain
2109 unlinking it from the chain
2110 */
2111 block= pagecache->used_last->next_used;
2112 if (reg_req)
2113 reg_requests(pagecache, block, 1);
2114 hash_link->block= block;
2115 DBUG_ASSERT(block->requests == 1);
2116 }
2117
2118 PCBLOCK_INFO(block);
2119
2120 DBUG_ASSERT(block->hash_link == hash_link ||
2121 !(block->status & PCBLOCK_IN_SWITCH));
2122
2123 if (block->hash_link != hash_link &&
2124 ! (block->status & PCBLOCK_IN_SWITCH) )
2125 {
2126 /* If another thread is flushing the block, wait for it. */
2127 if (block->status & PCBLOCK_IN_FLUSH)
2128 wait_for_flush(pagecache, block);
2129
2130 /* this is a primary request for a new page */
2131 DBUG_ASSERT(block->wlocks == 0);
2132 DBUG_ASSERT(block->rlocks == 0);
2133 DBUG_ASSERT(block->rlocks_queue == 0);
2134 DBUG_ASSERT(block->pins == 0);
2135 block->status|= PCBLOCK_IN_SWITCH;
2136
2137 KEYCACHE_DBUG_PRINT("find_block",
2138 ("got block %u for new page",
2139 PCBLOCK_NUMBER(pagecache, block)));
2140
2141 if (block->status & PCBLOCK_CHANGED)
2142 {
2143 /* The block contains a dirty page - push it out of the cache */
2144
2145 KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
2146
2147 /*
2148 The call is thread safe because only the current
2149 thread might change the block->hash_link value
2150 */
2151 DBUG_ASSERT(block->pins == 0);
2152 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2153 error= pagecache_fwrite(pagecache,
2154 &block->hash_link->file,
2155 block->buffer,
2156 block->hash_link->pageno,
2157 block->type,
2158 pagecache->readwrite_flags);
2159 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2160 pagecache->global_cache_write++;
2161 }
2162
2163 block->status|= PCBLOCK_REASSIGNED;
2164 if (block->hash_link)
2165 {
2166 /*
2167 Wait until all pending read requests
2168 for this page are executed
2169 (we could have avoided this waiting, if we had read
2170 a page in the cache in a sweep, without yielding control)
2171 */
2172 wait_for_readers(pagecache, block);
2173
2174 /* Remove the hash link for this page from the hash table */
2175 unlink_hash(pagecache, block->hash_link);
2176
2177 /* All pending requests for this page must be resubmitted */
2178 if (block->wqueue[COND_FOR_SAVED].last_thread)
2179 wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
2180 }
2181 link_to_file_list(pagecache, block, file,
2182 (my_bool)(block->hash_link ? 1 : 0));
2183
2184 block->hash_link= hash_link;
2185 PCBLOCK_INFO(block);
2186 block->hits_left= init_hits_left;
2187 block->last_hit_time= 0;
2188 block->status= error ? PCBLOCK_ERROR : 0;
2189 block->error= error ? (int16) my_errno : 0;
2190#ifdef DBUG_ASSERT_EXISTS
2191 block->type= PAGECACHE_EMPTY_PAGE;
2192 if (error)
2193 my_debug_put_break_here();
2194#endif
2195 page_status= PAGE_TO_BE_READ;
2196 DBUG_PRINT("info", ("page to be read set for page %p", block));
2197
2198 KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2199 KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2200 }
2201 else
2202 {
2203 /* This is for secondary requests for a new page only */
2204 KEYCACHE_DBUG_PRINT("find_block",
2205 ("block->hash_link: %p hash_link: %p "
2206 "block->status: %u", block->hash_link,
2207 hash_link, block->status ));
2208 page_status= (((block->hash_link == hash_link) &&
2209 (block->status & PCBLOCK_READ)) ?
2210 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2211 }
2212 }
2213 }
2214 else
2215 {
2216 /*
 The block was found in the cache. It is either an already-read
 block or a block waiting to be read by another thread.
2219 */
2220 if (reg_req)
2221 reg_requests(pagecache, block, 1);
2222 KEYCACHE_DBUG_PRINT("find_block",
2223 ("block->hash_link: %p hash_link: %p "
2224 "block->status: %u", block->hash_link,
2225 hash_link, block->status ));
2226 /*
2227 block->hash_link != hash_link can only happen when
2228 the block is in PCBLOCK_IN_SWITCH above (is flushed out
2229 to be replaced by another block). The SWITCH code will change
2230 block->hash_link to point to hash_link.
2231 */
2232 KEYCACHE_DBUG_ASSERT(block->hash_link == hash_link ||
2233 block->status & PCBLOCK_IN_SWITCH);
2234 page_status= (((block->hash_link == hash_link) &&
2235 (block->status & PCBLOCK_READ)) ?
2236 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2237 }
2238 }
2239
2240 KEYCACHE_DBUG_ASSERT(page_status != -1);
2241 *page_st= page_status;
2242 DBUG_PRINT("info",
2243 ("block: %p fd: %u pos: %lu block->status: %u page_status: %u",
2244 block, (uint) file->file,
2245 (ulong) pageno, block->status, (uint) page_status));
2246 KEYCACHE_PRINT("find_block",
2247 ("block: %p fd: %d pos: %lu block->status: %u page_status: %d",
2248 block, file->file, (ulong) pageno, block->status,
2249 page_status));
2250
2251#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2252 DBUG_EXECUTE("check_pagecache",
2253 test_key_cache(pagecache, "end of find_block",0););
2254#endif
2255 KEYCACHE_THREAD_TRACE("find_block:end");
2256 DBUG_RETURN(block);
2257}
2258
2259
2260static void add_pin(PAGECACHE_BLOCK_LINK *block)
2261{
2262 DBUG_ENTER("add_pin");
2263 DBUG_PRINT("enter", ("block: %p pins: %u", block, block->pins));
2264 PCBLOCK_INFO(block);
2265 block->pins++;
2266#ifndef DBUG_OFF
2267 {
2268 PAGECACHE_PIN_INFO *info=
2269 (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0));
2270 info->thread= my_thread_var;
2271 info_link(&block->pin_list, info);
2272 }
2273#endif
2274 DBUG_VOID_RETURN;
2275}
2276
2277static void remove_pin(PAGECACHE_BLOCK_LINK *block, my_bool any
2278#ifdef DBUG_OFF
2279 __attribute__((unused))
2280#endif
2281 )
2282{
2283 DBUG_ENTER("remove_pin");
2284 DBUG_PRINT("enter", ("block: %p pins: %u any: %d", block, block->pins,
2285 (int)any));
2286 PCBLOCK_INFO(block);
2287 DBUG_ASSERT(block->pins > 0);
2288 block->pins--;
2289#ifndef DBUG_OFF
2290 {
2291 PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var, any);
2292 DBUG_ASSERT(info != 0);
2293 info_unlink(info);
2294 my_free(info);
2295 }
2296#endif
2297 DBUG_VOID_RETURN;
2298}
2299#ifndef DBUG_OFF
2300static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
2301{
2302 PAGECACHE_LOCK_INFO *info=
2303 (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0));
2304 info->thread= my_thread_var;
2305 info->write_lock= wl;
2306 info_link((PAGECACHE_PIN_INFO **)&block->lock_list,
2307 (PAGECACHE_PIN_INFO *)info);
2308}
2309static void info_remove_lock(PAGECACHE_BLOCK_LINK *block)
2310{
2311 PAGECACHE_LOCK_INFO *info=
2312 (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
2313 my_thread_var, FALSE);
2314 DBUG_ASSERT(info != 0);
2315 info_unlink((PAGECACHE_PIN_INFO *)info);
2316 my_free(info);
2317}
2318static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
2319{
2320 PAGECACHE_LOCK_INFO *info=
2321 (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
2322 my_thread_var, FALSE);
2323 DBUG_ASSERT(info != 0);
2324 DBUG_ASSERT(info->write_lock != wl);
2325 info->write_lock= wl;
2326}
2327#else
/* DBUG_OFF build: the lock bookkeeping helpers expand to nothing */
#define info_add_lock(block, write_lock)
#define info_remove_lock(block)
#define info_change_lock(block, write_lock)
2331#endif
2332
2333
2334/**
2335 @brief waiting for lock for read and write lock
2336
2337 @parem pagecache pointer to a page cache data structure
2338 @parem block the block to work with
2339 @param file file of the block when it was locked
2340 @param pageno page number of the block when it was locked
2341 @param lock_type MY_PTHREAD_LOCK_READ or MY_PTHREAD_LOCK_WRITE
2342
2343 @retval 0 OK
2344 @retval 1 Can't lock this block, need retry
2345*/
2346
2347static my_bool pagecache_wait_lock(PAGECACHE *pagecache,
2348 PAGECACHE_BLOCK_LINK *block,
2349 PAGECACHE_FILE file,
2350 pgcache_page_no_t pageno,
2351 uint lock_type)
2352{
2353 /* Lock failed we will wait */
2354 struct st_my_thread_var *thread= my_thread_var;
2355 DBUG_ENTER("pagecache_wait_lock");
2356 DBUG_PRINT("info", ("fail to lock, waiting... %p", block));
2357 thread->lock_type= lock_type;
2358 wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
2359 dec_counter_for_resize_op(pagecache);
2360 do
2361 {
2362 DBUG_PRINT("wait",
2363 ("suspend thread %s %ld", thread->name, (ulong) thread->id));
2364 pagecache_pthread_cond_wait(&thread->suspend,
2365 &pagecache->cache_lock);
2366 }
2367 while(thread->next);
2368 inc_counter_for_resize_op(pagecache);
2369 PCBLOCK_INFO(block);
2370 if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) ||
2371 !block->hash_link ||
2372 file.file != block->hash_link->file.file ||
2373 pageno != block->hash_link->pageno)
2374 {
2375 DBUG_PRINT("info", ("the block %p changed => need retry "
2376 "status: %x files %d != %d or pages %lu != %lu",
2377 block, block->status, file.file,
2378 block->hash_link ? block->hash_link->file.file : -1,
2379 (ulong) pageno,
2380 (ulong) (block->hash_link ? block->hash_link->pageno : 0)));
2381 DBUG_RETURN(1);
2382 }
2383 DBUG_RETURN(0);
2384}
2385
2386/**
2387 @brief Put on the block write lock
2388
2389 @parem pagecache pointer to a page cache data structure
2390 @parem block the block to work with
2391
2392 @note We have loose scheme for locking by the same thread:
2393 * Downgrade to read lock if no other locks are taken
2394 * Our scheme of locking allow for the same thread
2395 - the same kind of lock
2396 - taking read lock if write lock present
2397 - downgrading to read lock if still other place the same
2398 thread keep write lock
2399 * But unlock operation number should be the same to lock operation.
2400 * If we try to get read lock having active write locks we put read
2401 locks to queue, and as soon as write lock(s) gone the read locks
2402 from queue came in force.
2403 * If read lock is unlocked earlier then it came to force it
2404 just removed from the queue
2405
2406 @retval 0 OK
2407 @retval 1 Can't lock this block, need retry
2408*/
2409
2410static my_bool get_wrlock(PAGECACHE *pagecache,
2411 PAGECACHE_BLOCK_LINK *block)
2412{
2413 PAGECACHE_FILE file= block->hash_link->file;
2414 pgcache_page_no_t pageno= block->hash_link->pageno;
2415 pthread_t locker= pthread_self();
2416 DBUG_ENTER("get_wrlock");
2417 DBUG_PRINT("info", ("the block %p "
2418 "files %d(%d) pages %lu(%lu)",
2419 block, file.file, block->hash_link->file.file,
2420 (ulong) pageno, (ulong) block->hash_link->pageno));
2421 PCBLOCK_INFO(block);
2422 /*
2423 We assume that the same thread will try write lock on block on which it
2424 has already read lock.
2425 */
2426 while ((block->wlocks && !pthread_equal(block->write_locker, locker)) ||
2427 block->rlocks)
2428 {
2429 /* Lock failed we will wait */
2430 if (pagecache_wait_lock(pagecache, block, file, pageno,
2431 MY_PTHREAD_LOCK_WRITE))
2432 DBUG_RETURN(1);
2433 }
2434 /* we are doing it by global cache mutex protection, so it is OK */
2435 block->wlocks++;
2436 block->write_locker= locker;
2437 DBUG_PRINT("info", ("WR lock set, block %p", block));
2438 DBUG_RETURN(0);
2439}
2440
2441
2442/*
2443 @brief Put on the block read lock
2444
2445 @param pagecache pointer to a page cache data structure
2446 @param block the block to work with
2447 @param user_file Unique handler per handler file. Used to check if
2448 we request many write locks withing the same
2449 statement
2450
2451 @note see note for get_wrlock().
2452
2453 @retvalue 0 OK
2454 @retvalue 1 Can't lock this block, need retry
2455*/
2456
2457static my_bool get_rdlock(PAGECACHE *pagecache,
2458 PAGECACHE_BLOCK_LINK *block)
2459{
2460 PAGECACHE_FILE file= block->hash_link->file;
2461 pgcache_page_no_t pageno= block->hash_link->pageno;
2462 pthread_t locker= pthread_self();
2463 DBUG_ENTER("get_rdlock");
2464 DBUG_PRINT("info", ("the block %p "
2465 "files %d(%d) pages %lu(%lu)",
2466 block, file.file, block->hash_link->file.file,
2467 (ulong) pageno, (ulong) block->hash_link->pageno));
2468 PCBLOCK_INFO(block);
2469 while (block->wlocks && !pthread_equal(block->write_locker, locker))
2470 {
2471 /* Lock failed we will wait */
2472 if (pagecache_wait_lock(pagecache, block, file, pageno,
2473 MY_PTHREAD_LOCK_READ))
2474 DBUG_RETURN(1);
2475 }
2476 /* we are doing it by global cache mutex protection, so it is OK */
2477 if (block->wlocks)
2478 {
2479 DBUG_ASSERT(pthread_equal(block->write_locker, locker));
2480 block->rlocks_queue++;
2481 DBUG_PRINT("info", ("RD lock put into queue, block %p", block));
2482 }
2483 else
2484 {
2485 block->rlocks++;
2486 DBUG_PRINT("info", ("RD lock set, block %p", block));
2487 }
2488 DBUG_RETURN(0);
2489}
2490
2491
2492/*
2493 @brief Remove write lock from the block
2494
2495 @param pagecache pointer to a page cache data structure
2496 @param block the block to work with
2497 @param read_lock downgrade to read lock
2498
2499 @note see note for get_wrlock().
2500*/
2501
2502static void release_wrlock(PAGECACHE_BLOCK_LINK *block, my_bool read_lock)
2503{
2504 DBUG_ENTER("release_wrlock");
2505 PCBLOCK_INFO(block);
2506 DBUG_ASSERT(block->wlocks > 0);
2507 DBUG_ASSERT(block->rlocks == 0);
2508 DBUG_ASSERT(block->pins > 0);
2509 if (read_lock)
2510 block->rlocks_queue++;
2511 if (block->wlocks == 1)
2512 {
2513 block->rlocks= block->rlocks_queue;
2514 block->rlocks_queue= 0;
2515 }
2516 block->wlocks--;
2517 if (block->wlocks > 0)
2518 DBUG_VOID_RETURN; /* Multiple write locked */
2519 DBUG_PRINT("info", ("WR lock reset, block %p", block));
2520 /* release all threads waiting for read lock or one waiting for write */
2521 if (block->wqueue[COND_FOR_WRLOCK].last_thread)
2522 wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
2523 PCBLOCK_INFO(block);
2524 DBUG_VOID_RETURN;
2525}
2526
2527/*
2528 @brief Remove read lock from the block
2529
2530 @param pagecache pointer to a page cache data structure
2531 @param block the block to work with
2532
2533 @note see note for get_wrlock().
2534*/
2535
2536static void release_rdlock(PAGECACHE_BLOCK_LINK *block)
2537{
2538 DBUG_ENTER("release_wrlock");
2539 PCBLOCK_INFO(block);
2540 if (block->wlocks)
2541 {
2542 DBUG_ASSERT(pthread_equal(block->write_locker, pthread_self()));
2543 DBUG_ASSERT(block->rlocks == 0);
2544 DBUG_ASSERT(block->rlocks_queue > 0);
2545 block->rlocks_queue--;
2546 DBUG_PRINT("info", ("RD lock queue decreased, block %p", block));
2547 DBUG_VOID_RETURN;
2548 }
2549 DBUG_ASSERT(block->rlocks > 0);
2550 DBUG_ASSERT(block->rlocks_queue == 0);
2551 block->rlocks--;
2552 DBUG_PRINT("info", ("RD lock decreased, block %p", block));
2553 if (block->rlocks > 0)
2554 DBUG_VOID_RETURN; /* Multiple write locked */
2555 DBUG_PRINT("info", ("RD lock reset, block %p", block));
2556 /* release all threads waiting for read lock or one waiting for write */
2557 if (block->wqueue[COND_FOR_WRLOCK].last_thread)
2558 wqueue_release_one_locktype_from_queue(&block->wqueue[COND_FOR_WRLOCK]);
2559 PCBLOCK_INFO(block);
2560 DBUG_VOID_RETURN;
2561}
2562
2563/**
2564 @brief Try to lock/unlock and pin/unpin the block
2565
2566 @param pagecache pointer to a page cache data structure
2567 @param block the block to work with
2568 @param lock lock change mode
2569 @param pin pinchange mode
2570 @param file File handler requesting pin
2571 @param any allow unpinning block pinned by any thread; possible
2572 only if not locked, see pagecache_unlock_by_link()
2573
2574 @retval 0 OK
2575 @retval 1 Try to lock the block failed
2576*/
2577
2578static my_bool make_lock_and_pin(PAGECACHE *pagecache,
2579 PAGECACHE_BLOCK_LINK *block,
2580 enum pagecache_page_lock lock,
2581 enum pagecache_page_pin pin,
2582 my_bool any)
2583{
2584 DBUG_ENTER("make_lock_and_pin");
2585 DBUG_PRINT("enter", ("block: %p (%u) lock: %s pin: %s any %d",
2586 block, PCBLOCK_NUMBER(pagecache, block),
2587 page_cache_page_lock_str[lock],
2588 page_cache_page_pin_str[pin], (int)any));
2589 PCBLOCK_INFO(block);
2590
2591 DBUG_ASSERT(block);
2592 DBUG_ASSERT(!any ||
2593 ((lock == PAGECACHE_LOCK_LEFT_UNLOCKED) &&
2594 (pin == PAGECACHE_UNPIN)));
2595 DBUG_ASSERT(block->hash_link->block == block);
2596
2597 switch (lock) {
2598 case PAGECACHE_LOCK_WRITE: /* free -> write */
2599 /* Writelock and pin the buffer */
2600 if (get_wrlock(pagecache, block))
2601 {
2602 /* Couldn't lock because block changed status => need retry */
2603 goto retry;
2604 }
2605
2606 /* The cache is locked so nothing afraid of */
2607 add_pin(block);
2608 info_add_lock(block, 1);
2609 break;
2610 case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */
2611 case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */
2612 /* Removes write lock and puts read lock */
2613 release_wrlock(block, lock == PAGECACHE_LOCK_WRITE_TO_READ);
2614 /* fall through */
2615 case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */
2616 if (lock == PAGECACHE_LOCK_READ_UNLOCK)
2617 release_rdlock(block);
2618 /* fall through */
2619 case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */
2620 if (pin == PAGECACHE_UNPIN)
2621 {
2622 remove_pin(block, FALSE);
2623 }
2624 if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
2625 {
2626 info_change_lock(block, 0);
2627 }
2628 else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2629 lock == PAGECACHE_LOCK_READ_UNLOCK)
2630 {
2631 info_remove_lock(block);
2632 }
2633 break;
2634 case PAGECACHE_LOCK_READ: /* free -> read */
2635 if (get_rdlock(pagecache, block))
2636 {
2637 /* Couldn't lock because block changed status => need retry */
2638 goto retry;
2639 }
2640
2641 if (pin == PAGECACHE_PIN)
2642 {
2643 /* The cache is locked so nothing afraid off */
2644 add_pin(block);
2645 }
2646 info_add_lock(block, 0);
2647 break;
2648 case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */
2649 if (pin == PAGECACHE_UNPIN)
2650 {
2651 remove_pin(block, any);
2652 }
2653 /* fall through */
2654 case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */
2655 break; /* do nothing */
2656 default:
2657 DBUG_ASSERT(0); /* Never should happened */
2658 }
2659
2660 PCBLOCK_INFO(block);
2661 DBUG_RETURN(0);
2662retry:
2663 DBUG_PRINT("INFO", ("Retry block %p", block));
2664 PCBLOCK_INFO(block);
2665 DBUG_ASSERT(block->hash_link->requests > 0);
2666 block->hash_link->requests--;
2667 DBUG_RETURN(1);
2668
2669}
2670
2671
2672/*
2673 Read into a key cache block buffer from disk.
2674
2675 SYNOPSIS
2676
2677 read_block()
2678 pagecache pointer to a page cache data structure
2679 block block to which buffer the data is to be read
2680 primary <-> the current thread will read the data
2681
2682 RETURN VALUE
2683 None
2684
2685 NOTES.
2686 The function either reads a page data from file to the block buffer,
2687 or waits until another thread reads it. What page to read is determined
2688 by a block parameter - reference to a hash link for this page.
2689 If an error occurs THE PCBLOCK_ERROR bit is set in the block status.
2690
2691 On entry cache_lock is locked
2692*/
2693
2694static void read_block(PAGECACHE *pagecache,
2695 PAGECACHE_BLOCK_LINK *block,
2696 my_bool primary)
2697{
2698 DBUG_ENTER("read_block");
2699 DBUG_PRINT("enter", ("read block: %p primary: %d", block, primary));
2700 if (primary)
2701 {
2702 size_t error;
2703 PAGECACHE_IO_HOOK_ARGS args;
2704 /*
2705 This code is executed only by threads
2706 that submitted primary requests
2707 */
2708
2709 pagecache->global_cache_read++;
2710 /*
2711 Page is not in buffer yet, is to be read from disk
2712 Here other threads may step in and register as secondary readers.
2713 They will register in block->wqueue[COND_FOR_REQUESTED].
2714 */
2715 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2716 args.page= block->buffer;
2717 args.pageno= block->hash_link->pageno;
2718 args.data= block->hash_link->file.callback_data;
2719 error= (*block->hash_link->file.pre_read_hook)(&args);
2720 if (!error)
2721 {
2722 error= pagecache_fread(pagecache, &block->hash_link->file,
2723 args.page,
2724 block->hash_link->pageno,
2725 pagecache->readwrite_flags);
2726 }
2727 error= (*block->hash_link->file.post_read_hook)(error != 0, &args);
2728 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2729 if (error)
2730 {
2731 DBUG_ASSERT(maria_in_recovery || !maria_assert_if_crashed_table);
2732 block->status|= PCBLOCK_ERROR;
2733 block->error= (int16) my_errno;
2734 my_debug_put_break_here();
2735 }
2736 else
2737 {
2738 block->status|= PCBLOCK_READ;
2739 }
2740 DBUG_PRINT("read_block",
2741 ("primary request: new page in cache"));
2742 /* Signal that all pending requests for this page now can be processed */
2743 if (block->wqueue[COND_FOR_REQUESTED].last_thread)
2744 wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
2745 }
2746 else
2747 {
2748 /*
2749 This code is executed only by threads
2750 that submitted secondary requests
2751 */
2752
2753 struct st_my_thread_var *thread= my_thread_var;
2754 /* Put the request into a queue and wait until it can be processed */
2755 wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
2756 do
2757 {
2758 DBUG_PRINT("wait",
2759 ("suspend thread %s %ld", thread->name,
2760 (ulong) thread->id));
2761 pagecache_pthread_cond_wait(&thread->suspend,
2762 &pagecache->cache_lock);
2763 }
2764 while (thread->next);
2765 DBUG_PRINT("read_block",
2766 ("secondary request: new page in cache"));
2767 }
2768 DBUG_VOID_RETURN;
2769}
2770
2771
2772/**
2773 @brief Set LSN on the page to the given one if the given LSN is bigger
2774
2775 @param pagecache pointer to a page cache data structure
2776 @param lsn LSN to set
2777 @param block block to check and set
2778*/
2779
2780static void check_and_set_lsn(PAGECACHE *pagecache,
2781 LSN lsn, PAGECACHE_BLOCK_LINK *block)
2782{
2783 LSN old;
2784 DBUG_ENTER("check_and_set_lsn");
2785 /*
2786 In recovery, we can _ma_unpin_all_pages() to put a LSN on page, though
2787 page would be PAGECACHE_PLAIN_PAGE (transactionality temporarily disabled
2788 to not log REDOs).
2789 */
2790 DBUG_ASSERT((block->type == PAGECACHE_LSN_PAGE) || maria_in_recovery);
2791 old= lsn_korr(block->buffer);
2792 DBUG_PRINT("info", ("old lsn: " LSN_FMT " new lsn: " LSN_FMT,
2793 LSN_IN_PARTS(old), LSN_IN_PARTS(lsn)));
2794 if (cmp_translog_addr(lsn, old) > 0)
2795 {
2796
2797 DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE);
2798 lsn_store(block->buffer, lsn);
2799 /* we stored LSN in page so we dirtied it */
2800 if (!(block->status & PCBLOCK_CHANGED))
2801 link_to_changed_list(pagecache, block);
2802 }
2803 DBUG_VOID_RETURN;
2804}
2805
2806
2807/**
2808 @brief Unlock/unpin page and put LSN stamp if it need
2809
2810 @param pagecache pointer to a page cache data structure
2811 @pagam file handler for the file for the block of data to be read
2812 @param pageno number of the block of data in the file
2813 @param lock lock change
2814 @param pin pin page
2815 @param first_REDO_LSN_for_page do not set it if it is zero
2816 @param lsn if it is not LSN_IMPOSSIBLE (0) and it
2817 is bigger then LSN on the page it will be written on
2818 the page
2819 @param was_changed should be true if the page was write locked with
2820 direct link giving and the page was changed
2821
2822 @note
2823 Pininig uses requests registration mechanism it works following way:
2824 | beginnig | ending |
2825 | of func. | of func. |
2826 ----------------------------+-------------+---------------+
2827 PAGECACHE_PIN_LEFT_PINNED | - | - |
2828 PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request |
2829 PAGECACHE_PIN | reg request | - |
2830 PAGECACHE_UNPIN | - | unreg request |
2831
2832
2833*/
2834
2835void pagecache_unlock(PAGECACHE *pagecache,
2836 PAGECACHE_FILE *file,
2837 pgcache_page_no_t pageno,
2838 enum pagecache_page_lock lock,
2839 enum pagecache_page_pin pin,
2840 LSN first_REDO_LSN_for_page,
2841 LSN lsn, my_bool was_changed)
2842{
2843 PAGECACHE_BLOCK_LINK *block;
2844 int page_st;
2845 DBUG_ENTER("pagecache_unlock");
2846 DBUG_PRINT("enter", ("fd: %u page: %lu %s %s",
2847 (uint) file->file, (ulong) pageno,
2848 page_cache_page_lock_str[lock],
2849 page_cache_page_pin_str[pin]));
2850 /* we do not allow any lock/pin increasing here */
2851 DBUG_ASSERT(pin != PAGECACHE_PIN);
2852 DBUG_ASSERT(lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE);
2853
2854 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2855 /*
2856 As soon as we keep lock cache can be used, and we have lock because want
2857 to unlock.
2858 */
2859 DBUG_ASSERT(pagecache->can_be_used);
2860
2861 inc_counter_for_resize_op(pagecache);
2862 /* See NOTE for pagecache_unlock about registering requests */
2863 block= find_block(pagecache, file, pageno, 0, 0, 0,
2864 pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
2865 PCBLOCK_INFO(block);
2866 DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
2867 if (first_REDO_LSN_for_page)
2868 {
2869 DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK);
2870 DBUG_ASSERT(pin == PAGECACHE_UNPIN);
2871 pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
2872 }
2873 if (lsn != LSN_IMPOSSIBLE)
2874 check_and_set_lsn(pagecache, lsn, block);
2875
2876 /* if we lock for write we must link the block to changed blocks */
2877 DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
2878 (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2879 lock == PAGECACHE_LOCK_WRITE_TO_READ ||
2880 lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
2881 /*
2882 if was_changed then status should be PCBLOCK_DIRECT_W or marked
2883 as dirty
2884 */
2885 DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
2886 (block->status & PCBLOCK_CHANGED));
2887 if ((block->status & PCBLOCK_DIRECT_W) &&
2888 (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
2889 lock == PAGECACHE_LOCK_WRITE_TO_READ))
2890 {
2891 if (!(block->status & PCBLOCK_CHANGED) && was_changed)
2892 link_to_changed_list(pagecache, block);
2893 block->status&= ~PCBLOCK_DIRECT_W;
2894 DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: %p", block));
2895 }
2896
2897 if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
2898 {
2899 DBUG_ASSERT(0); /* should not happend */
2900 }
2901
2902 remove_reader(block);
2903 /*
2904 Link the block into the LRU chain if it's the last submitted request
2905 for the block and block will not be pinned.
2906 See NOTE for pagecache_unlock about registering requests.
2907 */
2908 if (pin != PAGECACHE_PIN_LEFT_PINNED)
2909 unreg_request(pagecache, block, 1);
2910
2911 dec_counter_for_resize_op(pagecache);
2912
2913 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2914
2915 DBUG_VOID_RETURN;
2916}
2917
2918
2919/*
2920 Unpin page
2921
2922 SYNOPSIS
2923 pagecache_unpin()
2924 pagecache pointer to a page cache data structure
2925 file handler for the file for the block of data to be read
2926 pageno number of the block of data in the file
2927 lsn if it is not LSN_IMPOSSIBLE (0) and it
2928 is bigger then LSN on the page it will be written on
2929 the page
2930*/
2931
2932void pagecache_unpin(PAGECACHE *pagecache,
2933 PAGECACHE_FILE *file,
2934 pgcache_page_no_t pageno,
2935 LSN lsn)
2936{
2937 PAGECACHE_BLOCK_LINK *block;
2938 int page_st;
2939 DBUG_ENTER("pagecache_unpin");
2940 DBUG_PRINT("enter", ("fd: %u page: %lu",
2941 (uint) file->file, (ulong) pageno));
2942 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
2943 /*
2944 As soon as we keep lock cache can be used, and we have lock bacause want
2945 aunlock.
2946 */
2947 DBUG_ASSERT(pagecache->can_be_used);
2948
2949 inc_counter_for_resize_op(pagecache);
2950 /* See NOTE for pagecache_unlock about registering requests */
2951 block= find_block(pagecache, file, pageno, 0, 0, 0, 0, &page_st);
2952 DBUG_ASSERT(block != 0);
2953 DBUG_ASSERT(page_st == PAGE_READ);
2954 /* we can't unpin such page without unlock */
2955 DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
2956
2957 if (lsn != LSN_IMPOSSIBLE)
2958 check_and_set_lsn(pagecache, lsn, block);
2959
2960 /*
2961 we can just unpin only with keeping read lock because:
2962 a) we can't pin without any lock
2963 b) we can't unpin keeping write lock
2964 */
2965 if (make_lock_and_pin(pagecache, block,
2966 PAGECACHE_LOCK_LEFT_READLOCKED,
2967 PAGECACHE_UNPIN, FALSE))
2968 DBUG_ASSERT(0); /* should not happend */
2969
2970 remove_reader(block);
2971 /*
2972 Link the block into the LRU chain if it's the last submitted request
2973 for the block and block will not be pinned.
2974 See NOTE for pagecache_unlock about registering requests
2975 */
2976 unreg_request(pagecache, block, 1);
2977
2978 dec_counter_for_resize_op(pagecache);
2979
2980 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
2981
2982 DBUG_VOID_RETURN;
2983}
2984
2985
2986/**
2987 @brief Unlock/unpin page and put LSN stamp if it need
2988 (uses direct block/page pointer)
2989
2990 @param pagecache pointer to a page cache data structure
2991 @param link direct link to page (returned by read or write)
2992 @param lock lock change
2993 @param pin pin page
2994 @param first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0)
  @param lsn             if it is not LSN_IMPOSSIBLE and it is bigger than
                         LSN on the page it will be written on the page
2997 @param was_changed should be true if the page was write locked with
2998 direct link giving and the page was changed
2999 @param any allow unpinning block pinned by any thread; possible
3000 only if not locked
3001
3002 @note 'any' is a hack so that _ma_bitmap_unpin_all() is allowed to unpin
3003 non-locked bitmap pages pinned by other threads. Because it always uses
3004 PAGECACHE_LOCK_LEFT_UNLOCKED and PAGECACHE_UNPIN
3005 (see write_changed_bitmap()), the hack is limited to these conditions.
3006*/
3007
void pagecache_unlock_by_link(PAGECACHE *pagecache,
                              PAGECACHE_BLOCK_LINK *block,
                              enum pagecache_page_lock lock,
                              enum pagecache_page_pin pin,
                              LSN first_REDO_LSN_for_page,
                              LSN lsn, my_bool was_changed,
                              my_bool any)
{
  /*
    Release lock and/or pin on a page the caller reached through a direct
    block link, optionally recording LSNs and linking the block into the
    changed list. All state changes happen under pagecache->cache_lock.
  */
  DBUG_ENTER("pagecache_unlock_by_link");
  DBUG_PRINT("enter", ("block: %p fd: %u page: %lu changed: %d %s %s",
                       block, (uint) block->hash_link->file.file,
                       (ulong) block->hash_link->pageno, was_changed,
                       page_cache_page_lock_str[lock],
                       page_cache_page_pin_str[pin]));
  /*
    We do not allow any lock/pin increasing here and page can't be
    unpinned because we use direct link.
  */
  DBUG_ASSERT(pin != PAGECACHE_PIN);
  DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
  DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
  DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
  /*
    NOTE(review): the assert above rejects PAGECACHE_PIN_LEFT_UNPINNED, yet
    this branch handles exactly that value, so it is presumably reachable
    only when DBUG_ASSERT compiles to nothing. Confirm which of the two is
    intended. The branch only drops the read lock and returns: no LSN
    handling and no request unregistration is done for this case.
  */
  if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
      lock == PAGECACHE_LOCK_READ_UNLOCK)
  {
    if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
      DBUG_ASSERT(0);                         /* should not happen */
    pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    DBUG_VOID_RETURN;
  }

  /*
    The caller still holds a lock on the page (it is here because it wants
    to unlock it), so the cache must be usable.
  */
  DBUG_ASSERT(pagecache->can_be_used);

  inc_counter_for_resize_op(pagecache);
  if (was_changed)
  {
    if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE)
    {
      /*
        LOCK_READ_UNLOCK is ok here as the page may have first been locked
        with a WRITE lock that was temporarily converted to a READ lock
        before it's unpinned
      */
      DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
                  lock == PAGECACHE_LOCK_READ_UNLOCK);
      DBUG_ASSERT(pin == PAGECACHE_UNPIN);
      pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
    }
    if (lsn != LSN_IMPOSSIBLE)
      check_and_set_lsn(pagecache, lsn, block);
    /*
      Reset error flag. Mark also that page is active; This may not have
      been the case if there was an error reading the page
    */
    block->status= (block->status & ~PCBLOCK_ERROR) | PCBLOCK_READ;
  }

  /*
    A block handed out for direct writing (PCBLOCK_DIRECT_W) may only be
    released with one of the write-lock transitions listed below.
  */
  DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
              (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
               lock == PAGECACHE_LOCK_WRITE_TO_READ ||
               lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
  /*
    If was_changed then status should be PCBLOCK_DIRECT_W or marked
    as dirty
  */
  DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
              (block->status & PCBLOCK_CHANGED));
  /*
    The write lock on a directly written block is being given up: link the
    block to the changed blocks if it was modified and is not linked yet,
    then drop the direct-write mark.
  */
  if ((block->status & PCBLOCK_DIRECT_W) &&
      (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
       lock == PAGECACHE_LOCK_WRITE_TO_READ))
  {
    if (!(block->status & PCBLOCK_CHANGED) && was_changed)
      link_to_changed_list(pagecache, block);
    block->status&= ~PCBLOCK_DIRECT_W;
    DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: %p", block));
  }

  /* 'any' is forwarded here; see the function header for its meaning */
  if (make_lock_and_pin(pagecache, block, lock, pin, any))
    DBUG_ASSERT(0);                           /* should not happen */

  /*
    Link the block into the LRU chain if it's the last submitted request
    for the block and block will not be pinned.
    See NOTE for pagecache_unlock about registering requests.
  */
  if (pin != PAGECACHE_PIN_LEFT_PINNED)
    unreg_request(pagecache, block, 1);

  /* Balances inc_counter_for_resize_op() above; must precede the unlock */
  dec_counter_for_resize_op(pagecache);

  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);

  DBUG_VOID_RETURN;
}
3108
3109
3110/*
3111 Unpin page
3112 (uses direct block/page pointer)
3113
3114 SYNOPSIS
3115 pagecache_unpin_by_link()
3116 pagecache pointer to a page cache data structure
3117 link direct link to page (returned by read or write)
      lsn                 if it is not LSN_IMPOSSIBLE (0) and it
                          is bigger than LSN on the page it will be written on
                          the page
3121*/
3122
void pagecache_unpin_by_link(PAGECACHE *pagecache,
                             PAGECACHE_BLOCK_LINK *block,
                             LSN lsn)
{
  /*
    Unpin a page reached through a direct block link, optionally stamping
    it with 'lsn' first. Unlike pagecache_unpin() no lookup by file/page
    number is done; the caller supplies the block.
  */
  DBUG_ENTER("pagecache_unpin_by_link");
  DBUG_PRINT("enter", ("block: %p fd: %u page: %lu",
                       block, (uint) block->hash_link->file.file,
                       (ulong) block->hash_link->pageno));

  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
  /*
    The caller still holds a lock on the page (it is here because it wants
    to unpin it), so the cache must be usable.
  */
  DBUG_ASSERT(pagecache->can_be_used);
  /* we can't unpin such page without unlock */
  DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);

  inc_counter_for_resize_op(pagecache);

  /* Stamp the page with the LSN only when the caller supplied one */
  if (lsn != LSN_IMPOSSIBLE)
    check_and_set_lsn(pagecache, lsn, block);

  /*
    We can unpin only while keeping the read lock because:
    a) we can't pin without any lock
    b) we can't unpin keeping write lock
  */
  if (make_lock_and_pin(pagecache, block,
                        PAGECACHE_LOCK_LEFT_READLOCKED,
                        PAGECACHE_UNPIN, FALSE))
    DBUG_ASSERT(0);                           /* should not happen */

  /*
    Link the block into the LRU chain if it's the last submitted request
    for the block and block will not be pinned.
    See NOTE for pagecache_unlock about registering requests.
  */
  unreg_request(pagecache, block, 1);

  /* Balances inc_counter_for_resize_op() above; must precede the unlock */
  dec_counter_for_resize_op(pagecache);

  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);

  DBUG_VOID_RETURN;
}
3169
/*
  Description of how to change the page lock before and after a
  read/write. One entry exists per value of enum pagecache_page_lock
  (see the lock_to_read and write_lock_change_table tables).
*/
struct rw_lock_change
{
  my_bool need_lock_change;             /* need changing of lock at the end */
  enum pagecache_page_lock new_lock;    /* lock to apply at the beginning */
  enum pagecache_page_lock unlock_lock; /* lock change to apply at the end */
};
3177
/*
  Description of how to change the page pin before and after a
  read/write (the pin counterpart of struct rw_lock_change).
*/
struct rw_pin_change
{
  enum pagecache_page_pin new_pin;      /* pin change at the beginning */
  enum pagecache_page_pin unlock_pin;   /* pin change at the end */
};
3184
3185/**
3186 Depending on the lock which the user wants in pagecache_read(), we
3187 need to acquire a first type of lock at start of pagecache_read(), and
3188 downgrade it to a second type of lock at end. For example, if user
3189 asked for no lock (PAGECACHE_LOCK_LEFT_UNLOCKED) this translates into
3190 taking first a read lock PAGECACHE_LOCK_READ (to rightfully block on
3191 existing write locks) then read then unlock the lock i.e. change lock
3192 to PAGECACHE_LOCK_READ_UNLOCK (the "1" below tells that a change is
3193 needed).
3194*/
3195
/*
  Indexed by the lock requested from pagecache_read(). Each entry is
  { need_lock_change, new_lock, unlock_lock }: new_lock is applied before
  the page is read, unlock_lock afterwards (only if need_lock_change).
*/
static struct rw_lock_change lock_to_read[8]=
{
  { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
    1,
    PAGECACHE_LOCK_READ, PAGECACHE_LOCK_READ_UNLOCK
  },
  { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
    0,
    PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_LEFT_READLOCKED
  },
  { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
    0,
    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_LEFT_WRITELOCKED
  },
  { /*PAGECACHE_LOCK_READ*/
    1,
    PAGECACHE_LOCK_READ, PAGECACHE_LOCK_LEFT_READLOCKED
  },
  { /*PAGECACHE_LOCK_WRITE*/
    1,
    PAGECACHE_LOCK_WRITE, PAGECACHE_LOCK_LEFT_WRITELOCKED
  },
  { /*PAGECACHE_LOCK_READ_UNLOCK*/
    1,
    PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_READ_UNLOCK
  },
  { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
    1,
    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_UNLOCK
  },
  { /*PAGECACHE_LOCK_WRITE_TO_READ*/
    1,
    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_TO_READ
  }
};
3231
/**
  Two sets of pin modes (the same idea as the lock table above, but for
  pinning). The difference between the sets is whether we are going to
  provide the caller with a reference to the block or not.
*/
3237
/*
  Pin changes used by pagecache_read(), looked up as
  lock_to_pin[buff == 0][lock]. Each entry is { new_pin, unlock_pin }.
*/
static struct rw_pin_change lock_to_pin[2][8]=
{
  /* [0]: caller supplied its own buffer (buff != 0) */
  {
    { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED
    },
    { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED,
    },
    { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
      PAGECACHE_PIN_LEFT_PINNED,
      PAGECACHE_PIN_LEFT_PINNED
    },
    { /*PAGECACHE_LOCK_READ*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED
    },
    { /*PAGECACHE_LOCK_WRITE*/
      PAGECACHE_PIN,
      PAGECACHE_PIN_LEFT_PINNED
    },
    { /*PAGECACHE_LOCK_READ_UNLOCK*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED
    },
    { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
      PAGECACHE_PIN_LEFT_PINNED,
      PAGECACHE_UNPIN
    },
    { /*PAGECACHE_LOCK_WRITE_TO_READ*/
      PAGECACHE_PIN_LEFT_PINNED,
      PAGECACHE_UNPIN
    }
  },
  /*
    [1]: no buffer supplied (buff == 0), so the caller gets a reference to
    the cache block. Differs from [0] for PAGECACHE_LOCK_READ (page gets
    pinned) and PAGECACHE_LOCK_WRITE_TO_READ (page stays pinned).
  */
  {
    { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED
    },
    { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED,
    },
    { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
      PAGECACHE_PIN_LEFT_PINNED,
      PAGECACHE_PIN_LEFT_PINNED
    },
    { /*PAGECACHE_LOCK_READ*/
      PAGECACHE_PIN,
      PAGECACHE_PIN_LEFT_PINNED
    },
    { /*PAGECACHE_LOCK_WRITE*/
      PAGECACHE_PIN,
      PAGECACHE_PIN_LEFT_PINNED
    },
    { /*PAGECACHE_LOCK_READ_UNLOCK*/
      PAGECACHE_PIN_LEFT_UNPINNED,
      PAGECACHE_PIN_LEFT_UNPINNED
    },
    { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
      PAGECACHE_PIN_LEFT_PINNED,
      PAGECACHE_UNPIN
    },
    { /*PAGECACHE_LOCK_WRITE_TO_READ*/
      PAGECACHE_PIN_LEFT_PINNED,
      PAGECACHE_PIN_LEFT_PINNED,
    }
  }
};
3309
3310
3311/*
3312 @brief Read a block of data from a cached file into a buffer;
3313
3314 @param pagecache pointer to a page cache data structure
3315 @param file handler for the file for the block of data to be read
3316 @param pageno number of the block of data in the file
3317 @param level determines the weight of the data
3318 @param buff buffer to where the data must be placed
3319 @param type type of the page
3320 @param lock lock change
3321 @param link link to the page if we pin it
3322
3323 @return address from where the data is placed if successful, 0 - otherwise.
3324
3325 @note Pin will be chosen according to lock parameter (see lock_to_pin)
3326
3327 @note 'buff', if not NULL, must be long-aligned.
3328
3329 @note If buff==0 then we provide reference on the page so should keep the
3330 page pinned.
3331*/
3332
3333uchar *pagecache_read(PAGECACHE *pagecache,
3334 PAGECACHE_FILE *file,
3335 pgcache_page_no_t pageno,
3336 uint level,
3337 uchar *buff,
3338 enum pagecache_page_type type,
3339 enum pagecache_page_lock lock,
3340 PAGECACHE_BLOCK_LINK **page_link)
3341{
3342 my_bool error= 0;
3343 enum pagecache_page_pin
3344 new_pin= lock_to_pin[buff==0][lock].new_pin,
3345 unlock_pin= lock_to_pin[buff==0][lock].unlock_pin;
3346 PAGECACHE_BLOCK_LINK *fake_link;
3347 my_bool reg_request;
3348#ifndef DBUG_OFF
3349 char llbuf[22];
3350 DBUG_ENTER("pagecache_read");
3351 DBUG_PRINT("enter", ("fd: %u page: %s buffer: %p level: %u "
3352 "t:%s (%d)%s->%s %s->%s",
3353 (uint) file->file, ullstr(pageno, llbuf),
3354 buff, level,
3355 page_cache_page_type_str[type],
3356 lock_to_read[lock].need_lock_change,
3357 page_cache_page_lock_str[lock_to_read[lock].new_lock],
3358 page_cache_page_lock_str[lock_to_read[lock].unlock_lock],
3359 page_cache_page_pin_str[new_pin],
3360 page_cache_page_pin_str[unlock_pin]));
3361 DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN ||
3362 unlock_pin == PAGECACHE_PIN_LEFT_PINNED)));
3363 DBUG_ASSERT(pageno < ((1ULL) << 40));
3364#endif
3365
3366 if (!page_link)
3367 page_link= &fake_link;
3368 *page_link= 0; /* Catch errors */
3369
3370restart:
3371
3372 if (pagecache->can_be_used)
3373 {
3374 /* Key cache is used */
3375 PAGECACHE_BLOCK_LINK *block;
3376 uint status;
3377 int UNINIT_VAR(page_st);
3378
3379 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3380 if (!pagecache->can_be_used)
3381 {
3382 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3383 goto no_key_cache;
3384 }
3385
3386 inc_counter_for_resize_op(pagecache);
3387 pagecache->global_cache_r_requests++;
3388 /* See NOTE for pagecache_unlock about registering requests. */
3389 reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
3390 (new_pin == PAGECACHE_PIN));
3391 block= find_block(pagecache, file, pageno, level,
3392 lock == PAGECACHE_LOCK_WRITE, buff != 0,
3393 reg_request, &page_st);
3394 DBUG_PRINT("info", ("Block type: %s current type %s",
3395 page_cache_page_type_str[block->type],
3396 page_cache_page_type_str[type]));
3397 if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ))
3398 {
3399 /* The requested page is to be read into the block buffer */
3400 read_block(pagecache, block,
3401 (my_bool)(page_st == PAGE_TO_BE_READ));
3402 DBUG_PRINT("info", ("read is done"));
3403 }
3404 /*
3405 Assert after block is read. Imagine two concurrent SELECTs on same
3406 table (thread1 and 2), which want to pagecache_read() the same
3407 pageno/fileno. Thread1 calls find_block(), decides to evict a dirty
3408 page from LRU; while it's writing this dirty page to disk, it is
3409 pre-empted and thread2 runs its find_block(), gets the block (in
3410 PAGE_TO_BE_READ state). This block is still containing the in-eviction
3411 dirty page so has an its type, which cannot be tested.
3412 So thread2 has to wait for read_block() to finish (when it wakes up in
3413 read_block(), it's woken up by read_block() of thread1, which implies
3414 that block's type was set to EMPTY by thread1 as part of find_block()).
3415 */
3416 DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
3417 block->type == type ||
3418 type == PAGECACHE_LSN_PAGE ||
3419 type == PAGECACHE_READ_UNKNOWN_PAGE ||
3420 block->type == PAGECACHE_READ_UNKNOWN_PAGE);
3421 if (type != PAGECACHE_READ_UNKNOWN_PAGE ||
3422 block->type == PAGECACHE_EMPTY_PAGE)
3423 block->type= type;
3424
3425 if (make_lock_and_pin(pagecache, block, lock_to_read[lock].new_lock,
3426 new_pin, FALSE))
3427 {
3428 /*
3429 We failed to write lock the block, cache is unlocked,
3430 we will try to get the block again.
3431 */
3432 if (reg_request)
3433 unreg_request(pagecache, block, 1);
3434 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3435 DBUG_PRINT("info", ("restarting..."));
3436 goto restart;
3437 }
3438
3439 status= block->status;
3440 if (!buff)
3441 {
3442 buff= block->buffer;
3443 /* possibly we will write here (resolved on unlock) */
3444 if ((lock == PAGECACHE_LOCK_WRITE ||
3445 lock == PAGECACHE_LOCK_LEFT_WRITELOCKED))
3446 {
3447 block->status|= PCBLOCK_DIRECT_W;
3448 DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: %p", block));
3449 }
3450 }
3451 else
3452 {
3453 if (status & PCBLOCK_READ)
3454 {
3455#if !defined(SERIALIZED_READ_FROM_CACHE)
3456 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3457#endif
3458
3459 DBUG_ASSERT((pagecache->block_size & 511) == 0);
3460 /* Copy data from the cache buffer */
3461 memcpy(buff, block->buffer, pagecache->block_size);
3462
3463#if !defined(SERIALIZED_READ_FROM_CACHE)
3464 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3465#endif
3466 }
3467 if (status & PCBLOCK_ERROR)
3468 my_errno= block->error;
3469 }
3470
3471 remove_reader(block);
3472 if (lock_to_read[lock].need_lock_change)
3473 {
3474 if (make_lock_and_pin(pagecache, block,
3475 lock_to_read[lock].unlock_lock,
3476 unlock_pin, FALSE))
3477 {
3478 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3479 DBUG_ASSERT(0);
3480 return (uchar*) 0;
3481 }
3482 }
3483 /*
3484 Link the block into the LRU chain if it's the last submitted request
3485 for the block and block will not be pinned.
3486 See NOTE for pagecache_unlock about registering requests.
3487 */
3488 if (unlock_pin == PAGECACHE_PIN_LEFT_UNPINNED ||
3489 unlock_pin == PAGECACHE_UNPIN)
3490 unreg_request(pagecache, block, 1);
3491 else
3492 *page_link= block;
3493
3494 dec_counter_for_resize_op(pagecache);
3495
3496 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3497
3498 if (status & PCBLOCK_ERROR)
3499 {
3500 DBUG_ASSERT(my_errno != 0);
3501 DBUG_PRINT("error", ("Got error %d when doing page read", my_errno));
3502 DBUG_RETURN((uchar *) 0);
3503 }
3504
3505 DBUG_RETURN(buff);
3506 }
3507
3508no_key_cache: /* Key cache is not used */
3509
3510 /* We can't use mutex here as the key cache may not be initialized */
3511 pagecache->global_cache_r_requests++;
3512 pagecache->global_cache_read++;
3513
3514 {
3515 PAGECACHE_IO_HOOK_ARGS args;
3516 args.page= buff;
3517 args.pageno= pageno;
3518 args.data= file->callback_data;
3519 error= (* file->pre_read_hook)(&args);
3520 if (!error)
3521 {
3522 error= pagecache_fread(pagecache, file, args.page, pageno,
3523 pagecache->readwrite_flags) != 0;
3524 }
3525 error= (* file->post_read_hook)(error, &args);
3526 }
3527
3528 DBUG_RETURN(error ? (uchar*) 0 : buff);
3529}
3530
3531
3532/*
3533 @brief Set/reset flag that page always should be flushed on delete
3534
3535 @param pagecache pointer to a page cache data structure
3536 @param link direct link to page (returned by read or write)
3537 @param write write on delete flag value
3538
3539*/
3540
3541void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block)
3542{
3543 DBUG_ENTER("pagecache_set_write_on_delete_by_link");
3544 DBUG_PRINT("enter", ("fd: %d block %p %d -> TRUE",
3545 block->hash_link->file.file,
3546 block, (int) block->status & PCBLOCK_DEL_WRITE));
3547 DBUG_ASSERT(block->pins); /* should be pinned */
3548 DBUG_ASSERT(block->wlocks); /* should be write locked */
3549
3550 block->status|= PCBLOCK_DEL_WRITE;
3551
3552 DBUG_VOID_RETURN;
3553}
3554
3555
3556/*
3557 @brief Delete page from the buffer (common part for link and file/page)
3558
3559 @param pagecache pointer to a page cache data structure
3560 @param block direct link to page (returned by read or write)
3561 @param page_link hash link of the block
3562 @param flush flush page if it is dirty
3563
3564 @retval 0 deleted or was not present at all
3565 @retval 1 error
3566
3567*/
3568
static my_bool pagecache_delete_internal(PAGECACHE *pagecache,
                                         PAGECACHE_BLOCK_LINK *block,
                                         PAGECACHE_HASH_LINK *page_link,
                                         my_bool flush)
{
  /*
    Common tail of pagecache_delete() and pagecache_delete_by_link().
    Called with cache_lock held and with the block write locked and
    pinned. On every path it releases that lock/pin, drops one request
    from page_link and calls dec_counter_for_resize_op(), so the caller
    must have done the matching increments beforehand.
  */
  my_bool error= 0;
  if (block->status & PCBLOCK_IN_FLUSH)
  {
    /*
      This call is just a 'hint' for the cache to free the page, so we
      must not interfere with the flushing process but must return success
    */
    goto out;
  }
  if (block->status & PCBLOCK_CHANGED)
  {
    /* A page marked PCBLOCK_DEL_WRITE is written even if !flush */
    flush= (flush || (block->status & PCBLOCK_DEL_WRITE));
    if (flush)
    {
      /* The block contains a dirty page - push it out of the cache */

      KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));

      /*
        The call is thread safe because only the current
        thread might change the block->hash_link value
      */
      DBUG_ASSERT(block->pins == 1);
      /* The write itself is done without holding the cache mutex */
      pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
      error= pagecache_fwrite(pagecache,
                              &block->hash_link->file,
                              block->buffer,
                              block->hash_link->pageno,
                              block->type,
                              pagecache->readwrite_flags);
      pagecache_pthread_mutex_lock(&pagecache->cache_lock);
      pagecache->global_cache_write++;

      if (error)
      {
        /* Remember the failure on the block and keep it in the cache */
        block->status|= PCBLOCK_ERROR;
        block->error=   (int16) my_errno;
        my_debug_put_break_here();
        goto out;
      }
    }
    else
    {
      PAGECACHE_IO_HOOK_ARGS args;
      PAGECACHE_FILE *filedesc= &block->hash_link->file;
      args.page= block->buffer;
      args.pageno= block->hash_link->pageno;
      args.data= filedesc->callback_data;
      /* We are not going to write the page but have to call callbacks */
      DBUG_PRINT("info", ("flush_callback: %p data: %p",
                          filedesc->flush_log_callback,
                          filedesc->callback_data));
      if ((*filedesc->flush_log_callback)(&args))
      {
        DBUG_PRINT("error", ("flush or write callback problem"));
        error= 1;
        goto out;
      }
    }
    /* The page no longer counts as changed */
    pagecache->blocks_changed--;
    pagecache->global_blocks_changed--;
    /*
      free_block() will change the status and rec_lsn of the block so no
      need to change them here.
    */
  }
  /* Cache is locked, so we can release page before freeing it */
  if (make_lock_and_pin(pagecache, block,
                        PAGECACHE_LOCK_WRITE_UNLOCK,
                        PAGECACHE_UNPIN, FALSE))
    DBUG_ASSERT(0);
  DBUG_ASSERT(block->hash_link->requests > 0);
  page_link->requests--;
  /* See NOTE for pagecache_unlock() about registering requests. */
  free_block(pagecache, block, 0);
  dec_counter_for_resize_op(pagecache);
  return 0;

out:
  /*
    The page is not freed on this path (flush in progress or an error
    occurred): only release lock/pin and unregister the request.
  */
  if (make_lock_and_pin(pagecache, block,
                        PAGECACHE_LOCK_WRITE_UNLOCK,
                        PAGECACHE_UNPIN, FALSE))
    DBUG_ASSERT(0);
  page_link->requests--;
  unreg_request(pagecache, block, 1);
  dec_counter_for_resize_op(pagecache);
  return error;
}
3663
3664
3665/*
3666 @brief Delete page from the buffer by link
3667
3668 @param pagecache pointer to a page cache data structure
3669 @param link direct link to page (returned by read or write)
3670 @param lock lock change
3671 @param flush flush page if it is dirty
3672
3673 @retval 0 deleted or was not present at all
3674 @retval 1 error
3675
3676 @note lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was
3677 write locked before) or PAGECACHE_LOCK_WRITE (delete will write
3678 lock page before delete)
3679*/
3680
my_bool pagecache_delete_by_link(PAGECACHE *pagecache,
                                 PAGECACHE_BLOCK_LINK *block,
                                 enum pagecache_page_lock lock,
                                 my_bool flush)
{
  /*
    Delete a page the caller already has pinned, reached through a direct
    block link. Does nothing and returns 0 if the cache cannot be used.
  */
  my_bool error= 0;
  /* The caller's pin is kept until pagecache_delete_internal() drops it */
  enum pagecache_page_pin pin= PAGECACHE_PIN_LEFT_PINNED;
  DBUG_ENTER("pagecache_delete_by_link");
  DBUG_PRINT("enter", ("fd: %d block %p %s %s",
                       block->hash_link->file.file,
                       block,
                       page_cache_page_lock_str[lock],
                       page_cache_page_pin_str[pin]));
  DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
              lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
  DBUG_ASSERT(block->pins != 0); /* should be pinned */

  if (pagecache->can_be_used)
  {
    pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    /* Re-check under the mutex: the unlocked test above may be stale */
    if (!pagecache->can_be_used)
      goto end;

    /*
      This block should be pinned (i.e. has not zero request counter) =>
      Such block can't be chosen for eviction.
    */
    DBUG_ASSERT((block->status &
                 (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED)) == 0);

    /* Balanced by dec_counter_for_resize_op() in the internal function */
    inc_counter_for_resize_op(pagecache);
    /*
      make_lock_and_pin() can't fail here, because we are keeping pin on the
      block and it can't be evicted (which is cause of lock fail and retry)
    */
    if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
      DBUG_ASSERT(0);

    /*
      Emulate the side effect of get_present_hash_link() before calling
      pagecache_delete_internal(), which decrements the request counter
    */
    block->hash_link->requests++;

    error= pagecache_delete_internal(pagecache, block, block->hash_link,
                                     flush);
end:
    pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
  }

  DBUG_RETURN(error);
}
3733
3734
3735/**
3736 @brief Returns "hits" for promotion
3737
3738 @return "hits" for promotion
3739*/
3740
3741uint pagecache_pagelevel(PAGECACHE_BLOCK_LINK *block)
3742{
3743 return block->hits_left;
3744}
3745
3746/*
3747 @brief Adds "hits" to the page
3748
3749 @param link direct link to page (returned by read or write)
3750 @param level number of "hits" which we add to the page
3751*/
3752
3753void pagecache_add_level_by_link(PAGECACHE_BLOCK_LINK *block,
3754 uint level)
3755{
3756 DBUG_ASSERT(block->pins != 0); /* should be pinned */
3757 /*
3758 Operation is just for statistics so it is not really important
3759 if it interfere with other hit increasing => we are doing it without
3760 locking the pagecache.
3761 */
3762 block->hits_left+= level;
3763}
3764
3765/*
3766 @brief Delete page from the buffer
3767
3768 @param pagecache pointer to a page cache data structure
3769 @param file handler for the file for the block of data to be read
3770 @param pageno number of the block of data in the file
3771 @param lock lock change
3772 @param flush flush page if it is dirty
3773
3774 @retval 0 deleted or was not present at all
3775 @retval 1 error
3776
3777 @note lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was
3778 write locked before) or PAGECACHE_LOCK_WRITE (delete will write
3779 lock page before delete)
3780*/
/*
  Pin change implied by a lock change for operations done in one phase;
  pagecache_delete() looks its pin up here, indexed by the requested lock.
*/
static enum pagecache_page_pin lock_to_pin_one_phase[8]=
{
  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
  PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
  PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
  PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
  PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_TO_READ*/
};
3792
3793my_bool pagecache_delete(PAGECACHE *pagecache,
3794 PAGECACHE_FILE *file,
3795 pgcache_page_no_t pageno,
3796 enum pagecache_page_lock lock,
3797 my_bool flush)
3798{
3799 my_bool error= 0;
3800 enum pagecache_page_pin pin= lock_to_pin_one_phase[lock];
3801 DBUG_ENTER("pagecache_delete");
3802 DBUG_PRINT("enter", ("fd: %u page: %lu %s %s",
3803 (uint) file->file, (ulong) pageno,
3804 page_cache_page_lock_str[lock],
3805 page_cache_page_pin_str[pin]));
3806 DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
3807 lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
3808 DBUG_ASSERT(pin == PAGECACHE_PIN ||
3809 pin == PAGECACHE_PIN_LEFT_PINNED);
3810restart:
3811
3812 DBUG_ASSERT(pageno < ((1ULL) << 40));
3813 if (pagecache->can_be_used)
3814 {
3815 /* Key cache is used */
3816 reg1 PAGECACHE_BLOCK_LINK *block;
3817 PAGECACHE_HASH_LINK **unused_start, *page_link;
3818
3819 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
3820 if (!pagecache->can_be_used)
3821 goto end;
3822
3823 inc_counter_for_resize_op(pagecache);
3824 page_link= get_present_hash_link(pagecache, file, pageno, &unused_start);
3825 if (!page_link)
3826 {
3827 DBUG_PRINT("info", ("There is no such page in the cache"));
3828 dec_counter_for_resize_op(pagecache);
3829 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3830 DBUG_RETURN(0);
3831 }
3832 block= page_link->block;
3833 if (block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH))
3834 {
3835 DBUG_PRINT("info", ("Block %p already is %s",
3836 block,
3837 ((block->status & PCBLOCK_REASSIGNED) ?
3838 "reassigned" : "in switch")));
3839 PCBLOCK_INFO(block);
3840 page_link->requests--;
3841 dec_counter_for_resize_op(pagecache);
3842 goto end;
3843 }
3844 /* See NOTE for pagecache_unlock about registering requests. */
3845 if (pin == PAGECACHE_PIN)
3846 reg_requests(pagecache, block, 1);
3847 if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
3848 {
3849 /*
3850 We failed to writelock the block, cache is unlocked, and last write
3851 lock is released, we will try to get the block again.
3852 */
3853 if (pin == PAGECACHE_PIN)
3854 unreg_request(pagecache, block, 1);
3855 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3856 DBUG_PRINT("info", ("restarting..."));
3857 goto restart;
3858 }
3859
3860 /* we can't delete with opened direct link for write */
3861 DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
3862
3863 error= pagecache_delete_internal(pagecache, block, page_link, flush);
3864end:
3865 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
3866 }
3867
3868 DBUG_RETURN(error);
3869}
3870
3871
3872my_bool pagecache_delete_pages(PAGECACHE *pagecache,
3873 PAGECACHE_FILE *file,
3874 pgcache_page_no_t pageno,
3875 uint page_count,
3876 enum pagecache_page_lock lock,
3877 my_bool flush)
3878{
3879 pgcache_page_no_t page_end;
3880 DBUG_ENTER("pagecache_delete_pages");
3881 DBUG_ASSERT(page_count > 0);
3882
3883 page_end= pageno + page_count;
3884 do
3885 {
3886 if (pagecache_delete(pagecache, file, pageno,
3887 lock, flush))
3888 DBUG_RETURN(1);
3889 } while (++pageno != page_end);
3890 DBUG_RETURN(0);
3891}
3892
3893
3894/**
3895 @brief Writes a buffer into a cached file.
3896
3897 @param pagecache pointer to a page cache data structure
3898 @param file handler for the file to write data to
3899 @param pageno number of the block of data in the file
3900 @param level determines the weight of the data
3901 @param buff buffer with the data
3902 @param type type of the page
3903 @param lock lock change
3904 @param pin pin page
3905 @param write_mode how to write page
3906 @param link link to the page if we pin it
3907 @param first_REDO_LSN_for_page the lsn to set rec_lsn
3908 @param offset offset in the page
3909 @param size size of data
3910 @param validator read page validator
3911 @param validator_data the validator data
3912
3913 @retval 0 if a success.
3914 @retval 1 Error.
3915*/
3916
/*
  Lock changes for writing a page, indexed by the lock requested from
  pagecache_write_part(). Each entry is
  { need_lock_change, new_lock, unlock_lock }; the trailing comment on each
  entry names the requested lock it belongs to.
*/
static struct rw_lock_change write_lock_change_table[]=
{
  {1,
   PAGECACHE_LOCK_WRITE,
   PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
  {0, /*unsupported (we can't write having the block read locked) */
   PAGECACHE_LOCK_LEFT_UNLOCKED,
   PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
  /* no lock change at the end, so the last field (0) is not used */
  {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
  {1,
   PAGECACHE_LOCK_WRITE,
   PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/,
  /* no lock change at the end, so the last field (0) is not used */
  {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/,
  {0, /*unsupported (we can't write having the block read locked) */
   PAGECACHE_LOCK_LEFT_UNLOCKED,
   PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/,
  {1,
   PAGECACHE_LOCK_LEFT_WRITELOCKED,
   PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
  {1,
   PAGECACHE_LOCK_LEFT_WRITELOCKED,
   PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/
};
3940
3941
/*
  Pin changes for writing a page. Each entry is { new_pin, unlock_pin };
  the trailing comment names the requested pin the entry is for
  (presumably the table is indexed by enum pagecache_page_pin in that
  order -- confirm against the enum definition).
*/
static struct rw_pin_change write_pin_change_table[]=
{
  {PAGECACHE_PIN_LEFT_PINNED,
   PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
  {PAGECACHE_PIN,
   PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/,
  {PAGECACHE_PIN,
   PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/,
  {PAGECACHE_PIN_LEFT_PINNED,
   PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/
};
3953
3954
3955/**
3956 @note 'buff', if not NULL, must be long-aligned.
3957*/
3958
3959my_bool pagecache_write_part(PAGECACHE *pagecache,
3960 PAGECACHE_FILE *file,
3961 pgcache_page_no_t pageno,
3962 uint level,
3963 uchar *buff,
3964 enum pagecache_page_type type,
3965 enum pagecache_page_lock lock,
3966 enum pagecache_page_pin pin,
3967 enum pagecache_write_mode write_mode,
3968 PAGECACHE_BLOCK_LINK **page_link,
3969 LSN first_REDO_LSN_for_page,
3970 uint offset, uint size)
3971{
3972 PAGECACHE_BLOCK_LINK *block= NULL;
3973 PAGECACHE_BLOCK_LINK *fake_link;
3974 my_bool error= 0;
3975 int need_lock_change= write_lock_change_table[lock].need_lock_change;
3976 my_bool reg_request;
3977#ifndef DBUG_OFF
3978 char llbuf[22];
3979 DBUG_ENTER("pagecache_write_part");
3980 DBUG_PRINT("enter", ("fd: %u page: %s level: %u type: %s lock: %s "
3981 "pin: %s mode: %s offset: %u size %u",
3982 (uint) file->file, ullstr(pageno, llbuf), level,
3983 page_cache_page_type_str[type],
3984 page_cache_page_lock_str[lock],
3985 page_cache_page_pin_str[pin],
3986 page_cache_page_write_mode_str[write_mode],
3987 offset, size));
3988 DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
3989 DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED);
3990 DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK);
3991 DBUG_ASSERT(offset + size <= pagecache->block_size);
3992 DBUG_ASSERT(pageno < ((1ULL) << 40));
3993#endif
3994
3995 if (!page_link)
3996 page_link= &fake_link;
3997 *page_link= 0;
3998
3999restart:
4000
4001#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
4002 DBUG_EXECUTE("check_pagecache",
4003 test_key_cache(pagecache, "start of key_cache_write", 1););
4004#endif
4005
4006 if (pagecache->can_be_used)
4007 {
4008 /* Key cache is used */
4009 int page_st;
4010 my_bool need_page_ready_signal= FALSE;
4011
4012 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4013 if (!pagecache->can_be_used)
4014 {
4015 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4016 goto no_key_cache;
4017 }
4018
4019 inc_counter_for_resize_op(pagecache);
4020 pagecache->global_cache_w_requests++;
4021 /*
4022 Here we register a request if the page was not already pinned.
4023 See NOTE for pagecache_unlock about registering requests.
4024 */
4025 reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
4026 (pin == PAGECACHE_PIN));
4027 block= find_block(pagecache, file, pageno, level,
4028 TRUE, FALSE,
4029 reg_request, &page_st);
4030 if (!block)
4031 {
4032 DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
4033 /* It happens only for requests submitted during resize operation */
4034 dec_counter_for_resize_op(pagecache);
4035 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4036 /* Write to the disk key cache is in resize at the moment*/
4037 goto no_key_cache;
4038 }
4039 DBUG_PRINT("info", ("page status: %d", page_st));
4040 if (!(block->status & PCBLOCK_ERROR) &&
4041 ((page_st == PAGE_TO_BE_READ &&
4042 (offset || size < pagecache->block_size)) ||
4043 (page_st == PAGE_WAIT_TO_BE_READ)))
4044 {
4045 /* The requested page is to be read into the block buffer */
4046 read_block(pagecache, block,
4047 (my_bool)(page_st == PAGE_TO_BE_READ));
4048 DBUG_PRINT("info", ("read is done"));
4049 }
4050 else if (page_st == PAGE_TO_BE_READ)
4051 {
4052 need_page_ready_signal= TRUE;
4053 }
4054
4055 DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
4056 block->type == PAGECACHE_READ_UNKNOWN_PAGE ||
4057 block->type == type ||
4058 /* this is for when going to non-trans to trans */
4059 (block->type == PAGECACHE_PLAIN_PAGE &&
4060 type == PAGECACHE_LSN_PAGE));
4061 block->type= type;
4062 /* we write to the page so it has no sense to keep the flag */
4063 block->status&= ~PCBLOCK_DIRECT_W;
4064 DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: %p", block));
4065
4066 if (make_lock_and_pin(pagecache, block,
4067 write_lock_change_table[lock].new_lock,
4068 (need_lock_change ?
4069 write_pin_change_table[pin].new_pin :
4070 pin), FALSE))
4071 {
4072 /*
4073 We failed to writelock the block, cache is unlocked, and last write
4074 lock is released, we will try to get the block again.
4075 */
4076 if (reg_request)
4077 unreg_request(pagecache, block, 1);
4078 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4079 DBUG_PRINT("info", ("restarting..."));
4080 goto restart;
4081 }
4082
4083 if (write_mode == PAGECACHE_WRITE_DONE)
4084 {
4085 if (block->status & PCBLOCK_ERROR)
4086 {
4087 my_debug_put_break_here();
4088 DBUG_PRINT("warning", ("Writing on page with error"));
4089 }
4090 else
4091 {
4092 /* Copy data from buff */
4093 memcpy(block->buffer + offset, buff, size);
4094 block->status= PCBLOCK_READ;
4095 KEYCACHE_DBUG_PRINT("key_cache_insert",
4096 ("Page injection"));
4097 /* Signal that all pending requests for this now can be processed. */
4098 if (block->wqueue[COND_FOR_REQUESTED].last_thread)
4099 wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
4100 }
4101 }
4102 else
4103 {
4104 if (! (block->status & PCBLOCK_CHANGED))
4105 link_to_changed_list(pagecache, block);
4106
4107 memcpy(block->buffer + offset, buff, size);
4108 block->status|= PCBLOCK_READ;
4109 /* Page is correct again if we made a full write in it */
4110 if (size == pagecache->block_size)
4111 block->status&= ~PCBLOCK_ERROR;
4112 }
4113
4114 if (need_page_ready_signal &&
4115 block->wqueue[COND_FOR_REQUESTED].last_thread)
4116 wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
4117
4118 if (first_REDO_LSN_for_page)
4119 {
4120 /* single write action of the last write action */
4121 DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
4122 lock == PAGECACHE_LOCK_LEFT_UNLOCKED);
4123 DBUG_ASSERT(pin == PAGECACHE_UNPIN ||
4124 pin == PAGECACHE_PIN_LEFT_UNPINNED);
4125 pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
4126 }
4127
4128 if (need_lock_change)
4129 {
4130 /*
4131 We don't set rec_lsn of the block; this is ok as for the
4132 Maria-block-record's pages, we always keep pages pinned here.
4133 */
4134 if (make_lock_and_pin(pagecache, block,
4135 write_lock_change_table[lock].unlock_lock,
4136 write_pin_change_table[pin].unlock_pin, FALSE))
4137 DBUG_ASSERT(0);
4138 }
4139
4140 /* Unregister the request */
4141 DBUG_ASSERT(block->hash_link->requests > 0);
4142 block->hash_link->requests--;
4143 /* See NOTE for pagecache_unlock about registering requests. */
4144 if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
4145 {
4146 unreg_request(pagecache, block, 1);
4147 DBUG_ASSERT(page_link == &fake_link);
4148 }
4149 else
4150 *page_link= block;
4151
4152 if (block->status & PCBLOCK_ERROR)
4153 {
4154 error= 1;
4155 my_debug_put_break_here();
4156 }
4157
4158 dec_counter_for_resize_op(pagecache);
4159
4160 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4161
4162 goto end;
4163 }
4164
4165no_key_cache:
4166 /*
4167 We can't by pass the normal page cache operations because need
4168 whole page for calling callbacks & so on.
4169 This branch should not be used for now (but it is fixed as it
4170 should be just to avoid confusing)
4171 */
4172 DBUG_ASSERT(0);
4173 /* Key cache is not used */
4174 if (write_mode == PAGECACHE_WRITE_DELAY)
4175 {
4176 /* We can't use mutex here as the key cache may not be initialized */
4177 pagecache->global_cache_w_requests++;
4178 pagecache->global_cache_write++;
4179 if (offset != 0 || size != pagecache->block_size)
4180 {
4181 uchar *page_buffer= (uchar *) alloca(pagecache->block_size);
4182 PAGECACHE_IO_HOOK_ARGS args;
4183 args.page= page_buffer;
4184 args.pageno= pageno;
4185 args.data= file->callback_data;
4186
4187 pagecache->global_cache_read++;
4188 error= (*file->pre_read_hook)(&args);
4189 if (!error)
4190 {
4191 error= pagecache_fread(pagecache, file,
4192 page_buffer,
4193 pageno,
4194 pagecache->readwrite_flags) != 0;
4195 }
4196 if ((*file->post_read_hook)(error, &args))
4197 {
4198 DBUG_PRINT("error", ("read callback problem"));
4199 error= 1;
4200 goto end;
4201 }
4202 memcpy((char *)page_buffer + offset, buff, size);
4203 buff= page_buffer;
4204 }
4205 if (pagecache_fwrite(pagecache, file, buff, pageno, type,
4206 pagecache->readwrite_flags))
4207 error= 1;
4208 }
4209
4210end:
4211#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
4212 DBUG_EXECUTE("exec",
4213 test_key_cache(pagecache, "end of key_cache_write", 1););
4214#endif
4215 if (block)
4216 PCBLOCK_INFO(block);
4217 else
4218 DBUG_PRINT("info", ("No block"));
4219 DBUG_RETURN(error);
4220}
4221
4222
4223/*
4224 Free block: remove reference to it from hash table,
4225 remove it from the chain file of dirty/clean blocks
4226 and add it to the free list.
4227*/
4228
4229static my_bool free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
4230 my_bool abort_if_pinned)
4231{
4232 uint status= block->status;
4233 KEYCACHE_THREAD_TRACE("free block");
4234 KEYCACHE_DBUG_PRINT("free_block",
4235 ("block: %u hash_link %p",
4236 PCBLOCK_NUMBER(pagecache, block),
4237 block->hash_link));
4238 mysql_mutex_assert_owner(&pagecache->cache_lock);
4239 if (block->hash_link)
4240 {
4241 /*
4242 While waiting for readers to finish, new readers might request the
4243 block. But since we set block->status|= PCBLOCK_REASSIGNED, they
4244 will wait on block->wqueue[COND_FOR_SAVED]. They must be signaled
4245 later.
4246 */
4247 block->status|= PCBLOCK_REASSIGNED;
4248 wait_for_readers(pagecache, block);
4249 if (unlikely(abort_if_pinned) && unlikely(block->pins))
4250 {
4251 /*
4252 Block got pinned while waiting for readers.
4253 This can only happens when called from flush_pagecache_blocks_int()
4254 when flushing blocks as part of prepare for maria_close() or from
4255 flush_cached_blocks()
4256 */
4257 block->status&= ~PCBLOCK_REASSIGNED;
4258 unreg_request(pagecache, block, 0);
4259
4260 /* All pending requests for this page must be resubmitted. */
4261 if (block->wqueue[COND_FOR_SAVED].last_thread)
4262 wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
4263 return 1;
4264 }
4265 unlink_hash(pagecache, block->hash_link);
4266 }
4267
4268 unlink_changed(block);
4269 DBUG_ASSERT(block->wlocks == 0);
4270 DBUG_ASSERT(block->rlocks == 0);
4271 DBUG_ASSERT(block->rlocks_queue == 0);
4272 DBUG_ASSERT(block->pins == 0);
4273 DBUG_ASSERT((block->status & ~(PCBLOCK_ERROR | PCBLOCK_READ | PCBLOCK_IN_FLUSH | PCBLOCK_CHANGED | PCBLOCK_REASSIGNED | PCBLOCK_DEL_WRITE)) == 0);
4274 DBUG_ASSERT(block->requests >= 1);
4275 DBUG_ASSERT(block->next_used == NULL);
4276 block->status= 0;
4277#ifdef DBUG_ASSERT_EXISTS
4278 block->type= PAGECACHE_EMPTY_PAGE;
4279#endif
4280 block->rec_lsn= LSN_MAX;
4281 block->hash_link= NULL;
4282 if (block->temperature == PCBLOCK_WARM)
4283 pagecache->warm_blocks--;
4284 block->temperature= PCBLOCK_COLD;
4285 KEYCACHE_THREAD_TRACE("free block");
4286 KEYCACHE_DBUG_PRINT("free_block",
4287 ("block is freed"));
4288 unreg_request(pagecache, block, 0);
4289
4290 /*
4291 Block->requests is != 0 if unreg_requests()/link_block() gave the block
4292 to a waiting thread
4293 */
4294 if (!block->requests)
4295 {
4296 DBUG_ASSERT(block->next_used != 0);
4297
4298 /* Remove the free block from the LRU ring. */
4299 unlink_block(pagecache, block);
4300 /* Insert the free block in the free list. */
4301 block->next_used= pagecache->free_block_list;
4302 pagecache->free_block_list= block;
4303 /* Keep track of the number of currently unused blocks. */
4304 pagecache->blocks_unused++;
4305 }
4306 else
4307 {
4308 /* keep flag set by link_block() */
4309 block->status= status & PCBLOCK_REASSIGNED;
4310 }
4311
4312 /* All pending requests for this page must be resubmitted. */
4313 if (block->wqueue[COND_FOR_SAVED].last_thread)
4314 wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
4315
4316 return 0;
4317}
4318
4319
4320static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b)
4321{
4322 return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 :
4323 ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0);
4324}
4325
4326
4327/**
4328 @brief Flush a portion of changed blocks to disk, free used blocks
4329 if requested
4330
4331 @param pagecache This page cache reference.
4332 @param file File which should be flushed
4333 @param cache Beginning of array of the block.
4334 @param end Reference to the block after last in the array.
4335 @param flush_type Type of the flush.
4336 @param first_errno Where to store first errno of the flush.
4337
4338
4339 @return Operation status
4340 @retval PCFLUSH_OK OK
4341 @retval PCFLUSH_ERROR There was errors during the flush process.
4342 @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
4343 @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
4344*/
4345
4346static int flush_cached_blocks(PAGECACHE *pagecache,
4347 PAGECACHE_FILE *file,
4348 PAGECACHE_BLOCK_LINK **cache,
4349 PAGECACHE_BLOCK_LINK **end,
4350 enum flush_type type,
4351 int *first_errno)
4352{
4353 int rc= PCFLUSH_OK;
4354 my_bool error;
4355 uint count= (uint) (end-cache);
4356 DBUG_ENTER("flush_cached_blocks");
4357 *first_errno= 0;
4358
4359 /* Don't lock the cache during the flush */
4360 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4361 /*
4362 As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH
4363 we are guaranteed that no thread will change them
4364 */
4365 qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
4366
4367 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4368 for (; cache != end; cache++)
4369 {
4370 PAGECACHE_BLOCK_LINK *block= *cache;
4371
4372 /*
4373 In the case of non_transactional tables we want to flush also
4374 block pinned with reads. This is becasue we may have other
4375 threads reading the block during flush, as non transactional
4376 tables can have many readers while the one writer is doing the
4377 flush.
4378 We don't want to do flush pinned blocks during checkpoint.
4379 We detect the checkpoint case by checking if type is LAZY.
4380 */
4381 if ((type == FLUSH_KEEP_LAZY && block->pins) || block->wlocks)
4382 {
4383 KEYCACHE_DBUG_PRINT("flush_cached_blocks",
4384 ("block: %u (%p) pinned",
4385 PCBLOCK_NUMBER(pagecache, block), block));
4386 DBUG_PRINT("info", ("block: %u (%p) pinned",
4387 PCBLOCK_NUMBER(pagecache, block), block));
4388 PCBLOCK_INFO(block);
4389 /* undo the mark put by flush_pagecache_blocks_int(): */
4390 block->status&= ~PCBLOCK_IN_FLUSH;
4391 rc|= PCFLUSH_PINNED;
4392 DBUG_PRINT("warning", ("Page pinned"));
4393 unreg_request(pagecache, block, 1);
4394 if (!*first_errno)
4395 *first_errno= HA_ERR_INTERNAL_ERROR;
4396 continue;
4397 }
4398 if (make_lock_and_pin(pagecache, block,
4399 PAGECACHE_LOCK_READ, PAGECACHE_PIN, FALSE))
4400 DBUG_ASSERT(0);
4401
4402 KEYCACHE_PRINT("flush_cached_blocks",
4403 ("block: %u (%p) to be flushed",
4404 PCBLOCK_NUMBER(pagecache, block), block));
4405 DBUG_PRINT("info", ("block: %u (%p) to be flushed",
4406 PCBLOCK_NUMBER(pagecache, block), block));
4407 PCBLOCK_INFO(block);
4408
4409 /**
4410 @todo IO If page is contiguous with next page to flush, group flushes
4411 in one single my_pwrite().
4412 */
4413 /**
4414 It is important to use block->hash_link->file below and not 'file', as
4415 the first one is right and the second may have different out-of-date
4416 content (see StaleFilePointersInFlush in ma_checkpoint.c).
4417 @todo change argument of functions to be File.
4418 */
4419 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4420 error= pagecache_fwrite(pagecache, &block->hash_link->file,
4421 block->buffer,
4422 block->hash_link->pageno,
4423 block->type,
4424 pagecache->readwrite_flags);
4425 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4426
4427 if (make_lock_and_pin(pagecache, block,
4428 PAGECACHE_LOCK_READ_UNLOCK,
4429 PAGECACHE_UNPIN, FALSE))
4430 DBUG_ASSERT(0);
4431
4432 pagecache->global_cache_write++;
4433 if (error)
4434 {
4435 block->status|= PCBLOCK_ERROR;
4436 block->error= (int16) my_errno;
4437 my_debug_put_break_here();
4438 if (!*first_errno)
4439 *first_errno= my_errno ? my_errno : -1;
4440 rc|= PCFLUSH_ERROR;
4441 }
4442 /*
4443 Let to proceed for possible waiting requests to write to the block page.
4444 It might happen only during an operation to resize the key cache.
4445 */
4446 if (block->wqueue[COND_FOR_SAVED].last_thread)
4447 wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
4448 /* type will never be FLUSH_IGNORE_CHANGED here */
4449 if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
4450 type == FLUSH_FORCE_WRITE))
4451 {
4452 if (!free_block(pagecache, block, 1))
4453 {
4454 pagecache->blocks_changed--;
4455 pagecache->global_blocks_changed--;
4456 }
4457 else
4458 {
4459 block->status&= ~PCBLOCK_IN_FLUSH;
4460 link_to_file_list(pagecache, block, file, 1);
4461 }
4462 }
4463 else
4464 {
4465 block->status&= ~PCBLOCK_IN_FLUSH;
4466 link_to_file_list(pagecache, block, file, 1);
4467 unreg_request(pagecache, block, 1);
4468 }
4469 }
4470 DBUG_RETURN(rc);
4471}
4472
4473
4474/**
4475 @brief flush all blocks for a file to disk but don't do any mutex locks
4476
4477 @param pagecache pointer to a pagecache data structure
4478 @param file handler for the file to flush to
4479 @param flush_type type of the flush
4480 @param filter optional function which tells what blocks to flush;
4481 can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
4482 or FLUSH_FORCE_WRITE.
4483 @param filter_arg an argument to pass to 'filter'. Information about
4484 the block will be passed too.
4485
4486 @note
4487 Flushes all blocks having the same OS file descriptor as 'file->file', so
4488 can flush blocks having '*block->hash_link->file' != '*file'.
4489
4490 @note
4491 This function doesn't do any mutex locks because it needs to be called
4492 both from flush_pagecache_blocks and flush_all_key_blocks (the later one
4493 does the mutex lock in the resize_pagecache() function).
4494
4495 @note
4496 This function can cause problems if two threads call it
4497 concurrently on the same file (look for "PageCacheFlushConcurrencyBugs"
4498 in ma_checkpoint.c); to avoid them, it has internal logic to serialize in
4499 this situation.
4500
4501 @return Operation status
4502 @retval PCFLUSH_OK OK
4503 @retval PCFLUSH_ERROR There was errors during the flush process.
4504 @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
4505 @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
4506*/
4507
4508static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
4509 PAGECACHE_FILE *file,
4510 enum flush_type type,
4511 PAGECACHE_FLUSH_FILTER filter,
4512 void *filter_arg)
4513{
4514 PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
4515 int last_errno= 0;
4516 int rc= PCFLUSH_OK;
4517 DBUG_ENTER("flush_pagecache_blocks_int");
4518 DBUG_PRINT("enter",
4519 ("fd: %d blocks_used: %zu blocks_changed: %zu type: %d",
4520 file->file, pagecache->blocks_used, pagecache->blocks_changed,
4521 type));
4522
4523#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
4524 DBUG_EXECUTE("check_pagecache",
4525 test_key_cache(pagecache,
4526 "start of flush_pagecache_blocks", 0););
4527#endif
4528
4529 cache= cache_buff;
4530 if (pagecache->disk_blocks > 0 &&
4531 (!my_disable_flush_pagecache_blocks ||
4532 (type != FLUSH_KEEP && type != FLUSH_KEEP_LAZY)))
4533 {
4534 /*
4535 Key cache exists. If my_disable_flush_pagecache_blocks is true it
4536 disables the operation but only FLUSH_KEEP[_LAZY]: other flushes still
4537 need to be allowed: FLUSH_RELEASE has to free blocks, and
4538 FLUSH_FORCE_WRITE is to overrule my_disable_flush_pagecache_blocks.
4539 */
4540 int error= 0;
4541 uint count= 0;
4542 PAGECACHE_BLOCK_LINK **pos, **end;
4543 PAGECACHE_BLOCK_LINK *first_in_switch= NULL;
4544 PAGECACHE_BLOCK_LINK *block, *next;
4545#if defined(PAGECACHE_DEBUG)
4546 uint cnt= 0;
4547#endif
4548
4549 struct st_file_in_flush us_flusher, *other_flusher;
4550 us_flusher.file= file->file;
4551 us_flusher.flush_queue.last_thread= NULL;
4552 us_flusher.first_in_switch= FALSE;
4553 while ((other_flusher= (struct st_file_in_flush *)
4554 my_hash_search(&pagecache->files_in_flush, (uchar *)&file->file,
4555 sizeof(file->file))))
4556 {
4557 /*
4558 File is in flush already: wait, unless FLUSH_KEEP_LAZY. "Flusher"
4559 means "who can mark PCBLOCK_IN_FLUSH", i.e. caller of
4560 flush_pagecache_blocks_int().
4561 */
4562 struct st_my_thread_var *thread;
4563 if (type == FLUSH_KEEP_LAZY)
4564 {
4565 DBUG_PRINT("info",("FLUSH_KEEP_LAZY skips"));
4566 DBUG_RETURN(0);
4567 }
4568 thread= my_thread_var;
4569 wqueue_add_to_queue(&other_flusher->flush_queue, thread);
4570 do
4571 {
4572 DBUG_PRINT("wait",
4573 ("(1) suspend thread %s %ld",
4574 thread->name, (ulong) thread->id));
4575 pagecache_pthread_cond_wait(&thread->suspend,
4576 &pagecache->cache_lock);
4577 }
4578 while (thread->next);
4579 }
4580 /* we are the only flusher of this file now */
4581 while (my_hash_insert(&pagecache->files_in_flush, (uchar *)&us_flusher))
4582 {
4583 /*
4584 Out of memory, wait for flushers to empty the hash and retry; should
4585 rarely happen. Other threads are flushing the file; when done, they
4586 are going to remove themselves from the hash, and thus memory will
4587 appear again. However, this memory may be stolen by yet another thread
4588 (for a purpose unrelated to page cache), before we retry
4589 my_hash_insert(). So the loop may run for long. Only if the thread was
4590 killed do we abort the loop, returning 1 (error) which can cause the
4591 table to be marked as corrupted (cf maria_chk_size(), maria_close())
4592 and thus require a table check.
4593 */
4594 DBUG_ASSERT(0);
4595 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4596 if (my_thread_var->abort)
4597 DBUG_RETURN(1); /* End if aborted by user */
4598 sleep(10);
4599 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4600 }
4601
4602 if (type != FLUSH_IGNORE_CHANGED)
4603 {
4604 /*
4605 Count how many key blocks we have to cache to be able
4606 to flush all dirty pages with minimum seek moves.
4607 */
4608 for (block= pagecache->changed_blocks[FILE_HASH(*file, pagecache)] ;
4609 block;
4610 block= block->next_changed)
4611 {
4612 if (block->hash_link->file.file == file->file)
4613 {
4614 count++;
4615 KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
4616 }
4617 }
4618 count++; /* Allocate one extra for easy end-of-buffer test */
4619 /* Allocate a new buffer only if its bigger than the one we have */
4620 if (count > FLUSH_CACHE &&
4621 !(cache=
4622 (PAGECACHE_BLOCK_LINK**)
4623 my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0))))
4624 {
4625 cache= cache_buff;
4626 count= FLUSH_CACHE;
4627 }
4628 }
4629
4630 /* Retrieve the blocks and write them to a buffer to be flushed */
4631restart:
4632 end= (pos= cache)+count;
4633 for (block= pagecache->changed_blocks[FILE_HASH(*file, pagecache)] ;
4634 block;
4635 block= next)
4636 {
4637#if defined(PAGECACHE_DEBUG)
4638 cnt++;
4639 KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
4640#endif
4641 next= block->next_changed;
4642 if (block->hash_link->file.file != file->file)
4643 continue;
4644 if (filter != NULL)
4645 {
4646 int filter_res= (*filter)(block->type, block->hash_link->pageno,
4647 block->rec_lsn, filter_arg);
4648 DBUG_PRINT("info",("filter returned %d", filter_res));
4649 if (filter_res == FLUSH_FILTER_SKIP_TRY_NEXT)
4650 continue;
4651 if (filter_res == FLUSH_FILTER_SKIP_ALL)
4652 break;
4653 DBUG_ASSERT(filter_res == FLUSH_FILTER_OK);
4654 }
4655 {
4656 DBUG_ASSERT(!(block->status & PCBLOCK_IN_FLUSH));
4657 /*
4658 We care only for the blocks for which flushing was not
4659 initiated by other threads as a result of page swapping
4660 */
4661 if (! (block->status & PCBLOCK_IN_SWITCH))
4662 {
4663 /*
4664 Mark the block with BLOCK_IN_FLUSH in order not to let
4665 other threads to use it for new pages and interfere with
4666 our sequence of flushing dirty file pages
4667 */
4668 block->status|= PCBLOCK_IN_FLUSH;
4669
4670 reg_requests(pagecache, block, 1);
4671 if (type != FLUSH_IGNORE_CHANGED)
4672 {
4673 *pos++= block;
4674 /* It's not a temporary file */
4675 if (pos == end)
4676 {
4677 /*
4678 This happens only if there is not enough
4679 memory for the big block
4680 */
4681 if ((rc|= flush_cached_blocks(pagecache, file, cache,
4682 end, type, &error)) &
4683 (PCFLUSH_ERROR | PCFLUSH_PINNED))
4684 last_errno=error;
4685 DBUG_PRINT("info", ("restarting..."));
4686 /*
4687 Restart the scan as some other thread might have changed
4688 the changed blocks chain: the blocks that were in switch
4689 state before the flush started have to be excluded
4690 */
4691 goto restart;
4692 }
4693 }
4694 else
4695 {
4696 /* It's a temporary file */
4697 pagecache->blocks_changed--;
4698 pagecache->global_blocks_changed--;
4699 free_block(pagecache, block, 0);
4700 }
4701 }
4702 else if (type != FLUSH_KEEP_LAZY)
4703 {
4704 /*
4705 Link the block into a list of blocks 'in switch', and then we will
4706 wait for this list to be empty, which means they have been flushed
4707 */
4708 unlink_changed(block);
4709 link_changed(block, &first_in_switch);
4710 us_flusher.first_in_switch= TRUE;
4711 }
4712 }
4713 }
4714 if (pos != cache)
4715 {
4716 if ((rc|= flush_cached_blocks(pagecache, file, cache, pos, type,
4717 &error)) &
4718 (PCFLUSH_ERROR | PCFLUSH_PINNED))
4719 last_errno= error;
4720 }
4721 /* Wait until list of blocks in switch is empty */
4722 while (first_in_switch)
4723 {
4724#if defined(PAGECACHE_DEBUG)
4725 cnt= 0;
4726#endif
4727 block= first_in_switch;
4728 {
4729 struct st_my_thread_var *thread= my_thread_var;
4730 wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
4731 do
4732 {
4733 DBUG_PRINT("wait",
4734 ("(2) suspend thread %s %ld",
4735 thread->name, (ulong) thread->id));
4736 pagecache_pthread_cond_wait(&thread->suspend,
4737 &pagecache->cache_lock);
4738 }
4739 while (thread->next);
4740 }
4741#if defined(PAGECACHE_DEBUG)
4742 cnt++;
4743 KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
4744#endif
4745 }
4746 us_flusher.first_in_switch= FALSE;
4747 /* The following happens very seldom */
4748 if (! (type == FLUSH_KEEP || type == FLUSH_KEEP_LAZY ||
4749 type == FLUSH_FORCE_WRITE))
4750 {
4751 /*
4752 this code would free all blocks while filter maybe handled only a
4753 few, that is not possible.
4754 */
4755 DBUG_ASSERT(filter == NULL);
4756#if defined(PAGECACHE_DEBUG)
4757 cnt=0;
4758#endif
4759 for (block= pagecache->file_blocks[FILE_HASH(*file, pagecache)] ;
4760 block;
4761 block= next)
4762 {
4763#if defined(PAGECACHE_DEBUG)
4764 cnt++;
4765 KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
4766#endif
4767 next= block->next_changed;
4768 if (block->hash_link->file.file == file->file &&
4769 !block->pins &&
4770 (! (block->status & PCBLOCK_CHANGED)
4771 || type == FLUSH_IGNORE_CHANGED))
4772 {
4773 reg_requests(pagecache, block, 1);
4774 free_block(pagecache, block, 1);
4775 }
4776 }
4777 }
4778 /* wake up others waiting to flush this file */
4779 my_hash_delete(&pagecache->files_in_flush, (uchar *)&us_flusher);
4780 if (us_flusher.flush_queue.last_thread)
4781 wqueue_release_queue(&us_flusher.flush_queue);
4782 }
4783
4784 DBUG_EXECUTE("check_pagecache",
4785 test_key_cache(pagecache, "end of flush_pagecache_blocks", 0););
4786 if (cache != cache_buff)
4787 my_free(cache);
4788 if (rc != 0)
4789 {
4790 if (last_errno)
4791 my_errno= last_errno; /* Return first error */
4792 DBUG_PRINT("error", ("Got error: %d", my_errno));
4793 }
4794 DBUG_RETURN(rc);
4795}
4796
4797
4798/**
4799 @brief flush all blocks for a file to disk
4800
4801 @param pagecache pointer to a pagecache data structure
4802 @param file handler for the file to flush to
4803 @param flush_type type of the flush
4804 @param filter optional function which tells what blocks to flush;
4805 can be non-NULL only if FLUSH_KEEP, FLUSH_KEEP_LAZY
4806 or FLUSH_FORCE_WRITE.
4807 @param filter_arg an argument to pass to 'filter'. Information about
4808 the block will be passed too.
4809
4810 @return Operation status
4811 @retval PCFLUSH_OK OK
4812 @retval PCFLUSH_ERROR There was errors during the flush process.
4813 @retval PCFLUSH_PINNED Pinned blocks was met and skipped.
4814 @retval PCFLUSH_PINNED_AND_ERROR PCFLUSH_ERROR and PCFLUSH_PINNED.
4815*/
4816
4817int flush_pagecache_blocks_with_filter(PAGECACHE *pagecache,
4818 PAGECACHE_FILE *file,
4819 enum flush_type type,
4820 PAGECACHE_FLUSH_FILTER filter,
4821 void *filter_arg)
4822{
4823 int res;
4824 DBUG_ENTER("flush_pagecache_blocks_with_filter");
4825 DBUG_PRINT("enter", ("pagecache: %p", pagecache));
4826
4827 if (pagecache->disk_blocks <= 0)
4828 DBUG_RETURN(0);
4829 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4830 inc_counter_for_resize_op(pagecache);
4831 res= flush_pagecache_blocks_int(pagecache, file, type, filter, filter_arg);
4832 dec_counter_for_resize_op(pagecache);
4833 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
4834 DBUG_RETURN(res);
4835}
4836
4837
4838/*
4839 Reset the counters of a key cache.
4840
4841 SYNOPSIS
4842 reset_pagecache_counters()
4843 name the name of a key cache
4844 pagecache pointer to the pagecache to be reset
4845
4846 DESCRIPTION
4847 This procedure is used to reset the counters of all currently used key
4848 caches, both the default one and the named ones.
4849
4850 RETURN
4851 0 on success (always because it can't fail)
4852*/
4853
4854int reset_pagecache_counters(const char *name __attribute__((unused)),
4855 PAGECACHE *pagecache)
4856{
4857 DBUG_ENTER("reset_pagecache_counters");
4858 if (!pagecache->inited)
4859 {
4860 DBUG_PRINT("info", ("Key cache %s not initialized.", name));
4861 DBUG_RETURN(0);
4862 }
4863 DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
4864
4865 pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
4866 pagecache->global_cache_r_requests= 0; /* Key_read_requests */
4867 pagecache->global_cache_read= 0; /* Key_reads */
4868 pagecache->global_cache_w_requests= 0; /* Key_write_requests */
4869 pagecache->global_cache_write= 0; /* Key_writes */
4870 DBUG_RETURN(0);
4871}
4872
4873
4874/**
4875 @brief Allocates a buffer and stores in it some info about all dirty pages
4876
4877 Does the allocation because the caller cannot know the size itself.
4878 Memory freeing is to be done by the caller (if the "str" member of the
4879 LEX_STRING is not NULL).
4880 Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they
4881 are not interesting for a checkpoint record.
4882 The caller has the intention of doing checkpoints.
4883
4884 @param pagecache pointer to the page cache
4885 @param[out] str pointer to where the allocated buffer, and
4886 its size, will be put
4887 @param[out] min_rec_lsn pointer to where the minimum rec_lsn of all
4888 relevant dirty pages will be put
4889 @return Operation status
4890 @retval 0 OK
4891 @retval 1 Error
4892*/
4893
4894my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
4895 LEX_STRING *str,
4896 LSN *min_rec_lsn)
4897{
4898 my_bool error= 0;
4899 size_t stored_list_size= 0;
4900 uint file_hash;
4901 char *ptr;
4902 LSN minimum_rec_lsn= LSN_MAX;
4903 DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");
4904
4905 DBUG_ASSERT(NULL == str->str);
4906 /*
4907 We lock the entire cache but will be quick, just reading/writing a few MBs
4908 of memory at most.
4909 */
4910 pagecache_pthread_mutex_lock(&pagecache->cache_lock);
4911 for (;;)
4912 {
4913 struct st_file_in_flush *other_flusher;
4914 for (file_hash= 0;
4915 (other_flusher= (struct st_file_in_flush *)
4916 my_hash_element(&pagecache->files_in_flush, file_hash)) != NULL &&
4917 !other_flusher->first_in_switch;
4918 file_hash++)
4919 {}
4920 if (other_flusher == NULL)
4921 break;
4922 /*
4923 other_flusher.first_in_switch is true: some thread is flushing a file
4924 and has removed dirty blocks from changed_blocks[] while they were still
4925 dirty (they were being evicted (=>flushed) by yet another thread, which
4926 may not have flushed the block yet so it may still be dirty).
4927 If Checkpoint proceeds now, it will not see the page. If there is a
4928 crash right after writing the checkpoint record, before the page is
4929 flushed, at recovery the page will be wrongly ignored because it won't
4930 be in the dirty pages list in the checkpoint record. So wait.
4931 */
4932 {
4933 struct st_my_thread_var *thread= my_thread_var;
4934 wqueue_add_to_queue(&other_flusher->flush_queue, thread);
4935 do
4936 {
4937 DBUG_PRINT("wait",
4938 ("suspend thread %s %ld", thread->name,
4939 (ulong) thread->id));
4940 pagecache_pthread_cond_wait(&thread->suspend,
4941 &pagecache->cache_lock);
4942 }
4943 while (thread->next);
4944 }
4945 }
4946
4947 /* Count how many dirty pages are interesting */
4948 for (file_hash= 0; file_hash < pagecache->changed_blocks_hash_size; file_hash++)
4949 {
4950 PAGECACHE_BLOCK_LINK *block;
4951 for (block= pagecache->changed_blocks[file_hash] ;
4952 block;
4953 block= block->next_changed)
4954 {
4955 /*
4956 Q: is there something subtle with block->hash_link: can it be NULL?
4957 does it have to be == hash_link->block... ?
4958 */
4959 DBUG_ASSERT(block->hash_link != NULL);
4960 DBUG_ASSERT(block->status & PCBLOCK_CHANGED);
4961 /*
4962 Note that we don't store bitmap pages, or pages from non-transactional
4963 (like temporary) tables. Don't checkpoint during Recovery which uses
4964 PAGECACHE_PLAIN_PAGE.
4965 */
4966 if (block->type != PAGECACHE_LSN_PAGE)
4967 continue; /* no need to store it */
4968 stored_list_size++;
4969 }
4970 }
4971
4972 compile_time_assert(sizeof(pagecache->blocks) <= 8);
4973 str->length= 8 + /* number of dirty pages */
4974 (2 + /* table id */
4975 1 + /* data or index file */
4976 5 + /* pageno */
4977 LSN_STORE_SIZE /* rec_lsn */
4978 ) * stored_list_size;
4979 if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
4980 goto err;
4981 ptr= str->str;
4982 int8store(ptr, (ulonglong)stored_list_size);
4983 ptr+= 8;
4984 DBUG_PRINT("info", ("found %zu dirty pages", stored_list_size));
4985 if (stored_list_size == 0)
4986 goto end;
4987 for (file_hash= 0; file_hash < pagecache->changed_blocks_hash_size; file_hash++)
4988 {
4989 PAGECACHE_BLOCK_LINK *block;
4990 for (block= pagecache->changed_blocks[file_hash] ;
4991 block;
4992 block= block->next_changed)
4993 {
4994 uint16 table_id;
4995 MARIA_SHARE *share;
4996 if (block->type != PAGECACHE_LSN_PAGE)
4997 continue; /* no need to store it in the checkpoint record */
4998 share= (MARIA_SHARE *)(block->hash_link->file.callback_data);
4999 table_id= share->id;
5000 int2store(ptr, table_id);
5001 ptr+= 2;
5002 ptr[0]= (share->kfile.file == block->hash_link->file.file);
5003 ptr++;
5004 DBUG_ASSERT(block->hash_link->pageno < ((1ULL) << 40));
5005 page_store(ptr, block->hash_link->pageno);
5006 ptr+= PAGE_STORE_SIZE;
5007 lsn_store(ptr, block->rec_lsn);
5008 ptr+= LSN_STORE_SIZE;
5009 if (block->rec_lsn != LSN_MAX)
5010 {
5011 DBUG_ASSERT(LSN_VALID(block->rec_lsn));
5012 if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0)
5013 minimum_rec_lsn= block->rec_lsn;
5014 } /* otherwise, some trn->rec_lsn should hold the correct info */
5015 }
5016 }
5017end:
5018 pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
5019 *min_rec_lsn= minimum_rec_lsn;
5020 DBUG_RETURN(error);
5021
5022err:
5023 error= 1;
5024 goto end;
5025}
5026
5027
5028#ifndef DBUG_OFF
5029
5030/**
5031 Verifies that a file has no dirty pages.
5032*/
5033
5034void pagecache_file_no_dirty_page(PAGECACHE *pagecache, PAGECACHE_FILE *file)
5035{
5036 File fd= file->file;
5037 PAGECACHE_BLOCK_LINK *block;
5038 for (block= pagecache->changed_blocks[FILE_HASH(*file, pagecache)];
5039 block != NULL;
5040 block= block->next_changed)
5041 if (block->hash_link->file.file == fd)
5042 {
5043 DBUG_PRINT("info", ("pagecache_file_not_in error"));
5044 PCBLOCK_INFO(block);
5045 DBUG_ASSERT(0);
5046 }
5047}
5048
5049
5050/*
5051 Test if disk-cache is ok
5052*/
5053static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
5054 const char *where __attribute__((unused)),
5055 my_bool lock __attribute__((unused)))
5056{
5057 /* TODO */
5058}
5059#endif
5060
5061uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block)
5062{
5063 return block->buffer;
5064}
5065
5066#if defined(PAGECACHE_TIMEOUT)
5067
5068#define KEYCACHE_DUMP_FILE "pagecache_dump.txt"
5069#define MAX_QUEUE_LEN 100
5070
5071
5072static void pagecache_dump(PAGECACHE *pagecache)
5073{
5074 FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
5075 struct st_my_thread_var *last;
5076 struct st_my_thread_var *thread;
5077 PAGECACHE_BLOCK_LINK *block;
5078 PAGECACHE_HASH_LINK *hash_link;
5079 PAGECACHE_PAGE *page;
5080 uint i;
5081
5082 fprintf(pagecache_dump_file, "thread: %s %ld\n", thread->name,
5083 (ulong) thread->id);
5084
5085 i=0;
5086 thread=last=waiting_for_hash_link.last_thread;
5087 fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n");
5088 if (thread)
5089 do
5090 {
5091 thread= thread->next;
5092 page= (PAGECACHE_PAGE *) thread->keycache_link;
5093 fprintf(pagecache_dump_file,
5094 "thread: %s %ld, (file,pageno)=(%u,%lu)\n",
5095 thread->name, (ulong) thread->id,
5096 (uint) page->file.file,(ulong) page->pageno);
5097 if (++i == MAX_QUEUE_LEN)
5098 break;
5099 }
5100 while (thread != last);
5101
5102 i=0;
5103 thread=last=waiting_for_block.last_thread;
5104 fprintf(pagecache_dump_file, "queue of threads waiting for block\n");
5105 if (thread)
5106 do
5107 {
5108 thread=thread->next;
5109 hash_link= (PAGECACHE_HASH_LINK *) thread->keycache_link;
5110 fprintf(pagecache_dump_file,
5111 "thread: %s %u hash_link:%u (file,pageno)=(%u,%lu)\n",
5112 thread->name, (ulong) thread->id,
5113 (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
5114 (uint) hash_link->file.file,(ulong) hash_link->pageno);
5115 if (++i == MAX_QUEUE_LEN)
5116 break;
5117 }
5118 while (thread != last);
5119
5120 for (i=0 ; i < pagecache->blocks_used ; i++)
5121 {
5122 int j;
5123 block= &pagecache->block_root[i];
5124 hash_link= block->hash_link;
5125 fprintf(pagecache_dump_file,
5126 "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
5127 i, (int) (hash_link ?
5128 PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) :
5129 -1),
5130 block->status, block->requests, block->condvar ? 1 : 0);
5131 for (j=0 ; j < COND_SIZE; j++)
5132 {
5133 PAGECACHE_WQUEUE *wqueue=&block->wqueue[j];
5134 thread= last= wqueue->last_thread;
5135 fprintf(pagecache_dump_file, "queue #%d\n", j);
5136 if (thread)
5137 {
5138 do
5139 {
5140 thread=thread->next;
5141 fprintf(pagecache_dump_file,
5142 "thread: %s %ld\n", thread->name, (ulong) thread->id);
5143 if (++i == MAX_QUEUE_LEN)
5144 break;
5145 }
5146 while (thread != last);
5147 }
5148 }
5149 }
5150 fprintf(pagecache_dump_file, "LRU chain:");
5151 block= pagecache= used_last;
5152 if (block)
5153 {
5154 do
5155 {
5156 block= block->next_used;
5157 fprintf(pagecache_dump_file,
5158 "block:%u, ", PCBLOCK_NUMBER(pagecache, block));
5159 }
5160 while (block != pagecache->used_last);
5161 }
5162 fprintf(pagecache_dump_file, "\n");
5163
5164 fclose(pagecache_dump_file);
5165}
5166
5167#endif /* defined(PAGECACHE_TIMEOUT) */
5168
5169#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)
5170
5171
5172static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
5173 mysql_mutex_t *mutex)
5174{
5175 int rc;
5176 struct timeval now; /* time when we started waiting */
5177 struct timespec timeout; /* timeout value for the wait function */
5178 struct timezone tz;
5179#if defined(PAGECACHE_DEBUG)
5180 int cnt=0;
5181#endif
5182
5183 /* Get current time */
5184 gettimeofday(&now, &tz);
5185 /* Prepare timeout value */
5186 timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT;
5187 /*
5188 timeval uses microseconds.
5189 timespec uses nanoseconds.
5190 1 nanosecond = 1000 micro seconds
5191 */
5192 timeout.tv_nsec= now.tv_usec * 1000;
5193 KEYCACHE_THREAD_TRACE_END("started waiting");
5194#if defined(PAGECACHE_DEBUG)
5195 cnt++;
5196 if (cnt % 100 == 0)
5197 fprintf(pagecache_debug_log, "waiting...\n");
5198 fflush(pagecache_debug_log);
5199#endif
5200 rc= mysql_cond_timedwait(cond, mutex, &timeout);
5201 KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
5202 if (rc == ETIMEDOUT || rc == ETIME)
5203 {
5204#if defined(PAGECACHE_DEBUG)
5205 fprintf(pagecache_debug_log,"aborted by pagecache timeout\n");
5206 fclose(pagecache_debug_log);
5207 abort();
5208#endif
5209 pagecache_dump();
5210 }
5211
5212#if defined(PAGECACHE_DEBUG)
5213 KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
5214#else
5215 assert(rc != ETIMEDOUT);
5216#endif
5217 return rc;
5218}
5219#else
5220#if defined(PAGECACHE_DEBUG)
5221static int pagecache_pthread_cond_wait(mysql_cond_t *cond,
5222 mysql_mutex_t *mutex)
5223{
5224 int rc;
5225 KEYCACHE_THREAD_TRACE_END("started waiting");
5226 rc= mysql_cond_wait(cond, mutex);
5227 KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
5228 return rc;
5229}
5230#endif
5231#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
5232
5233#if defined(PAGECACHE_DEBUG)
5234static int ___pagecache_pthread_mutex_lock(mysql_mutex_t *mutex)
5235{
5236 int rc;
5237 rc= mysql_mutex_lock(mutex);
5238 KEYCACHE_THREAD_TRACE_BEGIN("");
5239 return rc;
5240}
5241
5242
5243static void ___pagecache_pthread_mutex_unlock(mysql_mutex_t *mutex)
5244{
5245 KEYCACHE_THREAD_TRACE_END("");
5246 mysql_mutex_unlock(mutex);
5247}
5248
5249
5250static int ___pagecache_pthread_cond_signal(mysql_cond_t *cond)
5251{
5252 int rc;
5253 KEYCACHE_THREAD_TRACE("signal");
5254 rc= mysql_cond_signal(cond);
5255 return rc;
5256}
5257
5258
5259#if defined(PAGECACHE_DEBUG_LOG)
5260
5261
5262static void pagecache_debug_print(const char * fmt, ...)
5263{
5264 va_list args;
5265 va_start(args,fmt);
5266 if (pagecache_debug_log)
5267 {
5268 VOID(vfprintf(pagecache_debug_log, fmt, args));
5269 VOID(fputc('\n',pagecache_debug_log));
5270 }
5271 va_end(args);
5272}
5273#endif /* defined(PAGECACHE_DEBUG_LOG) */
5274
5275#if defined(PAGECACHE_DEBUG_LOG)
5276
5277
5278void pagecache_debug_log_close(void)
5279{
5280 if (pagecache_debug_log)
5281 fclose(pagecache_debug_log);
5282}
5283#endif /* defined(PAGECACHE_DEBUG_LOG) */
5284
5285#endif /* defined(PAGECACHE_DEBUG) */
5286
5287/**
5288 @brief null hooks
5289*/
5290
5291static my_bool null_pre_hook(PAGECACHE_IO_HOOK_ARGS *args
5292 __attribute__((unused)))
5293{
5294 return 0;
5295}
5296
5297static my_bool null_post_read_hook(int res, PAGECACHE_IO_HOOK_ARGS *args
5298 __attribute__((unused)))
5299{
5300 return res != 0;
5301}
5302
5303static void null_post_write_hook(int res __attribute__((unused)),
5304 PAGECACHE_IO_HOOK_ARGS *args
5305 __attribute__((unused)))
5306{
5307 return;
5308}
5309
5310void
5311pagecache_file_set_null_hooks(PAGECACHE_FILE *file)
5312{
5313 file->pre_read_hook= null_pre_hook;
5314 file->post_read_hook= null_post_read_hook;
5315 file->pre_write_hook= null_pre_hook;
5316 file->post_write_hook= null_post_write_hook;
5317 file->flush_log_callback= null_pre_hook;
5318 file->callback_data= NULL;
5319}
5320