1/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2 Copyright (c) 2017, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17
18
19/**
20 @file
21 The file contains the following modules:
22
23 Simple Key Cache Module
24
25 Partitioned Key Cache Module
26
27 Key Cache Interface Module
28
29*/
30
31#include "mysys_priv.h"
32#include "mysys_err.h"
33#include <keycache.h>
34#include "my_static.h"
35#include <m_string.h>
36#include <my_bit.h>
37#include <errno.h>
38#include <stdarg.h>
39#include "probes_mysql.h"
40
41/******************************************************************************
42 Simple Key Cache Module
43
44 The module contains implementations of all key cache interface functions
45 employed by partitioned key caches.
46
47******************************************************************************/
48
49/*
50 These functions handle keyblock cacheing for ISAM and MyISAM tables.
51
52 One cache can handle many files.
53 It must contain buffers of the same blocksize.
54
55 init_key_cache() should be used to init cache handler.
56
57 The free list (free_block_list) is a stack like structure.
58 When a block is freed by free_block(), it is pushed onto the stack.
59 When a new block is required it is first tried to pop one from the stack.
60 If the stack is empty, it is tried to get a never-used block from the pool.
61 If this is empty too, then a block is taken from the LRU ring, flushing it
62 to disk, if necessary. This is handled in find_key_block().
63 With the new free list, the blocks can have three temperatures:
64 hot, warm and cold (which is free). This is remembered in the block header
65 by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
66 temperature is necessary to correctly count the number of warm blocks,
67 which is required to decide when blocks are allowed to become hot. Whenever
68 a block is inserted to another (sub-)chain, we take the old and new
69 temperature into account to decide if we got one more or less warm block.
70 blocks_unused is the sum of never used blocks in the pool and of currently
71 free blocks. blocks_used is the number of blocks fetched from the pool and
72 as such gives the maximum number of in-use blocks at any time.
73
74 Key Cache Locking
75 =================
76
77 All key cache locking is done with a single mutex per key cache:
78 keycache->cache_lock. This mutex is locked almost all the time
79 when executing code in this file (mf_keycache.c).
80 However it is released for I/O and some copy operations.
81
82 The cache_lock is also released when waiting for some event. Waiting
83 and signalling is done via condition variables. In most cases the
84 thread waits on its thread->suspend condition variable. Every thread
85 has a my_thread_var structure, which contains this variable and a
86 '*next' and '**prev' pointer. These pointers are used to insert the
87 thread into a wait queue.
88
89 A thread can wait for one block and thus be in one wait queue at a
90 time only.
91
92 Before starting to wait on its condition variable with
93 mysql_cond_wait(), the thread enters itself to a specific wait queue
94 with link_into_queue() (double linked with '*next' + '**prev') or
95 wait_on_queue() (single linked with '*next').
96
97 Another thread, when releasing a resource, looks up the waiting thread
98 in the related wait queue. It sends a signal with
99 mysql_cond_signal() to the waiting thread.
100
101 NOTE: Depending on the particular wait situation, either the sending
102 thread removes the waiting thread from the wait queue with
103 unlink_from_queue() or release_whole_queue() respectively, or the waiting
104 thread removes itself.
105
106 There is one exception from this locking scheme when one thread wants
107 to reuse a block for some other address. This works by first marking
108 the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
109 threads that are reading the block to finish. Each block has a
110 reference to a condition variable (condvar). It holds a reference to
111 the thread->suspend condition variable for the waiting thread (if such
112 a thread exists). When that thread is signaled, the reference is
113 cleared. The number of readers of a block is registered in
114 block->hash_link->requests. See wait_for_readers() / remove_reader()
115 for details. This is similar to the above, but it clearly means that
116 only one thread can wait for a particular block. There is no queue in
117 this case. Strangely enough block->convar is used for waiting for the
118 assigned hash_link only. More precisely it is used to wait for all
119 requests to be unregistered from the assigned hash_link.
120
121 The resize_queue serves two purposes:
122 1. Threads that want to do a resize wait there if in_resize is set.
123 This is not used in the server. The server refuses a second resize
124 request if one is already active. keycache->in_init is used for the
125 synchronization. See set_var.cc.
126 2. Threads that want to access blocks during resize wait here during
127 the re-initialization phase.
128 When the resize is done, all threads on the queue are signalled.
129 Hypothetical resizers can compete for resizing, and read/write
130 requests will restart to request blocks from the freshly resized
131 cache. If the cache has been resized too small, it is disabled and
132 'can_be_used' is false. In this case read/write requests bypass the
133 cache. Since they increment and decrement 'cnt_for_resize_op', the
134 next resizer can wait on the queue 'waiting_for_resize_cnt' until all
135 I/O finished.
136*/
137
138/* declare structures that is used by st_key_cache */
139
140struct st_block_link;
141typedef struct st_block_link BLOCK_LINK;
142struct st_keycache_page;
143typedef struct st_keycache_page KEYCACHE_PAGE;
144struct st_hash_link;
145typedef struct st_hash_link HASH_LINK;
146
147/* info about requests in a waiting queue */
148typedef struct st_keycache_wqueue
149{
150 struct st_my_thread_var *last_thread; /* circular list of waiting threads */
151} KEYCACHE_WQUEUE;
152
153/* Default size of hash for changed files */
154#define MIN_CHANGED_BLOCKS_HASH_SIZE 128
155
156/* Control block for a simple (non-partitioned) key cache */
157
158typedef struct st_simple_key_cache_cb
159{
160 my_bool key_cache_inited; /* <=> control block is allocated */
161 my_bool in_resize; /* true during resize operation */
162 my_bool resize_in_flush; /* true during flush of resize operation */
163 my_bool can_be_used; /* usage of cache for read/write is allowed */
164 size_t key_cache_mem_size; /* specified size of the cache memory */
165 uint key_cache_block_size; /* size of the page buffer of a cache block */
166 ulong min_warm_blocks; /* min number of warm blocks; */
167 ulong age_threshold; /* age threshold for hot blocks */
168 ulonglong keycache_time; /* total number of block link operations */
169 uint hash_entries; /* max number of entries in the hash table */
170 uint changed_blocks_hash_size; /* Number of hash buckets for file blocks */
171 int hash_links; /* max number of hash links */
172 int hash_links_used; /* number of hash links currently used */
173 int disk_blocks; /* max number of blocks in the cache */
174 ulong blocks_used; /* maximum number of concurrently used blocks */
175 ulong blocks_unused; /* number of currently unused blocks */
176 ulong blocks_changed; /* number of currently dirty blocks */
177 ulong warm_blocks; /* number of blocks in warm sub-chain */
178 ulong cnt_for_resize_op; /* counter to block resize operation */
179 long blocks_available; /* number of blocks available in the LRU chain */
180 HASH_LINK **hash_root; /* arr. of entries into hash table buckets */
181 HASH_LINK *hash_link_root; /* memory for hash table links */
182 HASH_LINK *free_hash_list; /* list of free hash links */
183 BLOCK_LINK *free_block_list; /* list of free blocks */
184 BLOCK_LINK *block_root; /* memory for block links */
185 uchar *block_mem; /* memory for block buffers */
186 BLOCK_LINK *used_last; /* ptr to the last block of the LRU chain */
187 BLOCK_LINK *used_ins; /* ptr to the insertion block in LRU chain */
188 mysql_mutex_t cache_lock; /* to lock access to the cache structure */
189 KEYCACHE_WQUEUE resize_queue; /* threads waiting during resize operation */
190 /*
191 Waiting for a zero resize count. Using a queue for symmetry though
192 only one thread can wait here.
193 */
194 KEYCACHE_WQUEUE waiting_for_resize_cnt;
195 KEYCACHE_WQUEUE waiting_for_hash_link; /* waiting for a free hash link */
196 KEYCACHE_WQUEUE waiting_for_block; /* requests waiting for a free block */
197 BLOCK_LINK **changed_blocks; /* hash for dirty file bl.*/
198 BLOCK_LINK **file_blocks; /* hash for other file bl.*/
199
200 /* Statistics variables. These are reset in reset_key_cache_counters(). */
201 ulong global_blocks_changed; /* number of currently dirty blocks */
202 ulonglong global_cache_w_requests;/* number of write requests (write hits) */
203 ulonglong global_cache_write; /* number of writes from cache to files */
204 ulonglong global_cache_r_requests;/* number of read requests (read hits) */
205 ulonglong global_cache_read; /* number of reads from files to cache */
206
207 int blocks; /* max number of blocks in the cache */
208 uint hash_factor; /* factor used to calculate hash function */
209 my_bool in_init; /* Set to 1 in MySQL during init/resize */
210} SIMPLE_KEY_CACHE_CB;
211
212/*
213 Some compilation flags have been added specifically for this module
214 to control the following:
215 - not to let a thread to yield the control when reading directly
216 from key cache, which might improve performance in many cases;
217 to enable this add:
218 #define SERIALIZED_READ_FROM_CACHE
219 - to set an upper bound for number of threads simultaneously
220 using the key cache; this setting helps to determine an optimal
221 size for hash table and improve performance when the number of
222 blocks in the key cache much less than the number of threads
223 accessing it;
224 to set this number equal to <N> add
225 #define MAX_THREADS <N>
226 - to substitute calls of mysql_cond_wait for calls of
227 mysql_cond_timedwait (wait with timeout set up);
228 this setting should be used only when you want to trap a deadlock
229 situation, which theoretically should not happen;
230 to set timeout equal to <T> seconds add
231 #define KEYCACHE_TIMEOUT <T>
232 - to enable the module traps and to send debug information from
233 key cache module to a special debug log add:
234 #define KEYCACHE_DEBUG
235 the name of this debug log file <LOG NAME> can be set through:
236 #define KEYCACHE_DEBUG_LOG <LOG NAME>
237 if the name is not defined, it's set by default;
238 if the KEYCACHE_DEBUG flag is not set up and we are in a debug
239 mode, i.e. when ! defined(DBUG_OFF), the debug information from the
240 module is sent to the regular debug log.
241
242 Example of the settings:
243 #define SERIALIZED_READ_FROM_CACHE
244 #define MAX_THREADS 100
245 #define KEYCACHE_TIMEOUT 1
246 #define KEYCACHE_DEBUG
247 #define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log"
248*/
249
250#define STRUCT_PTR(TYPE, MEMBER, a) \
251 (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
252
253/* types of condition variables */
254#define COND_FOR_REQUESTED 0
255#define COND_FOR_SAVED 1
256#define COND_FOR_READERS 2
257
258typedef mysql_cond_t KEYCACHE_CONDVAR;
259
260/* descriptor of the page in the key cache block buffer */
261struct st_keycache_page
262{
263 int file; /* file to which the page belongs to */
264 my_off_t filepos; /* position of the page in the file */
265};
266
267/* element in the chain of a hash table bucket */
268struct st_hash_link
269{
270 struct st_hash_link *next, **prev; /* to connect links in the same bucket */
271 struct st_block_link *block; /* reference to the block for the page: */
272 File file; /* from such a file */
273 my_off_t diskpos; /* with such an offset */
274 uint requests; /* number of requests for the page */
275};
276
277/* simple states of a block */
278#define BLOCK_ERROR 1U/* an error occurred when performing file i/o */
279#define BLOCK_READ 2U/* file block is in the block buffer */
280#define BLOCK_IN_SWITCH 4U/* block is preparing to read new page */
281#define BLOCK_REASSIGNED 8U/* blk does not accept requests for old page */
282#define BLOCK_IN_FLUSH 16U/* block is selected for flush */
283#define BLOCK_CHANGED 32U/* block buffer contains a dirty page */
284#define BLOCK_IN_USE 64U/* block is not free */
285#define BLOCK_IN_EVICTION 128U/* block is selected for eviction */
286#define BLOCK_IN_FLUSHWRITE 256U/* block is in write to file */
287#define BLOCK_FOR_UPDATE 512U/* block is selected for buffer modification */
288
289/* page status, returned by find_key_block */
290#define PAGE_READ 0
291#define PAGE_TO_BE_READ 1
292#define PAGE_WAIT_TO_BE_READ 2
293
294/* block temperature determines in which (sub-)chain the block currently is */
295enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
296
297/* key cache block */
298struct st_block_link
299{
300 struct st_block_link
301 *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
302 struct st_block_link
303 *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
304 struct st_hash_link *hash_link; /* backward ptr to referring hash_link */
305 KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
306 uint requests; /* number of requests for the block */
307 uchar *buffer; /* buffer for the block page */
308 uint offset; /* beginning of modified data in the buffer */
309 uint length; /* end of data in the buffer */
310 uint status; /* state of the block */
311 enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
312 uint hits_left; /* number of hits left until promotion */
313 ulonglong last_hit_time; /* timestamp of the last hit */
314 KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
315};
316
317KEY_CACHE dflt_key_cache_var;
318KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
319
320#define FLUSH_CACHE 2000 /* sort this many blocks at once */
321
322static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache);
323static void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup);
324static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
325 mysql_mutex_t *mutex);
326static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
327static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block);
328#ifndef DBUG_OFF
329static void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
330 const char *where, my_bool lock);
331#endif
332#define KEYCACHE_BASE_EXPR(f, pos) \
333 ((ulong) ((pos) / keycache->key_cache_block_size) + (ulong) (f))
334#define KEYCACHE_HASH(f, pos) \
335 ((KEYCACHE_BASE_EXPR(f, pos) / keycache->hash_factor) & \
336 (keycache->hash_entries-1))
337#define FILE_HASH(f, cache) ((uint) (f) & (cache->changed_blocks_hash_size-1))
338
339#define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log"
340
341#if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
342#define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG
343#endif
344
345#if defined(KEYCACHE_DEBUG_LOG)
346static FILE *keycache_debug_log=NULL;
347static void keycache_debug_print(const char *fmt,...);
348#define KEYCACHE_DEBUG_OPEN \
349 if (!keycache_debug_log) \
350 { \
351 keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \
352 (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \
353 }
354
355#define KEYCACHE_DEBUG_CLOSE \
356 if (keycache_debug_log) \
357 { \
358 fclose(keycache_debug_log); \
359 keycache_debug_log= 0; \
360 }
361#else
362#define KEYCACHE_DEBUG_OPEN
363#define KEYCACHE_DEBUG_CLOSE
364#endif /* defined(KEYCACHE_DEBUG_LOG) */
365
366#if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
367#define KEYCACHE_DBUG_PRINT(l, m) \
368 { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
369 keycache_debug_print m; }
370
371#define KEYCACHE_DBUG_ASSERT(a) \
372 { if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \
373 assert(a); }
374#else
375#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
376#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
377#endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */
378
379#if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
380static long keycache_thread_id;
381#define KEYCACHE_THREAD_TRACE(l) \
382 KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))
383
384#define KEYCACHE_THREAD_TRACE_BEGIN(l) \
385 { struct st_my_thread_var *thread_var= my_thread_var; \
386 keycache_thread_id= thread_var->id; \
387 KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }
388
389#define KEYCACHE_THREAD_TRACE_END(l) \
390 KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
391#else
392#define KEYCACHE_THREAD_TRACE_BEGIN(l)
393#define KEYCACHE_THREAD_TRACE_END(l)
394#define KEYCACHE_THREAD_TRACE(l)
395#endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */
396
397#define BLOCK_NUMBER(b) \
398 ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
399#define HASH_LINK_NUMBER(h) \
400 ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
401
402#if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
403static int keycache_pthread_cond_wait(mysql_cond_t *cond,
404 mysql_mutex_t *mutex);
405#else
406#define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M)
407#endif
408
409#if defined(KEYCACHE_DEBUG)
410static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex);
411static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex);
412static int keycache_pthread_cond_signal(mysql_cond_t *cond);
413#else
414#define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M)
415#define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M)
416#define keycache_pthread_cond_signal(C) mysql_cond_signal(C)
417#endif /* defined(KEYCACHE_DEBUG) */
418
419#if !defined(DBUG_OFF)
420#if defined(inline)
421#undef inline
422#endif
423#define inline /* disabled inline for easier debugging */
424static int fail_hlink(HASH_LINK *hlink);
425static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache);
426#endif
427#ifdef DBUG_ASSERT_EXISTS
428static int fail_block(BLOCK_LINK *block);
429#endif
430
431static inline uint next_power(uint value)
432{
433 return (uint) my_round_up_to_next_power((uint32) value) << 1;
434}
435
436
437/*
438 Initialize a simple key cache
439
440 SYNOPSIS
441 init_simple_key_cache()
442 keycache pointer to the control block of a simple key cache
443 key_cache_block_size size of blocks to keep cached data
444 use_mem memory to use for the key cache buferrs/structures
445 division_limit division limit (may be zero)
446 age_threshold age threshold (may be zero)
447
448 DESCRIPTION
449 This function is the implementation of the init_key_cache interface
450 function that is employed by simple (non-partitioned) key caches.
451 The function builds a simple key cache and initializes the control block
452 structure of the type SIMPLE_KEY_CACHE_CB that is used for this key cache.
453 The parameter keycache is supposed to point to this structure.
454 The parameter key_cache_block_size specifies the size of the blocks in
455 the key cache to be built. The parameters division_limit and age_threshold
456 determine the initial values of those characteristics of the key cache
457 that are used for midpoint insertion strategy. The parameter use_mem
458 specifies the total amount of memory to be allocated for key cache blocks
459 and auxiliary structures.
460
461 RETURN VALUE
462 number of blocks in the key cache, if successful,
463 <= 0 - otherwise.
464
465 NOTES.
466 if keycache->key_cache_inited != 0 we assume that the key cache
467 is already initialized. This is for now used by myisamchk, but shouldn't
468 be something that a program should rely on!
469
470 It's assumed that no two threads call this function simultaneously
471 referring to the same key cache handle.
472*/
473
474static
475int init_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
476 uint key_cache_block_size,
477 size_t use_mem, uint division_limit,
478 uint age_threshold, uint changed_blocks_hash_size)
479{
480 ulong blocks, hash_links;
481 size_t length;
482 int error;
483 DBUG_ENTER("init_simple_key_cache");
484 DBUG_ASSERT(key_cache_block_size >= 512);
485
486 KEYCACHE_DEBUG_OPEN;
487 if (keycache->key_cache_inited && keycache->disk_blocks > 0)
488 {
489 DBUG_PRINT("warning",("key cache already in use"));
490 DBUG_RETURN(0);
491 }
492
493 keycache->blocks_used= keycache->blocks_unused= 0;
494 keycache->global_blocks_changed= 0;
495 keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
496 keycache->global_cache_read= keycache->global_cache_write= 0;
497 keycache->disk_blocks= -1;
498 if (! keycache->key_cache_inited)
499 {
500 keycache->key_cache_inited= 1;
501 keycache->hash_factor= 1;
502 /*
503 Initialize these variables once only.
504 Their value must survive re-initialization during resizing.
505 */
506 keycache->in_resize= 0;
507 keycache->resize_in_flush= 0;
508 keycache->cnt_for_resize_op= 0;
509 keycache->waiting_for_resize_cnt.last_thread= NULL;
510 keycache->in_init= 0;
511 mysql_mutex_init(key_KEY_CACHE_cache_lock,
512 &keycache->cache_lock, MY_MUTEX_INIT_FAST);
513 keycache->resize_queue.last_thread= NULL;
514 }
515
516 keycache->key_cache_mem_size= use_mem;
517 keycache->key_cache_block_size= key_cache_block_size;
518 DBUG_PRINT("info", ("key_cache_block_size: %u",
519 key_cache_block_size));
520
521 blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
522 sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
523
524 /* Changed blocks hash needs to be a power of 2 */
525 changed_blocks_hash_size= my_round_up_to_next_power(MY_MAX(changed_blocks_hash_size,
526 MIN_CHANGED_BLOCKS_HASH_SIZE));
527
528 /* It doesn't make sense to have too few blocks (less than 8) */
529 if (blocks >= 8)
530 {
531 for ( ; ; )
532 {
533 /* Set my_hash_entries to the next bigger 2 power */
534 if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
535 keycache->hash_entries<<= 1;
536 hash_links= 2 * blocks;
537#if defined(MAX_THREADS)
538 if (hash_links < MAX_THREADS + blocks - 1)
539 hash_links= MAX_THREADS + blocks - 1;
540#endif
541 while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
542 ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
543 ALIGN_SIZE(sizeof(HASH_LINK*) *
544 keycache->hash_entries) +
545 sizeof(BLOCK_LINK*)* (changed_blocks_hash_size*2))) +
546 ((size_t) blocks * keycache->key_cache_block_size) > use_mem && blocks > 8)
547 blocks--;
548 /* Allocate memory for cache page buffers */
549 if ((keycache->block_mem=
550 my_large_malloc((size_t) blocks * keycache->key_cache_block_size,
551 MYF(0))))
552 {
553 /*
554 Allocate memory for blocks, hash_links and hash entries;
555 For each block 2 hash links are allocated
556 */
557 if (my_multi_malloc_large(MYF(MY_ZEROFILL),
558 &keycache->block_root,
559 (ulonglong) (blocks * sizeof(BLOCK_LINK)),
560 &keycache->hash_root,
561 (ulonglong) (sizeof(HASH_LINK*) *
562 keycache->hash_entries),
563 &keycache->hash_link_root,
564 (ulonglong) (hash_links * sizeof(HASH_LINK)),
565 &keycache->changed_blocks,
566 (ulonglong) (sizeof(BLOCK_LINK*) *
567 changed_blocks_hash_size),
568 &keycache->file_blocks,
569 (ulonglong) (sizeof(BLOCK_LINK*) *
570 changed_blocks_hash_size),
571 NullS))
572 break;
573 my_large_free(keycache->block_mem);
574 keycache->block_mem= 0;
575 }
576 if (blocks < 8)
577 {
578 my_errno= ENOMEM;
579 my_error(EE_OUTOFMEMORY, MYF(ME_FATALERROR),
580 blocks * keycache->key_cache_block_size);
581 goto err;
582 }
583 blocks= blocks / 4*3;
584 }
585 keycache->blocks_unused= blocks;
586 keycache->disk_blocks= (int) blocks;
587 keycache->hash_links= hash_links;
588 keycache->hash_links_used= 0;
589 keycache->free_hash_list= NULL;
590 keycache->blocks_used= keycache->blocks_changed= 0;
591
592 keycache->global_blocks_changed= 0;
593 keycache->blocks_available=0; /* For debugging */
594
595 /* The LRU chain is empty after initialization */
596 keycache->used_last= NULL;
597 keycache->used_ins= NULL;
598 keycache->free_block_list= NULL;
599 keycache->keycache_time= 0;
600 keycache->warm_blocks= 0;
601 keycache->min_warm_blocks= (division_limit ?
602 blocks * division_limit / 100 + 1 :
603 blocks);
604 keycache->age_threshold= (age_threshold ?
605 blocks * age_threshold / 100 :
606 blocks);
607 keycache->changed_blocks_hash_size= changed_blocks_hash_size;
608 keycache->can_be_used= 1;
609
610 keycache->waiting_for_hash_link.last_thread= NULL;
611 keycache->waiting_for_block.last_thread= NULL;
612 DBUG_PRINT("exit",
613 ("disk_blocks: %d block_root: %p hash_entries: %d\
614 hash_root: %p hash_links: %d hash_link_root: %p",
615 keycache->disk_blocks, keycache->block_root,
616 keycache->hash_entries, keycache->hash_root,
617 keycache->hash_links, keycache->hash_link_root));
618 }
619 else
620 {
621 /* key_buffer_size is specified too small. Disable the cache. */
622 keycache->can_be_used= 0;
623 }
624
625 keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
626 DBUG_RETURN((int) keycache->disk_blocks);
627
628err:
629 error= my_errno;
630 keycache->disk_blocks= 0;
631 keycache->blocks= 0;
632 if (keycache->block_mem)
633 {
634 my_large_free((uchar*) keycache->block_mem);
635 keycache->block_mem= NULL;
636 }
637 if (keycache->block_root)
638 {
639 my_free(keycache->block_root);
640 keycache->block_root= NULL;
641 }
642 my_errno= error;
643 keycache->can_be_used= 0;
644 DBUG_RETURN(0);
645}
646
647
648/*
649 Prepare for resizing a simple key cache
650
651 SYNOPSIS
652 prepare_resize_simple_key_cache()
653 keycache pointer to the control block of a simple key cache
654 release_lock <=> release the key cache lock before return
655
656 DESCRIPTION
657 This function flushes all dirty pages from a simple key cache and after
658 this it destroys the key cache calling end_simple_key_cache. The function
659 takes the parameter keycache as a pointer to the control block
660 structure of the type SIMPLE_KEY_CACHE_CB for this key cache.
661 The parameter release_lock says whether the key cache lock must be
662 released before return from the function.
663
664 RETURN VALUE
665 0 - on success,
666 1 - otherwise.
667
668 NOTES
669 This function is the called by resize_simple_key_cache and
670 resize_partitioned_key_cache that resize simple and partitioned key caches
671 respectively.
672*/
673
674static
675int prepare_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
676 my_bool release_lock)
677{
678 int res= 0;
679 DBUG_ENTER("prepare_resize_simple_key_cache");
680
681 keycache_pthread_mutex_lock(&keycache->cache_lock);
682
683 /*
684 We may need to wait for another thread which is doing a resize
685 already. This cannot happen in the MySQL server though. It allows
686 one resizer only. In set_var.cc keycache->in_init is used to block
687 multiple attempts.
688 */
689 while (keycache->in_resize)
690 {
691 /* purecov: begin inspected */
692 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
693 /* purecov: end */
694 }
695
696 /*
697 Mark the operation in progress. This blocks other threads from doing
698 a resize in parallel. It prohibits new blocks to enter the cache.
699 Read/write requests can bypass the cache during the flush phase.
700 */
701 keycache->in_resize= 1;
702
703 /* Need to flush only if keycache is enabled. */
704 if (keycache->can_be_used)
705 {
706 /* Start the flush phase. */
707 keycache->resize_in_flush= 1;
708
709 if (flush_all_key_blocks(keycache))
710 {
711 /* TODO: if this happens, we should write a warning in the log file ! */
712 keycache->resize_in_flush= 0;
713 keycache->can_be_used= 0;
714 res= 1;
715 goto finish;
716 }
717 DBUG_SLOW_ASSERT(cache_empty(keycache));
718
719 /* End the flush phase. */
720 keycache->resize_in_flush= 0;
721 }
722
723 /*
724 Some direct read/write operations (bypassing the cache) may still be
725 unfinished. Wait until they are done. If the key cache can be used,
726 direct I/O is done in increments of key_cache_block_size. That is,
727 every block is checked if it is in the cache. We need to wait for
728 pending I/O before re-initializing the cache, because we may change
729 the block size. Otherwise they could check for blocks at file
730 positions where the new block division has none. We do also want to
731 wait for I/O done when (if) the cache was disabled. It must not
732 run in parallel with normal cache operation.
733 */
734 while (keycache->cnt_for_resize_op)
735 wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
736
737 end_simple_key_cache(keycache, 0);
738
739finish:
740 if (release_lock)
741 keycache_pthread_mutex_unlock(&keycache->cache_lock);
742 DBUG_RETURN(res);
743}
744
745
746/*
747 Finalize resizing a simple key cache
748
749 SYNOPSIS
750 finish_resize_simple_key_cache()
751 keycache pointer to the control block of a simple key cache
752
753 DESCRIPTION
754 This function performs finalizing actions for the operation of
755 resizing a simple key cache. The function takes the parameter
756 keycache as a pointer to the control block structure of the type
757 SIMPLE_KEY_CACHE_CB for this key cache. The function sets the flag
758 in_resize in this structure to FALSE.
759
760 RETURN VALUE
761 none
762
763 NOTES
764 This function is the called by resize_simple_key_cache and
765 resize_partitioned_key_cache that resize simple and partitioned key caches
766 respectively.
767*/
768
769static
770void finish_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache)
771{
772 DBUG_ENTER("finish_resize_simple_key_cache");
773
774 mysql_mutex_assert_owner(&keycache->cache_lock);
775
776 /*
777 Mark the resize finished. This allows other threads to start a
778 resize or to request new cache blocks.
779 */
780 keycache->in_resize= 0;
781
782
783 /* Signal waiting threads. */
784 release_whole_queue(&keycache->resize_queue);
785
786
787 keycache_pthread_mutex_unlock(&keycache->cache_lock);
788
789 DBUG_VOID_RETURN;
790}
791
792
793/*
794 Resize a simple key cache
795
796 SYNOPSIS
797 resize_simple_key_cache()
798 keycache pointer to the control block of a simple key cache
799 key_cache_block_size size of blocks to keep cached data
800 use_mem memory to use for the key cache buffers/structures
801 division_limit new division limit (if not zero)
802 age_threshold new age threshold (if not zero)
803
804 DESCRIPTION
805 This function is the implementation of the resize_key_cache interface
806 function that is employed by simple (non-partitioned) key caches.
807 The function takes the parameter keycache as a pointer to the
808 control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
809 cache to be resized.
810 The parameter key_cache_block_size specifies the new size of the blocks in
811 the key cache. The parameters division_limit and age_threshold
812 determine the new initial values of those characteristics of the key cache
813 that are used for midpoint insertion strategy. The parameter use_mem
814 specifies the total amount of memory to be allocated for key cache blocks
815 and auxiliary structures in the new key cache.
816
817 RETURN VALUE
818 number of blocks in the key cache, if successful,
819 0 - otherwise.
820
821 NOTES.
822 The function first calls the function prepare_resize_simple_key_cache
823 to flush all dirty blocks from key cache, to free memory used
824 for key cache blocks and auxiliary structures. After this the
825 function builds a new key cache with new parameters.
826
827 This implementation doesn't block the calls and executions of other
828 functions from the key cache interface. However it assumes that the
829 calls of resize_simple_key_cache itself are serialized.
830
831 The function starts the operation only when all other threads
832 performing operations with the key cache let her to proceed
833 (when cnt_for_resize=0).
834*/
835
836static
837int resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
838 uint key_cache_block_size,
839 size_t use_mem, uint division_limit,
840 uint age_threshold, uint changed_blocks_hash_size)
841{
842 int blocks= 0;
843 DBUG_ENTER("resize_simple_key_cache");
844
845 DBUG_ASSERT(keycache->key_cache_inited);
846
847 /*
848 Note that the cache_lock mutex and the resize_queue are left untouched.
849 We do not lose the cache_lock and will release it only at the end of
850 this function.
851 */
852 if (prepare_resize_simple_key_cache(keycache, 0))
853 goto finish;
854
855 /* The following will work even if use_mem is 0 */
856 blocks= init_simple_key_cache(keycache, key_cache_block_size, use_mem,
857 division_limit, age_threshold,
858 changed_blocks_hash_size);
859
860finish:
861 finish_resize_simple_key_cache(keycache);
862
863 DBUG_RETURN(blocks);
864}
865
866
867/*
868 Increment counter blocking resize key cache operation
869*/
870static inline void inc_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
871{
872 keycache->cnt_for_resize_op++;
873}
874
875
876/*
877 Decrement counter blocking resize key cache operation;
878 Signal the operation to proceed when counter becomes equal zero
879*/
880static inline void dec_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
881{
882 if (!--keycache->cnt_for_resize_op)
883 release_whole_queue(&keycache->waiting_for_resize_cnt);
884}
885
886
887/*
888 Change key cache parameters of a simple key cache
889
890 SYNOPSIS
891 change_simple_key_cache_param()
892 keycache pointer to the control block of a simple key cache
893 division_limit new division limit (if not zero)
894 age_threshold new age threshold (if not zero)
895
896 DESCRIPTION
897 This function is the implementation of the change_key_cache_param interface
898 function that is employed by simple (non-partitioned) key caches.
899 The function takes the parameter keycache as a pointer to the
900 control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
901 cache where new values of the division limit and the age threshold used
902 for midpoint insertion strategy are to be set. The parameters
903 division_limit and age_threshold provide these new values.
904
905 RETURN VALUE
906 none
907
908 NOTES.
909 Presently the function resets the key cache parameters concerning
910 midpoint insertion strategy - division_limit and age_threshold.
911 This function changes some parameters of a given key cache without
912 reformatting it. The function does not touch the contents the key
913 cache blocks.
914*/
915
916static
917void change_simple_key_cache_param(SIMPLE_KEY_CACHE_CB *keycache, uint division_limit,
918 uint age_threshold)
919{
920 DBUG_ENTER("change_simple_key_cache_param");
921 keycache_pthread_mutex_lock(&keycache->cache_lock);
922 if (division_limit)
923 keycache->min_warm_blocks= (keycache->disk_blocks *
924 division_limit / 100 + 1);
925 if (age_threshold)
926 keycache->age_threshold= (keycache->disk_blocks *
927 age_threshold / 100);
928 keycache_pthread_mutex_unlock(&keycache->cache_lock);
929 DBUG_VOID_RETURN;
930}
931
932
933/*
934 Destroy a simple key cache
935
936 SYNOPSIS
937 end_simple_key_cache()
938 keycache pointer to the control block of a simple key cache
939 cleanup <=> complete free (free also mutex for key cache)
940
941 DESCRIPTION
942 This function is the implementation of the end_key_cache interface
943 function that is employed by simple (non-partitioned) key caches.
944 The function takes the parameter keycache as a pointer to the
945 control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
946 cache to be destroyed.
947 The function frees the memory allocated for the key cache blocks and
948 auxiliary structures. If the value of the parameter cleanup is TRUE
949 then even the key cache mutex is freed.
950
951 RETURN VALUE
952 none
953*/
954
955static
956void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup)
957{
958 DBUG_ENTER("end_simple_key_cache");
959 DBUG_PRINT("enter", ("key_cache: %p", keycache));
960
961 if (!keycache->key_cache_inited)
962 DBUG_VOID_RETURN;
963
964 if (keycache->disk_blocks > 0)
965 {
966 if (keycache->block_mem)
967 {
968 my_large_free((uchar*) keycache->block_mem);
969 keycache->block_mem= NULL;
970 my_free(keycache->block_root);
971 keycache->block_root= NULL;
972 }
973 keycache->disk_blocks= -1;
974 /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
975 keycache->blocks_changed= 0;
976 }
977
978 DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
979 "writes: %lu r_requests: %lu reads: %lu",
980 keycache->blocks_used, keycache->global_blocks_changed,
981 (ulong) keycache->global_cache_w_requests,
982 (ulong) keycache->global_cache_write,
983 (ulong) keycache->global_cache_r_requests,
984 (ulong) keycache->global_cache_read));
985
986 /*
987 Reset these values to be able to detect a disabled key cache.
988 See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
989 */
990 keycache->blocks_used= 0;
991 keycache->blocks_unused= 0;
992
993 if (cleanup)
994 {
995 mysql_mutex_destroy(&keycache->cache_lock);
996 keycache->key_cache_inited= keycache->can_be_used= 0;
997 KEYCACHE_DEBUG_CLOSE;
998 }
999 DBUG_VOID_RETURN;
1000} /* end_key_cache */
1001
1002
1003/*
1004 Link a thread into double-linked queue of waiting threads.
1005
1006 SYNOPSIS
1007 link_into_queue()
1008 wqueue pointer to the queue structure
1009 thread pointer to the thread to be added to the queue
1010
1011 RETURN VALUE
1012 none
1013
1014 NOTES.
1015 Queue is represented by a circular list of the thread structures
1016 The list is double-linked of the type (**prev,*next), accessed by
1017 a pointer to the last element.
1018*/
1019
1020static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
1021 struct st_my_thread_var *thread)
1022{
1023 struct st_my_thread_var *last;
1024 DBUG_ASSERT(!thread->next && !thread->prev);
1025
1026 if (! (last= wqueue->last_thread))
1027 {
1028 /* Queue is empty */
1029 thread->next= thread;
1030 thread->prev= &thread->next;
1031 }
1032 else
1033 {
1034 DBUG_ASSERT(last->next->prev == &last->next);
1035 /* Add backlink to previous element */
1036 thread->prev= last->next->prev;
1037 /* Fix first in list to point backwords to current */
1038 last->next->prev= &thread->next;
1039 /* Next should point to the first element in list */
1040 thread->next= last->next;
1041 /* Fix old element to point to new one */
1042 last->next= thread;
1043 }
1044 wqueue->last_thread= thread;
1045}
1046
1047/*
1048 Unlink a thread from double-linked queue of waiting threads
1049
1050 SYNOPSIS
1051 unlink_from_queue()
1052 wqueue pointer to the queue structure
1053 thread pointer to the thread to be removed from the queue
1054
1055 RETURN VALUE
1056 none
1057
1058 NOTES.
1059 See NOTES for link_into_queue
1060*/
1061
1062static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
1063 struct st_my_thread_var *thread)
1064{
1065 KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", (ulong) thread->id));
1066 DBUG_ASSERT(thread->next && thread->prev);
1067
1068 if (thread->next == thread)
1069 {
1070 /* The queue contains only one member */
1071 wqueue->last_thread= NULL;
1072 }
1073 else
1074 {
1075 /* Remove current element from list */
1076 thread->next->prev= thread->prev;
1077 *thread->prev= thread->next;
1078 /* If first element, change list pointer to point to previous element */
1079 if (wqueue->last_thread == thread)
1080 wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
1081 thread->prev);
1082 }
1083 thread->next= NULL;
1084#ifdef DBUG_ASSERT_EXISTS
1085 /*
1086 This makes it easier to see it's not in a chain during debugging.
1087 And some DBUG_ASSERT() rely on it.
1088 */
1089 thread->prev= NULL;
1090#endif
1091}
1092
1093
1094/*
1095 Add a thread to single-linked queue of waiting threads
1096
1097 SYNOPSIS
1098 wait_on_queue()
1099 wqueue Pointer to the queue structure.
1100 mutex Cache_lock to acquire after awake.
1101
1102 RETURN VALUE
1103 none
1104
1105 NOTES.
1106 Queue is represented by a circular list of the thread structures
1107 The list is single-linked of the type (*next), accessed by a pointer
1108 to the last element.
1109
1110 The function protects against stray signals by verifying that the
1111 current thread is unlinked from the queue when awaking. However,
1112 since several threads can wait for the same event, it might be
1113 necessary for the caller of the function to check again if the
1114 condition for awake is indeed matched.
1115*/
1116
1117static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
1118 mysql_mutex_t *mutex)
1119{
1120 struct st_my_thread_var *last;
1121 struct st_my_thread_var *thread= my_thread_var;
1122 DBUG_ASSERT(!thread->next);
1123 DBUG_ASSERT(!thread->prev); /* Not required, but must be true anyway. */
1124 mysql_mutex_assert_owner(mutex);
1125
1126 /* Add to queue. */
1127 if (! (last= wqueue->last_thread))
1128 thread->next= thread;
1129 else
1130 {
1131 thread->next= last->next;
1132 last->next= thread;
1133 }
1134 wqueue->last_thread= thread;
1135
1136 /*
1137 Wait until thread is removed from queue by the signaling thread.
1138 The loop protects against stray signals.
1139 */
1140 do
1141 {
1142 KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", (ulong) thread->id));
1143 keycache_pthread_cond_wait(&thread->suspend, mutex);
1144 }
1145 while (thread->next);
1146}
1147
1148
1149/*
1150 Remove all threads from queue signaling them to proceed
1151
1152 SYNOPSIS
1153 release_whole_queue()
1154 wqueue pointer to the queue structure
1155
1156 RETURN VALUE
1157 none
1158
1159 NOTES.
1160 See notes for wait_on_queue().
1161 When removed from the queue each thread is signaled via condition
1162 variable thread->suspend.
1163*/
1164
1165static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
1166{
1167 struct st_my_thread_var *last;
1168 struct st_my_thread_var *next;
1169 struct st_my_thread_var *thread;
1170
1171 /* Queue may be empty. */
1172 if (!(last= wqueue->last_thread))
1173 return;
1174
1175 next= last->next; /* First (oldest) element */
1176 do
1177 {
1178 thread=next;
1179 DBUG_ASSERT(thread && thread->init == 1);
1180 KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
1181 ("thread %ld", (ulong) thread->id));
1182 /* Take thread from queue. */
1183 next= thread->next;
1184 thread->next= NULL;
1185
1186 /* Signal the thread. */
1187 keycache_pthread_cond_signal(&thread->suspend);
1188 }
1189 while (thread != last);
1190
1191 /* Now queue is definitely empty. */
1192 wqueue->last_thread= NULL;
1193}
1194
1195
1196/*
1197 Unlink a block from the chain of dirty/clean blocks
1198*/
1199
1200static inline void unlink_changed(BLOCK_LINK *block)
1201{
1202 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1203 if (block->next_changed)
1204 block->next_changed->prev_changed= block->prev_changed;
1205 *block->prev_changed= block->next_changed;
1206
1207#ifdef DBUG_ASSERT_EXISTS
1208 /*
1209 This makes it easier to see it's not in a chain during debugging.
1210 And some DBUG_ASSERT() rely on it.
1211 */
1212 block->next_changed= NULL;
1213 block->prev_changed= NULL;
1214#endif
1215}
1216
1217
1218/*
1219 Link a block into the chain of dirty/clean blocks
1220*/
1221
1222static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
1223{
1224 DBUG_ASSERT(!block->next_changed);
1225 DBUG_ASSERT(!block->prev_changed);
1226 block->prev_changed= phead;
1227 if ((block->next_changed= *phead))
1228 (*phead)->prev_changed= &block->next_changed;
1229 *phead= block;
1230}
1231
1232
1233/*
1234 Link a block in a chain of clean blocks of a file.
1235
1236 SYNOPSIS
1237 link_to_file_list()
1238 keycache Key cache handle
1239 block Block to relink
1240 file File to be linked to
1241 unlink If to unlink first
1242
1243 DESCRIPTION
1244 Unlink a block from whichever chain it is linked in, if it's
1245 asked for, and link it to the chain of clean blocks of the
1246 specified file.
1247
1248 NOTE
1249 Please do never set/clear BLOCK_CHANGED outside of
1250 link_to_file_list() or link_to_changed_list().
1251 You would risk to damage correct counting of changed blocks
1252 and to find blocks in the wrong hash.
1253
1254 RETURN
1255 void
1256*/
1257
1258static void link_to_file_list(SIMPLE_KEY_CACHE_CB *keycache,
1259 BLOCK_LINK *block, int file,
1260 my_bool unlink_block)
1261{
1262 DBUG_ASSERT(block->status & BLOCK_IN_USE);
1263 DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
1264 DBUG_ASSERT(block->hash_link->file == file);
1265 if (unlink_block)
1266 unlink_changed(block);
1267 link_changed(block, &keycache->file_blocks[FILE_HASH(file, keycache)]);
1268 if (block->status & BLOCK_CHANGED)
1269 {
1270 block->status&= ~BLOCK_CHANGED;
1271 keycache->blocks_changed--;
1272 keycache->global_blocks_changed--;
1273 }
1274}
1275
1276
1277/*
1278 Re-link a block from the clean chain to the dirty chain of a file.
1279
1280 SYNOPSIS
1281 link_to_changed_list()
1282 keycache key cache handle
1283 block block to relink
1284
1285 DESCRIPTION
1286 Unlink a block from the chain of clean blocks of a file
1287 and link it to the chain of dirty blocks of the same file.
1288
1289 NOTE
1290 Please do never set/clear BLOCK_CHANGED outside of
1291 link_to_file_list() or link_to_changed_list().
1292 You would risk to damage correct counting of changed blocks
1293 and to find blocks in the wrong hash.
1294
1295 RETURN
1296 void
1297*/
1298
1299static void link_to_changed_list(SIMPLE_KEY_CACHE_CB *keycache,
1300 BLOCK_LINK *block)
1301{
1302 DBUG_ASSERT(block->status & BLOCK_IN_USE);
1303 DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
1304 DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
1305
1306 unlink_changed(block);
1307 link_changed(block,
1308 &keycache->changed_blocks[FILE_HASH(block->hash_link->file, keycache)]);
1309 block->status|=BLOCK_CHANGED;
1310 keycache->blocks_changed++;
1311 keycache->global_blocks_changed++;
1312}
1313
1314
1315/*
1316 Link a block to the LRU chain at the beginning or at the end of
1317 one of two parts.
1318
1319 SYNOPSIS
1320 link_block()
1321 keycache pointer to a key cache data structure
1322 block pointer to the block to link to the LRU chain
1323 hot <-> to link the block into the hot subchain
1324 at_end <-> to link the block at the end of the subchain
1325
1326 RETURN VALUE
1327 none
1328
1329 NOTES.
1330 The LRU ring is represented by a circular list of block structures.
1331 The list is double-linked of the type (**prev,*next) type.
1332 The LRU ring is divided into two parts - hot and warm.
1333 There are two pointers to access the last blocks of these two
1334 parts. The beginning of the warm part follows right after the
1335 end of the hot part.
1336 Only blocks of the warm part can be used for eviction.
1337 The first block from the beginning of this subchain is always
1338 taken for eviction (keycache->last_used->next)
1339
1340 LRU chain: +------+ H O T +------+
1341 +----| end |----...<----| beg |----+
1342 | +------+last +------+ |
1343 v<-link in latest hot (new end) |
1344 | link in latest warm (new end)->^
1345 | +------+ W A R M +------+ |
1346 +----| beg |---->...----| end |----+
1347 +------+ +------+ins
1348 first for eviction
1349
1350 It is also possible that the block is selected for eviction and thus
1351 not linked in the LRU ring.
1352*/
1353
1354static void link_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block,
1355 my_bool hot, my_bool at_end)
1356{
1357 BLOCK_LINK *ins;
1358 BLOCK_LINK **pins;
1359
1360 DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1361 DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
1362 DBUG_ASSERT(!block->requests);
1363 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1364 DBUG_ASSERT(!block->next_used);
1365 DBUG_ASSERT(!block->prev_used);
1366 if (!hot && keycache->waiting_for_block.last_thread)
1367 {
1368 /* Signal that in the LRU warm sub-chain an available block has appeared */
1369 struct st_my_thread_var *last_thread=
1370 keycache->waiting_for_block.last_thread;
1371 struct st_my_thread_var *first_thread= last_thread->next;
1372 struct st_my_thread_var *next_thread= first_thread;
1373 HASH_LINK *hash_link= (HASH_LINK *) first_thread->keycache_link;
1374 struct st_my_thread_var *thread;
1375 do
1376 {
1377 thread= next_thread;
1378 next_thread= thread->next;
1379 /*
1380 We notify about the event all threads that ask
1381 for the same page as the first thread in the queue
1382 */
1383 if ((HASH_LINK *) thread->keycache_link == hash_link)
1384 {
1385 KEYCACHE_DBUG_PRINT("link_block: signal",
1386 ("thread %ld", (ulong) thread->id));
1387 keycache_pthread_cond_signal(&thread->suspend);
1388 unlink_from_queue(&keycache->waiting_for_block, thread);
1389 block->requests++;
1390 }
1391 }
1392 while (thread != last_thread);
1393 hash_link->block= block;
1394 /*
1395 NOTE: We assigned the block to the hash_link and signalled the
1396 requesting thread(s). But it is possible that other threads runs
1397 first. These threads see the hash_link assigned to a block which
1398 is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
1399 This can be a problem for functions that do not select the block
1400 via its hash_link: flush and free. They do only see a block which
1401 is in a "normal" state and don't know that it will be evicted soon.
1402
1403 We cannot set BLOCK_IN_SWITCH here because only one of the
1404 requesting threads must handle the eviction. All others must wait
1405 for it to complete. If we set the flag here, the threads would not
1406 know who is in charge of the eviction. Without the flag, the first
1407 thread takes the stick and sets the flag.
1408
1409 But we need to note in the block that is has been selected for
1410 eviction. It must not be freed. The evicting thread will not
1411 expect the block in the free list. Before freeing we could also
1412 check if block->requests > 1. But I think including another flag
1413 in the check of block->status is slightly more efficient and
1414 probably easier to read.
1415 */
1416 block->status|= BLOCK_IN_EVICTION;
1417 KEYCACHE_THREAD_TRACE("link_block: after signaling");
1418#if defined(KEYCACHE_DEBUG)
1419 KEYCACHE_DBUG_PRINT("link_block",
1420 ("linked,unlinked block %u status=%x #requests=%u #available=%u",
1421 BLOCK_NUMBER(block), block->status,
1422 block->requests, keycache->blocks_available));
1423#endif
1424 return;
1425 }
1426 pins= hot ? &keycache->used_ins : &keycache->used_last;
1427 ins= *pins;
1428 if (ins)
1429 {
1430 ins->next_used->prev_used= &block->next_used;
1431 block->next_used= ins->next_used;
1432 block->prev_used= &ins->next_used;
1433 ins->next_used= block;
1434 if (at_end)
1435 *pins= block;
1436 }
1437 else
1438 {
1439 /* The LRU ring is empty. Let the block point to itself. */
1440 keycache->used_last= keycache->used_ins= block->next_used= block;
1441 block->prev_used= &block->next_used;
1442 }
1443 KEYCACHE_THREAD_TRACE("link_block");
1444#if defined(KEYCACHE_DEBUG)
1445 keycache->blocks_available++;
1446 KEYCACHE_DBUG_PRINT("link_block",
1447 ("linked block %u:%1u status=%x #requests=%u #available=%u",
1448 BLOCK_NUMBER(block), at_end, block->status,
1449 block->requests, keycache->blocks_available));
1450 KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
1451 keycache->blocks_used);
1452#endif
1453}
1454
1455
1456/*
1457 Unlink a block from the LRU chain
1458
1459 SYNOPSIS
1460 unlink_block()
1461 keycache pointer to a key cache data structure
1462 block pointer to the block to unlink from the LRU chain
1463
1464 RETURN VALUE
1465 none
1466
1467 NOTES.
1468 See NOTES for link_block
1469*/
1470
1471static void unlink_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block)
1472{
1473 DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1474 DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
1475 DBUG_ASSERT(!block->requests);
1476 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1477 DBUG_ASSERT(block->next_used && block->prev_used &&
1478 (block->next_used->prev_used == &block->next_used) &&
1479 (*block->prev_used == block));
1480 if (block->next_used == block)
1481 /* The list contains only one member */
1482 keycache->used_last= keycache->used_ins= NULL;
1483 else
1484 {
1485 block->next_used->prev_used= block->prev_used;
1486 *block->prev_used= block->next_used;
1487 if (keycache->used_last == block)
1488 keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1489 if (keycache->used_ins == block)
1490 keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1491 }
1492 block->next_used= NULL;
1493#ifdef DBUG_ASSERT_EXISTS
1494 /*
1495 This makes it easier to see it's not in a chain during debugging.
1496 And some DBUG_ASSERT() rely on it.
1497 */
1498 block->prev_used= NULL;
1499#endif
1500
1501 KEYCACHE_THREAD_TRACE("unlink_block");
1502#if defined(KEYCACHE_DEBUG)
1503 KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
1504 keycache->blocks_available--;
1505 KEYCACHE_DBUG_PRINT("unlink_block",
1506 ("unlinked block %u status=%x #requests=%u #available=%u",
1507 BLOCK_NUMBER(block), block->status,
1508 block->requests, keycache->blocks_available));
1509#endif
1510}
1511
1512
1513/*
1514 Register requests for a block.
1515
1516 SYNOPSIS
1517 reg_requests()
1518 keycache Pointer to a key cache data structure.
1519 block Pointer to the block to register a request on.
1520 count Number of requests. Always 1.
1521
1522 NOTE
1523 The first request unlinks the block from the LRU ring. This means
1524 that it is protected against eveiction.
1525
1526 RETURN
1527 void
1528*/
1529static void reg_requests(SIMPLE_KEY_CACHE_CB *keycache,
1530 BLOCK_LINK *block, int count)
1531{
1532 DBUG_ASSERT(block->status & BLOCK_IN_USE);
1533 DBUG_ASSERT(block->hash_link);
1534
1535 if (!block->requests)
1536 unlink_block(keycache, block);
1537 block->requests+=count;
1538}
1539
1540
1541/*
1542 Unregister request for a block
1543 linking it to the LRU chain if it's the last request
1544
1545 SYNOPSIS
1546 unreg_request()
1547 keycache pointer to a key cache data structure
1548 block pointer to the block to link to the LRU chain
1549 at_end <-> to link the block at the end of the LRU chain
1550
1551 RETURN VALUE
1552 none
1553
1554 NOTES.
1555 Every linking to the LRU ring decrements by one a special block
1556 counter (if it's positive). If the at_end parameter is TRUE the block is
1557 added either at the end of warm sub-chain or at the end of hot sub-chain.
1558 It is added to the hot subchain if its counter is zero and number of
1559 blocks in warm sub-chain is not less than some low limit (determined by
1560 the division_limit parameter). Otherwise the block is added to the warm
1561 sub-chain. If the at_end parameter is FALSE the block is always added
1562 at beginning of the warm sub-chain.
1563 Thus a warm block can be promoted to the hot sub-chain when its counter
1564 becomes zero for the first time.
1565 At the same time the block at the very beginning of the hot subchain
1566 might be moved to the beginning of the warm subchain if it stays untouched
1567 for a too long time (this time is determined by parameter age_threshold).
1568
1569 It is also possible that the block is selected for eviction and thus
1570 not linked in the LRU ring.
1571*/
1572
1573static void unreg_request(SIMPLE_KEY_CACHE_CB *keycache,
1574 BLOCK_LINK *block, int at_end)
1575{
1576 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1577 DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
1578 DBUG_ASSERT(block->requests);
1579 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1580 DBUG_ASSERT(!block->next_used);
1581 DBUG_ASSERT(!block->prev_used);
1582 /*
1583 Unregister the request, but do not link erroneous blocks into the
1584 LRU ring.
1585 */
1586 if (!--block->requests && !(block->status & BLOCK_ERROR))
1587 {
1588 my_bool hot;
1589 if (block->hits_left)
1590 block->hits_left--;
1591 hot= !block->hits_left && at_end &&
1592 keycache->warm_blocks > keycache->min_warm_blocks;
1593 if (hot)
1594 {
1595 if (block->temperature == BLOCK_WARM)
1596 keycache->warm_blocks--;
1597 block->temperature= BLOCK_HOT;
1598 KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
1599 keycache->warm_blocks));
1600 }
1601 link_block(keycache, block, hot, (my_bool)at_end);
1602 block->last_hit_time= keycache->keycache_time;
1603 keycache->keycache_time++;
1604 /*
1605 At this place, the block might be in the LRU ring or not. If an
1606 evicter was waiting for a block, it was selected for eviction and
1607 not linked in the LRU ring.
1608 */
1609
1610 /*
1611 Check if we should link a hot block to the warm block sub-chain.
1612 It is possible that we select the same block as above. But it can
1613 also be another block. In any case a block from the LRU ring is
1614 selected. In other words it works even if the above block was
1615 selected for eviction and not linked in the LRU ring. Since this
1616 happens only if the LRU ring is empty, the block selected below
1617 would be NULL and the rest of the function skipped.
1618 */
1619 block= keycache->used_ins;
1620 if (block && keycache->keycache_time - block->last_hit_time >
1621 keycache->age_threshold)
1622 {
1623 unlink_block(keycache, block);
1624 link_block(keycache, block, 0, 0);
1625 if (block->temperature != BLOCK_WARM)
1626 {
1627 keycache->warm_blocks++;
1628 block->temperature= BLOCK_WARM;
1629 }
1630 KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
1631 keycache->warm_blocks));
1632 }
1633 }
1634}
1635
1636/*
1637 Remove a reader of the page in block
1638*/
1639
1640static void remove_reader(BLOCK_LINK *block)
1641{
1642 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1643 DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
1644 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1645 DBUG_ASSERT(!block->next_used);
1646 DBUG_ASSERT(!block->prev_used);
1647 DBUG_ASSERT(block->hash_link->requests);
1648 if (! --block->hash_link->requests && block->condvar)
1649 keycache_pthread_cond_signal(block->condvar);
1650}
1651
1652
1653/*
1654 Wait until the last reader of the page in block
1655 signals on its termination
1656*/
1657
1658static void wait_for_readers(SIMPLE_KEY_CACHE_CB *keycache,
1659 BLOCK_LINK *block)
1660{
1661 struct st_my_thread_var *thread= my_thread_var;
1662 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1663 DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
1664 DBUG_ASSERT(block->hash_link);
1665 DBUG_ASSERT(block->hash_link->block == block);
1666 /* Linked in file_blocks or changed_blocks hash. */
1667 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
1668 /* Not linked in LRU ring. */
1669 DBUG_ASSERT(!block->next_used);
1670 DBUG_ASSERT(!block->prev_used);
1671 while (block->hash_link->requests)
1672 {
1673 KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
1674 ("suspend thread %ld block %u",
1675 (ulong) thread->id, BLOCK_NUMBER(block)));
1676 /* There must be no other waiter. We have no queue here. */
1677 DBUG_ASSERT(!block->condvar);
1678 block->condvar= &thread->suspend;
1679 keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
1680 block->condvar= NULL;
1681 }
1682}
1683
1684
1685/*
1686 Add a hash link to a bucket in the hash_table
1687*/
1688
1689static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
1690{
1691 if (*start)
1692 (*start)->prev= &hash_link->next;
1693 hash_link->next= *start;
1694 hash_link->prev= start;
1695 *start= hash_link;
1696}
1697
1698
1699/*
1700 Remove a hash link from the hash table
1701*/
1702
1703static void unlink_hash(SIMPLE_KEY_CACHE_CB *keycache, HASH_LINK *hash_link)
1704{
1705 KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
1706 (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
1707 KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
1708 if ((*hash_link->prev= hash_link->next))
1709 hash_link->next->prev= hash_link->prev;
1710 hash_link->block= NULL;
1711 if (keycache->waiting_for_hash_link.last_thread)
1712 {
1713 /* Signal that a free hash link has appeared */
1714 struct st_my_thread_var *last_thread=
1715 keycache->waiting_for_hash_link.last_thread;
1716 struct st_my_thread_var *first_thread= last_thread->next;
1717 struct st_my_thread_var *next_thread= first_thread;
1718 KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->keycache_link);
1719 struct st_my_thread_var *thread;
1720
1721 hash_link->file= first_page->file;
1722 hash_link->diskpos= first_page->filepos;
1723 do
1724 {
1725 KEYCACHE_PAGE *page;
1726 thread= next_thread;
1727 page= (KEYCACHE_PAGE *) thread->keycache_link;
1728 next_thread= thread->next;
1729 /*
1730 We notify about the event all threads that ask
1731 for the same page as the first thread in the queue
1732 */
1733 if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
1734 {
1735 KEYCACHE_DBUG_PRINT("unlink_hash: signal",
1736 ("thread %ld", (ulong) thread->id));
1737 keycache_pthread_cond_signal(&thread->suspend);
1738 unlink_from_queue(&keycache->waiting_for_hash_link, thread);
1739 }
1740 }
1741 while (thread != last_thread);
1742 link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
1743 hash_link->diskpos)],
1744 hash_link);
1745 return;
1746 }
1747 hash_link->next= keycache->free_hash_list;
1748 keycache->free_hash_list= hash_link;
1749}
1750
1751
1752/*
1753 Get the hash link for a page
1754*/
1755
1756static HASH_LINK *get_hash_link(SIMPLE_KEY_CACHE_CB *keycache,
1757 int file, my_off_t filepos)
1758{
1759 reg1 HASH_LINK *hash_link, **start;
1760#if defined(KEYCACHE_DEBUG)
1761 int cnt;
1762#endif
1763
1764 KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
1765 (uint) file,(ulong) filepos));
1766
1767restart:
1768 /*
1769 Find the bucket in the hash table for the pair (file, filepos);
1770 start contains the head of the bucket list,
1771 hash_link points to the first member of the list
1772 */
1773 hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
1774#if defined(KEYCACHE_DEBUG)
1775 cnt= 0;
1776#endif
1777 /* Look for an element for the pair (file, filepos) in the bucket chain */
1778 while (hash_link &&
1779 (hash_link->diskpos != filepos || hash_link->file != file))
1780 {
1781 hash_link= hash_link->next;
1782#if defined(KEYCACHE_DEBUG)
1783 cnt++;
1784 if (! (cnt <= keycache->hash_links_used))
1785 {
1786 int i;
1787 for (i=0, hash_link= *start ;
1788 i < cnt ; i++, hash_link= hash_link->next)
1789 {
1790 KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
1791 (uint) hash_link->file,(ulong) hash_link->diskpos));
1792 }
1793 }
1794 KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
1795#endif
1796 }
1797 if (! hash_link)
1798 {
1799 /* There is no hash link in the hash table for the pair (file, filepos) */
1800 if (keycache->free_hash_list)
1801 {
1802 hash_link= keycache->free_hash_list;
1803 keycache->free_hash_list= hash_link->next;
1804 }
1805 else if (keycache->hash_links_used < keycache->hash_links)
1806 {
1807 hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
1808 }
1809 else
1810 {
1811 /* Wait for a free hash link */
1812 struct st_my_thread_var *thread= my_thread_var;
1813 KEYCACHE_PAGE page;
1814 KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
1815 page.file= file;
1816 page.filepos= filepos;
1817 thread->keycache_link= (void *) &page;
1818 link_into_queue(&keycache->waiting_for_hash_link, thread);
1819 KEYCACHE_DBUG_PRINT("get_hash_link: wait",
1820 ("suspend thread %ld", (ulong) thread->id));
1821 keycache_pthread_cond_wait(&thread->suspend,
1822 &keycache->cache_lock);
1823 thread->keycache_link= NULL;
1824 goto restart;
1825 }
1826 hash_link->file= file;
1827 hash_link->diskpos= filepos;
1828 link_hash(start, hash_link);
1829 }
1830 /* Register the request for the page */
1831 hash_link->requests++;
1832
1833 return hash_link;
1834}
1835
1836
1837/*
1838 Get a block for the file page requested by a keycache read/write operation;
1839 If the page is not in the cache return a free block, if there is none
1840 return the lru block after saving its buffer if the page is dirty.
1841
1842 SYNOPSIS
1843
1844 find_key_block()
1845 keycache pointer to a key cache data structure
1846 file handler for the file to read page from
1847 filepos position of the page in the file
1848 init_hits_left how initialize the block counter for the page
1849 wrmode <-> get for writing
1850 page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1851
1852 RETURN VALUE
1853 Pointer to the found block if successful, 0 - otherwise
1854
1855 NOTES.
1856 For the page from file positioned at filepos the function checks whether
1857 the page is in the key cache specified by the first parameter.
1858 If this is the case it immediately returns the block.
1859 If not, the function first chooses a block for this page. If there is
1860 no not used blocks in the key cache yet, the function takes the block
1861 at the very beginning of the warm sub-chain. It saves the page in that
1862 block if it's dirty before returning the pointer to it.
1863 The function returns in the page_st parameter the following values:
1864 PAGE_READ - if page already in the block,
1865 PAGE_TO_BE_READ - if it is to be read yet by the current thread
1866 WAIT_TO_BE_READ - if it is to be read by another thread
1867 If an error occurs THE BLOCK_ERROR bit is set in the block status.
1868 It might happen that there are no blocks in LRU chain (in warm part) -
1869 all blocks are unlinked for some read/write operations. Then the function
1870 waits until first of this operations links any block back.
1871*/
1872
1873static BLOCK_LINK *find_key_block(SIMPLE_KEY_CACHE_CB *keycache,
1874 File file, my_off_t filepos,
1875 int init_hits_left,
1876 int wrmode, int *page_st)
1877{
1878 HASH_LINK *hash_link;
1879 BLOCK_LINK *block;
1880 int error= 0;
1881 int page_status;
1882
1883 DBUG_ENTER("find_key_block");
1884 KEYCACHE_THREAD_TRACE("find_key_block:begin");
1885 DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
1886 file, (ulong) filepos, wrmode));
1887 KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d",
1888 file, (ulong) filepos,
1889 wrmode));
1890#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1891 DBUG_EXECUTE("check_keycache2",
1892 test_key_cache(keycache, "start of find_key_block", 0););
1893#endif
1894
1895restart:
1896 /*
1897 If the flush phase of a resize operation fails, the cache is left
1898 unusable. This will be detected only after "goto restart".
1899 */
1900 if (!keycache->can_be_used)
1901 DBUG_RETURN(0);
1902
1903 /*
1904 Find the hash_link for the requested file block (file, filepos). We
1905 do always get a hash_link here. It has registered our request so
1906 that no other thread can use it for another file block until we
1907 release the request (which is done by remove_reader() usually). The
1908 hash_link can have a block assigned to it or not. If there is a
1909 block, it may be assigned to this hash_link or not. In cases where a
1910 block is evicted from the cache, it is taken from the LRU ring and
1911 referenced by the new hash_link. But the block can still be assigned
1912 to its old hash_link for some time if it needs to be flushed first,
1913 or if there are other threads still reading it.
1914
1915 Summary:
1916 hash_link is always returned.
1917 hash_link->block can be:
1918 - NULL or
1919 - not assigned to this hash_link or
1920 - assigned to this hash_link. If assigned, the block can have
1921 - invalid data (when freshly assigned) or
1922 - valid data. Valid data can be
1923 - changed over the file contents (dirty) or
1924 - not changed (clean).
1925 */
1926 hash_link= get_hash_link(keycache, file, filepos);
1927 DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos));
1928
1929 page_status= -1;
1930 if ((block= hash_link->block) &&
1931 block->hash_link == hash_link && (block->status & BLOCK_READ))
1932 {
1933 /* Assigned block with valid (changed or unchanged) contents. */
1934 page_status= PAGE_READ;
1935 }
1936 /*
1937 else (page_status == -1)
1938 - block == NULL or
1939 - block not assigned to this hash_link or
1940 - block assigned but not yet read from file (invalid data).
1941 */
1942
1943 if (keycache->in_resize)
1944 {
1945 /* This is a request during a resize operation */
1946
1947 if (!block)
1948 {
1949 struct st_my_thread_var *thread;
1950
1951 /*
1952 The file block is not in the cache. We don't need it in the
1953 cache: we are going to read or write directly to file. Cancel
1954 the request. We can simply decrement hash_link->requests because
1955 we did not release cache_lock since increasing it. So no other
1956 thread can wait for our request to become released.
1957 */
1958 if (hash_link->requests == 1)
1959 {
1960 /*
1961 We are the only one to request this hash_link (this file/pos).
1962 Free the hash_link.
1963 */
1964 hash_link->requests--;
1965 unlink_hash(keycache, hash_link);
1966 DBUG_RETURN(0);
1967 }
1968
1969 /*
1970 More requests on the hash_link. Someone tries to evict a block
1971 for this hash_link (could have started before resizing started).
1972 This means that the LRU ring is empty. Otherwise a block could
1973 be assigned immediately. Behave like a thread that wants to
1974 evict a block for this file/pos. Add to the queue of threads
1975 waiting for a block. Wait until there is one assigned.
1976
1977 Refresh the request on the hash-link so that it cannot be reused
1978 for another file/pos.
1979 */
1980 thread= my_thread_var;
1981 thread->keycache_link= (void *) hash_link;
1982 link_into_queue(&keycache->waiting_for_block, thread);
1983 do
1984 {
1985 KEYCACHE_DBUG_PRINT("find_key_block: wait",
1986 ("suspend thread %ld", (ulong) thread->id));
1987 keycache_pthread_cond_wait(&thread->suspend,
1988 &keycache->cache_lock);
1989 } while (thread->next);
1990 thread->keycache_link= NULL;
1991 /*
1992 A block should now be assigned to the hash_link. But it may
1993 still need to be evicted. Anyway, we should re-check the
1994 situation. page_status must be set correctly.
1995 */
1996 hash_link->requests--;
1997 goto restart;
1998 } /* end of if (!block) */
1999
2000 /*
2001 There is a block for this file/pos in the cache. Register a
2002 request on it. This unlinks it from the LRU ring (if it is there)
2003 and hence protects it against eviction (if not already in
2004 eviction). We need this for returning the block to the caller, for
2005 calling remove_reader() (for debugging purposes), and for calling
2006 free_block(). The only case where we don't need the request is if
2007 the block is in eviction. In that case we have to unregister the
2008 request later.
2009 */
2010 reg_requests(keycache, block, 1);
2011
2012 if (page_status != PAGE_READ)
2013 {
2014 /*
2015 - block not assigned to this hash_link or
2016 - block assigned but not yet read from file (invalid data).
2017
2018 This must be a block in eviction. It will be read soon. We need
2019 to wait here until this happened. Otherwise the caller could
2020 access a wrong block or a block which is in read. While waiting
2021 we cannot lose hash_link nor block. We have registered a request
2022 on the hash_link. Everything can happen to the block but changes
2023 in the hash_link -> block relationship. In other words:
2024 everything can happen to the block but free or another completed
2025 eviction.
2026
2027 Note that we bahave like a secondary requestor here. We just
2028 cannot return with PAGE_WAIT_TO_BE_READ. This would work for
2029 read requests and writes on dirty blocks that are not in flush
2030 only. Waiting here on COND_FOR_REQUESTED works in all
2031 situations.
2032 */
2033 DBUG_ASSERT(((block->hash_link != hash_link) &&
2034 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2035 ((block->hash_link == hash_link) &&
2036 !(block->status & BLOCK_READ)));
2037 wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
2038 /*
2039 Here we can trust that the block has been assigned to this
2040 hash_link (block->hash_link == hash_link) and read into the
2041 buffer (BLOCK_READ). The worst things possible here are that the
2042 block is in free (BLOCK_REASSIGNED). But the block is still
2043 assigned to the hash_link. The freeing thread waits until we
2044 release our request on the hash_link. The block must not be
2045 again in eviction because we registered an request on it before
2046 starting to wait.
2047 */
2048 DBUG_ASSERT(block->hash_link == hash_link);
2049 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2050 DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
2051 }
2052 /*
2053 The block is in the cache. Assigned to the hash_link. Valid data.
2054 Note that in case of page_st == PAGE_READ, the block can be marked
2055 for eviction. In any case it can be marked for freeing.
2056 */
2057
2058 if (!wrmode)
2059 {
2060 /* A reader can just read the block. */
2061 *page_st= PAGE_READ;
2062 DBUG_ASSERT((hash_link->file == file) &&
2063 (hash_link->diskpos == filepos) &&
2064 (block->hash_link == hash_link));
2065 DBUG_RETURN(block);
2066 }
2067
2068 /*
2069 This is a writer. No two writers for the same block can exist.
2070 This must be assured by locks outside of the key cache.
2071 */
2072 DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
2073
2074 while (block->status & BLOCK_IN_FLUSH)
2075 {
2076 /*
2077 Wait until the block is flushed to file. Do not release the
2078 request on the hash_link yet to prevent that the block is freed
2079 or reassigned while we wait. While we wait, several things can
2080 happen to the block, including another flush. But the block
2081 cannot be reassigned to another hash_link until we release our
2082 request on it. But it can be marked BLOCK_REASSIGNED from free
2083 or eviction, while they wait for us to release the hash_link.
2084 */
2085 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2086 /*
2087 If the flush phase failed, the resize could have finished while
2088 we waited here.
2089 */
2090 if (!keycache->in_resize)
2091 {
2092 remove_reader(block);
2093 unreg_request(keycache, block, 1);
2094 goto restart;
2095 }
2096 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2097 DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
2098 DBUG_ASSERT(block->hash_link == hash_link);
2099 }
2100
2101 if (block->status & BLOCK_CHANGED)
2102 {
2103 /*
2104 We want to write a block with changed contents. If the cache
2105 block size is bigger than the callers block size (e.g. MyISAM),
2106 the caller may replace part of the block only. Changes of the
2107 other part of the block must be preserved. Since the block has
2108 not yet been selected for flush, we can still add our changes.
2109 */
2110 *page_st= PAGE_READ;
2111 DBUG_ASSERT((hash_link->file == file) &&
2112 (hash_link->diskpos == filepos) &&
2113 (block->hash_link == hash_link));
2114 DBUG_RETURN(block);
2115 }
2116
2117 /*
2118 This is a write request for a clean block. We do not want to have
2119 new dirty blocks in the cache while resizing. We will free the
2120 block and write directly to file. If the block is in eviction or
2121 in free, we just let it go.
2122
2123 Unregister from the hash_link. This must be done before freeing
2124 the block. And it must be done if not freeing the block. Because
2125 we could have waited above, we need to call remove_reader(). Other
2126 threads could wait for us to release our request on the hash_link.
2127 */
2128 remove_reader(block);
2129
2130 /* If the block is not in eviction and not in free, we can free it. */
2131 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2132 BLOCK_REASSIGNED)))
2133 {
2134 /*
2135 Free block as we are going to write directly to file.
2136 Although we have an exlusive lock for the updated key part,
2137 the control can be yielded by the current thread as we might
2138 have unfinished readers of other key parts in the block
2139 buffer. Still we are guaranteed not to have any readers
2140 of the key part we are writing into until the block is
2141 removed from the cache as we set the BLOCK_REASSIGNED
2142 flag (see the code below that handles reading requests).
2143 */
2144 free_block(keycache, block);
2145 }
2146 else
2147 {
2148 /*
2149 The block will be evicted/freed soon. Don't touch it in any way.
2150 Unregister the request that we registered above.
2151 */
2152 unreg_request(keycache, block, 1);
2153
2154 /*
2155 The block is still assigned to the hash_link (the file/pos that
2156 we are going to write to). Wait until the eviction/free is
2157 complete. Otherwise the direct write could complete before all
2158 readers are done with the block. So they could read outdated
2159 data.
2160
2161 Since we released our request on the hash_link, it can be reused
2162 for another file/pos. Hence we cannot just check for
2163 block->hash_link == hash_link. As long as the resize is
2164 proceeding the block cannot be reassigned to the same file/pos
2165 again. So we can terminate the loop when the block is no longer
2166 assigned to this file/pos.
2167 */
2168 do
2169 {
2170 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
2171 &keycache->cache_lock);
2172 /*
2173 If the flush phase failed, the resize could have finished
2174 while we waited here.
2175 */
2176 if (!keycache->in_resize)
2177 goto restart;
2178 } while (block->hash_link &&
2179 (block->hash_link->file == file) &&
2180 (block->hash_link->diskpos == filepos));
2181 }
2182 DBUG_RETURN(0);
2183 }
2184
2185 if (page_status == PAGE_READ &&
2186 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2187 BLOCK_REASSIGNED)))
2188 {
2189 /*
2190 This is a request for a block to be removed from cache. The block
2191 is assigned to this hash_link and contains valid data, but is
2192 marked for eviction or to be freed. Possible reasons why it has
2193 not yet been evicted/freed can be a flush before reassignment
2194 (BLOCK_IN_SWITCH), readers of the block have not finished yet
2195 (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
2196 the block has been selected for it (BLOCK_IN_EVICTION).
2197 */
2198
2199 KEYCACHE_DBUG_PRINT("find_key_block",
2200 ("request for old page in block %u "
2201 "wrmode: %d block->status: %d",
2202 BLOCK_NUMBER(block), wrmode, block->status));
2203 /*
2204 Only reading requests can proceed until the old dirty page is flushed,
2205 all others are to be suspended, then resubmitted
2206 */
2207 if (!wrmode && !(block->status & BLOCK_REASSIGNED))
2208 {
2209 /*
2210 This is a read request and the block not yet reassigned. We can
2211 register our request and proceed. This unlinks the block from
2212 the LRU ring and protects it against eviction.
2213 */
2214 reg_requests(keycache, block, 1);
2215 }
2216 else
2217 {
2218 /*
2219 Either this is a write request for a block that is in eviction
2220 or in free. We must not use it any more. Instead we must evict
2221 another block. But we cannot do this before the eviction/free is
2222 done. Otherwise we would find the same hash_link + block again
2223 and again.
2224
2225 Or this is a read request for a block in eviction/free that does
2226 not require a flush, but waits for readers to finish with the
2227 block. We do not read this block to let the eviction/free happen
2228 as soon as possible. Again we must wait so that we don't find
2229 the same hash_link + block again and again.
2230 */
2231 DBUG_ASSERT(hash_link->requests);
2232 hash_link->requests--;
2233 KEYCACHE_DBUG_PRINT("find_key_block",
2234 ("request waiting for old page to be saved"));
2235 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2236 KEYCACHE_DBUG_PRINT("find_key_block",
2237 ("request for old page resubmitted"));
2238 /*
2239 The block is no longer assigned to this hash_link.
2240 Get another one.
2241 */
2242 goto restart;
2243 }
2244 }
2245 else
2246 {
2247 /*
2248 This is a request for a new block or for a block not to be removed.
2249 Either
2250 - block == NULL or
2251 - block not assigned to this hash_link or
2252 - block assigned but not yet read from file,
2253 or
2254 - block assigned with valid (changed or unchanged) data and
2255 - it will not be reassigned/freed.
2256 */
2257 if (! block)
2258 {
2259 /* No block is assigned to the hash_link yet. */
2260 if (keycache->blocks_unused)
2261 {
2262 if (keycache->free_block_list)
2263 {
2264 /* There is a block in the free list. */
2265 block= keycache->free_block_list;
2266 keycache->free_block_list= block->next_used;
2267 block->next_used= NULL;
2268 }
2269 else
2270 {
2271 size_t block_mem_offset;
2272 /* There are some never used blocks, take first of them */
2273 DBUG_ASSERT(keycache->blocks_used <
2274 (ulong) keycache->disk_blocks);
2275 block= &keycache->block_root[keycache->blocks_used];
2276 block_mem_offset=
2277 ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
2278 block->buffer= ADD_TO_PTR(keycache->block_mem,
2279 block_mem_offset,
2280 uchar*);
2281 keycache->blocks_used++;
2282 DBUG_ASSERT(!block->next_used);
2283 }
2284 DBUG_ASSERT(!block->prev_used);
2285 DBUG_ASSERT(!block->next_changed);
2286 DBUG_ASSERT(!block->prev_changed);
2287 DBUG_ASSERT(!block->hash_link);
2288 DBUG_ASSERT(!block->status);
2289 DBUG_ASSERT(!block->requests);
2290 keycache->blocks_unused--;
2291 block->status= BLOCK_IN_USE;
2292 block->length= 0;
2293 block->offset= keycache->key_cache_block_size;
2294 block->requests= 1;
2295 block->temperature= BLOCK_COLD;
2296 block->hits_left= init_hits_left;
2297 block->last_hit_time= 0;
2298 block->hash_link= hash_link;
2299 hash_link->block= block;
2300 link_to_file_list(keycache, block, file, 0);
2301 page_status= PAGE_TO_BE_READ;
2302 KEYCACHE_DBUG_PRINT("find_key_block",
2303 ("got free or never used block %u",
2304 BLOCK_NUMBER(block)));
2305 }
2306 else
2307 {
2308 /*
2309 There are no free blocks and no never used blocks, use a block
2310 from the LRU ring.
2311 */
2312
2313 if (! keycache->used_last)
2314 {
2315 /*
2316 The LRU ring is empty. Wait until a new block is added to
2317 it. Several threads might wait here for the same hash_link,
2318 all of them must get the same block. While waiting for a
2319 block, after a block is selected for this hash_link, other
2320 threads can run first before this one awakes. During this
2321 time interval other threads find this hash_link pointing to
2322 the block, which is still assigned to another hash_link. In
2323 this case the block is not marked BLOCK_IN_SWITCH yet, but
2324 it is marked BLOCK_IN_EVICTION.
2325 */
2326
2327 struct st_my_thread_var *thread= my_thread_var;
2328 thread->keycache_link= (void *) hash_link;
2329 link_into_queue(&keycache->waiting_for_block, thread);
2330 do
2331 {
2332 KEYCACHE_DBUG_PRINT("find_key_block: wait",
2333 ("suspend thread %ld", (ulong) thread->id));
2334 keycache_pthread_cond_wait(&thread->suspend,
2335 &keycache->cache_lock);
2336 }
2337 while (thread->next);
2338 thread->keycache_link= NULL;
2339 /* Assert that block has a request registered. */
2340 DBUG_ASSERT(hash_link->block->requests);
2341 /* Assert that block is not in LRU ring. */
2342 DBUG_ASSERT(!hash_link->block->next_used);
2343 DBUG_ASSERT(!hash_link->block->prev_used);
2344 }
2345 /*
2346 If we waited above, hash_link->block has been assigned by
2347 link_block(). Otherwise it is still NULL. In the latter case
2348 we need to grab a block from the LRU ring ourselves.
2349 */
2350 block= hash_link->block;
2351 if (! block)
2352 {
2353 /* Select the last block from the LRU ring. */
2354 block= keycache->used_last->next_used;
2355 block->hits_left= init_hits_left;
2356 block->last_hit_time= 0;
2357 hash_link->block= block;
2358 /*
2359 Register a request on the block. This unlinks it from the
2360 LRU ring and protects it against eviction.
2361 */
2362 DBUG_ASSERT(!block->requests);
2363 reg_requests(keycache, block,1);
2364 /*
2365 We do not need to set block->status|= BLOCK_IN_EVICTION here
2366 because we will set block->status|= BLOCK_IN_SWITCH
2367 immediately without releasing the lock in between. This does
2368 also support debugging. When looking at the block, one can
2369 see if the block has been selected by link_block() after the
2370 LRU ring was empty, or if it was grabbed directly from the
2371 LRU ring in this branch.
2372 */
2373 }
2374
2375 /*
2376 If we had to wait above, there is a small chance that another
2377 thread grabbed this block for the same file block already. But
2378 in most cases the first condition is true.
2379 */
2380 if (block->hash_link != hash_link &&
2381 ! (block->status & BLOCK_IN_SWITCH) )
2382 {
2383 /* this is a primary request for a new page */
2384 block->status|= BLOCK_IN_SWITCH;
2385
2386 KEYCACHE_DBUG_PRINT("find_key_block",
2387 ("got block %u for new page", BLOCK_NUMBER(block)));
2388
2389 if (block->status & BLOCK_CHANGED)
2390 {
2391 /* The block contains a dirty page - push it out of the cache */
2392
2393 KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
2394 if (block->status & BLOCK_IN_FLUSH)
2395 {
2396 /*
2397 The block is marked for flush. If we do not wait here,
2398 it could happen that we write the block, reassign it to
2399 another file block, then, before the new owner can read
2400 the new file block, the flusher writes the cache block
2401 (which still has the old contents) to the new file block!
2402 */
2403 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
2404 &keycache->cache_lock);
2405 /*
2406 The block is marked BLOCK_IN_SWITCH. It should be left
2407 alone except for reading. No free, no write.
2408 */
2409 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2410 DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED |
2411 BLOCK_CHANGED |
2412 BLOCK_FOR_UPDATE)));
2413 }
2414 else
2415 {
2416 block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
2417 /*
2418 BLOCK_IN_EVICTION may be true or not. Other flags must
2419 have a fixed value.
2420 */
2421 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2422 (BLOCK_READ | BLOCK_IN_SWITCH |
2423 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2424 BLOCK_CHANGED | BLOCK_IN_USE));
2425 DBUG_ASSERT(block->hash_link);
2426
2427 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2428 /*
2429 The call is thread safe because only the current
2430 thread might change the block->hash_link value
2431 */
2432 error= (int)my_pwrite(block->hash_link->file,
2433 block->buffer + block->offset,
2434 block->length - block->offset,
2435 block->hash_link->diskpos + block->offset,
2436 MYF(MY_NABP | MY_WAIT_IF_FULL));
2437 keycache_pthread_mutex_lock(&keycache->cache_lock);
2438
2439 /* Block status must not have changed. */
2440 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2441 (BLOCK_READ | BLOCK_IN_SWITCH |
2442 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2443 BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
2444 keycache->global_cache_write++;
2445 }
2446 }
2447
2448 block->status|= BLOCK_REASSIGNED;
2449 /*
2450 The block comes from the LRU ring. It must have a hash_link
2451 assigned.
2452 */
2453 DBUG_ASSERT(block->hash_link);
2454 if (block->hash_link)
2455 {
2456 /*
2457 All pending requests for this page must be resubmitted.
2458 This must be done before waiting for readers. They could
2459 wait for the flush to complete. And we must also do it
2460 after the wait. Flushers might try to free the block while
2461 we wait. They would wait until the reassignment is
2462 complete. Also the block status must reflect the correct
2463 situation: The block is not changed nor in flush any more.
2464 Note that we must not change the BLOCK_CHANGED flag
2465 outside of link_to_file_list() so that it is always in the
2466 correct queue and the *blocks_changed counters are
2467 correct.
2468 */
2469 block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
2470 link_to_file_list(keycache, block, block->hash_link->file, 1);
2471 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2472 /*
2473 The block is still assigned to its old hash_link.
2474 Wait until all pending read requests
2475 for this page are executed
2476 (we could have avoided this waiting, if we had read
2477 a page in the cache in a sweep, without yielding control)
2478 */
2479 wait_for_readers(keycache, block);
2480 DBUG_ASSERT(block->hash_link && block->hash_link->block == block &&
2481 block->prev_changed);
2482 /* The reader must not have been a writer. */
2483 DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
2484
2485 /* Wake flushers that might have found the block in between. */
2486 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2487
2488 /* Remove the hash link for the old file block from the hash. */
2489 unlink_hash(keycache, block->hash_link);
2490
2491 /*
2492 For sanity checks link_to_file_list() asserts that block
2493 and hash_link refer to each other. Hence we need to assign
2494 the hash_link first, but then we would not know if it was
2495 linked before. Hence we would not know if to unlink it. So
2496 unlink it here and call link_to_file_list(..., FALSE).
2497 */
2498 unlink_changed(block);
2499 }
2500 block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
2501 block->length= 0;
2502 block->offset= keycache->key_cache_block_size;
2503 block->hash_link= hash_link;
2504 link_to_file_list(keycache, block, file, 0);
2505 page_status= PAGE_TO_BE_READ;
2506
2507 KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2508 KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2509 }
2510 else
2511 {
2512 /*
2513 Either (block->hash_link == hash_link),
2514 or (block->status & BLOCK_IN_SWITCH).
2515
2516 This is for secondary requests for a new file block only.
2517 Either it is already assigned to the new hash_link meanwhile
2518 (if we had to wait due to empty LRU), or it is already in
2519 eviction by another thread. Since this block has been
2520 grabbed from the LRU ring and attached to this hash_link,
2521 another thread cannot grab the same block from the LRU ring
2522 anymore. If the block is in eviction already, it must become
2523 attached to the same hash_link and as such destined for the
2524 same file block.
2525 */
2526 KEYCACHE_DBUG_PRINT("find_key_block",
2527 ("block->hash_link: %p hash_link: %p "
2528 "block->status: %u", block->hash_link,
2529 hash_link, block->status ));
2530 page_status= (((block->hash_link == hash_link) &&
2531 (block->status & BLOCK_READ)) ?
2532 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2533 }
2534 }
2535 }
2536 else
2537 {
2538 /*
2539 Block is not NULL. This hash_link points to a block.
2540 Either
2541 - block not assigned to this hash_link (yet) or
2542 - block assigned but not yet read from file,
2543 or
2544 - block assigned with valid (changed or unchanged) data and
2545 - it will not be reassigned/freed.
2546
2547 The first condition means hash_link points to a block in
2548 eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2549 But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2550 link_block(). In both cases it is destined for this hash_link
2551 and its file block address. When this hash_link got its block
2552 address, the block was removed from the LRU ring and cannot be
2553 selected for eviction (for another hash_link) again.
2554
2555 Register a request on the block. This is another protection
2556 against eviction.
2557 */
2558 DBUG_ASSERT(((block->hash_link != hash_link) &&
2559 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2560 ((block->hash_link == hash_link) &&
2561 !(block->status & BLOCK_READ)) ||
2562 ((block->status & BLOCK_READ) &&
2563 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2564 reg_requests(keycache, block, 1);
2565 KEYCACHE_DBUG_PRINT("find_key_block",
2566 ("block->hash_link: %p hash_link: %p "
2567 "block->status: %u", block->hash_link,
2568 hash_link, block->status ));
2569 page_status= (((block->hash_link == hash_link) &&
2570 (block->status & BLOCK_READ)) ?
2571 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2572 }
2573 }
2574
2575 KEYCACHE_DBUG_ASSERT(page_status != -1);
2576 /* Same assert basically, but be very sure. */
2577 KEYCACHE_DBUG_ASSERT(block);
2578 /* Assert that block has a request and is not in LRU ring. */
2579 DBUG_ASSERT(block->requests);
2580 DBUG_ASSERT(!block->next_used);
2581 DBUG_ASSERT(!block->prev_used);
2582 /* Assert that we return the correct block. */
2583 DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) ||
2584 ((block->hash_link->file == file) &&
2585 (block->hash_link->diskpos == filepos)));
2586 *page_st=page_status;
2587 KEYCACHE_DBUG_PRINT("find_key_block",
2588 ("fd: %d pos: %lu block->status: %u page_status: %d",
2589 file, (ulong) filepos, block->status,
2590 page_status));
2591
2592#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2593 DBUG_EXECUTE("check_keycache2",
2594 test_key_cache(keycache, "end of find_key_block",0););
2595#endif
2596 KEYCACHE_THREAD_TRACE("find_key_block:end");
2597 DBUG_RETURN(block);
2598}
2599
2600
2601/*
2602 Read into a key cache block buffer from disk.
2603
2604 SYNOPSIS
2605
2606 read_block_{primary|secondary}()
2607 keycache pointer to a key cache data structure
2608 block block to which buffer the data is to be read
2609 read_length size of data to be read
2610 min_length at least so much data must be read
2611
2612 RETURN VALUE
2613 None
2614
2615 NOTES.
2616 The function either reads a page data from file to the block buffer,
2617 or waits until another thread reads it. What page to read is determined
2618 by a block parameter - reference to a hash link for this page.
2619 If an error occurs THE BLOCK_ERROR bit is set in the block status.
2620 We do not report error when the size of successfully read
2621 portion is less than read_length, but not less than min_length.
2622*/
2623
2624static void read_block_primary(SIMPLE_KEY_CACHE_CB *keycache,
2625 BLOCK_LINK *block, uint read_length,
2626 uint min_length)
2627{
2628 size_t got_length;
2629
2630 /* On entry cache_lock is locked */
2631
2632 KEYCACHE_THREAD_TRACE("read_block_primary");
2633
2634 /*
2635 This code is executed only by threads that submitted primary
2636 requests. Until block->status contains BLOCK_READ, all other
2637 request for the block become secondary requests. For a primary
2638 request the block must be properly initialized.
2639 */
2640 DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
2641 fail_block(block));
2642 DBUG_ASSERT((block->length == 0) || fail_block(block));
2643 DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
2644 fail_block(block));
2645 DBUG_ASSERT((block->requests > 0) || fail_block(block));
2646
2647 KEYCACHE_DBUG_PRINT("read_block_primary",
2648 ("page to be read by primary request"));
2649
2650 keycache->global_cache_read++;
2651 /* Page is not in buffer yet, is to be read from disk */
2652 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2653 /*
2654 Here other threads may step in and register as secondary readers.
2655 They will register in block->wqueue[COND_FOR_REQUESTED].
2656 */
2657 got_length= my_pread(block->hash_link->file, block->buffer,
2658 read_length, block->hash_link->diskpos, MYF(0));
2659 keycache_pthread_mutex_lock(&keycache->cache_lock);
2660 /*
2661 The block can now have been marked for free (in case of
2662 FLUSH_RELEASE). Otherwise the state must be unchanged.
2663 */
2664 DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
2665 BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
2666 fail_block(block));
2667 DBUG_ASSERT((block->length == 0) || fail_block(block));
2668 DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
2669 fail_block(block));
2670 DBUG_ASSERT((block->requests > 0) || fail_block(block));
2671
2672 if (got_length < min_length)
2673 block->status|= BLOCK_ERROR;
2674 else
2675 {
2676 block->status|= BLOCK_READ;
2677 block->length= (uint)got_length;
2678 /*
2679 Do not set block->offset here. If this block is marked
2680 BLOCK_CHANGED later, we want to flush only the modified part. So
2681 only a writer may set block->offset down from
2682 keycache->key_cache_block_size.
2683 */
2684 }
2685 KEYCACHE_DBUG_PRINT("read_block_primary",
2686 ("primary request: new page in cache"));
2687 /* Signal that all pending requests for this page now can be processed */
2688 release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2689
2690 DBUG_ASSERT(keycache->can_be_used);
2691}
2692
2693
2694static void read_block_secondary(SIMPLE_KEY_CACHE_CB *keycache,
2695 BLOCK_LINK *block)
2696{
2697 KEYCACHE_THREAD_TRACE("read_block_secondary");
2698
2699 /*
2700 This code is executed only by threads that submitted secondary
2701 requests. At this point it could happen that the cache block is
2702 not yet assigned to the hash_link for the requested file block.
2703 But at awake from the wait this should be the case. Unfortunately
2704 we cannot assert this here because we do not know the hash_link
2705 for the requested file block nor the file and position. So we have
2706 to assert this in the caller.
2707 */
2708 KEYCACHE_DBUG_PRINT("read_block_secondary",
2709 ("secondary request waiting for new page to be read"));
2710
2711 wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
2712
2713 KEYCACHE_DBUG_PRINT("read_block_secondary",
2714 ("secondary request: new page in cache"));
2715
2716 DBUG_ASSERT(keycache->can_be_used);
2717 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2718}
2719
2720
2721/*
2722 Read a block of data from a simple key cache into a buffer
2723
2724 SYNOPSIS
2725
2726 simple_key_cache_read()
2727 keycache pointer to the control block of a simple key cache
2728 file handler for the file for the block of data to be read
2729 filepos position of the block of data in the file
2730 level determines the weight of the data
2731 buff buffer to where the data must be placed
2732 length length of the buffer
2733 block_length length of the read data from a key cache block
2734 return_buffer return pointer to the key cache buffer with the data
2735
2736 DESCRIPTION
2737 This function is the implementation of the key_cache_read interface
2738 function that is employed by simple (non-partitioned) key caches.
2739 The function takes the parameter keycache as a pointer to the
2740 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
2741 cache.
2742 In a general case the function reads a block of data from the key cache
2743 into the buffer buff of the size specified by the parameter length. The
2744 beginning of the block of data to be read is specified by the parameters
2745 file and filepos. The length of the read data is the same as the length
2746 of the buffer. The data is read into the buffer in key_cache_block_size
2747 increments. If the next portion of the data is not found in any key cache
2748 block, first it is read from file into the key cache.
2749 If the parameter return_buffer is not ignored and its value is TRUE, and
2750 the data to be read of the specified size block_length can be read from one
2751 key cache buffer, then the function returns a pointer to the data in the
2752 key cache buffer.
2753 The function takse into account parameters block_length and return buffer
2754 only in a single-threaded environment.
2755 The parameter 'level' is used only by the midpoint insertion strategy
2756 when the data or its portion cannot be found in the key cache.
2757
2758 RETURN VALUE
2759 Returns address from where the data is placed if successful, 0 - otherwise.
2760
2761 NOTES
2762 Filepos must be a multiple of 'block_length', but it doesn't
2763 have to be a multiple of key_cache_block_size;
2764*/
2765
2766uchar *simple_key_cache_read(SIMPLE_KEY_CACHE_CB *keycache,
2767 File file, my_off_t filepos, int level,
2768 uchar *buff, uint length,
2769 uint block_length __attribute__((unused)),
2770 int return_buffer __attribute__((unused)))
2771{
2772 my_bool locked_and_incremented= FALSE;
2773 int error=0;
2774 uchar *start= buff;
2775 DBUG_ENTER("simple_key_cache_read");
2776 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
2777 (uint) file, (ulong) filepos, length));
2778
2779 if (keycache->key_cache_inited)
2780 {
2781 /* Key cache is used */
2782 reg1 BLOCK_LINK *block;
2783 uint read_length;
2784 uint offset;
2785 int page_st;
2786
2787 if (MYSQL_KEYCACHE_READ_START_ENABLED())
2788 {
2789 MYSQL_KEYCACHE_READ_START(my_filename(file), length,
2790 (ulong) (keycache->blocks_used *
2791 keycache->key_cache_block_size),
2792 (ulong) (keycache->blocks_unused *
2793 keycache->key_cache_block_size));
2794 }
2795
2796 /*
2797 When the key cache is once initialized, we use the cache_lock to
2798 reliably distinguish the cases of normal operation, resizing, and
2799 disabled cache. We always increment and decrement
2800 'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2801 */
2802 keycache_pthread_mutex_lock(&keycache->cache_lock);
2803 /*
2804 Cache resizing has two phases: Flushing and re-initializing. In
2805 the flush phase read requests are allowed to bypass the cache for
2806 blocks not in the cache. find_key_block() returns NULL in this
2807 case.
2808
2809 After the flush phase new I/O requests must wait until the
2810 re-initialization is done. The re-initialization can be done only
2811 if no I/O request is in progress. The reason is that
2812 key_cache_block_size can change. With enabled cache, I/O is done
2813 in chunks of key_cache_block_size. Every chunk tries to use a
2814 cache block first. If the block size changes in the middle, a
2815 block could be missed and old data could be read.
2816 */
2817 while (keycache->in_resize && !keycache->resize_in_flush)
2818 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2819 /* Register the I/O for the next resize. */
2820 inc_counter_for_resize_op(keycache);
2821 locked_and_incremented= TRUE;
2822 /* Requested data may not always be aligned to cache blocks. */
2823 offset= (uint) (filepos % keycache->key_cache_block_size);
2824 /* Read data in key_cache_block_size increments */
2825 do
2826 {
2827 /* Cache could be disabled in a later iteration. */
2828 if (!keycache->can_be_used)
2829 {
2830 KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
2831 goto no_key_cache;
2832 }
2833 /* Start reading at the beginning of the cache block. */
2834 filepos-= offset;
2835 /* Do not read beyond the end of the cache block. */
2836 read_length= length;
2837 set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2838 KEYCACHE_DBUG_ASSERT(read_length > 0);
2839
2840 /* Request the cache block that matches file/pos. */
2841 keycache->global_cache_r_requests++;
2842
2843 MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);
2844
2845 block=find_key_block(keycache, file, filepos, level, 0, &page_st);
2846 if (!block)
2847 {
2848 /*
2849 This happens only for requests submitted during key cache
2850 resize. The block is not in the cache and shall not go in.
2851 Read directly from file.
2852 */
2853 keycache->global_cache_read++;
2854 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2855 error= (my_pread(file, (uchar*) buff, read_length,
2856 filepos + offset, MYF(MY_NABP)) != 0);
2857 keycache_pthread_mutex_lock(&keycache->cache_lock);
2858 goto next_block;
2859 }
2860 if (!(block->status & BLOCK_ERROR))
2861 {
2862 if (page_st == PAGE_TO_BE_READ)
2863 {
2864 MYSQL_KEYCACHE_READ_MISS();
2865 read_block_primary(keycache, block,
2866 keycache->key_cache_block_size, read_length+offset);
2867 }
2868 else if (page_st == PAGE_WAIT_TO_BE_READ)
2869 {
2870 MYSQL_KEYCACHE_READ_MISS();
2871 /* The requested page is to be read into the block buffer */
2872 read_block_secondary(keycache, block);
2873
2874 /*
2875 A secondary request must now have the block assigned to the
2876 requested file block.
2877 */
2878 DBUG_ASSERT(block->hash_link->file == file);
2879 DBUG_ASSERT(block->hash_link->diskpos == filepos);
2880 }
2881 else if (block->length < read_length + offset)
2882 {
2883 /*
2884 Impossible if nothing goes wrong:
2885 this could only happen if we are using a file with
2886 small key blocks and are trying to read outside the file
2887 */
2888 my_errno= -1;
2889 block->status|= BLOCK_ERROR;
2890 }
2891 else
2892 {
2893 MYSQL_KEYCACHE_READ_HIT();
2894 }
2895 }
2896
2897 /* block status may have added BLOCK_ERROR in the above 'if'. */
2898 if (!(block->status & BLOCK_ERROR))
2899 {
2900 {
2901 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2902#if !defined(SERIALIZED_READ_FROM_CACHE)
2903 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2904#endif
2905
2906 /* Copy data from the cache buffer */
2907 memcpy(buff, block->buffer+offset, (size_t) read_length);
2908
2909#if !defined(SERIALIZED_READ_FROM_CACHE)
2910 keycache_pthread_mutex_lock(&keycache->cache_lock);
2911 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2912#endif
2913 }
2914 }
2915
2916 remove_reader(block);
2917
2918 /* Error injection for coverage testing. */
2919 DBUG_EXECUTE_IF("key_cache_read_block_error",
2920 block->status|= BLOCK_ERROR;);
2921
2922 /* Do not link erroneous blocks into the LRU ring, but free them. */
2923 if (!(block->status & BLOCK_ERROR))
2924 {
2925 /*
2926 Link the block into the LRU ring if it's the last submitted
2927 request for the block. This enables eviction for the block.
2928 */
2929 unreg_request(keycache, block, 1);
2930 }
2931 else
2932 {
2933 free_block(keycache, block);
2934 error= 1;
2935 break;
2936 }
2937
2938 next_block:
2939 buff+= read_length;
2940 filepos+= read_length+offset;
2941 offset= 0;
2942
2943 } while ((length-= read_length));
2944 if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
2945 {
2946 MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
2947 keycache->key_cache_block_size),
2948 (ulong) (keycache->blocks_unused *
2949 keycache->key_cache_block_size));
2950 }
2951 goto end;
2952 }
2953 KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));
2954
2955no_key_cache:
2956 /* Key cache is not used */
2957
2958 keycache->global_cache_r_requests++;
2959 keycache->global_cache_read++;
2960
2961 if (locked_and_incremented)
2962 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2963 if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
2964 error= 1;
2965 if (locked_and_incremented)
2966 keycache_pthread_mutex_lock(&keycache->cache_lock);
2967
2968end:
2969 if (locked_and_incremented)
2970 {
2971 dec_counter_for_resize_op(keycache);
2972 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2973 }
2974 DBUG_PRINT("exit", ("error: %d", error ));
2975 DBUG_RETURN(error ? (uchar*) 0 : start);
2976}
2977
2978
2979/*
2980 Insert a block of file data from a buffer into a simple key cache
2981
2982 SYNOPSIS
2983 simple_key_cache_insert()
2984 keycache pointer to the control block of a simple key cache
2985 file handler for the file to insert data from
2986 filepos position of the block of data in the file to insert
2987 level determines the weight of the data
2988 buff buffer to read data from
2989 length length of the data in the buffer
2990
2991 DESCRIPTION
2992 This function is the implementation of the key_cache_insert interface
2993 function that is employed by simple (non-partitioned) key caches.
2994 The function takes the parameter keycache as a pointer to the
2995 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
2996 cache.
2997 The function writes a block of file data from a buffer into the key cache.
2998 The buffer is specified with the parameters buff and length - the pointer
2999 to the beginning of the buffer and its size respectively. It's assumed
3000 the buffer contains the data from 'file' allocated from the position
3001 filepos. The data is copied from the buffer in key_cache_block_size
3002 increments.
3003 The parameter level is used to set one characteristic for the key buffers
3004 loaded with the data from buff. The characteristic is used only by the
3005 midpoint insertion strategy.
3006
3007 RETURN VALUE
3008 0 if a success, 1 - otherwise.
3009
3010 NOTES
3011 The function is used by MyISAM to move all blocks from a index file to
3012 the key cache. It can be performed in parallel with reading the file data
3013 from the key buffers by other threads.
3014
3015*/
3016
3017static
3018int simple_key_cache_insert(SIMPLE_KEY_CACHE_CB *keycache,
3019 File file, my_off_t filepos, int level,
3020 uchar *buff, uint length)
3021{
3022 int error= 0;
3023 DBUG_ENTER("key_cache_insert");
3024 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
3025 (uint) file,(ulong) filepos, length));
3026
3027 if (keycache->key_cache_inited)
3028 {
3029 /* Key cache is used */
3030 reg1 BLOCK_LINK *block;
3031 uint read_length;
3032 uint offset;
3033 int page_st;
3034 my_bool locked_and_incremented= FALSE;
3035
3036 /*
3037 When the keycache is once initialized, we use the cache_lock to
3038 reliably distinguish the cases of normal operation, resizing, and
3039 disabled cache. We always increment and decrement
3040 'cnt_for_resize_op' so that a resizer can wait for pending I/O.
3041 */
3042 keycache_pthread_mutex_lock(&keycache->cache_lock);
3043 /*
3044 We do not load index data into a disabled cache nor into an
3045 ongoing resize.
3046 */
3047 if (!keycache->can_be_used || keycache->in_resize)
3048 goto no_key_cache;
3049 /* Register the pseudo I/O for the next resize. */
3050 inc_counter_for_resize_op(keycache);
3051 locked_and_incremented= TRUE;
3052 /* Loaded data may not always be aligned to cache blocks. */
3053 offset= (uint) (filepos % keycache->key_cache_block_size);
3054 /* Load data in key_cache_block_size increments. */
3055 do
3056 {
3057 /* Cache could be disabled or resizing in a later iteration. */
3058 if (!keycache->can_be_used || keycache->in_resize)
3059 goto no_key_cache;
3060 /* Start loading at the beginning of the cache block. */
3061 filepos-= offset;
3062 /* Do not load beyond the end of the cache block. */
3063 read_length= length;
3064 set_if_smaller(read_length, keycache->key_cache_block_size-offset);
3065 KEYCACHE_DBUG_ASSERT(read_length > 0);
3066
3067 /* The block has been read by the caller already. */
3068 keycache->global_cache_read++;
3069 /* Request the cache block that matches file/pos. */
3070 keycache->global_cache_r_requests++;
3071 block= find_key_block(keycache, file, filepos, level, 0, &page_st);
3072 if (!block)
3073 {
3074 /*
3075 This happens only for requests submitted during key cache
3076 resize. The block is not in the cache and shall not go in.
3077 Stop loading index data.
3078 */
3079 goto no_key_cache;
3080 }
3081 if (!(block->status & BLOCK_ERROR))
3082 {
3083 if (page_st == PAGE_WAIT_TO_BE_READ)
3084 {
3085 /*
3086 this is a secondary request for a block to be read into the
3087 cache. The block is in eviction. It is not yet assigned to
3088 the requested file block (It does not point to the right
3089 hash_link). So we cannot call remove_reader() on the block.
3090 And we cannot access the hash_link directly here. We need to
3091 wait until the assignment is complete. read_block_secondary()
3092 executes the correct wait.
3093 */
3094 read_block_secondary(keycache, block);
3095
3096 /*
3097 A secondary request must now have the block assigned to the
3098 requested file block.
3099 */
3100 DBUG_ASSERT(block->hash_link->file == file);
3101 DBUG_ASSERT(block->hash_link->diskpos == filepos);
3102 }
3103 else if (page_st == PAGE_TO_BE_READ &&
3104 (offset || (read_length < keycache->key_cache_block_size)))
3105 {
3106 /*
3107 this is a primary request for a block to be read into the
3108 cache and the supplied data does not fill the whole block.
3109
3110 This function is called on behalf of a LOAD INDEX INTO CACHE
3111 statement, which is a read-only task and allows other
3112 readers. It is possible that a parallel running reader tries
3113 to access this block. If it needs more data than has been
3114 supplied here, it would report an error. To be sure that we
3115 have all data in the block that is available in the file, we
3116 read the block ourselves.
3117
3118 Though reading again what the caller did read already is an
3119 expensive operation, we need to do this for correctness.
3120 */
3121 read_block_primary(keycache, block, keycache->key_cache_block_size,
3122 read_length + offset);
3123 }
3124 else if (page_st == PAGE_TO_BE_READ)
3125 {
3126 /*
3127 This is a new block in the cache. If we come here, we have
3128 data for the whole block.
3129 */
3130 DBUG_ASSERT(block->hash_link->requests);
3131 DBUG_ASSERT(block->status & BLOCK_IN_USE);
3132 DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
3133 (block->status & BLOCK_READ));
3134
3135#if !defined(SERIALIZED_READ_FROM_CACHE)
3136 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3137 /*
3138 Here other threads may step in and register as secondary readers.
3139 They will register in block->wqueue[COND_FOR_REQUESTED].
3140 */
3141#endif
3142
3143 /* Copy data from buff */
3144 memcpy(block->buffer+offset, buff, (size_t) read_length);
3145
3146#if !defined(SERIALIZED_READ_FROM_CACHE)
3147 keycache_pthread_mutex_lock(&keycache->cache_lock);
3148 DBUG_ASSERT(block->status & BLOCK_IN_USE);
3149 DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
3150 (block->status & BLOCK_READ));
3151#endif
3152 /*
3153 After the data is in the buffer, we can declare the block
3154 valid. Now other threads do not need to register as
3155 secondary readers any more. They can immediately access the
3156 block.
3157 */
3158 block->status|= BLOCK_READ;
3159 block->length= read_length+offset;
3160 /*
3161 Do not set block->offset here. If this block is marked
3162 BLOCK_CHANGED later, we want to flush only the modified part. So
3163 only a writer may set block->offset down from
3164 keycache->key_cache_block_size.
3165 */
3166 KEYCACHE_DBUG_PRINT("key_cache_insert",
3167 ("primary request: new page in cache"));
3168 /* Signal all pending requests. */
3169 release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
3170 }
3171 else
3172 {
3173 /*
3174 page_st == PAGE_READ. The block is in the buffer. All data
3175 must already be present. Blocks are always read with all
3176 data available on file. Assert that the block does not have
3177 less contents than the preloader supplies. If the caller has
3178 data beyond block->length, it means that a file write has
3179 been done while this block was in cache and not extended
3180 with the new data. If the condition is met, we can simply
3181 ignore the block.
3182 */
3183 DBUG_ASSERT((page_st == PAGE_READ) &&
3184 (read_length + offset <= block->length));
3185 }
3186
3187 /*
3188 A secondary request must now have the block assigned to the
3189 requested file block. It does not hurt to check it for primary
3190 requests too.
3191 */
3192 DBUG_ASSERT(block->hash_link->file == file);
3193 DBUG_ASSERT(block->hash_link->diskpos == filepos);
3194 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
3195 } /* end of if (!(block->status & BLOCK_ERROR)) */
3196
3197 remove_reader(block);
3198
3199 /* Error injection for coverage testing. */
3200 DBUG_EXECUTE_IF("key_cache_insert_block_error",
3201 block->status|= BLOCK_ERROR; errno=EIO;);
3202
3203 /* Do not link erroneous blocks into the LRU ring, but free them. */
3204 if (!(block->status & BLOCK_ERROR))
3205 {
3206 /*
3207 Link the block into the LRU ring if it's the last submitted
3208 request for the block. This enables eviction for the block.
3209 */
3210 unreg_request(keycache, block, 1);
3211 }
3212 else
3213 {
3214 free_block(keycache, block);
3215 error= 1;
3216 break;
3217 }
3218
3219 buff+= read_length;
3220 filepos+= read_length+offset;
3221 offset= 0;
3222
3223 } while ((length-= read_length));
3224
3225 no_key_cache:
3226 if (locked_and_incremented)
3227 dec_counter_for_resize_op(keycache);
3228 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3229 }
3230 DBUG_RETURN(error);
3231}
3232
3233
3234/*
3235 Write a buffer into a simple key cache
3236
3237 SYNOPSIS
3238
3239 simple_key_cache_write()
3240 keycache pointer to the control block of a simple key cache
3241 file handler for the file to write data to
3242 file_extra maps of key cache partitions containing
3243 dirty pages from file
3244 filepos position in the file to write data to
3245 level determines the weight of the data
3246 buff buffer with the data
3247 length length of the buffer
3248 dont_write if is 0 then all dirty pages involved in writing
3249 should have been flushed from key cache
3250
3251 DESCRIPTION
3252 This function is the implementation of the key_cache_write interface
3253 function that is employed by simple (non-partitioned) key caches.
3254 The function takes the parameter keycache as a pointer to the
3255 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
3256 cache.
3257 In a general case the function copies data from a buffer into the key
3258 cache. The buffer is specified with the parameters buff and length -
3259 the pointer to the beginning of the buffer and its size respectively.
3260 It's assumed the buffer contains the data to be written into 'file'
3261 starting from the position filepos. The data is copied from the buffer
3262 in key_cache_block_size increments.
3263 If the value of the parameter dont_write is FALSE then the function
3264 also writes the data into file.
3265 The parameter level is used to set one characteristic for the key buffers
3266 filled with the data from buff. The characteristic is employed only by
3267 the midpoint insertion strategy.
3268 The parameter file_extra currently makes sense only for simple key caches
3269 that are elements of a partitioned key cache. It provides a pointer to the
3270 shared bitmap of the partitions that may contains dirty pages for the file.
3271 This bitmap is used to optimize the function
3272 flush_partitioned_key_cache_blocks.
3273
3274 RETURN VALUE
3275 0 if a success, 1 - otherwise.
3276
3277 NOTES
3278 This implementation exploits the fact that the function is called only
3279 when a thread has got an exclusive lock for the key file.
3280*/
3281
3282static
3283int simple_key_cache_write(SIMPLE_KEY_CACHE_CB *keycache,
3284 File file, void *file_extra __attribute__((unused)),
3285 my_off_t filepos, int level,
3286 uchar *buff, uint length,
3287 uint block_length __attribute__((unused)),
3288 int dont_write)
3289{
3290 my_bool locked_and_incremented= FALSE;
3291 int error=0;
3292 DBUG_ENTER("simple_key_cache_write");
3293 DBUG_PRINT("enter",
3294 ("fd: %u pos: %lu length: %u block_length: %u"
3295 " key_block_length: %u",
3296 (uint) file, (ulong) filepos, length, block_length,
3297 keycache ? keycache->key_cache_block_size : 0));
3298
3299 if (!dont_write)
3300 {
3301 /* purecov: begin inspected */
3302 /* Not used in the server. */
3303 /* Force writing from buff into disk. */
3304 keycache->global_cache_w_requests++;
3305 keycache->global_cache_write++;
3306 if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
3307 DBUG_RETURN(1);
3308 /* purecov: end */
3309 }
3310
3311#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3312 DBUG_EXECUTE("check_keycache",
3313 test_key_cache(keycache, "start of key_cache_write", 1););
3314#endif
3315
3316 if (keycache->key_cache_inited)
3317 {
3318 /* Key cache is used */
3319 reg1 BLOCK_LINK *block;
3320 uint read_length;
3321 uint offset;
3322 int page_st;
3323
3324 if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
3325 {
3326 MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
3327 (ulong) (keycache->blocks_used *
3328 keycache->key_cache_block_size),
3329 (ulong) (keycache->blocks_unused *
3330 keycache->key_cache_block_size));
3331 }
3332
3333 /*
3334 When the key cache is once initialized, we use the cache_lock to
3335 reliably distinguish the cases of normal operation, resizing, and
3336 disabled cache. We always increment and decrement
3337 'cnt_for_resize_op' so that a resizer can wait for pending I/O.
3338 */
3339 keycache_pthread_mutex_lock(&keycache->cache_lock);
3340 /*
3341 Cache resizing has two phases: Flushing and re-initializing. In
3342 the flush phase write requests can modify dirty blocks that are
3343 not yet in flush. Otherwise they are allowed to bypass the cache.
3344 find_key_block() returns NULL in both cases (clean blocks and
3345 non-cached blocks).
3346
3347 After the flush phase new I/O requests must wait until the
3348 re-initialization is done. The re-initialization can be done only
3349 if no I/O request is in progress. The reason is that
3350 key_cache_block_size can change. With enabled cache I/O is done in
3351 chunks of key_cache_block_size. Every chunk tries to use a cache
3352 block first. If the block size changes in the middle, a block
3353 could be missed and data could be written below a cached block.
3354 */
3355 while (keycache->in_resize && !keycache->resize_in_flush)
3356 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
3357 /* Register the I/O for the next resize. */
3358 inc_counter_for_resize_op(keycache);
3359 locked_and_incremented= TRUE;
3360 /* Requested data may not always be aligned to cache blocks. */
3361 offset= (uint) (filepos % keycache->key_cache_block_size);
3362 /* Write data in key_cache_block_size increments. */
3363 do
3364 {
3365 /* Cache could be disabled in a later iteration. */
3366 if (!keycache->can_be_used)
3367 goto no_key_cache;
3368
3369 MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
3370 /* Start writing at the beginning of the cache block. */
3371 filepos-= offset;
3372 /* Do not write beyond the end of the cache block. */
3373 read_length= length;
3374 set_if_smaller(read_length, keycache->key_cache_block_size-offset);
3375 KEYCACHE_DBUG_ASSERT(read_length > 0);
3376
3377 /* Request the cache block that matches file/pos. */
3378 keycache->global_cache_w_requests++;
3379 block= find_key_block(keycache, file, filepos, level, 1, &page_st);
3380 if (!block)
3381 {
3382 /*
3383 This happens only for requests submitted during key cache
3384 resize. The block is not in the cache and shall not go in.
3385 Write directly to file.
3386 */
3387 if (dont_write)
3388 {
3389 /* Used in the server. */
3390 keycache->global_cache_write++;
3391 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3392 if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
3393 MYF(MY_NABP | MY_WAIT_IF_FULL)))
3394 error=1;
3395 keycache_pthread_mutex_lock(&keycache->cache_lock);
3396 }
3397 goto next_block;
3398 }
3399 /*
3400 Prevent block from flushing and from being selected for to be
3401 freed. This must be set when we release the cache_lock.
3402 However, we must not set the status of the block before it is
3403 assigned to this file/pos.
3404 */
3405 if (page_st != PAGE_WAIT_TO_BE_READ)
3406 block->status|= BLOCK_FOR_UPDATE;
3407 /*
3408 We must read the file block first if it is not yet in the cache
3409 and we do not replace all of its contents.
3410
3411 In cases where the cache block is big enough to contain (parts
3412 of) index blocks of different indexes, our request can be
3413 secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
3414 reading the file block. If the read completes after us, it
3415 overwrites our new contents with the old contents. So we have to
3416 wait for the other thread to complete the read of this block.
3417 read_block_primary|secondary() takes care for the wait.
3418 */
3419 if (!(block->status & BLOCK_ERROR))
3420 {
3421 if (page_st == PAGE_TO_BE_READ &&
3422 (offset || read_length < keycache->key_cache_block_size))
3423 {
3424 read_block_primary(keycache, block,
3425 offset + read_length >= keycache->key_cache_block_size?
3426 offset : keycache->key_cache_block_size,
3427 offset);
3428 /*
3429 Prevent block from flushing and from being selected for to be
3430 freed. This must be set when we release the cache_lock.
3431 Here we set it in case we could not set it above.
3432 */
3433 block->status|= BLOCK_FOR_UPDATE;
3434 }
3435 else if (page_st == PAGE_WAIT_TO_BE_READ)
3436 {
3437 read_block_secondary(keycache, block);
3438 block->status|= BLOCK_FOR_UPDATE;
3439 }
3440 }
3441 /*
3442 The block should always be assigned to the requested file block
3443 here. It need not be BLOCK_READ when overwriting the whole block.
3444 */
3445 DBUG_ASSERT(block->hash_link->file == file);
3446 DBUG_ASSERT(block->hash_link->diskpos == filepos);
3447 DBUG_ASSERT(block->status & BLOCK_IN_USE);
3448 DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
3449 /*
3450 The block to be written must not be marked BLOCK_REASSIGNED.
3451 Otherwise it could be freed in dirty state or reused without
3452 another flush during eviction. It must also not be in flush.
3453 Otherwise the old contens may have been flushed already and
3454 the flusher could clear BLOCK_CHANGED without flushing the
3455 new changes again.
3456 */
3457 DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3458
3459 while (block->status & BLOCK_IN_FLUSHWRITE)
3460 {
3461 /*
3462 Another thread is flushing the block. It was dirty already.
3463 Wait until the block is flushed to file. Otherwise we could
3464 modify the buffer contents just while it is written to file.
3465 An unpredictable file block contents would be the result.
3466 While we wait, several things can happen to the block,
3467 including another flush. But the block cannot be reassigned to
3468 another hash_link until we release our request on it.
3469 */
3470 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
3471 DBUG_ASSERT(keycache->can_be_used);
3472 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
3473 /* Still must not be marked for free. */
3474 DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3475 DBUG_ASSERT(block->hash_link && (block->hash_link->block == block));
3476 }
3477
3478 /*
3479 We could perhaps release the cache_lock during access of the
3480 data like in the other functions. Locks outside of the key cache
3481 assure that readers and a writer do not access the same range of
3482 data. Parallel accesses should happen only if the cache block
3483 contains multiple index block(fragment)s. So different parts of
3484 the buffer would be read/written. An attempt to flush during
3485 memcpy() is prevented with BLOCK_FOR_UPDATE.
3486 */
3487 if (!(block->status & BLOCK_ERROR))
3488 {
3489#if !defined(SERIALIZED_READ_FROM_CACHE)
3490 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3491#endif
3492 memcpy(block->buffer+offset, buff, (size_t) read_length);
3493
3494#if !defined(SERIALIZED_READ_FROM_CACHE)
3495 keycache_pthread_mutex_lock(&keycache->cache_lock);
3496#endif
3497 }
3498
3499 if (!dont_write)
3500 {
3501 /* Not used in the server. buff has been written to disk at start. */
3502 if ((block->status & BLOCK_CHANGED) &&
3503 (!offset && read_length >= keycache->key_cache_block_size))
3504 link_to_file_list(keycache, block, block->hash_link->file, 1);
3505 }
3506 else if (! (block->status & BLOCK_CHANGED))
3507 link_to_changed_list(keycache, block);
3508 block->status|=BLOCK_READ;
3509 /*
3510 Allow block to be selected for to be freed. Since it is marked
3511 BLOCK_CHANGED too, it won't be selected for to be freed without
3512 a flush.
3513 */
3514 block->status&= ~BLOCK_FOR_UPDATE;
3515 set_if_smaller(block->offset, offset);
3516 set_if_bigger(block->length, read_length+offset);
3517
3518 /* Threads may be waiting for the changes to be complete. */
3519 release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
3520
3521 /*
3522 If only a part of the cache block is to be replaced, and the
3523 rest has been read from file, then the cache lock has been
3524 released for I/O and it could be possible that another thread
3525 wants to evict or free the block and waits for it to be
3526 released. So we must not just decrement hash_link->requests, but
3527 also wake a waiting thread.
3528 */
3529 remove_reader(block);
3530
3531 /* Error injection for coverage testing. */
3532 DBUG_EXECUTE_IF("key_cache_write_block_error",
3533 block->status|= BLOCK_ERROR;);
3534
3535 /* Do not link erroneous blocks into the LRU ring, but free them. */
3536 if (!(block->status & BLOCK_ERROR))
3537 {
3538 /*
3539 Link the block into the LRU ring if it's the last submitted
3540 request for the block. This enables eviction for the block.
3541 */
3542 unreg_request(keycache, block, 1);
3543 }
3544 else
3545 {
3546 /* Pretend a "clean" block to avoid complications. */
3547 block->status&= ~(BLOCK_CHANGED);
3548 free_block(keycache, block);
3549 error= 1;
3550 break;
3551 }
3552
3553 next_block:
3554 buff+= read_length;
3555 filepos+= read_length+offset;
3556 offset= 0;
3557
3558 } while ((length-= read_length));
3559 goto end;
3560 }
3561
3562no_key_cache:
3563 /* Key cache is not used */
3564 if (dont_write)
3565 {
3566 /* Used in the server. */
3567 keycache->global_cache_w_requests++;
3568 keycache->global_cache_write++;
3569 if (locked_and_incremented)
3570 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3571 if (my_pwrite(file, (uchar*) buff, length, filepos,
3572 MYF(MY_NABP | MY_WAIT_IF_FULL)))
3573 error=1;
3574 if (locked_and_incremented)
3575 keycache_pthread_mutex_lock(&keycache->cache_lock);
3576 }
3577
3578end:
3579 if (locked_and_incremented)
3580 {
3581 dec_counter_for_resize_op(keycache);
3582 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3583 }
3584
3585 if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
3586 {
3587 MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
3588 keycache->key_cache_block_size),
3589 (ulong) (keycache->blocks_unused *
3590 keycache->key_cache_block_size));
3591 }
3592
3593#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3594 DBUG_EXECUTE("exec",
3595 test_key_cache(keycache, "end of key_cache_write", 1););
3596#endif
3597 DBUG_RETURN(error);
3598}
3599
3600
3601/*
3602 Free block.
3603
3604 SYNOPSIS
3605 free_block()
3606 keycache Pointer to a key cache data structure
3607 block Pointer to the block to free
3608
3609 DESCRIPTION
3610 Remove reference to block from hash table.
3611 Remove block from the chain of clean blocks.
3612 Add block to the free list.
3613
3614 NOTE
3615 Block must not be free (status == 0).
3616 Block must not be in free_block_list.
3617 Block must not be in the LRU ring.
3618 Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3619 Block must not be in free (BLOCK_REASSIGNED).
3620 Block must not be in flush (BLOCK_IN_FLUSH).
3621 Block must not be dirty (BLOCK_CHANGED).
3622 Block must not be in changed_blocks (dirty) hash.
3623 Block must be in file_blocks (clean) hash.
3624 Block must refer to a hash_link.
3625 Block must have a request registered on it.
3626*/
3627
3628static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block)
3629{
3630 KEYCACHE_THREAD_TRACE("free block");
3631 KEYCACHE_DBUG_PRINT("free_block",
3632 ("block %u to be freed, hash_link %p status: %u",
3633 BLOCK_NUMBER(block), block->hash_link,
3634 block->status));
3635 /*
3636 Assert that the block is not free already. And that it is in a clean
3637 state. Note that the block might just be assigned to a hash_link and
3638 not yet read (BLOCK_READ may not be set here). In this case a reader
3639 is registered in the hash_link and free_block() will wait for it
3640 below.
3641 */
3642 DBUG_ASSERT((block->status & BLOCK_IN_USE) &&
3643 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3644 BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
3645 BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
3646 /* Assert that the block is in a file_blocks chain. */
3647 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
3648 /* Assert that the block is not in the LRU ring. */
3649 DBUG_ASSERT(!block->next_used && !block->prev_used);
3650 /*
3651 IMHO the below condition (if()) makes no sense. I can't see how it
3652 could be possible that free_block() is entered with a NULL hash_link
3653 pointer. The only place where it can become NULL is in free_block()
3654 (or before its first use ever, but for those blocks free_block() is
3655 not called). I don't remove the conditional as it cannot harm, but
3656 place an DBUG_ASSERT to confirm my hypothesis. Eventually the
3657 condition (if()) can be removed.
3658 */
3659 DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
3660 if (block->hash_link)
3661 {
3662 /*
3663 While waiting for readers to finish, new readers might request the
3664 block. But since we set block->status|= BLOCK_REASSIGNED, they
3665 will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
3666 later.
3667 */
3668 block->status|= BLOCK_REASSIGNED;
3669 wait_for_readers(keycache, block);
3670 /*
3671 The block must not have been freed by another thread. Repeat some
3672 checks. An additional requirement is that it must be read now
3673 (BLOCK_READ).
3674 */
3675 DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
3676 DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE |
3677 BLOCK_REASSIGNED)) &&
3678 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3679 BLOCK_IN_FLUSH | BLOCK_CHANGED |
3680 BLOCK_FOR_UPDATE)));
3681 DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
3682 DBUG_ASSERT(!block->prev_used);
3683 /*
3684 Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
3685 thread (through unreg_request() below), other threads must not see
3686 this flag. They could become confused.
3687 */
3688 block->status&= ~BLOCK_REASSIGNED;
3689 /*
3690 Do not release the hash_link until the block is off all lists.
3691 At least not if we hand it over for eviction in unreg_request().
3692 */
3693 }
3694
3695 /*
3696 Unregister the block request and link the block into the LRU ring.
3697 This enables eviction for the block. If the LRU ring was empty and
3698 threads are waiting for a block, then the block wil be handed over
3699 for eviction immediately. Otherwise we will unlink it from the LRU
3700 ring again, without releasing the lock in between. So decrementing
3701 the request counter and updating statistics are the only relevant
3702 operation in this case. Assert that there are no other requests
3703 registered.
3704 */
3705 DBUG_ASSERT(block->requests == 1);
3706 unreg_request(keycache, block, 0);
3707 /*
3708 Note that even without releasing the cache lock it is possible that
3709 the block is immediately selected for eviction by link_block() and
3710 thus not added to the LRU ring. In this case we must not touch the
3711 block any more.
3712 */
3713 if (block->status & BLOCK_IN_EVICTION)
3714 return;
3715
3716 /* Error blocks are not put into the LRU ring. */
3717 if (!(block->status & BLOCK_ERROR))
3718 {
3719 /* Here the block must be in the LRU ring. Unlink it again. */
3720 DBUG_ASSERT(block->next_used && block->prev_used &&
3721 *block->prev_used == block);
3722 unlink_block(keycache, block);
3723 }
3724 if (block->temperature == BLOCK_WARM)
3725 keycache->warm_blocks--;
3726 block->temperature= BLOCK_COLD;
3727
3728 /* Remove from file_blocks hash. */
3729 unlink_changed(block);
3730
3731 /* Remove reference to block from hash table. */
3732 unlink_hash(keycache, block->hash_link);
3733 block->hash_link= NULL;
3734
3735 block->status= 0;
3736 block->length= 0;
3737 block->offset= keycache->key_cache_block_size;
3738 KEYCACHE_THREAD_TRACE("free block");
3739 KEYCACHE_DBUG_PRINT("free_block", ("block is freed"));
3740
3741 /* Enforced by unlink_changed(), but just to be sure. */
3742 DBUG_ASSERT(!block->next_changed && !block->prev_changed);
3743 /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
3744 DBUG_ASSERT(!block->next_used && !block->prev_used);
3745 /* Insert the free block in the free list. */
3746 block->next_used= keycache->free_block_list;
3747 keycache->free_block_list= block;
3748 /* Keep track of the number of currently unused blocks. */
3749 keycache->blocks_unused++;
3750
3751 /* All pending requests for this page must be resubmitted. */
3752 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3753}
3754
3755
3756static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
3757{
3758 return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
3759 ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
3760}
3761
3762
3763/*
3764 Flush a portion of changed blocks to disk,
3765 free used blocks if requested
3766*/
3767
3768static int flush_cached_blocks(SIMPLE_KEY_CACHE_CB *keycache,
3769 File file, BLOCK_LINK **cache,
3770 BLOCK_LINK **end,
3771 enum flush_type type)
3772{
3773 int error;
3774 int last_errno= 0;
3775 uint count= (uint) (end-cache);
3776
3777 /* Don't lock the cache during the flush */
3778 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3779 /*
3780 As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
3781 we are guarunteed no thread will change them
3782 */
3783 my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
3784
3785 keycache_pthread_mutex_lock(&keycache->cache_lock);
3786 /*
3787 Note: Do not break the loop. We have registered a request on every
3788 block in 'cache'. These must be unregistered by free_block() or
3789 unreg_request().
3790 */
3791 for ( ; cache != end ; cache++)
3792 {
3793 BLOCK_LINK *block= *cache;
3794
3795 KEYCACHE_DBUG_PRINT("flush_cached_blocks",
3796 ("block %u to be flushed", BLOCK_NUMBER(block)));
3797 /*
3798 If the block contents is going to be changed, we abandon the flush
3799 for this block. flush_key_blocks_int() will restart its search and
3800 handle the block properly.
3801 */
3802 if (!(block->status & BLOCK_FOR_UPDATE))
3803 {
3804 /* Blocks coming here must have a certain status. */
3805 DBUG_ASSERT(block->hash_link);
3806 DBUG_ASSERT(block->hash_link->block == block);
3807 DBUG_ASSERT(block->hash_link->file == file);
3808 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
3809 (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3810 block->status|= BLOCK_IN_FLUSHWRITE;
3811 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3812 error= (int)my_pwrite(file, block->buffer + block->offset,
3813 block->length - block->offset,
3814 block->hash_link->diskpos + block->offset,
3815 MYF(MY_NABP | MY_WAIT_IF_FULL));
3816 keycache_pthread_mutex_lock(&keycache->cache_lock);
3817 keycache->global_cache_write++;
3818 if (error)
3819 {
3820 block->status|= BLOCK_ERROR;
3821 if (!last_errno)
3822 last_errno= errno ? errno : -1;
3823 }
3824 block->status&= ~BLOCK_IN_FLUSHWRITE;
3825 /* Block must not have changed status except BLOCK_FOR_UPDATE. */
3826 DBUG_ASSERT(block->hash_link);
3827 DBUG_ASSERT(block->hash_link->block == block);
3828 DBUG_ASSERT(block->hash_link->file == file);
3829 DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
3830 (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3831 /*
3832 Set correct status and link in right queue for free or later use.
3833 free_block() must not see BLOCK_CHANGED and it may need to wait
3834 for readers of the block. These should not see the block in the
3835 wrong hash. If not freeing the block, we need to have it in the
3836 right queue anyway.
3837 */
3838 link_to_file_list(keycache, block, file, 1);
3839 }
3840 block->status&= ~BLOCK_IN_FLUSH;
3841 /*
3842 Let to proceed for possible waiting requests to write to the block page.
3843 It might happen only during an operation to resize the key cache.
3844 */
3845 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3846 /* type will never be FLUSH_IGNORE_CHANGED here */
3847 if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
3848 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3849 BLOCK_FOR_UPDATE)))
3850 {
3851 /*
3852 Note that a request has been registered against the block in
3853 flush_key_blocks_int().
3854 */
3855 free_block(keycache, block);
3856 }
3857 else
3858 {
3859 /*
3860 Link the block into the LRU ring if it's the last submitted
3861 request for the block. This enables eviction for the block.
3862 Note that a request has been registered against the block in
3863 flush_key_blocks_int().
3864 */
3865 unreg_request(keycache, block, 1);
3866 }
3867
3868 } /* end of for ( ; cache != end ; cache++) */
3869 return last_errno;
3870}
3871
3872
3873/*
3874 Flush all key blocks for a file to disk, but don't do any mutex locks
3875
3876 SYNOPSIS
3877 flush_key_blocks_int()
3878 keycache pointer to a key cache data structure
3879 file handler for the file to flush to
3880 flush_type type of the flush
3881
3882 NOTES
3883 This function doesn't do any mutex locks because it needs to be called both
3884 from flush_key_blocks and flush_all_key_blocks (the later one does the
3885 mutex lock in the resize_key_cache() function).
3886
3887 We do only care about changed blocks that exist when the function is
3888 entered. We do not guarantee that all changed blocks of the file are
3889 flushed if more blocks change while this function is running.
3890
3891 RETURN
3892 0 ok
3893 1 error
3894*/
3895
3896static int flush_key_blocks_int(SIMPLE_KEY_CACHE_CB *keycache,
3897 File file, enum flush_type type)
3898{
3899 BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3900 int last_errno= 0;
3901 int last_errcnt= 0;
3902 DBUG_ENTER("flush_key_blocks_int");
3903 DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
3904 file, keycache->blocks_used, keycache->blocks_changed));
3905
3906#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3907 DBUG_EXECUTE("check_keycache",
3908 test_key_cache(keycache, "start of flush_key_blocks", 0););
3909#endif
3910
3911 DBUG_ASSERT(type != FLUSH_KEEP_LAZY);
3912 cache= cache_buff;
3913 if (keycache->disk_blocks > 0 &&
3914 (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
3915 {
3916 /* Key cache exists and flush is not disabled */
3917 int error= 0;
3918 uint count= FLUSH_CACHE;
3919 BLOCK_LINK **pos,**end;
3920 BLOCK_LINK *first_in_switch= NULL;
3921 BLOCK_LINK *last_in_flush;
3922 BLOCK_LINK *last_for_update;
3923 BLOCK_LINK *block, *next;
3924#if defined(KEYCACHE_DEBUG)
3925 uint cnt=0;
3926#endif
3927
3928 if (type != FLUSH_IGNORE_CHANGED)
3929 {
3930 /*
3931 Count how many key blocks we have to cache to be able
3932 to flush all dirty pages with minimum seek moves
3933 */
3934 count= 0;
3935 for (block= keycache->changed_blocks[FILE_HASH(file, keycache)] ;
3936 block ;
3937 block= block->next_changed)
3938 {
3939 if ((block->hash_link->file == file) &&
3940 !(block->status & BLOCK_IN_FLUSH))
3941 {
3942 count++;
3943 KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
3944 }
3945 }
3946 /*
3947 Allocate a new buffer only if its bigger than the one we have.
3948 Assure that we always have some entries for the case that new
3949 changed blocks appear while we need to wait for something.
3950 */
3951 if ((count > FLUSH_CACHE) &&
3952 !(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
3953 MYF(0))))
3954 cache= cache_buff;
3955 /*
3956 After a restart there could be more changed blocks than now.
3957 So we should not let count become smaller than the fixed buffer.
3958 */
3959 if (cache == cache_buff)
3960 count= FLUSH_CACHE;
3961 }
3962
3963 /* Retrieve the blocks and write them to a buffer to be flushed */
3964restart:
3965 last_in_flush= NULL;
3966 last_for_update= NULL;
3967 end= (pos= cache)+count;
3968 for (block= keycache->changed_blocks[FILE_HASH(file, keycache)] ;
3969 block ;
3970 block= next)
3971 {
3972#if defined(KEYCACHE_DEBUG)
3973 cnt++;
3974 KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
3975#endif
3976 next= block->next_changed;
3977 if (block->hash_link->file == file)
3978 {
3979 if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3980 {
3981 /*
3982 Note: The special handling of BLOCK_IN_SWITCH is obsolete
3983 since we set BLOCK_IN_FLUSH if the eviction includes a
3984 flush. It can be removed in a later version.
3985 */
3986 if (!(block->status & BLOCK_IN_SWITCH))
3987 {
3988 /*
3989 We care only for the blocks for which flushing was not
3990 initiated by another thread and which are not in eviction.
3991 Registering a request on the block unlinks it from the LRU
3992 ring and protects against eviction.
3993 */
3994 reg_requests(keycache, block, 1);
3995 if (type != FLUSH_IGNORE_CHANGED)
3996 {
3997 /* It's not a temporary file */
3998 if (pos == end)
3999 {
4000 /*
4001 This should happen relatively seldom. Remove the
4002 request because we won't do anything with the block
4003 but restart and pick it again in the next iteration.
4004 */
4005 unreg_request(keycache, block, 0);
4006 /*
4007 This happens only if there is not enough
4008 memory for the big block
4009 */
4010 if ((error= flush_cached_blocks(keycache, file, cache,
4011 end,type)))
4012 {
4013 /* Do not loop infinitely trying to flush in vain. */
4014 if ((last_errno == error) && (++last_errcnt > 5))
4015 goto err;
4016 last_errno= error;
4017 }
4018 /*
4019 Restart the scan as some other thread might have changed
4020 the changed blocks chain: the blocks that were in switch
4021 state before the flush started have to be excluded
4022 */
4023 goto restart;
4024 }
4025 /*
4026 Mark the block with BLOCK_IN_FLUSH in order not to let
4027 other threads to use it for new pages and interfere with
4028 our sequence of flushing dirty file pages. We must not
4029 set this flag before actually putting the block on the
4030 write burst array called 'cache'.
4031 */
4032 block->status|= BLOCK_IN_FLUSH;
4033 /* Add block to the array for a write burst. */
4034 *pos++= block;
4035 }
4036 else
4037 {
4038 /* It's a temporary file */
4039 DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
4040 /*
4041 free_block() must not be called with BLOCK_CHANGED. Note
4042 that we must not change the BLOCK_CHANGED flag outside of
4043 link_to_file_list() so that it is always in the correct
4044 queue and the *blocks_changed counters are correct.
4045 */
4046 link_to_file_list(keycache, block, file, 1);
4047 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
4048 {
4049 /* A request has been registered against the block above. */
4050 free_block(keycache, block);
4051 }
4052 else
4053 {
4054 /*
4055 Link the block into the LRU ring if it's the last
4056 submitted request for the block. This enables eviction
4057 for the block. A request has been registered against
4058 the block above.
4059 */
4060 unreg_request(keycache, block, 1);
4061 }
4062 }
4063 }
4064 else
4065 {
4066 /*
4067 Link the block into a list of blocks 'in switch'.
4068
4069 WARNING: Here we introduce a place where a changed block
4070 is not in the changed_blocks hash! This is acceptable for
4071 a BLOCK_IN_SWITCH. Never try this for another situation.
4072 Other parts of the key cache code rely on changed blocks
4073 being in the changed_blocks hash.
4074 */
4075 unlink_changed(block);
4076 link_changed(block, &first_in_switch);
4077 }
4078 }
4079 else if (type != FLUSH_KEEP)
4080 {
4081 /*
4082 During the normal flush at end of statement (FLUSH_KEEP) we
4083 do not need to ensure that blocks in flush or update by
4084 other threads are flushed. They will be flushed by them
4085 later. In all other cases we must assure that we do not have
4086 any changed block of this file in the cache when this
4087 function returns.
4088 */
4089 if (block->status & BLOCK_IN_FLUSH)
4090 {
4091 /* Remember the last block found to be in flush. */
4092 last_in_flush= block;
4093 }
4094 else
4095 {
4096 /* Remember the last block found to be selected for update. */
4097 last_for_update= block;
4098 }
4099 }
4100 }
4101 }
4102 if (pos != cache)
4103 {
4104 if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
4105 {
4106 /* Do not loop inifnitely trying to flush in vain. */
4107 if ((last_errno == error) && (++last_errcnt > 5))
4108 goto err;
4109 last_errno= error;
4110 }
4111 /*
4112 Do not restart here during the normal flush at end of statement
4113 (FLUSH_KEEP). We have now flushed at least all blocks that were
4114 changed when entering this function. In all other cases we must
4115 assure that we do not have any changed block of this file in the
4116 cache when this function returns.
4117 */
4118 if (type != FLUSH_KEEP)
4119 goto restart;
4120 }
4121 if (last_in_flush)
4122 {
4123 /*
4124 There are no blocks to be flushed by this thread, but blocks in
4125 flush by other threads. Wait until one of the blocks is flushed.
4126 Re-check the condition for last_in_flush. We may have unlocked
4127 the cache_lock in flush_cached_blocks(). The state of the block
4128 could have changed.
4129 */
4130 if (last_in_flush->status & BLOCK_IN_FLUSH)
4131 wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
4132 &keycache->cache_lock);
4133 /* Be sure not to lose a block. They may be flushed in random order. */
4134 goto restart;
4135 }
4136 if (last_for_update)
4137 {
4138 /*
4139 There are no blocks to be flushed by this thread, but blocks for
4140 update by other threads. Wait until one of the blocks is updated.
4141 Re-check the condition for last_for_update. We may have unlocked
4142 the cache_lock in flush_cached_blocks(). The state of the block
4143 could have changed.
4144 */
4145 if (last_for_update->status & BLOCK_FOR_UPDATE)
4146 wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
4147 &keycache->cache_lock);
4148 /* The block is now changed. Flush it. */
4149 goto restart;
4150 }
4151
4152 /*
4153 Wait until the list of blocks in switch is empty. The threads that
4154 are switching these blocks will relink them to clean file chains
4155 while we wait and thus empty the 'first_in_switch' chain.
4156 */
4157 while (first_in_switch)
4158 {
4159#if defined(KEYCACHE_DEBUG)
4160 cnt= 0;
4161#endif
4162 wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
4163 &keycache->cache_lock);
4164#if defined(KEYCACHE_DEBUG)
4165 cnt++;
4166 KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
4167#endif
4168 /*
4169 Do not restart here. We have flushed all blocks that were
4170 changed when entering this function and were not marked for
4171 eviction. Other threads have now flushed all remaining blocks in
4172 the course of their eviction.
4173 */
4174 }
4175
4176 if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
4177 {
4178 BLOCK_LINK *last_in_switch= NULL;
4179 uint total_found= 0;
4180 uint found;
4181 last_for_update= NULL;
4182
4183 /*
4184 Finally free all clean blocks for this file.
4185 During resize this may be run by two threads in parallel.
4186 */
4187 do
4188 {
4189 found= 0;
4190 for (block= keycache->file_blocks[FILE_HASH(file, keycache)] ;
4191 block ;
4192 block= next)
4193 {
4194 /* Remember the next block. After freeing we cannot get at it. */
4195 next= block->next_changed;
4196
4197 /* Changed blocks cannot appear in the file_blocks hash. */
4198 DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
4199 if (block->hash_link->file == file)
4200 {
4201 /* We must skip blocks that will be changed. */
4202 if (block->status & BLOCK_FOR_UPDATE)
4203 {
4204 last_for_update= block;
4205 continue;
4206 }
4207
4208 /*
4209 We must not free blocks in eviction (BLOCK_IN_EVICTION |
4210 BLOCK_IN_SWITCH) or blocks intended to be freed
4211 (BLOCK_REASSIGNED).
4212 */
4213 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
4214 BLOCK_REASSIGNED)))
4215 {
4216 struct st_hash_link *UNINIT_VAR(next_hash_link);
4217 my_off_t UNINIT_VAR(next_diskpos);
4218 File UNINIT_VAR(next_file);
4219 uint UNINIT_VAR(next_status);
4220 uint UNINIT_VAR(hash_requests);
4221
4222 total_found++;
4223 found++;
4224 KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);
4225
4226 /*
4227 Register a request. This unlinks the block from the LRU
4228 ring and protects it against eviction. This is required
4229 by free_block().
4230 */
4231 reg_requests(keycache, block, 1);
4232
4233 /*
4234 free_block() may need to wait for readers of the block.
4235 This is the moment where the other thread can move the
4236 'next' block from the chain. free_block() needs to wait
4237 if there are requests for the block pending.
4238 */
4239 if (next && (hash_requests= block->hash_link->requests))
4240 {
4241 /* Copy values from the 'next' block and its hash_link. */
4242 next_status= next->status;
4243 next_hash_link= next->hash_link;
4244 next_diskpos= next_hash_link->diskpos;
4245 next_file= next_hash_link->file;
4246 DBUG_ASSERT(next == next_hash_link->block);
4247 }
4248
4249 free_block(keycache, block);
4250 /*
4251 If we had to wait and the state of the 'next' block
4252 changed, break the inner loop. 'next' may no longer be
4253 part of the current chain.
4254
4255 We do not want to break the loop after every free_block(),
4256 not even only after waits. The chain might be quite long
4257 and contain blocks for many files. Traversing it again and
4258 again to find more blocks for this file could become quite
4259 inefficient.
4260 */
4261 if (next && hash_requests &&
4262 ((next_status != next->status) ||
4263 (next_hash_link != next->hash_link) ||
4264 (next_file != next_hash_link->file) ||
4265 (next_diskpos != next_hash_link->diskpos) ||
4266 (next != next_hash_link->block)))
4267 break;
4268 }
4269 else
4270 {
4271 last_in_switch= block;
4272 }
4273 }
4274 } /* end for block in file_blocks */
4275 } while (found);
4276
4277 /*
4278 If any clean block has been found, we may have waited for it to
4279 become free. In this case it could be possible that another clean
4280 block became dirty. This is possible if the write request existed
4281 before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
4282 */
4283 if (total_found)
4284 goto restart;
4285
4286 /*
4287 To avoid an infinite loop, wait until one of the blocks marked
4288 for update is updated.
4289 */
4290 if (last_for_update)
4291 {
4292 /* We did not wait. Block must not have changed status. */
4293 DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE);
4294 wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
4295 &keycache->cache_lock);
4296 goto restart;
4297 }
4298
4299 /*
4300 To avoid an infinite loop wait until one of the blocks marked
4301 for eviction is switched.
4302 */
4303 if (last_in_switch)
4304 {
4305 /* We did not wait. Block must not have changed status. */
4306 DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION |
4307 BLOCK_IN_SWITCH |
4308 BLOCK_REASSIGNED));
4309 wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
4310 &keycache->cache_lock);
4311 goto restart;
4312 }
4313
4314 } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
4315
4316 } /* if (keycache->disk_blocks > 0 */
4317
4318 DBUG_EXECUTE("check_keycache",
4319 test_key_cache(keycache, "end of flush_key_blocks", 0););
4320err:
4321 if (cache != cache_buff)
4322 my_free(cache);
4323 if (last_errno)
4324 errno=last_errno; /* Return first error */
4325 DBUG_RETURN(last_errno != 0);
4326}
4327
4328
4329/*
4330 Flush all blocks for a file from key buffers of a simple key cache
4331
4332 SYNOPSIS
4333
4334 flush_simple_key_blocks()
4335 keycache pointer to the control block of a simple key cache
4336 file handler for the file to flush to
4337 file_extra maps of key cache partitions containing
4338 dirty pages from file (not used)
4339 flush_type type of the flush operation
4340
4341 DESCRIPTION
4342 This function is the implementation of the flush_key_blocks interface
4343 function that is employed by simple (non-partitioned) key caches.
4344 The function takes the parameter keycache as a pointer to the
4345 control block structure of the type S_KEY_CACHE_CB for a simple key
4346 cache.
4347 In a general case the function flushes the data from all dirty key
4348 buffers related to the file 'file' into this file. The function does
4349 exactly this if the value of the parameter type is FLUSH_KEEP. If the
4350 value of this parameter is FLUSH_RELEASE, the function additionally
4351 releases the key buffers containing data from 'file' for new usage.
4352 If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
4353 just releases the key buffers containing data from 'file'.
4354 The parameter file_extra currently is not used by this function.
4355
4356 RETURN
4357 0 ok
4358 1 error
4359
4360 NOTES
4361 This implementation exploits the fact that the function is called only
4362 when a thread has got an exclusive lock for the key file.
4363*/
4364
4365static
4366int flush_simple_key_cache_blocks(SIMPLE_KEY_CACHE_CB *keycache,
4367 File file,
4368 void *file_extra __attribute__((unused)),
4369 enum flush_type type)
4370{
4371 int res= 0;
4372 DBUG_ENTER("flush_key_blocks");
4373 DBUG_PRINT("enter", ("keycache: %p", keycache));
4374
4375 if (!keycache->key_cache_inited)
4376 DBUG_RETURN(0);
4377
4378 keycache_pthread_mutex_lock(&keycache->cache_lock);
4379 /* While waiting for lock, keycache could have been ended. */
4380 if (keycache->disk_blocks > 0)
4381 {
4382 inc_counter_for_resize_op(keycache);
4383 res= flush_key_blocks_int(keycache, file, type);
4384 dec_counter_for_resize_op(keycache);
4385 }
4386 keycache_pthread_mutex_unlock(&keycache->cache_lock);
4387 DBUG_RETURN(res);
4388}
4389
4390
4391/*
4392 Flush all blocks in the key cache to disk.
4393
4394 SYNOPSIS
4395 flush_all_key_blocks()
4396 keycache pointer to key cache root structure
4397
4398 DESCRIPTION
4399
4400 Flushing of the whole key cache is done in two phases.
4401
4402 1. Flush all changed blocks, waiting for them if necessary. Loop
4403 until there is no changed block left in the cache.
4404
4405 2. Free all clean blocks. Normally this means free all blocks. The
4406 changed blocks were flushed in phase 1 and became clean. However we
4407 may need to wait for blocks that are read by other threads. While we
4408 wait, a clean block could become changed if that operation started
4409 before the resize operation started. To be safe we must restart at
4410 phase 1.
4411
4412 When we can run through the changed_blocks and file_blocks hashes
4413 without finding a block any more, then we are done.
4414
4415 Note that we hold keycache->cache_lock all the time unless we need
4416 to wait for something.
4417
4418 RETURN
4419 0 OK
4420 != 0 Error
4421*/
4422
4423static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache)
4424{
4425 BLOCK_LINK *block;
4426 uint total_found;
4427 uint found;
4428 uint idx;
4429 uint changed_blocks_hash_size= keycache->changed_blocks_hash_size;
4430 DBUG_ENTER("flush_all_key_blocks");
4431
4432 do
4433 {
4434 mysql_mutex_assert_owner(&keycache->cache_lock);
4435 total_found= 0;
4436
4437 /*
4438 Phase1: Flush all changed blocks, waiting for them if necessary.
4439 Loop until there is no changed block left in the cache.
4440 */
4441 do
4442 {
4443 found= 0;
4444 /* Step over the whole changed_blocks hash array. */
4445 for (idx= 0; idx < changed_blocks_hash_size; idx++)
4446 {
4447 /*
4448 If an array element is non-empty, use the first block from its
4449 chain to find a file for flush. All changed blocks for this
4450 file are flushed. So the same block will not appear at this
4451 place again with the next iteration. New writes for blocks are
4452 not accepted during the flush. If multiple files share the
4453 same hash bucket, one of them will be flushed per iteration
4454 of the outer loop of phase 1.
4455 */
4456 while ((block= keycache->changed_blocks[idx]))
4457 {
4458 found++;
4459 /*
4460 Flush dirty blocks but do not free them yet. They can be used
4461 for reading until all other blocks are flushed too.
4462 */
4463 if (flush_key_blocks_int(keycache, block->hash_link->file,
4464 FLUSH_FORCE_WRITE))
4465 DBUG_RETURN(1);
4466 }
4467 }
4468 } while (found);
4469
4470 /*
4471 Phase 2: Free all clean blocks. Normally this means free all
4472 blocks. The changed blocks were flushed in phase 1 and became
4473 clean. However we may need to wait for blocks that are read by
4474 other threads. While we wait, a clean block could become changed
4475 if that operation started before the resize operation started. To
4476 be safe we must restart at phase 1.
4477 */
4478 do
4479 {
4480 found= 0;
4481 /* Step over the whole file_blocks hash array. */
4482 for (idx= 0; idx < changed_blocks_hash_size; idx++)
4483 {
4484 /*
4485 If an array element is non-empty, use the first block from its
4486 chain to find a file for flush. All blocks for this file are
4487 freed. So the same block will not appear at this place again
4488 with the next iteration. If multiple files share the
4489 same hash bucket, one of them will be flushed per iteration
4490 of the outer loop of phase 2.
4491 */
4492 while ((block= keycache->file_blocks[idx]))
4493 {
4494 total_found++;
4495 found++;
4496 if (flush_key_blocks_int(keycache, block->hash_link->file,
4497 FLUSH_RELEASE))
4498 DBUG_RETURN(1);
4499 }
4500 }
4501 } while (found);
4502
4503 /*
4504 If any clean block has been found, we may have waited for it to
4505 become free. In this case it could be possible that another clean
4506 block became dirty. This is possible if the write request existed
4507 before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
4508 */
4509 } while (total_found);
4510
4511#ifndef DBUG_OFF
4512 /* Now there should not exist any block any more. */
4513 for (idx= 0; idx < changed_blocks_hash_size; idx++)
4514 {
4515 DBUG_ASSERT(!keycache->changed_blocks[idx]);
4516 DBUG_ASSERT(!keycache->file_blocks[idx]);
4517 }
4518#endif
4519
4520 DBUG_RETURN(0);
4521}
4522
4523
4524/*
4525 Reset the counters of a simple key cache
4526
4527 SYNOPSIS
4528 reset_simple_key_cache_counters()
4529 name the name of a key cache
4530 keycache pointer to the control block of a simple key cache
4531
4532 DESCRIPTION
4533 This function is the implementation of the reset_key_cache_counters
4534 interface function that is employed by simple (non-partitioned) key caches.
4535 The function takes the parameter keycache as a pointer to the
4536 control block structure of the type S_KEY_CACHE_CB for a simple key cache.
4537 This function resets the values of all statistical counters for the key
4538 cache to 0.
4539 The parameter name is currently not used.
4540
4541 RETURN
4542 0 on success (always because it can't fail)
4543*/
4544
4545static
4546int reset_simple_key_cache_counters(const char *name __attribute__((unused)),
4547 SIMPLE_KEY_CACHE_CB *keycache)
4548{
4549 DBUG_ENTER("reset_simple_key_cache_counters");
4550 if (!keycache->key_cache_inited)
4551 {
4552 DBUG_PRINT("info", ("Key cache %s not initialized.", name));
4553 DBUG_RETURN(0);
4554 }
4555 DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
4556
4557 keycache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
4558 keycache->global_cache_r_requests= 0; /* Key_read_requests */
4559 keycache->global_cache_read= 0; /* Key_reads */
4560 keycache->global_cache_w_requests= 0; /* Key_write_requests */
4561 keycache->global_cache_write= 0; /* Key_writes */
4562 DBUG_RETURN(0);
4563}
4564
4565
4566#ifndef DBUG_OFF
4567/*
4568 Test if disk-cache is ok
4569*/
4570static
4571void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache __attribute__((unused)),
4572 const char *where __attribute__((unused)),
4573 my_bool lock __attribute__((unused)))
4574{
4575 /* TODO */
4576}
4577#endif
4578
4579#if defined(KEYCACHE_TIMEOUT)
4580
4581#define KEYCACHE_DUMP_FILE "keycache_dump.txt"
4582#define MAX_QUEUE_LEN 100
4583
4584
4585static void keycache_dump(SIMPLE_KEY_CACHE_CB *keycache)
4586{
4587 FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
4588 struct st_my_thread_var *last;
4589 struct st_my_thread_var *thread;
4590 BLOCK_LINK *block;
4591 HASH_LINK *hash_link;
4592 KEYCACHE_PAGE *page;
4593 uint i;
4594
4595 fprintf(keycache_dump_file, "thread:%lu\n", (ulong) thread->id);
4596
4597 i=0;
4598 thread=last=waiting_for_hash_link.last_thread;
4599 fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
4600 if (thread)
4601 do
4602 {
4603 thread=thread->next;
4604 page= (KEYCACHE_PAGE *) thread->keycache_link;
4605 fprintf(keycache_dump_file,
4606 "thread:%lu, (file,filepos)=(%u,%lu)\n",
4607 (ulong) thread->id,(uint) page->file,(ulong) page->filepos);
4608 if (++i == MAX_QUEUE_LEN)
4609 break;
4610 }
4611 while (thread != last);
4612
4613 i=0;
4614 thread=last=waiting_for_block.last_thread;
4615 fprintf(keycache_dump_file, "queue of threads waiting for block\n");
4616 if (thread)
4617 do
4618 {
4619 thread=thread->next;
4620 hash_link= (HASH_LINK *) thread->keycache_link;
4621 fprintf(keycache_dump_file,
4622 "thread:%lu hash_link:%u (file,filepos)=(%u,%lu)\n",
4623 (ulong) thread->id, (uint) HASH_LINK_NUMBER(hash_link),
4624 (uint) hash_link->file,(ulong) hash_link->diskpos);
4625 if (++i == MAX_QUEUE_LEN)
4626 break;
4627 }
4628 while (thread != last);
4629
4630 for (i=0 ; i< keycache->blocks_used ; i++)
4631 {
4632 int j;
4633 block= &keycache->block_root[i];
4634 hash_link= block->hash_link;
4635 fprintf(keycache_dump_file,
4636 "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
4637 i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
4638 block->status, block->requests, block->condvar ? 1 : 0);
4639 for (j=0 ; j < 2; j++)
4640 {
4641 KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
4642 thread= last= wqueue->last_thread;
4643 fprintf(keycache_dump_file, "queue #%d\n", j);
4644 if (thread)
4645 {
4646 do
4647 {
4648 thread=thread->next;
4649 fprintf(keycache_dump_file,
4650 "thread:%lu\n", (ulong) thread->id);
4651 if (++i == MAX_QUEUE_LEN)
4652 break;
4653 }
4654 while (thread != last);
4655 }
4656 }
4657 }
4658 fprintf(keycache_dump_file, "LRU chain:");
4659 block= keycache= used_last;
4660 if (block)
4661 {
4662 do
4663 {
4664 block= block->next_used;
4665 fprintf(keycache_dump_file,
4666 "block:%u, ", BLOCK_NUMBER(block));
4667 }
4668 while (block != keycache->used_last);
4669 }
4670 fprintf(keycache_dump_file, "\n");
4671
4672 fclose(keycache_dump_file);
4673}
4674
4675#endif /* defined(KEYCACHE_TIMEOUT) */
4676
4677#if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
4678
4679
4680static int keycache_pthread_cond_wait(mysql_cond_t *cond,
4681 mysql_mutex_t *mutex)
4682{
4683 int rc;
4684 struct timeval now; /* time when we started waiting */
4685 struct timespec timeout; /* timeout value for the wait function */
4686 struct timezone tz;
4687#if defined(KEYCACHE_DEBUG)
4688 int cnt=0;
4689#endif
4690
4691 /* Get current time */
4692 gettimeofday(&now, &tz);
4693 /* Prepare timeout value */
4694 timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
4695 /*
4696 timeval uses microseconds.
4697 timespec uses nanoseconds.
4698 1 nanosecond = 1000 micro seconds
4699 */
4700 timeout.tv_nsec= now.tv_usec * 1000;
4701 KEYCACHE_THREAD_TRACE_END("started waiting");
4702#if defined(KEYCACHE_DEBUG)
4703 cnt++;
4704 if (cnt % 100 == 0)
4705 fprintf(keycache_debug_log, "waiting...\n");
4706 fflush(keycache_debug_log);
4707#endif
4708 rc= mysql_cond_timedwait(cond, mutex, &timeout);
4709 KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
4710 if (rc == ETIMEDOUT || rc == ETIME)
4711 {
4712#if defined(KEYCACHE_DEBUG)
4713 fprintf(keycache_debug_log,"aborted by keycache timeout\n");
4714 fclose(keycache_debug_log);
4715 abort();
4716#endif
4717 keycache_dump();
4718 }
4719
4720#if defined(KEYCACHE_DEBUG)
4721 KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
4722#else
4723 assert(rc != ETIMEDOUT);
4724#endif
4725 return rc;
4726}
4727#else
4728#if defined(KEYCACHE_DEBUG)
4729static int keycache_pthread_cond_wait(mysql_cond_t *cond,
4730 mysql_mutex_t *mutex)
4731{
4732 int rc;
4733 KEYCACHE_THREAD_TRACE_END("started waiting");
4734 rc= mysql_cond_wait(cond, mutex);
4735 KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
4736 return rc;
4737}
4738#endif
4739#endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
4740
4741#if defined(KEYCACHE_DEBUG)
4742
4743
4744static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex)
4745{
4746 int rc;
4747 rc= mysql_mutex_lock(mutex);
4748 KEYCACHE_THREAD_TRACE_BEGIN("");
4749 return rc;
4750}
4751
4752
4753static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex)
4754{
4755 KEYCACHE_THREAD_TRACE_END("");
4756 mysql_mutex_unlock(mutex);
4757}
4758
4759
4760static int keycache_pthread_cond_signal(mysql_cond_t *cond)
4761{
4762 int rc;
4763 KEYCACHE_THREAD_TRACE("signal");
4764 rc= mysql_cond_signal(cond);
4765 return rc;
4766}
4767
4768
4769#if defined(KEYCACHE_DEBUG_LOG)
4770
4771
4772static void keycache_debug_print(const char * fmt,...)
4773{
4774 va_list args;
4775 va_start(args,fmt);
4776 if (keycache_debug_log)
4777 {
4778 (void) vfprintf(keycache_debug_log, fmt, args);
4779 (void) fputc('\n',keycache_debug_log);
4780 }
4781 va_end(args);
4782}
4783#endif /* defined(KEYCACHE_DEBUG_LOG) */
4784
4785#if defined(KEYCACHE_DEBUG_LOG)
4786
4787
4788void keycache_debug_log_close(void)
4789{
4790 if (keycache_debug_log)
4791 fclose(keycache_debug_log);
4792}
4793#endif /* defined(KEYCACHE_DEBUG_LOG) */
4794
4795#endif /* defined(KEYCACHE_DEBUG) */
4796
4797#ifdef DBUG_ASSERT_EXISTS
4798#define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4799
4800static int fail_block(BLOCK_LINK *block __attribute__((unused)))
4801{
4802#ifndef DBUG_OFF
4803 F_B_PRT("block->next_used: %p\n", block->next_used);
4804 F_B_PRT("block->prev_used: %p\n", block->prev_used);
4805 F_B_PRT("block->next_changed: %p\n", block->next_changed);
4806 F_B_PRT("block->prev_changed: %p\n", block->prev_changed);
4807 F_B_PRT("block->hash_link: %p\n", block->hash_link);
4808 F_B_PRT("block->status: %u\n", block->status);
4809 F_B_PRT("block->length: %u\n", block->length);
4810 F_B_PRT("block->offset: %u\n", block->offset);
4811 F_B_PRT("block->requests: %u\n", block->requests);
4812 F_B_PRT("block->temperature: %u\n", block->temperature);
4813#endif
4814 return 0; /* Let the assert fail. */
4815}
4816#endif
4817
4818#ifndef DBUG_OFF
4819static int fail_hlink(HASH_LINK *hlink __attribute__((unused)))
4820{
4821 F_B_PRT("hlink->next: %p\n", hlink->next);
4822 F_B_PRT("hlink->prev: %p\n", hlink->prev);
4823 F_B_PRT("hlink->block: %p\n", hlink->block);
4824 F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
4825 F_B_PRT("hlink->file: %d\n", hlink->file);
4826 return 0; /* Let the assert fail. */
4827}
4828
4829static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache)
4830{
4831 int errcnt= 0;
4832 int idx;
4833 if (keycache->disk_blocks <= 0)
4834 return 1;
4835 for (idx= 0; idx < keycache->disk_blocks; idx++)
4836 {
4837 BLOCK_LINK *block= keycache->block_root + idx;
4838 if (block->status || block->requests || block->hash_link)
4839 {
4840 fprintf(stderr, "block index: %u\n", idx);
4841 fail_block(block);
4842 errcnt++;
4843 }
4844 }
4845 for (idx= 0; idx < keycache->hash_links; idx++)
4846 {
4847 HASH_LINK *hash_link= keycache->hash_link_root + idx;
4848 if (hash_link->requests || hash_link->block)
4849 {
4850 fprintf(stderr, "hash_link index: %u\n", idx);
4851 fail_hlink(hash_link);
4852 errcnt++;
4853 }
4854 }
4855 if (errcnt)
4856 {
4857 fprintf(stderr, "blocks: %d used: %lu\n",
4858 keycache->disk_blocks, keycache->blocks_used);
4859 fprintf(stderr, "hash_links: %d used: %d\n",
4860 keycache->hash_links, keycache->hash_links_used);
4861 fprintf(stderr, "\n");
4862 }
4863 return !errcnt;
4864}
4865#endif
4866
4867
4868/*
4869 Get statistics for a simple key cache
4870
4871 SYNOPSIS
4872 get_simple_key_cache_statistics()
4873 keycache pointer to the control block of a simple key cache
4874 partition_no partition number (not used)
4875 key_cache_stats OUT pointer to the structure for the returned statistics
4876
4877 DESCRIPTION
4878 This function is the implementation of the get_key_cache_statistics
4879 interface function that is employed by simple (non-partitioned) key caches.
4880 The function takes the parameter keycache as a pointer to the
4881 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
4882 cache. This function returns the statistical data for the key cache.
4883 The parameter partition_no is not used by this function.
4884
4885 RETURN
4886 none
4887*/
4888
4889static
4890void get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB *keycache,
4891 uint partition_no __attribute__((unused)),
4892 KEY_CACHE_STATISTICS *keycache_stats)
4893{
4894 DBUG_ENTER("simple_get_key_cache_statistics");
4895
4896 keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
4897 keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
4898 keycache_stats->blocks_used= keycache->blocks_used;
4899 keycache_stats->blocks_unused= keycache->blocks_unused;
4900 keycache_stats->blocks_changed= keycache->global_blocks_changed;
4901 keycache_stats->blocks_warm= keycache->warm_blocks;
4902 keycache_stats->read_requests= keycache->global_cache_r_requests;
4903 keycache_stats->reads= keycache->global_cache_read;
4904 keycache_stats->write_requests= keycache->global_cache_w_requests;
4905 keycache_stats->writes= keycache->global_cache_write;
4906 DBUG_VOID_RETURN;
4907}
4908
4909
4910/*
4911 The array of pointer to the key cache interface functions used for simple
4912 key caches. Any simple key cache objects including those incorporated into
4913 partitioned keys caches exploit this array.
4914
4915 The current implementation of these functions allows to call them from
4916 the MySQL server code directly. We don't do it though.
4917*/
4918
4919static KEY_CACHE_FUNCS simple_key_cache_funcs =
4920{
4921 (INIT_KEY_CACHE) init_simple_key_cache,
4922 (RESIZE_KEY_CACHE) resize_simple_key_cache,
4923 (CHANGE_KEY_CACHE_PARAM) change_simple_key_cache_param,
4924 (KEY_CACHE_READ) simple_key_cache_read,
4925 (KEY_CACHE_INSERT) simple_key_cache_insert,
4926 (KEY_CACHE_WRITE) simple_key_cache_write,
4927 (FLUSH_KEY_BLOCKS) flush_simple_key_cache_blocks,
4928 (RESET_KEY_CACHE_COUNTERS) reset_simple_key_cache_counters,
4929 (END_KEY_CACHE) end_simple_key_cache,
4930 (GET_KEY_CACHE_STATISTICS) get_simple_key_cache_statistics,
4931};
4932
4933
4934/******************************************************************************
4935 Partitioned Key Cache Module
4936
4937 The module contains implementations of all key cache interface functions
4938 employed by partitioned key caches.
4939
4940 A partitioned key cache is a collection of structures for simple key caches
4941 called key cache partitions. Any page from a file can be placed into a buffer
4942 of only one partition. The number of the partition is calculated from
4943 the file number and the position of the page in the file, and it's always the
4944 same for the page. The function that maps pages into partitions takes care
4945 of even distribution of pages among partitions.
4946
4947 Partition key cache mitigate one of the major problem of simple key cache:
4948 thread contention for key cache lock (mutex). Every call of a key cache
4949 interface function must acquire this lock. So threads compete for this lock
4950 even in the case when they have acquired shared locks for the file and
4951 pages they want read from are in the key cache buffers.
4952 When working with a partitioned key cache any key cache interface function
4953 that needs only one page has to acquire the key cache lock only for the
4954 partition the page is ascribed to. This makes the chances for threads not
4955 compete for the same key cache lock better. Unfortunately if we use a
4956 partitioned key cache with N partitions for B-tree indexes we can't say
4957 that the chances becomes N times less. The fact is that any index lookup
4958 operation requires reading from the root page that, for any index, is always
4959 ascribed to the same partition. To resolve this problem we should have
4960 employed more sophisticated mechanisms of working with root pages.
4961
4962 Currently the number of partitions in a partitioned key cache is limited
4963 by 64. We could increase this limit. Simultaneously we would have to increase
4964 accordingly the size of the bitmap dirty_part_map from the MYISAM_SHARE
4965 structure.
4966
4967******************************************************************************/
4968
4969/* Control block for a partitioned key cache */
4970
4971typedef struct st_partitioned_key_cache_cb
4972{
4973 my_bool key_cache_inited; /*<=> control block is allocated */
4974 SIMPLE_KEY_CACHE_CB **partition_array; /* the key cache partitions */
4975 size_t key_cache_mem_size; /* specified size of the cache memory */
4976 uint key_cache_block_size; /* size of the page buffer of a cache block */
4977 uint partitions; /* number of partitions in the key cache */
4978} PARTITIONED_KEY_CACHE_CB;
4979
4980static
4981void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
4982 my_bool cleanup);
4983
4984static int
4985reset_partitioned_key_cache_counters(const char *name,
4986 PARTITIONED_KEY_CACHE_CB *keycache);
4987
4988/*
4989 Determine the partition to which the index block to read is ascribed
4990
4991 SYNOPSIS
4992 get_key_cache_partition()
4993 keycache pointer to the control block of a partitioned key cache
4994 file handler for the file for the block of data to be read
4995 filepos position of the block of data in the file
4996
4997 DESCRIPTION
4998 The function determines the number of the partition in whose buffer the
4999 block from 'file' at the position filepos has to be placed for reading.
5000 The function returns the control block of the simple key cache for this
5001 partition to the caller.
5002
5003 RETURN VALUE
5004 The pointer to the control block of the partition to which the specified
5005 file block is ascribed.
5006*/
5007
5008static
5009SIMPLE_KEY_CACHE_CB *
5010get_key_cache_partition(PARTITIONED_KEY_CACHE_CB *keycache,
5011 File file, my_off_t filepos)
5012{
5013 uint i= KEYCACHE_BASE_EXPR(file, filepos) % keycache->partitions;
5014 return keycache->partition_array[i];
5015}
5016
5017
5018/*
5019 Determine the partition to which the index block to write is ascribed
5020
5021 SYNOPSIS
5022 get_key_cache_partition()
5023 keycache pointer to the control block of a partitioned key cache
5024 file handler for the file for the block of data to be read
5025 filepos position of the block of data in the file
5026 dirty_part_map pointer to the bitmap of dirty partitions for the file
5027
5028 DESCRIPTION
5029 The function determines the number of the partition in whose buffer the
5030 block from 'file' at the position filepos has to be placed for writing and
5031 marks the partition as dirty in the dirty_part_map bitmap.
5032 The function returns the control block of the simple key cache for this
5033 partition to the caller.
5034
5035 RETURN VALUE
5036 The pointer to the control block of the partition to which the specified
5037 file block is ascribed.
5038*/
5039
5040static SIMPLE_KEY_CACHE_CB
5041*get_key_cache_partition_for_write(PARTITIONED_KEY_CACHE_CB *keycache,
5042 File file, my_off_t filepos,
5043 ulonglong* dirty_part_map)
5044{
5045 uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions;
5046 *dirty_part_map|= 1ULL << i;
5047 return keycache->partition_array[i];
5048}
5049
5050
5051/*
5052 Initialize a partitioned key cache
5053
5054 SYNOPSIS
5055 init_partitioned_key_cache()
5056 keycache pointer to the control block of a partitioned key cache
5057 key_cache_block_size size of blocks to keep cached data
5058 use_mem total memory to use for all key cache partitions
5059 division_limit division limit (may be zero)
5060 age_threshold age threshold (may be zero)
5061
5062 DESCRIPTION
5063 This function is the implementation of the init_key_cache
5064 interface function that is employed by partitioned key caches.
5065
5066 The function builds and initializes an array of simple key caches,
5067 and then initializes the control block structure of the type
5068 PARTITIONED_KEY_CACHE_CB that is used for a partitioned key
5069 cache. The parameter keycache is supposed to point to this
5070 structure. The number of partitions in the partitioned key cache
5071 to be built must be passed through the field 'partitions' of this
5072 structure.
5073 The parameter key_cache_block_size specifies the size of the
5074 blocks in the the simple key caches to be built.
5075 The parameters division_limit and age_threshold determine the initial
5076 values of those characteristics of the simple key caches that are used for
5077 midpoint insertion strategy. The parameter use_mem specifies the total
5078 amount of memory to be allocated for the key cache blocks in all simple key
5079 caches and for all auxiliary structures.
5080
5081 RETURN VALUE
5082 total number of blocks in key cache partitions, if successful,
5083 <= 0 - otherwise.
5084
5085 NOTES
5086 If keycache->key_cache_inited != 0 then we assume that the memory for
5087 the array of partitions has been already allocated.
5088
5089 It's assumed that no two threads call this function simultaneously
5090 referring to the same key cache handle.
5091*/
5092
5093static
5094int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
5095 uint key_cache_block_size,
5096 size_t use_mem, uint division_limit,
5097 uint age_threshold, uint changed_blocks_hash_size)
5098{
5099 int i;
5100 size_t mem_per_cache;
5101 size_t mem_decr;
5102 int cnt;
5103 SIMPLE_KEY_CACHE_CB *partition;
5104 SIMPLE_KEY_CACHE_CB **partition_ptr;
5105 uint partitions= keycache->partitions;
5106 int blocks= 0;
5107 DBUG_ENTER("partitioned_init_key_cache");
5108
5109 keycache->key_cache_block_size = key_cache_block_size;
5110
5111 if (keycache->key_cache_inited)
5112 partition_ptr= keycache->partition_array;
5113 else
5114 {
5115 if(!(partition_ptr=
5116 (SIMPLE_KEY_CACHE_CB **) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB *) *
5117 partitions, MYF(MY_WME))))
5118 DBUG_RETURN(-1);
5119 bzero(partition_ptr, sizeof(SIMPLE_KEY_CACHE_CB *) * partitions);
5120 keycache->partition_array= partition_ptr;
5121 }
5122
5123 mem_per_cache = use_mem / partitions;
5124 mem_decr= mem_per_cache / 5;
5125
5126 for (i= 0; i < (int) partitions; i++)
5127 {
5128 my_bool key_cache_inited= keycache->key_cache_inited;
5129 if (key_cache_inited)
5130 partition= *partition_ptr;
5131 else
5132 {
5133 if (!(partition=
5134 (SIMPLE_KEY_CACHE_CB *) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB),
5135 MYF(MY_WME))))
5136 continue;
5137 partition->key_cache_inited= 0;
5138 }
5139
5140 cnt= init_simple_key_cache(partition, key_cache_block_size, mem_per_cache,
5141 division_limit, age_threshold,
5142 changed_blocks_hash_size);
5143 if (cnt <= 0)
5144 {
5145 end_simple_key_cache(partition, 1);
5146 if (!key_cache_inited)
5147 {
5148 my_free(partition);
5149 partition= 0;
5150 }
5151 if ((i == 0 && cnt < 0) || i > 0)
5152 {
5153 /*
5154 Here we have two cases:
5155 1. i == 0 and cnt < 0
5156 cnt < 0 => mem_per_cache is not big enough to allocate minimal
5157 number of key blocks in the key cache of the partition.
5158 Decrease the the number of the partitions by 1 and start again.
5159 2. i > 0
5160 There is not enough memory for one of the succeeding partitions.
5161 Just skip this partition decreasing the number of partitions in
5162 the key cache by one.
5163 Do not change the value of mem_per_cache in both cases.
5164 */
5165 if (key_cache_inited)
5166 {
5167 my_free(partition);
5168 partition= 0;
5169 if(key_cache_inited)
5170 memmove(partition_ptr, partition_ptr+1,
5171 sizeof(partition_ptr)*(partitions-i-1));
5172 }
5173 if (!--partitions)
5174 break;
5175 }
5176 else
5177 {
5178 /*
5179 We come here when i == 0 && cnt == 0.
5180 cnt == 0 => the memory allocator fails to allocate a block of
5181 memory of the size mem_per_cache. Decrease the value of
5182 mem_per_cache without changing the current number of partitions
5183 and start again. Make sure that such a decrease may happen not
5184 more than 5 times in total.
5185 */
5186 if (use_mem <= mem_decr)
5187 break;
5188 use_mem-= mem_decr;
5189 }
5190 i--;
5191 mem_per_cache= use_mem/partitions;
5192 continue;
5193 }
5194 else
5195 {
5196 blocks+= cnt;
5197 *partition_ptr++= partition;
5198 }
5199 }
5200
5201 keycache->partitions= partitions= (uint) (partition_ptr-keycache->partition_array);
5202 keycache->key_cache_mem_size= mem_per_cache * partitions;
5203 for (i= 0; i < (int) partitions; i++)
5204 keycache->partition_array[i]->hash_factor= partitions;
5205
5206 keycache->key_cache_inited= 1;
5207
5208 if (!partitions)
5209 blocks= -1;
5210
5211 DBUG_RETURN(blocks);
5212}
5213
5214
5215/*
5216 Resize a partitioned key cache
5217
5218 SYNOPSIS
5219 resize_partitioned_key_cache()
5220 keycache pointer to the control block of a partitioned key cache
5221 key_cache_block_size size of blocks to keep cached data
5222 use_mem total memory to use for the new key cache
5223 division_limit new division limit (if not zero)
5224 age_threshold new age threshold (if not zero)
5225
5226 DESCRIPTION
5227 This function is the implementation of the resize_key_cache interface
5228 function that is employed by partitioned key caches.
5229 The function takes the parameter keycache as a pointer to the
5230 control block structure of the type PARTITIONED_KEY_CACHE_CB for the
5231 partitioned key cache to be resized.
5232 The parameter key_cache_block_size specifies the new size of the blocks in
5233 the simple key caches that comprise the partitioned key cache.
5234 The parameters division_limit and age_threshold determine the new initial
5235 values of those characteristics of the simple key cache that are used for
5236 midpoint insertion strategy. The parameter use-mem specifies the total
5237 amount of memory to be allocated for the key cache blocks in all new
5238 simple key caches and for all auxiliary structures.
5239
5240 RETURN VALUE
5241 number of blocks in the key cache, if successful,
5242 0 - otherwise.
5243
5244 NOTES.
5245 The function first calls prepare_resize_simple_key_cache for each simple
5246 key cache effectively flushing all dirty pages from it and destroying
5247 the key cache. Then init_partitioned_key_cache is called. This call builds
5248 a new array of simple key caches containing the same number of elements
5249 as the old one. After this the function calls the function
5250 finish_resize_simple_key_cache for each simple key cache from this array.
5251
5252 This implementation doesn't block the calls and executions of other
5253 functions from the key cache interface. However it assumes that the
5254 calls of resize_partitioned_key_cache itself are serialized.
5255*/
5256
5257static
5258int resize_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
5259 uint key_cache_block_size,
5260 size_t use_mem, uint division_limit,
5261 uint age_threshold,
5262 uint changed_blocks_hash_size)
5263{
5264 uint i;
5265 uint partitions= keycache->partitions;
5266 my_bool cleanup= use_mem == 0;
5267 int blocks= -1;
5268 int err= 0;
5269 DBUG_ENTER("partitioned_resize_key_cache");
5270 if (cleanup)
5271 {
5272 end_partitioned_key_cache(keycache, 0);
5273 DBUG_RETURN(-1);
5274 }
5275 for (i= 0; i < partitions; i++)
5276 {
5277 err|= prepare_resize_simple_key_cache(keycache->partition_array[i], 1);
5278 }
5279 if (!err)
5280 blocks= init_partitioned_key_cache(keycache, key_cache_block_size,
5281 use_mem, division_limit, age_threshold,
5282 changed_blocks_hash_size);
5283 if (blocks > 0)
5284 {
5285 for (i= 0; i < partitions; i++)
5286 {
5287 keycache_pthread_mutex_lock(&keycache->partition_array[i]->cache_lock);
5288 finish_resize_simple_key_cache(keycache->partition_array[i]);
5289 }
5290 }
5291 DBUG_RETURN(blocks);
5292}
5293
5294
5295/*
5296 Change key cache parameters of a partitioned key cache
5297
5298 SYNOPSIS
5299 partitioned_change_key_cache_param()
5300 keycache pointer to the control block of a partitioned key cache
5301 division_limit new division limit (if not zero)
5302 age_threshold new age threshold (if not zero)
5303
5304 DESCRIPTION
5305 This function is the implementation of the change_key_cache_param interface
5306 function that is employed by partitioned key caches.
5307 The function takes the parameter keycache as a pointer to the
5308 control block structure of the type PARTITIONED_KEY_CACHE_CB for the simple
5309 key cache where new values of the division limit and the age threshold used
5310 for midpoint insertion strategy are to be set. The parameters
5311 division_limit and age_threshold provide these new values.
5312
5313 RETURN VALUE
5314 none
5315
5316 NOTES
5317 The function just calls change_simple_key_cache_param for each element from
5318 the array of simple caches that comprise the partitioned key cache.
5319*/
5320
5321static
5322void change_partitioned_key_cache_param(PARTITIONED_KEY_CACHE_CB *keycache,
5323 uint division_limit,
5324 uint age_threshold)
5325{
5326 uint i;
5327 uint partitions= keycache->partitions;
5328 DBUG_ENTER("partitioned_change_key_cache_param");
5329 for (i= 0; i < partitions; i++)
5330 {
5331 change_simple_key_cache_param(keycache->partition_array[i], division_limit,
5332 age_threshold);
5333 }
5334 DBUG_VOID_RETURN;
5335}
5336
5337
5338/*
5339 Destroy a partitioned key cache
5340
5341 SYNOPSIS
5342 end_partitioned_key_cache()
5343 keycache pointer to the control block of a partitioned key cache
5344 cleanup <=> complete free (free also control block structures
5345 for all simple key caches)
5346
5347 DESCRIPTION
5348 This function is the implementation of the end_key_cache interface
5349 function that is employed by partitioned key caches.
5350 The function takes the parameter keycache as a pointer to the
5351 control block structure of the type PARTITIONED_KEY_CACHE_CB for the
5352 partitioned key cache to be destroyed.
5353 The function frees the memory allocated for the cache blocks and
5354 auxiliary structures used by simple key caches that comprise the
5355 partitioned key cache. If the value of the parameter cleanup is TRUE
5356 then even the memory used for control blocks of the simple key caches
5357 and the array of pointers to them are freed.
5358
5359 RETURN VALUE
5360 none
5361*/
5362
5363static
5364void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
5365 my_bool cleanup)
5366{
5367 uint i;
5368 uint partitions= keycache->partitions;
5369 DBUG_ENTER("partitioned_end_key_cache");
5370 DBUG_PRINT("enter", ("key_cache: %p", keycache));
5371
5372 for (i= 0; i < partitions; i++)
5373 {
5374 end_simple_key_cache(keycache->partition_array[i], cleanup);
5375 }
5376 if (cleanup)
5377 {
5378 for (i= 0; i < partitions; i++)
5379 my_free(keycache->partition_array[i]);
5380 my_free(keycache->partition_array);
5381 keycache->key_cache_inited= 0;
5382 }
5383 DBUG_VOID_RETURN;
5384}
5385
5386
5387/*
5388 Read a block of data from a partitioned key cache into a buffer
5389
5390 SYNOPSIS
5391
5392 partitioned_key_cache_read()
5393 keycache pointer to the control block of a partitioned key cache
5394 file handler for the file for the block of data to be read
5395 filepos position of the block of data in the file
5396 level determines the weight of the data
5397 buff buffer to where the data must be placed
5398 length length of the buffer
5399 block_length length of the read data from a key cache block
5400 return_buffer return pointer to the key cache buffer with the data
5401
5402 DESCRIPTION
5403 This function is the implementation of the key_cache_read interface
5404 function that is employed by partitioned key caches.
5405 The function takes the parameter keycache as a pointer to the
5406 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5407 partitioned key cache.
5408 In a general case the function reads a block of data from the key cache
5409 into the buffer buff of the size specified by the parameter length. The
5410 beginning of the block of data to be read is specified by the parameters
5411 file and filepos. The length of the read data is the same as the length
5412 of the buffer. The data is read into the buffer in key_cache_block_size
5413 increments. To read each portion the function first finds out in what
5414 partition of the key cache this portion(page) is to be saved, and calls
5415 simple_key_cache_read with the pointer to the corresponding simple key as
5416 its first parameter.
5417 If the parameter return_buffer is not ignored and its value is TRUE, and
5418 the data to be read of the specified size block_length can be read from one
5419 key cache buffer, then the function returns a pointer to the data in the
5420 key cache buffer.
5421 The function takes into account parameters block_length and return buffer
5422 only in a single-threaded environment.
5423 The parameter 'level' is used only by the midpoint insertion strategy
5424 when the data or its portion cannot be found in the key cache.
5425
5426 RETURN VALUE
5427 Returns address from where the data is placed if successful, 0 - otherwise.
5428*/
5429
5430static
5431uchar *partitioned_key_cache_read(PARTITIONED_KEY_CACHE_CB *keycache,
5432 File file, my_off_t filepos, int level,
5433 uchar *buff, uint length,
5434 uint block_length __attribute__((unused)),
5435 int return_buffer __attribute__((unused)))
5436{
5437 uint r_length;
5438 uint offset= (uint) (filepos % keycache->key_cache_block_size);
5439 uchar *start= buff;
5440 DBUG_ENTER("partitioned_key_cache_read");
5441 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
5442 (uint) file, (ulong) filepos, length));
5443
5444
5445 /* Read data in key_cache_block_size increments */
5446 do
5447 {
5448 SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache,
5449 file, filepos);
5450 uchar *ret_buff= 0;
5451 r_length= length;
5452 set_if_smaller(r_length, keycache->key_cache_block_size - offset);
5453 ret_buff= simple_key_cache_read((void *) partition,
5454 file, filepos, level,
5455 buff, r_length,
5456 block_length, return_buffer);
5457 if (ret_buff == 0)
5458 DBUG_RETURN(0);
5459 filepos+= r_length;
5460 buff+= r_length;
5461 offset= 0;
5462 } while ((length-= r_length));
5463
5464 DBUG_RETURN(start);
5465}
5466
5467
5468/*
5469 Insert a block of file data from a buffer into a partitioned key cache
5470
5471 SYNOPSIS
5472 partitioned_key_cache_insert()
5473 keycache pointer to the control block of a partitioned key cache
5474 file handler for the file to insert data from
5475 filepos position of the block of data in the file to insert
5476 level determines the weight of the data
5477 buff buffer to read data from
5478 length length of the data in the buffer
5479
5480 DESCRIPTION
5481 This function is the implementation of the key_cache_insert interface
5482 function that is employed by partitioned key caches.
5483 The function takes the parameter keycache as a pointer to the
5484 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5485 partitioned key cache.
5486 The function writes a block of file data from a buffer into the key cache.
5487 The buffer is specified with the parameters buff and length - the pointer
5488 to the beginning of the buffer and its size respectively. It's assumed
5489 that the buffer contains the data from 'file' allocated from the position
5490 filepos. The data is copied from the buffer in key_cache_block_size
5491 increments. For every portion of data the function finds out in what simple
5492 key cache from the array of partitions the data must be stored, and after
5493 this calls simple_key_cache_insert to copy the data into a key buffer of
5494 this simple key cache.
5495 The parameter level is used to set one characteristic for the key buffers
5496 loaded with the data from buff. The characteristic is used only by the
5497 midpoint insertion strategy.
5498
5499 RETURN VALUE
5500 0 if a success, 1 - otherwise.
5501
5502 NOTES
5503 The function is used by MyISAM to move all blocks from a index file to
5504 the key cache. It can be performed in parallel with reading the file data
5505 from the key buffers by other threads.
5506*/
5507
5508static
5509int partitioned_key_cache_insert(PARTITIONED_KEY_CACHE_CB *keycache,
5510 File file, my_off_t filepos, int level,
5511 uchar *buff, uint length)
5512{
5513 uint w_length;
5514 uint offset= (uint) (filepos % keycache->key_cache_block_size);
5515 DBUG_ENTER("partitioned_key_cache_insert");
5516 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
5517 (uint) file,(ulong) filepos, length));
5518
5519
5520 /* Write data in key_cache_block_size increments */
5521 do
5522 {
5523 SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache,
5524 file, filepos);
5525 w_length= length;
5526 set_if_smaller(w_length, keycache->key_cache_block_size - offset);
5527 if (simple_key_cache_insert((void *) partition,
5528 file, filepos, level,
5529 buff, w_length))
5530 DBUG_RETURN(1);
5531
5532 filepos+= w_length;
5533 buff+= w_length;
5534 offset = 0;
5535 } while ((length-= w_length));
5536
5537 DBUG_RETURN(0);
5538}
5539
5540
5541/*
5542 Write data from a buffer into a partitioned key cache
5543
5544 SYNOPSIS
5545
5546 partitioned_key_cache_write()
5547 keycache pointer to the control block of a partitioned key cache
5548 file handler for the file to write data to
5549 filepos position in the file to write data to
5550 level determines the weight of the data
5551 buff buffer with the data
5552 length length of the buffer
5553 dont_write if is 0 then all dirty pages involved in writing
5554 should have been flushed from key cache
5555 file_extra maps of key cache partitions containing
5556 dirty pages from file
5557
5558 DESCRIPTION
5559 This function is the implementation of the key_cache_write interface
5560 function that is employed by partitioned key caches.
5561 The function takes the parameter keycache as a pointer to the
5562 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5563 partitioned key cache.
5564 In a general case the function copies data from a buffer into the key
5565 cache. The buffer is specified with the parameters buff and length -
5566 the pointer to the beginning of the buffer and its size respectively.
5567 It's assumed the buffer contains the data to be written into 'file'
5568 starting from the position filepos. The data is copied from the buffer
5569 in key_cache_block_size increments. For every portion of data the
5570 function finds out in what simple key cache from the array of partitions
5571 the data must be stored, and after this calls simple_key_cache_write to
5572 copy the data into a key buffer of this simple key cache.
5573 If the value of the parameter dont_write is FALSE then the function
5574 also writes the data into file.
5575 The parameter level is used to set one characteristic for the key buffers
5576 filled with the data from buff. The characteristic is employed only by
5577 the midpoint insertion strategy.
5578 The parameter file_expra provides a pointer to the shared bitmap of
5579 the partitions that may contains dirty pages for the file. This bitmap
5580 is used to optimize the function flush_partitioned_key_cache_blocks.
5581
5582 RETURN VALUE
5583 0 if a success, 1 - otherwise.
5584
5585 NOTES
5586 This implementation exploits the fact that the function is called only
5587 when a thread has got an exclusive lock for the key file.
5588*/
5589
5590static
5591int partitioned_key_cache_write(PARTITIONED_KEY_CACHE_CB *keycache,
5592 File file, void *file_extra,
5593 my_off_t filepos, int level,
5594 uchar *buff, uint length,
5595 uint block_length __attribute__((unused)),
5596 int dont_write)
5597{
5598 uint w_length;
5599 ulonglong *part_map= (ulonglong *) file_extra;
5600 uint offset= (uint) (filepos % keycache->key_cache_block_size);
5601 DBUG_ENTER("partitioned_key_cache_write");
5602 DBUG_PRINT("enter",
5603 ("fd: %u pos: %lu length: %u block_length: %u"
5604 " key_block_length: %u",
5605 (uint) file, (ulong) filepos, length, block_length,
5606 keycache ? keycache->key_cache_block_size : 0));
5607
5608
5609 /* Write data in key_cache_block_size increments */
5610 do
5611 {
5612 SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition_for_write(keycache,
5613 file,
5614 filepos,
5615 part_map);
5616 w_length = length;
5617 set_if_smaller(w_length, keycache->key_cache_block_size - offset );
5618 if (simple_key_cache_write(partition,
5619 file, 0, filepos, level,
5620 buff, w_length, block_length,
5621 dont_write))
5622 DBUG_RETURN(1);
5623
5624 filepos+= w_length;
5625 buff+= w_length;
5626 offset= 0;
5627 } while ((length-= w_length));
5628
5629 DBUG_RETURN(0);
5630}
5631
5632
5633/*
5634 Flush all blocks for a file from key buffers of a partitioned key cache
5635
5636 SYNOPSIS
5637
5638 flush_partitioned_key_cache_blocks()
5639 keycache pointer to the control block of a partitioned key cache
5640 file handler for the file to flush to
5641 file_extra maps of key cache partitions containing
5642 dirty pages from file (not used)
5643 flush_type type of the flush operation
5644
5645 DESCRIPTION
5646 This function is the implementation of the flush_key_blocks interface
5647 function that is employed by partitioned key caches.
5648 The function takes the parameter keycache as a pointer to the
5649 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5650 partitioned key cache.
5651 In a general case the function flushes the data from all dirty key
5652 buffers related to the file 'file' into this file. The function does
5653 exactly this if the value of the parameter type is FLUSH_KEEP. If the
5654 value of this parameter is FLUSH_RELEASE, the function additionally
5655 releases the key buffers containing data from 'file' for new usage.
5656 If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
5657 just releases the key buffers containing data from 'file'.
5658 The function performs the operation by calling the function
5659 flush_simple_key_cache_blocks for the elements of the array of the
5660 simple key caches that comprise the partitioned key_cache. If the value
5661 of the parameter type is FLUSH_KEEP s_flush_key_blocks is called only
5662 for the partitions with possibly dirty pages marked in the bitmap
5663 pointed to by the parameter file_extra.
5664
5665 RETURN
5666 0 ok
5667 1 error
5668
5669 NOTES
5670 This implementation exploits the fact that the function is called only
5671 when a thread has got an exclusive lock for the key file.
5672*/
5673
5674static
5675int flush_partitioned_key_cache_blocks(PARTITIONED_KEY_CACHE_CB *keycache,
5676 File file, void *file_extra,
5677 enum flush_type type)
5678{
5679 uint i;
5680 uint partitions= keycache->partitions;
5681 int err= 0;
5682 ulonglong *dirty_part_map= (ulonglong *) file_extra;
5683 DBUG_ENTER("partitioned_flush_key_blocks");
5684 DBUG_PRINT("enter", ("keycache: %p", keycache));
5685
5686 for (i= 0; i < partitions; i++)
5687 {
5688 SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i];
5689 if ((type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
5690 !((*dirty_part_map) & ((ulonglong) 1 << i)))
5691 continue;
5692 err|= MY_TEST(flush_simple_key_cache_blocks(partition, file, 0, type));
5693 }
5694 *dirty_part_map= 0;
5695
5696 DBUG_RETURN(err);
5697}
5698
5699
5700/*
5701 Reset the counters of a partitioned key cache
5702
5703 SYNOPSIS
5704 reset_partitioned_key_cache_counters()
5705 name the name of a key cache
5706 keycache pointer to the control block of a partitioned key cache
5707
5708 DESCRIPTION
5709 This function is the implementation of the reset_key_cache_counters
5710 interface function that is employed by partitioned key caches.
5711 The function takes the parameter keycache as a pointer to the
5712 control block structure of the type PARTITIONED_KEY_CACHE_CB for a partitioned
5713 key cache.
5714 This function resets the values of the statistical counters of the simple
5715 key caches comprising partitioned key cache to 0. It does it by calling
5716 reset_simple_key_cache_counters for each key cache partition.
5717 The parameter name is currently not used.
5718
5719 RETURN
5720 0 on success (always because it can't fail)
5721*/
5722
5723static int
5724reset_partitioned_key_cache_counters(const char *name __attribute__((unused)),
5725 PARTITIONED_KEY_CACHE_CB *keycache)
5726{
5727 uint i;
5728 uint partitions= keycache->partitions;
5729 DBUG_ENTER("partitioned_reset_key_cache_counters");
5730
5731 for (i = 0; i < partitions; i++)
5732 {
5733 reset_simple_key_cache_counters(name, keycache->partition_array[i]);
5734 }
5735 DBUG_RETURN(0);
5736}
5737
5738
5739/*
5740 Get statistics for a partition key cache
5741
5742 SYNOPSIS
5743 get_partitioned_key_cache_statistics()
5744 keycache pointer to the control block of a partitioned key cache
5745 partition_no partition number to get statistics for
5746 key_cache_stats OUT pointer to the structure for the returned statistics
5747
5748 DESCRIPTION
5749 This function is the implementation of the get_key_cache_statistics
5750 interface function that is employed by partitioned key caches.
5751 The function takes the parameter keycache as a pointer to the
5752 control block structure of the type PARTITIONED_KEY_CACHE_CB for
5753 a partitioned key cache.
5754 If the value of the parameter partition_no is equal to 0 then aggregated
5755 statistics for all partitions is returned in the fields of the
5756 structure key_cache_stat of the type KEY_CACHE_STATISTICS . Otherwise
5757 the function returns data for the partition number partition_no of the
5758 key cache in the structure key_cache_stat. (Here partitions are numbered
5759 starting from 1.)
5760
5761 RETURN
5762 none
5763*/
5764
5765static
5766void
5767get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB *keycache,
5768 uint partition_no,
5769 KEY_CACHE_STATISTICS *keycache_stats)
5770{
5771 uint i;
5772 SIMPLE_KEY_CACHE_CB *partition;
5773 uint partitions= keycache->partitions;
5774 DBUG_ENTER("get_partitioned_key_cache_statistics");
5775
5776 if (partition_no != 0)
5777 {
5778 partition= keycache->partition_array[partition_no-1];
5779 get_simple_key_cache_statistics((void *) partition, 0, keycache_stats);
5780 DBUG_VOID_RETURN;
5781 }
5782 bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS));
5783 keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
5784 keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
5785 for (i = 0; i < partitions; i++)
5786 {
5787 partition= keycache->partition_array[i];
5788 keycache_stats->blocks_used+= partition->blocks_used;
5789 keycache_stats->blocks_unused+= partition->blocks_unused;
5790 keycache_stats->blocks_changed+= partition->global_blocks_changed;
5791 keycache_stats->blocks_warm+= partition->warm_blocks;
5792 keycache_stats->read_requests+= partition->global_cache_r_requests;
5793 keycache_stats->reads+= partition->global_cache_read;
5794 keycache_stats->write_requests+= partition->global_cache_w_requests;
5795 keycache_stats->writes+= partition->global_cache_write;
5796 }
5797 DBUG_VOID_RETURN;
5798}
5799
5800/*
5801 The array of pointers to the key cache interface functions used by
5802 partitioned key caches. Any partitioned key cache object caches exploits
5803 this array.
5804
5805 The current implementation of these functions does not allow to call
5806 them from the MySQL server code directly. The key cache interface
5807 wrappers must be used for this purpose.
5808*/
5809
5810static KEY_CACHE_FUNCS partitioned_key_cache_funcs =
5811{
5812 (INIT_KEY_CACHE) init_partitioned_key_cache,
5813 (RESIZE_KEY_CACHE) resize_partitioned_key_cache,
5814 (CHANGE_KEY_CACHE_PARAM) change_partitioned_key_cache_param,
5815 (KEY_CACHE_READ) partitioned_key_cache_read,
5816 (KEY_CACHE_INSERT) partitioned_key_cache_insert,
5817 (KEY_CACHE_WRITE) partitioned_key_cache_write,
5818 (FLUSH_KEY_BLOCKS) flush_partitioned_key_cache_blocks,
5819 (RESET_KEY_CACHE_COUNTERS) reset_partitioned_key_cache_counters,
5820 (END_KEY_CACHE) end_partitioned_key_cache,
5821 (GET_KEY_CACHE_STATISTICS) get_partitioned_key_cache_statistics,
5822};
5823
5824
5825/******************************************************************************
5826 Key Cache Interface Module
5827
5828 The module contains wrappers for all key cache interface functions.
5829
5830 Currently there are key caches of two types: simple key caches and
5831 partitioned key caches. Each type (class) has its own implementation of the
5832 basic key cache operations used the MyISAM storage engine. The pointers
5833 to the implementation functions are stored in two static structures of the
5834 type KEY_CACHE_FUNC: simple_key_cache_funcs - for simple key caches, and
5835 partitioned_key_cache_funcs - for partitioned key caches. When a key cache
5836 object is created the constructor procedure init_key_cache places a pointer
5837 to the corresponding table into one of its fields. The procedure also
5838 initializes a control block for the key cache oject and saves the pointer
5839 to this block in another field of the key cache object.
5840 When a key cache wrapper function is invoked for a key cache object to
5841 perform a basic key cache operation it looks into the interface table
5842 associated with the key cache oject and calls the corresponding
5843 implementation of the operation. It passes the saved key cache control
5844 block to this implementation. If, for some reasons, the control block
5845 has not been fully initialized yet, the wrapper function either does not
5846 do anything or, in the case when it perform a read/write operation, the
5847 function do it directly through the system i/o functions.
5848
5849 As we can see the model with which the key cache interface is supported
5850 as quite conventional for interfaces in general.
5851
5852******************************************************************************/
5853
5854static
5855int repartition_key_cache_internal(KEY_CACHE *keycache,
5856 uint key_cache_block_size, size_t use_mem,
5857 uint division_limit, uint age_threshold,
5858 uint changed_blocks_hash_size,
5859 uint partitions, my_bool use_op_lock);
5860
5861/*
5862 Initialize a key cache : internal
5863
5864 SYNOPSIS
5865 init_key_cache_internal()
5866 keycache pointer to the key cache to be initialized
5867 key_cache_block_size size of blocks to keep cached data
5868 use_mem total memory to use for cache buffers/structures
5869 division_limit division limit (may be zero)
5870 age_threshold age threshold (may be zero)
5871 changed_blocks_hash_size Number of hash buckets to hold a link of different
5872 files. Should be proportional to number of different
5873 files sused.
5874 partitions Number of partitions in the key cache
5875 use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it
5876
5877 DESCRIPTION
5878 The function performs the actions required from init_key_cache().
5879 It has an additional parameter: use_op_lock. When the parameter
5880 is TRUE than the function initializes keycache->op_lock if needed,
5881 then locks it, and unlocks it before the return. Otherwise the actions
5882 with the lock are omitted.
5883
5884 RETURN VALUE
5885 total number of blocks in key cache partitions, if successful,
5886 <= 0 - otherwise.
5887
5888 NOTES
5889 if keycache->key_cache_inited != 0 we assume that the memory
5890 for the control block of the key cache has been already allocated.
5891*/
5892
5893static
5894int init_key_cache_internal(KEY_CACHE *keycache, uint key_cache_block_size,
5895 size_t use_mem, uint division_limit,
5896 uint age_threshold, uint changed_blocks_hash_size,
5897 uint partitions,
5898 my_bool use_op_lock)
5899{
5900 void *keycache_cb;
5901 int blocks;
5902 if (keycache->key_cache_inited)
5903 {
5904 if (use_op_lock)
5905 pthread_mutex_lock(&keycache->op_lock);
5906 keycache_cb= keycache->keycache_cb;
5907 }
5908 else
5909 {
5910 if (partitions == 0)
5911 {
5912 if (!(keycache_cb= (void *) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB),
5913 MYF(0))))
5914 return 0;
5915 ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
5916 keycache->key_cache_type= SIMPLE_KEY_CACHE;
5917 keycache->interface_funcs= &simple_key_cache_funcs;
5918 }
5919 else
5920 {
5921 if (!(keycache_cb= (void *) my_malloc(sizeof(PARTITIONED_KEY_CACHE_CB),
5922 MYF(0))))
5923 return 0;
5924 ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
5925 keycache->key_cache_type= PARTITIONED_KEY_CACHE;
5926 keycache->interface_funcs= &partitioned_key_cache_funcs;
5927 }
5928 /*
5929 Initialize op_lock if it's not initialized before.
5930 The mutex may have been initialized before if we are being called
5931 from repartition_key_cache_internal().
5932 */
5933 if (use_op_lock)
5934 pthread_mutex_init(&keycache->op_lock, MY_MUTEX_INIT_FAST);
5935 keycache->keycache_cb= keycache_cb;
5936 keycache->key_cache_inited= 1;
5937 if (use_op_lock)
5938 pthread_mutex_lock(&keycache->op_lock);
5939 }
5940
5941 if (partitions != 0)
5942 {
5943 ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions= partitions;
5944 }
5945 keycache->can_be_used= 0;
5946 blocks= keycache->interface_funcs->init(keycache_cb, key_cache_block_size,
5947 use_mem, division_limit,
5948 age_threshold, changed_blocks_hash_size);
5949 keycache->partitions= partitions ?
5950 ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions :
5951 0;
5952 DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS);
5953 keycache->key_cache_mem_size=
5954 keycache->partitions ?
5955 ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_mem_size :
5956 ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_mem_size;
5957 if (blocks > 0)
5958 keycache->can_be_used= 1;
5959 if (use_op_lock)
5960 pthread_mutex_unlock(&keycache->op_lock);
5961 return blocks;
5962}
5963
5964
5965/*
5966 Initialize a key cache
5967
5968 SYNOPSIS
5969 init_key_cache()
5970 keycache pointer to the key cache to be initialized
5971 key_cache_block_size size of blocks to keep cached data
5972 use_mem total memory to use for cache buffers/structures
5973 division_limit division limit (may be zero)
5974 age_threshold age threshold (may be zero)
5975 partitions number of partitions in the key cache
5976
5977 DESCRIPTION
5978 The function creates a control block structure for a key cache and
5979 places the pointer to this block in the structure keycache.
5980 If the value of the parameter 'partitions' is 0 then a simple key cache
5981 is created. Otherwise a partitioned key cache with the specified number
5982 of partitions is created.
5983 The parameter key_cache_block_size specifies the size of the blocks in
5984 the key cache to be created. The parameters division_limit and
5985 age_threshold determine the initial values of those characteristics of
5986 the key cache that are used for midpoint insertion strategy. The parameter
5987 use_mem specifies the total amount of memory to be allocated for the
5988 key cache buffers and for all auxiliary structures.
5989 The function calls init_key_cache_internal() to perform all these actions
5990 with the last parameter set to TRUE.
5991
5992 RETURN VALUE
5993 total number of blocks in key cache partitions, if successful,
5994 <= 0 - otherwise.
5995
5996 NOTES
5997 It's assumed that no two threads call this function simultaneously
5998 referring to the same key cache handle.
5999*/
6000
6001int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
6002 size_t use_mem, uint division_limit,
6003 uint age_threshold, uint changed_blocks_hash_size,
6004 uint partitions)
6005{
6006 return init_key_cache_internal(keycache, key_cache_block_size, use_mem,
6007 division_limit, age_threshold,
6008 changed_blocks_hash_size, partitions, 1);
6009}
6010
6011
6012/*
6013 Resize a key cache
6014
6015 SYNOPSIS
6016 resize_key_cache()
6017 keycache pointer to the key cache to be resized
6018 key_cache_block_size size of blocks to keep cached data
6019 use_mem total memory to use for the new key cache
6020 division_limit new division limit (if not zero)
6021 age_threshold new age threshold (if not zero)
6022
6023 DESCRIPTION
6024 The function operates over the key cache key cache.
6025 The parameter key_cache_block_size specifies the new size of the block
6026 buffers in the key cache. The parameters division_limit and age_threshold
6027 determine the new initial values of those characteristics of the key cache
6028 that are used for midpoint insertion strategy. The parameter use_mem
6029 specifies the total amount of memory to be allocated for the key cache
6030 buffers and for all auxiliary structures.
6031
6032 RETURN VALUE
6033 number of blocks in the key cache, if successful,
6034 0 - otherwise.
6035
6036 NOTES
6037 The function does not block the calls and executions of other functions
6038 from the key cache interface. However it assumes that the calls of
6039 resize_key_cache itself are serialized.
6040
6041 Currently the function is called when the values of the variables
6042 key_buffer_size and/or key_cache_block_size are being reset for
6043 the key cache keycache.
6044*/
6045
6046int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
6047 size_t use_mem, uint division_limit, uint age_threshold,
6048 uint changed_blocks_hash_size)
6049{
6050 int blocks= -1;
6051 if (keycache->key_cache_inited)
6052 {
6053 pthread_mutex_lock(&keycache->op_lock);
6054 if ((uint) keycache->param_partitions != keycache->partitions && use_mem)
6055 blocks= repartition_key_cache_internal(keycache,
6056 key_cache_block_size, use_mem,
6057 division_limit, age_threshold,
6058 changed_blocks_hash_size,
6059 (uint) keycache->param_partitions,
6060 0);
6061 else
6062 {
6063 blocks= keycache->interface_funcs->resize(keycache->keycache_cb,
6064 key_cache_block_size,
6065 use_mem, division_limit,
6066 age_threshold,
6067 changed_blocks_hash_size);
6068
6069 if (keycache->partitions)
6070 keycache->partitions=
6071 ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->partitions;
6072 }
6073
6074 keycache->key_cache_mem_size=
6075 keycache->partitions ?
6076 ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->key_cache_mem_size :
6077 ((SIMPLE_KEY_CACHE_CB *)(keycache->keycache_cb))->key_cache_mem_size;
6078
6079 keycache->can_be_used= (blocks >= 0);
6080 pthread_mutex_unlock(&keycache->op_lock);
6081 }
6082 return blocks;
6083}
6084
6085
6086/*
6087 Change key cache parameters of a key cache
6088
6089 SYNOPSIS
6090 change_key_cache_param()
6091 keycache pointer to the key cache to change parameters for
6092 division_limit new division limit (if not zero)
6093 age_threshold new age threshold (if not zero)
6094
6095 DESCRIPTION
6096 The function sets new values of the division limit and the age threshold
6097 used when the key cache keycach employs midpoint insertion strategy.
6098 The parameters division_limit and age_threshold provide these new values.
6099
6100 RETURN VALUE
6101 none
6102
6103 NOTES
6104 Currently the function is called when the values of the variables
6105 key_cache_division_limit and/or key_cache_age_threshold are being reset
6106 for the key cache keycache.
6107*/
6108
6109void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
6110 uint age_threshold)
6111{
6112 if (keycache->key_cache_inited)
6113 {
6114 pthread_mutex_lock(&keycache->op_lock);
6115 keycache->interface_funcs->change_param(keycache->keycache_cb,
6116 division_limit,
6117 age_threshold);
6118 pthread_mutex_unlock(&keycache->op_lock);
6119 }
6120}
6121
6122
6123/*
6124 Destroy a key cache : internal
6125
6126 SYNOPSIS
6127 end_key_cache_internal()
6128 keycache pointer to the key cache to be destroyed
6129 cleanup <=> complete free
6130 use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it
6131
6132 DESCRIPTION
6133 The function performs the actions required from end_key_cache().
6134 It has an additional parameter: use_op_lock. When the parameter
6135 is TRUE than the function destroys keycache->op_lock if cleanup is true.
6136 Otherwise the action with the lock is omitted.
6137
6138 RETURN VALUE
6139 none
6140*/
6141
6142static
6143void end_key_cache_internal(KEY_CACHE *keycache, my_bool cleanup,
6144 my_bool use_op_lock)
6145{
6146 if (keycache->key_cache_inited)
6147 {
6148 keycache->interface_funcs->end(keycache->keycache_cb, cleanup);
6149 if (cleanup)
6150 {
6151 if (keycache->keycache_cb)
6152 {
6153 my_free(keycache->keycache_cb);
6154 keycache->keycache_cb= 0;
6155 }
6156 /*
6157 We do not destroy op_lock if we are going to reuse the same key cache.
6158 This happens if we are called from repartition_key_cache_internal().
6159 */
6160 if (use_op_lock)
6161 pthread_mutex_destroy(&keycache->op_lock);
6162 keycache->key_cache_inited= 0;
6163 }
6164 keycache->can_be_used= 0;
6165 }
6166}
6167
6168
6169/*
6170 Destroy a key cache
6171
6172 SYNOPSIS
6173 end_key_cache()
6174 keycache pointer to the key cache to be destroyed
6175 cleanup <=> complete free
6176
6177 DESCRIPTION
6178 The function frees the memory allocated for the cache blocks and
6179 auxiliary structures used by the key cache keycache. If the value
6180 of the parameter cleanup is TRUE then all resources used by the key
6181 cache are to be freed.
6182 The function calls end_key_cache_internal() to perform all these actions
6183 with the last parameter set to TRUE.
6184
6185 RETURN VALUE
6186 none
6187*/
6188
6189void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
6190{
6191 end_key_cache_internal(keycache, cleanup, 1);
6192}
6193
6194
6195/*
6196 Read a block of data from a key cache into a buffer
6197
6198 SYNOPSIS
6199
6200 key_cache_read()
6201 keycache pointer to the key cache to read data from
6202 file handler for the file for the block of data to be read
6203 filepos position of the block of data in the file
6204 level determines the weight of the data
6205 buff buffer to where the data must be placed
6206 length length of the buffer
6207 block_length length of the data read from a key cache block
6208 return_buffer return pointer to the key cache buffer with the data
6209
6210 DESCRIPTION
6211 The function operates over buffers of the key cache keycache.
6212 In a general case the function reads a block of data from the key cache
6213 into the buffer buff of the size specified by the parameter length. The
6214 beginning of the block of data to be read is specified by the parameters
6215 file and filepos. The length of the read data is the same as the length
6216 of the buffer.
6217 If the parameter return_buffer is not ignored and its value is TRUE, and
6218 the data to be read of the specified size block_length can be read from one
6219 key cache buffer, then the function returns a pointer to the data in the
6220 key cache buffer.
6221 The parameter 'level' is used only by the midpoint insertion strategy
6222 when the data or its portion cannot be found in the key cache.
6223 The function reads data into the buffer directly from file if the control
6224 block of the key cache has not been initialized yet.
6225
6226 RETURN VALUE
6227 Returns address from where the data is placed if successful, 0 - otherwise.
6228
6229 NOTES.
6230 Filepos must be a multiple of 'block_length', but it doesn't
6231 have to be a multiple of key_cache_block_size;
6232*/
6233
6234uchar *key_cache_read(KEY_CACHE *keycache,
6235 File file, my_off_t filepos, int level,
6236 uchar *buff, uint length,
6237 uint block_length, int return_buffer)
6238{
6239 if (keycache->can_be_used)
6240 return keycache->interface_funcs->read(keycache->keycache_cb,
6241 file, filepos, level,
6242 buff, length,
6243 block_length, return_buffer);
6244
6245 /* We can't use mutex here as the key cache may not be initialized */
6246
6247 if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
6248 return (uchar *) 0;
6249
6250 return buff;
6251}
6252
6253
6254/*
6255 Insert a block of file data from a buffer into a key cache
6256
6257 SYNOPSIS
6258 key_cache_insert()
6259 keycache pointer to the key cache to insert data into
6260 file handler for the file to insert data from
6261 filepos position of the block of data in the file to insert
6262 level determines the weight of the data
6263 buff buffer to read data from
6264 length length of the data in the buffer
6265
6266 DESCRIPTION
6267 The function operates over buffers of the key cache keycache.
6268 The function writes a block of file data from a buffer into the key cache.
6269 The buffer is specified with the parameters buff and length - the pointer
6270 to the beginning of the buffer and its size respectively. It's assumed
6271 that the buffer contains the data from 'file' allocated from the position
6272 filepos.
6273 The parameter level is used to set one characteristic for the key buffers
6274 loaded with the data from buff. The characteristic is used only by the
6275 midpoint insertion strategy.
6276
6277 RETURN VALUE
6278 0 if a success, 1 - otherwise.
6279
6280 NOTES
6281 The function is used by MyISAM to move all blocks from a index file to
6282 the key cache.
6283 It is assumed that it may be performed in parallel with reading the file
6284 data from the key buffers by other threads.
6285*/
6286
6287int key_cache_insert(KEY_CACHE *keycache,
6288 File file, my_off_t filepos, int level,
6289 uchar *buff, uint length)
6290{
6291 if (keycache->can_be_used)
6292 return keycache->interface_funcs->insert(keycache->keycache_cb,
6293 file, filepos, level,
6294 buff, length);
6295 return 0;
6296}
6297
6298
6299/*
6300 Write data from a buffer into a key cache
6301
6302 SYNOPSIS
6303
6304 key_cache_write()
6305 keycache pointer to the key cache to write data to
6306 file handler for the file to write data to
6307 filepos position in the file to write data to
6308 level determines the weight of the data
6309 buff buffer with the data
6310 length length of the buffer
6311 dont_write if is 0 then all dirty pages involved in writing
6312 should have been flushed from key cache
6313 file_extra pointer to optional file attributes
6314
6315 DESCRIPTION
6316 The function operates over buffers of the key cache keycache.
6317 In a general case the function writes data from a buffer into the key
6318 cache. The buffer is specified with the parameters buff and length -
6319 the pointer to the beginning of the buffer and its size respectively.
6320 It's assumed the buffer contains the data to be written into 'file'
6321 starting from the position filepos.
6322 If the value of the parameter dont_write is FALSE then the function
6323 also writes the data into file.
6324 The parameter level is used to set one characteristic for the key buffers
6325 filled with the data from buff. The characteristic is employed only by
6326 the midpoint insertion strategy.
6327 The parameter file_expra may point to additional file attributes used
6328 for optimization or other purposes.
6329 The function writes data from the buffer directly into file if the control
6330 block of the key cache has not been initialized yet.
6331
6332 RETURN VALUE
6333 0 if a success, 1 - otherwise.
6334
6335 NOTES
6336 This implementation may exploit the fact that the function is called only
6337 when a thread has got an exclusive lock for the key file.
6338*/
6339
6340int key_cache_write(KEY_CACHE *keycache,
6341 File file, void *file_extra,
6342 my_off_t filepos, int level,
6343 uchar *buff, uint length,
6344 uint block_length, int force_write)
6345{
6346 if (keycache->can_be_used)
6347 return keycache->interface_funcs->write(keycache->keycache_cb,
6348 file, file_extra,
6349 filepos, level,
6350 buff, length,
6351 block_length, force_write);
6352
6353 /* We can't use mutex here as the key cache may not be initialized */
6354 if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
6355 return 1;
6356
6357 return 0;
6358}
6359
6360
6361/*
6362 Flush all blocks for a file from key buffers of a key cache
6363
6364 SYNOPSIS
6365
6366 flush_key_blocks()
6367 keycache pointer to the key cache whose blocks are to be flushed
6368 file handler for the file to flush to
6369 file_extra maps of key cache (used for partitioned key caches)
6370 flush_type type of the flush operation
6371
6372 DESCRIPTION
6373 The function operates over buffers of the key cache keycache.
6374 In a general case the function flushes the data from all dirty key
6375 buffers related to the file 'file' into this file. The function does
6376 exactly this if the value of the parameter type is FLUSH_KEEP. If the
6377 value of this parameter is FLUSH_RELEASE, the function additionally
6378 releases the key buffers containing data from 'file' for new usage.
6379 If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
6380 just releases the key buffers containing data from 'file'.
6381 If the value of the parameter type is FLUSH_KEEP the function may use
6382 the value of the parameter file_extra pointing to possibly dirty
6383 partitions to optimize the operation for partitioned key caches.
6384
6385 RETURN
6386 0 ok
6387 1 error
6388
6389 NOTES
6390 Any implementation of the function may exploit the fact that the function
6391 is called only when a thread has got an exclusive lock for the key file.
6392*/
6393
6394int flush_key_blocks(KEY_CACHE *keycache,
6395 int file, void *file_extra,
6396 enum flush_type type)
6397{
6398 if (keycache->can_be_used)
6399 return keycache->interface_funcs->flush(keycache->keycache_cb,
6400 file, file_extra, type);
6401 return 0;
6402}
6403
6404
6405/*
6406 Reset the counters of a key cache
6407
6408 SYNOPSIS
6409 reset_key_cache_counters()
6410 name the name of a key cache (unused)
6411 keycache pointer to the key cache for which to reset counters
6412
6413 DESCRIPTION
6414 This function resets the values of the statistical counters for the key
6415 cache keycache.
6416 The parameter name is currently not used.
6417
6418 RETURN
6419 0 on success (always because it can't fail)
6420
6421 NOTES
6422 This procedure is used by process_key_caches() to reset the counters of all
6423 currently used key caches, both the default one and the named ones.
6424*/
6425
6426int reset_key_cache_counters(const char *name __attribute__((unused)),
6427 KEY_CACHE *keycache,
6428 void *unused __attribute__((unused)))
6429{
6430 int rc= 0;
6431 if (keycache->key_cache_inited)
6432 {
6433 pthread_mutex_lock(&keycache->op_lock);
6434 rc= keycache->interface_funcs->reset_counters(name,
6435 keycache->keycache_cb);
6436 pthread_mutex_unlock(&keycache->op_lock);
6437 }
6438 return rc;
6439}
6440
6441
6442/*
6443 Get statistics for a key cache
6444
6445 SYNOPSIS
6446 get_key_cache_statistics()
6447 keycache pointer to the key cache to get statistics for
6448 partition_no partition number to get statistics for
6449 key_cache_stats OUT pointer to the structure for the returned statistics
6450
6451 DESCRIPTION
6452 If the value of the parameter partition_no is equal to 0 then statistics
6453 for the whole key cache keycache (aggregated statistics) is returned in the
6454 fields of the structure key_cache_stat of the type KEY_CACHE_STATISTICS.
6455 Otherwise the value of the parameter partition_no makes sense only for
6456 a partitioned key cache. In this case the function returns statistics
6457 for the partition with the specified number partition_no.
6458
6459 RETURN
6460 none
6461*/
6462
6463void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no,
6464 KEY_CACHE_STATISTICS *key_cache_stats)
6465{
6466 if (keycache->key_cache_inited)
6467 {
6468 pthread_mutex_lock(&keycache->op_lock);
6469 keycache->interface_funcs->get_stats(keycache->keycache_cb,
6470 partition_no, key_cache_stats);
6471 pthread_mutex_unlock(&keycache->op_lock);
6472 }
6473}
6474
6475
6476/*
6477 Repartition a key cache : internal
6478
6479 SYNOPSIS
6480 repartition_key_cache_internal()
6481 keycache pointer to the key cache to be repartitioned
6482 key_cache_block_size size of blocks to keep cached data
6483 use_mem total memory to use for the new key cache
6484 division_limit new division limit (if not zero)
6485 age_threshold new age threshold (if not zero)
6486 partitions new number of partitions in the key cache
6487 use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it
6488
6489 DESCRIPTION
6490 The function performs the actions required from repartition_key_cache().
6491 It has an additional parameter: use_op_lock. When the parameter
6492 is TRUE then the function locks keycache->op_lock at start and
6493 unlocks it before the return. Otherwise the actions with the lock
6494 are omitted.
6495
6496 RETURN VALUE
6497 number of blocks in the key cache, if successful,
6498 0 - otherwise.
6499*/
6500
6501static
6502int repartition_key_cache_internal(KEY_CACHE *keycache,
6503 uint key_cache_block_size, size_t use_mem,
6504 uint division_limit, uint age_threshold,
6505 uint changed_blocks_hash_size,
6506 uint partitions, my_bool use_op_lock)
6507{
6508 uint blocks= -1;
6509 if (keycache->key_cache_inited)
6510 {
6511 if (use_op_lock)
6512 pthread_mutex_lock(&keycache->op_lock);
6513 keycache->interface_funcs->resize(keycache->keycache_cb,
6514 key_cache_block_size, 0,
6515 division_limit, age_threshold,
6516 changed_blocks_hash_size);
6517 end_key_cache_internal(keycache, 1, 0);
6518 blocks= init_key_cache_internal(keycache, key_cache_block_size, use_mem,
6519 division_limit, age_threshold,
6520 changed_blocks_hash_size, partitions,
6521 0);
6522 if (use_op_lock)
6523 pthread_mutex_unlock(&keycache->op_lock);
6524 }
6525 return blocks;
6526}
6527
6528/*
6529 Repartition a key cache
6530
6531 SYNOPSIS
6532 repartition_key_cache()
6533 keycache pointer to the key cache to be repartitioned
6534 key_cache_block_size size of blocks to keep cached data
6535 use_mem total memory to use for the new key cache
6536 division_limit new division limit (if not zero)
6537 age_threshold new age threshold (if not zero)
6538 partitions new number of partitions in the key cache
6539
6540 DESCRIPTION
6541 The function operates over the key cache keycache.
6542 The parameter partitions specifies the number of partitions in the key
6543 cache after repartitioning. If the value of this parameter is 0 then
6544 a simple key cache must be created instead of the old one.
6545 The parameter key_cache_block_size specifies the new size of the block
6546 buffers in the key cache. The parameters division_limit and age_threshold
6547 determine the new initial values of those characteristics of the key cache
6548 that are used for midpoint insertion strategy. The parameter use_mem
6549 specifies the total amount of memory to be allocated for the new key
6550 cache buffers and for all auxiliary structures.
6551 The function calls repartition_key_cache_internal() to perform all these
6552 actions with the last parameter set to TRUE.
6553
6554 RETURN VALUE
6555 number of blocks in the key cache, if successful,
6556 0 - otherwise.
6557
6558 NOTES
6559 Currently the function is called when the value of the variable
6560 key_cache_partitions is being reset for the key cache keycache.
6561*/
6562
6563int repartition_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
6564 size_t use_mem, uint division_limit,
6565 uint age_threshold, uint changed_blocks_hash_size,
6566 uint partitions)
6567{
6568 return repartition_key_cache_internal(keycache, key_cache_block_size, use_mem,
6569 division_limit, age_threshold,
6570 changed_blocks_hash_size,
6571 partitions, 1);
6572}
6573
6574