1/*****************************************************************************
2
3Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file buf/buf0buddy.cc
22Binary buddy allocator for compressed pages
23
24Created December 2006 by Marko Makela
25*******************************************************/
26
27#include "buf0buddy.h"
28#include "buf0buf.h"
29#include "buf0lru.h"
30#include "buf0flu.h"
31#include "page0zip.h"
32#include "srv0start.h"
33
34/** When freeing a buf we attempt to coalesce by looking at its buddy
35and deciding whether it is free or not. To ascertain if the buddy is
36free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET
37within the buddy. The question is how we can be sure that it is
38safe to look at BUF_BUDDY_STAMP_OFFSET.
39The answer lies in following invariants:
40* All blocks allocated by buddy allocator are used for compressed
41page frame.
* A compressed table always has space_id < SRV_LOG_SPACE_FIRST_ID
43* BUF_BUDDY_STAMP_OFFSET always points to the space_id field in
44a frame.
45 -- The above is true because we look at these fields when the
46 corresponding buddy block is free which implies that:
47 * The block we are looking at must have an address aligned at
48 the same size that its free buddy has. For example, if we have
49 a free block of 8K then its buddy's address must be aligned at
50 8K as well.
51 * It is possible that the block we are looking at may have been
52 further divided into smaller sized blocks but its starting
53 address must still remain the start of a page frame i.e.: it
54 cannot be middle of a block. For example, if we have a free
55 block of size 8K then its buddy may be divided into blocks
56 of, say, 1K, 1K, 2K, 4K but the buddy's address will still be
57 the starting address of first 1K compressed page.
58 * What is important to note is that for any given block, the
59 buddy's address cannot be in the middle of a larger block i.e.:
60 in above example, our 8K block cannot have a buddy whose address
61 is aligned on 8K but it is part of a larger 16K block.
62*/
63
/** Offset within buf_buddy_free_t where free or non_free stamps
are written. This is the space_id field of a page frame, so it can be
read safely in any buddy block (see the invariants at the top of this
file). */
#define BUF_BUDDY_STAMP_OFFSET	FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID

/** Value that we stamp on all buffers that are currently on the zip_free
list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset.
SRV_LOG_SPACE_FIRST_ID cannot be the space_id of a compressed page,
so a free block cannot be mistaken for an allocated one. */
#define BUF_BUDDY_STAMP_FREE	SRV_LOG_SPACE_FIRST_ID

/** Stamp value for non-free buffers. Will be overwritten by a non-zero
value by the consumer of the block */
#define BUF_BUDDY_STAMP_NONFREE	0XFFFFFFFFUL

/** Return type of buf_buddy_is_free() */
enum buf_buddy_state_t {
	BUF_BUDDY_STATE_FREE,	/*!< the buddy is completely free */
	BUF_BUDDY_STATE_USED,	/*!< the buddy is currently in use */
	BUF_BUDDY_STATE_PARTIALLY_USED/*!< some sub-blocks in the buddy
				are in use */
};
83
#ifdef UNIV_DEBUG_VALGRIND
/**********************************************************************//**
Invalidate memory area that we won't access while page is free.
Marks the whole block writable-but-undefined for Valgrind, so that
any later read of the free memory is reported. */
UNIV_INLINE
void
buf_buddy_mem_invalid(
/*==================*/
	buf_buddy_free_t*	buf,	/*!< in: block to check */
	ulint			i)	/*!< in: index of zip_free[] */
{
	/* Size in bytes of a zip_free[i] block */
	const size_t	size	= BUF_BUDDY_LOW << i;
	ut_ad(i <= BUF_BUDDY_SIZES);

	UNIV_MEM_ASSERT_W(buf, size);
	UNIV_MEM_INVALID(buf, size);
}
#else /* UNIV_DEBUG_VALGRIND */
/* Without Valgrind instrumentation, only sanity-check the index. */
# define buf_buddy_mem_invalid(buf, i) ut_ad((i) <= BUF_BUDDY_SIZES)
#endif /* UNIV_DEBUG_VALGRIND */
103
104/**********************************************************************//**
105Check if a buddy is stamped free.
106@return whether the buddy is free */
107UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
108bool
109buf_buddy_stamp_is_free(
110/*====================*/
111 const buf_buddy_free_t* buf) /*!< in: block to check */
112{
113 compile_time_assert(BUF_BUDDY_STAMP_FREE < BUF_BUDDY_STAMP_NONFREE);
114 return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
115 == BUF_BUDDY_STAMP_FREE);
116}
117
118/**********************************************************************//**
119Stamps a buddy free. */
120UNIV_INLINE
121void
122buf_buddy_stamp_free(
123/*=================*/
124 buf_buddy_free_t* buf, /*!< in/out: block to stamp */
125 ulint i) /*!< in: block size */
126{
127 ut_d(memset(buf, static_cast<int>(i), BUF_BUDDY_LOW << i));
128 buf_buddy_mem_invalid(buf, i);
129 mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET,
130 BUF_BUDDY_STAMP_FREE);
131 buf->stamp.size = i;
132}
133
134/**********************************************************************//**
135Stamps a buddy nonfree.
136@param[in,out] buf block to stamp
137@param[in] i block size */
138static inline void buf_buddy_stamp_nonfree(buf_buddy_free_t* buf, ulint i)
139{
140 buf_buddy_mem_invalid(buf, i);
141 compile_time_assert(BUF_BUDDY_STAMP_NONFREE == 0xffffffffU);
142 memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4);
143}
144
145/**********************************************************************//**
146Get the offset of the buddy of a compressed page frame.
147@return the buddy relative of page */
148UNIV_INLINE
149void*
150buf_buddy_get(
151/*==========*/
152 byte* page, /*!< in: compressed page */
153 ulint size) /*!< in: page size in bytes */
154{
155 ut_ad(ut_is_2pow(size));
156 ut_ad(size >= BUF_BUDDY_LOW);
157 ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
158 ut_ad(size < BUF_BUDDY_HIGH);
159 ut_ad(BUF_BUDDY_HIGH == srv_page_size);
160 ut_ad(!ut_align_offset(page, size));
161
162 if (((ulint) page) & size) {
163 return(page - size);
164 } else {
165 return(page + size);
166 }
167}
168
169#ifdef UNIV_DEBUG
/** Validate a given zip_free list: every element must be stamped free
and must record a size index no larger than the list it is linked to. */
struct CheckZipFree {
	CheckZipFree(ulint i) : m_i(i) {}

	/** Check one list element.
	@param[in]	elem	block linked in zip_free[m_i] */
	void operator()(const buf_buddy_free_t* elem) const
	{
		ut_a(buf_buddy_stamp_is_free(elem));
		ut_a(elem->stamp.size <= m_i);
	}

	/** index of the zip_free[] list being validated */
	ulint		m_i;
};
182
/** Validate a buddy list (debug builds only).
@param[in]	buf_pool	buffer pool instance
@param[in]	i		index of zip_free[] (buddy size) to validate */
static
void
buf_buddy_list_validate(
	const buf_pool_t*	buf_pool,
	ulint			i)
{
	CheckZipFree	check(i);
	ut_list_validate(buf_pool->zip_free[i], check);
}
195
196/**********************************************************************//**
197Debug function to validate that a buffer is indeed free i.e.: in the
198zip_free[].
199@return true if free */
200UNIV_INLINE
201bool
202buf_buddy_check_free(
203/*=================*/
204 buf_pool_t* buf_pool,/*!< in: buffer pool instance */
205 const buf_buddy_free_t* buf, /*!< in: block to check */
206 ulint i) /*!< in: index of buf_pool->zip_free[] */
207{
208 const ulint size = BUF_BUDDY_LOW << i;
209
210 ut_ad(buf_pool_mutex_own(buf_pool));
211 ut_ad(!ut_align_offset(buf, size));
212 ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
213
214 buf_buddy_free_t* itr;
215
216 for (itr = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
217 itr && itr != buf;
218 itr = UT_LIST_GET_NEXT(list, itr)) {
219 }
220
221 return(itr == buf);
222}
223#endif /* UNIV_DEBUG */
224
225/**********************************************************************//**
226Checks if a buf is free i.e.: in the zip_free[].
227@retval BUF_BUDDY_STATE_FREE if fully free
228@retval BUF_BUDDY_STATE_USED if currently in use
229@retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */
230static MY_ATTRIBUTE((warn_unused_result))
231buf_buddy_state_t
232buf_buddy_is_free(
233/*==============*/
234 buf_buddy_free_t* buf, /*!< in: block to check */
235 ulint i) /*!< in: index of
236 buf_pool->zip_free[] */
237{
238#ifdef UNIV_DEBUG
239 const ulint size = BUF_BUDDY_LOW << i;
240 ut_ad(!ut_align_offset(buf, size));
241 ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
242#endif /* UNIV_DEBUG */
243
244 /* We assume that all memory from buf_buddy_alloc()
245 is used for compressed page frames. */
246
247 /* We look inside the allocated objects returned by
248 buf_buddy_alloc() and assume that each block is a compressed
249 page that contains one of the following in space_id.
250 * BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or
251 * BUF_BUDDY_STAMP_NONFREE if the block has been allocated but
252 not initialized yet or
253 * A valid space_id of a compressed tablespace
254
255 The call below attempts to read from free memory. The memory
256 is "owned" by the buddy allocator (and it has been allocated
257 from the buffer pool), so there is nothing wrong about this. */
258 if (!buf_buddy_stamp_is_free(buf)) {
259 return(BUF_BUDDY_STATE_USED);
260 }
261
262 /* A block may be free but a fragment of it may still be in use.
263 To guard against that we write the free block size in terms of
264 zip_free index at start of stamped block. Note that we can
265 safely rely on this value only if the buf is free. */
266 ut_ad(buf->stamp.size <= i);
267 return(buf->stamp.size == i
268 ? BUF_BUDDY_STATE_FREE
269 : BUF_BUDDY_STATE_PARTIALLY_USED);
270}
271
272/**********************************************************************//**
273Add a block to the head of the appropriate buddy free list. */
274UNIV_INLINE
275void
276buf_buddy_add_to_free(
277/*==================*/
278 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
279 buf_buddy_free_t* buf, /*!< in,own: block to be freed */
280 ulint i) /*!< in: index of
281 buf_pool->zip_free[] */
282{
283 ut_ad(buf_pool_mutex_own(buf_pool));
284 ut_ad(buf_pool->zip_free[i].start != buf);
285
286 buf_buddy_stamp_free(buf, i);
287 UT_LIST_ADD_FIRST(buf_pool->zip_free[i], buf);
288 ut_d(buf_buddy_list_validate(buf_pool, i));
289}
290
291/**********************************************************************//**
292Remove a block from the appropriate buddy free list. */
293UNIV_INLINE
294void
295buf_buddy_remove_from_free(
296/*=======================*/
297 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
298 buf_buddy_free_t* buf, /*!< in,own: block to be
299 freed */
300 ulint i) /*!< in: index of
301 buf_pool->zip_free[] */
302{
303 ut_ad(buf_pool_mutex_own(buf_pool));
304 ut_ad(buf_buddy_check_free(buf_pool, buf, i));
305
306 UT_LIST_REMOVE(buf_pool->zip_free[i], buf);
307 buf_buddy_stamp_nonfree(buf, i);
308}
309
310/**********************************************************************//**
311Try to allocate a block from buf_pool->zip_free[].
312@return allocated block, or NULL if buf_pool->zip_free[] was empty */
313static
314buf_buddy_free_t*
315buf_buddy_alloc_zip(
316/*================*/
317 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
318 ulint i) /*!< in: index of buf_pool->zip_free[] */
319{
320 buf_buddy_free_t* buf;
321
322 ut_ad(buf_pool_mutex_own(buf_pool));
323 ut_a(i < BUF_BUDDY_SIZES);
324 ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
325
326 ut_d(buf_buddy_list_validate(buf_pool, i));
327
328 buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
329
330 if (buf_pool->curr_size < buf_pool->old_size
331 && UT_LIST_GET_LEN(buf_pool->withdraw)
332 < buf_pool->withdraw_target) {
333
334 while (buf != NULL
335 && buf_frame_will_withdrawn(
336 buf_pool, reinterpret_cast<byte*>(buf))) {
337 /* This should be withdrawn, not to be allocated */
338 buf = UT_LIST_GET_NEXT(list, buf);
339 }
340 }
341
342 if (buf) {
343 buf_buddy_remove_from_free(buf_pool, buf, i);
344 } else if (i + 1 < BUF_BUDDY_SIZES) {
345 /* Attempt to split. */
346 buf = buf_buddy_alloc_zip(buf_pool, i + 1);
347
348 if (buf) {
349 buf_buddy_free_t* buddy =
350 reinterpret_cast<buf_buddy_free_t*>(
351 buf->stamp.bytes
352 + (BUF_BUDDY_LOW << i));
353
354 ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
355 buf_buddy_add_to_free(buf_pool, buddy, i);
356 }
357 }
358
359 if (buf) {
360 /* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
361 UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET);
362 UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
363 + buf->stamp.bytes, ~i,
364 (BUF_BUDDY_LOW << i)
365 - (BUF_BUDDY_STAMP_OFFSET + 4));
366 ut_ad(mach_read_from_4(buf->stamp.bytes
367 + BUF_BUDDY_STAMP_OFFSET)
368 == BUF_BUDDY_STAMP_NONFREE);
369 }
370
371 return(buf);
372}
373
374/**********************************************************************//**
375Deallocate a buffer frame of srv_page_size. */
376static
377void
378buf_buddy_block_free(
379/*=================*/
380 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
381 void* buf) /*!< in: buffer frame to deallocate */
382{
383 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
384 buf_page_t* bpage;
385 buf_block_t* block;
386
387 ut_ad(buf_pool_mutex_own(buf_pool));
388 ut_ad(!mutex_own(&buf_pool->zip_mutex));
389 ut_a(!ut_align_offset(buf, srv_page_size));
390
391 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
392 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
393 && bpage->in_zip_hash && !bpage->in_page_hash),
394 ((buf_block_t*) bpage)->frame == buf);
395 ut_a(bpage);
396 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
397 ut_ad(!bpage->in_page_hash);
398 ut_ad(bpage->in_zip_hash);
399 ut_d(bpage->in_zip_hash = FALSE);
400 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
401
402 ut_d(memset(buf, 0, srv_page_size));
403 UNIV_MEM_INVALID(buf, srv_page_size);
404
405 block = (buf_block_t*) bpage;
406 buf_page_mutex_enter(block);
407 buf_LRU_block_free_non_file_page(block);
408 buf_page_mutex_exit(block);
409
410 ut_ad(buf_pool->buddy_n_frames > 0);
411 ut_d(buf_pool->buddy_n_frames--);
412}
413
414/**********************************************************************//**
415Allocate a buffer block to the buddy allocator. */
416static
417void
418buf_buddy_block_register(
419/*=====================*/
420 buf_block_t* block) /*!< in: buffer frame to allocate */
421{
422 buf_pool_t* buf_pool = buf_pool_from_block(block);
423 const ulint fold = BUF_POOL_ZIP_FOLD(block);
424 ut_ad(buf_pool_mutex_own(buf_pool));
425 ut_ad(!mutex_own(&buf_pool->zip_mutex));
426 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
427
428 buf_block_set_state(block, BUF_BLOCK_MEMORY);
429
430 ut_a(block->frame);
431 ut_a(!ut_align_offset(block->frame, srv_page_size));
432
433 ut_ad(!block->page.in_page_hash);
434 ut_ad(!block->page.in_zip_hash);
435 ut_d(block->page.in_zip_hash = TRUE);
436 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
437
438 ut_d(buf_pool->buddy_n_frames++);
439}
440
441/**********************************************************************//**
442Allocate a block from a bigger object.
443@return allocated block */
444static
445void*
446buf_buddy_alloc_from(
447/*=================*/
448 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
449 void* buf, /*!< in: a block that is free to use */
450 ulint i, /*!< in: index of
451 buf_pool->zip_free[] */
452 ulint j) /*!< in: size of buf as an index
453 of buf_pool->zip_free[] */
454{
455 ulint offs = BUF_BUDDY_LOW << j;
456 ut_ad(j <= BUF_BUDDY_SIZES);
457 ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
458 ut_ad(j >= i);
459 ut_ad(!ut_align_offset(buf, offs));
460
461 /* Add the unused parts of the block to the free lists. */
462 while (j > i) {
463 buf_buddy_free_t* zip_buf;
464
465 offs >>= 1;
466 j--;
467
468 zip_buf = reinterpret_cast<buf_buddy_free_t*>(
469 reinterpret_cast<byte*>(buf) + offs);
470 buf_buddy_add_to_free(buf_pool, zip_buf, j);
471 }
472
473 buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i);
474 return(buf);
475}
476
477/**********************************************************************//**
478Allocate a block. The thread calling this function must hold
479buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
480The buf_pool_mutex may be released and reacquired.
481@return allocated block, never NULL */
482void*
483buf_buddy_alloc_low(
484/*================*/
485 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
486 ulint i, /*!< in: index of buf_pool->zip_free[],
487 or BUF_BUDDY_SIZES */
488 bool* lru) /*!< in: pointer to a variable that
489 will be assigned true if storage was
490 allocated from the LRU list and
491 buf_pool->mutex was temporarily
492 released */
493{
494 buf_block_t* block;
495
496 ut_ad(lru);
497 ut_ad(buf_pool_mutex_own(buf_pool));
498 ut_ad(!mutex_own(&buf_pool->zip_mutex));
499 ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
500
501 if (i < BUF_BUDDY_SIZES) {
502 /* Try to allocate from the buddy system. */
503 block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i);
504
505 if (block) {
506 goto func_exit;
507 }
508 }
509
510 /* Try allocating from the buf_pool->free list. */
511 block = buf_LRU_get_free_only(buf_pool);
512
513 if (block) {
514
515 goto alloc_big;
516 }
517
518 /* Try replacing an uncompressed page in the buffer pool. */
519 buf_pool_mutex_exit(buf_pool);
520 block = buf_LRU_get_free_block(buf_pool);
521 *lru = true;
522 buf_pool_mutex_enter(buf_pool);
523
524alloc_big:
525 buf_buddy_block_register(block);
526
527 block = (buf_block_t*) buf_buddy_alloc_from(
528 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
529
530func_exit:
531 buf_pool->buddy_stat[i].used++;
532 return(block);
533}
534
535/**********************************************************************//**
536Try to relocate a block.
537@return true if relocated */
538static
539bool
540buf_buddy_relocate(
541/*===============*/
542 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
543 void* src, /*!< in: block to relocate */
544 void* dst, /*!< in: free block to relocate to */
545 ulint i, /*!< in: index of
546 buf_pool->zip_free[] */
547 bool force) /*!< in: true if we must relocate
548 always */
549{
550 buf_page_t* bpage;
551 const ulint size = BUF_BUDDY_LOW << i;
552 ulint space;
553 ulint offset;
554
555 ut_ad(buf_pool_mutex_own(buf_pool));
556 ut_ad(!mutex_own(&buf_pool->zip_mutex));
557 ut_ad(!ut_align_offset(src, size));
558 ut_ad(!ut_align_offset(dst, size));
559 ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
560 UNIV_MEM_ASSERT_W(dst, size);
561
562 space = mach_read_from_4((const byte*) src
563 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
564 offset = mach_read_from_4((const byte*) src
565 + FIL_PAGE_OFFSET);
566
567 /* Suppress Valgrind warnings about conditional jump
568 on uninitialized value. */
569 UNIV_MEM_VALID(&space, sizeof space);
570 UNIV_MEM_VALID(&offset, sizeof offset);
571
572 ut_ad(space != BUF_BUDDY_STAMP_FREE);
573
574 const page_id_t page_id(space, offset);
575
576 /* If space,offset is bogus, then we know that the
577 buf_page_hash_get_low() call below will return NULL. */
578 if (!force && buf_pool != buf_pool_get(page_id)) {
579 return(false);
580 }
581
582 rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
583
584 rw_lock_x_lock(hash_lock);
585
586 bpage = buf_page_hash_get_low(buf_pool, page_id);
587
588 if (!bpage || bpage->zip.data != src) {
589 /* The block has probably been freshly
590 allocated by buf_LRU_get_free_block() but not
591 added to buf_pool->page_hash yet. Obviously,
592 it cannot be relocated. */
593
594 rw_lock_x_unlock(hash_lock);
595
596 if (!force || space != 0 || offset != 0) {
597 return(false);
598 }
599
600 /* It might be just uninitialized page.
601 We should search from LRU list also. */
602
603 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
604 while (bpage != NULL) {
605 if (bpage->zip.data == src) {
606 hash_lock = buf_page_hash_lock_get(
607 buf_pool, bpage->id);
608 rw_lock_x_lock(hash_lock);
609 break;
610 }
611 bpage = UT_LIST_GET_NEXT(LRU, bpage);
612 }
613
614 if (bpage == NULL) {
615 return(false);
616 }
617 }
618
619 if (page_zip_get_size(&bpage->zip) != size) {
620 /* The block is of different size. We would
621 have to relocate all blocks covered by src.
622 For the sake of simplicity, give up. */
623 ut_ad(page_zip_get_size(&bpage->zip) < size);
624
625 rw_lock_x_unlock(hash_lock);
626
627 return(false);
628 }
629
630 /* The block must have been allocated, but it may
631 contain uninitialized data. */
632 UNIV_MEM_ASSERT_W(src, size);
633
634 BPageMutex* block_mutex = buf_page_get_mutex(bpage);
635
636 mutex_enter(block_mutex);
637
638 if (buf_page_can_relocate(bpage)) {
639 /* Relocate the compressed page. */
640 uintmax_t usec = ut_time_us(NULL);
641
642 ut_a(bpage->zip.data == src);
643
644 memcpy(dst, src, size);
645 bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
646
647 rw_lock_x_unlock(hash_lock);
648
649 mutex_exit(block_mutex);
650
651 buf_buddy_mem_invalid(
652 reinterpret_cast<buf_buddy_free_t*>(src), i);
653
654 buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i];
655 buddy_stat->relocated++;
656 buddy_stat->relocated_usec += ut_time_us(NULL) - usec;
657 return(true);
658 }
659
660 rw_lock_x_unlock(hash_lock);
661
662 mutex_exit(block_mutex);
663 return(false);
664}
665
666/**********************************************************************//**
667Deallocate a block. */
668void
669buf_buddy_free_low(
670/*===============*/
671 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
672 void* buf, /*!< in: block to be freed, must not be
673 pointed to by the buffer pool */
674 ulint i) /*!< in: index of buf_pool->zip_free[],
675 or BUF_BUDDY_SIZES */
676{
677 buf_buddy_free_t* buddy;
678
679 ut_ad(buf_pool_mutex_own(buf_pool));
680 ut_ad(!mutex_own(&buf_pool->zip_mutex));
681 ut_ad(i <= BUF_BUDDY_SIZES);
682 ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
683 ut_ad(buf_pool->buddy_stat[i].used > 0);
684
685 buf_pool->buddy_stat[i].used--;
686recombine:
687 UNIV_MEM_ALLOC(buf, BUF_BUDDY_LOW << i);
688
689 if (i == BUF_BUDDY_SIZES) {
690 buf_buddy_block_free(buf_pool, buf);
691 return;
692 }
693
694 ut_ad(i < BUF_BUDDY_SIZES);
695 ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
696 ut_ad(!buf_pool_contains_zip(buf_pool, buf));
697
698 /* Do not recombine blocks if there are few free blocks.
699 We may waste up to 15360*max_len bytes to free blocks
700 (1024 + 2048 + 4096 + 8192 = 15360) */
701 if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16
702 && buf_pool->curr_size >= buf_pool->old_size) {
703 goto func_exit;
704 }
705
706 /* Try to combine adjacent blocks. */
707 buddy = reinterpret_cast<buf_buddy_free_t*>(
708 buf_buddy_get(reinterpret_cast<byte*>(buf),
709 BUF_BUDDY_LOW << i));
710
711 switch (buf_buddy_is_free(buddy, i)) {
712 case BUF_BUDDY_STATE_FREE:
713 /* The buddy is free: recombine */
714 buf_buddy_remove_from_free(buf_pool, buddy, i);
715buddy_is_free:
716 ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
717 i++;
718 buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
719
720 goto recombine;
721
722 case BUF_BUDDY_STATE_USED:
723 ut_d(buf_buddy_list_validate(buf_pool, i));
724
725 /* The buddy is not free. Is there a free block of
726 this size? */
727 if (buf_buddy_free_t* zip_buf =
728 UT_LIST_GET_FIRST(buf_pool->zip_free[i])) {
729
730 /* Remove the block from the free list, because
731 a successful buf_buddy_relocate() will overwrite
732 zip_free->list. */
733 buf_buddy_remove_from_free(buf_pool, zip_buf, i);
734
735 /* Try to relocate the buddy of buf to the free
736 block. */
737 if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i,
738 false)) {
739
740 goto buddy_is_free;
741 }
742
743 buf_buddy_add_to_free(buf_pool, zip_buf, i);
744 }
745
746 break;
747 case BUF_BUDDY_STATE_PARTIALLY_USED:
748 /* Some sub-blocks in the buddy are still in use.
749 Relocation will fail. No need to try. */
750 break;
751 }
752
753func_exit:
754 /* Free the block to the buddy list. */
755 buf_buddy_add_to_free(buf_pool,
756 reinterpret_cast<buf_buddy_free_t*>(buf),
757 i);
758}
759
/** Reallocate a block: allocate a new block, relocate the contents of
buf into it, and free whichever block ends up unused.
@param[in]	buf_pool	buffer pool instance
@param[in]	buf		block to be reallocated, must be pointed
to by the buffer pool
@param[in]	size		block size, up to srv_page_size
@retval false if failed because of no free blocks. */
bool
buf_buddy_realloc(
	buf_pool_t*	buf_pool,
	void*		buf,
	ulint		size)
{
	buf_block_t*	block = NULL;
	ulint		i = buf_buddy_get_slot(size);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(!mutex_own(&buf_pool->zip_mutex));
	ut_ad(i <= BUF_BUDDY_SIZES);
	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));

	if (i < BUF_BUDDY_SIZES) {
		/* Try to allocate from the buddy system. */
		block = reinterpret_cast<buf_block_t*>(
			buf_buddy_alloc_zip(buf_pool, i));
	}

	if (block == NULL) {
		/* Try allocating from the buf_pool->free list. */
		block = buf_LRU_get_free_only(buf_pool);

		if (block == NULL) {
			return(false); /* free_list was not enough */
		}

		buf_buddy_block_register(block);

		block = reinterpret_cast<buf_block_t*>(
			buf_buddy_alloc_from(
				buf_pool, block->frame, i, BUF_BUDDY_SIZES));
	}

	buf_pool->buddy_stat[i].used++;

	/* Try to relocate the buddy of buf to the free block. */
	if (buf_buddy_relocate(buf_pool, buf, block, i, true)) {
		/* succeeded: the old block is now unused */
		buf_buddy_free_low(buf_pool, buf, i);
	} else {
		/* failed: release the freshly allocated block */
		buf_buddy_free_low(buf_pool, block, i);
	}

	return(true); /* free_list was enough */
}
814
/** Combine all pairs of free buddies.
Called while the buffer pool is being shrunk (curr_size < old_size);
frees blocks lying in the withdraw area so they can be recombined
into whole page frames and withdrawn.
@param[in]	buf_pool	buffer pool instance */
void
buf_buddy_condense_free(
	buf_pool_t*	buf_pool)
{
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_pool->curr_size < buf_pool->old_size);

	for (ulint i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) {
		buf_buddy_free_t*	buf =
			UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

		/* seek to withdraw target */
		while (buf != NULL
		       && !buf_frame_will_withdrawn(
			       buf_pool, reinterpret_cast<byte*>(buf))) {
			buf = UT_LIST_GET_NEXT(list, buf);
		}

		while (buf != NULL) {
			buf_buddy_free_t*	next =
				UT_LIST_GET_NEXT(list, buf);

			buf_buddy_free_t*	buddy =
				reinterpret_cast<buf_buddy_free_t*>(
					buf_buddy_get(
						reinterpret_cast<byte*>(buf),
						BUF_BUDDY_LOW << i));

			/* seek to the next withdraw target, skipping
			over buddy itself (the recombination below may
			unlink buddy from this list) */
			while (true) {
				while (next != NULL
				       && !buf_frame_will_withdrawn(
					       buf_pool,
					       reinterpret_cast<byte*>(next))) {
					next = UT_LIST_GET_NEXT(list, next);
				}

				if (buddy != next) {
					break;
				}

				next = UT_LIST_GET_NEXT(list, next);
			}

			if (buf_buddy_is_free(buddy, i)
			    == BUF_BUDDY_STATE_FREE) {
				/* Both buf and buddy are free.
				Try to combine them. */
				buf_buddy_remove_from_free(buf_pool, buf, i);
				/* Balance the used count that
				buf_buddy_free_low() will decrement. */
				buf_pool->buddy_stat[i].used++;

				buf_buddy_free_low(buf_pool, buf, i);
			}

			buf = next;
		}
	}
}
875