1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file buf/buf0buddy.cc |
22 | Binary buddy allocator for compressed pages |
23 | |
24 | Created December 2006 by Marko Makela |
25 | *******************************************************/ |
26 | |
27 | #include "buf0buddy.h" |
28 | #include "buf0buf.h" |
29 | #include "buf0lru.h" |
30 | #include "buf0flu.h" |
31 | #include "page0zip.h" |
32 | #include "srv0start.h" |
33 | |
/** When freeing a buf we attempt to coalesce by looking at its buddy
and deciding whether it is free or not. To ascertain if the buddy is
free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET
within the buddy. The question is how we can be sure that it is
safe to look at BUF_BUDDY_STAMP_OFFSET.
The answer lies in the following invariants:
* All blocks allocated by the buddy allocator are used for compressed
page frames.
* A compressed table always has space_id < SRV_LOG_SPACE_FIRST_ID
* BUF_BUDDY_STAMP_OFFSET always points to the space_id field in
a frame.
  -- The above is true because we look at these fields when the
     corresponding buddy block is free, which implies that:
     * The block we are looking at must have an address aligned at
       the same size that its free buddy has. For example, if we have
       a free block of 8K then its buddy's address must be aligned at
       8K as well.
     * It is possible that the block we are looking at may have been
       further divided into smaller sized blocks but its starting
       address must still remain the start of a page frame, i.e. it
       cannot be in the middle of a block. For example, if we have a
       free block of size 8K then its buddy may be divided into blocks
       of, say, 1K, 1K, 2K, 4K but the buddy's address will still be
       the starting address of the first 1K compressed page.
     * What is important to note is that for any given block, the
       buddy's address cannot be in the middle of a larger block, i.e.
       in the above example, our 8K block cannot have a buddy whose
       address is aligned on 8K but which is part of a larger 16K block.
*/
63 | |
/** Byte offset, within a buf_buddy_free_t, at which the free or
non-free stamp is stored. */
#define BUF_BUDDY_STAMP_OFFSET	FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID

/** Stamp written at BUF_BUDDY_STAMP_OFFSET into every buffer that is
currently on a zip_free list. */
#define BUF_BUDDY_STAMP_FREE	SRV_LOG_SPACE_FIRST_ID

/** Stamp written into buffers that are not free. The consumer of the
block will overwrite it with a non-zero value. */
#define BUF_BUDDY_STAMP_NONFREE	0xFFFFFFFFUL
75 | |
/** Possible results of buf_buddy_is_free() */
enum buf_buddy_state_t {
	/** The buddy is completely free */
	BUF_BUDDY_STATE_FREE,
	/** The buddy is currently in use */
	BUF_BUDDY_STATE_USED,
	/** Some sub-blocks of the buddy are in use */
	BUF_BUDDY_STATE_PARTIALLY_USED
};
83 | |
#ifdef UNIV_DEBUG_VALGRIND
/** Declare the memory of a block invalid, because it will not be
accessed while the block is free.
@param[in,out]	buf	block whose memory is being invalidated
@param[in]	i	index of zip_free[]; the block is
			BUF_BUDDY_LOW << i bytes long */
UNIV_INLINE
void
buf_buddy_mem_invalid(
	buf_buddy_free_t*	buf,
	ulint			i)
{
	const size_t	n_bytes = BUF_BUDDY_LOW << i;
	ut_ad(i <= BUF_BUDDY_SIZES);

	/* Check that the area is writable before invalidating it. */
	UNIV_MEM_ASSERT_W(buf, n_bytes);
	UNIV_MEM_INVALID(buf, n_bytes);
}
#else /* UNIV_DEBUG_VALGRIND */
/* Without Valgrind instrumentation, only the index is sanity-checked. */
# define buf_buddy_mem_invalid(buf, i)	ut_ad((i) <= BUF_BUDDY_SIZES)
#endif /* UNIV_DEBUG_VALGRIND */
103 | |
104 | /**********************************************************************//** |
105 | Check if a buddy is stamped free. |
106 | @return whether the buddy is free */ |
107 | UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) |
108 | bool |
109 | buf_buddy_stamp_is_free( |
110 | /*====================*/ |
111 | const buf_buddy_free_t* buf) /*!< in: block to check */ |
112 | { |
113 | compile_time_assert(BUF_BUDDY_STAMP_FREE < BUF_BUDDY_STAMP_NONFREE); |
114 | return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET) |
115 | == BUF_BUDDY_STAMP_FREE); |
116 | } |
117 | |
118 | /**********************************************************************//** |
119 | Stamps a buddy free. */ |
120 | UNIV_INLINE |
121 | void |
122 | buf_buddy_stamp_free( |
123 | /*=================*/ |
124 | buf_buddy_free_t* buf, /*!< in/out: block to stamp */ |
125 | ulint i) /*!< in: block size */ |
126 | { |
127 | ut_d(memset(buf, static_cast<int>(i), BUF_BUDDY_LOW << i)); |
128 | buf_buddy_mem_invalid(buf, i); |
129 | mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, |
130 | BUF_BUDDY_STAMP_FREE); |
131 | buf->stamp.size = i; |
132 | } |
133 | |
134 | /**********************************************************************//** |
135 | Stamps a buddy nonfree. |
136 | @param[in,out] buf block to stamp |
137 | @param[in] i block size */ |
138 | static inline void buf_buddy_stamp_nonfree(buf_buddy_free_t* buf, ulint i) |
139 | { |
140 | buf_buddy_mem_invalid(buf, i); |
141 | compile_time_assert(BUF_BUDDY_STAMP_NONFREE == 0xffffffffU); |
142 | memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4); |
143 | } |
144 | |
145 | /**********************************************************************//** |
146 | Get the offset of the buddy of a compressed page frame. |
147 | @return the buddy relative of page */ |
148 | UNIV_INLINE |
149 | void* |
150 | buf_buddy_get( |
151 | /*==========*/ |
152 | byte* page, /*!< in: compressed page */ |
153 | ulint size) /*!< in: page size in bytes */ |
154 | { |
155 | ut_ad(ut_is_2pow(size)); |
156 | ut_ad(size >= BUF_BUDDY_LOW); |
157 | ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN); |
158 | ut_ad(size < BUF_BUDDY_HIGH); |
159 | ut_ad(BUF_BUDDY_HIGH == srv_page_size); |
160 | ut_ad(!ut_align_offset(page, size)); |
161 | |
162 | if (((ulint) page) & size) { |
163 | return(page - size); |
164 | } else { |
165 | return(page + size); |
166 | } |
167 | } |
168 | |
169 | #ifdef UNIV_DEBUG |
170 | /** Validate a given zip_free list. */ |
171 | struct CheckZipFree { |
172 | CheckZipFree(ulint i) : m_i(i) {} |
173 | |
174 | void operator()(const buf_buddy_free_t* elem) const |
175 | { |
176 | ut_a(buf_buddy_stamp_is_free(elem)); |
177 | ut_a(elem->stamp.size <= m_i); |
178 | } |
179 | |
180 | ulint m_i; |
181 | }; |
182 | |
183 | /** Validate a buddy list. |
184 | @param[in] buf_pool buffer pool instance |
185 | @param[in] i buddy size to validate */ |
186 | static |
187 | void |
188 | buf_buddy_list_validate( |
189 | const buf_pool_t* buf_pool, |
190 | ulint i) |
191 | { |
192 | CheckZipFree check(i); |
193 | ut_list_validate(buf_pool->zip_free[i], check); |
194 | } |
195 | |
196 | /**********************************************************************//** |
197 | Debug function to validate that a buffer is indeed free i.e.: in the |
198 | zip_free[]. |
199 | @return true if free */ |
200 | UNIV_INLINE |
201 | bool |
202 | buf_buddy_check_free( |
203 | /*=================*/ |
204 | buf_pool_t* buf_pool,/*!< in: buffer pool instance */ |
205 | const buf_buddy_free_t* buf, /*!< in: block to check */ |
206 | ulint i) /*!< in: index of buf_pool->zip_free[] */ |
207 | { |
208 | const ulint size = BUF_BUDDY_LOW << i; |
209 | |
210 | ut_ad(buf_pool_mutex_own(buf_pool)); |
211 | ut_ad(!ut_align_offset(buf, size)); |
212 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
213 | |
214 | buf_buddy_free_t* itr; |
215 | |
216 | for (itr = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); |
217 | itr && itr != buf; |
218 | itr = UT_LIST_GET_NEXT(list, itr)) { |
219 | } |
220 | |
221 | return(itr == buf); |
222 | } |
223 | #endif /* UNIV_DEBUG */ |
224 | |
225 | /**********************************************************************//** |
226 | Checks if a buf is free i.e.: in the zip_free[]. |
227 | @retval BUF_BUDDY_STATE_FREE if fully free |
228 | @retval BUF_BUDDY_STATE_USED if currently in use |
229 | @retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */ |
230 | static MY_ATTRIBUTE((warn_unused_result)) |
231 | buf_buddy_state_t |
232 | buf_buddy_is_free( |
233 | /*==============*/ |
234 | buf_buddy_free_t* buf, /*!< in: block to check */ |
235 | ulint i) /*!< in: index of |
236 | buf_pool->zip_free[] */ |
237 | { |
238 | #ifdef UNIV_DEBUG |
239 | const ulint size = BUF_BUDDY_LOW << i; |
240 | ut_ad(!ut_align_offset(buf, size)); |
241 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
242 | #endif /* UNIV_DEBUG */ |
243 | |
244 | /* We assume that all memory from buf_buddy_alloc() |
245 | is used for compressed page frames. */ |
246 | |
247 | /* We look inside the allocated objects returned by |
248 | buf_buddy_alloc() and assume that each block is a compressed |
249 | page that contains one of the following in space_id. |
250 | * BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or |
251 | * BUF_BUDDY_STAMP_NONFREE if the block has been allocated but |
252 | not initialized yet or |
253 | * A valid space_id of a compressed tablespace |
254 | |
255 | The call below attempts to read from free memory. The memory |
256 | is "owned" by the buddy allocator (and it has been allocated |
257 | from the buffer pool), so there is nothing wrong about this. */ |
258 | if (!buf_buddy_stamp_is_free(buf)) { |
259 | return(BUF_BUDDY_STATE_USED); |
260 | } |
261 | |
262 | /* A block may be free but a fragment of it may still be in use. |
263 | To guard against that we write the free block size in terms of |
264 | zip_free index at start of stamped block. Note that we can |
265 | safely rely on this value only if the buf is free. */ |
266 | ut_ad(buf->stamp.size <= i); |
267 | return(buf->stamp.size == i |
268 | ? BUF_BUDDY_STATE_FREE |
269 | : BUF_BUDDY_STATE_PARTIALLY_USED); |
270 | } |
271 | |
272 | /**********************************************************************//** |
273 | Add a block to the head of the appropriate buddy free list. */ |
274 | UNIV_INLINE |
275 | void |
276 | buf_buddy_add_to_free( |
277 | /*==================*/ |
278 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
279 | buf_buddy_free_t* buf, /*!< in,own: block to be freed */ |
280 | ulint i) /*!< in: index of |
281 | buf_pool->zip_free[] */ |
282 | { |
283 | ut_ad(buf_pool_mutex_own(buf_pool)); |
284 | ut_ad(buf_pool->zip_free[i].start != buf); |
285 | |
286 | buf_buddy_stamp_free(buf, i); |
287 | UT_LIST_ADD_FIRST(buf_pool->zip_free[i], buf); |
288 | ut_d(buf_buddy_list_validate(buf_pool, i)); |
289 | } |
290 | |
291 | /**********************************************************************//** |
292 | Remove a block from the appropriate buddy free list. */ |
293 | UNIV_INLINE |
294 | void |
295 | buf_buddy_remove_from_free( |
296 | /*=======================*/ |
297 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
298 | buf_buddy_free_t* buf, /*!< in,own: block to be |
299 | freed */ |
300 | ulint i) /*!< in: index of |
301 | buf_pool->zip_free[] */ |
302 | { |
303 | ut_ad(buf_pool_mutex_own(buf_pool)); |
304 | ut_ad(buf_buddy_check_free(buf_pool, buf, i)); |
305 | |
306 | UT_LIST_REMOVE(buf_pool->zip_free[i], buf); |
307 | buf_buddy_stamp_nonfree(buf, i); |
308 | } |
309 | |
310 | /**********************************************************************//** |
311 | Try to allocate a block from buf_pool->zip_free[]. |
312 | @return allocated block, or NULL if buf_pool->zip_free[] was empty */ |
313 | static |
314 | buf_buddy_free_t* |
315 | buf_buddy_alloc_zip( |
316 | /*================*/ |
317 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
318 | ulint i) /*!< in: index of buf_pool->zip_free[] */ |
319 | { |
320 | buf_buddy_free_t* buf; |
321 | |
322 | ut_ad(buf_pool_mutex_own(buf_pool)); |
323 | ut_a(i < BUF_BUDDY_SIZES); |
324 | ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
325 | |
326 | ut_d(buf_buddy_list_validate(buf_pool, i)); |
327 | |
328 | buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); |
329 | |
330 | if (buf_pool->curr_size < buf_pool->old_size |
331 | && UT_LIST_GET_LEN(buf_pool->withdraw) |
332 | < buf_pool->withdraw_target) { |
333 | |
334 | while (buf != NULL |
335 | && buf_frame_will_withdrawn( |
336 | buf_pool, reinterpret_cast<byte*>(buf))) { |
337 | /* This should be withdrawn, not to be allocated */ |
338 | buf = UT_LIST_GET_NEXT(list, buf); |
339 | } |
340 | } |
341 | |
342 | if (buf) { |
343 | buf_buddy_remove_from_free(buf_pool, buf, i); |
344 | } else if (i + 1 < BUF_BUDDY_SIZES) { |
345 | /* Attempt to split. */ |
346 | buf = buf_buddy_alloc_zip(buf_pool, i + 1); |
347 | |
348 | if (buf) { |
349 | buf_buddy_free_t* buddy = |
350 | reinterpret_cast<buf_buddy_free_t*>( |
351 | buf->stamp.bytes |
352 | + (BUF_BUDDY_LOW << i)); |
353 | |
354 | ut_ad(!buf_pool_contains_zip(buf_pool, buddy)); |
355 | buf_buddy_add_to_free(buf_pool, buddy, i); |
356 | } |
357 | } |
358 | |
359 | if (buf) { |
360 | /* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */ |
361 | UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET); |
362 | UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4 |
363 | + buf->stamp.bytes, ~i, |
364 | (BUF_BUDDY_LOW << i) |
365 | - (BUF_BUDDY_STAMP_OFFSET + 4)); |
366 | ut_ad(mach_read_from_4(buf->stamp.bytes |
367 | + BUF_BUDDY_STAMP_OFFSET) |
368 | == BUF_BUDDY_STAMP_NONFREE); |
369 | } |
370 | |
371 | return(buf); |
372 | } |
373 | |
374 | /**********************************************************************//** |
375 | Deallocate a buffer frame of srv_page_size. */ |
376 | static |
377 | void |
378 | buf_buddy_block_free( |
379 | /*=================*/ |
380 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
381 | void* buf) /*!< in: buffer frame to deallocate */ |
382 | { |
383 | const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf); |
384 | buf_page_t* bpage; |
385 | buf_block_t* block; |
386 | |
387 | ut_ad(buf_pool_mutex_own(buf_pool)); |
388 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); |
389 | ut_a(!ut_align_offset(buf, srv_page_size)); |
390 | |
391 | HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage, |
392 | ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY |
393 | && bpage->in_zip_hash && !bpage->in_page_hash), |
394 | ((buf_block_t*) bpage)->frame == buf); |
395 | ut_a(bpage); |
396 | ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY); |
397 | ut_ad(!bpage->in_page_hash); |
398 | ut_ad(bpage->in_zip_hash); |
399 | ut_d(bpage->in_zip_hash = FALSE); |
400 | HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage); |
401 | |
402 | ut_d(memset(buf, 0, srv_page_size)); |
403 | UNIV_MEM_INVALID(buf, srv_page_size); |
404 | |
405 | block = (buf_block_t*) bpage; |
406 | buf_page_mutex_enter(block); |
407 | buf_LRU_block_free_non_file_page(block); |
408 | buf_page_mutex_exit(block); |
409 | |
410 | ut_ad(buf_pool->buddy_n_frames > 0); |
411 | ut_d(buf_pool->buddy_n_frames--); |
412 | } |
413 | |
414 | /**********************************************************************//** |
415 | Allocate a buffer block to the buddy allocator. */ |
416 | static |
417 | void |
418 | buf_buddy_block_register( |
419 | /*=====================*/ |
420 | buf_block_t* block) /*!< in: buffer frame to allocate */ |
421 | { |
422 | buf_pool_t* buf_pool = buf_pool_from_block(block); |
423 | const ulint fold = BUF_POOL_ZIP_FOLD(block); |
424 | ut_ad(buf_pool_mutex_own(buf_pool)); |
425 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); |
426 | ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE); |
427 | |
428 | buf_block_set_state(block, BUF_BLOCK_MEMORY); |
429 | |
430 | ut_a(block->frame); |
431 | ut_a(!ut_align_offset(block->frame, srv_page_size)); |
432 | |
433 | ut_ad(!block->page.in_page_hash); |
434 | ut_ad(!block->page.in_zip_hash); |
435 | ut_d(block->page.in_zip_hash = TRUE); |
436 | HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page); |
437 | |
438 | ut_d(buf_pool->buddy_n_frames++); |
439 | } |
440 | |
441 | /**********************************************************************//** |
442 | Allocate a block from a bigger object. |
443 | @return allocated block */ |
444 | static |
445 | void* |
446 | buf_buddy_alloc_from( |
447 | /*=================*/ |
448 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
449 | void* buf, /*!< in: a block that is free to use */ |
450 | ulint i, /*!< in: index of |
451 | buf_pool->zip_free[] */ |
452 | ulint j) /*!< in: size of buf as an index |
453 | of buf_pool->zip_free[] */ |
454 | { |
455 | ulint offs = BUF_BUDDY_LOW << j; |
456 | ut_ad(j <= BUF_BUDDY_SIZES); |
457 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
458 | ut_ad(j >= i); |
459 | ut_ad(!ut_align_offset(buf, offs)); |
460 | |
461 | /* Add the unused parts of the block to the free lists. */ |
462 | while (j > i) { |
463 | buf_buddy_free_t* zip_buf; |
464 | |
465 | offs >>= 1; |
466 | j--; |
467 | |
468 | zip_buf = reinterpret_cast<buf_buddy_free_t*>( |
469 | reinterpret_cast<byte*>(buf) + offs); |
470 | buf_buddy_add_to_free(buf_pool, zip_buf, j); |
471 | } |
472 | |
473 | buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i); |
474 | return(buf); |
475 | } |
476 | |
477 | /**********************************************************************//** |
478 | Allocate a block. The thread calling this function must hold |
479 | buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. |
480 | The buf_pool_mutex may be released and reacquired. |
481 | @return allocated block, never NULL */ |
482 | void* |
483 | buf_buddy_alloc_low( |
484 | /*================*/ |
485 | buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ |
486 | ulint i, /*!< in: index of buf_pool->zip_free[], |
487 | or BUF_BUDDY_SIZES */ |
488 | bool* lru) /*!< in: pointer to a variable that |
489 | will be assigned true if storage was |
490 | allocated from the LRU list and |
491 | buf_pool->mutex was temporarily |
492 | released */ |
493 | { |
494 | buf_block_t* block; |
495 | |
496 | ut_ad(lru); |
497 | ut_ad(buf_pool_mutex_own(buf_pool)); |
498 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); |
499 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
500 | |
501 | if (i < BUF_BUDDY_SIZES) { |
502 | /* Try to allocate from the buddy system. */ |
503 | block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i); |
504 | |
505 | if (block) { |
506 | goto func_exit; |
507 | } |
508 | } |
509 | |
510 | /* Try allocating from the buf_pool->free list. */ |
511 | block = buf_LRU_get_free_only(buf_pool); |
512 | |
513 | if (block) { |
514 | |
515 | goto alloc_big; |
516 | } |
517 | |
518 | /* Try replacing an uncompressed page in the buffer pool. */ |
519 | buf_pool_mutex_exit(buf_pool); |
520 | block = buf_LRU_get_free_block(buf_pool); |
521 | *lru = true; |
522 | buf_pool_mutex_enter(buf_pool); |
523 | |
524 | alloc_big: |
525 | buf_buddy_block_register(block); |
526 | |
527 | block = (buf_block_t*) buf_buddy_alloc_from( |
528 | buf_pool, block->frame, i, BUF_BUDDY_SIZES); |
529 | |
530 | func_exit: |
531 | buf_pool->buddy_stat[i].used++; |
532 | return(block); |
533 | } |
534 | |
535 | /**********************************************************************//** |
536 | Try to relocate a block. |
537 | @return true if relocated */ |
538 | static |
539 | bool |
540 | buf_buddy_relocate( |
541 | /*===============*/ |
542 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
543 | void* src, /*!< in: block to relocate */ |
544 | void* dst, /*!< in: free block to relocate to */ |
545 | ulint i, /*!< in: index of |
546 | buf_pool->zip_free[] */ |
547 | bool force) /*!< in: true if we must relocate |
548 | always */ |
549 | { |
550 | buf_page_t* bpage; |
551 | const ulint size = BUF_BUDDY_LOW << i; |
552 | ulint space; |
553 | ulint offset; |
554 | |
555 | ut_ad(buf_pool_mutex_own(buf_pool)); |
556 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); |
557 | ut_ad(!ut_align_offset(src, size)); |
558 | ut_ad(!ut_align_offset(dst, size)); |
559 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
560 | UNIV_MEM_ASSERT_W(dst, size); |
561 | |
562 | space = mach_read_from_4((const byte*) src |
563 | + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); |
564 | offset = mach_read_from_4((const byte*) src |
565 | + FIL_PAGE_OFFSET); |
566 | |
567 | /* Suppress Valgrind warnings about conditional jump |
568 | on uninitialized value. */ |
569 | UNIV_MEM_VALID(&space, sizeof space); |
570 | UNIV_MEM_VALID(&offset, sizeof offset); |
571 | |
572 | ut_ad(space != BUF_BUDDY_STAMP_FREE); |
573 | |
574 | const page_id_t page_id(space, offset); |
575 | |
576 | /* If space,offset is bogus, then we know that the |
577 | buf_page_hash_get_low() call below will return NULL. */ |
578 | if (!force && buf_pool != buf_pool_get(page_id)) { |
579 | return(false); |
580 | } |
581 | |
582 | rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id); |
583 | |
584 | rw_lock_x_lock(hash_lock); |
585 | |
586 | bpage = buf_page_hash_get_low(buf_pool, page_id); |
587 | |
588 | if (!bpage || bpage->zip.data != src) { |
589 | /* The block has probably been freshly |
590 | allocated by buf_LRU_get_free_block() but not |
591 | added to buf_pool->page_hash yet. Obviously, |
592 | it cannot be relocated. */ |
593 | |
594 | rw_lock_x_unlock(hash_lock); |
595 | |
596 | if (!force || space != 0 || offset != 0) { |
597 | return(false); |
598 | } |
599 | |
600 | /* It might be just uninitialized page. |
601 | We should search from LRU list also. */ |
602 | |
603 | bpage = UT_LIST_GET_FIRST(buf_pool->LRU); |
604 | while (bpage != NULL) { |
605 | if (bpage->zip.data == src) { |
606 | hash_lock = buf_page_hash_lock_get( |
607 | buf_pool, bpage->id); |
608 | rw_lock_x_lock(hash_lock); |
609 | break; |
610 | } |
611 | bpage = UT_LIST_GET_NEXT(LRU, bpage); |
612 | } |
613 | |
614 | if (bpage == NULL) { |
615 | return(false); |
616 | } |
617 | } |
618 | |
619 | if (page_zip_get_size(&bpage->zip) != size) { |
620 | /* The block is of different size. We would |
621 | have to relocate all blocks covered by src. |
622 | For the sake of simplicity, give up. */ |
623 | ut_ad(page_zip_get_size(&bpage->zip) < size); |
624 | |
625 | rw_lock_x_unlock(hash_lock); |
626 | |
627 | return(false); |
628 | } |
629 | |
630 | /* The block must have been allocated, but it may |
631 | contain uninitialized data. */ |
632 | UNIV_MEM_ASSERT_W(src, size); |
633 | |
634 | BPageMutex* block_mutex = buf_page_get_mutex(bpage); |
635 | |
636 | mutex_enter(block_mutex); |
637 | |
638 | if (buf_page_can_relocate(bpage)) { |
639 | /* Relocate the compressed page. */ |
640 | uintmax_t usec = ut_time_us(NULL); |
641 | |
642 | ut_a(bpage->zip.data == src); |
643 | |
644 | memcpy(dst, src, size); |
645 | bpage->zip.data = reinterpret_cast<page_zip_t*>(dst); |
646 | |
647 | rw_lock_x_unlock(hash_lock); |
648 | |
649 | mutex_exit(block_mutex); |
650 | |
651 | buf_buddy_mem_invalid( |
652 | reinterpret_cast<buf_buddy_free_t*>(src), i); |
653 | |
654 | buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; |
655 | buddy_stat->relocated++; |
656 | buddy_stat->relocated_usec += ut_time_us(NULL) - usec; |
657 | return(true); |
658 | } |
659 | |
660 | rw_lock_x_unlock(hash_lock); |
661 | |
662 | mutex_exit(block_mutex); |
663 | return(false); |
664 | } |
665 | |
666 | /**********************************************************************//** |
667 | Deallocate a block. */ |
668 | void |
669 | buf_buddy_free_low( |
670 | /*===============*/ |
671 | buf_pool_t* buf_pool, /*!< in: buffer pool instance */ |
672 | void* buf, /*!< in: block to be freed, must not be |
673 | pointed to by the buffer pool */ |
674 | ulint i) /*!< in: index of buf_pool->zip_free[], |
675 | or BUF_BUDDY_SIZES */ |
676 | { |
677 | buf_buddy_free_t* buddy; |
678 | |
679 | ut_ad(buf_pool_mutex_own(buf_pool)); |
680 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); |
681 | ut_ad(i <= BUF_BUDDY_SIZES); |
682 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
683 | ut_ad(buf_pool->buddy_stat[i].used > 0); |
684 | |
685 | buf_pool->buddy_stat[i].used--; |
686 | recombine: |
687 | UNIV_MEM_ALLOC(buf, BUF_BUDDY_LOW << i); |
688 | |
689 | if (i == BUF_BUDDY_SIZES) { |
690 | buf_buddy_block_free(buf_pool, buf); |
691 | return; |
692 | } |
693 | |
694 | ut_ad(i < BUF_BUDDY_SIZES); |
695 | ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); |
696 | ut_ad(!buf_pool_contains_zip(buf_pool, buf)); |
697 | |
698 | /* Do not recombine blocks if there are few free blocks. |
699 | We may waste up to 15360*max_len bytes to free blocks |
700 | (1024 + 2048 + 4096 + 8192 = 15360) */ |
701 | if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16 |
702 | && buf_pool->curr_size >= buf_pool->old_size) { |
703 | goto func_exit; |
704 | } |
705 | |
706 | /* Try to combine adjacent blocks. */ |
707 | buddy = reinterpret_cast<buf_buddy_free_t*>( |
708 | buf_buddy_get(reinterpret_cast<byte*>(buf), |
709 | BUF_BUDDY_LOW << i)); |
710 | |
711 | switch (buf_buddy_is_free(buddy, i)) { |
712 | case BUF_BUDDY_STATE_FREE: |
713 | /* The buddy is free: recombine */ |
714 | buf_buddy_remove_from_free(buf_pool, buddy, i); |
715 | buddy_is_free: |
716 | ut_ad(!buf_pool_contains_zip(buf_pool, buddy)); |
717 | i++; |
718 | buf = ut_align_down(buf, BUF_BUDDY_LOW << i); |
719 | |
720 | goto recombine; |
721 | |
722 | case BUF_BUDDY_STATE_USED: |
723 | ut_d(buf_buddy_list_validate(buf_pool, i)); |
724 | |
725 | /* The buddy is not free. Is there a free block of |
726 | this size? */ |
727 | if (buf_buddy_free_t* zip_buf = |
728 | UT_LIST_GET_FIRST(buf_pool->zip_free[i])) { |
729 | |
730 | /* Remove the block from the free list, because |
731 | a successful buf_buddy_relocate() will overwrite |
732 | zip_free->list. */ |
733 | buf_buddy_remove_from_free(buf_pool, zip_buf, i); |
734 | |
735 | /* Try to relocate the buddy of buf to the free |
736 | block. */ |
737 | if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i, |
738 | false)) { |
739 | |
740 | goto buddy_is_free; |
741 | } |
742 | |
743 | buf_buddy_add_to_free(buf_pool, zip_buf, i); |
744 | } |
745 | |
746 | break; |
747 | case BUF_BUDDY_STATE_PARTIALLY_USED: |
748 | /* Some sub-blocks in the buddy are still in use. |
749 | Relocation will fail. No need to try. */ |
750 | break; |
751 | } |
752 | |
753 | func_exit: |
754 | /* Free the block to the buddy list. */ |
755 | buf_buddy_add_to_free(buf_pool, |
756 | reinterpret_cast<buf_buddy_free_t*>(buf), |
757 | i); |
758 | } |
759 | |
760 | /** Reallocate a block. |
761 | @param[in] buf_pool buffer pool instance |
762 | @param[in] buf block to be reallocated, must be pointed |
763 | to by the buffer pool |
764 | @param[in] size block size, up to srv_page_size |
765 | @retval false if failed because of no free blocks. */ |
766 | bool |
767 | buf_buddy_realloc( |
768 | buf_pool_t* buf_pool, |
769 | void* buf, |
770 | ulint size) |
771 | { |
772 | buf_block_t* block = NULL; |
773 | ulint i = buf_buddy_get_slot(size); |
774 | |
775 | ut_ad(buf_pool_mutex_own(buf_pool)); |
776 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); |
777 | ut_ad(i <= BUF_BUDDY_SIZES); |
778 | ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); |
779 | |
780 | if (i < BUF_BUDDY_SIZES) { |
781 | /* Try to allocate from the buddy system. */ |
782 | block = reinterpret_cast<buf_block_t*>( |
783 | buf_buddy_alloc_zip(buf_pool, i)); |
784 | } |
785 | |
786 | if (block == NULL) { |
787 | /* Try allocating from the buf_pool->free list. */ |
788 | block = buf_LRU_get_free_only(buf_pool); |
789 | |
790 | if (block == NULL) { |
791 | return(false); /* free_list was not enough */ |
792 | } |
793 | |
794 | buf_buddy_block_register(block); |
795 | |
796 | block = reinterpret_cast<buf_block_t*>( |
797 | buf_buddy_alloc_from( |
798 | buf_pool, block->frame, i, BUF_BUDDY_SIZES)); |
799 | } |
800 | |
801 | buf_pool->buddy_stat[i].used++; |
802 | |
803 | /* Try to relocate the buddy of buf to the free block. */ |
804 | if (buf_buddy_relocate(buf_pool, buf, block, i, true)) { |
805 | /* succeeded */ |
806 | buf_buddy_free_low(buf_pool, buf, i); |
807 | } else { |
808 | /* failed */ |
809 | buf_buddy_free_low(buf_pool, block, i); |
810 | } |
811 | |
812 | return(true); /* free_list was enough */ |
813 | } |
814 | |
815 | /** Combine all pairs of free buddies. |
816 | @param[in] buf_pool buffer pool instance */ |
817 | void |
818 | buf_buddy_condense_free( |
819 | buf_pool_t* buf_pool) |
820 | { |
821 | ut_ad(buf_pool_mutex_own(buf_pool)); |
822 | ut_ad(buf_pool->curr_size < buf_pool->old_size); |
823 | |
824 | for (ulint i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) { |
825 | buf_buddy_free_t* buf = |
826 | UT_LIST_GET_FIRST(buf_pool->zip_free[i]); |
827 | |
828 | /* seek to withdraw target */ |
829 | while (buf != NULL |
830 | && !buf_frame_will_withdrawn( |
831 | buf_pool, reinterpret_cast<byte*>(buf))) { |
832 | buf = UT_LIST_GET_NEXT(list, buf); |
833 | } |
834 | |
835 | while (buf != NULL) { |
836 | buf_buddy_free_t* next = |
837 | UT_LIST_GET_NEXT(list, buf); |
838 | |
839 | buf_buddy_free_t* buddy = |
840 | reinterpret_cast<buf_buddy_free_t*>( |
841 | buf_buddy_get( |
842 | reinterpret_cast<byte*>(buf), |
843 | BUF_BUDDY_LOW << i)); |
844 | |
845 | /* seek to the next withdraw target */ |
846 | while (true) { |
847 | while (next != NULL |
848 | && !buf_frame_will_withdrawn( |
849 | buf_pool, |
850 | reinterpret_cast<byte*>(next))) { |
851 | next = UT_LIST_GET_NEXT(list, next); |
852 | } |
853 | |
854 | if (buddy != next) { |
855 | break; |
856 | } |
857 | |
858 | next = UT_LIST_GET_NEXT(list, next); |
859 | } |
860 | |
861 | if (buf_buddy_is_free(buddy, i) |
862 | == BUF_BUDDY_STATE_FREE) { |
863 | /* Both buf and buddy are free. |
864 | Try to combine them. */ |
865 | buf_buddy_remove_from_free(buf_pool, buf, i); |
866 | buf_pool->buddy_stat[i].used++; |
867 | |
868 | buf_buddy_free_low(buf_pool, buf, i); |
869 | } |
870 | |
871 | buf = next; |
872 | } |
873 | } |
874 | } |
875 | |