| 1 | /* Copyright (C) 2007 Michael Widenius |
| 2 | Copyright (c) 2010, 2013, Monty Program Ab. |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 16 | |
| 17 | /* |
| 18 | Bitmap handling (for records in block) |
| 19 | |
| 20 | The data file starts with a bitmap page, followed by as many data |
| 21 | pages as the bitmap can cover. After this there is a new bitmap page |
| 22 | and more data pages etc. |
| 23 | |
| 24 | The bitmap code assumes there is always an active bitmap page and thus |
| 25 | that there is at least one bitmap page in the file |
| 26 | |
| 27 | Structure of bitmap page: |
| 28 | |
| 29 | Fixed size records (to be implemented later): |
| 30 | |
| 31 | 2 bits are used to indicate: |
| 32 | |
| 33 | 0 Empty |
| 34 | 1 0-75 % full (at least room for 2 records) |
| 35 | 2 75-100 % full (at least room for one record) |
| 36 | 3 100 % full (no more room for records) |
| 37 | |
| 38 | Assuming 8K pages, this will allow us to map: |
| 39 | 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M |
| 40 | |
| 41 | (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN) |
| 42 | |
| 43 | Note that for fixed size rows, we can't add more columns without doing |
| 44 | a full reorganization of the table. The user can always force a dynamic |
| 45 | size row format by specifying ROW_FORMAT=dynamic. |
| 46 | |
| 47 | |
| 48 | Dynamic size records: |
| 49 | |
| 50 | 3 bits are used to indicate Bytes free in 8K page |
| 51 | |
| 52 | 0 Empty page 8176 (head or tail) |
| 53 | 1 0-30 % full (at least room for 3 records) 5724 |
| 54 | 2 30-60 % full (at least room for 2 records) 3271 |
| 55 | 3 60-90 % full (at least room for one record) 818 |
| 56 | 4 100 % full (no more room for records) 0 |
| 57 | 5 Tail page, 0-40 % full 4906 |
| 58 | 6 Tail page, 40-80 % full 1636 |
| 59 | 7 Full tail page or full blob page 0 |
| 60 | |
| 61 | Assuming 8K pages, this will allow us to map: |
| 62 | 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M |
| 63 | |
| 64 | Note that values 1-3 may be adjust for each individual table based on |
| 65 | 'min record length'. Tail pages are for overflow data which can be of |
| 66 | any size and thus doesn't have to be adjusted for different tables. |
| 67 | If we add more columns to the table, some of the originally calculated |
| 68 | 'cut off' points may not be optimal, but they shouldn't be 'drasticly |
| 69 | wrong'. |
| 70 | |
| 71 | When allocating data from the bitmap, we are trying to do it in a |
| 72 | 'best fit' manner. Blobs and varchar blocks are given out in large |
| 73 | continuous extents to allow fast access to these. Before allowing a |
| 74 | row to 'flow over' to other blocks, we will compact the page and use |
| 75 | all space on it. If there is many rows in the page, we will ensure |
| 76 | there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other |
| 77 | rows to grow. |
| 78 | |
| 79 | The bitmap format allows us to extend the row file in big chunks, if needed. |
| 80 | |
| 81 | When calculating the size for a packed row, we will calculate the following |
| 82 | things separately: |
| 83 | - Row header + null_bits + empty_bits fixed size segments etc. |
| 84 | - Size of all char/varchar fields |
| 85 | - Size of each blob field |
| 86 | |
| 87 | The bitmap handler will get all the above information and return |
| 88 | either one page or a set of pages to put the different parts. |
| 89 | |
| 90 | Bitmaps are read on demand in response to insert/delete/update operations. |
| 91 | The following bitmap pointers will be cached and stored on disk on close: |
| 92 | - Current insert_bitmap; When inserting new data we will first try to |
| 93 | fill this one. |
| 94 | - First bitmap which is not completely full. This is updated when we |
| 95 | free data with an update or delete. |
| 96 | |
| 97 | While flushing out bitmaps, we will cache the status of the bitmap in memory |
| 98 | to avoid having to read a bitmap for insert of new data that will not |
| 99 | be of any use |
| 100 | - Total empty space |
| 101 | - Largest number of continuous pages |
| 102 | |
| 103 | Bitmap ONLY goes to disk in the following scenarios |
| 104 | - The file is closed (and we flush all changes to disk) |
| 105 | - On checkpoint |
| 106 | (Ie: When we do a checkpoint, we have to ensure that all bitmaps are |
| 107 | put on disk even if they are not in the page cache). |
| 108 | - When explicitly requested (for example on backup or after recovery, |
| 109 | to simplify things) |
| 110 | |
| 111 | The flow of writing a row is that: |
| 112 | - Mark the bitmap not flushable (_ma_bitmap_flushable(X, 1)) |
| 113 | - Lock the bitmap |
| 114 | - Decide which data pages we will write to |
| 115 | - Mark them full in the bitmap page so that other threads do not try to |
| 116 | use the same data pages as us |
| 117 | - We unlock the bitmap |
| 118 | - Write the data pages |
| 119 | - Lock the bitmap |
| 120 | - Correct the bitmap page with the true final occupation of the data |
| 121 | pages (that is, we marked pages full but when we are done we realize |
| 122 | we didn't fill them) |
| 123 | - Unlock the bitmap. |
| 124 | - Mark the bitmap flushable (_ma_bitmap_flushable(X, -1)) |
| 125 | */ |
| 126 | |
| 127 | #include "maria_def.h" |
| 128 | #include "ma_blockrec.h" |
| 129 | |
| 130 | #define FULL_HEAD_PAGE 4 |
| 131 | #define FULL_TAIL_PAGE 7 |
| 132 | |
| 133 | const char *bits_to_txt[]= |
| 134 | { |
| 135 | "empty" , "00-30% full" , "30-60% full" , "60-90% full" , "full" , |
| 136 | "tail 00-40 % full" , "tail 40-80 % full" , "tail/blob full" |
| 137 | }; |
| 138 | |
| 139 | #define WRONG_BITMAP_FLUSH 0 /*define to 1 only for provoking bugs*/ |
| 140 | |
| 141 | static my_bool _ma_read_bitmap_page(MARIA_HA *info, |
| 142 | MARIA_FILE_BITMAP *bitmap, |
| 143 | pgcache_page_no_t page); |
| 144 | static my_bool _ma_bitmap_create_missing(MARIA_HA *info, |
| 145 | MARIA_FILE_BITMAP *bitmap, |
| 146 | pgcache_page_no_t page); |
| 147 | static void _ma_bitmap_unpin_all(MARIA_SHARE *share); |
| 148 | #ifndef DBUG_OFF |
| 149 | static void _ma_check_bitmap(MARIA_FILE_BITMAP *bitmap); |
| 150 | #else |
| 151 | #define _ma_check_bitmap(A) do { } while(0) |
| 152 | #endif |
| 153 | |
| 154 | |
| 155 | /* Write bitmap page to key cache */ |
| 156 | |
| 157 | static inline my_bool write_changed_bitmap(MARIA_SHARE *share, |
| 158 | MARIA_FILE_BITMAP *bitmap) |
| 159 | { |
| 160 | my_bool res; |
| 161 | DBUG_ENTER("write_changed_bitmap" ); |
| 162 | DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); |
| 163 | DBUG_ASSERT(bitmap->file.pre_write_hook != 0); |
| 164 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
| 165 | |
| 166 | /* |
| 167 | Mark that a bitmap page has been written to page cache and we have |
| 168 | to flush it during checkpoint. |
| 169 | */ |
| 170 | bitmap->changed_not_flushed= 1; |
| 171 | |
| 172 | if ((bitmap->non_flushable == 0) || WRONG_BITMAP_FLUSH) |
| 173 | { |
| 174 | res= pagecache_write(share->pagecache, |
| 175 | &bitmap->file, bitmap->page, 0, |
| 176 | bitmap->map, PAGECACHE_PLAIN_PAGE, |
| 177 | PAGECACHE_LOCK_LEFT_UNLOCKED, |
| 178 | PAGECACHE_PIN_LEFT_UNPINNED, |
| 179 | PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE); |
| 180 | DBUG_ASSERT(!res); |
| 181 | DBUG_RETURN(res); |
| 182 | } |
| 183 | else |
| 184 | { |
| 185 | /* |
| 186 | bitmap->non_flushable means that someone has changed the bitmap, |
| 187 | but it's not yet complete so it can't yet be written to disk. |
| 188 | In this case we write the changed bitmap to the disk cache, |
| 189 | but keep it pinned until the change is completed. The page will |
| 190 | be unpinned later by _ma_bitmap_unpin_all() as soon as non_flushable |
| 191 | is set back to 0. |
| 192 | */ |
| 193 | MARIA_PINNED_PAGE page_link; |
| 194 | DBUG_PRINT("info" , ("Writing pinned bitmap page" )); |
| 195 | res= pagecache_write(share->pagecache, |
| 196 | &bitmap->file, bitmap->page, 0, |
| 197 | bitmap->map, PAGECACHE_PLAIN_PAGE, |
| 198 | PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN, |
| 199 | PAGECACHE_WRITE_DELAY, &page_link.link, |
| 200 | LSN_IMPOSSIBLE); |
| 201 | page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED; |
| 202 | page_link.changed= 1; |
| 203 | push_dynamic(&bitmap->pinned_pages, (const uchar*) (void*) &page_link); |
| 204 | DBUG_ASSERT(!res); |
| 205 | DBUG_RETURN(res); |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | /* |
| 210 | Initialize bitmap variables in share |
| 211 | |
| 212 | SYNOPSIS |
| 213 | _ma_bitmap_init() |
| 214 | share Share handler |
| 215 | file Data file handler |
| 216 | last_page Pointer to last page (max_file_size) that needs to be |
| 217 | mapped by the bitmap. This is adjusted to bitmap |
| 218 | alignment. |
| 219 | |
| 220 | NOTES |
| 221 | This is called the first time a file is opened. |
| 222 | |
| 223 | RETURN |
| 224 | 0 ok |
| 225 | 1 error |
| 226 | */ |
| 227 | |
| 228 | my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, |
| 229 | pgcache_page_no_t *last_page) |
| 230 | { |
| 231 | uint aligned_bit_blocks; |
| 232 | uint max_page_size; |
| 233 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 234 | uint size= share->block_size; |
| 235 | pgcache_page_no_t first_bitmap_with_space; |
| 236 | #ifndef DBUG_OFF |
| 237 | /* We want to have a copy of the bitmap to be able to print differences */ |
| 238 | size*= 2; |
| 239 | #endif |
| 240 | |
| 241 | if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) || |
| 242 | my_init_dynamic_array(&bitmap->pinned_pages, |
| 243 | sizeof(MARIA_PINNED_PAGE), 1, 1, MYF(0))) |
| 244 | return 1; |
| 245 | |
| 246 | bitmap->share= share; |
| 247 | bitmap->block_size= share->block_size; |
| 248 | bitmap->file.file= file; |
| 249 | _ma_bitmap_set_pagecache_callbacks(&bitmap->file, share); |
| 250 | |
| 251 | /* Size needs to be aligned on 6 */ |
| 252 | aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6; |
| 253 | bitmap->max_total_size= bitmap->total_size= aligned_bit_blocks * 6; |
| 254 | /* |
| 255 | In each 6 bytes, we have 6*8/3 = 16 pages covered |
| 256 | The +1 is to add the bitmap page, as this doesn't have to be covered |
| 257 | */ |
| 258 | bitmap->pages_covered= aligned_bit_blocks * 16 + 1; |
| 259 | bitmap->flush_all_requested= bitmap->waiting_for_flush_all_requested= |
| 260 | bitmap->waiting_for_non_flushable= 0; |
| 261 | bitmap->non_flushable= 0; |
| 262 | |
| 263 | /* Update size for bits */ |
| 264 | /* TODO; Make this dependent of the row size */ |
| 265 | max_page_size= share->block_size - PAGE_OVERHEAD_SIZE(share) + DIR_ENTRY_SIZE; |
| 266 | bitmap->sizes[0]= max_page_size; /* Empty page */ |
| 267 | bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100; |
| 268 | bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100; |
| 269 | bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100; |
| 270 | bitmap->sizes[4]= 0; /* Full page */ |
| 271 | bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100; |
| 272 | bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100; |
| 273 | bitmap->sizes[7]= 0; |
| 274 | |
| 275 | /* |
| 276 | If a record size will fit into the smallest empty page, return first |
| 277 | found page in find_head() |
| 278 | */ |
| 279 | if (bitmap->sizes[3] >= share->base.max_pack_length) |
| 280 | bitmap->return_first_match= 1; |
| 281 | |
| 282 | mysql_mutex_init(key_SHARE_BITMAP_lock, |
| 283 | &share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW); |
| 284 | mysql_cond_init(key_SHARE_BITMAP_cond, |
| 285 | &share->bitmap.bitmap_cond, 0); |
| 286 | |
| 287 | first_bitmap_with_space= share->state.first_bitmap_with_space; |
| 288 | _ma_bitmap_reset_cache(share); |
| 289 | |
| 290 | /* |
| 291 | The bitmap used to map the file are aligned on 6 bytes. We now |
| 292 | calculate the max file size that can be used by the bitmap. This |
| 293 | is needed to get ma_info() give a true file size so that the user can |
| 294 | estimate if there is still space free for records in the file. |
| 295 | */ |
| 296 | { |
| 297 | pgcache_page_no_t last_bitmap_page; |
| 298 | ulong blocks, bytes; |
| 299 | |
| 300 | last_bitmap_page= *last_page - *last_page % bitmap->pages_covered; |
| 301 | blocks= (ulong) (*last_page - last_bitmap_page); |
| 302 | bytes= (blocks * 3) / 8; /* 3 bit per page / 8 bits per byte */ |
| 303 | /* Size needs to be aligned on 6 */ |
| 304 | bytes/= 6; |
| 305 | bytes*= 6; |
| 306 | bitmap->last_bitmap_page= last_bitmap_page; |
| 307 | bitmap->last_total_size= (uint)bytes; |
| 308 | *last_page= ((last_bitmap_page + bytes*8/3)); |
| 309 | } |
| 310 | |
| 311 | /* Restore first_bitmap_with_space if it's resonable */ |
| 312 | if (first_bitmap_with_space <= (share->state.state.data_file_length / |
| 313 | share->block_size)) |
| 314 | share->state.first_bitmap_with_space= first_bitmap_with_space; |
| 315 | |
| 316 | return 0; |
| 317 | } |
| 318 | |
| 319 | |
| 320 | /* |
| 321 | Free data allocated by _ma_bitmap_init |
| 322 | |
| 323 | SYNOPSIS |
| 324 | _ma_bitmap_end() |
| 325 | share Share handler |
| 326 | */ |
| 327 | |
| 328 | my_bool _ma_bitmap_end(MARIA_SHARE *share) |
| 329 | { |
| 330 | my_bool res; |
| 331 | |
| 332 | #ifndef DBUG_OFF |
| 333 | if (! share->internal_table) |
| 334 | mysql_mutex_assert_owner(&share->close_lock); |
| 335 | #endif |
| 336 | DBUG_ASSERT(share->bitmap.non_flushable == 0); |
| 337 | DBUG_ASSERT(share->bitmap.flush_all_requested == 0); |
| 338 | DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 && |
| 339 | share->bitmap.waiting_for_flush_all_requested == 0); |
| 340 | DBUG_ASSERT(share->bitmap.pinned_pages.elements == 0); |
| 341 | |
| 342 | res= _ma_bitmap_flush(share); |
| 343 | mysql_mutex_destroy(&share->bitmap.bitmap_lock); |
| 344 | mysql_cond_destroy(&share->bitmap.bitmap_cond); |
| 345 | delete_dynamic(&share->bitmap.pinned_pages); |
| 346 | my_free(share->bitmap.map); |
| 347 | share->bitmap.map= 0; |
| 348 | /* |
| 349 | This is to not get an assert in checkpoint. The bitmap will be flushed |
| 350 | at once by _ma_once_end_block_record() as part of the normal flush |
| 351 | of the kfile. |
| 352 | */ |
| 353 | share->bitmap.changed_not_flushed= 0; |
| 354 | return res; |
| 355 | } |
| 356 | |
| 357 | /* |
| 358 | Ensure that we have incremented open count before we try to read/write |
| 359 | a page while we have the bitmap lock. |
| 360 | This is needed to ensure that we don't call _ma_mark_file_changed() as |
| 361 | part of flushing a page to disk, as this locks share->internal_lock |
| 362 | and then mutex lock would happen in the wrong order. |
| 363 | */ |
| 364 | |
| 365 | static inline void _ma_bitmap_mark_file_changed(MARIA_SHARE *share, |
| 366 | my_bool flush_translog) |
| 367 | { |
| 368 | /* |
| 369 | It's extremely unlikely that the following test is true as it |
| 370 | only happens once if the table has changed. |
| 371 | */ |
| 372 | if (unlikely(!share->global_changed && |
| 373 | (share->state.changed & STATE_CHANGED))) |
| 374 | { |
| 375 | /* purecov: begin inspected */ |
| 376 | /* unlock mutex as it can't be hold during _ma_mark_file_changed() */ |
| 377 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
| 378 | |
| 379 | /* |
| 380 | We have to flush the translog to ensure we have registered that the |
| 381 | table is open. |
| 382 | */ |
| 383 | if (flush_translog && share->now_transactional) |
| 384 | (void) translog_flush(share->state.logrec_file_id); |
| 385 | |
| 386 | _ma_mark_file_changed_now(share); |
| 387 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
| 388 | /* purecov: end */ |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | /* |
| 393 | Send updated bitmap to the page cache |
| 394 | |
| 395 | SYNOPSIS |
| 396 | _ma_bitmap_flush() |
| 397 | share Share handler |
| 398 | |
| 399 | NOTES |
| 400 | In the future, _ma_bitmap_flush() will be called to flush changes don't |
| 401 | by this thread (ie, checking the changed flag is ok). The reason we |
| 402 | check it again in the mutex is that if someone else did a flush at the |
| 403 | same time, we don't have to do the write. |
| 404 | This is also ok for _ma_scan_init_block_record() which does not want to |
| 405 | miss rows: it cares only for committed rows, that is, rows for which there |
| 406 | was a commit before our transaction started; as commit and transaction's |
| 407 | start are protected by the same LOCK_trn_list mutex, we see memory at |
| 408 | least as new as at other transaction's commit time, so if the committed |
| 409 | rows caused bitmap->changed to be true, we see it; if we see 0 it really |
| 410 | means a flush happened since then. So, it's ok to read without bitmap's |
| 411 | mutex. |
| 412 | |
| 413 | RETURN |
| 414 | 0 ok |
| 415 | 1 error |
| 416 | */ |
| 417 | |
| 418 | my_bool _ma_bitmap_flush(MARIA_SHARE *share) |
| 419 | { |
| 420 | my_bool res= 0; |
| 421 | DBUG_ENTER("_ma_bitmap_flush" ); |
| 422 | if (share->bitmap.changed) |
| 423 | { |
| 424 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
| 425 | if (share->bitmap.changed) |
| 426 | { |
| 427 | /* |
| 428 | We have to mark the file changed here, as otherwise the following |
| 429 | write to pagecache may force a page out from this file, which would |
| 430 | cause _ma_mark_file_changed() to be called with bitmaplock hold! |
| 431 | */ |
| 432 | _ma_bitmap_mark_file_changed(share, 1); |
| 433 | res= write_changed_bitmap(share, &share->bitmap); |
| 434 | share->bitmap.changed= 0; |
| 435 | } |
| 436 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
| 437 | } |
| 438 | DBUG_RETURN(res); |
| 439 | } |
| 440 | |
| 441 | |
| 442 | /** |
| 443 | Dirty-page filtering criteria for bitmap pages |
| 444 | |
| 445 | @param type Page's type |
| 446 | @param pageno Page's number |
| 447 | @param rec_lsn Page's rec_lsn |
| 448 | @param arg pages_covered of bitmap |
| 449 | */ |
| 450 | |
| 451 | static enum pagecache_flush_filter_result |
| 452 | filter_flush_bitmap_pages(enum pagecache_page_type type |
| 453 | __attribute__ ((unused)), |
| 454 | pgcache_page_no_t pageno, |
| 455 | LSN rec_lsn __attribute__ ((unused)), |
| 456 | void *arg) |
| 457 | { |
| 458 | return ((pageno % (*(ulong*)arg)) == 0); |
| 459 | } |
| 460 | |
| 461 | |
| 462 | /** |
| 463 | Flushes current bitmap page to the pagecache, and then all bitmap pages |
| 464 | from pagecache to the file. Used by Checkpoint. |
| 465 | |
| 466 | @param share Table's share |
| 467 | */ |
| 468 | |
| 469 | my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) |
| 470 | { |
| 471 | my_bool res= 0; |
| 472 | uint send_signal= 0; |
| 473 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 474 | DBUG_ENTER("_ma_bitmap_flush_all" ); |
| 475 | |
| 476 | #ifdef EXTRA_DEBUG_BITMAP |
| 477 | { |
| 478 | char buff[160]; |
| 479 | uint len= my_sprintf(buff, |
| 480 | (buff, "bitmap_flush: fd: %d id: %u " |
| 481 | "changed: %d changed_not_flushed: %d " |
| 482 | "flush_all_requested: %d" , |
| 483 | share->bitmap.file.file, |
| 484 | share->id, |
| 485 | bitmap->changed, |
| 486 | bitmap->changed_not_flushed, |
| 487 | bitmap->flush_all_requested)); |
| 488 | (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, |
| 489 | (uchar*) buff, len); |
| 490 | } |
| 491 | #endif |
| 492 | |
| 493 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 494 | if (!bitmap->changed && !bitmap->changed_not_flushed) |
| 495 | { |
| 496 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 497 | DBUG_RETURN(0); |
| 498 | } |
| 499 | |
| 500 | _ma_bitmap_mark_file_changed(share, 0); |
| 501 | |
| 502 | /* |
| 503 | The following should be true as it was tested above. We have to test |
| 504 | this again as _ma_bitmap_mark_file_changed() did temporarly release |
| 505 | the bitmap mutex. |
| 506 | */ |
| 507 | if (bitmap->changed || bitmap->changed_not_flushed) |
| 508 | { |
| 509 | bitmap->flush_all_requested++; |
| 510 | bitmap->waiting_for_non_flushable++; |
| 511 | #if !WRONG_BITMAP_FLUSH |
| 512 | while (bitmap->non_flushable > 0) |
| 513 | { |
| 514 | DBUG_PRINT("info" , ("waiting for bitmap to be flushable" )); |
| 515 | mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); |
| 516 | } |
| 517 | #endif |
| 518 | bitmap->waiting_for_non_flushable--; |
| 519 | #ifdef EXTRA_DEBUG_BITMAP |
| 520 | { |
| 521 | char tmp[MAX_BITMAP_INFO_LENGTH]; |
| 522 | _ma_get_bitmap_description(bitmap, bitmap->map, bitmap->page, tmp); |
| 523 | (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, |
| 524 | (uchar*) tmp, strlen(tmp)); |
| 525 | } |
| 526 | #endif |
| 527 | |
| 528 | DBUG_ASSERT(bitmap->flush_all_requested == 1); |
| 529 | /* |
| 530 | Bitmap is in a flushable state: its contents in memory are reflected by |
| 531 | log records (complete REDO-UNDO groups) and all bitmap pages are |
| 532 | unpinned. We keep the mutex to preserve this situation, and flush to the |
| 533 | file. |
| 534 | */ |
| 535 | if (bitmap->changed) |
| 536 | { |
| 537 | bitmap->changed= FALSE; |
| 538 | res= write_changed_bitmap(share, bitmap); |
| 539 | } |
| 540 | /* |
| 541 | We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap |
| 542 | pages have been flushed. That's a condition of correctness of |
| 543 | Recovery: data pages may have been all flushed, if we write the |
| 544 | checkpoint record Recovery will start from after their REDOs. If |
| 545 | bitmap page was not flushed, as the REDOs about it will be skipped, it |
| 546 | will wrongly not be recovered. If bitmap pages had a rec_lsn it would |
| 547 | be different. |
| 548 | There should be no pinned pages as bitmap->non_flushable==0. |
| 549 | */ |
| 550 | if (flush_pagecache_blocks_with_filter(share->pagecache, |
| 551 | &bitmap->file, FLUSH_KEEP, |
| 552 | filter_flush_bitmap_pages, |
| 553 | &bitmap->pages_covered) & |
| 554 | PCFLUSH_PINNED_AND_ERROR) |
| 555 | res= TRUE; |
| 556 | bitmap->changed_not_flushed= FALSE; |
| 557 | bitmap->flush_all_requested--; |
| 558 | /* |
| 559 | Some well-behaved threads may be waiting for flush_all_requested to |
| 560 | become false, wake them up. |
| 561 | */ |
| 562 | DBUG_PRINT("info" , ("bitmap flusher waking up others" )); |
| 563 | send_signal= (bitmap->waiting_for_flush_all_requested | |
| 564 | bitmap->waiting_for_non_flushable); |
| 565 | } |
| 566 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 567 | if (send_signal) |
| 568 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
| 569 | DBUG_RETURN(res); |
| 570 | } |
| 571 | |
| 572 | |
| 573 | /** |
| 574 | @brief Lock bitmap from being used by another thread |
| 575 | |
| 576 | @fn _ma_bitmap_lock() |
| 577 | @param share Table's share |
| 578 | |
| 579 | @notes |
| 580 | This is a temporary solution for allowing someone to delete an inserted |
| 581 | duplicate-key row while someone else is doing concurrent inserts. |
| 582 | This is ok for now as duplicate key errors are not that common. |
| 583 | |
| 584 | In the future we will add locks for row-pages to ensure two threads doesn't |
| 585 | work at the same time on the same page. |
| 586 | */ |
| 587 | |
| 588 | void _ma_bitmap_lock(MARIA_SHARE *share) |
| 589 | { |
| 590 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 591 | DBUG_ENTER("_ma_bitmap_lock" ); |
| 592 | |
| 593 | if (!share->now_transactional) |
| 594 | DBUG_VOID_RETURN; |
| 595 | |
| 596 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 597 | bitmap->flush_all_requested++; |
| 598 | bitmap->waiting_for_non_flushable++; |
| 599 | while (bitmap->non_flushable) |
| 600 | { |
| 601 | DBUG_PRINT("info" , ("waiting for bitmap to be flushable" )); |
| 602 | mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); |
| 603 | } |
| 604 | bitmap->waiting_for_non_flushable--; |
| 605 | /* |
| 606 | Ensure that _ma_bitmap_flush_all() and _ma_bitmap_lock() are blocked. |
| 607 | ma_bitmap_flushable() is blocked thanks to 'flush_all_requested'. |
| 608 | */ |
| 609 | bitmap->non_flushable= 1; |
| 610 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 611 | DBUG_VOID_RETURN; |
| 612 | } |
| 613 | |
| 614 | /** |
| 615 | @brief Unlock bitmap after _ma_bitmap_lock() |
| 616 | |
| 617 | @fn _ma_bitmap_unlock() |
| 618 | @param share Table's share |
| 619 | */ |
| 620 | |
| 621 | void _ma_bitmap_unlock(MARIA_SHARE *share) |
| 622 | { |
| 623 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 624 | uint send_signal; |
| 625 | DBUG_ENTER("_ma_bitmap_unlock" ); |
| 626 | |
| 627 | if (!share->now_transactional) |
| 628 | DBUG_VOID_RETURN; |
| 629 | DBUG_ASSERT(bitmap->flush_all_requested > 0 && bitmap->non_flushable == 1); |
| 630 | |
| 631 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 632 | bitmap->non_flushable= 0; |
| 633 | _ma_bitmap_unpin_all(share); |
| 634 | send_signal= bitmap->waiting_for_non_flushable; |
| 635 | if (!--bitmap->flush_all_requested) |
| 636 | send_signal|= bitmap->waiting_for_flush_all_requested; |
| 637 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 638 | if (send_signal) |
| 639 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
| 640 | DBUG_VOID_RETURN; |
| 641 | } |
| 642 | |
| 643 | |
| 644 | /** |
| 645 | @brief Unpin all pinned bitmap pages |
| 646 | |
| 647 | @param share Table's share |
| 648 | |
| 649 | @return Operation status |
| 650 | @retval 0 ok |
| 651 | |
| 652 | @note This unpins pages pinned by other threads. |
| 653 | */ |
| 654 | |
| 655 | static void _ma_bitmap_unpin_all(MARIA_SHARE *share) |
| 656 | { |
| 657 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 658 | MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*) |
| 659 | dynamic_array_ptr(&bitmap->pinned_pages, 0)); |
| 660 | MARIA_PINNED_PAGE *pinned_page= page_link + bitmap->pinned_pages.elements; |
| 661 | DBUG_ENTER("_ma_bitmap_unpin_all" ); |
| 662 | DBUG_PRINT("info" , ("pinned: %u" , bitmap->pinned_pages.elements)); |
| 663 | while (pinned_page-- != page_link) |
| 664 | pagecache_unlock_by_link(share->pagecache, pinned_page->link, |
| 665 | pinned_page->unlock, PAGECACHE_UNPIN, |
| 666 | LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE, TRUE); |
| 667 | bitmap->pinned_pages.elements= 0; |
| 668 | DBUG_VOID_RETURN; |
| 669 | } |
| 670 | |
| 671 | |
| 672 | /* |
| 673 | Intialize bitmap in memory to a zero bitmap |
| 674 | |
| 675 | SYNOPSIS |
| 676 | _ma_bitmap_delete_all() |
| 677 | share Share handler |
| 678 | |
| 679 | NOTES |
| 680 | This is called on maria_delete_all_rows (truncate data file). |
| 681 | */ |
| 682 | |
| 683 | void _ma_bitmap_delete_all(MARIA_SHARE *share) |
| 684 | { |
| 685 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 686 | DBUG_ENTER("_ma_bitmap_delete_all" ); |
| 687 | if (bitmap->map) /* Not in create */ |
| 688 | { |
| 689 | bzero(bitmap->map, bitmap->block_size); |
| 690 | bitmap->changed= 1; |
| 691 | bitmap->page= 0; |
| 692 | bitmap->used_size= bitmap->full_tail_size= bitmap->full_head_size= 0; |
| 693 | bitmap->total_size= bitmap->max_total_size; |
| 694 | } |
| 695 | DBUG_VOID_RETURN; |
| 696 | } |
| 697 | |
| 698 | |
| 699 | /** |
| 700 | @brief Reset bitmap caches |
| 701 | |
| 702 | @fn _ma_bitmap_reset_cache() |
| 703 | @param share Maria share |
| 704 | |
| 705 | @notes |
| 706 | This is called after we have swapped file descriptors and we want |
| 707 | bitmap to forget all cached information. |
| 708 | It's also called directly after we have opened a file. |
| 709 | */ |
| 710 | |
| 711 | void _ma_bitmap_reset_cache(MARIA_SHARE *share) |
| 712 | { |
| 713 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 714 | |
| 715 | if (bitmap->map) /* If using bitmap */ |
| 716 | { |
| 717 | /* Forget changes in current bitmap page */ |
| 718 | bitmap->changed= 0; |
| 719 | |
| 720 | /* |
| 721 | We can't read a page yet, as in some case we don't have an active |
| 722 | page cache yet. |
| 723 | Pretend we have a dummy, full and not changed bitmap page in memory. |
| 724 | |
| 725 | We set bitmap->page to a value so that if we use it in |
| 726 | move_to_next_bitmap() it will point to page 0. |
| 727 | (This can only happen if writing to a bitmap page fails) |
| 728 | */ |
| 729 | bitmap->page= ((pgcache_page_no_t) 0) - bitmap->pages_covered; |
| 730 | bitmap->used_size= bitmap->total_size= bitmap->max_total_size; |
| 731 | bitmap->full_head_size= bitmap->full_tail_size= bitmap->max_total_size; |
| 732 | bfill(bitmap->map, share->block_size, 255); |
| 733 | #ifndef DBUG_OFF |
| 734 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
| 735 | #endif |
| 736 | |
| 737 | /* Start scanning for free space from start of file */ |
| 738 | share->state.first_bitmap_with_space = 0; |
| 739 | } |
| 740 | } |
| 741 | |
| 742 | |
| 743 | /* |
| 744 | Return bitmap pattern for the smallest head block that can hold 'size' |
| 745 | |
| 746 | SYNOPSIS |
| 747 | size_to_head_pattern() |
| 748 | bitmap Bitmap |
| 749 | size Requested size |
| 750 | |
| 751 | RETURN |
| 752 | 0-3 For a description of the bitmap sizes, see the header |
| 753 | */ |
| 754 | |
| 755 | static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
| 756 | { |
| 757 | if (size <= bitmap->sizes[3]) |
| 758 | return 3; |
| 759 | if (size <= bitmap->sizes[2]) |
| 760 | return 2; |
| 761 | if (size <= bitmap->sizes[1]) |
| 762 | return 1; |
| 763 | DBUG_ASSERT(size <= bitmap->sizes[0]); |
| 764 | return 0; |
| 765 | } |
| 766 | |
| 767 | |
| 768 | /* |
| 769 | Return bitmap pattern for head block where there is size bytes free |
| 770 | |
| 771 | SYNOPSIS |
| 772 | _ma_free_size_to_head_pattern() |
| 773 | bitmap Bitmap |
| 774 | size Requested size |
| 775 | |
| 776 | RETURN |
| 777 | 0-4 (Possible bitmap patterns for head block) |
| 778 | */ |
| 779 | |
| 780 | uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
| 781 | { |
| 782 | if (size < bitmap->sizes[3]) |
| 783 | return 4; |
| 784 | if (size < bitmap->sizes[2]) |
| 785 | return 3; |
| 786 | if (size < bitmap->sizes[1]) |
| 787 | return 2; |
| 788 | return (size < bitmap->sizes[0]) ? 1 : 0; |
| 789 | } |
| 790 | |
| 791 | |
| 792 | /* |
| 793 | Return bitmap pattern for the smallest tail block that can hold 'size' |
| 794 | |
| 795 | SYNOPSIS |
| 796 | size_to_tail_pattern() |
| 797 | bitmap Bitmap |
| 798 | size Requested size |
| 799 | |
| 800 | RETURN |
| 801 | 0, 5 or 6 For a description of the bitmap sizes, see the header |
| 802 | */ |
| 803 | |
| 804 | static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
| 805 | { |
| 806 | if (size <= bitmap->sizes[6]) |
| 807 | return 6; |
| 808 | if (size <= bitmap->sizes[5]) |
| 809 | return 5; |
| 810 | DBUG_ASSERT(size <= bitmap->sizes[0]); |
| 811 | return 0; |
| 812 | } |
| 813 | |
| 814 | |
| 815 | /* |
| 816 | Return bitmap pattern for tail block where there is size bytes free |
| 817 | |
| 818 | SYNOPSIS |
| 819 | free_size_to_tail_pattern() |
| 820 | bitmap Bitmap |
| 821 | size Requested size |
| 822 | |
| 823 | RETURN |
| 824 | 0, 5, 6, 7 For a description of the bitmap sizes, see the header |
| 825 | */ |
| 826 | |
| 827 | static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
| 828 | { |
| 829 | if (size >= bitmap->sizes[0]) |
| 830 | return 0; /* Revert to empty page */ |
| 831 | if (size < bitmap->sizes[6]) |
| 832 | return 7; |
| 833 | if (size < bitmap->sizes[5]) |
| 834 | return 6; |
| 835 | return 5; |
| 836 | } |
| 837 | |
| 838 | |
| 839 | /* |
| 840 | Return size guranteed to be available on a page |
| 841 | |
| 842 | SYNOPSIS |
| 843 | pattern_to_head_size() |
| 844 | bitmap Bitmap |
| 845 | pattern Pattern (0-7) |
| 846 | |
| 847 | RETURN |
| 848 | 0 - block_size |
| 849 | */ |
| 850 | |
| 851 | static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) |
| 852 | { |
| 853 | DBUG_ASSERT(pattern <= 7); |
| 854 | return bitmap->sizes[pattern]; |
| 855 | } |
| 856 | |
| 857 | |
| 858 | /* |
| 859 | Print bitmap for debugging |
| 860 | |
| 861 | SYNOPSIS |
| 862 | _ma_print_bitmap_changes() |
| 863 | bitmap Bitmap to print |
| 864 | |
| 865 | IMPLEMENTATION |
| 866 | Prints all changed bits since last call to _ma_print_bitmap(). |
| 867 | This is done by having a copy of the last bitmap in |
| 868 | bitmap->map+bitmap->block_size. |
| 869 | */ |
| 870 | |
| 871 | #ifndef DBUG_OFF |
| 872 | |
| 873 | static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap) |
| 874 | { |
| 875 | uchar *pos, *end, *org_pos; |
| 876 | ulong page; |
| 877 | DBUG_ENTER("_ma_print_bitmap_changes" ); |
| 878 | |
| 879 | end= bitmap->map + bitmap->used_size; |
| 880 | DBUG_LOCK_FILE; |
| 881 | fprintf(DBUG_FILE,"\nBitmap page changes at page: %lu bitmap: %p\n" , |
| 882 | (ulong) bitmap->page, bitmap->map); |
| 883 | |
| 884 | page= (ulong) bitmap->page+1; |
| 885 | for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ; |
| 886 | pos < end ; |
| 887 | pos+= 6, org_pos+= 6) |
| 888 | { |
| 889 | ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ |
| 890 | ulonglong org_bits= uint6korr(org_pos); |
| 891 | uint i; |
| 892 | |
| 893 | /* |
| 894 | Test if there is any changes in the next 16 bitmaps (to not have to |
| 895 | loop through all bits if we know they are the same) |
| 896 | */ |
| 897 | if (bits != org_bits) |
| 898 | { |
| 899 | for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3) |
| 900 | { |
| 901 | if ((bits & 7) != (org_bits & 7)) |
| 902 | fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n" , page+i, |
| 903 | bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]); |
| 904 | } |
| 905 | } |
| 906 | page+= 16; |
| 907 | } |
| 908 | fputc('\n', DBUG_FILE); |
| 909 | DBUG_UNLOCK_FILE; |
| 910 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
| 911 | DBUG_VOID_RETURN; |
| 912 | } |
| 913 | |
| 914 | |
| 915 | /* Print content of bitmap for debugging */ |
| 916 | |
| 917 | void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, |
| 918 | pgcache_page_no_t page) |
| 919 | { |
| 920 | uchar *pos, *end; |
| 921 | char llbuff[22]; |
| 922 | |
| 923 | DBUG_LOCK_FILE; |
| 924 | fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n" , llstr(page, llbuff)); |
| 925 | |
| 926 | page++; /* Skip bitmap page */ |
| 927 | for (pos= data, end= pos + bitmap->max_total_size; |
| 928 | pos < end ; |
| 929 | pos+= 6) |
| 930 | { |
| 931 | ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ |
| 932 | |
| 933 | /* |
| 934 | Test if there is any changes in the next 16 bitmaps (to not have to |
| 935 | loop through all bits if we know they are the same) |
| 936 | */ |
| 937 | if (bits) |
| 938 | { |
| 939 | uint i; |
| 940 | for (i= 0; i < 16 ; i++, bits>>= 3) |
| 941 | { |
| 942 | if (bits & 7) |
| 943 | fprintf(DBUG_FILE, "Page: %8s %s\n" , llstr(page+i, llbuff), |
| 944 | bits_to_txt[bits & 7]); |
| 945 | } |
| 946 | } |
| 947 | page+= 16; |
| 948 | } |
| 949 | fputc('\n', DBUG_FILE); |
| 950 | DBUG_UNLOCK_FILE; |
| 951 | } |
| 952 | |
| 953 | #endif /* DBUG_OFF */ |
| 954 | |
| 955 | |
| 956 | /* |
| 957 | Return content of bitmap as a printable string |
| 958 | */ |
| 959 | |
| 960 | void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap, |
| 961 | uchar *bitmap_data, |
| 962 | pgcache_page_no_t page, |
| 963 | char *out) |
| 964 | { |
| 965 | uchar *pos, *end; |
| 966 | uint count=0, dot_printed= 0, len; |
| 967 | char buff[80], last[80]; |
| 968 | |
| 969 | page++; |
| 970 | last[0]=0; |
| 971 | for (pos= bitmap_data, end= pos+ bitmap->used_size ; pos < end ; pos+= 6) |
| 972 | { |
| 973 | ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ |
| 974 | uint i; |
| 975 | |
| 976 | for (i= 0; i < 16 ; i++, bits>>= 3) |
| 977 | { |
| 978 | if (count > 60) |
| 979 | { |
| 980 | if (memcmp(buff, last, count)) |
| 981 | { |
| 982 | memcpy(last, buff, count); |
| 983 | len= sprintf(out, "%8lu: " , (ulong) page - count); |
| 984 | memcpy(out+len, buff, count); |
| 985 | out+= len + count + 1; |
| 986 | out[-1]= '\n'; |
| 987 | dot_printed= 0; |
| 988 | } |
| 989 | else if (!(dot_printed++)) |
| 990 | { |
| 991 | out= strmov(out, "...\n" ); |
| 992 | } |
| 993 | count= 0; |
| 994 | } |
| 995 | buff[count++]= '0' + (uint) (bits & 7); |
| 996 | page++; |
| 997 | } |
| 998 | } |
| 999 | len= sprintf(out, "%8lu: " , (ulong) page - count); |
| 1000 | memcpy(out+len, buff, count); |
| 1001 | out[len + count]= '\n'; |
| 1002 | out[len + count + 1]= 0; |
| 1003 | } |
| 1004 | |
| 1005 | |
| 1006 | /* |
| 1007 | Adjust bitmap->total_size to not go over max_data_file_size |
| 1008 | */ |
| 1009 | |
| 1010 | static void adjust_total_size(MARIA_HA *info, pgcache_page_no_t page) |
| 1011 | { |
| 1012 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 1013 | |
| 1014 | if (page < bitmap->last_bitmap_page) |
| 1015 | bitmap->total_size= bitmap->max_total_size; /* Use all bits in bitmap */ |
| 1016 | else |
| 1017 | bitmap->total_size= bitmap->last_total_size; |
| 1018 | } |
| 1019 | |
| 1020 | /*************************************************************************** |
| 1021 | Reading & writing bitmap pages |
| 1022 | ***************************************************************************/ |
| 1023 | |
| 1024 | /* |
| 1025 | Read a given bitmap page |
| 1026 | |
| 1027 | SYNOPSIS |
| 1028 | _ma_read_bitmap_page() |
| 1029 | info Maria handler |
| 1030 | bitmap Bitmap handler |
| 1031 | page Page to read |
| 1032 | |
| 1033 | NOTE |
| 1034 | We don't always have share->bitmap.bitmap_lock here |
| 1035 | (when called from_ma_check_bitmap_data() for example). |
| 1036 | |
| 1037 | RETURN |
| 1038 | 0 ok |
| 1039 | 1 error (Error writing old bitmap or reading bitmap page) |
| 1040 | */ |
| 1041 | |
| 1042 | static my_bool _ma_read_bitmap_page(MARIA_HA *info, |
| 1043 | MARIA_FILE_BITMAP *bitmap, |
| 1044 | pgcache_page_no_t page) |
| 1045 | { |
| 1046 | MARIA_SHARE *share= info->s; |
| 1047 | my_bool res; |
| 1048 | DBUG_ENTER("_ma_read_bitmap_page" ); |
| 1049 | DBUG_PRINT("enter" , ("page: %lld data_file_length: %lld" , |
| 1050 | (longlong) page, |
| 1051 | (longlong) share->state.state.data_file_length)); |
| 1052 | DBUG_ASSERT(page % bitmap->pages_covered == 0); |
| 1053 | DBUG_ASSERT(!bitmap->changed); |
| 1054 | |
| 1055 | bitmap->page= page; |
| 1056 | if ((page + 1) * bitmap->block_size > share->state.state.data_file_length) |
| 1057 | { |
| 1058 | /* Inexistent or half-created page */ |
| 1059 | res= _ma_bitmap_create_missing(info, bitmap, page); |
| 1060 | if (!res) |
| 1061 | adjust_total_size(info, page); |
| 1062 | DBUG_RETURN(res); |
| 1063 | } |
| 1064 | |
| 1065 | adjust_total_size(info, page); |
| 1066 | bitmap->full_head_size= bitmap->full_tail_size= 0; |
| 1067 | DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); |
| 1068 | res= pagecache_read(share->pagecache, |
| 1069 | &bitmap->file, page, 0, |
| 1070 | bitmap->map, PAGECACHE_PLAIN_PAGE, |
| 1071 | PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == NULL; |
| 1072 | |
| 1073 | if (!res) |
| 1074 | { |
| 1075 | /* Calculate used_size */ |
| 1076 | const uchar *data, *end= bitmap->map; |
| 1077 | for (data= bitmap->map + bitmap->total_size; --data >= end && *data == 0; ) |
| 1078 | {} |
| 1079 | bitmap->used_size= (uint) ((data + 1) - end); |
| 1080 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
| 1081 | } |
| 1082 | /* |
| 1083 | We can't check maria_bitmap_marker here as if the bitmap page |
| 1084 | previously had a true checksum and the user switched mode to not checksum |
| 1085 | this may have any value, except maria_normal_page_marker. |
| 1086 | |
| 1087 | Using maria_normal_page_marker gives us a protection against bugs |
| 1088 | when running without any checksums. |
| 1089 | */ |
| 1090 | |
| 1091 | #ifndef DBUG_OFF |
| 1092 | if (!res) |
| 1093 | { |
| 1094 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
| 1095 | _ma_check_bitmap(bitmap); |
| 1096 | } |
| 1097 | #endif |
| 1098 | DBUG_RETURN(res); |
| 1099 | } |
| 1100 | |
| 1101 | |
| 1102 | /* |
| 1103 | Change to another bitmap page |
| 1104 | |
| 1105 | SYNOPSIS |
| 1106 | _ma_change_bitmap_page() |
| 1107 | info Maria handler |
| 1108 | bitmap Bitmap handler |
| 1109 | page Bitmap page to read |
| 1110 | |
| 1111 | NOTES |
| 1112 | If old bitmap was changed, write it out before reading new one |
| 1113 | We return empty bitmap if page is outside of file size |
| 1114 | |
| 1115 | RETURN |
| 1116 | 0 ok |
| 1117 | 1 error (Error writing old bitmap or reading bitmap page) |
| 1118 | */ |
| 1119 | |
| 1120 | static my_bool _ma_change_bitmap_page(MARIA_HA *info, |
| 1121 | MARIA_FILE_BITMAP *bitmap, |
| 1122 | pgcache_page_no_t page) |
| 1123 | { |
| 1124 | DBUG_ENTER("_ma_change_bitmap_page" ); |
| 1125 | |
| 1126 | _ma_check_bitmap(bitmap); |
| 1127 | |
| 1128 | /* |
| 1129 | We have to mark the file changed here, as otherwise the following |
| 1130 | read/write to pagecache may force a page out from this file, which would |
| 1131 | cause _ma_mark_file_changed() to be called with bitmaplock hold! |
| 1132 | */ |
| 1133 | _ma_bitmap_mark_file_changed(info->s, 1); |
| 1134 | |
| 1135 | if (bitmap->changed) |
| 1136 | { |
| 1137 | if (write_changed_bitmap(info->s, bitmap)) |
| 1138 | DBUG_RETURN(1); |
| 1139 | bitmap->changed= 0; |
| 1140 | } |
| 1141 | DBUG_RETURN(_ma_read_bitmap_page(info, bitmap, page)); |
| 1142 | } |
| 1143 | |
| 1144 | |
| 1145 | /* |
| 1146 | Read next suitable bitmap |
| 1147 | |
| 1148 | SYNOPSIS |
| 1149 | move_to_next_bitmap() |
| 1150 | bitmap Bitmap handle |
| 1151 | |
| 1152 | NOTES |
| 1153 | The found bitmap may be full, so calling function may need to call this |
| 1154 | repeatedly until it finds enough space. |
| 1155 | |
| 1156 | TODO |
| 1157 | Add cache of bitmaps to not read something that is not usable |
| 1158 | |
| 1159 | RETURN |
| 1160 | 0 ok |
| 1161 | 1 error (either couldn't save old bitmap or read new one) |
| 1162 | */ |
| 1163 | |
| 1164 | static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap) |
| 1165 | { |
| 1166 | pgcache_page_no_t page= bitmap->page; |
| 1167 | MARIA_STATE_INFO *state= &info->s->state; |
| 1168 | DBUG_ENTER("move_to_next_bitmap" ); |
| 1169 | |
| 1170 | if (state->first_bitmap_with_space != ~(pgcache_page_no_t) 0 && |
| 1171 | state->first_bitmap_with_space != page) |
| 1172 | { |
| 1173 | page= state->first_bitmap_with_space; |
| 1174 | state->first_bitmap_with_space= ~(pgcache_page_no_t) 0; |
| 1175 | DBUG_ASSERT(page % bitmap->pages_covered == 0); |
| 1176 | } |
| 1177 | else |
| 1178 | { |
| 1179 | page+= bitmap->pages_covered; |
| 1180 | DBUG_ASSERT(page % bitmap->pages_covered == 0); |
| 1181 | } |
| 1182 | DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page)); |
| 1183 | } |
| 1184 | |
| 1185 | |
| 1186 | /**************************************************************************** |
| 1187 | Allocate data in bitmaps |
| 1188 | ****************************************************************************/ |
| 1189 | |
| 1190 | /* |
| 1191 | Store data in 'block' and mark the place used in the bitmap |
| 1192 | |
| 1193 | SYNOPSIS |
| 1194 | fill_block() |
| 1195 | bitmap Bitmap handle |
| 1196 | block Store data about what we found |
| 1197 | best_data Pointer to best 6 uchar aligned area in bitmap->map |
| 1198 | best_pos Which bit in *best_data the area starts |
| 1199 | 0 = first bit pattern, 1 second bit pattern etc |
| 1200 | best_bits The original value of the bits at best_pos |
| 1201 | fill_pattern Bitmap pattern to store in best_data[best_pos] |
| 1202 | |
| 1203 | NOTES |
| 1204 | We mark all pages to be 'TAIL's, which means that |
| 1205 | block->page_count is really a row position inside the page. |
| 1206 | */ |
| 1207 | |
| 1208 | static void fill_block(MARIA_FILE_BITMAP *bitmap, |
| 1209 | MARIA_BITMAP_BLOCK *block, |
| 1210 | uchar *best_data, uint best_pos, uint best_bits, |
| 1211 | uint fill_pattern) |
| 1212 | { |
| 1213 | uint page, offset, tmp; |
| 1214 | uchar *data; |
| 1215 | DBUG_ENTER("fill_block" ); |
| 1216 | |
| 1217 | /* For each 6 bytes we have 6*8/3= 16 patterns */ |
| 1218 | page= ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos; |
| 1219 | DBUG_ASSERT(page + 1 < bitmap->pages_covered); |
| 1220 | block->page= bitmap->page + 1 + page; |
| 1221 | block->page_count= TAIL_PAGE_COUNT_MARKER; |
| 1222 | block->empty_space= pattern_to_size(bitmap, best_bits); |
| 1223 | block->sub_blocks= 0; |
| 1224 | block->org_bitmap_value= best_bits; |
| 1225 | block->used= BLOCKUSED_TAIL; /* See _ma_bitmap_release_unused() */ |
| 1226 | |
| 1227 | /* |
| 1228 | Mark place used by reading/writing 2 bytes at a time to handle |
| 1229 | bitmaps in overlapping bytes |
| 1230 | */ |
| 1231 | best_pos*= 3; |
| 1232 | data= best_data+ best_pos / 8; |
| 1233 | offset= best_pos & 7; |
| 1234 | tmp= uint2korr(data); |
| 1235 | |
| 1236 | /* we turn off the 3 bits and replace them with fill_pattern */ |
| 1237 | tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); |
| 1238 | int2store(data, tmp); |
| 1239 | bitmap->changed= 1; |
| 1240 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
| 1241 | DBUG_VOID_RETURN; |
| 1242 | } |
| 1243 | |
| 1244 | |
| 1245 | /* |
| 1246 | Allocate data for head block |
| 1247 | |
| 1248 | SYNOPSIS |
| 1249 | allocate_head() |
| 1250 | bitmap bitmap |
| 1251 | size Size of data region we need to store |
| 1252 | block Store found information here |
| 1253 | |
| 1254 | IMPLEMENTATION |
| 1255 | Find the best-fit page to put a region of 'size' |
| 1256 | This is defined as the first page of the set of pages |
| 1257 | with the smallest free space that can hold 'size'. |
| 1258 | |
| 1259 | NOTES |
| 1260 | Updates bitmap->full_head_size while scanning data |
| 1261 | |
| 1262 | RETURN |
| 1263 | 0 ok (block is updated) |
| 1264 | 1 error (no space in bitmap; block is not touched) |
| 1265 | */ |
| 1266 | |
| 1267 | |
| 1268 | static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, |
| 1269 | MARIA_BITMAP_BLOCK *block) |
| 1270 | { |
| 1271 | uint min_bits= size_to_head_pattern(bitmap, size); |
| 1272 | uchar *data, *end; |
| 1273 | uchar *best_data= 0; |
| 1274 | uint best_bits= (uint) -1, UNINIT_VAR(best_pos); |
| 1275 | my_bool first_pattern= 0; /* if doing insert_order */ |
| 1276 | my_bool first_found= 1; |
| 1277 | MARIA_SHARE *share= bitmap->share; |
| 1278 | my_bool insert_order= |
| 1279 | MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_INSERT_ORDER); |
| 1280 | DBUG_ENTER("allocate_head" ); |
| 1281 | |
| 1282 | DBUG_ASSERT(size <= FULL_PAGE_SIZE(share)); |
| 1283 | |
| 1284 | end= bitmap->map + bitmap->used_size; |
| 1285 | if (insert_order && bitmap->page == share->last_insert_bitmap) |
| 1286 | { |
| 1287 | uint last_insert_page= share->last_insert_page; |
| 1288 | uint byte= 6 * (last_insert_page / 16); |
| 1289 | first_pattern= last_insert_page % 16; |
| 1290 | data= bitmap->map+byte; |
| 1291 | DBUG_ASSERT(data <= end); |
| 1292 | } |
| 1293 | else |
| 1294 | data= bitmap->map + (bitmap->full_head_size/6)*6; |
| 1295 | |
| 1296 | for (; data < end; data+= 6, first_pattern= 0) |
| 1297 | { |
| 1298 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
| 1299 | uint i; |
| 1300 | |
| 1301 | /* |
| 1302 | Skip common patterns |
| 1303 | We can skip empty pages (if we already found a match) or |
| 1304 | anything matching the following pattern as this will be either |
| 1305 | a full page or a tail page |
| 1306 | */ |
| 1307 | if ((!bits && best_data) || |
| 1308 | ((bits & 04444444444444444LL) == 04444444444444444LL)) |
| 1309 | continue; |
| 1310 | |
| 1311 | for (i= first_pattern, bits >>= (3 * first_pattern); i < 16 ; |
| 1312 | i++, bits >>= 3) |
| 1313 | { |
| 1314 | uint pattern= (uint) (bits & 7); |
| 1315 | |
| 1316 | if (pattern <= 3) /* Room for more data */ |
| 1317 | { |
| 1318 | if (first_found) |
| 1319 | { |
| 1320 | first_found= 0; |
| 1321 | bitmap->full_head_size= (uint)(data - bitmap->map); |
| 1322 | } |
| 1323 | } |
| 1324 | if (pattern <= min_bits) |
| 1325 | { |
| 1326 | /* There is enough space here, check if we have found better */ |
| 1327 | if ((int) pattern > (int) best_bits) |
| 1328 | { |
| 1329 | /* |
| 1330 | There is more than enough space here and it's better than what |
| 1331 | we have found so far. Remember it, as we will choose it if we |
| 1332 | don't find anything in this bitmap page. |
| 1333 | */ |
| 1334 | best_bits= pattern; |
| 1335 | best_data= data; |
| 1336 | best_pos= i; |
| 1337 | if (pattern == min_bits || bitmap->return_first_match) |
| 1338 | goto found; /* Best possible match */ |
| 1339 | } |
| 1340 | } |
| 1341 | } |
| 1342 | } |
| 1343 | if (!best_data) /* Found no place */ |
| 1344 | { |
| 1345 | if (data >= bitmap->map + bitmap->total_size) |
| 1346 | DBUG_RETURN(1); /* No space in bitmap */ |
| 1347 | DBUG_ASSERT(uint6korr(data) == 0); |
| 1348 | /* Allocate data at end of bitmap */ |
| 1349 | bitmap->used_size= (uint) (data - bitmap->map) + 6; |
| 1350 | best_data= data; |
| 1351 | best_pos= best_bits= 0; |
| 1352 | } |
| 1353 | else |
| 1354 | { |
| 1355 | /* |
| 1356 | This is not stricly needed as used_size should be alligned on 6, |
| 1357 | but for easier debugging lets try to keep it more accurate |
| 1358 | */ |
| 1359 | uint position= (uint) (best_data - bitmap->map) + 6; |
| 1360 | set_if_bigger(bitmap->used_size, position); |
| 1361 | } |
| 1362 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
| 1363 | |
| 1364 | found: |
| 1365 | if (insert_order) |
| 1366 | { |
| 1367 | share->last_insert_page= |
| 1368 | ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos; |
| 1369 | share->last_insert_bitmap= bitmap->page; |
| 1370 | } |
| 1371 | fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE); |
| 1372 | DBUG_RETURN(0); |
| 1373 | } |
| 1374 | |
| 1375 | |
| 1376 | /* |
| 1377 | Allocate data for tail block |
| 1378 | |
| 1379 | SYNOPSIS |
| 1380 | allocate_tail() |
| 1381 | bitmap bitmap |
| 1382 | size Size of block we need to find |
| 1383 | block Store found information here |
| 1384 | |
| 1385 | RETURN |
| 1386 | 0 ok (block is updated) |
| 1387 | 1 error (no space in bitmap; block is not touched) |
| 1388 | */ |
| 1389 | |
| 1390 | |
| 1391 | static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size, |
| 1392 | MARIA_BITMAP_BLOCK *block) |
| 1393 | { |
| 1394 | uint min_bits= size_to_tail_pattern(bitmap, size); |
| 1395 | uchar *data, *end, *best_data= 0; |
| 1396 | my_bool first_found= 1; |
| 1397 | uint best_bits= (uint) -1, UNINIT_VAR(best_pos); |
| 1398 | DBUG_ENTER("allocate_tail" ); |
| 1399 | DBUG_PRINT("enter" , ("size: %u" , size)); |
| 1400 | |
| 1401 | data= bitmap->map + (bitmap->full_tail_size/6)*6; |
| 1402 | end= bitmap->map + bitmap->used_size; |
| 1403 | |
| 1404 | /* |
| 1405 | We have to add DIR_ENTRY_SIZE here as this is not part of the data size |
| 1406 | See call to allocate_tail() in find_tail(). |
| 1407 | */ |
| 1408 | DBUG_ASSERT(size <= MAX_TAIL_SIZE(bitmap->block_size) + DIR_ENTRY_SIZE); |
| 1409 | |
| 1410 | for (; data < end; data += 6) |
| 1411 | { |
| 1412 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
| 1413 | uint i; |
| 1414 | |
| 1415 | /* |
| 1416 | Skip common patterns |
| 1417 | We can skip empty pages (if we already found a match) or |
| 1418 | the following patterns: 1-4 (head pages, not suitable for tail) or |
| 1419 | 7 (full tail page). See 'Dynamic size records' comment at start of file. |
| 1420 | |
| 1421 | At the moment we only skip full head and tail pages (ie, all bits are |
| 1422 | set) as this is easy to detect with one simple test and is a |
| 1423 | quite common case if we have blobs. |
| 1424 | */ |
| 1425 | |
| 1426 | if ((!bits && best_data) || bits == 0xffffffffffffLL || |
| 1427 | bits == 04444444444444444LL) |
| 1428 | continue; |
| 1429 | for (i= 0; i < 16; i++, bits >>= 3) |
| 1430 | { |
| 1431 | uint pattern= (uint) (bits & 7); |
| 1432 | |
| 1433 | if (pattern == 0 || |
| 1434 | (pattern > FULL_HEAD_PAGE && pattern < FULL_TAIL_PAGE)) |
| 1435 | { |
| 1436 | /* There is room for tail data */ |
| 1437 | if (first_found) |
| 1438 | { |
| 1439 | first_found= 0; |
| 1440 | bitmap->full_tail_size= (uint)(data - bitmap->map); |
| 1441 | } |
| 1442 | } |
| 1443 | |
| 1444 | if (pattern <= min_bits && (!pattern || pattern > FULL_HEAD_PAGE)) |
| 1445 | { |
| 1446 | if ((int) pattern > (int) best_bits) |
| 1447 | { |
| 1448 | best_bits= pattern; |
| 1449 | best_data= data; |
| 1450 | best_pos= i; |
| 1451 | if (pattern == min_bits) |
| 1452 | goto found; /* Can't be better */ |
| 1453 | } |
| 1454 | } |
| 1455 | } |
| 1456 | } |
| 1457 | if (!best_data) |
| 1458 | { |
| 1459 | if (data >= bitmap->map + bitmap->total_size) |
| 1460 | DBUG_RETURN(1); |
| 1461 | DBUG_ASSERT(uint6korr(data) == 0); |
| 1462 | /* Allocate data at end of bitmap */ |
| 1463 | best_data= data; |
| 1464 | bitmap->used_size= (uint) (data - bitmap->map) + 6; |
| 1465 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
| 1466 | best_pos= best_bits= 0; |
| 1467 | } |
| 1468 | |
| 1469 | found: |
| 1470 | fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE); |
| 1471 | DBUG_RETURN(0); |
| 1472 | } |
| 1473 | |
| 1474 | |
| 1475 | /* |
| 1476 | Allocate data for full blocks |
| 1477 | |
| 1478 | SYNOPSIS |
| 1479 | allocate_full_pages() |
| 1480 | bitmap bitmap |
| 1481 | pages_needed Total size in pages (bitmap->total_size) we would like to have |
| 1482 | block Store found information here |
| 1483 | full_page 1 if we are not allowed to split extent |
| 1484 | |
| 1485 | IMPLEMENTATION |
| 1486 | We will return the smallest area >= size. If there is no such |
| 1487 | block, we will return the biggest area that satisfies |
| 1488 | area_size >= MY_MIN(BLOB_SEGMENT_MIN_SIZE*full_page_size, size) |
| 1489 | |
| 1490 | To speed up searches, we will only consider areas that has at least 16 free |
| 1491 | pages starting on an even boundary. When finding such an area, we will |
| 1492 | extend it with all previous and following free pages. This will ensure |
| 1493 | we don't get holes between areas |
| 1494 | |
| 1495 | RETURN |
| 1496 | # Blocks used |
| 1497 | 0 error (no space in bitmap; block is not touched) |
| 1498 | */ |
| 1499 | |
| 1500 | static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, |
| 1501 | ulong pages_needed, |
| 1502 | MARIA_BITMAP_BLOCK *block, my_bool full_page) |
| 1503 | { |
| 1504 | uchar *data, *data_end, *page_end; |
| 1505 | uchar *best_data= 0; |
| 1506 | uint min_size; |
| 1507 | uint best_area_size, UNINIT_VAR(best_prefix_area_size); |
| 1508 | uint page, size; |
| 1509 | ulonglong UNINIT_VAR(best_prefix_bits); |
| 1510 | DBUG_ENTER("allocate_full_pages" ); |
| 1511 | DBUG_PRINT("enter" , ("pages_needed: %lu" , pages_needed)); |
| 1512 | |
| 1513 | min_size= pages_needed; |
| 1514 | if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE) |
| 1515 | min_size= BLOB_SEGMENT_MIN_SIZE; |
| 1516 | best_area_size= ~(uint) 0; |
| 1517 | |
| 1518 | data= bitmap->map + (bitmap->full_head_size/6)*6; |
| 1519 | data_end= bitmap->map + bitmap->used_size; |
| 1520 | page_end= bitmap->map + bitmap->total_size; |
| 1521 | |
| 1522 | for (; data < page_end; data+= 6) |
| 1523 | { |
| 1524 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
| 1525 | uchar *data_start; |
| 1526 | ulonglong prefix_bits= 0; |
| 1527 | uint area_size, prefix_area_size, suffix_area_size; |
| 1528 | |
| 1529 | /* Find area with at least 16 free pages */ |
| 1530 | if (bits) |
| 1531 | continue; |
| 1532 | data_start= data; |
| 1533 | /* Find size of area */ |
| 1534 | for (data+=6 ; data < data_end ; data+= 6) |
| 1535 | { |
| 1536 | if ((bits= uint6korr(data))) |
| 1537 | break; |
| 1538 | } |
| 1539 | /* |
| 1540 | Check if we are end of bitmap. In this case we know that |
| 1541 | the rest of the bitmap is usable |
| 1542 | */ |
| 1543 | if (data >= data_end) |
| 1544 | data= page_end; |
| 1545 | area_size= (uint) (data - data_start) / 6 * 16; |
| 1546 | if (area_size >= best_area_size) |
| 1547 | continue; |
| 1548 | prefix_area_size= suffix_area_size= 0; |
| 1549 | if (!bits) |
| 1550 | { |
| 1551 | /* |
| 1552 | End of page; All the rest of the bits on page are part of area |
| 1553 | This is needed because bitmap->used_size only covers the set bits |
| 1554 | in the bitmap. |
| 1555 | */ |
| 1556 | area_size+= (uint) (page_end - data) / 6 * 16; |
| 1557 | if (area_size >= best_area_size) |
| 1558 | break; |
| 1559 | data= page_end; |
| 1560 | } |
| 1561 | else |
| 1562 | { |
| 1563 | /* Add bits at end of page */ |
| 1564 | for (; !(bits & 7); bits >>= 3) |
| 1565 | suffix_area_size++; |
| 1566 | area_size+= suffix_area_size; |
| 1567 | } |
| 1568 | if (data_start != bitmap->map) |
| 1569 | { |
| 1570 | /* Add bits before page */ |
| 1571 | bits= prefix_bits= uint6korr(data_start - 6); |
| 1572 | DBUG_ASSERT(bits != 0); |
| 1573 | /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */ |
| 1574 | if (!(bits & 07000000000000000LL)) |
| 1575 | { |
| 1576 | data_start-= 6; |
| 1577 | do |
| 1578 | { |
| 1579 | prefix_area_size++; |
| 1580 | bits<<= 3; |
| 1581 | } while (!(bits & 07000000000000000LL)); |
| 1582 | area_size+= prefix_area_size; |
| 1583 | /* Calculate offset to page from data_start */ |
| 1584 | prefix_area_size= 16 - prefix_area_size; |
| 1585 | } |
| 1586 | } |
| 1587 | if (area_size >= min_size && area_size <= best_area_size) |
| 1588 | { |
| 1589 | best_data= data_start; |
| 1590 | best_area_size= area_size; |
| 1591 | best_prefix_bits= prefix_bits; |
| 1592 | best_prefix_area_size= prefix_area_size; |
| 1593 | |
| 1594 | /* Prefer to put data in biggest possible area */ |
| 1595 | if (area_size <= pages_needed) |
| 1596 | min_size= area_size; |
| 1597 | else |
| 1598 | min_size= pages_needed; |
| 1599 | } |
| 1600 | } |
| 1601 | if (!best_data) |
| 1602 | DBUG_RETURN(0); /* No room on page */ |
| 1603 | |
| 1604 | /* |
| 1605 | Now allocate MY_MIN(pages_needed, area_size), starting from |
| 1606 | best_start + best_prefix_area_size |
| 1607 | */ |
| 1608 | if (best_area_size > pages_needed) |
| 1609 | best_area_size= pages_needed; |
| 1610 | |
| 1611 | /* For each 6 bytes we have 6*8/3= 16 patterns */ |
| 1612 | page= ((uint) (best_data - bitmap->map) * 8) / 3 + best_prefix_area_size; |
| 1613 | block->page= bitmap->page + 1 + page; |
| 1614 | block->page_count= best_area_size; |
| 1615 | block->empty_space= 0; |
| 1616 | block->sub_blocks= 0; |
| 1617 | block->org_bitmap_value= 0; |
| 1618 | block->used= 0; |
| 1619 | DBUG_ASSERT(page + best_area_size < bitmap->pages_covered); |
| 1620 | DBUG_PRINT("info" , ("page: %lu page_count: %u" , |
| 1621 | (ulong) block->page, block->page_count)); |
| 1622 | |
| 1623 | if (best_prefix_area_size) |
| 1624 | { |
| 1625 | ulonglong tmp; |
| 1626 | /* Convert offset back to bits */ |
| 1627 | best_prefix_area_size= 16 - best_prefix_area_size; |
| 1628 | if (best_area_size < best_prefix_area_size) |
| 1629 | { |
| 1630 | tmp= (1LL << best_area_size*3) - 1; |
| 1631 | best_area_size= best_prefix_area_size; /* for easy end test */ |
| 1632 | } |
| 1633 | else |
| 1634 | tmp= (1LL << best_prefix_area_size*3) - 1; |
| 1635 | tmp<<= (16 - best_prefix_area_size) * 3; |
| 1636 | DBUG_ASSERT((best_prefix_bits & tmp) == 0); |
| 1637 | best_prefix_bits|= tmp; |
| 1638 | int6store(best_data, best_prefix_bits); |
| 1639 | if (!(best_area_size-= best_prefix_area_size)) |
| 1640 | goto end; |
| 1641 | best_data+= 6; |
| 1642 | } |
| 1643 | best_area_size*= 3; /* Bits to set */ |
| 1644 | size= best_area_size/8; /* Bytes to set */ |
| 1645 | bfill(best_data, size, 255); |
| 1646 | best_data+= size; |
| 1647 | if ((best_area_size-= size * 8)) |
| 1648 | { |
| 1649 | /* fill last uchar */ |
| 1650 | *best_data|= (uchar) ((1 << best_area_size) -1); |
| 1651 | best_data++; |
| 1652 | } |
| 1653 | if (data_end < best_data) |
| 1654 | { |
| 1655 | bitmap->used_size= (uint) (best_data - bitmap->map); |
| 1656 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
| 1657 | } |
| 1658 | end: |
| 1659 | bitmap->changed= 1; |
| 1660 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
| 1661 | DBUG_RETURN(block->page_count); |
| 1662 | } |
| 1663 | |
| 1664 | |
| 1665 | /**************************************************************************** |
| 1666 | Find right bitmaps where to store data |
| 1667 | ****************************************************************************/ |
| 1668 | |
| 1669 | /* |
| 1670 | Find right bitmap and position for head block |
| 1671 | |
| 1672 | SYNOPSIS |
| 1673 | find_head() |
| 1674 | info Maria handler |
| 1675 | length Size of data region we need store |
| 1676 | position Position in bitmap_blocks where to store the |
| 1677 | information for the head block. |
| 1678 | |
| 1679 | RETURN |
| 1680 | 0 ok |
| 1681 | 1 error |
| 1682 | */ |
| 1683 | |
| 1684 | static my_bool find_head(MARIA_HA *info, uint length, uint position) |
| 1685 | { |
| 1686 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 1687 | MARIA_BITMAP_BLOCK *block; |
| 1688 | /* |
| 1689 | There is always place for the head block in bitmap_blocks as these are |
| 1690 | preallocated at _ma_init_block_record(). |
| 1691 | */ |
| 1692 | block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); |
| 1693 | |
| 1694 | if (info->s->base.extra_options & MA_EXTRA_OPTIONS_INSERT_ORDER) |
| 1695 | { |
| 1696 | if (bitmap->page != info->s->last_insert_bitmap && |
| 1697 | _ma_change_bitmap_page(info, bitmap, |
| 1698 | info->s->last_insert_bitmap)) |
| 1699 | return 1; |
| 1700 | /* Don't allocate any blocks from earlier pages */ |
| 1701 | info->s->state.first_bitmap_with_space= info->s->last_insert_bitmap; |
| 1702 | } |
| 1703 | |
| 1704 | /* |
| 1705 | We need to have DIRENTRY_SIZE here to take into account that we may |
| 1706 | need an extra directory entry for the row |
| 1707 | */ |
| 1708 | while (allocate_head(bitmap, length + DIR_ENTRY_SIZE, block)) |
| 1709 | if (move_to_next_bitmap(info, bitmap)) |
| 1710 | return 1; |
| 1711 | return 0; |
| 1712 | } |
| 1713 | |
| 1714 | |
| 1715 | /* |
| 1716 | Find right bitmap and position for tail |
| 1717 | |
| 1718 | SYNOPSIS |
| 1719 | find_tail() |
| 1720 | info Maria handler |
| 1721 | length Size of data region we need store |
| 1722 | position Position in bitmap_blocks where to store the |
| 1723 | information for the head block. |
| 1724 | |
| 1725 | RETURN |
| 1726 | 0 ok |
| 1727 | 1 error |
| 1728 | */ |
| 1729 | |
| 1730 | static my_bool find_tail(MARIA_HA *info, uint length, uint position) |
| 1731 | { |
| 1732 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 1733 | MARIA_BITMAP_BLOCK *block; |
| 1734 | DBUG_ENTER("find_tail" ); |
| 1735 | DBUG_ASSERT(length <= info->s->block_size - PAGE_OVERHEAD_SIZE(info->s)); |
| 1736 | |
| 1737 | /* Needed, as there is no error checking in dynamic_element */ |
| 1738 | if (allocate_dynamic(&info->bitmap_blocks, position)) |
| 1739 | DBUG_RETURN(1); |
| 1740 | block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); |
| 1741 | |
| 1742 | /* |
| 1743 | We have to add DIR_ENTRY_SIZE to ensure we have space for the tail and |
| 1744 | it's directroy entry on the page |
| 1745 | */ |
| 1746 | while (allocate_tail(bitmap, length + DIR_ENTRY_SIZE, block)) |
| 1747 | if (move_to_next_bitmap(info, bitmap)) |
| 1748 | DBUG_RETURN(1); |
| 1749 | DBUG_RETURN(0); |
| 1750 | } |
| 1751 | |
| 1752 | |
| 1753 | /* |
| 1754 | Find right bitmap and position for full blocks in one extent |
| 1755 | |
| 1756 | SYNOPSIS |
| 1757 | find_mid() |
| 1758 | info Maria handler. |
| 1759 | pages How many pages to allocate. |
| 1760 | position Position in bitmap_blocks where to store the |
| 1761 | information for the head block. |
| 1762 | NOTES |
| 1763 | This is used to allocate the main extent after the 'head' block |
| 1764 | (Ie, the middle part of the head-middle-tail entry) |
| 1765 | |
| 1766 | RETURN |
| 1767 | 0 ok |
| 1768 | 1 error |
| 1769 | */ |
| 1770 | |
| 1771 | static my_bool find_mid(MARIA_HA *info, ulong pages, uint position) |
| 1772 | { |
| 1773 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 1774 | MARIA_BITMAP_BLOCK *block; |
| 1775 | block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); |
| 1776 | |
| 1777 | while (!allocate_full_pages(bitmap, pages, block, 1)) |
| 1778 | { |
| 1779 | if (move_to_next_bitmap(info, bitmap)) |
| 1780 | return 1; |
| 1781 | } |
| 1782 | return 0; |
| 1783 | } |
| 1784 | |
| 1785 | |
| 1786 | /* |
| 1787 | Find right bitmap and position for putting a blob |
| 1788 | |
| 1789 | SYNOPSIS |
| 1790 | find_blob() |
| 1791 | info Maria handler. |
| 1792 | length Length of the blob |
| 1793 | |
| 1794 | NOTES |
| 1795 | The extents are stored last in info->bitmap_blocks |
| 1796 | |
| 1797 | IMPLEMENTATION |
| 1798 | Allocate all full pages for the block + optionally one tail |
| 1799 | |
| 1800 | RETURN |
| 1801 | 0 ok |
| 1802 | 1 error |
| 1803 | */ |
| 1804 | |
| 1805 | static my_bool find_blob(MARIA_HA *info, ulong length) |
| 1806 | { |
| 1807 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 1808 | uint full_page_size= FULL_PAGE_SIZE(info->s); |
| 1809 | ulong pages; |
| 1810 | uint rest_length, used; |
| 1811 | uint UNINIT_VAR(first_block_pos); |
| 1812 | MARIA_BITMAP_BLOCK *first_block= 0; |
| 1813 | DBUG_ENTER("find_blob" ); |
| 1814 | DBUG_PRINT("enter" , ("length: %lu" , length)); |
| 1815 | |
| 1816 | pages= length / full_page_size; |
| 1817 | rest_length= (uint) (length - pages * full_page_size); |
| 1818 | if (rest_length >= MAX_TAIL_SIZE(info->s->block_size)) |
| 1819 | { |
| 1820 | pages++; |
| 1821 | rest_length= 0; |
| 1822 | } |
| 1823 | |
| 1824 | first_block_pos= info->bitmap_blocks.elements; |
| 1825 | if (pages) |
| 1826 | { |
| 1827 | MARIA_BITMAP_BLOCK *block; |
| 1828 | if (allocate_dynamic(&info->bitmap_blocks, |
| 1829 | info->bitmap_blocks.elements + |
| 1830 | pages / BLOB_SEGMENT_MIN_SIZE + 2)) |
| 1831 | DBUG_RETURN(1); |
| 1832 | block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements, |
| 1833 | MARIA_BITMAP_BLOCK*); |
| 1834 | do |
| 1835 | { |
| 1836 | /* |
| 1837 | We use 0x3fff here as the two upmost bits are reserved for |
| 1838 | TAIL_BIT and START_EXTENT_BIT |
| 1839 | */ |
| 1840 | used= allocate_full_pages(bitmap, |
| 1841 | (pages >= 0x3fff ? 0x3fff : (uint) pages), |
| 1842 | block, 0); |
| 1843 | if (!used) |
| 1844 | { |
| 1845 | if (move_to_next_bitmap(info, bitmap)) |
| 1846 | DBUG_RETURN(1); |
| 1847 | } |
| 1848 | else |
| 1849 | { |
| 1850 | pages-= used; |
| 1851 | info->bitmap_blocks.elements++; |
| 1852 | block++; |
| 1853 | } |
| 1854 | } while (pages != 0); |
| 1855 | } |
| 1856 | if (rest_length && find_tail(info, rest_length, |
| 1857 | info->bitmap_blocks.elements++)) |
| 1858 | DBUG_RETURN(1); |
| 1859 | first_block= dynamic_element(&info->bitmap_blocks, first_block_pos, |
| 1860 | MARIA_BITMAP_BLOCK*); |
| 1861 | first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos; |
| 1862 | DBUG_RETURN(0); |
| 1863 | } |
| 1864 | |
| 1865 | |
| 1866 | /* |
| 1867 | Find pages to put ALL blobs |
| 1868 | |
| 1869 | SYNOPSIS |
| 1870 | allocate_blobs() |
| 1871 | info Maria handler |
| 1872 | row Information of what is in the row (from calc_record_size()) |
| 1873 | |
| 1874 | RETURN |
| 1875 | 0 ok |
| 1876 | 1 error |
| 1877 | */ |
| 1878 | |
| 1879 | static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row) |
| 1880 | { |
| 1881 | ulong *length, *end; |
| 1882 | uint elements; |
| 1883 | /* |
| 1884 | Reserve size for: |
| 1885 | head block |
| 1886 | one extent |
| 1887 | tail block |
| 1888 | */ |
| 1889 | elements= info->bitmap_blocks.elements; |
| 1890 | for (length= row->blob_lengths, end= length + info->s->base.blobs; |
| 1891 | length < end; length++) |
| 1892 | { |
| 1893 | if (*length && find_blob(info, *length)) |
| 1894 | return 1; |
| 1895 | } |
| 1896 | row->extents_count= (info->bitmap_blocks.elements - elements); |
| 1897 | return 0; |
| 1898 | } |
| 1899 | |
| 1900 | |
| 1901 | /* |
| 1902 | Reserve the current head page |
| 1903 | |
| 1904 | SYNOPSIS |
| 1905 | use_head() |
| 1906 | info Maria handler |
| 1907 | page Page number to update |
| 1908 | (Note that caller guarantees this is in the active |
| 1909 | bitmap) |
| 1910 | size How much free space is left on the page |
| 1911 | block_position In which info->bitmap_block we have the |
| 1912 | information about the head block. |
| 1913 | |
| 1914 | NOTES |
| 1915 | This is used on update where we are updating an existing head page |
| 1916 | */ |
| 1917 | |
| 1918 | static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size, |
| 1919 | uint block_position) |
| 1920 | { |
| 1921 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 1922 | MARIA_BITMAP_BLOCK *block; |
| 1923 | uchar *data; |
| 1924 | uint offset, tmp, offset_page; |
| 1925 | DBUG_ENTER("use_head" ); |
| 1926 | |
| 1927 | DBUG_ASSERT(page % bitmap->pages_covered); |
| 1928 | |
| 1929 | block= dynamic_element(&info->bitmap_blocks, block_position, |
| 1930 | MARIA_BITMAP_BLOCK*); |
| 1931 | block->page= page; |
| 1932 | block->page_count= 1 + TAIL_BIT; |
| 1933 | block->empty_space= size; |
| 1934 | block->used= BLOCKUSED_TAIL; |
| 1935 | |
| 1936 | /* |
| 1937 | Mark place used by reading/writing 2 bytes at a time to handle |
| 1938 | bitmaps in overlapping bytes |
| 1939 | */ |
| 1940 | offset_page= (uint) (page - bitmap->page - 1) * 3; |
| 1941 | offset= offset_page & 7; |
| 1942 | data= bitmap->map + offset_page / 8; |
| 1943 | tmp= uint2korr(data); |
| 1944 | block->org_bitmap_value= (tmp >> offset) & 7; |
| 1945 | tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset); |
| 1946 | int2store(data, tmp); |
| 1947 | bitmap->changed= 1; |
| 1948 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
| 1949 | DBUG_VOID_RETURN; |
| 1950 | } |
| 1951 | |
| 1952 | |
| 1953 | /* |
| 1954 | Find out where to split the row (ie, what goes in head, middle, tail etc) |
| 1955 | |
| 1956 | SYNOPSIS |
| 1957 | find_where_to_split_row() |
| 1958 | share Maria share |
| 1959 | row Information of what is in the row (from calc_record_size()) |
| 1960 | extents Max number of extents we have to store in header |
| 1961 | split_size Free size on the page (The head length must be less |
| 1962 | than this) |
| 1963 | |
| 1964 | RETURN |
| 1965 | row_length for the head block. |
| 1966 | */ |
| 1967 | |
| 1968 | static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, |
| 1969 | uint extents, uint split_size) |
| 1970 | { |
| 1971 | uint *lengths, *lengths_end; |
| 1972 | /* |
| 1973 | Ensure we have the minimum required space on head page: |
| 1974 | - Header + length of field lengths (row->min_length) |
| 1975 | - Number of extents |
| 1976 | - One extent |
| 1977 | */ |
| 1978 | uint row_length= (row->min_length + |
| 1979 | size_to_store_key_length(extents) + |
| 1980 | ROW_EXTENT_SIZE); |
| 1981 | DBUG_ASSERT(row_length <= split_size); |
| 1982 | |
| 1983 | /* |
| 1984 | Store first in all_field_lengths the different parts that are written |
| 1985 | to the row. This needs to be in same order as in |
| 1986 | ma_block_rec.c::write_block_record() |
| 1987 | */ |
| 1988 | row->null_field_lengths[-3]= extents * ROW_EXTENT_SIZE; |
| 1989 | row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length; |
| 1990 | row->null_field_lengths[-1]= row->field_lengths_length; |
| 1991 | for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS, |
| 1992 | lengths_end= (lengths + share->base.fields - share->base.blobs + |
| 1993 | EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++) |
| 1994 | { |
| 1995 | if (row_length + *lengths > split_size) |
| 1996 | break; |
| 1997 | row_length+= *lengths; |
| 1998 | } |
| 1999 | return row_length; |
| 2000 | } |
| 2001 | |
| 2002 | |
| 2003 | /* |
| 2004 | Find where to write the middle parts of the row and the tail |
| 2005 | |
| 2006 | SYNOPSIS |
| 2007 | write_rest_of_head() |
| 2008 | info Maria handler |
| 2009 | position Position in bitmap_blocks. Is 0 for rows that needs |
| 2010 | full blocks (ie, has a head, middle part and optional tail) |
| 2011 | rest_length How much left of the head block to write. |
| 2012 | |
| 2013 | RETURN |
| 2014 | 0 ok |
| 2015 | 1 error |
| 2016 | */ |
| 2017 | |
| 2018 | static my_bool write_rest_of_head(MARIA_HA *info, uint position, |
| 2019 | ulong rest_length) |
| 2020 | { |
| 2021 | MARIA_SHARE *share= info->s; |
| 2022 | uint full_page_size= FULL_PAGE_SIZE(share); |
| 2023 | MARIA_BITMAP_BLOCK *block; |
| 2024 | DBUG_ENTER("write_rest_of_head" ); |
| 2025 | DBUG_PRINT("enter" , ("position: %u rest_length: %lu" , position, |
| 2026 | rest_length)); |
| 2027 | |
| 2028 | if (position == 0) |
| 2029 | { |
| 2030 | /* Write out full pages */ |
| 2031 | uint pages= rest_length / full_page_size; |
| 2032 | |
| 2033 | rest_length%= full_page_size; |
| 2034 | if (rest_length >= MAX_TAIL_SIZE(share->block_size)) |
| 2035 | { |
| 2036 | /* Put tail on a full page */ |
| 2037 | pages++; |
| 2038 | rest_length= 0; |
| 2039 | } |
| 2040 | if (find_mid(info, pages, 1)) |
| 2041 | DBUG_RETURN(1); |
| 2042 | /* |
| 2043 | Insert empty block after full pages, to allow write_block_record() to |
| 2044 | split segment into used + free page |
| 2045 | */ |
| 2046 | block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*); |
| 2047 | block->page_count= 0; |
| 2048 | block->used= 0; |
| 2049 | } |
| 2050 | if (rest_length) |
| 2051 | { |
| 2052 | if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1)) |
| 2053 | DBUG_RETURN(1); |
| 2054 | } |
| 2055 | else |
| 2056 | { |
| 2057 | /* Empty tail block */ |
| 2058 | block= dynamic_element(&info->bitmap_blocks, |
| 2059 | ELEMENTS_RESERVED_FOR_MAIN_PART - 1, |
| 2060 | MARIA_BITMAP_BLOCK *); |
| 2061 | block->page_count= 0; |
| 2062 | block->used= 0; |
| 2063 | } |
| 2064 | DBUG_RETURN(0); |
| 2065 | } |
| 2066 | |
| 2067 | |
| 2068 | /* |
| 2069 | Find where to store one row |
| 2070 | |
| 2071 | SYNPOSIS |
| 2072 | _ma_bitmap_find_place() |
| 2073 | info Maria handler |
| 2074 | row Information about row to write |
| 2075 | blocks Store data about allocated places here |
| 2076 | |
| 2077 | RETURN |
| 2078 | 0 ok |
| 2079 | row->space_on_head_page contains minimum number of bytes we |
| 2080 | expect to put on the head page. |
| 2081 | 1 error |
| 2082 | my_errno is set to error |
| 2083 | */ |
| 2084 | |
| 2085 | my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, |
| 2086 | MARIA_BITMAP_BLOCKS *blocks) |
| 2087 | { |
| 2088 | MARIA_SHARE *share= info->s; |
| 2089 | my_bool res= 1; |
| 2090 | uint full_page_size, position, max_page_size; |
| 2091 | uint head_length, row_length, rest_length, extents_length; |
| 2092 | DBUG_ENTER("_ma_bitmap_find_place" ); |
| 2093 | |
| 2094 | blocks->count= 0; |
| 2095 | blocks->tail_page_skipped= blocks->page_skipped= 0; |
| 2096 | row->extents_count= 0; |
| 2097 | |
| 2098 | /* |
| 2099 | Reserve place for the following blocks: |
| 2100 | - Head block |
| 2101 | - Full page block |
| 2102 | - Marker block to allow write_block_record() to split full page blocks |
| 2103 | into full and free part |
| 2104 | - Tail block |
| 2105 | */ |
| 2106 | |
| 2107 | info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; |
| 2108 | max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE(share)); |
| 2109 | |
| 2110 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
| 2111 | |
| 2112 | if (row->total_length <= max_page_size) |
| 2113 | { |
| 2114 | /* Row fits in one page */ |
| 2115 | position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; |
| 2116 | if (find_head(info, (uint) row->total_length, position)) |
| 2117 | goto abort; |
| 2118 | row->space_on_head_page= row->total_length; |
| 2119 | goto end; |
| 2120 | } |
| 2121 | |
| 2122 | /* |
| 2123 | First allocate all blobs so that we can find out the needed size for |
| 2124 | the main block. |
| 2125 | */ |
| 2126 | if (row->blob_length && allocate_blobs(info, row)) |
| 2127 | goto abort; |
| 2128 | |
| 2129 | extents_length= row->extents_count * ROW_EXTENT_SIZE; |
| 2130 | /* |
| 2131 | The + 3 is reserved for storing the number of segments in the row header. |
| 2132 | */ |
| 2133 | if ((head_length= (row->head_length + extents_length + 3)) <= |
| 2134 | max_page_size) |
| 2135 | { |
| 2136 | /* Main row part fits into one page */ |
| 2137 | position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; |
| 2138 | if (find_head(info, head_length, position)) |
| 2139 | goto abort; |
| 2140 | row->space_on_head_page= head_length; |
| 2141 | goto end; |
| 2142 | } |
| 2143 | |
| 2144 | /* Allocate enough space */ |
| 2145 | head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; |
| 2146 | |
| 2147 | /* The first segment size is stored in 'row_length' */ |
| 2148 | row_length= find_where_to_split_row(share, row, row->extents_count + |
| 2149 | ELEMENTS_RESERVED_FOR_MAIN_PART-1, |
| 2150 | max_page_size); |
| 2151 | |
| 2152 | full_page_size= MAX_TAIL_SIZE(share->block_size); |
| 2153 | position= 0; |
| 2154 | rest_length= head_length - row_length; |
| 2155 | if (rest_length <= full_page_size) |
| 2156 | position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ |
| 2157 | if (find_head(info, row_length, position)) |
| 2158 | goto abort; |
| 2159 | row->space_on_head_page= row_length; |
| 2160 | |
| 2161 | if (write_rest_of_head(info, position, rest_length)) |
| 2162 | goto abort; |
| 2163 | |
| 2164 | end: |
| 2165 | blocks->block= dynamic_element(&info->bitmap_blocks, position, |
| 2166 | MARIA_BITMAP_BLOCK*); |
| 2167 | blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; |
| 2168 | /* First block's page_count is for all blocks */ |
| 2169 | blocks->count= info->bitmap_blocks.elements - position; |
| 2170 | res= 0; |
| 2171 | |
| 2172 | abort: |
| 2173 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
| 2174 | DBUG_RETURN(res); |
| 2175 | } |
| 2176 | |
| 2177 | |
| 2178 | /* |
| 2179 | Find where to put row on update (when head page is already defined) |
| 2180 | |
| 2181 | SYNPOSIS |
| 2182 | _ma_bitmap_find_new_place() |
| 2183 | info Maria handler |
| 2184 | row Information about row to write |
| 2185 | page On which page original row was stored |
| 2186 | free_size Free size on head page |
| 2187 | blocks Store data about allocated places here |
| 2188 | |
| 2189 | NOTES |
| 2190 | This function is only called when the new row can't fit in the space of |
| 2191 | the old row in the head page. |
| 2192 | |
| 2193 | This is essently same as _ma_bitmap_find_place() except that |
| 2194 | we don't call find_head() to search in bitmaps where to put the page. |
| 2195 | |
| 2196 | RETURN |
| 2197 | 0 ok |
| 2198 | 1 error |
| 2199 | */ |
| 2200 | |
| 2201 | my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, |
| 2202 | pgcache_page_no_t page, uint free_size, |
| 2203 | MARIA_BITMAP_BLOCKS *blocks) |
| 2204 | { |
| 2205 | MARIA_SHARE *share= info->s; |
| 2206 | my_bool res= 1; |
| 2207 | uint position; |
| 2208 | uint head_length, row_length, rest_length, extents_length; |
| 2209 | ulonglong bitmap_page; |
| 2210 | DBUG_ENTER("_ma_bitmap_find_new_place" ); |
| 2211 | |
| 2212 | blocks->count= 0; |
| 2213 | blocks->tail_page_skipped= blocks->page_skipped= 0; |
| 2214 | row->extents_count= 0; |
| 2215 | info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; |
| 2216 | |
| 2217 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
| 2218 | |
| 2219 | /* |
| 2220 | First allocate all blobs (so that we can find out the needed size for |
| 2221 | the main block. |
| 2222 | */ |
| 2223 | if (row->blob_length && allocate_blobs(info, row)) |
| 2224 | goto abort; |
| 2225 | |
| 2226 | /* Switch bitmap to current head page */ |
| 2227 | bitmap_page= page - page % share->bitmap.pages_covered; |
| 2228 | |
| 2229 | if (share->bitmap.page != bitmap_page && |
| 2230 | _ma_change_bitmap_page(info, &share->bitmap, bitmap_page)) |
| 2231 | goto abort; |
| 2232 | |
| 2233 | extents_length= row->extents_count * ROW_EXTENT_SIZE; |
| 2234 | if ((head_length= (row->head_length + extents_length + 3)) <= free_size) |
| 2235 | { |
| 2236 | /* Main row part fits into one page */ |
| 2237 | position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; |
| 2238 | use_head(info, page, head_length, position); |
| 2239 | row->space_on_head_page= head_length; |
| 2240 | goto end; |
| 2241 | } |
| 2242 | |
| 2243 | /* Allocate enough space */ |
| 2244 | head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; |
| 2245 | |
| 2246 | /* |
| 2247 | The first segment size is stored in 'row_length' |
| 2248 | We have to add ELEMENTS_RESERVED_FOR_MAIN_PART here as the extent |
| 2249 | information may be up to this size when the header splits. |
| 2250 | */ |
| 2251 | row_length= find_where_to_split_row(share, row, row->extents_count + |
| 2252 | ELEMENTS_RESERVED_FOR_MAIN_PART-1, |
| 2253 | free_size); |
| 2254 | |
| 2255 | position= 0; |
| 2256 | rest_length= head_length - row_length; |
| 2257 | if (rest_length <= MAX_TAIL_SIZE(share->block_size)) |
| 2258 | position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ |
| 2259 | use_head(info, page, row_length, position); |
| 2260 | row->space_on_head_page= row_length; |
| 2261 | |
| 2262 | if (write_rest_of_head(info, position, rest_length)) |
| 2263 | goto abort; |
| 2264 | |
| 2265 | end: |
| 2266 | blocks->block= dynamic_element(&info->bitmap_blocks, position, |
| 2267 | MARIA_BITMAP_BLOCK*); |
| 2268 | blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; |
| 2269 | /* First block's page_count is for all blocks */ |
| 2270 | blocks->count= info->bitmap_blocks.elements - position; |
| 2271 | res= 0; |
| 2272 | |
| 2273 | abort: |
| 2274 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
| 2275 | DBUG_RETURN(res); |
| 2276 | } |
| 2277 | |
| 2278 | |
| 2279 | /**************************************************************************** |
| 2280 | Clear and reset bits |
| 2281 | ****************************************************************************/ |
| 2282 | |
| 2283 | /* |
| 2284 | Set fill pattern for a page |
| 2285 | |
| 2286 | set_page_bits() |
| 2287 | info Maria handler |
| 2288 | bitmap Bitmap handler |
| 2289 | page Adress to page |
| 2290 | fill_pattern Pattern (not size) for page |
| 2291 | |
| 2292 | NOTES |
| 2293 | Page may not be part of active bitmap |
| 2294 | |
| 2295 | RETURN |
| 2296 | 0 ok |
| 2297 | 1 error |
| 2298 | */ |
| 2299 | |
| 2300 | static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, |
| 2301 | pgcache_page_no_t page, uint fill_pattern) |
| 2302 | { |
| 2303 | pgcache_page_no_t bitmap_page; |
| 2304 | uint offset_page, offset, tmp, org_tmp, used_offset; |
| 2305 | uchar *data; |
| 2306 | DBUG_ENTER("set_page_bits" ); |
| 2307 | DBUG_ASSERT(fill_pattern <= 7); |
| 2308 | |
| 2309 | bitmap_page= page - page % bitmap->pages_covered; |
| 2310 | if (bitmap_page != bitmap->page && |
| 2311 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
| 2312 | DBUG_RETURN(1); |
| 2313 | |
| 2314 | /* Find page number from start of bitmap */ |
| 2315 | offset_page= (uint) (page - bitmap->page - 1); |
| 2316 | |
| 2317 | /* |
| 2318 | Mark place used by reading/writing 2 bytes at a time to handle |
| 2319 | bitmaps in overlapping bytes |
| 2320 | */ |
| 2321 | offset_page*= 3; |
| 2322 | offset= offset_page & 7; |
| 2323 | data= bitmap->map + offset_page / 8; |
| 2324 | org_tmp= tmp= uint2korr(data); |
| 2325 | tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); |
| 2326 | if (tmp == org_tmp) |
| 2327 | DBUG_RETURN(0); /* No changes */ |
| 2328 | |
| 2329 | /* |
| 2330 | Take care to not write bytes outside of bitmap. |
| 2331 | fill_pattern is 3 bits, so we need to write two bytes |
| 2332 | if bit position we write to is > (8-3) |
| 2333 | */ |
| 2334 | if (offset > 5) |
| 2335 | int2store(data, tmp); |
| 2336 | else |
| 2337 | data[0]= tmp; |
| 2338 | |
| 2339 | /* |
| 2340 | Reset full_head_size or full_tail_size if we are releasing data before |
| 2341 | it. Increase used_size if we are allocating data. |
| 2342 | */ |
| 2343 | used_offset= (uint) (data - bitmap->map); |
| 2344 | if (fill_pattern < 4) |
| 2345 | set_if_smaller(bitmap->full_head_size, used_offset); |
| 2346 | if (fill_pattern == 0 || (fill_pattern > 4 && fill_pattern < 7)) |
| 2347 | set_if_smaller(bitmap->full_tail_size, used_offset); |
| 2348 | if (fill_pattern != 0) |
| 2349 | { |
| 2350 | /* Calulcate which was the last changed byte */ |
| 2351 | used_offset+= offset > 5 ? 2 : 1; |
| 2352 | set_if_bigger(bitmap->used_size, used_offset); |
| 2353 | } |
| 2354 | |
| 2355 | _ma_check_bitmap(bitmap); |
| 2356 | bitmap->changed= 1; |
| 2357 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
| 2358 | if (fill_pattern != FULL_HEAD_PAGE && fill_pattern != FULL_TAIL_PAGE) |
| 2359 | set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page); |
| 2360 | /* |
| 2361 | Note that if the condition above is false (page is full), and all pages of |
| 2362 | this bitmap are now full, and that bitmap page was |
| 2363 | first_bitmap_with_space, we don't modify first_bitmap_with_space, indeed |
| 2364 | its value still tells us where to start our search for a bitmap with space |
| 2365 | (which is for sure after this full one). |
| 2366 | That does mean that first_bitmap_with_space is only a lower bound. |
| 2367 | */ |
| 2368 | DBUG_RETURN(0); |
| 2369 | } |
| 2370 | |
| 2371 | |
| 2372 | /* |
| 2373 | Get bitmap pattern for a given page |
| 2374 | |
| 2375 | SYNOPSIS |
| 2376 | bitmap_get_page_bits() |
| 2377 | info Maria handler |
| 2378 | bitmap Bitmap handler |
| 2379 | page Page number |
| 2380 | |
| 2381 | RETURN |
| 2382 | 0-7 Bitmap pattern |
| 2383 | ~0 Error (couldn't read page) |
| 2384 | */ |
| 2385 | |
| 2386 | static uint bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, |
| 2387 | pgcache_page_no_t page) |
| 2388 | { |
| 2389 | pgcache_page_no_t bitmap_page; |
| 2390 | uint offset_page, offset, tmp; |
| 2391 | uchar *data; |
| 2392 | DBUG_ENTER("_ma_bitmap_get_page_bits" ); |
| 2393 | |
| 2394 | bitmap_page= page - page % bitmap->pages_covered; |
| 2395 | if (bitmap_page != bitmap->page && |
| 2396 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
| 2397 | DBUG_RETURN(~ (uint) 0); |
| 2398 | |
| 2399 | /* Find page number from start of bitmap */ |
| 2400 | offset_page= (uint) (page - bitmap->page - 1); |
| 2401 | /* |
| 2402 | Mark place used by reading/writing 2 bytes at a time to handle |
| 2403 | bitmaps in overlapping bytes |
| 2404 | */ |
| 2405 | offset_page*= 3; |
| 2406 | offset= offset_page & 7; |
| 2407 | data= bitmap->map + offset_page / 8; |
| 2408 | tmp= uint2korr(data); |
| 2409 | DBUG_RETURN((tmp >> offset) & 7); |
| 2410 | } |
| 2411 | |
| 2412 | |
| 2413 | /* As above, but take a lock while getting the data */ |
| 2414 | |
| 2415 | uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, |
| 2416 | pgcache_page_no_t page) |
| 2417 | { |
| 2418 | uint tmp; |
| 2419 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 2420 | tmp= bitmap_get_page_bits(info, bitmap, page); |
| 2421 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 2422 | return tmp; |
| 2423 | } |
| 2424 | |
| 2425 | |
| 2426 | /* |
| 2427 | Mark all pages in a region as free |
| 2428 | |
| 2429 | SYNOPSIS |
| 2430 | _ma_bitmap_reset_full_page_bits() |
| 2431 | info Maria handler |
| 2432 | bitmap Bitmap handler |
| 2433 | page Start page |
| 2434 | page_count Number of pages |
| 2435 | |
| 2436 | NOTES |
| 2437 | We assume that all pages in region is covered by same bitmap |
| 2438 | One must have a lock on info->s->bitmap.bitmap_lock |
| 2439 | |
| 2440 | RETURN |
| 2441 | 0 ok |
| 2442 | 1 Error (when reading bitmap) |
| 2443 | */ |
| 2444 | |
| 2445 | my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info, |
| 2446 | MARIA_FILE_BITMAP *bitmap, |
| 2447 | pgcache_page_no_t page, |
| 2448 | uint page_count) |
| 2449 | { |
| 2450 | ulonglong bitmap_page; |
| 2451 | uint offset, bit_start, bit_count, tmp, byte_offset; |
| 2452 | uchar *data; |
| 2453 | DBUG_ENTER("_ma_bitmap_reset_full_page_bits" ); |
| 2454 | DBUG_PRINT("enter" , ("page: %lu page_count: %u" , (ulong) page, page_count)); |
| 2455 | mysql_mutex_assert_owner(&info->s->bitmap.bitmap_lock); |
| 2456 | |
| 2457 | bitmap_page= page - page % bitmap->pages_covered; |
| 2458 | DBUG_ASSERT(page != bitmap_page); |
| 2459 | |
| 2460 | if (bitmap_page != bitmap->page && |
| 2461 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
| 2462 | DBUG_RETURN(1); |
| 2463 | |
| 2464 | /* Find page number from start of bitmap */ |
| 2465 | offset= (uint) (page - bitmap->page - 1); |
| 2466 | |
| 2467 | /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */ |
| 2468 | bit_start= offset * 3; |
| 2469 | bit_count= page_count * 3; |
| 2470 | |
| 2471 | byte_offset= bit_start/8; |
| 2472 | data= bitmap->map + byte_offset; |
| 2473 | offset= bit_start & 7; |
| 2474 | |
| 2475 | tmp= (255 << offset); /* Bits to keep */ |
| 2476 | if (bit_count + offset < 8) |
| 2477 | { |
| 2478 | /* Only clear bits between 'offset' and 'offset+bit_count-1' */ |
| 2479 | tmp^= (255 << (offset + bit_count)); |
| 2480 | } |
| 2481 | *data&= ~tmp; |
| 2482 | |
| 2483 | set_if_smaller(bitmap->full_head_size, byte_offset); |
| 2484 | set_if_smaller(bitmap->full_tail_size, byte_offset); |
| 2485 | |
| 2486 | if ((int) (bit_count-= (8 - offset)) > 0) |
| 2487 | { |
| 2488 | uint fill; |
| 2489 | data++; |
| 2490 | /* |
| 2491 | -1 is here to avoid one 'if' statement and to let the following code |
| 2492 | handle the last byte |
| 2493 | */ |
| 2494 | if ((fill= (bit_count - 1) / 8)) |
| 2495 | { |
| 2496 | bzero(data, fill); |
| 2497 | data+= fill; |
| 2498 | } |
| 2499 | bit_count-= fill * 8; /* Bits left to clear */ |
| 2500 | tmp= (1 << bit_count) - 1; |
| 2501 | *data&= ~tmp; |
| 2502 | } |
| 2503 | set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page); |
| 2504 | bitmap->changed= 1; |
| 2505 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
| 2506 | DBUG_RETURN(0); |
| 2507 | } |
| 2508 | |
| 2509 | |
| 2510 | /* |
| 2511 | Set all pages in a region as used |
| 2512 | |
| 2513 | SYNOPSIS |
| 2514 | _ma_bitmap_set_full_page_bits() |
| 2515 | info Maria handler |
| 2516 | bitmap Bitmap handler |
| 2517 | page Start page |
| 2518 | page_count Number of pages |
| 2519 | |
| 2520 | NOTES |
| 2521 | We assume that all pages in region is covered by same bitmap |
| 2522 | One must have a lock on info->s->bitmap.bitmap_lock |
| 2523 | |
| 2524 | RETURN |
| 2525 | 0 ok |
| 2526 | 1 Error (when reading bitmap) |
| 2527 | */ |
| 2528 | |
| 2529 | my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info, |
| 2530 | MARIA_FILE_BITMAP *bitmap, |
| 2531 | pgcache_page_no_t page, uint page_count) |
| 2532 | { |
| 2533 | ulonglong bitmap_page; |
| 2534 | uint offset, bit_start, bit_count, tmp; |
| 2535 | uchar *data; |
| 2536 | DBUG_ENTER("_ma_bitmap_set_full_page_bits" ); |
| 2537 | DBUG_PRINT("enter" , ("page: %lu page_count: %u" , (ulong) page, page_count)); |
| 2538 | mysql_mutex_assert_owner(&info->s->bitmap.bitmap_lock); |
| 2539 | |
| 2540 | bitmap_page= page - page % bitmap->pages_covered; |
| 2541 | if (page == bitmap_page || |
| 2542 | page + page_count > bitmap_page + bitmap->pages_covered) |
| 2543 | { |
| 2544 | DBUG_ASSERT(0); /* Wrong in data */ |
| 2545 | DBUG_RETURN(1); |
| 2546 | } |
| 2547 | |
| 2548 | if (bitmap_page != bitmap->page && |
| 2549 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
| 2550 | DBUG_RETURN(1); |
| 2551 | |
| 2552 | /* Find page number from start of bitmap */ |
| 2553 | offset= (uint) (page - bitmap->page - 1); |
| 2554 | |
| 2555 | /* Set bits from 'page * 3' -> '(page + page_count) * 3' */ |
| 2556 | bit_start= offset * 3; |
| 2557 | bit_count= page_count * 3; |
| 2558 | |
| 2559 | data= bitmap->map + bit_start / 8; |
| 2560 | offset= bit_start & 7; |
| 2561 | |
| 2562 | tmp= (255 << offset); /* Bits to keep */ |
| 2563 | if (bit_count + offset < 8) |
| 2564 | { |
| 2565 | /* Only set bits between 'offset' and 'offset+bit_count-1' */ |
| 2566 | tmp^= (255 << (offset + bit_count)); |
| 2567 | } |
| 2568 | *data|= tmp; |
| 2569 | |
| 2570 | if ((int) (bit_count-= (8 - offset)) > 0) |
| 2571 | { |
| 2572 | uint fill; |
| 2573 | data++; |
| 2574 | /* |
| 2575 | -1 is here to avoid one 'if' statement and to let the following code |
| 2576 | handle the last byte |
| 2577 | */ |
| 2578 | if ((fill= (bit_count - 1) / 8)) |
| 2579 | { |
| 2580 | bfill(data, fill, 255); |
| 2581 | data+= fill; |
| 2582 | } |
| 2583 | bit_count-= fill * 8; /* Bits left to set */ |
| 2584 | tmp= (1 << bit_count) - 1; |
| 2585 | *data|= tmp; |
| 2586 | } |
| 2587 | set_if_bigger(bitmap->used_size, (uint) (data - bitmap->map) + 1); |
| 2588 | _ma_check_bitmap(bitmap); |
| 2589 | bitmap->changed= 1; |
| 2590 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
| 2591 | DBUG_RETURN(0); |
| 2592 | } |
| 2593 | |
| 2594 | |
| 2595 | /** |
| 2596 | @brief |
| 2597 | Make a transition of MARIA_FILE_BITMAP::non_flushable. |
| 2598 | If the bitmap becomes flushable, which requires that REDO-UNDO has been |
| 2599 | logged and all bitmap pages touched by the thread have a correct |
| 2600 | allocation, it unpins all bitmap pages, and if _ma_bitmap_flush_all() is |
| 2601 | waiting (in practice it is a checkpoint), it wakes it up. |
| 2602 | If the bitmap becomes or stays unflushable, the function merely records it |
| 2603 | unless a concurrent _ma_bitmap_flush_all() is happening, in which case the |
| 2604 | function first waits for the flush to be done. |
| 2605 | |
| 2606 | @note |
| 2607 | this sets info->non_flushable_state to 1 if we have incremented |
| 2608 | bitmap->non_flushable and not yet decremented it. |
| 2609 | |
| 2610 | @param share Table's share |
| 2611 | @param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable |
| 2612 | (-1 or +1). |
| 2613 | */ |
| 2614 | |
| 2615 | void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) |
| 2616 | { |
| 2617 | MARIA_SHARE *share= info->s; |
| 2618 | MARIA_FILE_BITMAP *bitmap; |
| 2619 | DBUG_ENTER("_ma_bitmap_flushable" ); |
| 2620 | |
| 2621 | /* |
| 2622 | Not transactional tables are never automaticly flushed and needs no |
| 2623 | protection |
| 2624 | */ |
| 2625 | if (!share->now_transactional) |
| 2626 | DBUG_VOID_RETURN; |
| 2627 | |
| 2628 | bitmap= &share->bitmap; |
| 2629 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 2630 | |
| 2631 | if (non_flushable_inc == -1) |
| 2632 | { |
| 2633 | DBUG_ASSERT((int) bitmap->non_flushable > 0); |
| 2634 | DBUG_ASSERT(info->non_flushable_state == 1); |
| 2635 | if (--bitmap->non_flushable == 0) |
| 2636 | { |
| 2637 | /* |
| 2638 | We unlock and unpin pages locked and pinned by other threads. It does |
| 2639 | not seem to be an issue as all bitmap changes are serialized with |
| 2640 | the bitmap's mutex. |
| 2641 | */ |
| 2642 | _ma_bitmap_unpin_all(share); |
| 2643 | if (unlikely(bitmap->waiting_for_non_flushable)) |
| 2644 | { |
| 2645 | DBUG_PRINT("info" , ("bitmap flushable waking up flusher" )); |
| 2646 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
| 2647 | } |
| 2648 | } |
| 2649 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
| 2650 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 2651 | info->non_flushable_state= 0; |
| 2652 | DBUG_VOID_RETURN; |
| 2653 | } |
| 2654 | DBUG_ASSERT(non_flushable_inc == 1); |
| 2655 | DBUG_ASSERT(info->non_flushable_state == 0); |
| 2656 | |
| 2657 | bitmap->waiting_for_flush_all_requested++; |
| 2658 | while (unlikely(bitmap->flush_all_requested)) |
| 2659 | { |
| 2660 | /* |
| 2661 | Some other thread is waiting for the bitmap to become |
| 2662 | flushable. Not the moment to make the bitmap unflushable or more |
| 2663 | unflushable; let's rather back off and wait. If we didn't do this, with |
| 2664 | multiple writers, there may always be one thread causing the bitmap to |
| 2665 | be unflushable and _ma_bitmap_flush_all() would wait for long. |
| 2666 | There should not be a deadlock because if our thread increased |
| 2667 | non_flushable (and thus _ma_bitmap_flush_all() is waiting for at least |
| 2668 | our thread), it is not going to increase it more so is not going to come |
| 2669 | here. |
| 2670 | */ |
| 2671 | DBUG_PRINT("info" , ("waiting for bitmap flusher" )); |
| 2672 | mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); |
| 2673 | } |
| 2674 | bitmap->waiting_for_flush_all_requested--; |
| 2675 | bitmap->non_flushable++; |
| 2676 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
| 2677 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 2678 | info->non_flushable_state= 1; |
| 2679 | DBUG_VOID_RETURN; |
| 2680 | } |
| 2681 | |
| 2682 | |
| 2683 | /* |
| 2684 | Correct bitmap pages to reflect the true allocation |
| 2685 | |
| 2686 | SYNOPSIS |
| 2687 | _ma_bitmap_release_unused() |
| 2688 | info Maria handle |
| 2689 | blocks Bitmap blocks |
| 2690 | |
| 2691 | IMPLEMENTATION |
| 2692 | If block->used & BLOCKUSED_TAIL is set: |
| 2693 | If block->used & BLOCKUSED_USED is set, then the bits for the |
| 2694 | corresponding page is set according to block->empty_space |
| 2695 | If block->used & BLOCKUSED_USED is not set, then the bits for |
| 2696 | the corresponding page is set to org_bitmap_value; |
| 2697 | |
| 2698 | If block->used & BLOCKUSED_TAIL is not set: |
| 2699 | if block->used is not set, the bits for the corresponding page are |
| 2700 | cleared |
| 2701 | |
| 2702 | For the first block (head block) the logic is same as for a tail block |
| 2703 | |
| 2704 | Note that we may have 'filler blocks' that are used to split a block |
| 2705 | in half; These can be recognized by that they have page_count == 0. |
| 2706 | |
| 2707 | This code also reverse the effect of ma_bitmap_flushable(.., 1); |
| 2708 | |
| 2709 | RETURN |
| 2710 | 0 ok |
| 2711 | 1 error (Couldn't write or read bitmap page) |
| 2712 | */ |
| 2713 | |
| 2714 | my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) |
| 2715 | { |
| 2716 | MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count; |
| 2717 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 2718 | uint bits, current_bitmap_value; |
| 2719 | DBUG_ENTER("_ma_bitmap_release_unused" ); |
| 2720 | |
| 2721 | /* |
| 2722 | We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full' |
| 2723 | when we allocated space in the page |
| 2724 | */ |
| 2725 | current_bitmap_value= FULL_HEAD_PAGE; |
| 2726 | |
| 2727 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 2728 | |
| 2729 | /* First handle head block */ |
| 2730 | if (block->used & BLOCKUSED_USED) |
| 2731 | { |
| 2732 | DBUG_PRINT("info" , ("head page: %lu empty_space: %u" , |
| 2733 | (ulong) block->page, block->empty_space)); |
| 2734 | bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space); |
| 2735 | if (block->used & BLOCKUSED_USE_ORG_BITMAP) |
| 2736 | current_bitmap_value= block->org_bitmap_value; |
| 2737 | } |
| 2738 | else |
| 2739 | bits= block->org_bitmap_value; |
| 2740 | if (bits != current_bitmap_value) |
| 2741 | { |
| 2742 | if (set_page_bits(info, bitmap, block->page, bits)) |
| 2743 | goto err; |
| 2744 | } |
| 2745 | else |
| 2746 | { |
| 2747 | DBUG_ASSERT(current_bitmap_value == |
| 2748 | bitmap_get_page_bits(info, bitmap, block->page)); |
| 2749 | } |
| 2750 | |
| 2751 | /* Handle all full pages and tail pages (for head page and blob) */ |
| 2752 | for (block++; block < end; block++) |
| 2753 | { |
| 2754 | uint page_count; |
| 2755 | if (!block->page_count) |
| 2756 | continue; /* Skip 'filler blocks' */ |
| 2757 | |
| 2758 | page_count= block->page_count; |
| 2759 | if (block->used & BLOCKUSED_TAIL) |
| 2760 | { |
| 2761 | current_bitmap_value= FULL_TAIL_PAGE; |
| 2762 | /* The bitmap page is only one page */ |
| 2763 | page_count= 1; |
| 2764 | if (block->used & BLOCKUSED_USED) |
| 2765 | { |
| 2766 | DBUG_PRINT("info" , ("tail page: %lu empty_space: %u" , |
| 2767 | (ulong) block->page, block->empty_space)); |
| 2768 | bits= free_size_to_tail_pattern(bitmap, block->empty_space); |
| 2769 | if (block->used & BLOCKUSED_USE_ORG_BITMAP) |
| 2770 | current_bitmap_value= block->org_bitmap_value; |
| 2771 | } |
| 2772 | else |
| 2773 | bits= block->org_bitmap_value; |
| 2774 | |
| 2775 | /* |
| 2776 | The page has all bits set; The following test is an optimization |
| 2777 | to not set the bits to the same value as before. |
| 2778 | */ |
| 2779 | DBUG_ASSERT(current_bitmap_value == |
| 2780 | bitmap_get_page_bits(info, bitmap, block->page)); |
| 2781 | |
| 2782 | if (bits != current_bitmap_value) |
| 2783 | { |
| 2784 | if (set_page_bits(info, bitmap, block->page, bits)) |
| 2785 | goto err; |
| 2786 | } |
| 2787 | } |
| 2788 | else if (!(block->used & BLOCKUSED_USED) && |
| 2789 | _ma_bitmap_reset_full_page_bits(info, bitmap, |
| 2790 | block->page, page_count)) |
| 2791 | goto err; |
| 2792 | } |
| 2793 | |
| 2794 | /* This duplicates ma_bitmap_flushable(-1) except it already has mutex */ |
| 2795 | if (info->non_flushable_state) |
| 2796 | { |
| 2797 | DBUG_ASSERT(((int) (bitmap->non_flushable)) > 0); |
| 2798 | info->non_flushable_state= 0; |
| 2799 | if (--bitmap->non_flushable == 0) |
| 2800 | { |
| 2801 | _ma_bitmap_unpin_all(info->s); |
| 2802 | if (unlikely(bitmap->waiting_for_non_flushable)) |
| 2803 | { |
| 2804 | DBUG_PRINT("info" , ("bitmap flushable waking up flusher" )); |
| 2805 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
| 2806 | } |
| 2807 | } |
| 2808 | } |
| 2809 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
| 2810 | |
| 2811 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 2812 | DBUG_RETURN(0); |
| 2813 | |
| 2814 | err: |
| 2815 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 2816 | DBUG_RETURN(1); |
| 2817 | } |
| 2818 | |
| 2819 | |
| 2820 | /* |
| 2821 | Free full pages from bitmap and pagecache |
| 2822 | |
| 2823 | SYNOPSIS |
| 2824 | _ma_bitmap_free_full_pages() |
| 2825 | info Maria handle |
| 2826 | extents Extents (as stored on disk) |
| 2827 | count Number of extents |
| 2828 | |
| 2829 | IMPLEMENTATION |
| 2830 | Mark all full pages (not tails) from extents as free, both in bitmap |
| 2831 | and page cache. |
| 2832 | |
| 2833 | RETURN |
| 2834 | 0 ok |
| 2835 | 1 error (Couldn't write or read bitmap page) |
| 2836 | */ |
| 2837 | |
| 2838 | my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents, |
| 2839 | uint count) |
| 2840 | { |
| 2841 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 2842 | my_bool res; |
| 2843 | DBUG_ENTER("_ma_bitmap_free_full_pages" ); |
| 2844 | |
| 2845 | for (; count--; extents+= ROW_EXTENT_SIZE) |
| 2846 | { |
| 2847 | pgcache_page_no_t page= uint5korr(extents); |
| 2848 | uint page_count= (uint2korr(extents + ROW_EXTENT_PAGE_SIZE) & |
| 2849 | ~START_EXTENT_BIT); |
| 2850 | if (!(page_count & TAIL_BIT)) |
| 2851 | { |
| 2852 | if (page == 0 && page_count == 0) |
| 2853 | continue; /* Not used extent */ |
| 2854 | if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page, |
| 2855 | page_count, PAGECACHE_LOCK_WRITE, 1)) |
| 2856 | DBUG_RETURN(1); |
| 2857 | mysql_mutex_lock(&bitmap->bitmap_lock); |
| 2858 | res= _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count); |
| 2859 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
| 2860 | if (res) |
| 2861 | DBUG_RETURN(1); |
| 2862 | } |
| 2863 | } |
| 2864 | DBUG_RETURN(0); |
| 2865 | } |
| 2866 | |
| 2867 | |
| 2868 | /* |
| 2869 | Mark in the bitmap how much free space there is on a page |
| 2870 | |
| 2871 | SYNOPSIS |
| 2872 | _ma_bitmap_set() |
| 2873 | info Maria handler |
| 2874 | page Adress to page |
| 2875 | head 1 if page is a head page, 0 if tail page |
| 2876 | empty_space How much empty space there is on page |
| 2877 | |
| 2878 | RETURN |
| 2879 | 0 ok |
| 2880 | 1 error |
| 2881 | */ |
| 2882 | |
| 2883 | my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head, |
| 2884 | uint empty_space) |
| 2885 | { |
| 2886 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
| 2887 | uint bits; |
| 2888 | my_bool res; |
| 2889 | DBUG_ENTER("_ma_bitmap_set" ); |
| 2890 | DBUG_PRINT("enter" , ("page: %lu head: %d empty_space: %u" , |
| 2891 | (ulong) page, head, empty_space)); |
| 2892 | |
| 2893 | mysql_mutex_lock(&info->s->bitmap.bitmap_lock); |
| 2894 | bits= (head ? |
| 2895 | _ma_free_size_to_head_pattern(bitmap, empty_space) : |
| 2896 | free_size_to_tail_pattern(bitmap, empty_space)); |
| 2897 | res= set_page_bits(info, bitmap, page, bits); |
| 2898 | mysql_mutex_unlock(&info->s->bitmap.bitmap_lock); |
| 2899 | DBUG_RETURN(res); |
| 2900 | } |
| 2901 | |
| 2902 | |
| 2903 | /* |
| 2904 | Check that bitmap pattern is correct for a page |
| 2905 | |
| 2906 | NOTES |
| 2907 | Used in maria_chk |
| 2908 | |
| 2909 | SYNOPSIS |
| 2910 | _ma_check_bitmap_data() |
| 2911 | info Maria handler |
| 2912 | page_type What kind of page this is |
| 2913 | page Adress to page |
| 2914 | empty_space Empty space on page |
| 2915 | bitmap_pattern Bitmap pattern for page (from bitmap) |
| 2916 | |
| 2917 | RETURN |
| 2918 | 0 ok |
| 2919 | 1 error |
| 2920 | */ |
| 2921 | |
| 2922 | my_bool _ma_check_bitmap_data(MARIA_HA *info, enum en_page_type page_type, |
| 2923 | uint empty_space, uint bitmap_pattern) |
| 2924 | { |
| 2925 | uint bits; |
| 2926 | switch (page_type) { |
| 2927 | case UNALLOCATED_PAGE: |
| 2928 | case MAX_PAGE_TYPE: |
| 2929 | bits= 0; |
| 2930 | break; |
| 2931 | case HEAD_PAGE: |
| 2932 | bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space); |
| 2933 | break; |
| 2934 | case TAIL_PAGE: |
| 2935 | bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space); |
| 2936 | break; |
| 2937 | case BLOB_PAGE: |
| 2938 | bits= FULL_TAIL_PAGE; |
| 2939 | break; |
| 2940 | default: |
| 2941 | bits= 0; /* to satisfy compiler */ |
| 2942 | DBUG_ASSERT(0); |
| 2943 | } |
| 2944 | return (bitmap_pattern != bits); |
| 2945 | } |
| 2946 | |
| 2947 | /** |
| 2948 | Check that bitmap looks correct |
| 2949 | |
| 2950 | - All data before full_head_size and full_tail_size are allocated |
| 2951 | - There is no allocated data after used_size |
| 2952 | All of the above need to be correct only according to 6 byte |
| 2953 | alignment as all loops reads 6 bytes at a time and we check both |
| 2954 | start and end position according to the current 6 byte position. |
| 2955 | */ |
| 2956 | |
| 2957 | #ifndef DBUG_OFF |
| 2958 | static void _ma_check_bitmap(MARIA_FILE_BITMAP *bitmap) |
| 2959 | { |
| 2960 | uchar *data= bitmap->map; |
| 2961 | uchar *end= bitmap->map + bitmap->total_size; |
| 2962 | uchar *full_head_end=0, *full_tail_end=0, *first_empty= bitmap->map; |
| 2963 | |
| 2964 | for (; data < end; data+= 6) |
| 2965 | { |
| 2966 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
| 2967 | uint i; |
| 2968 | |
| 2969 | if (bits == 04444444444444444LL || bits == 0xffffffffffffLL) |
| 2970 | { |
| 2971 | first_empty= data + 6; |
| 2972 | continue; /* block fully used */ |
| 2973 | } |
| 2974 | if (bits == 0) |
| 2975 | { |
| 2976 | if (!full_head_end) |
| 2977 | full_head_end= data; |
| 2978 | if (!full_tail_end) |
| 2979 | full_tail_end= data; |
| 2980 | continue; |
| 2981 | } |
| 2982 | |
| 2983 | first_empty= data + 6; |
| 2984 | if (!full_head_end || !full_tail_end) |
| 2985 | { |
| 2986 | for (i= 0, bits >>= 0; i < 16 ; i++, bits >>= 3) |
| 2987 | { |
| 2988 | uint pattern= (uint) (bits & 7); |
| 2989 | if (pattern == FULL_HEAD_PAGE || pattern == FULL_TAIL_PAGE) |
| 2990 | continue; |
| 2991 | |
| 2992 | if (pattern < 4 && !full_head_end) |
| 2993 | full_head_end= data; |
| 2994 | if ((pattern == 0 || (pattern > 4 && pattern < 7)) && !full_tail_end) |
| 2995 | full_tail_end= data; |
| 2996 | } |
| 2997 | } |
| 2998 | } |
| 2999 | if (!full_head_end) |
| 3000 | full_head_end= data; |
| 3001 | if (!full_tail_end) |
| 3002 | full_tail_end= data; |
| 3003 | |
| 3004 | /* used_size must point after the last byte that had some data) */ |
| 3005 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
| 3006 | DBUG_ASSERT((bitmap->map + (bitmap->used_size+5)/6*6) >= first_empty); |
| 3007 | /* full_xxxx_size can't point after the first block that has free data */ |
| 3008 | DBUG_ASSERT((bitmap->map + (bitmap->full_head_size/6*6)) <= full_head_end); |
| 3009 | DBUG_ASSERT((bitmap->map + (bitmap->full_tail_size/6*6)) <= full_tail_end); |
| 3010 | } |
| 3011 | #endif |
| 3012 | |
| 3013 | |
| 3014 | /* |
| 3015 | Check if the page type matches the one that we have in the bitmap |
| 3016 | |
| 3017 | SYNOPSIS |
| 3018 | _ma_check_if_right_bitmap_type() |
| 3019 | info Maria handler |
| 3020 | page_type What kind of page this is |
| 3021 | page Adress to page |
| 3022 | bitmap_pattern Store here the pattern that was in the bitmap for the |
| 3023 | page. This is always updated. |
| 3024 | |
| 3025 | NOTES |
| 3026 | Used in maria_chk |
| 3027 | |
| 3028 | RETURN |
| 3029 | 0 ok |
| 3030 | 1 error |
| 3031 | */ |
| 3032 | |
| 3033 | my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, |
| 3034 | enum en_page_type page_type, |
| 3035 | pgcache_page_no_t page, |
| 3036 | uint *bitmap_pattern) |
| 3037 | { |
| 3038 | if ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap, |
| 3039 | page)) > 7) |
| 3040 | return 1; /* Couldn't read page */ |
| 3041 | switch (page_type) { |
| 3042 | case HEAD_PAGE: |
| 3043 | return *bitmap_pattern < 1 || *bitmap_pattern > 4; |
| 3044 | case TAIL_PAGE: |
| 3045 | return *bitmap_pattern < 5; |
| 3046 | case BLOB_PAGE: |
| 3047 | return *bitmap_pattern != 7; |
| 3048 | default: |
| 3049 | break; |
| 3050 | } |
| 3051 | DBUG_ASSERT(0); |
| 3052 | return 1; |
| 3053 | } |
| 3054 | |
| 3055 | |
| 3056 | /** |
| 3057 | @brief create the first bitmap page of a freshly created data file |
| 3058 | |
| 3059 | @param share table's share |
| 3060 | |
| 3061 | @return Operation status |
| 3062 | @retval 0 OK |
| 3063 | @retval !=0 Error |
| 3064 | */ |
| 3065 | |
| 3066 | int _ma_bitmap_create_first(MARIA_SHARE *share) |
| 3067 | { |
| 3068 | uint block_size= share->bitmap.block_size; |
| 3069 | File file= share->bitmap.file.file; |
| 3070 | uchar marker[CRC_SIZE]; |
| 3071 | |
| 3072 | /* |
| 3073 | Next write operation of the page will write correct CRC |
| 3074 | if it is needed |
| 3075 | */ |
| 3076 | int4store(marker, MARIA_NO_CRC_BITMAP_PAGE); |
| 3077 | |
| 3078 | if (mysql_file_chsize(file, block_size - sizeof(marker), |
| 3079 | 0, MYF(MY_WME)) || |
| 3080 | my_pwrite(file, marker, sizeof(marker), |
| 3081 | block_size - sizeof(marker), |
| 3082 | MYF(MY_NABP | MY_WME))) |
| 3083 | return 1; |
| 3084 | share->state.state.data_file_length= block_size; |
| 3085 | _ma_bitmap_delete_all(share); |
| 3086 | return 0; |
| 3087 | } |
| 3088 | |
| 3089 | |
| 3090 | /** |
| 3091 | @brief Pagecache callback to get the TRANSLOG_ADDRESS to flush up to, when a |
| 3092 | bitmap page needs to be flushed. |
| 3093 | |
| 3094 | @param page Page's content |
| 3095 | @param page_no Page's number (<offset>/<page length>) |
| 3096 | @param data_ptr Callback data pointer (pointer to MARIA_SHARE) |
| 3097 | |
| 3098 | @retval TRANSLOG_ADDRESS to flush up to. |
| 3099 | */ |
| 3100 | |
| 3101 | static my_bool |
| 3102 | flush_log_for_bitmap(PAGECACHE_IO_HOOK_ARGS *args __attribute__ ((unused))) |
| 3103 | { |
| 3104 | #ifdef DBUG_ASSERT_EXISTS |
| 3105 | const MARIA_SHARE *share= (MARIA_SHARE*)args->data; |
| 3106 | #endif |
| 3107 | DBUG_ENTER("flush_log_for_bitmap" ); |
| 3108 | DBUG_ASSERT(share->now_transactional); |
| 3109 | /* |
| 3110 | WAL imposes that UNDOs reach disk before bitmap is flushed. We don't know |
| 3111 | the LSN of the last UNDO about this bitmap page, so we flush whole log. |
| 3112 | */ |
| 3113 | DBUG_RETURN(translog_flush(translog_get_horizon())); |
| 3114 | } |
| 3115 | |
| 3116 | |
| 3117 | /** |
| 3118 | @brief Set callbacks for bitmap pages |
| 3119 | |
| 3120 | @note |
| 3121 | We don't use pagecache_file_init here, as we want to keep the |
| 3122 | code readable |
| 3123 | */ |
| 3124 | |
| 3125 | void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file, |
| 3126 | MARIA_SHARE *share) |
| 3127 | { |
| 3128 | pagecache_file_set_null_hooks(file); |
| 3129 | file->callback_data= (uchar*) share; |
| 3130 | file->flush_log_callback= maria_flush_log_for_page_none; |
| 3131 | file->post_write_hook= maria_page_write_failure; |
| 3132 | |
| 3133 | if (share->temporary) |
| 3134 | { |
| 3135 | file->post_read_hook= &maria_page_crc_check_none; |
| 3136 | file->pre_write_hook= &maria_page_filler_set_none; |
| 3137 | } |
| 3138 | else |
| 3139 | { |
| 3140 | file->post_read_hook= &maria_page_crc_check_bitmap; |
| 3141 | if (share->options & HA_OPTION_PAGE_CHECKSUM) |
| 3142 | file->pre_write_hook= &maria_page_crc_set_normal; |
| 3143 | else |
| 3144 | file->pre_write_hook= &maria_page_filler_set_bitmap; |
| 3145 | if (share->now_transactional) |
| 3146 | file->flush_log_callback= flush_log_for_bitmap; |
| 3147 | } |
| 3148 | } |
| 3149 | |
| 3150 | |
| 3151 | /** |
| 3152 | Extends data file with zeroes and creates new bitmap pages into page cache. |
| 3153 | |
| 3154 | Writes all bitmap pages in [from, to]. |
| 3155 | |
| 3156 | Non-bitmap pages of zeroes are correct as they are marked empty in |
| 3157 | bitmaps. Bitmap pages will not be zeroes: they will get their CRC fixed when |
| 3158 | flushed. And if there is a crash before flush (so they are zeroes at |
| 3159 | restart), a REDO will re-create them in page cache. |
| 3160 | */ |
| 3161 | |
| 3162 | static my_bool |
| 3163 | _ma_bitmap_create_missing_into_pagecache(MARIA_SHARE *share, |
| 3164 | MARIA_FILE_BITMAP *bitmap, |
| 3165 | pgcache_page_no_t from, |
| 3166 | pgcache_page_no_t to, |
| 3167 | uchar *zeroes) |
| 3168 | { |
| 3169 | pgcache_page_no_t i; |
| 3170 | /* |
| 3171 | We do not use my_chsize() because there can be a race between when it |
| 3172 | reads the physical size and when it writes (assume data_file_length is 10, |
| 3173 | physical length is 8 and two data pages are in cache, and here we do a |
| 3174 | my_chsize: my_chsize sees physical length is 8, then the two data pages go |
| 3175 | to disk then my_chsize writes from page 8 and so overwrites the two data |
| 3176 | pages, wrongly). |
| 3177 | We instead rely on the filesystem filling gaps with zeroes. |
| 3178 | */ |
| 3179 | for (i= from; i <= to; i+= bitmap->pages_covered) |
| 3180 | { |
| 3181 | /** |
| 3182 | No need to keep them pinned, they are new so flushable. |
| 3183 | @todo but we may want to keep them pinned, as an optimization: if they |
| 3184 | are not pinned they may go to disk before the data pages go (so, the |
| 3185 | physical pages would be in non-ascending "sparse" order on disk), or the |
| 3186 | filesystem may fill gaps with zeroes physically which is a waste of |
| 3187 | time. |
| 3188 | */ |
| 3189 | if (pagecache_write(share->pagecache, |
| 3190 | &bitmap->file, i, 0, |
| 3191 | zeroes, PAGECACHE_PLAIN_PAGE, |
| 3192 | PAGECACHE_LOCK_LEFT_UNLOCKED, |
| 3193 | PAGECACHE_PIN_LEFT_UNPINNED, |
| 3194 | PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE)) |
| 3195 | goto err; |
| 3196 | } |
| 3197 | /* |
| 3198 | Data pages after data_file_length are full of zeroes but that is allowed |
| 3199 | as they are marked empty in the bitmap. |
| 3200 | */ |
| 3201 | return FALSE; |
| 3202 | err: |
| 3203 | return TRUE; |
| 3204 | } |
| 3205 | |
| 3206 | |
| 3207 | /** |
| 3208 | Creates missing bitmaps when we extend the data file. |
| 3209 | |
| 3210 | At run-time, when we need a new bitmap page we come here; and only one bitmap |
| 3211 | page at a time is created. |
| 3212 | |
| 3213 | In some recovery cases we insert at a large offset in the data file, way |
| 3214 | beyond state.data_file_length, so can need to create more than one bitmap |
| 3215 | page in one go. Known case is: |
| 3216 | Start a transaction in Maria; |
| 3217 | delete last row of very large table (with delete_row) |
| 3218 | do a bulk insert |
| 3219 | crash |
| 3220 | Then UNDO_BULK_INSERT will truncate table files, and |
| 3221 | UNDO_ROW_DELETE will want to put the row back to its original position, |
| 3222 | extending the data file a lot: bitmap page*s* in the hole must be created, |
| 3223 | or he table would look corrupted. |
| 3224 | |
| 3225 | We need to log REDOs for bitmap creation, consider: we apply a REDO for a |
| 3226 | data page, which creates the first data page covered by a new bitmap |
| 3227 | not yet created. If the data page is flushed but the bitmap page is not and |
| 3228 | there is a crash, re-execution of the REDO will complain about the zeroed |
| 3229 | bitmap page (see it as corruption). Thus a REDO is needed to re-create the |
| 3230 | bitmap. |
| 3231 | |
| 3232 | @param info Maria handler |
| 3233 | @param bitmap Bitmap handler |
| 3234 | @param page Last bitmap page to create |
| 3235 | |
| 3236 | @note When this function is called this must be true: |
| 3237 | ((page + 1) * bitmap->block_size > info->s->state.state.data_file_length) |
| 3238 | |
| 3239 | */ |
| 3240 | |
| 3241 | static my_bool _ma_bitmap_create_missing(MARIA_HA *info, |
| 3242 | MARIA_FILE_BITMAP *bitmap, |
| 3243 | pgcache_page_no_t page) |
| 3244 | { |
| 3245 | MARIA_SHARE *share= info->s; |
| 3246 | uint block_size= bitmap->block_size; |
| 3247 | pgcache_page_no_t from, to; |
| 3248 | my_off_t data_file_length= share->state.state.data_file_length; |
| 3249 | DBUG_ENTER("_ma_bitmap_create_missing" ); |
| 3250 | DBUG_PRINT("enter" , ("page: %lld" , (longlong) page)); |
| 3251 | |
| 3252 | /* First (in offset order) bitmap page to create */ |
| 3253 | if (data_file_length < block_size) |
| 3254 | goto err; /* corrupted, should have first bitmap page */ |
| 3255 | if (page * block_size >= share->base.max_data_file_length) |
| 3256 | { |
| 3257 | my_errno= HA_ERR_RECORD_FILE_FULL; |
| 3258 | goto err; |
| 3259 | } |
| 3260 | |
| 3261 | from= (data_file_length / block_size - 1) / bitmap->pages_covered + 1; |
| 3262 | from*= bitmap->pages_covered; |
| 3263 | /* |
| 3264 | page>=from because: |
| 3265 | (page + 1) * bs > dfl, and page == k * pc so: |
| 3266 | (k * pc + 1) * bs > dfl; k * pc + 1 > dfl / bs; k * pc > dfl / bs - 1 |
| 3267 | k > (dfl / bs - 1) / pc; k >= (dfl / bs - 1) / pc + 1 |
| 3268 | k * pc >= ((dfl / bs - 1) / pc + 1) * pc == from. |
| 3269 | */ |
| 3270 | DBUG_ASSERT(page >= from); |
| 3271 | |
| 3272 | if (share->now_transactional) |
| 3273 | { |
| 3274 | LSN lsn; |
| 3275 | uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2]; |
| 3276 | LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; |
| 3277 | page_store(log_data + FILEID_STORE_SIZE, from); |
| 3278 | page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page); |
| 3279 | log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; |
| 3280 | log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); |
| 3281 | /* |
| 3282 | We don't use info->trn so that this REDO is always executed even though |
| 3283 | the UNDO does not reach disk due to crash. This is also consistent with |
| 3284 | the fact that the new bitmap pages are not pinned. |
| 3285 | */ |
| 3286 | if (translog_write_record(&lsn, LOGREC_REDO_BITMAP_NEW_PAGE, |
| 3287 | &dummy_transaction_object, info, |
| 3288 | (translog_size_t)sizeof(log_data), |
| 3289 | TRANSLOG_INTERNAL_PARTS + 1, log_array, |
| 3290 | log_data, NULL)) |
| 3291 | goto err; |
| 3292 | /* |
| 3293 | No need to flush the log: the bitmap pages we are going to create will |
| 3294 | flush it when they go to disk. |
| 3295 | */ |
| 3296 | } |
| 3297 | |
| 3298 | /* |
| 3299 | Last bitmap page. It has special creation: will go to the page cache |
| 3300 | only later as we are going to modify it very soon. |
| 3301 | */ |
| 3302 | bzero(bitmap->map, bitmap->block_size); |
| 3303 | bitmap->used_size= bitmap->full_head_size= bitmap->full_tail_size= 0; |
| 3304 | bitmap->changed=1; |
| 3305 | #ifndef DBUG_OFF |
| 3306 | /* |
| 3307 | Make a copy of the page to be able to print out bitmap changes during |
| 3308 | debugging |
| 3309 | */ |
| 3310 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
| 3311 | #endif |
| 3312 | |
| 3313 | /* Last bitmap page to create before 'page' */ |
| 3314 | DBUG_ASSERT(page >= bitmap->pages_covered); |
| 3315 | to= page - bitmap->pages_covered; |
| 3316 | /* |
| 3317 | In run-time situations, from>=to is always false, i.e. we always create |
| 3318 | one bitmap at a time ('page'). |
| 3319 | */ |
| 3320 | if ((from <= to) && |
| 3321 | _ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to, |
| 3322 | bitmap->map)) |
| 3323 | goto err; |
| 3324 | |
| 3325 | share->state.state.data_file_length= (page + 1) * bitmap->block_size; |
| 3326 | |
| 3327 | DBUG_RETURN(FALSE); |
| 3328 | err: |
| 3329 | DBUG_RETURN(TRUE); |
| 3330 | } |
| 3331 | |
| 3332 | |
| 3333 | my_bool _ma_apply_redo_bitmap_new_page(MARIA_HA *info, |
| 3334 | LSN lsn __attribute__ ((unused)), |
| 3335 | const uchar *) |
| 3336 | { |
| 3337 | MARIA_SHARE *share= info->s; |
| 3338 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
| 3339 | my_bool error; |
| 3340 | pgcache_page_no_t from, to, min_from; |
| 3341 | DBUG_ENTER("_ma_apply_redo_bitmap_new_page" ); |
| 3342 | |
| 3343 | from= page_korr(header); |
| 3344 | to= page_korr(header + PAGE_STORE_SIZE); |
| 3345 | DBUG_PRINT("info" , ("from: %lu to: %lu" , (ulong)from, (ulong)to)); |
| 3346 | if ((from > to) || |
| 3347 | (from % bitmap->pages_covered) != 0 || |
| 3348 | (to % bitmap->pages_covered) != 0) |
| 3349 | { |
| 3350 | error= TRUE; /* corrupted log record */ |
| 3351 | goto err; |
| 3352 | } |
| 3353 | |
| 3354 | min_from= (share->state.state.data_file_length / bitmap->block_size - 1) / |
| 3355 | bitmap->pages_covered + 1; |
| 3356 | min_from*= bitmap->pages_covered; |
| 3357 | if (from < min_from) |
| 3358 | { |
| 3359 | DBUG_PRINT("info" , ("overwrite bitmap pages from %lu" , (ulong)min_from)); |
| 3360 | /* |
| 3361 | We have to overwrite. It could be that there was a bitmap page in |
| 3362 | memory, covering a data page which went to disk, then crash: the |
| 3363 | bitmap page is now full of zeros and is ==min_from, we have to overwrite |
| 3364 | it with correct checksum. |
| 3365 | */ |
| 3366 | } |
| 3367 | share->state.changed|= STATE_CHANGED; |
| 3368 | bzero(info->buff, bitmap->block_size); |
| 3369 | if (!(error= |
| 3370 | _ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to, |
| 3371 | info->buff))) |
| 3372 | share->state.state.data_file_length= (to + 1) * bitmap->block_size; |
| 3373 | |
| 3374 | err: |
| 3375 | DBUG_RETURN(error); |
| 3376 | } |
| 3377 | |