1 | /* Copyright (C) 2007 Michael Widenius |
2 | Copyright (c) 2010, 2013, Monty Program Ab. |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
16 | |
17 | /* |
18 | Bitmap handling (for records in block) |
19 | |
20 | The data file starts with a bitmap page, followed by as many data |
21 | pages as the bitmap can cover. After this there is a new bitmap page |
22 | and more data pages etc. |
23 | |
24 | The bitmap code assumes there is always an active bitmap page and thus |
25 | that there is at least one bitmap page in the file |
26 | |
27 | Structure of bitmap page: |
28 | |
29 | Fixed size records (to be implemented later): |
30 | |
31 | 2 bits are used to indicate: |
32 | |
33 | 0 Empty |
34 | 1 0-75 % full (at least room for 2 records) |
35 | 2 75-100 % full (at least room for one record) |
36 | 3 100 % full (no more room for records) |
37 | |
38 | Assuming 8K pages, this will allow us to map: |
39 | 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M |
40 | |
41 | (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN) |
42 | |
43 | Note that for fixed size rows, we can't add more columns without doing |
44 | a full reorganization of the table. The user can always force a dynamic |
45 | size row format by specifying ROW_FORMAT=dynamic. |
46 | |
47 | |
48 | Dynamic size records: |
49 | |
50 | 3 bits are used to indicate Bytes free in 8K page |
51 | |
52 | 0 Empty page 8176 (head or tail) |
53 | 1 0-30 % full (at least room for 3 records) 5724 |
54 | 2 30-60 % full (at least room for 2 records) 3271 |
55 | 3 60-90 % full (at least room for one record) 818 |
56 | 4 100 % full (no more room for records) 0 |
57 | 5 Tail page, 0-40 % full 4906 |
58 | 6 Tail page, 40-80 % full 1636 |
59 | 7 Full tail page or full blob page 0 |
60 | |
61 | Assuming 8K pages, this will allow us to map: |
62 | 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M |
63 | |
64 | Note that values 1-3 may be adjust for each individual table based on |
65 | 'min record length'. Tail pages are for overflow data which can be of |
66 | any size and thus doesn't have to be adjusted for different tables. |
67 | If we add more columns to the table, some of the originally calculated |
68 | 'cut off' points may not be optimal, but they shouldn't be 'drasticly |
69 | wrong'. |
70 | |
71 | When allocating data from the bitmap, we are trying to do it in a |
72 | 'best fit' manner. Blobs and varchar blocks are given out in large |
73 | continuous extents to allow fast access to these. Before allowing a |
74 | row to 'flow over' to other blocks, we will compact the page and use |
75 | all space on it. If there is many rows in the page, we will ensure |
76 | there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other |
77 | rows to grow. |
78 | |
79 | The bitmap format allows us to extend the row file in big chunks, if needed. |
80 | |
81 | When calculating the size for a packed row, we will calculate the following |
82 | things separately: |
83 | - Row header + null_bits + empty_bits fixed size segments etc. |
84 | - Size of all char/varchar fields |
85 | - Size of each blob field |
86 | |
87 | The bitmap handler will get all the above information and return |
88 | either one page or a set of pages to put the different parts. |
89 | |
90 | Bitmaps are read on demand in response to insert/delete/update operations. |
91 | The following bitmap pointers will be cached and stored on disk on close: |
92 | - Current insert_bitmap; When inserting new data we will first try to |
93 | fill this one. |
94 | - First bitmap which is not completely full. This is updated when we |
95 | free data with an update or delete. |
96 | |
97 | While flushing out bitmaps, we will cache the status of the bitmap in memory |
98 | to avoid having to read a bitmap for insert of new data that will not |
99 | be of any use |
100 | - Total empty space |
101 | - Largest number of continuous pages |
102 | |
103 | Bitmap ONLY goes to disk in the following scenarios |
104 | - The file is closed (and we flush all changes to disk) |
105 | - On checkpoint |
106 | (Ie: When we do a checkpoint, we have to ensure that all bitmaps are |
107 | put on disk even if they are not in the page cache). |
108 | - When explicitly requested (for example on backup or after recovery, |
109 | to simplify things) |
110 | |
111 | The flow of writing a row is that: |
112 | - Mark the bitmap not flushable (_ma_bitmap_flushable(X, 1)) |
113 | - Lock the bitmap |
114 | - Decide which data pages we will write to |
115 | - Mark them full in the bitmap page so that other threads do not try to |
116 | use the same data pages as us |
117 | - We unlock the bitmap |
118 | - Write the data pages |
119 | - Lock the bitmap |
120 | - Correct the bitmap page with the true final occupation of the data |
121 | pages (that is, we marked pages full but when we are done we realize |
122 | we didn't fill them) |
123 | - Unlock the bitmap. |
124 | - Mark the bitmap flushable (_ma_bitmap_flushable(X, -1)) |
125 | */ |
126 | |
127 | #include "maria_def.h" |
128 | #include "ma_blockrec.h" |
129 | |
130 | #define FULL_HEAD_PAGE 4 |
131 | #define FULL_TAIL_PAGE 7 |
132 | |
133 | const char *bits_to_txt[]= |
134 | { |
135 | "empty" , "00-30% full" , "30-60% full" , "60-90% full" , "full" , |
136 | "tail 00-40 % full" , "tail 40-80 % full" , "tail/blob full" |
137 | }; |
138 | |
139 | #define WRONG_BITMAP_FLUSH 0 /*define to 1 only for provoking bugs*/ |
140 | |
141 | static my_bool _ma_read_bitmap_page(MARIA_HA *info, |
142 | MARIA_FILE_BITMAP *bitmap, |
143 | pgcache_page_no_t page); |
144 | static my_bool _ma_bitmap_create_missing(MARIA_HA *info, |
145 | MARIA_FILE_BITMAP *bitmap, |
146 | pgcache_page_no_t page); |
147 | static void _ma_bitmap_unpin_all(MARIA_SHARE *share); |
148 | #ifndef DBUG_OFF |
149 | static void _ma_check_bitmap(MARIA_FILE_BITMAP *bitmap); |
150 | #else |
151 | #define _ma_check_bitmap(A) do { } while(0) |
152 | #endif |
153 | |
154 | |
155 | /* Write bitmap page to key cache */ |
156 | |
157 | static inline my_bool write_changed_bitmap(MARIA_SHARE *share, |
158 | MARIA_FILE_BITMAP *bitmap) |
159 | { |
160 | my_bool res; |
161 | DBUG_ENTER("write_changed_bitmap" ); |
162 | DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); |
163 | DBUG_ASSERT(bitmap->file.pre_write_hook != 0); |
164 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
165 | |
166 | /* |
167 | Mark that a bitmap page has been written to page cache and we have |
168 | to flush it during checkpoint. |
169 | */ |
170 | bitmap->changed_not_flushed= 1; |
171 | |
172 | if ((bitmap->non_flushable == 0) || WRONG_BITMAP_FLUSH) |
173 | { |
174 | res= pagecache_write(share->pagecache, |
175 | &bitmap->file, bitmap->page, 0, |
176 | bitmap->map, PAGECACHE_PLAIN_PAGE, |
177 | PAGECACHE_LOCK_LEFT_UNLOCKED, |
178 | PAGECACHE_PIN_LEFT_UNPINNED, |
179 | PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE); |
180 | DBUG_ASSERT(!res); |
181 | DBUG_RETURN(res); |
182 | } |
183 | else |
184 | { |
185 | /* |
186 | bitmap->non_flushable means that someone has changed the bitmap, |
187 | but it's not yet complete so it can't yet be written to disk. |
188 | In this case we write the changed bitmap to the disk cache, |
189 | but keep it pinned until the change is completed. The page will |
190 | be unpinned later by _ma_bitmap_unpin_all() as soon as non_flushable |
191 | is set back to 0. |
192 | */ |
193 | MARIA_PINNED_PAGE page_link; |
194 | DBUG_PRINT("info" , ("Writing pinned bitmap page" )); |
195 | res= pagecache_write(share->pagecache, |
196 | &bitmap->file, bitmap->page, 0, |
197 | bitmap->map, PAGECACHE_PLAIN_PAGE, |
198 | PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN, |
199 | PAGECACHE_WRITE_DELAY, &page_link.link, |
200 | LSN_IMPOSSIBLE); |
201 | page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED; |
202 | page_link.changed= 1; |
203 | push_dynamic(&bitmap->pinned_pages, (const uchar*) (void*) &page_link); |
204 | DBUG_ASSERT(!res); |
205 | DBUG_RETURN(res); |
206 | } |
207 | } |
208 | |
209 | /* |
210 | Initialize bitmap variables in share |
211 | |
212 | SYNOPSIS |
213 | _ma_bitmap_init() |
214 | share Share handler |
215 | file Data file handler |
216 | last_page Pointer to last page (max_file_size) that needs to be |
217 | mapped by the bitmap. This is adjusted to bitmap |
218 | alignment. |
219 | |
220 | NOTES |
221 | This is called the first time a file is opened. |
222 | |
223 | RETURN |
224 | 0 ok |
225 | 1 error |
226 | */ |
227 | |
228 | my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, |
229 | pgcache_page_no_t *last_page) |
230 | { |
231 | uint aligned_bit_blocks; |
232 | uint max_page_size; |
233 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
234 | uint size= share->block_size; |
235 | pgcache_page_no_t first_bitmap_with_space; |
236 | #ifndef DBUG_OFF |
237 | /* We want to have a copy of the bitmap to be able to print differences */ |
238 | size*= 2; |
239 | #endif |
240 | |
241 | if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) || |
242 | my_init_dynamic_array(&bitmap->pinned_pages, |
243 | sizeof(MARIA_PINNED_PAGE), 1, 1, MYF(0))) |
244 | return 1; |
245 | |
246 | bitmap->share= share; |
247 | bitmap->block_size= share->block_size; |
248 | bitmap->file.file= file; |
249 | _ma_bitmap_set_pagecache_callbacks(&bitmap->file, share); |
250 | |
251 | /* Size needs to be aligned on 6 */ |
252 | aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6; |
253 | bitmap->max_total_size= bitmap->total_size= aligned_bit_blocks * 6; |
254 | /* |
255 | In each 6 bytes, we have 6*8/3 = 16 pages covered |
256 | The +1 is to add the bitmap page, as this doesn't have to be covered |
257 | */ |
258 | bitmap->pages_covered= aligned_bit_blocks * 16 + 1; |
259 | bitmap->flush_all_requested= bitmap->waiting_for_flush_all_requested= |
260 | bitmap->waiting_for_non_flushable= 0; |
261 | bitmap->non_flushable= 0; |
262 | |
263 | /* Update size for bits */ |
264 | /* TODO; Make this dependent of the row size */ |
265 | max_page_size= share->block_size - PAGE_OVERHEAD_SIZE(share) + DIR_ENTRY_SIZE; |
266 | bitmap->sizes[0]= max_page_size; /* Empty page */ |
267 | bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100; |
268 | bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100; |
269 | bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100; |
270 | bitmap->sizes[4]= 0; /* Full page */ |
271 | bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100; |
272 | bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100; |
273 | bitmap->sizes[7]= 0; |
274 | |
275 | /* |
276 | If a record size will fit into the smallest empty page, return first |
277 | found page in find_head() |
278 | */ |
279 | if (bitmap->sizes[3] >= share->base.max_pack_length) |
280 | bitmap->return_first_match= 1; |
281 | |
282 | mysql_mutex_init(key_SHARE_BITMAP_lock, |
283 | &share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW); |
284 | mysql_cond_init(key_SHARE_BITMAP_cond, |
285 | &share->bitmap.bitmap_cond, 0); |
286 | |
287 | first_bitmap_with_space= share->state.first_bitmap_with_space; |
288 | _ma_bitmap_reset_cache(share); |
289 | |
290 | /* |
291 | The bitmap used to map the file are aligned on 6 bytes. We now |
292 | calculate the max file size that can be used by the bitmap. This |
293 | is needed to get ma_info() give a true file size so that the user can |
294 | estimate if there is still space free for records in the file. |
295 | */ |
296 | { |
297 | pgcache_page_no_t last_bitmap_page; |
298 | ulong blocks, bytes; |
299 | |
300 | last_bitmap_page= *last_page - *last_page % bitmap->pages_covered; |
301 | blocks= (ulong) (*last_page - last_bitmap_page); |
302 | bytes= (blocks * 3) / 8; /* 3 bit per page / 8 bits per byte */ |
303 | /* Size needs to be aligned on 6 */ |
304 | bytes/= 6; |
305 | bytes*= 6; |
306 | bitmap->last_bitmap_page= last_bitmap_page; |
307 | bitmap->last_total_size= (uint)bytes; |
308 | *last_page= ((last_bitmap_page + bytes*8/3)); |
309 | } |
310 | |
311 | /* Restore first_bitmap_with_space if it's resonable */ |
312 | if (first_bitmap_with_space <= (share->state.state.data_file_length / |
313 | share->block_size)) |
314 | share->state.first_bitmap_with_space= first_bitmap_with_space; |
315 | |
316 | return 0; |
317 | } |
318 | |
319 | |
320 | /* |
321 | Free data allocated by _ma_bitmap_init |
322 | |
323 | SYNOPSIS |
324 | _ma_bitmap_end() |
325 | share Share handler |
326 | */ |
327 | |
328 | my_bool _ma_bitmap_end(MARIA_SHARE *share) |
329 | { |
330 | my_bool res; |
331 | |
332 | #ifndef DBUG_OFF |
333 | if (! share->internal_table) |
334 | mysql_mutex_assert_owner(&share->close_lock); |
335 | #endif |
336 | DBUG_ASSERT(share->bitmap.non_flushable == 0); |
337 | DBUG_ASSERT(share->bitmap.flush_all_requested == 0); |
338 | DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 && |
339 | share->bitmap.waiting_for_flush_all_requested == 0); |
340 | DBUG_ASSERT(share->bitmap.pinned_pages.elements == 0); |
341 | |
342 | res= _ma_bitmap_flush(share); |
343 | mysql_mutex_destroy(&share->bitmap.bitmap_lock); |
344 | mysql_cond_destroy(&share->bitmap.bitmap_cond); |
345 | delete_dynamic(&share->bitmap.pinned_pages); |
346 | my_free(share->bitmap.map); |
347 | share->bitmap.map= 0; |
348 | /* |
349 | This is to not get an assert in checkpoint. The bitmap will be flushed |
350 | at once by _ma_once_end_block_record() as part of the normal flush |
351 | of the kfile. |
352 | */ |
353 | share->bitmap.changed_not_flushed= 0; |
354 | return res; |
355 | } |
356 | |
357 | /* |
358 | Ensure that we have incremented open count before we try to read/write |
359 | a page while we have the bitmap lock. |
360 | This is needed to ensure that we don't call _ma_mark_file_changed() as |
361 | part of flushing a page to disk, as this locks share->internal_lock |
362 | and then mutex lock would happen in the wrong order. |
363 | */ |
364 | |
365 | static inline void _ma_bitmap_mark_file_changed(MARIA_SHARE *share, |
366 | my_bool flush_translog) |
367 | { |
368 | /* |
369 | It's extremely unlikely that the following test is true as it |
370 | only happens once if the table has changed. |
371 | */ |
372 | if (unlikely(!share->global_changed && |
373 | (share->state.changed & STATE_CHANGED))) |
374 | { |
375 | /* purecov: begin inspected */ |
376 | /* unlock mutex as it can't be hold during _ma_mark_file_changed() */ |
377 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
378 | |
379 | /* |
380 | We have to flush the translog to ensure we have registered that the |
381 | table is open. |
382 | */ |
383 | if (flush_translog && share->now_transactional) |
384 | (void) translog_flush(share->state.logrec_file_id); |
385 | |
386 | _ma_mark_file_changed_now(share); |
387 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
388 | /* purecov: end */ |
389 | } |
390 | } |
391 | |
392 | /* |
393 | Send updated bitmap to the page cache |
394 | |
395 | SYNOPSIS |
396 | _ma_bitmap_flush() |
397 | share Share handler |
398 | |
399 | NOTES |
400 | In the future, _ma_bitmap_flush() will be called to flush changes don't |
401 | by this thread (ie, checking the changed flag is ok). The reason we |
402 | check it again in the mutex is that if someone else did a flush at the |
403 | same time, we don't have to do the write. |
404 | This is also ok for _ma_scan_init_block_record() which does not want to |
405 | miss rows: it cares only for committed rows, that is, rows for which there |
406 | was a commit before our transaction started; as commit and transaction's |
407 | start are protected by the same LOCK_trn_list mutex, we see memory at |
408 | least as new as at other transaction's commit time, so if the committed |
409 | rows caused bitmap->changed to be true, we see it; if we see 0 it really |
410 | means a flush happened since then. So, it's ok to read without bitmap's |
411 | mutex. |
412 | |
413 | RETURN |
414 | 0 ok |
415 | 1 error |
416 | */ |
417 | |
418 | my_bool _ma_bitmap_flush(MARIA_SHARE *share) |
419 | { |
420 | my_bool res= 0; |
421 | DBUG_ENTER("_ma_bitmap_flush" ); |
422 | if (share->bitmap.changed) |
423 | { |
424 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
425 | if (share->bitmap.changed) |
426 | { |
427 | /* |
428 | We have to mark the file changed here, as otherwise the following |
429 | write to pagecache may force a page out from this file, which would |
430 | cause _ma_mark_file_changed() to be called with bitmaplock hold! |
431 | */ |
432 | _ma_bitmap_mark_file_changed(share, 1); |
433 | res= write_changed_bitmap(share, &share->bitmap); |
434 | share->bitmap.changed= 0; |
435 | } |
436 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
437 | } |
438 | DBUG_RETURN(res); |
439 | } |
440 | |
441 | |
442 | /** |
443 | Dirty-page filtering criteria for bitmap pages |
444 | |
445 | @param type Page's type |
446 | @param pageno Page's number |
447 | @param rec_lsn Page's rec_lsn |
448 | @param arg pages_covered of bitmap |
449 | */ |
450 | |
451 | static enum pagecache_flush_filter_result |
452 | filter_flush_bitmap_pages(enum pagecache_page_type type |
453 | __attribute__ ((unused)), |
454 | pgcache_page_no_t pageno, |
455 | LSN rec_lsn __attribute__ ((unused)), |
456 | void *arg) |
457 | { |
458 | return ((pageno % (*(ulong*)arg)) == 0); |
459 | } |
460 | |
461 | |
462 | /** |
463 | Flushes current bitmap page to the pagecache, and then all bitmap pages |
464 | from pagecache to the file. Used by Checkpoint. |
465 | |
466 | @param share Table's share |
467 | */ |
468 | |
469 | my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) |
470 | { |
471 | my_bool res= 0; |
472 | uint send_signal= 0; |
473 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
474 | DBUG_ENTER("_ma_bitmap_flush_all" ); |
475 | |
476 | #ifdef EXTRA_DEBUG_BITMAP |
477 | { |
478 | char buff[160]; |
479 | uint len= my_sprintf(buff, |
480 | (buff, "bitmap_flush: fd: %d id: %u " |
481 | "changed: %d changed_not_flushed: %d " |
482 | "flush_all_requested: %d" , |
483 | share->bitmap.file.file, |
484 | share->id, |
485 | bitmap->changed, |
486 | bitmap->changed_not_flushed, |
487 | bitmap->flush_all_requested)); |
488 | (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, |
489 | (uchar*) buff, len); |
490 | } |
491 | #endif |
492 | |
493 | mysql_mutex_lock(&bitmap->bitmap_lock); |
494 | if (!bitmap->changed && !bitmap->changed_not_flushed) |
495 | { |
496 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
497 | DBUG_RETURN(0); |
498 | } |
499 | |
500 | _ma_bitmap_mark_file_changed(share, 0); |
501 | |
502 | /* |
503 | The following should be true as it was tested above. We have to test |
504 | this again as _ma_bitmap_mark_file_changed() did temporarly release |
505 | the bitmap mutex. |
506 | */ |
507 | if (bitmap->changed || bitmap->changed_not_flushed) |
508 | { |
509 | bitmap->flush_all_requested++; |
510 | bitmap->waiting_for_non_flushable++; |
511 | #if !WRONG_BITMAP_FLUSH |
512 | while (bitmap->non_flushable > 0) |
513 | { |
514 | DBUG_PRINT("info" , ("waiting for bitmap to be flushable" )); |
515 | mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); |
516 | } |
517 | #endif |
518 | bitmap->waiting_for_non_flushable--; |
519 | #ifdef EXTRA_DEBUG_BITMAP |
520 | { |
521 | char tmp[MAX_BITMAP_INFO_LENGTH]; |
522 | _ma_get_bitmap_description(bitmap, bitmap->map, bitmap->page, tmp); |
523 | (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, |
524 | (uchar*) tmp, strlen(tmp)); |
525 | } |
526 | #endif |
527 | |
528 | DBUG_ASSERT(bitmap->flush_all_requested == 1); |
529 | /* |
530 | Bitmap is in a flushable state: its contents in memory are reflected by |
531 | log records (complete REDO-UNDO groups) and all bitmap pages are |
532 | unpinned. We keep the mutex to preserve this situation, and flush to the |
533 | file. |
534 | */ |
535 | if (bitmap->changed) |
536 | { |
537 | bitmap->changed= FALSE; |
538 | res= write_changed_bitmap(share, bitmap); |
539 | } |
540 | /* |
541 | We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap |
542 | pages have been flushed. That's a condition of correctness of |
543 | Recovery: data pages may have been all flushed, if we write the |
544 | checkpoint record Recovery will start from after their REDOs. If |
545 | bitmap page was not flushed, as the REDOs about it will be skipped, it |
546 | will wrongly not be recovered. If bitmap pages had a rec_lsn it would |
547 | be different. |
548 | There should be no pinned pages as bitmap->non_flushable==0. |
549 | */ |
550 | if (flush_pagecache_blocks_with_filter(share->pagecache, |
551 | &bitmap->file, FLUSH_KEEP, |
552 | filter_flush_bitmap_pages, |
553 | &bitmap->pages_covered) & |
554 | PCFLUSH_PINNED_AND_ERROR) |
555 | res= TRUE; |
556 | bitmap->changed_not_flushed= FALSE; |
557 | bitmap->flush_all_requested--; |
558 | /* |
559 | Some well-behaved threads may be waiting for flush_all_requested to |
560 | become false, wake them up. |
561 | */ |
562 | DBUG_PRINT("info" , ("bitmap flusher waking up others" )); |
563 | send_signal= (bitmap->waiting_for_flush_all_requested | |
564 | bitmap->waiting_for_non_flushable); |
565 | } |
566 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
567 | if (send_signal) |
568 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
569 | DBUG_RETURN(res); |
570 | } |
571 | |
572 | |
573 | /** |
574 | @brief Lock bitmap from being used by another thread |
575 | |
576 | @fn _ma_bitmap_lock() |
577 | @param share Table's share |
578 | |
579 | @notes |
580 | This is a temporary solution for allowing someone to delete an inserted |
581 | duplicate-key row while someone else is doing concurrent inserts. |
582 | This is ok for now as duplicate key errors are not that common. |
583 | |
584 | In the future we will add locks for row-pages to ensure two threads doesn't |
585 | work at the same time on the same page. |
586 | */ |
587 | |
588 | void _ma_bitmap_lock(MARIA_SHARE *share) |
589 | { |
590 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
591 | DBUG_ENTER("_ma_bitmap_lock" ); |
592 | |
593 | if (!share->now_transactional) |
594 | DBUG_VOID_RETURN; |
595 | |
596 | mysql_mutex_lock(&bitmap->bitmap_lock); |
597 | bitmap->flush_all_requested++; |
598 | bitmap->waiting_for_non_flushable++; |
599 | while (bitmap->non_flushable) |
600 | { |
601 | DBUG_PRINT("info" , ("waiting for bitmap to be flushable" )); |
602 | mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); |
603 | } |
604 | bitmap->waiting_for_non_flushable--; |
605 | /* |
606 | Ensure that _ma_bitmap_flush_all() and _ma_bitmap_lock() are blocked. |
607 | ma_bitmap_flushable() is blocked thanks to 'flush_all_requested'. |
608 | */ |
609 | bitmap->non_flushable= 1; |
610 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
611 | DBUG_VOID_RETURN; |
612 | } |
613 | |
614 | /** |
615 | @brief Unlock bitmap after _ma_bitmap_lock() |
616 | |
617 | @fn _ma_bitmap_unlock() |
618 | @param share Table's share |
619 | */ |
620 | |
621 | void _ma_bitmap_unlock(MARIA_SHARE *share) |
622 | { |
623 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
624 | uint send_signal; |
625 | DBUG_ENTER("_ma_bitmap_unlock" ); |
626 | |
627 | if (!share->now_transactional) |
628 | DBUG_VOID_RETURN; |
629 | DBUG_ASSERT(bitmap->flush_all_requested > 0 && bitmap->non_flushable == 1); |
630 | |
631 | mysql_mutex_lock(&bitmap->bitmap_lock); |
632 | bitmap->non_flushable= 0; |
633 | _ma_bitmap_unpin_all(share); |
634 | send_signal= bitmap->waiting_for_non_flushable; |
635 | if (!--bitmap->flush_all_requested) |
636 | send_signal|= bitmap->waiting_for_flush_all_requested; |
637 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
638 | if (send_signal) |
639 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
640 | DBUG_VOID_RETURN; |
641 | } |
642 | |
643 | |
644 | /** |
645 | @brief Unpin all pinned bitmap pages |
646 | |
647 | @param share Table's share |
648 | |
649 | @return Operation status |
650 | @retval 0 ok |
651 | |
652 | @note This unpins pages pinned by other threads. |
653 | */ |
654 | |
655 | static void _ma_bitmap_unpin_all(MARIA_SHARE *share) |
656 | { |
657 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
658 | MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*) |
659 | dynamic_array_ptr(&bitmap->pinned_pages, 0)); |
660 | MARIA_PINNED_PAGE *pinned_page= page_link + bitmap->pinned_pages.elements; |
661 | DBUG_ENTER("_ma_bitmap_unpin_all" ); |
662 | DBUG_PRINT("info" , ("pinned: %u" , bitmap->pinned_pages.elements)); |
663 | while (pinned_page-- != page_link) |
664 | pagecache_unlock_by_link(share->pagecache, pinned_page->link, |
665 | pinned_page->unlock, PAGECACHE_UNPIN, |
666 | LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE, TRUE); |
667 | bitmap->pinned_pages.elements= 0; |
668 | DBUG_VOID_RETURN; |
669 | } |
670 | |
671 | |
672 | /* |
673 | Intialize bitmap in memory to a zero bitmap |
674 | |
675 | SYNOPSIS |
676 | _ma_bitmap_delete_all() |
677 | share Share handler |
678 | |
679 | NOTES |
680 | This is called on maria_delete_all_rows (truncate data file). |
681 | */ |
682 | |
683 | void _ma_bitmap_delete_all(MARIA_SHARE *share) |
684 | { |
685 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
686 | DBUG_ENTER("_ma_bitmap_delete_all" ); |
687 | if (bitmap->map) /* Not in create */ |
688 | { |
689 | bzero(bitmap->map, bitmap->block_size); |
690 | bitmap->changed= 1; |
691 | bitmap->page= 0; |
692 | bitmap->used_size= bitmap->full_tail_size= bitmap->full_head_size= 0; |
693 | bitmap->total_size= bitmap->max_total_size; |
694 | } |
695 | DBUG_VOID_RETURN; |
696 | } |
697 | |
698 | |
699 | /** |
700 | @brief Reset bitmap caches |
701 | |
702 | @fn _ma_bitmap_reset_cache() |
703 | @param share Maria share |
704 | |
705 | @notes |
706 | This is called after we have swapped file descriptors and we want |
707 | bitmap to forget all cached information. |
708 | It's also called directly after we have opened a file. |
709 | */ |
710 | |
711 | void _ma_bitmap_reset_cache(MARIA_SHARE *share) |
712 | { |
713 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
714 | |
715 | if (bitmap->map) /* If using bitmap */ |
716 | { |
717 | /* Forget changes in current bitmap page */ |
718 | bitmap->changed= 0; |
719 | |
720 | /* |
721 | We can't read a page yet, as in some case we don't have an active |
722 | page cache yet. |
723 | Pretend we have a dummy, full and not changed bitmap page in memory. |
724 | |
725 | We set bitmap->page to a value so that if we use it in |
726 | move_to_next_bitmap() it will point to page 0. |
727 | (This can only happen if writing to a bitmap page fails) |
728 | */ |
729 | bitmap->page= ((pgcache_page_no_t) 0) - bitmap->pages_covered; |
730 | bitmap->used_size= bitmap->total_size= bitmap->max_total_size; |
731 | bitmap->full_head_size= bitmap->full_tail_size= bitmap->max_total_size; |
732 | bfill(bitmap->map, share->block_size, 255); |
733 | #ifndef DBUG_OFF |
734 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
735 | #endif |
736 | |
737 | /* Start scanning for free space from start of file */ |
738 | share->state.first_bitmap_with_space = 0; |
739 | } |
740 | } |
741 | |
742 | |
743 | /* |
744 | Return bitmap pattern for the smallest head block that can hold 'size' |
745 | |
746 | SYNOPSIS |
747 | size_to_head_pattern() |
748 | bitmap Bitmap |
749 | size Requested size |
750 | |
751 | RETURN |
752 | 0-3 For a description of the bitmap sizes, see the header |
753 | */ |
754 | |
755 | static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
756 | { |
757 | if (size <= bitmap->sizes[3]) |
758 | return 3; |
759 | if (size <= bitmap->sizes[2]) |
760 | return 2; |
761 | if (size <= bitmap->sizes[1]) |
762 | return 1; |
763 | DBUG_ASSERT(size <= bitmap->sizes[0]); |
764 | return 0; |
765 | } |
766 | |
767 | |
768 | /* |
769 | Return bitmap pattern for head block where there is size bytes free |
770 | |
771 | SYNOPSIS |
772 | _ma_free_size_to_head_pattern() |
773 | bitmap Bitmap |
774 | size Requested size |
775 | |
776 | RETURN |
777 | 0-4 (Possible bitmap patterns for head block) |
778 | */ |
779 | |
780 | uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
781 | { |
782 | if (size < bitmap->sizes[3]) |
783 | return 4; |
784 | if (size < bitmap->sizes[2]) |
785 | return 3; |
786 | if (size < bitmap->sizes[1]) |
787 | return 2; |
788 | return (size < bitmap->sizes[0]) ? 1 : 0; |
789 | } |
790 | |
791 | |
792 | /* |
793 | Return bitmap pattern for the smallest tail block that can hold 'size' |
794 | |
795 | SYNOPSIS |
796 | size_to_tail_pattern() |
797 | bitmap Bitmap |
798 | size Requested size |
799 | |
800 | RETURN |
801 | 0, 5 or 6 For a description of the bitmap sizes, see the header |
802 | */ |
803 | |
804 | static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
805 | { |
806 | if (size <= bitmap->sizes[6]) |
807 | return 6; |
808 | if (size <= bitmap->sizes[5]) |
809 | return 5; |
810 | DBUG_ASSERT(size <= bitmap->sizes[0]); |
811 | return 0; |
812 | } |
813 | |
814 | |
815 | /* |
816 | Return bitmap pattern for tail block where there is size bytes free |
817 | |
818 | SYNOPSIS |
819 | free_size_to_tail_pattern() |
820 | bitmap Bitmap |
821 | size Requested size |
822 | |
823 | RETURN |
824 | 0, 5, 6, 7 For a description of the bitmap sizes, see the header |
825 | */ |
826 | |
827 | static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) |
828 | { |
829 | if (size >= bitmap->sizes[0]) |
830 | return 0; /* Revert to empty page */ |
831 | if (size < bitmap->sizes[6]) |
832 | return 7; |
833 | if (size < bitmap->sizes[5]) |
834 | return 6; |
835 | return 5; |
836 | } |
837 | |
838 | |
839 | /* |
840 | Return size guranteed to be available on a page |
841 | |
842 | SYNOPSIS |
843 | pattern_to_head_size() |
844 | bitmap Bitmap |
845 | pattern Pattern (0-7) |
846 | |
847 | RETURN |
848 | 0 - block_size |
849 | */ |
850 | |
851 | static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) |
852 | { |
853 | DBUG_ASSERT(pattern <= 7); |
854 | return bitmap->sizes[pattern]; |
855 | } |
856 | |
857 | |
858 | /* |
859 | Print bitmap for debugging |
860 | |
861 | SYNOPSIS |
862 | _ma_print_bitmap_changes() |
863 | bitmap Bitmap to print |
864 | |
865 | IMPLEMENTATION |
866 | Prints all changed bits since last call to _ma_print_bitmap(). |
867 | This is done by having a copy of the last bitmap in |
868 | bitmap->map+bitmap->block_size. |
869 | */ |
870 | |
871 | #ifndef DBUG_OFF |
872 | |
873 | static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap) |
874 | { |
875 | uchar *pos, *end, *org_pos; |
876 | ulong page; |
877 | DBUG_ENTER("_ma_print_bitmap_changes" ); |
878 | |
879 | end= bitmap->map + bitmap->used_size; |
880 | DBUG_LOCK_FILE; |
881 | fprintf(DBUG_FILE,"\nBitmap page changes at page: %lu bitmap: %p\n" , |
882 | (ulong) bitmap->page, bitmap->map); |
883 | |
884 | page= (ulong) bitmap->page+1; |
885 | for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ; |
886 | pos < end ; |
887 | pos+= 6, org_pos+= 6) |
888 | { |
889 | ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ |
890 | ulonglong org_bits= uint6korr(org_pos); |
891 | uint i; |
892 | |
893 | /* |
894 | Test if there is any changes in the next 16 bitmaps (to not have to |
895 | loop through all bits if we know they are the same) |
896 | */ |
897 | if (bits != org_bits) |
898 | { |
899 | for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3) |
900 | { |
901 | if ((bits & 7) != (org_bits & 7)) |
902 | fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n" , page+i, |
903 | bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]); |
904 | } |
905 | } |
906 | page+= 16; |
907 | } |
908 | fputc('\n', DBUG_FILE); |
909 | DBUG_UNLOCK_FILE; |
910 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
911 | DBUG_VOID_RETURN; |
912 | } |
913 | |
914 | |
915 | /* Print content of bitmap for debugging */ |
916 | |
917 | void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, |
918 | pgcache_page_no_t page) |
919 | { |
920 | uchar *pos, *end; |
921 | char llbuff[22]; |
922 | |
923 | DBUG_LOCK_FILE; |
924 | fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n" , llstr(page, llbuff)); |
925 | |
926 | page++; /* Skip bitmap page */ |
927 | for (pos= data, end= pos + bitmap->max_total_size; |
928 | pos < end ; |
929 | pos+= 6) |
930 | { |
931 | ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ |
932 | |
933 | /* |
934 | Test if there is any changes in the next 16 bitmaps (to not have to |
935 | loop through all bits if we know they are the same) |
936 | */ |
937 | if (bits) |
938 | { |
939 | uint i; |
940 | for (i= 0; i < 16 ; i++, bits>>= 3) |
941 | { |
942 | if (bits & 7) |
943 | fprintf(DBUG_FILE, "Page: %8s %s\n" , llstr(page+i, llbuff), |
944 | bits_to_txt[bits & 7]); |
945 | } |
946 | } |
947 | page+= 16; |
948 | } |
949 | fputc('\n', DBUG_FILE); |
950 | DBUG_UNLOCK_FILE; |
951 | } |
952 | |
953 | #endif /* DBUG_OFF */ |
954 | |
955 | |
956 | /* |
957 | Return content of bitmap as a printable string |
958 | */ |
959 | |
960 | void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap, |
961 | uchar *bitmap_data, |
962 | pgcache_page_no_t page, |
963 | char *out) |
964 | { |
965 | uchar *pos, *end; |
966 | uint count=0, dot_printed= 0, len; |
967 | char buff[80], last[80]; |
968 | |
969 | page++; |
970 | last[0]=0; |
971 | for (pos= bitmap_data, end= pos+ bitmap->used_size ; pos < end ; pos+= 6) |
972 | { |
973 | ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ |
974 | uint i; |
975 | |
976 | for (i= 0; i < 16 ; i++, bits>>= 3) |
977 | { |
978 | if (count > 60) |
979 | { |
980 | if (memcmp(buff, last, count)) |
981 | { |
982 | memcpy(last, buff, count); |
983 | len= sprintf(out, "%8lu: " , (ulong) page - count); |
984 | memcpy(out+len, buff, count); |
985 | out+= len + count + 1; |
986 | out[-1]= '\n'; |
987 | dot_printed= 0; |
988 | } |
989 | else if (!(dot_printed++)) |
990 | { |
991 | out= strmov(out, "...\n" ); |
992 | } |
993 | count= 0; |
994 | } |
995 | buff[count++]= '0' + (uint) (bits & 7); |
996 | page++; |
997 | } |
998 | } |
999 | len= sprintf(out, "%8lu: " , (ulong) page - count); |
1000 | memcpy(out+len, buff, count); |
1001 | out[len + count]= '\n'; |
1002 | out[len + count + 1]= 0; |
1003 | } |
1004 | |
1005 | |
1006 | /* |
1007 | Adjust bitmap->total_size to not go over max_data_file_size |
1008 | */ |
1009 | |
1010 | static void adjust_total_size(MARIA_HA *info, pgcache_page_no_t page) |
1011 | { |
1012 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
1013 | |
1014 | if (page < bitmap->last_bitmap_page) |
1015 | bitmap->total_size= bitmap->max_total_size; /* Use all bits in bitmap */ |
1016 | else |
1017 | bitmap->total_size= bitmap->last_total_size; |
1018 | } |
1019 | |
1020 | /*************************************************************************** |
1021 | Reading & writing bitmap pages |
1022 | ***************************************************************************/ |
1023 | |
1024 | /* |
1025 | Read a given bitmap page |
1026 | |
1027 | SYNOPSIS |
1028 | _ma_read_bitmap_page() |
1029 | info Maria handler |
1030 | bitmap Bitmap handler |
1031 | page Page to read |
1032 | |
1033 | NOTE |
1034 | We don't always have share->bitmap.bitmap_lock here |
1035 | (when called from_ma_check_bitmap_data() for example). |
1036 | |
1037 | RETURN |
1038 | 0 ok |
1039 | 1 error (Error writing old bitmap or reading bitmap page) |
1040 | */ |
1041 | |
1042 | static my_bool _ma_read_bitmap_page(MARIA_HA *info, |
1043 | MARIA_FILE_BITMAP *bitmap, |
1044 | pgcache_page_no_t page) |
1045 | { |
1046 | MARIA_SHARE *share= info->s; |
1047 | my_bool res; |
1048 | DBUG_ENTER("_ma_read_bitmap_page" ); |
1049 | DBUG_PRINT("enter" , ("page: %lld data_file_length: %lld" , |
1050 | (longlong) page, |
1051 | (longlong) share->state.state.data_file_length)); |
1052 | DBUG_ASSERT(page % bitmap->pages_covered == 0); |
1053 | DBUG_ASSERT(!bitmap->changed); |
1054 | |
1055 | bitmap->page= page; |
1056 | if ((page + 1) * bitmap->block_size > share->state.state.data_file_length) |
1057 | { |
1058 | /* Inexistent or half-created page */ |
1059 | res= _ma_bitmap_create_missing(info, bitmap, page); |
1060 | if (!res) |
1061 | adjust_total_size(info, page); |
1062 | DBUG_RETURN(res); |
1063 | } |
1064 | |
1065 | adjust_total_size(info, page); |
1066 | bitmap->full_head_size= bitmap->full_tail_size= 0; |
1067 | DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); |
1068 | res= pagecache_read(share->pagecache, |
1069 | &bitmap->file, page, 0, |
1070 | bitmap->map, PAGECACHE_PLAIN_PAGE, |
1071 | PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == NULL; |
1072 | |
1073 | if (!res) |
1074 | { |
1075 | /* Calculate used_size */ |
1076 | const uchar *data, *end= bitmap->map; |
1077 | for (data= bitmap->map + bitmap->total_size; --data >= end && *data == 0; ) |
1078 | {} |
1079 | bitmap->used_size= (uint) ((data + 1) - end); |
1080 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
1081 | } |
1082 | /* |
1083 | We can't check maria_bitmap_marker here as if the bitmap page |
1084 | previously had a true checksum and the user switched mode to not checksum |
1085 | this may have any value, except maria_normal_page_marker. |
1086 | |
1087 | Using maria_normal_page_marker gives us a protection against bugs |
1088 | when running without any checksums. |
1089 | */ |
1090 | |
1091 | #ifndef DBUG_OFF |
1092 | if (!res) |
1093 | { |
1094 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
1095 | _ma_check_bitmap(bitmap); |
1096 | } |
1097 | #endif |
1098 | DBUG_RETURN(res); |
1099 | } |
1100 | |
1101 | |
1102 | /* |
1103 | Change to another bitmap page |
1104 | |
1105 | SYNOPSIS |
1106 | _ma_change_bitmap_page() |
1107 | info Maria handler |
1108 | bitmap Bitmap handler |
1109 | page Bitmap page to read |
1110 | |
1111 | NOTES |
1112 | If old bitmap was changed, write it out before reading new one |
1113 | We return empty bitmap if page is outside of file size |
1114 | |
1115 | RETURN |
1116 | 0 ok |
1117 | 1 error (Error writing old bitmap or reading bitmap page) |
1118 | */ |
1119 | |
1120 | static my_bool _ma_change_bitmap_page(MARIA_HA *info, |
1121 | MARIA_FILE_BITMAP *bitmap, |
1122 | pgcache_page_no_t page) |
1123 | { |
1124 | DBUG_ENTER("_ma_change_bitmap_page" ); |
1125 | |
1126 | _ma_check_bitmap(bitmap); |
1127 | |
1128 | /* |
1129 | We have to mark the file changed here, as otherwise the following |
1130 | read/write to pagecache may force a page out from this file, which would |
1131 | cause _ma_mark_file_changed() to be called with bitmaplock hold! |
1132 | */ |
1133 | _ma_bitmap_mark_file_changed(info->s, 1); |
1134 | |
1135 | if (bitmap->changed) |
1136 | { |
1137 | if (write_changed_bitmap(info->s, bitmap)) |
1138 | DBUG_RETURN(1); |
1139 | bitmap->changed= 0; |
1140 | } |
1141 | DBUG_RETURN(_ma_read_bitmap_page(info, bitmap, page)); |
1142 | } |
1143 | |
1144 | |
1145 | /* |
1146 | Read next suitable bitmap |
1147 | |
1148 | SYNOPSIS |
1149 | move_to_next_bitmap() |
1150 | bitmap Bitmap handle |
1151 | |
1152 | NOTES |
1153 | The found bitmap may be full, so calling function may need to call this |
1154 | repeatedly until it finds enough space. |
1155 | |
1156 | TODO |
1157 | Add cache of bitmaps to not read something that is not usable |
1158 | |
1159 | RETURN |
1160 | 0 ok |
1161 | 1 error (either couldn't save old bitmap or read new one) |
1162 | */ |
1163 | |
1164 | static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap) |
1165 | { |
1166 | pgcache_page_no_t page= bitmap->page; |
1167 | MARIA_STATE_INFO *state= &info->s->state; |
1168 | DBUG_ENTER("move_to_next_bitmap" ); |
1169 | |
1170 | if (state->first_bitmap_with_space != ~(pgcache_page_no_t) 0 && |
1171 | state->first_bitmap_with_space != page) |
1172 | { |
1173 | page= state->first_bitmap_with_space; |
1174 | state->first_bitmap_with_space= ~(pgcache_page_no_t) 0; |
1175 | DBUG_ASSERT(page % bitmap->pages_covered == 0); |
1176 | } |
1177 | else |
1178 | { |
1179 | page+= bitmap->pages_covered; |
1180 | DBUG_ASSERT(page % bitmap->pages_covered == 0); |
1181 | } |
1182 | DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page)); |
1183 | } |
1184 | |
1185 | |
1186 | /**************************************************************************** |
1187 | Allocate data in bitmaps |
1188 | ****************************************************************************/ |
1189 | |
1190 | /* |
1191 | Store data in 'block' and mark the place used in the bitmap |
1192 | |
1193 | SYNOPSIS |
1194 | fill_block() |
1195 | bitmap Bitmap handle |
1196 | block Store data about what we found |
1197 | best_data Pointer to best 6 uchar aligned area in bitmap->map |
1198 | best_pos Which bit in *best_data the area starts |
1199 | 0 = first bit pattern, 1 second bit pattern etc |
1200 | best_bits The original value of the bits at best_pos |
1201 | fill_pattern Bitmap pattern to store in best_data[best_pos] |
1202 | |
1203 | NOTES |
1204 | We mark all pages to be 'TAIL's, which means that |
1205 | block->page_count is really a row position inside the page. |
1206 | */ |
1207 | |
1208 | static void fill_block(MARIA_FILE_BITMAP *bitmap, |
1209 | MARIA_BITMAP_BLOCK *block, |
1210 | uchar *best_data, uint best_pos, uint best_bits, |
1211 | uint fill_pattern) |
1212 | { |
1213 | uint page, offset, tmp; |
1214 | uchar *data; |
1215 | DBUG_ENTER("fill_block" ); |
1216 | |
1217 | /* For each 6 bytes we have 6*8/3= 16 patterns */ |
1218 | page= ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos; |
1219 | DBUG_ASSERT(page + 1 < bitmap->pages_covered); |
1220 | block->page= bitmap->page + 1 + page; |
1221 | block->page_count= TAIL_PAGE_COUNT_MARKER; |
1222 | block->empty_space= pattern_to_size(bitmap, best_bits); |
1223 | block->sub_blocks= 0; |
1224 | block->org_bitmap_value= best_bits; |
1225 | block->used= BLOCKUSED_TAIL; /* See _ma_bitmap_release_unused() */ |
1226 | |
1227 | /* |
1228 | Mark place used by reading/writing 2 bytes at a time to handle |
1229 | bitmaps in overlapping bytes |
1230 | */ |
1231 | best_pos*= 3; |
1232 | data= best_data+ best_pos / 8; |
1233 | offset= best_pos & 7; |
1234 | tmp= uint2korr(data); |
1235 | |
1236 | /* we turn off the 3 bits and replace them with fill_pattern */ |
1237 | tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); |
1238 | int2store(data, tmp); |
1239 | bitmap->changed= 1; |
1240 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
1241 | DBUG_VOID_RETURN; |
1242 | } |
1243 | |
1244 | |
1245 | /* |
1246 | Allocate data for head block |
1247 | |
1248 | SYNOPSIS |
1249 | allocate_head() |
1250 | bitmap bitmap |
1251 | size Size of data region we need to store |
1252 | block Store found information here |
1253 | |
1254 | IMPLEMENTATION |
1255 | Find the best-fit page to put a region of 'size' |
1256 | This is defined as the first page of the set of pages |
1257 | with the smallest free space that can hold 'size'. |
1258 | |
1259 | NOTES |
1260 | Updates bitmap->full_head_size while scanning data |
1261 | |
1262 | RETURN |
1263 | 0 ok (block is updated) |
1264 | 1 error (no space in bitmap; block is not touched) |
1265 | */ |
1266 | |
1267 | |
1268 | static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, |
1269 | MARIA_BITMAP_BLOCK *block) |
1270 | { |
1271 | uint min_bits= size_to_head_pattern(bitmap, size); |
1272 | uchar *data, *end; |
1273 | uchar *best_data= 0; |
1274 | uint best_bits= (uint) -1, UNINIT_VAR(best_pos); |
1275 | my_bool first_pattern= 0; /* if doing insert_order */ |
1276 | my_bool first_found= 1; |
1277 | MARIA_SHARE *share= bitmap->share; |
1278 | my_bool insert_order= |
1279 | MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_INSERT_ORDER); |
1280 | DBUG_ENTER("allocate_head" ); |
1281 | |
1282 | DBUG_ASSERT(size <= FULL_PAGE_SIZE(share)); |
1283 | |
1284 | end= bitmap->map + bitmap->used_size; |
1285 | if (insert_order && bitmap->page == share->last_insert_bitmap) |
1286 | { |
1287 | uint last_insert_page= share->last_insert_page; |
1288 | uint byte= 6 * (last_insert_page / 16); |
1289 | first_pattern= last_insert_page % 16; |
1290 | data= bitmap->map+byte; |
1291 | DBUG_ASSERT(data <= end); |
1292 | } |
1293 | else |
1294 | data= bitmap->map + (bitmap->full_head_size/6)*6; |
1295 | |
1296 | for (; data < end; data+= 6, first_pattern= 0) |
1297 | { |
1298 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
1299 | uint i; |
1300 | |
1301 | /* |
1302 | Skip common patterns |
1303 | We can skip empty pages (if we already found a match) or |
1304 | anything matching the following pattern as this will be either |
1305 | a full page or a tail page |
1306 | */ |
1307 | if ((!bits && best_data) || |
1308 | ((bits & 04444444444444444LL) == 04444444444444444LL)) |
1309 | continue; |
1310 | |
1311 | for (i= first_pattern, bits >>= (3 * first_pattern); i < 16 ; |
1312 | i++, bits >>= 3) |
1313 | { |
1314 | uint pattern= (uint) (bits & 7); |
1315 | |
1316 | if (pattern <= 3) /* Room for more data */ |
1317 | { |
1318 | if (first_found) |
1319 | { |
1320 | first_found= 0; |
1321 | bitmap->full_head_size= (uint)(data - bitmap->map); |
1322 | } |
1323 | } |
1324 | if (pattern <= min_bits) |
1325 | { |
1326 | /* There is enough space here, check if we have found better */ |
1327 | if ((int) pattern > (int) best_bits) |
1328 | { |
1329 | /* |
1330 | There is more than enough space here and it's better than what |
1331 | we have found so far. Remember it, as we will choose it if we |
1332 | don't find anything in this bitmap page. |
1333 | */ |
1334 | best_bits= pattern; |
1335 | best_data= data; |
1336 | best_pos= i; |
1337 | if (pattern == min_bits || bitmap->return_first_match) |
1338 | goto found; /* Best possible match */ |
1339 | } |
1340 | } |
1341 | } |
1342 | } |
1343 | if (!best_data) /* Found no place */ |
1344 | { |
1345 | if (data >= bitmap->map + bitmap->total_size) |
1346 | DBUG_RETURN(1); /* No space in bitmap */ |
1347 | DBUG_ASSERT(uint6korr(data) == 0); |
1348 | /* Allocate data at end of bitmap */ |
1349 | bitmap->used_size= (uint) (data - bitmap->map) + 6; |
1350 | best_data= data; |
1351 | best_pos= best_bits= 0; |
1352 | } |
1353 | else |
1354 | { |
1355 | /* |
1356 | This is not stricly needed as used_size should be alligned on 6, |
1357 | but for easier debugging lets try to keep it more accurate |
1358 | */ |
1359 | uint position= (uint) (best_data - bitmap->map) + 6; |
1360 | set_if_bigger(bitmap->used_size, position); |
1361 | } |
1362 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
1363 | |
1364 | found: |
1365 | if (insert_order) |
1366 | { |
1367 | share->last_insert_page= |
1368 | ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos; |
1369 | share->last_insert_bitmap= bitmap->page; |
1370 | } |
1371 | fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE); |
1372 | DBUG_RETURN(0); |
1373 | } |
1374 | |
1375 | |
1376 | /* |
1377 | Allocate data for tail block |
1378 | |
1379 | SYNOPSIS |
1380 | allocate_tail() |
1381 | bitmap bitmap |
1382 | size Size of block we need to find |
1383 | block Store found information here |
1384 | |
1385 | RETURN |
1386 | 0 ok (block is updated) |
1387 | 1 error (no space in bitmap; block is not touched) |
1388 | */ |
1389 | |
1390 | |
1391 | static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size, |
1392 | MARIA_BITMAP_BLOCK *block) |
1393 | { |
1394 | uint min_bits= size_to_tail_pattern(bitmap, size); |
1395 | uchar *data, *end, *best_data= 0; |
1396 | my_bool first_found= 1; |
1397 | uint best_bits= (uint) -1, UNINIT_VAR(best_pos); |
1398 | DBUG_ENTER("allocate_tail" ); |
1399 | DBUG_PRINT("enter" , ("size: %u" , size)); |
1400 | |
1401 | data= bitmap->map + (bitmap->full_tail_size/6)*6; |
1402 | end= bitmap->map + bitmap->used_size; |
1403 | |
1404 | /* |
1405 | We have to add DIR_ENTRY_SIZE here as this is not part of the data size |
1406 | See call to allocate_tail() in find_tail(). |
1407 | */ |
1408 | DBUG_ASSERT(size <= MAX_TAIL_SIZE(bitmap->block_size) + DIR_ENTRY_SIZE); |
1409 | |
1410 | for (; data < end; data += 6) |
1411 | { |
1412 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
1413 | uint i; |
1414 | |
1415 | /* |
1416 | Skip common patterns |
1417 | We can skip empty pages (if we already found a match) or |
1418 | the following patterns: 1-4 (head pages, not suitable for tail) or |
1419 | 7 (full tail page). See 'Dynamic size records' comment at start of file. |
1420 | |
1421 | At the moment we only skip full head and tail pages (ie, all bits are |
1422 | set) as this is easy to detect with one simple test and is a |
1423 | quite common case if we have blobs. |
1424 | */ |
1425 | |
1426 | if ((!bits && best_data) || bits == 0xffffffffffffLL || |
1427 | bits == 04444444444444444LL) |
1428 | continue; |
1429 | for (i= 0; i < 16; i++, bits >>= 3) |
1430 | { |
1431 | uint pattern= (uint) (bits & 7); |
1432 | |
1433 | if (pattern == 0 || |
1434 | (pattern > FULL_HEAD_PAGE && pattern < FULL_TAIL_PAGE)) |
1435 | { |
1436 | /* There is room for tail data */ |
1437 | if (first_found) |
1438 | { |
1439 | first_found= 0; |
1440 | bitmap->full_tail_size= (uint)(data - bitmap->map); |
1441 | } |
1442 | } |
1443 | |
1444 | if (pattern <= min_bits && (!pattern || pattern > FULL_HEAD_PAGE)) |
1445 | { |
1446 | if ((int) pattern > (int) best_bits) |
1447 | { |
1448 | best_bits= pattern; |
1449 | best_data= data; |
1450 | best_pos= i; |
1451 | if (pattern == min_bits) |
1452 | goto found; /* Can't be better */ |
1453 | } |
1454 | } |
1455 | } |
1456 | } |
1457 | if (!best_data) |
1458 | { |
1459 | if (data >= bitmap->map + bitmap->total_size) |
1460 | DBUG_RETURN(1); |
1461 | DBUG_ASSERT(uint6korr(data) == 0); |
1462 | /* Allocate data at end of bitmap */ |
1463 | best_data= data; |
1464 | bitmap->used_size= (uint) (data - bitmap->map) + 6; |
1465 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
1466 | best_pos= best_bits= 0; |
1467 | } |
1468 | |
1469 | found: |
1470 | fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE); |
1471 | DBUG_RETURN(0); |
1472 | } |
1473 | |
1474 | |
1475 | /* |
1476 | Allocate data for full blocks |
1477 | |
1478 | SYNOPSIS |
1479 | allocate_full_pages() |
1480 | bitmap bitmap |
1481 | pages_needed Total size in pages (bitmap->total_size) we would like to have |
1482 | block Store found information here |
1483 | full_page 1 if we are not allowed to split extent |
1484 | |
1485 | IMPLEMENTATION |
1486 | We will return the smallest area >= size. If there is no such |
1487 | block, we will return the biggest area that satisfies |
1488 | area_size >= MY_MIN(BLOB_SEGMENT_MIN_SIZE*full_page_size, size) |
1489 | |
1490 | To speed up searches, we will only consider areas that has at least 16 free |
1491 | pages starting on an even boundary. When finding such an area, we will |
1492 | extend it with all previous and following free pages. This will ensure |
1493 | we don't get holes between areas |
1494 | |
1495 | RETURN |
1496 | # Blocks used |
1497 | 0 error (no space in bitmap; block is not touched) |
1498 | */ |
1499 | |
1500 | static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, |
1501 | ulong pages_needed, |
1502 | MARIA_BITMAP_BLOCK *block, my_bool full_page) |
1503 | { |
1504 | uchar *data, *data_end, *page_end; |
1505 | uchar *best_data= 0; |
1506 | uint min_size; |
1507 | uint best_area_size, UNINIT_VAR(best_prefix_area_size); |
1508 | uint page, size; |
1509 | ulonglong UNINIT_VAR(best_prefix_bits); |
1510 | DBUG_ENTER("allocate_full_pages" ); |
1511 | DBUG_PRINT("enter" , ("pages_needed: %lu" , pages_needed)); |
1512 | |
1513 | min_size= pages_needed; |
1514 | if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE) |
1515 | min_size= BLOB_SEGMENT_MIN_SIZE; |
1516 | best_area_size= ~(uint) 0; |
1517 | |
1518 | data= bitmap->map + (bitmap->full_head_size/6)*6; |
1519 | data_end= bitmap->map + bitmap->used_size; |
1520 | page_end= bitmap->map + bitmap->total_size; |
1521 | |
1522 | for (; data < page_end; data+= 6) |
1523 | { |
1524 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
1525 | uchar *data_start; |
1526 | ulonglong prefix_bits= 0; |
1527 | uint area_size, prefix_area_size, suffix_area_size; |
1528 | |
1529 | /* Find area with at least 16 free pages */ |
1530 | if (bits) |
1531 | continue; |
1532 | data_start= data; |
1533 | /* Find size of area */ |
1534 | for (data+=6 ; data < data_end ; data+= 6) |
1535 | { |
1536 | if ((bits= uint6korr(data))) |
1537 | break; |
1538 | } |
1539 | /* |
1540 | Check if we are end of bitmap. In this case we know that |
1541 | the rest of the bitmap is usable |
1542 | */ |
1543 | if (data >= data_end) |
1544 | data= page_end; |
1545 | area_size= (uint) (data - data_start) / 6 * 16; |
1546 | if (area_size >= best_area_size) |
1547 | continue; |
1548 | prefix_area_size= suffix_area_size= 0; |
1549 | if (!bits) |
1550 | { |
1551 | /* |
1552 | End of page; All the rest of the bits on page are part of area |
1553 | This is needed because bitmap->used_size only covers the set bits |
1554 | in the bitmap. |
1555 | */ |
1556 | area_size+= (uint) (page_end - data) / 6 * 16; |
1557 | if (area_size >= best_area_size) |
1558 | break; |
1559 | data= page_end; |
1560 | } |
1561 | else |
1562 | { |
1563 | /* Add bits at end of page */ |
1564 | for (; !(bits & 7); bits >>= 3) |
1565 | suffix_area_size++; |
1566 | area_size+= suffix_area_size; |
1567 | } |
1568 | if (data_start != bitmap->map) |
1569 | { |
1570 | /* Add bits before page */ |
1571 | bits= prefix_bits= uint6korr(data_start - 6); |
1572 | DBUG_ASSERT(bits != 0); |
1573 | /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */ |
1574 | if (!(bits & 07000000000000000LL)) |
1575 | { |
1576 | data_start-= 6; |
1577 | do |
1578 | { |
1579 | prefix_area_size++; |
1580 | bits<<= 3; |
1581 | } while (!(bits & 07000000000000000LL)); |
1582 | area_size+= prefix_area_size; |
1583 | /* Calculate offset to page from data_start */ |
1584 | prefix_area_size= 16 - prefix_area_size; |
1585 | } |
1586 | } |
1587 | if (area_size >= min_size && area_size <= best_area_size) |
1588 | { |
1589 | best_data= data_start; |
1590 | best_area_size= area_size; |
1591 | best_prefix_bits= prefix_bits; |
1592 | best_prefix_area_size= prefix_area_size; |
1593 | |
1594 | /* Prefer to put data in biggest possible area */ |
1595 | if (area_size <= pages_needed) |
1596 | min_size= area_size; |
1597 | else |
1598 | min_size= pages_needed; |
1599 | } |
1600 | } |
1601 | if (!best_data) |
1602 | DBUG_RETURN(0); /* No room on page */ |
1603 | |
1604 | /* |
1605 | Now allocate MY_MIN(pages_needed, area_size), starting from |
1606 | best_start + best_prefix_area_size |
1607 | */ |
1608 | if (best_area_size > pages_needed) |
1609 | best_area_size= pages_needed; |
1610 | |
1611 | /* For each 6 bytes we have 6*8/3= 16 patterns */ |
1612 | page= ((uint) (best_data - bitmap->map) * 8) / 3 + best_prefix_area_size; |
1613 | block->page= bitmap->page + 1 + page; |
1614 | block->page_count= best_area_size; |
1615 | block->empty_space= 0; |
1616 | block->sub_blocks= 0; |
1617 | block->org_bitmap_value= 0; |
1618 | block->used= 0; |
1619 | DBUG_ASSERT(page + best_area_size < bitmap->pages_covered); |
1620 | DBUG_PRINT("info" , ("page: %lu page_count: %u" , |
1621 | (ulong) block->page, block->page_count)); |
1622 | |
1623 | if (best_prefix_area_size) |
1624 | { |
1625 | ulonglong tmp; |
1626 | /* Convert offset back to bits */ |
1627 | best_prefix_area_size= 16 - best_prefix_area_size; |
1628 | if (best_area_size < best_prefix_area_size) |
1629 | { |
1630 | tmp= (1LL << best_area_size*3) - 1; |
1631 | best_area_size= best_prefix_area_size; /* for easy end test */ |
1632 | } |
1633 | else |
1634 | tmp= (1LL << best_prefix_area_size*3) - 1; |
1635 | tmp<<= (16 - best_prefix_area_size) * 3; |
1636 | DBUG_ASSERT((best_prefix_bits & tmp) == 0); |
1637 | best_prefix_bits|= tmp; |
1638 | int6store(best_data, best_prefix_bits); |
1639 | if (!(best_area_size-= best_prefix_area_size)) |
1640 | goto end; |
1641 | best_data+= 6; |
1642 | } |
1643 | best_area_size*= 3; /* Bits to set */ |
1644 | size= best_area_size/8; /* Bytes to set */ |
1645 | bfill(best_data, size, 255); |
1646 | best_data+= size; |
1647 | if ((best_area_size-= size * 8)) |
1648 | { |
1649 | /* fill last uchar */ |
1650 | *best_data|= (uchar) ((1 << best_area_size) -1); |
1651 | best_data++; |
1652 | } |
1653 | if (data_end < best_data) |
1654 | { |
1655 | bitmap->used_size= (uint) (best_data - bitmap->map); |
1656 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
1657 | } |
1658 | end: |
1659 | bitmap->changed= 1; |
1660 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
1661 | DBUG_RETURN(block->page_count); |
1662 | } |
1663 | |
1664 | |
1665 | /**************************************************************************** |
1666 | Find right bitmaps where to store data |
1667 | ****************************************************************************/ |
1668 | |
1669 | /* |
1670 | Find right bitmap and position for head block |
1671 | |
1672 | SYNOPSIS |
1673 | find_head() |
1674 | info Maria handler |
1675 | length Size of data region we need store |
1676 | position Position in bitmap_blocks where to store the |
1677 | information for the head block. |
1678 | |
1679 | RETURN |
1680 | 0 ok |
1681 | 1 error |
1682 | */ |
1683 | |
1684 | static my_bool find_head(MARIA_HA *info, uint length, uint position) |
1685 | { |
1686 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
1687 | MARIA_BITMAP_BLOCK *block; |
1688 | /* |
1689 | There is always place for the head block in bitmap_blocks as these are |
1690 | preallocated at _ma_init_block_record(). |
1691 | */ |
1692 | block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); |
1693 | |
1694 | if (info->s->base.extra_options & MA_EXTRA_OPTIONS_INSERT_ORDER) |
1695 | { |
1696 | if (bitmap->page != info->s->last_insert_bitmap && |
1697 | _ma_change_bitmap_page(info, bitmap, |
1698 | info->s->last_insert_bitmap)) |
1699 | return 1; |
1700 | /* Don't allocate any blocks from earlier pages */ |
1701 | info->s->state.first_bitmap_with_space= info->s->last_insert_bitmap; |
1702 | } |
1703 | |
1704 | /* |
1705 | We need to have DIRENTRY_SIZE here to take into account that we may |
1706 | need an extra directory entry for the row |
1707 | */ |
1708 | while (allocate_head(bitmap, length + DIR_ENTRY_SIZE, block)) |
1709 | if (move_to_next_bitmap(info, bitmap)) |
1710 | return 1; |
1711 | return 0; |
1712 | } |
1713 | |
1714 | |
1715 | /* |
1716 | Find right bitmap and position for tail |
1717 | |
1718 | SYNOPSIS |
1719 | find_tail() |
1720 | info Maria handler |
1721 | length Size of data region we need store |
1722 | position Position in bitmap_blocks where to store the |
1723 | information for the head block. |
1724 | |
1725 | RETURN |
1726 | 0 ok |
1727 | 1 error |
1728 | */ |
1729 | |
1730 | static my_bool find_tail(MARIA_HA *info, uint length, uint position) |
1731 | { |
1732 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
1733 | MARIA_BITMAP_BLOCK *block; |
1734 | DBUG_ENTER("find_tail" ); |
1735 | DBUG_ASSERT(length <= info->s->block_size - PAGE_OVERHEAD_SIZE(info->s)); |
1736 | |
1737 | /* Needed, as there is no error checking in dynamic_element */ |
1738 | if (allocate_dynamic(&info->bitmap_blocks, position)) |
1739 | DBUG_RETURN(1); |
1740 | block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); |
1741 | |
1742 | /* |
1743 | We have to add DIR_ENTRY_SIZE to ensure we have space for the tail and |
1744 | it's directroy entry on the page |
1745 | */ |
1746 | while (allocate_tail(bitmap, length + DIR_ENTRY_SIZE, block)) |
1747 | if (move_to_next_bitmap(info, bitmap)) |
1748 | DBUG_RETURN(1); |
1749 | DBUG_RETURN(0); |
1750 | } |
1751 | |
1752 | |
1753 | /* |
1754 | Find right bitmap and position for full blocks in one extent |
1755 | |
1756 | SYNOPSIS |
1757 | find_mid() |
1758 | info Maria handler. |
1759 | pages How many pages to allocate. |
1760 | position Position in bitmap_blocks where to store the |
1761 | information for the head block. |
1762 | NOTES |
1763 | This is used to allocate the main extent after the 'head' block |
1764 | (Ie, the middle part of the head-middle-tail entry) |
1765 | |
1766 | RETURN |
1767 | 0 ok |
1768 | 1 error |
1769 | */ |
1770 | |
1771 | static my_bool find_mid(MARIA_HA *info, ulong pages, uint position) |
1772 | { |
1773 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
1774 | MARIA_BITMAP_BLOCK *block; |
1775 | block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); |
1776 | |
1777 | while (!allocate_full_pages(bitmap, pages, block, 1)) |
1778 | { |
1779 | if (move_to_next_bitmap(info, bitmap)) |
1780 | return 1; |
1781 | } |
1782 | return 0; |
1783 | } |
1784 | |
1785 | |
1786 | /* |
1787 | Find right bitmap and position for putting a blob |
1788 | |
1789 | SYNOPSIS |
1790 | find_blob() |
1791 | info Maria handler. |
1792 | length Length of the blob |
1793 | |
1794 | NOTES |
1795 | The extents are stored last in info->bitmap_blocks |
1796 | |
1797 | IMPLEMENTATION |
1798 | Allocate all full pages for the block + optionally one tail |
1799 | |
1800 | RETURN |
1801 | 0 ok |
1802 | 1 error |
1803 | */ |
1804 | |
1805 | static my_bool find_blob(MARIA_HA *info, ulong length) |
1806 | { |
1807 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
1808 | uint full_page_size= FULL_PAGE_SIZE(info->s); |
1809 | ulong pages; |
1810 | uint rest_length, used; |
1811 | uint UNINIT_VAR(first_block_pos); |
1812 | MARIA_BITMAP_BLOCK *first_block= 0; |
1813 | DBUG_ENTER("find_blob" ); |
1814 | DBUG_PRINT("enter" , ("length: %lu" , length)); |
1815 | |
1816 | pages= length / full_page_size; |
1817 | rest_length= (uint) (length - pages * full_page_size); |
1818 | if (rest_length >= MAX_TAIL_SIZE(info->s->block_size)) |
1819 | { |
1820 | pages++; |
1821 | rest_length= 0; |
1822 | } |
1823 | |
1824 | first_block_pos= info->bitmap_blocks.elements; |
1825 | if (pages) |
1826 | { |
1827 | MARIA_BITMAP_BLOCK *block; |
1828 | if (allocate_dynamic(&info->bitmap_blocks, |
1829 | info->bitmap_blocks.elements + |
1830 | pages / BLOB_SEGMENT_MIN_SIZE + 2)) |
1831 | DBUG_RETURN(1); |
1832 | block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements, |
1833 | MARIA_BITMAP_BLOCK*); |
1834 | do |
1835 | { |
1836 | /* |
1837 | We use 0x3fff here as the two upmost bits are reserved for |
1838 | TAIL_BIT and START_EXTENT_BIT |
1839 | */ |
1840 | used= allocate_full_pages(bitmap, |
1841 | (pages >= 0x3fff ? 0x3fff : (uint) pages), |
1842 | block, 0); |
1843 | if (!used) |
1844 | { |
1845 | if (move_to_next_bitmap(info, bitmap)) |
1846 | DBUG_RETURN(1); |
1847 | } |
1848 | else |
1849 | { |
1850 | pages-= used; |
1851 | info->bitmap_blocks.elements++; |
1852 | block++; |
1853 | } |
1854 | } while (pages != 0); |
1855 | } |
1856 | if (rest_length && find_tail(info, rest_length, |
1857 | info->bitmap_blocks.elements++)) |
1858 | DBUG_RETURN(1); |
1859 | first_block= dynamic_element(&info->bitmap_blocks, first_block_pos, |
1860 | MARIA_BITMAP_BLOCK*); |
1861 | first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos; |
1862 | DBUG_RETURN(0); |
1863 | } |
1864 | |
1865 | |
1866 | /* |
1867 | Find pages to put ALL blobs |
1868 | |
1869 | SYNOPSIS |
1870 | allocate_blobs() |
1871 | info Maria handler |
1872 | row Information of what is in the row (from calc_record_size()) |
1873 | |
1874 | RETURN |
1875 | 0 ok |
1876 | 1 error |
1877 | */ |
1878 | |
1879 | static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row) |
1880 | { |
1881 | ulong *length, *end; |
1882 | uint elements; |
1883 | /* |
1884 | Reserve size for: |
1885 | head block |
1886 | one extent |
1887 | tail block |
1888 | */ |
1889 | elements= info->bitmap_blocks.elements; |
1890 | for (length= row->blob_lengths, end= length + info->s->base.blobs; |
1891 | length < end; length++) |
1892 | { |
1893 | if (*length && find_blob(info, *length)) |
1894 | return 1; |
1895 | } |
1896 | row->extents_count= (info->bitmap_blocks.elements - elements); |
1897 | return 0; |
1898 | } |
1899 | |
1900 | |
1901 | /* |
1902 | Reserve the current head page |
1903 | |
1904 | SYNOPSIS |
1905 | use_head() |
1906 | info Maria handler |
1907 | page Page number to update |
1908 | (Note that caller guarantees this is in the active |
1909 | bitmap) |
1910 | size How much free space is left on the page |
1911 | block_position In which info->bitmap_block we have the |
1912 | information about the head block. |
1913 | |
1914 | NOTES |
1915 | This is used on update where we are updating an existing head page |
1916 | */ |
1917 | |
1918 | static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size, |
1919 | uint block_position) |
1920 | { |
1921 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
1922 | MARIA_BITMAP_BLOCK *block; |
1923 | uchar *data; |
1924 | uint offset, tmp, offset_page; |
1925 | DBUG_ENTER("use_head" ); |
1926 | |
1927 | DBUG_ASSERT(page % bitmap->pages_covered); |
1928 | |
1929 | block= dynamic_element(&info->bitmap_blocks, block_position, |
1930 | MARIA_BITMAP_BLOCK*); |
1931 | block->page= page; |
1932 | block->page_count= 1 + TAIL_BIT; |
1933 | block->empty_space= size; |
1934 | block->used= BLOCKUSED_TAIL; |
1935 | |
1936 | /* |
1937 | Mark place used by reading/writing 2 bytes at a time to handle |
1938 | bitmaps in overlapping bytes |
1939 | */ |
1940 | offset_page= (uint) (page - bitmap->page - 1) * 3; |
1941 | offset= offset_page & 7; |
1942 | data= bitmap->map + offset_page / 8; |
1943 | tmp= uint2korr(data); |
1944 | block->org_bitmap_value= (tmp >> offset) & 7; |
1945 | tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset); |
1946 | int2store(data, tmp); |
1947 | bitmap->changed= 1; |
1948 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
1949 | DBUG_VOID_RETURN; |
1950 | } |
1951 | |
1952 | |
1953 | /* |
1954 | Find out where to split the row (ie, what goes in head, middle, tail etc) |
1955 | |
1956 | SYNOPSIS |
1957 | find_where_to_split_row() |
1958 | share Maria share |
1959 | row Information of what is in the row (from calc_record_size()) |
1960 | extents Max number of extents we have to store in header |
1961 | split_size Free size on the page (The head length must be less |
1962 | than this) |
1963 | |
1964 | RETURN |
1965 | row_length for the head block. |
1966 | */ |
1967 | |
1968 | static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, |
1969 | uint extents, uint split_size) |
1970 | { |
1971 | uint *lengths, *lengths_end; |
1972 | /* |
1973 | Ensure we have the minimum required space on head page: |
1974 | - Header + length of field lengths (row->min_length) |
1975 | - Number of extents |
1976 | - One extent |
1977 | */ |
1978 | uint row_length= (row->min_length + |
1979 | size_to_store_key_length(extents) + |
1980 | ROW_EXTENT_SIZE); |
1981 | DBUG_ASSERT(row_length <= split_size); |
1982 | |
1983 | /* |
1984 | Store first in all_field_lengths the different parts that are written |
1985 | to the row. This needs to be in same order as in |
1986 | ma_block_rec.c::write_block_record() |
1987 | */ |
1988 | row->null_field_lengths[-3]= extents * ROW_EXTENT_SIZE; |
1989 | row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length; |
1990 | row->null_field_lengths[-1]= row->field_lengths_length; |
1991 | for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS, |
1992 | lengths_end= (lengths + share->base.fields - share->base.blobs + |
1993 | EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++) |
1994 | { |
1995 | if (row_length + *lengths > split_size) |
1996 | break; |
1997 | row_length+= *lengths; |
1998 | } |
1999 | return row_length; |
2000 | } |
2001 | |
2002 | |
2003 | /* |
2004 | Find where to write the middle parts of the row and the tail |
2005 | |
2006 | SYNOPSIS |
2007 | write_rest_of_head() |
2008 | info Maria handler |
2009 | position Position in bitmap_blocks. Is 0 for rows that needs |
2010 | full blocks (ie, has a head, middle part and optional tail) |
2011 | rest_length How much left of the head block to write. |
2012 | |
2013 | RETURN |
2014 | 0 ok |
2015 | 1 error |
2016 | */ |
2017 | |
2018 | static my_bool write_rest_of_head(MARIA_HA *info, uint position, |
2019 | ulong rest_length) |
2020 | { |
2021 | MARIA_SHARE *share= info->s; |
2022 | uint full_page_size= FULL_PAGE_SIZE(share); |
2023 | MARIA_BITMAP_BLOCK *block; |
2024 | DBUG_ENTER("write_rest_of_head" ); |
2025 | DBUG_PRINT("enter" , ("position: %u rest_length: %lu" , position, |
2026 | rest_length)); |
2027 | |
2028 | if (position == 0) |
2029 | { |
2030 | /* Write out full pages */ |
2031 | uint pages= rest_length / full_page_size; |
2032 | |
2033 | rest_length%= full_page_size; |
2034 | if (rest_length >= MAX_TAIL_SIZE(share->block_size)) |
2035 | { |
2036 | /* Put tail on a full page */ |
2037 | pages++; |
2038 | rest_length= 0; |
2039 | } |
2040 | if (find_mid(info, pages, 1)) |
2041 | DBUG_RETURN(1); |
2042 | /* |
2043 | Insert empty block after full pages, to allow write_block_record() to |
2044 | split segment into used + free page |
2045 | */ |
2046 | block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*); |
2047 | block->page_count= 0; |
2048 | block->used= 0; |
2049 | } |
2050 | if (rest_length) |
2051 | { |
2052 | if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1)) |
2053 | DBUG_RETURN(1); |
2054 | } |
2055 | else |
2056 | { |
2057 | /* Empty tail block */ |
2058 | block= dynamic_element(&info->bitmap_blocks, |
2059 | ELEMENTS_RESERVED_FOR_MAIN_PART - 1, |
2060 | MARIA_BITMAP_BLOCK *); |
2061 | block->page_count= 0; |
2062 | block->used= 0; |
2063 | } |
2064 | DBUG_RETURN(0); |
2065 | } |
2066 | |
2067 | |
2068 | /* |
2069 | Find where to store one row |
2070 | |
2071 | SYNPOSIS |
2072 | _ma_bitmap_find_place() |
2073 | info Maria handler |
2074 | row Information about row to write |
2075 | blocks Store data about allocated places here |
2076 | |
2077 | RETURN |
2078 | 0 ok |
2079 | row->space_on_head_page contains minimum number of bytes we |
2080 | expect to put on the head page. |
2081 | 1 error |
2082 | my_errno is set to error |
2083 | */ |
2084 | |
2085 | my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, |
2086 | MARIA_BITMAP_BLOCKS *blocks) |
2087 | { |
2088 | MARIA_SHARE *share= info->s; |
2089 | my_bool res= 1; |
2090 | uint full_page_size, position, max_page_size; |
2091 | uint head_length, row_length, rest_length, extents_length; |
2092 | DBUG_ENTER("_ma_bitmap_find_place" ); |
2093 | |
2094 | blocks->count= 0; |
2095 | blocks->tail_page_skipped= blocks->page_skipped= 0; |
2096 | row->extents_count= 0; |
2097 | |
2098 | /* |
2099 | Reserve place for the following blocks: |
2100 | - Head block |
2101 | - Full page block |
2102 | - Marker block to allow write_block_record() to split full page blocks |
2103 | into full and free part |
2104 | - Tail block |
2105 | */ |
2106 | |
2107 | info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; |
2108 | max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE(share)); |
2109 | |
2110 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
2111 | |
2112 | if (row->total_length <= max_page_size) |
2113 | { |
2114 | /* Row fits in one page */ |
2115 | position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; |
2116 | if (find_head(info, (uint) row->total_length, position)) |
2117 | goto abort; |
2118 | row->space_on_head_page= row->total_length; |
2119 | goto end; |
2120 | } |
2121 | |
2122 | /* |
2123 | First allocate all blobs so that we can find out the needed size for |
2124 | the main block. |
2125 | */ |
2126 | if (row->blob_length && allocate_blobs(info, row)) |
2127 | goto abort; |
2128 | |
2129 | extents_length= row->extents_count * ROW_EXTENT_SIZE; |
2130 | /* |
2131 | The + 3 is reserved for storing the number of segments in the row header. |
2132 | */ |
2133 | if ((head_length= (row->head_length + extents_length + 3)) <= |
2134 | max_page_size) |
2135 | { |
2136 | /* Main row part fits into one page */ |
2137 | position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; |
2138 | if (find_head(info, head_length, position)) |
2139 | goto abort; |
2140 | row->space_on_head_page= head_length; |
2141 | goto end; |
2142 | } |
2143 | |
2144 | /* Allocate enough space */ |
2145 | head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; |
2146 | |
2147 | /* The first segment size is stored in 'row_length' */ |
2148 | row_length= find_where_to_split_row(share, row, row->extents_count + |
2149 | ELEMENTS_RESERVED_FOR_MAIN_PART-1, |
2150 | max_page_size); |
2151 | |
2152 | full_page_size= MAX_TAIL_SIZE(share->block_size); |
2153 | position= 0; |
2154 | rest_length= head_length - row_length; |
2155 | if (rest_length <= full_page_size) |
2156 | position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ |
2157 | if (find_head(info, row_length, position)) |
2158 | goto abort; |
2159 | row->space_on_head_page= row_length; |
2160 | |
2161 | if (write_rest_of_head(info, position, rest_length)) |
2162 | goto abort; |
2163 | |
2164 | end: |
2165 | blocks->block= dynamic_element(&info->bitmap_blocks, position, |
2166 | MARIA_BITMAP_BLOCK*); |
2167 | blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; |
2168 | /* First block's page_count is for all blocks */ |
2169 | blocks->count= info->bitmap_blocks.elements - position; |
2170 | res= 0; |
2171 | |
2172 | abort: |
2173 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
2174 | DBUG_RETURN(res); |
2175 | } |
2176 | |
2177 | |
2178 | /* |
2179 | Find where to put row on update (when head page is already defined) |
2180 | |
2181 | SYNPOSIS |
2182 | _ma_bitmap_find_new_place() |
2183 | info Maria handler |
2184 | row Information about row to write |
2185 | page On which page original row was stored |
2186 | free_size Free size on head page |
2187 | blocks Store data about allocated places here |
2188 | |
2189 | NOTES |
2190 | This function is only called when the new row can't fit in the space of |
2191 | the old row in the head page. |
2192 | |
2193 | This is essently same as _ma_bitmap_find_place() except that |
2194 | we don't call find_head() to search in bitmaps where to put the page. |
2195 | |
2196 | RETURN |
2197 | 0 ok |
2198 | 1 error |
2199 | */ |
2200 | |
2201 | my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, |
2202 | pgcache_page_no_t page, uint free_size, |
2203 | MARIA_BITMAP_BLOCKS *blocks) |
2204 | { |
2205 | MARIA_SHARE *share= info->s; |
2206 | my_bool res= 1; |
2207 | uint position; |
2208 | uint head_length, row_length, rest_length, extents_length; |
2209 | ulonglong bitmap_page; |
2210 | DBUG_ENTER("_ma_bitmap_find_new_place" ); |
2211 | |
2212 | blocks->count= 0; |
2213 | blocks->tail_page_skipped= blocks->page_skipped= 0; |
2214 | row->extents_count= 0; |
2215 | info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; |
2216 | |
2217 | mysql_mutex_lock(&share->bitmap.bitmap_lock); |
2218 | |
2219 | /* |
2220 | First allocate all blobs (so that we can find out the needed size for |
2221 | the main block. |
2222 | */ |
2223 | if (row->blob_length && allocate_blobs(info, row)) |
2224 | goto abort; |
2225 | |
2226 | /* Switch bitmap to current head page */ |
2227 | bitmap_page= page - page % share->bitmap.pages_covered; |
2228 | |
2229 | if (share->bitmap.page != bitmap_page && |
2230 | _ma_change_bitmap_page(info, &share->bitmap, bitmap_page)) |
2231 | goto abort; |
2232 | |
2233 | extents_length= row->extents_count * ROW_EXTENT_SIZE; |
2234 | if ((head_length= (row->head_length + extents_length + 3)) <= free_size) |
2235 | { |
2236 | /* Main row part fits into one page */ |
2237 | position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; |
2238 | use_head(info, page, head_length, position); |
2239 | row->space_on_head_page= head_length; |
2240 | goto end; |
2241 | } |
2242 | |
2243 | /* Allocate enough space */ |
2244 | head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; |
2245 | |
2246 | /* |
2247 | The first segment size is stored in 'row_length' |
2248 | We have to add ELEMENTS_RESERVED_FOR_MAIN_PART here as the extent |
2249 | information may be up to this size when the header splits. |
2250 | */ |
2251 | row_length= find_where_to_split_row(share, row, row->extents_count + |
2252 | ELEMENTS_RESERVED_FOR_MAIN_PART-1, |
2253 | free_size); |
2254 | |
2255 | position= 0; |
2256 | rest_length= head_length - row_length; |
2257 | if (rest_length <= MAX_TAIL_SIZE(share->block_size)) |
2258 | position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ |
2259 | use_head(info, page, row_length, position); |
2260 | row->space_on_head_page= row_length; |
2261 | |
2262 | if (write_rest_of_head(info, position, rest_length)) |
2263 | goto abort; |
2264 | |
2265 | end: |
2266 | blocks->block= dynamic_element(&info->bitmap_blocks, position, |
2267 | MARIA_BITMAP_BLOCK*); |
2268 | blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; |
2269 | /* First block's page_count is for all blocks */ |
2270 | blocks->count= info->bitmap_blocks.elements - position; |
2271 | res= 0; |
2272 | |
2273 | abort: |
2274 | mysql_mutex_unlock(&share->bitmap.bitmap_lock); |
2275 | DBUG_RETURN(res); |
2276 | } |
2277 | |
2278 | |
2279 | /**************************************************************************** |
2280 | Clear and reset bits |
2281 | ****************************************************************************/ |
2282 | |
2283 | /* |
2284 | Set fill pattern for a page |
2285 | |
2286 | set_page_bits() |
2287 | info Maria handler |
2288 | bitmap Bitmap handler |
2289 | page Adress to page |
2290 | fill_pattern Pattern (not size) for page |
2291 | |
2292 | NOTES |
2293 | Page may not be part of active bitmap |
2294 | |
2295 | RETURN |
2296 | 0 ok |
2297 | 1 error |
2298 | */ |
2299 | |
2300 | static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, |
2301 | pgcache_page_no_t page, uint fill_pattern) |
2302 | { |
2303 | pgcache_page_no_t bitmap_page; |
2304 | uint offset_page, offset, tmp, org_tmp, used_offset; |
2305 | uchar *data; |
2306 | DBUG_ENTER("set_page_bits" ); |
2307 | DBUG_ASSERT(fill_pattern <= 7); |
2308 | |
2309 | bitmap_page= page - page % bitmap->pages_covered; |
2310 | if (bitmap_page != bitmap->page && |
2311 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
2312 | DBUG_RETURN(1); |
2313 | |
2314 | /* Find page number from start of bitmap */ |
2315 | offset_page= (uint) (page - bitmap->page - 1); |
2316 | |
2317 | /* |
2318 | Mark place used by reading/writing 2 bytes at a time to handle |
2319 | bitmaps in overlapping bytes |
2320 | */ |
2321 | offset_page*= 3; |
2322 | offset= offset_page & 7; |
2323 | data= bitmap->map + offset_page / 8; |
2324 | org_tmp= tmp= uint2korr(data); |
2325 | tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); |
2326 | if (tmp == org_tmp) |
2327 | DBUG_RETURN(0); /* No changes */ |
2328 | |
2329 | /* |
2330 | Take care to not write bytes outside of bitmap. |
2331 | fill_pattern is 3 bits, so we need to write two bytes |
2332 | if bit position we write to is > (8-3) |
2333 | */ |
2334 | if (offset > 5) |
2335 | int2store(data, tmp); |
2336 | else |
2337 | data[0]= tmp; |
2338 | |
2339 | /* |
2340 | Reset full_head_size or full_tail_size if we are releasing data before |
2341 | it. Increase used_size if we are allocating data. |
2342 | */ |
2343 | used_offset= (uint) (data - bitmap->map); |
2344 | if (fill_pattern < 4) |
2345 | set_if_smaller(bitmap->full_head_size, used_offset); |
2346 | if (fill_pattern == 0 || (fill_pattern > 4 && fill_pattern < 7)) |
2347 | set_if_smaller(bitmap->full_tail_size, used_offset); |
2348 | if (fill_pattern != 0) |
2349 | { |
2350 | /* Calulcate which was the last changed byte */ |
2351 | used_offset+= offset > 5 ? 2 : 1; |
2352 | set_if_bigger(bitmap->used_size, used_offset); |
2353 | } |
2354 | |
2355 | _ma_check_bitmap(bitmap); |
2356 | bitmap->changed= 1; |
2357 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
2358 | if (fill_pattern != FULL_HEAD_PAGE && fill_pattern != FULL_TAIL_PAGE) |
2359 | set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page); |
2360 | /* |
2361 | Note that if the condition above is false (page is full), and all pages of |
2362 | this bitmap are now full, and that bitmap page was |
2363 | first_bitmap_with_space, we don't modify first_bitmap_with_space, indeed |
2364 | its value still tells us where to start our search for a bitmap with space |
2365 | (which is for sure after this full one). |
2366 | That does mean that first_bitmap_with_space is only a lower bound. |
2367 | */ |
2368 | DBUG_RETURN(0); |
2369 | } |
2370 | |
2371 | |
2372 | /* |
2373 | Get bitmap pattern for a given page |
2374 | |
2375 | SYNOPSIS |
2376 | bitmap_get_page_bits() |
2377 | info Maria handler |
2378 | bitmap Bitmap handler |
2379 | page Page number |
2380 | |
2381 | RETURN |
2382 | 0-7 Bitmap pattern |
2383 | ~0 Error (couldn't read page) |
2384 | */ |
2385 | |
2386 | static uint bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, |
2387 | pgcache_page_no_t page) |
2388 | { |
2389 | pgcache_page_no_t bitmap_page; |
2390 | uint offset_page, offset, tmp; |
2391 | uchar *data; |
2392 | DBUG_ENTER("_ma_bitmap_get_page_bits" ); |
2393 | |
2394 | bitmap_page= page - page % bitmap->pages_covered; |
2395 | if (bitmap_page != bitmap->page && |
2396 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
2397 | DBUG_RETURN(~ (uint) 0); |
2398 | |
2399 | /* Find page number from start of bitmap */ |
2400 | offset_page= (uint) (page - bitmap->page - 1); |
2401 | /* |
2402 | Mark place used by reading/writing 2 bytes at a time to handle |
2403 | bitmaps in overlapping bytes |
2404 | */ |
2405 | offset_page*= 3; |
2406 | offset= offset_page & 7; |
2407 | data= bitmap->map + offset_page / 8; |
2408 | tmp= uint2korr(data); |
2409 | DBUG_RETURN((tmp >> offset) & 7); |
2410 | } |
2411 | |
2412 | |
2413 | /* As above, but take a lock while getting the data */ |
2414 | |
2415 | uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, |
2416 | pgcache_page_no_t page) |
2417 | { |
2418 | uint tmp; |
2419 | mysql_mutex_lock(&bitmap->bitmap_lock); |
2420 | tmp= bitmap_get_page_bits(info, bitmap, page); |
2421 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
2422 | return tmp; |
2423 | } |
2424 | |
2425 | |
2426 | /* |
2427 | Mark all pages in a region as free |
2428 | |
2429 | SYNOPSIS |
2430 | _ma_bitmap_reset_full_page_bits() |
2431 | info Maria handler |
2432 | bitmap Bitmap handler |
2433 | page Start page |
2434 | page_count Number of pages |
2435 | |
2436 | NOTES |
2437 | We assume that all pages in region is covered by same bitmap |
2438 | One must have a lock on info->s->bitmap.bitmap_lock |
2439 | |
2440 | RETURN |
2441 | 0 ok |
2442 | 1 Error (when reading bitmap) |
2443 | */ |
2444 | |
2445 | my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info, |
2446 | MARIA_FILE_BITMAP *bitmap, |
2447 | pgcache_page_no_t page, |
2448 | uint page_count) |
2449 | { |
2450 | ulonglong bitmap_page; |
2451 | uint offset, bit_start, bit_count, tmp, byte_offset; |
2452 | uchar *data; |
2453 | DBUG_ENTER("_ma_bitmap_reset_full_page_bits" ); |
2454 | DBUG_PRINT("enter" , ("page: %lu page_count: %u" , (ulong) page, page_count)); |
2455 | mysql_mutex_assert_owner(&info->s->bitmap.bitmap_lock); |
2456 | |
2457 | bitmap_page= page - page % bitmap->pages_covered; |
2458 | DBUG_ASSERT(page != bitmap_page); |
2459 | |
2460 | if (bitmap_page != bitmap->page && |
2461 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
2462 | DBUG_RETURN(1); |
2463 | |
2464 | /* Find page number from start of bitmap */ |
2465 | offset= (uint) (page - bitmap->page - 1); |
2466 | |
2467 | /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */ |
2468 | bit_start= offset * 3; |
2469 | bit_count= page_count * 3; |
2470 | |
2471 | byte_offset= bit_start/8; |
2472 | data= bitmap->map + byte_offset; |
2473 | offset= bit_start & 7; |
2474 | |
2475 | tmp= (255 << offset); /* Bits to keep */ |
2476 | if (bit_count + offset < 8) |
2477 | { |
2478 | /* Only clear bits between 'offset' and 'offset+bit_count-1' */ |
2479 | tmp^= (255 << (offset + bit_count)); |
2480 | } |
2481 | *data&= ~tmp; |
2482 | |
2483 | set_if_smaller(bitmap->full_head_size, byte_offset); |
2484 | set_if_smaller(bitmap->full_tail_size, byte_offset); |
2485 | |
2486 | if ((int) (bit_count-= (8 - offset)) > 0) |
2487 | { |
2488 | uint fill; |
2489 | data++; |
2490 | /* |
2491 | -1 is here to avoid one 'if' statement and to let the following code |
2492 | handle the last byte |
2493 | */ |
2494 | if ((fill= (bit_count - 1) / 8)) |
2495 | { |
2496 | bzero(data, fill); |
2497 | data+= fill; |
2498 | } |
2499 | bit_count-= fill * 8; /* Bits left to clear */ |
2500 | tmp= (1 << bit_count) - 1; |
2501 | *data&= ~tmp; |
2502 | } |
2503 | set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page); |
2504 | bitmap->changed= 1; |
2505 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
2506 | DBUG_RETURN(0); |
2507 | } |
2508 | |
2509 | |
2510 | /* |
2511 | Set all pages in a region as used |
2512 | |
2513 | SYNOPSIS |
2514 | _ma_bitmap_set_full_page_bits() |
2515 | info Maria handler |
2516 | bitmap Bitmap handler |
2517 | page Start page |
2518 | page_count Number of pages |
2519 | |
2520 | NOTES |
2521 | We assume that all pages in region is covered by same bitmap |
2522 | One must have a lock on info->s->bitmap.bitmap_lock |
2523 | |
2524 | RETURN |
2525 | 0 ok |
2526 | 1 Error (when reading bitmap) |
2527 | */ |
2528 | |
2529 | my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info, |
2530 | MARIA_FILE_BITMAP *bitmap, |
2531 | pgcache_page_no_t page, uint page_count) |
2532 | { |
2533 | ulonglong bitmap_page; |
2534 | uint offset, bit_start, bit_count, tmp; |
2535 | uchar *data; |
2536 | DBUG_ENTER("_ma_bitmap_set_full_page_bits" ); |
2537 | DBUG_PRINT("enter" , ("page: %lu page_count: %u" , (ulong) page, page_count)); |
2538 | mysql_mutex_assert_owner(&info->s->bitmap.bitmap_lock); |
2539 | |
2540 | bitmap_page= page - page % bitmap->pages_covered; |
2541 | if (page == bitmap_page || |
2542 | page + page_count > bitmap_page + bitmap->pages_covered) |
2543 | { |
2544 | DBUG_ASSERT(0); /* Wrong in data */ |
2545 | DBUG_RETURN(1); |
2546 | } |
2547 | |
2548 | if (bitmap_page != bitmap->page && |
2549 | _ma_change_bitmap_page(info, bitmap, bitmap_page)) |
2550 | DBUG_RETURN(1); |
2551 | |
2552 | /* Find page number from start of bitmap */ |
2553 | offset= (uint) (page - bitmap->page - 1); |
2554 | |
2555 | /* Set bits from 'page * 3' -> '(page + page_count) * 3' */ |
2556 | bit_start= offset * 3; |
2557 | bit_count= page_count * 3; |
2558 | |
2559 | data= bitmap->map + bit_start / 8; |
2560 | offset= bit_start & 7; |
2561 | |
2562 | tmp= (255 << offset); /* Bits to keep */ |
2563 | if (bit_count + offset < 8) |
2564 | { |
2565 | /* Only set bits between 'offset' and 'offset+bit_count-1' */ |
2566 | tmp^= (255 << (offset + bit_count)); |
2567 | } |
2568 | *data|= tmp; |
2569 | |
2570 | if ((int) (bit_count-= (8 - offset)) > 0) |
2571 | { |
2572 | uint fill; |
2573 | data++; |
2574 | /* |
2575 | -1 is here to avoid one 'if' statement and to let the following code |
2576 | handle the last byte |
2577 | */ |
2578 | if ((fill= (bit_count - 1) / 8)) |
2579 | { |
2580 | bfill(data, fill, 255); |
2581 | data+= fill; |
2582 | } |
2583 | bit_count-= fill * 8; /* Bits left to set */ |
2584 | tmp= (1 << bit_count) - 1; |
2585 | *data|= tmp; |
2586 | } |
2587 | set_if_bigger(bitmap->used_size, (uint) (data - bitmap->map) + 1); |
2588 | _ma_check_bitmap(bitmap); |
2589 | bitmap->changed= 1; |
2590 | DBUG_EXECUTE("bitmap" , _ma_print_bitmap_changes(bitmap);); |
2591 | DBUG_RETURN(0); |
2592 | } |
2593 | |
2594 | |
2595 | /** |
2596 | @brief |
2597 | Make a transition of MARIA_FILE_BITMAP::non_flushable. |
2598 | If the bitmap becomes flushable, which requires that REDO-UNDO has been |
2599 | logged and all bitmap pages touched by the thread have a correct |
2600 | allocation, it unpins all bitmap pages, and if _ma_bitmap_flush_all() is |
2601 | waiting (in practice it is a checkpoint), it wakes it up. |
2602 | If the bitmap becomes or stays unflushable, the function merely records it |
2603 | unless a concurrent _ma_bitmap_flush_all() is happening, in which case the |
2604 | function first waits for the flush to be done. |
2605 | |
2606 | @note |
2607 | this sets info->non_flushable_state to 1 if we have incremented |
2608 | bitmap->non_flushable and not yet decremented it. |
2609 | |
2610 | @param share Table's share |
2611 | @param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable |
2612 | (-1 or +1). |
2613 | */ |
2614 | |
2615 | void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc) |
2616 | { |
2617 | MARIA_SHARE *share= info->s; |
2618 | MARIA_FILE_BITMAP *bitmap; |
2619 | DBUG_ENTER("_ma_bitmap_flushable" ); |
2620 | |
2621 | /* |
2622 | Not transactional tables are never automaticly flushed and needs no |
2623 | protection |
2624 | */ |
2625 | if (!share->now_transactional) |
2626 | DBUG_VOID_RETURN; |
2627 | |
2628 | bitmap= &share->bitmap; |
2629 | mysql_mutex_lock(&bitmap->bitmap_lock); |
2630 | |
2631 | if (non_flushable_inc == -1) |
2632 | { |
2633 | DBUG_ASSERT((int) bitmap->non_flushable > 0); |
2634 | DBUG_ASSERT(info->non_flushable_state == 1); |
2635 | if (--bitmap->non_flushable == 0) |
2636 | { |
2637 | /* |
2638 | We unlock and unpin pages locked and pinned by other threads. It does |
2639 | not seem to be an issue as all bitmap changes are serialized with |
2640 | the bitmap's mutex. |
2641 | */ |
2642 | _ma_bitmap_unpin_all(share); |
2643 | if (unlikely(bitmap->waiting_for_non_flushable)) |
2644 | { |
2645 | DBUG_PRINT("info" , ("bitmap flushable waking up flusher" )); |
2646 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
2647 | } |
2648 | } |
2649 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
2650 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
2651 | info->non_flushable_state= 0; |
2652 | DBUG_VOID_RETURN; |
2653 | } |
2654 | DBUG_ASSERT(non_flushable_inc == 1); |
2655 | DBUG_ASSERT(info->non_flushable_state == 0); |
2656 | |
2657 | bitmap->waiting_for_flush_all_requested++; |
2658 | while (unlikely(bitmap->flush_all_requested)) |
2659 | { |
2660 | /* |
2661 | Some other thread is waiting for the bitmap to become |
2662 | flushable. Not the moment to make the bitmap unflushable or more |
2663 | unflushable; let's rather back off and wait. If we didn't do this, with |
2664 | multiple writers, there may always be one thread causing the bitmap to |
2665 | be unflushable and _ma_bitmap_flush_all() would wait for long. |
2666 | There should not be a deadlock because if our thread increased |
2667 | non_flushable (and thus _ma_bitmap_flush_all() is waiting for at least |
2668 | our thread), it is not going to increase it more so is not going to come |
2669 | here. |
2670 | */ |
2671 | DBUG_PRINT("info" , ("waiting for bitmap flusher" )); |
2672 | mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); |
2673 | } |
2674 | bitmap->waiting_for_flush_all_requested--; |
2675 | bitmap->non_flushable++; |
2676 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
2677 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
2678 | info->non_flushable_state= 1; |
2679 | DBUG_VOID_RETURN; |
2680 | } |
2681 | |
2682 | |
2683 | /* |
2684 | Correct bitmap pages to reflect the true allocation |
2685 | |
2686 | SYNOPSIS |
2687 | _ma_bitmap_release_unused() |
2688 | info Maria handle |
2689 | blocks Bitmap blocks |
2690 | |
2691 | IMPLEMENTATION |
2692 | If block->used & BLOCKUSED_TAIL is set: |
2693 | If block->used & BLOCKUSED_USED is set, then the bits for the |
2694 | corresponding page is set according to block->empty_space |
2695 | If block->used & BLOCKUSED_USED is not set, then the bits for |
2696 | the corresponding page is set to org_bitmap_value; |
2697 | |
2698 | If block->used & BLOCKUSED_TAIL is not set: |
2699 | if block->used is not set, the bits for the corresponding page are |
2700 | cleared |
2701 | |
2702 | For the first block (head block) the logic is same as for a tail block |
2703 | |
2704 | Note that we may have 'filler blocks' that are used to split a block |
2705 | in half; These can be recognized by that they have page_count == 0. |
2706 | |
2707 | This code also reverse the effect of ma_bitmap_flushable(.., 1); |
2708 | |
2709 | RETURN |
2710 | 0 ok |
2711 | 1 error (Couldn't write or read bitmap page) |
2712 | */ |
2713 | |
2714 | my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) |
2715 | { |
2716 | MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count; |
2717 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
2718 | uint bits, current_bitmap_value; |
2719 | DBUG_ENTER("_ma_bitmap_release_unused" ); |
2720 | |
2721 | /* |
2722 | We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full' |
2723 | when we allocated space in the page |
2724 | */ |
2725 | current_bitmap_value= FULL_HEAD_PAGE; |
2726 | |
2727 | mysql_mutex_lock(&bitmap->bitmap_lock); |
2728 | |
2729 | /* First handle head block */ |
2730 | if (block->used & BLOCKUSED_USED) |
2731 | { |
2732 | DBUG_PRINT("info" , ("head page: %lu empty_space: %u" , |
2733 | (ulong) block->page, block->empty_space)); |
2734 | bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space); |
2735 | if (block->used & BLOCKUSED_USE_ORG_BITMAP) |
2736 | current_bitmap_value= block->org_bitmap_value; |
2737 | } |
2738 | else |
2739 | bits= block->org_bitmap_value; |
2740 | if (bits != current_bitmap_value) |
2741 | { |
2742 | if (set_page_bits(info, bitmap, block->page, bits)) |
2743 | goto err; |
2744 | } |
2745 | else |
2746 | { |
2747 | DBUG_ASSERT(current_bitmap_value == |
2748 | bitmap_get_page_bits(info, bitmap, block->page)); |
2749 | } |
2750 | |
2751 | /* Handle all full pages and tail pages (for head page and blob) */ |
2752 | for (block++; block < end; block++) |
2753 | { |
2754 | uint page_count; |
2755 | if (!block->page_count) |
2756 | continue; /* Skip 'filler blocks' */ |
2757 | |
2758 | page_count= block->page_count; |
2759 | if (block->used & BLOCKUSED_TAIL) |
2760 | { |
2761 | current_bitmap_value= FULL_TAIL_PAGE; |
2762 | /* The bitmap page is only one page */ |
2763 | page_count= 1; |
2764 | if (block->used & BLOCKUSED_USED) |
2765 | { |
2766 | DBUG_PRINT("info" , ("tail page: %lu empty_space: %u" , |
2767 | (ulong) block->page, block->empty_space)); |
2768 | bits= free_size_to_tail_pattern(bitmap, block->empty_space); |
2769 | if (block->used & BLOCKUSED_USE_ORG_BITMAP) |
2770 | current_bitmap_value= block->org_bitmap_value; |
2771 | } |
2772 | else |
2773 | bits= block->org_bitmap_value; |
2774 | |
2775 | /* |
2776 | The page has all bits set; The following test is an optimization |
2777 | to not set the bits to the same value as before. |
2778 | */ |
2779 | DBUG_ASSERT(current_bitmap_value == |
2780 | bitmap_get_page_bits(info, bitmap, block->page)); |
2781 | |
2782 | if (bits != current_bitmap_value) |
2783 | { |
2784 | if (set_page_bits(info, bitmap, block->page, bits)) |
2785 | goto err; |
2786 | } |
2787 | } |
2788 | else if (!(block->used & BLOCKUSED_USED) && |
2789 | _ma_bitmap_reset_full_page_bits(info, bitmap, |
2790 | block->page, page_count)) |
2791 | goto err; |
2792 | } |
2793 | |
2794 | /* This duplicates ma_bitmap_flushable(-1) except it already has mutex */ |
2795 | if (info->non_flushable_state) |
2796 | { |
2797 | DBUG_ASSERT(((int) (bitmap->non_flushable)) > 0); |
2798 | info->non_flushable_state= 0; |
2799 | if (--bitmap->non_flushable == 0) |
2800 | { |
2801 | _ma_bitmap_unpin_all(info->s); |
2802 | if (unlikely(bitmap->waiting_for_non_flushable)) |
2803 | { |
2804 | DBUG_PRINT("info" , ("bitmap flushable waking up flusher" )); |
2805 | mysql_cond_broadcast(&bitmap->bitmap_cond); |
2806 | } |
2807 | } |
2808 | } |
2809 | DBUG_PRINT("info" , ("bitmap->non_flushable: %u" , bitmap->non_flushable)); |
2810 | |
2811 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
2812 | DBUG_RETURN(0); |
2813 | |
2814 | err: |
2815 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
2816 | DBUG_RETURN(1); |
2817 | } |
2818 | |
2819 | |
2820 | /* |
2821 | Free full pages from bitmap and pagecache |
2822 | |
2823 | SYNOPSIS |
2824 | _ma_bitmap_free_full_pages() |
2825 | info Maria handle |
2826 | extents Extents (as stored on disk) |
2827 | count Number of extents |
2828 | |
2829 | IMPLEMENTATION |
2830 | Mark all full pages (not tails) from extents as free, both in bitmap |
2831 | and page cache. |
2832 | |
2833 | RETURN |
2834 | 0 ok |
2835 | 1 error (Couldn't write or read bitmap page) |
2836 | */ |
2837 | |
2838 | my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents, |
2839 | uint count) |
2840 | { |
2841 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
2842 | my_bool res; |
2843 | DBUG_ENTER("_ma_bitmap_free_full_pages" ); |
2844 | |
2845 | for (; count--; extents+= ROW_EXTENT_SIZE) |
2846 | { |
2847 | pgcache_page_no_t page= uint5korr(extents); |
2848 | uint page_count= (uint2korr(extents + ROW_EXTENT_PAGE_SIZE) & |
2849 | ~START_EXTENT_BIT); |
2850 | if (!(page_count & TAIL_BIT)) |
2851 | { |
2852 | if (page == 0 && page_count == 0) |
2853 | continue; /* Not used extent */ |
2854 | if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page, |
2855 | page_count, PAGECACHE_LOCK_WRITE, 1)) |
2856 | DBUG_RETURN(1); |
2857 | mysql_mutex_lock(&bitmap->bitmap_lock); |
2858 | res= _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count); |
2859 | mysql_mutex_unlock(&bitmap->bitmap_lock); |
2860 | if (res) |
2861 | DBUG_RETURN(1); |
2862 | } |
2863 | } |
2864 | DBUG_RETURN(0); |
2865 | } |
2866 | |
2867 | |
2868 | /* |
2869 | Mark in the bitmap how much free space there is on a page |
2870 | |
2871 | SYNOPSIS |
2872 | _ma_bitmap_set() |
2873 | info Maria handler |
2874 | page Adress to page |
2875 | head 1 if page is a head page, 0 if tail page |
2876 | empty_space How much empty space there is on page |
2877 | |
2878 | RETURN |
2879 | 0 ok |
2880 | 1 error |
2881 | */ |
2882 | |
2883 | my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head, |
2884 | uint empty_space) |
2885 | { |
2886 | MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; |
2887 | uint bits; |
2888 | my_bool res; |
2889 | DBUG_ENTER("_ma_bitmap_set" ); |
2890 | DBUG_PRINT("enter" , ("page: %lu head: %d empty_space: %u" , |
2891 | (ulong) page, head, empty_space)); |
2892 | |
2893 | mysql_mutex_lock(&info->s->bitmap.bitmap_lock); |
2894 | bits= (head ? |
2895 | _ma_free_size_to_head_pattern(bitmap, empty_space) : |
2896 | free_size_to_tail_pattern(bitmap, empty_space)); |
2897 | res= set_page_bits(info, bitmap, page, bits); |
2898 | mysql_mutex_unlock(&info->s->bitmap.bitmap_lock); |
2899 | DBUG_RETURN(res); |
2900 | } |
2901 | |
2902 | |
2903 | /* |
2904 | Check that bitmap pattern is correct for a page |
2905 | |
2906 | NOTES |
2907 | Used in maria_chk |
2908 | |
2909 | SYNOPSIS |
2910 | _ma_check_bitmap_data() |
2911 | info Maria handler |
2912 | page_type What kind of page this is |
2913 | page Adress to page |
2914 | empty_space Empty space on page |
2915 | bitmap_pattern Bitmap pattern for page (from bitmap) |
2916 | |
2917 | RETURN |
2918 | 0 ok |
2919 | 1 error |
2920 | */ |
2921 | |
2922 | my_bool _ma_check_bitmap_data(MARIA_HA *info, enum en_page_type page_type, |
2923 | uint empty_space, uint bitmap_pattern) |
2924 | { |
2925 | uint bits; |
2926 | switch (page_type) { |
2927 | case UNALLOCATED_PAGE: |
2928 | case MAX_PAGE_TYPE: |
2929 | bits= 0; |
2930 | break; |
2931 | case HEAD_PAGE: |
2932 | bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space); |
2933 | break; |
2934 | case TAIL_PAGE: |
2935 | bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space); |
2936 | break; |
2937 | case BLOB_PAGE: |
2938 | bits= FULL_TAIL_PAGE; |
2939 | break; |
2940 | default: |
2941 | bits= 0; /* to satisfy compiler */ |
2942 | DBUG_ASSERT(0); |
2943 | } |
2944 | return (bitmap_pattern != bits); |
2945 | } |
2946 | |
2947 | /** |
2948 | Check that bitmap looks correct |
2949 | |
2950 | - All data before full_head_size and full_tail_size are allocated |
2951 | - There is no allocated data after used_size |
2952 | All of the above need to be correct only according to 6 byte |
2953 | alignment as all loops reads 6 bytes at a time and we check both |
2954 | start and end position according to the current 6 byte position. |
2955 | */ |
2956 | |
2957 | #ifndef DBUG_OFF |
2958 | static void _ma_check_bitmap(MARIA_FILE_BITMAP *bitmap) |
2959 | { |
2960 | uchar *data= bitmap->map; |
2961 | uchar *end= bitmap->map + bitmap->total_size; |
2962 | uchar *full_head_end=0, *full_tail_end=0, *first_empty= bitmap->map; |
2963 | |
2964 | for (; data < end; data+= 6) |
2965 | { |
2966 | ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ |
2967 | uint i; |
2968 | |
2969 | if (bits == 04444444444444444LL || bits == 0xffffffffffffLL) |
2970 | { |
2971 | first_empty= data + 6; |
2972 | continue; /* block fully used */ |
2973 | } |
2974 | if (bits == 0) |
2975 | { |
2976 | if (!full_head_end) |
2977 | full_head_end= data; |
2978 | if (!full_tail_end) |
2979 | full_tail_end= data; |
2980 | continue; |
2981 | } |
2982 | |
2983 | first_empty= data + 6; |
2984 | if (!full_head_end || !full_tail_end) |
2985 | { |
2986 | for (i= 0, bits >>= 0; i < 16 ; i++, bits >>= 3) |
2987 | { |
2988 | uint pattern= (uint) (bits & 7); |
2989 | if (pattern == FULL_HEAD_PAGE || pattern == FULL_TAIL_PAGE) |
2990 | continue; |
2991 | |
2992 | if (pattern < 4 && !full_head_end) |
2993 | full_head_end= data; |
2994 | if ((pattern == 0 || (pattern > 4 && pattern < 7)) && !full_tail_end) |
2995 | full_tail_end= data; |
2996 | } |
2997 | } |
2998 | } |
2999 | if (!full_head_end) |
3000 | full_head_end= data; |
3001 | if (!full_tail_end) |
3002 | full_tail_end= data; |
3003 | |
3004 | /* used_size must point after the last byte that had some data) */ |
3005 | DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); |
3006 | DBUG_ASSERT((bitmap->map + (bitmap->used_size+5)/6*6) >= first_empty); |
3007 | /* full_xxxx_size can't point after the first block that has free data */ |
3008 | DBUG_ASSERT((bitmap->map + (bitmap->full_head_size/6*6)) <= full_head_end); |
3009 | DBUG_ASSERT((bitmap->map + (bitmap->full_tail_size/6*6)) <= full_tail_end); |
3010 | } |
3011 | #endif |
3012 | |
3013 | |
3014 | /* |
3015 | Check if the page type matches the one that we have in the bitmap |
3016 | |
3017 | SYNOPSIS |
3018 | _ma_check_if_right_bitmap_type() |
3019 | info Maria handler |
3020 | page_type What kind of page this is |
3021 | page Adress to page |
3022 | bitmap_pattern Store here the pattern that was in the bitmap for the |
3023 | page. This is always updated. |
3024 | |
3025 | NOTES |
3026 | Used in maria_chk |
3027 | |
3028 | RETURN |
3029 | 0 ok |
3030 | 1 error |
3031 | */ |
3032 | |
3033 | my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, |
3034 | enum en_page_type page_type, |
3035 | pgcache_page_no_t page, |
3036 | uint *bitmap_pattern) |
3037 | { |
3038 | if ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap, |
3039 | page)) > 7) |
3040 | return 1; /* Couldn't read page */ |
3041 | switch (page_type) { |
3042 | case HEAD_PAGE: |
3043 | return *bitmap_pattern < 1 || *bitmap_pattern > 4; |
3044 | case TAIL_PAGE: |
3045 | return *bitmap_pattern < 5; |
3046 | case BLOB_PAGE: |
3047 | return *bitmap_pattern != 7; |
3048 | default: |
3049 | break; |
3050 | } |
3051 | DBUG_ASSERT(0); |
3052 | return 1; |
3053 | } |
3054 | |
3055 | |
3056 | /** |
3057 | @brief create the first bitmap page of a freshly created data file |
3058 | |
3059 | @param share table's share |
3060 | |
3061 | @return Operation status |
3062 | @retval 0 OK |
3063 | @retval !=0 Error |
3064 | */ |
3065 | |
3066 | int _ma_bitmap_create_first(MARIA_SHARE *share) |
3067 | { |
3068 | uint block_size= share->bitmap.block_size; |
3069 | File file= share->bitmap.file.file; |
3070 | uchar marker[CRC_SIZE]; |
3071 | |
3072 | /* |
3073 | Next write operation of the page will write correct CRC |
3074 | if it is needed |
3075 | */ |
3076 | int4store(marker, MARIA_NO_CRC_BITMAP_PAGE); |
3077 | |
3078 | if (mysql_file_chsize(file, block_size - sizeof(marker), |
3079 | 0, MYF(MY_WME)) || |
3080 | my_pwrite(file, marker, sizeof(marker), |
3081 | block_size - sizeof(marker), |
3082 | MYF(MY_NABP | MY_WME))) |
3083 | return 1; |
3084 | share->state.state.data_file_length= block_size; |
3085 | _ma_bitmap_delete_all(share); |
3086 | return 0; |
3087 | } |
3088 | |
3089 | |
3090 | /** |
3091 | @brief Pagecache callback to get the TRANSLOG_ADDRESS to flush up to, when a |
3092 | bitmap page needs to be flushed. |
3093 | |
3094 | @param page Page's content |
3095 | @param page_no Page's number (<offset>/<page length>) |
3096 | @param data_ptr Callback data pointer (pointer to MARIA_SHARE) |
3097 | |
3098 | @retval TRANSLOG_ADDRESS to flush up to. |
3099 | */ |
3100 | |
3101 | static my_bool |
3102 | flush_log_for_bitmap(PAGECACHE_IO_HOOK_ARGS *args __attribute__ ((unused))) |
3103 | { |
3104 | #ifdef DBUG_ASSERT_EXISTS |
3105 | const MARIA_SHARE *share= (MARIA_SHARE*)args->data; |
3106 | #endif |
3107 | DBUG_ENTER("flush_log_for_bitmap" ); |
3108 | DBUG_ASSERT(share->now_transactional); |
3109 | /* |
3110 | WAL imposes that UNDOs reach disk before bitmap is flushed. We don't know |
3111 | the LSN of the last UNDO about this bitmap page, so we flush whole log. |
3112 | */ |
3113 | DBUG_RETURN(translog_flush(translog_get_horizon())); |
3114 | } |
3115 | |
3116 | |
3117 | /** |
3118 | @brief Set callbacks for bitmap pages |
3119 | |
3120 | @note |
3121 | We don't use pagecache_file_init here, as we want to keep the |
3122 | code readable |
3123 | */ |
3124 | |
3125 | void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file, |
3126 | MARIA_SHARE *share) |
3127 | { |
3128 | pagecache_file_set_null_hooks(file); |
3129 | file->callback_data= (uchar*) share; |
3130 | file->flush_log_callback= maria_flush_log_for_page_none; |
3131 | file->post_write_hook= maria_page_write_failure; |
3132 | |
3133 | if (share->temporary) |
3134 | { |
3135 | file->post_read_hook= &maria_page_crc_check_none; |
3136 | file->pre_write_hook= &maria_page_filler_set_none; |
3137 | } |
3138 | else |
3139 | { |
3140 | file->post_read_hook= &maria_page_crc_check_bitmap; |
3141 | if (share->options & HA_OPTION_PAGE_CHECKSUM) |
3142 | file->pre_write_hook= &maria_page_crc_set_normal; |
3143 | else |
3144 | file->pre_write_hook= &maria_page_filler_set_bitmap; |
3145 | if (share->now_transactional) |
3146 | file->flush_log_callback= flush_log_for_bitmap; |
3147 | } |
3148 | } |
3149 | |
3150 | |
3151 | /** |
3152 | Extends data file with zeroes and creates new bitmap pages into page cache. |
3153 | |
3154 | Writes all bitmap pages in [from, to]. |
3155 | |
3156 | Non-bitmap pages of zeroes are correct as they are marked empty in |
3157 | bitmaps. Bitmap pages will not be zeroes: they will get their CRC fixed when |
3158 | flushed. And if there is a crash before flush (so they are zeroes at |
3159 | restart), a REDO will re-create them in page cache. |
3160 | */ |
3161 | |
3162 | static my_bool |
3163 | _ma_bitmap_create_missing_into_pagecache(MARIA_SHARE *share, |
3164 | MARIA_FILE_BITMAP *bitmap, |
3165 | pgcache_page_no_t from, |
3166 | pgcache_page_no_t to, |
3167 | uchar *zeroes) |
3168 | { |
3169 | pgcache_page_no_t i; |
3170 | /* |
3171 | We do not use my_chsize() because there can be a race between when it |
3172 | reads the physical size and when it writes (assume data_file_length is 10, |
3173 | physical length is 8 and two data pages are in cache, and here we do a |
3174 | my_chsize: my_chsize sees physical length is 8, then the two data pages go |
3175 | to disk then my_chsize writes from page 8 and so overwrites the two data |
3176 | pages, wrongly). |
3177 | We instead rely on the filesystem filling gaps with zeroes. |
3178 | */ |
3179 | for (i= from; i <= to; i+= bitmap->pages_covered) |
3180 | { |
3181 | /** |
3182 | No need to keep them pinned, they are new so flushable. |
3183 | @todo but we may want to keep them pinned, as an optimization: if they |
3184 | are not pinned they may go to disk before the data pages go (so, the |
3185 | physical pages would be in non-ascending "sparse" order on disk), or the |
3186 | filesystem may fill gaps with zeroes physically which is a waste of |
3187 | time. |
3188 | */ |
3189 | if (pagecache_write(share->pagecache, |
3190 | &bitmap->file, i, 0, |
3191 | zeroes, PAGECACHE_PLAIN_PAGE, |
3192 | PAGECACHE_LOCK_LEFT_UNLOCKED, |
3193 | PAGECACHE_PIN_LEFT_UNPINNED, |
3194 | PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE)) |
3195 | goto err; |
3196 | } |
3197 | /* |
3198 | Data pages after data_file_length are full of zeroes but that is allowed |
3199 | as they are marked empty in the bitmap. |
3200 | */ |
3201 | return FALSE; |
3202 | err: |
3203 | return TRUE; |
3204 | } |
3205 | |
3206 | |
3207 | /** |
3208 | Creates missing bitmaps when we extend the data file. |
3209 | |
3210 | At run-time, when we need a new bitmap page we come here; and only one bitmap |
3211 | page at a time is created. |
3212 | |
3213 | In some recovery cases we insert at a large offset in the data file, way |
3214 | beyond state.data_file_length, so can need to create more than one bitmap |
3215 | page in one go. Known case is: |
3216 | Start a transaction in Maria; |
3217 | delete last row of very large table (with delete_row) |
3218 | do a bulk insert |
3219 | crash |
3220 | Then UNDO_BULK_INSERT will truncate table files, and |
3221 | UNDO_ROW_DELETE will want to put the row back to its original position, |
3222 | extending the data file a lot: bitmap page*s* in the hole must be created, |
3223 | or he table would look corrupted. |
3224 | |
3225 | We need to log REDOs for bitmap creation, consider: we apply a REDO for a |
3226 | data page, which creates the first data page covered by a new bitmap |
3227 | not yet created. If the data page is flushed but the bitmap page is not and |
3228 | there is a crash, re-execution of the REDO will complain about the zeroed |
3229 | bitmap page (see it as corruption). Thus a REDO is needed to re-create the |
3230 | bitmap. |
3231 | |
3232 | @param info Maria handler |
3233 | @param bitmap Bitmap handler |
3234 | @param page Last bitmap page to create |
3235 | |
3236 | @note When this function is called this must be true: |
3237 | ((page + 1) * bitmap->block_size > info->s->state.state.data_file_length) |
3238 | |
3239 | */ |
3240 | |
3241 | static my_bool _ma_bitmap_create_missing(MARIA_HA *info, |
3242 | MARIA_FILE_BITMAP *bitmap, |
3243 | pgcache_page_no_t page) |
3244 | { |
3245 | MARIA_SHARE *share= info->s; |
3246 | uint block_size= bitmap->block_size; |
3247 | pgcache_page_no_t from, to; |
3248 | my_off_t data_file_length= share->state.state.data_file_length; |
3249 | DBUG_ENTER("_ma_bitmap_create_missing" ); |
3250 | DBUG_PRINT("enter" , ("page: %lld" , (longlong) page)); |
3251 | |
3252 | /* First (in offset order) bitmap page to create */ |
3253 | if (data_file_length < block_size) |
3254 | goto err; /* corrupted, should have first bitmap page */ |
3255 | if (page * block_size >= share->base.max_data_file_length) |
3256 | { |
3257 | my_errno= HA_ERR_RECORD_FILE_FULL; |
3258 | goto err; |
3259 | } |
3260 | |
3261 | from= (data_file_length / block_size - 1) / bitmap->pages_covered + 1; |
3262 | from*= bitmap->pages_covered; |
3263 | /* |
3264 | page>=from because: |
3265 | (page + 1) * bs > dfl, and page == k * pc so: |
3266 | (k * pc + 1) * bs > dfl; k * pc + 1 > dfl / bs; k * pc > dfl / bs - 1 |
3267 | k > (dfl / bs - 1) / pc; k >= (dfl / bs - 1) / pc + 1 |
3268 | k * pc >= ((dfl / bs - 1) / pc + 1) * pc == from. |
3269 | */ |
3270 | DBUG_ASSERT(page >= from); |
3271 | |
3272 | if (share->now_transactional) |
3273 | { |
3274 | LSN lsn; |
3275 | uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2]; |
3276 | LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; |
3277 | page_store(log_data + FILEID_STORE_SIZE, from); |
3278 | page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page); |
3279 | log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; |
3280 | log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); |
3281 | /* |
3282 | We don't use info->trn so that this REDO is always executed even though |
3283 | the UNDO does not reach disk due to crash. This is also consistent with |
3284 | the fact that the new bitmap pages are not pinned. |
3285 | */ |
3286 | if (translog_write_record(&lsn, LOGREC_REDO_BITMAP_NEW_PAGE, |
3287 | &dummy_transaction_object, info, |
3288 | (translog_size_t)sizeof(log_data), |
3289 | TRANSLOG_INTERNAL_PARTS + 1, log_array, |
3290 | log_data, NULL)) |
3291 | goto err; |
3292 | /* |
3293 | No need to flush the log: the bitmap pages we are going to create will |
3294 | flush it when they go to disk. |
3295 | */ |
3296 | } |
3297 | |
3298 | /* |
3299 | Last bitmap page. It has special creation: will go to the page cache |
3300 | only later as we are going to modify it very soon. |
3301 | */ |
3302 | bzero(bitmap->map, bitmap->block_size); |
3303 | bitmap->used_size= bitmap->full_head_size= bitmap->full_tail_size= 0; |
3304 | bitmap->changed=1; |
3305 | #ifndef DBUG_OFF |
3306 | /* |
3307 | Make a copy of the page to be able to print out bitmap changes during |
3308 | debugging |
3309 | */ |
3310 | memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); |
3311 | #endif |
3312 | |
3313 | /* Last bitmap page to create before 'page' */ |
3314 | DBUG_ASSERT(page >= bitmap->pages_covered); |
3315 | to= page - bitmap->pages_covered; |
3316 | /* |
3317 | In run-time situations, from>=to is always false, i.e. we always create |
3318 | one bitmap at a time ('page'). |
3319 | */ |
3320 | if ((from <= to) && |
3321 | _ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to, |
3322 | bitmap->map)) |
3323 | goto err; |
3324 | |
3325 | share->state.state.data_file_length= (page + 1) * bitmap->block_size; |
3326 | |
3327 | DBUG_RETURN(FALSE); |
3328 | err: |
3329 | DBUG_RETURN(TRUE); |
3330 | } |
3331 | |
3332 | |
3333 | my_bool _ma_apply_redo_bitmap_new_page(MARIA_HA *info, |
3334 | LSN lsn __attribute__ ((unused)), |
3335 | const uchar *) |
3336 | { |
3337 | MARIA_SHARE *share= info->s; |
3338 | MARIA_FILE_BITMAP *bitmap= &share->bitmap; |
3339 | my_bool error; |
3340 | pgcache_page_no_t from, to, min_from; |
3341 | DBUG_ENTER("_ma_apply_redo_bitmap_new_page" ); |
3342 | |
3343 | from= page_korr(header); |
3344 | to= page_korr(header + PAGE_STORE_SIZE); |
3345 | DBUG_PRINT("info" , ("from: %lu to: %lu" , (ulong)from, (ulong)to)); |
3346 | if ((from > to) || |
3347 | (from % bitmap->pages_covered) != 0 || |
3348 | (to % bitmap->pages_covered) != 0) |
3349 | { |
3350 | error= TRUE; /* corrupted log record */ |
3351 | goto err; |
3352 | } |
3353 | |
3354 | min_from= (share->state.state.data_file_length / bitmap->block_size - 1) / |
3355 | bitmap->pages_covered + 1; |
3356 | min_from*= bitmap->pages_covered; |
3357 | if (from < min_from) |
3358 | { |
3359 | DBUG_PRINT("info" , ("overwrite bitmap pages from %lu" , (ulong)min_from)); |
3360 | /* |
3361 | We have to overwrite. It could be that there was a bitmap page in |
3362 | memory, covering a data page which went to disk, then crash: the |
3363 | bitmap page is now full of zeros and is ==min_from, we have to overwrite |
3364 | it with correct checksum. |
3365 | */ |
3366 | } |
3367 | share->state.changed|= STATE_CHANGED; |
3368 | bzero(info->buff, bitmap->block_size); |
3369 | if (!(error= |
3370 | _ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to, |
3371 | info->buff))) |
3372 | share->state.state.data_file_length= (to + 1) * bitmap->block_size; |
3373 | |
3374 | err: |
3375 | DBUG_RETURN(error); |
3376 | } |
3377 | |