1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2012, Facebook Inc. |
5 | Copyright (c) 2017, 2018, MariaDB Corporation. |
6 | |
7 | This program is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free Software |
9 | Foundation; version 2 of the License. |
10 | |
11 | This program is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU General Public License along with |
16 | this program; if not, write to the Free Software Foundation, Inc., |
17 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
18 | |
19 | *****************************************************************************/ |
20 | |
21 | /**************************************************//** |
22 | @file page/page0page.cc |
23 | Index page routines |
24 | |
25 | Created 2/2/1994 Heikki Tuuri |
26 | *******************************************************/ |
27 | |
28 | #include "page0page.h" |
29 | #include "page0cur.h" |
30 | #include "page0zip.h" |
31 | #include "buf0buf.h" |
32 | #include "btr0btr.h" |
33 | #include "row0trunc.h" |
34 | #include "srv0srv.h" |
35 | #include "lock0lock.h" |
36 | #include "fut0lst.h" |
37 | #include "btr0sea.h" |
38 | #include "trx0sys.h" |
39 | |
40 | /* THE INDEX PAGE |
41 | ============== |
42 | |
The index page consists of a page header which contains the page's
id and other information. On top of it are the index records, stored
in a heap and linked into a one-way (singly-linked) list in
alphabetical order.
46 | |
Just below the page end is an array of pointers, which we call the page
directory, pointing to about every sixth record in the list. The
pointers are placed in the directory in the alphabetical order of the
records pointed to, enabling us to do a binary search using the array.
Each slot number I in the directory points to a record, where a 4-bit
field contains a count of those records which are in the linear list
between pointer I and pointer I - 1 in the directory, including the
record pointed to by pointer I and not including the record pointed to
by I - 1. We say that the record pointed to by slot I, or that slot I,
owns these records. The count is always kept in the range 4 to 8, with
the exception that it is 1 for the first slot (which owns only the
infimum record) and 1--8 for the last slot (which owns the supremum
record).
58 | |
An essentially binary search can be performed in the list of index
records, as we could if we had a pointer to every record in the page
directory. The data structure is, however, more efficient for inserts,
because most inserts are just pushed on the heap. Only every 8th insert
requires a block move in the directory pointer table, which itself is
quite small. A record is deleted from the page by taking it off the
linear list, updating the number-of-owned-records field of the record
which owns it, and updating the page directory,
67 | if necessary. A special case is the one when the record owns itself. |
Because the overhead of inserts is so small, we may also increase the
page size from the projected default of 8 kB to 64 kB without too
much loss of efficiency for inserts. A bigger page becomes practical
when the disk transfer rate rises relative to seek and latency time.
72 | On the present system, the page size is set so that the page transfer |
73 | time (3 ms) is 20 % of the disk random access time (15 ms). |
74 | |
75 | When the page is split, merged, or becomes full but contains deleted |
76 | records, we have to reorganize the page. |
77 | |
Assuming a page size of 8 kB, a typical index page of a secondary
index contains 300 index entries, and the size of the page directory
is 50 x 2 bytes = 100 bytes. */
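
/* Illustrative sketch (not InnoDB code; toy_page and toy_page_search
are hypothetical names, and the directory is modelled as a plain array
of record indexes).  It shows the shape of the lookup described above:
a binary search over the sparse directory narrows the target down to
one owner group of at most 8 records, and a short walk along the
singly-linked record list finishes the job.  On a real page the
supremum record acts as an upper sentinel, so the walk always
terminates inside the page.

	#include <cstddef>
	#include <vector>

	struct toy_page {
		std::vector<int>	keys;	// "records" in ascending order
		std::vector<size_t>	dir;	// per slot: index of the owner
						// (last record of its group)
	};

	// Return the index of the first record with key >= search_key,
	// assuming the page is not empty and search_key is not greater
	// than the largest key (the supremum plays that role in InnoDB).
	inline size_t toy_page_search(const toy_page& p, int search_key)
	{
		size_t	lo = 0;
		size_t	hi = p.dir.size() - 1;

		while (lo < hi) {		// binary search on the slots
			size_t	mid = (lo + hi) / 2;
			if (p.keys[p.dir[mid]] < search_key) {
				lo = mid + 1;
			} else {
				hi = mid;
			}
		}

		// linear scan inside the group owned by slot "lo"
		size_t	rec = lo ? p.dir[lo - 1] + 1 : 0;
		while (rec < p.dir[lo] && p.keys[rec] < search_key) {
			rec++;
		}
		return rec;
	}
*/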
81 | |
82 | /***************************************************************//** |
83 | Looks for the directory slot which owns the given record. |
84 | @return the directory slot number */ |
85 | ulint |
86 | page_dir_find_owner_slot( |
87 | /*=====================*/ |
88 | const rec_t* rec) /*!< in: the physical record */ |
89 | { |
90 | ut_ad(page_rec_check(rec)); |
91 | |
92 | const page_t* page = page_align(rec); |
93 | const page_dir_slot_t* first_slot = page_dir_get_nth_slot(page, 0); |
94 | const page_dir_slot_t* slot = page_dir_get_nth_slot( |
95 | page, ulint(page_dir_get_n_slots(page)) - 1); |
96 | const rec_t* r = rec; |
97 | |
98 | if (page_is_comp(page)) { |
99 | while (rec_get_n_owned_new(r) == 0) { |
100 | r = rec_get_next_ptr_const(r, TRUE); |
101 | ut_ad(r >= page + PAGE_NEW_SUPREMUM); |
102 | ut_ad(r < page + (srv_page_size - PAGE_DIR)); |
103 | } |
104 | } else { |
105 | while (rec_get_n_owned_old(r) == 0) { |
106 | r = rec_get_next_ptr_const(r, FALSE); |
107 | ut_ad(r >= page + PAGE_OLD_SUPREMUM); |
108 | ut_ad(r < page + (srv_page_size - PAGE_DIR)); |
109 | } |
110 | } |
111 | |
112 | uint16 rec_offs_bytes = mach_encode_2(ulint(r - page)); |
113 | |
114 | while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) { |
115 | |
116 | if (UNIV_UNLIKELY(slot == first_slot)) { |
117 | ib::error() << "Probable data corruption on page " |
118 | << page_get_page_no(page) |
				<< ". Original record on that page;";
120 | |
121 | if (page_is_comp(page)) { |
				fputs("(compact record)", stderr);
123 | } else { |
124 | rec_print_old(stderr, rec); |
125 | } |
126 | |
			ib::error() << "Cannot find the dir slot for this"
				" record on that page;";
129 | |
130 | if (page_is_comp(page)) { |
				fputs("(compact record)", stderr);
132 | } else { |
133 | rec_print_old(stderr, page |
134 | + mach_decode_2(rec_offs_bytes)); |
135 | } |
136 | |
137 | ut_error; |
138 | } |
139 | |
140 | slot += PAGE_DIR_SLOT_SIZE; |
141 | } |
142 | |
143 | return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE); |
144 | } |
145 | |
146 | /**************************************************************//** |
147 | Used to check the consistency of a directory slot. |
@return TRUE if the check succeeds */
149 | static |
150 | ibool |
151 | page_dir_slot_check( |
152 | /*================*/ |
153 | const page_dir_slot_t* slot) /*!< in: slot */ |
154 | { |
155 | const page_t* page; |
156 | ulint n_slots; |
157 | ulint n_owned; |
158 | |
159 | ut_a(slot); |
160 | |
161 | page = page_align(slot); |
162 | |
163 | n_slots = page_dir_get_n_slots(page); |
164 | |
165 | ut_a(slot <= page_dir_get_nth_slot(page, 0)); |
166 | ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1)); |
167 | |
168 | ut_a(page_rec_check(page_dir_slot_get_rec(slot))); |
169 | |
170 | if (page_is_comp(page)) { |
171 | n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot)); |
172 | } else { |
173 | n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot)); |
174 | } |
175 | |
176 | if (slot == page_dir_get_nth_slot(page, 0)) { |
177 | ut_a(n_owned == 1); |
178 | } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) { |
179 | ut_a(n_owned >= 1); |
180 | ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); |
181 | } else { |
182 | ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED); |
183 | ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); |
184 | } |
185 | |
186 | return(TRUE); |
187 | } |
188 | |
189 | /*************************************************************//** |
190 | Sets the max trx id field value. */ |
191 | void |
192 | page_set_max_trx_id( |
193 | /*================*/ |
194 | buf_block_t* block, /*!< in/out: page */ |
195 | page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ |
196 | trx_id_t trx_id, /*!< in: transaction id */ |
197 | mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */ |
198 | { |
199 | page_t* page = buf_block_get_frame(block); |
200 | ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); |
201 | |
202 | /* It is not necessary to write this change to the redo log, as |
203 | during a database recovery we assume that the max trx id of every |
204 | page is the maximum trx id assigned before the crash. */ |
205 | |
206 | if (page_zip) { |
207 | mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id); |
208 | page_zip_write_header(page_zip, |
209 | page + (PAGE_HEADER + PAGE_MAX_TRX_ID), |
210 | 8, mtr); |
211 | } else if (mtr) { |
212 | mlog_write_ull(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), |
213 | trx_id, mtr); |
214 | } else { |
215 | mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id); |
216 | } |
217 | } |
218 | |
219 | /** Persist the AUTO_INCREMENT value on a clustered index root page. |
220 | @param[in,out] block clustered index root page |
221 | @param[in] index clustered index |
222 | @param[in] autoinc next available AUTO_INCREMENT value |
223 | @param[in,out] mtr mini-transaction |
224 | @param[in] reset whether to reset the AUTO_INCREMENT |
225 | to a possibly smaller value than currently |
226 | exists in the page */ |
227 | void |
228 | page_set_autoinc( |
229 | buf_block_t* block, |
230 | const dict_index_t* index MY_ATTRIBUTE((unused)), |
231 | ib_uint64_t autoinc, |
232 | mtr_t* mtr, |
233 | bool reset) |
234 | { |
235 | ut_ad(mtr_memo_contains_flagged( |
236 | mtr, block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); |
237 | ut_ad(index->is_primary()); |
238 | ut_ad(index->page == block->page.id.page_no()); |
239 | ut_ad(index->table->space->id == block->page.id.space()); |
240 | |
241 | byte* field = PAGE_HEADER + PAGE_ROOT_AUTO_INC |
242 | + buf_block_get_frame(block); |
243 | if (!reset && mach_read_from_8(field) >= autoinc) { |
244 | /* nothing to update */ |
245 | } else if (page_zip_des_t* page_zip = buf_block_get_page_zip(block)) { |
246 | mach_write_to_8(field, autoinc); |
247 | page_zip_write_header(page_zip, field, 8, mtr); |
248 | } else { |
249 | mlog_write_ull(field, autoinc, mtr); |
250 | } |
251 | } |
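
/* Illustrative usage sketch (root_block, index, next_value and mtr
are placeholders, not a call site in this file): a caller holding an
X- or SX-latch on the clustered index root page inside a started
mini-transaction could persist a larger AUTO_INCREMENT value with

	page_set_autoinc(root_block, index, next_value, &mtr, false);

With reset=false the stored value only ever grows; reset=true is
intended for explicitly lowering it, e.g. for something like
ALTER TABLE ... AUTO_INCREMENT = n. */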
252 | |
253 | /************************************************************//** |
254 | Allocates a block of memory from the heap of an index page. |
255 | @return pointer to start of allocated buffer, or NULL if allocation fails */ |
256 | byte* |
257 | page_mem_alloc_heap( |
258 | /*================*/ |
259 | page_t* page, /*!< in/out: index page */ |
260 | page_zip_des_t* page_zip,/*!< in/out: compressed page with enough |
261 | space available for inserting the record, |
262 | or NULL */ |
263 | ulint need, /*!< in: total number of bytes needed */ |
264 | ulint* heap_no)/*!< out: this contains the heap number |
265 | of the allocated record |
266 | if allocation succeeds */ |
267 | { |
268 | byte* block; |
269 | ulint avl_space; |
270 | |
271 | ut_ad(page && heap_no); |
272 | |
273 | avl_space = page_get_max_insert_size(page, 1); |
274 | |
275 | if (avl_space >= need) { |
276 | block = page_header_get_ptr(page, PAGE_HEAP_TOP); |
277 | |
278 | page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP, |
279 | block + need); |
280 | *heap_no = page_dir_get_n_heap(page); |
281 | |
282 | page_dir_set_n_heap(page, page_zip, 1 + *heap_no); |
283 | |
284 | return(block); |
285 | } |
286 | |
287 | return(NULL); |
288 | } |
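
/* Illustrative example of the bookkeeping above (the numbers are
invented): if PAGE_HEAP_TOP is 0x200, PAGE_N_HEAP is 5 and at least
40 bytes of free space remain between the heap top and the page
directory, then

	ulint	heap_no;
	byte*	buf = page_mem_alloc_heap(page, NULL, 40, &heap_no);

returns page + 0x200, advances PAGE_HEAP_TOP to 0x228, sets heap_no
to 5 and bumps PAGE_N_HEAP to 6.  On failure (NULL) the caller
typically falls back to the PAGE_FREE list or reorganizes the page. */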
289 | |
290 | /**********************************************************//** |
291 | Writes a log record of page creation. */ |
292 | UNIV_INLINE |
293 | void |
294 | page_create_write_log( |
295 | /*==================*/ |
296 | buf_frame_t* frame, /*!< in: a buffer frame where the page is |
297 | created */ |
298 | mtr_t* mtr, /*!< in: mini-transaction handle */ |
299 | ibool comp, /*!< in: TRUE=compact page format */ |
	bool		is_rtree) /*!< in: whether it is an R-tree page */
301 | { |
302 | mlog_id_t type; |
303 | |
304 | if (is_rtree) { |
305 | type = comp ? MLOG_COMP_PAGE_CREATE_RTREE |
306 | : MLOG_PAGE_CREATE_RTREE; |
307 | } else { |
308 | type = comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE; |
309 | } |
310 | |
311 | mlog_write_initial_log_record(frame, type, mtr); |
312 | } |
313 | |
314 | /** The page infimum and supremum of an empty page in ROW_FORMAT=REDUNDANT */ |
315 | static const byte infimum_supremum_redundant[] = { |
316 | /* the infimum record */ |
317 | 0x08/*end offset*/, |
318 | 0x01/*n_owned*/, |
319 | 0x00, 0x00/*heap_no=0*/, |
320 | 0x03/*n_fields=1, 1-byte offsets*/, |
321 | 0x00, 0x74/* pointer to supremum */, |
322 | 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0, |
323 | /* the supremum record */ |
324 | 0x09/*end offset*/, |
325 | 0x01/*n_owned*/, |
326 | 0x00, 0x08/*heap_no=1*/, |
327 | 0x03/*n_fields=1, 1-byte offsets*/, |
328 | 0x00, 0x00/* end of record list */, |
329 | 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm', 0 |
330 | }; |
331 | |
332 | /** The page infimum and supremum of an empty page in ROW_FORMAT=COMPACT */ |
333 | static const byte infimum_supremum_compact[] = { |
334 | /* the infimum record */ |
335 | 0x01/*n_owned=1*/, |
336 | 0x00, 0x02/* heap_no=0, REC_STATUS_INFIMUM */, |
337 | 0x00, 0x0d/* pointer to supremum */, |
338 | 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0, |
339 | /* the supremum record */ |
340 | 0x01/*n_owned=1*/, |
341 | 0x00, 0x0b/* heap_no=1, REC_STATUS_SUPREMUM */, |
342 | 0x00, 0x00/* end of record list */, |
343 | 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm' |
344 | }; |
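
/* A worked decoding of the compact bytes above.  The 5 "extra" bytes
that precede a new-style record origin are laid out as

	byte 0     : (info_bits << 4) | n_owned
	bytes 1..2 : (heap_no << 3) | status
	bytes 3..4 : next-record pointer, relative to this record

so 0x00 0x02 is heap_no=0 with REC_STATUS_INFIMUM (2), and 0x00 0x0b
is (1 << 3) | REC_STATUS_SUPREMUM (3), i.e. heap_no=1, supremum.  The
infimum's next pointer 0x00 0x0d is the relative distance
PAGE_NEW_SUPREMUM - PAGE_NEW_INFIMUM = 112 - 99 = 13.  In the
ROW_FORMAT=REDUNDANT array above, the next pointer is an absolute
page offset instead: 0x00 0x74 = 116 = PAGE_OLD_SUPREMUM. */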
345 | |
346 | /**********************************************************//** |
347 | The index page creation function. |
348 | @return pointer to the page */ |
349 | static |
350 | page_t* |
351 | page_create_low( |
352 | /*============*/ |
353 | buf_block_t* block, /*!< in: a buffer block where the |
354 | page is created */ |
355 | ulint comp, /*!< in: nonzero=compact page format */ |
356 | bool is_rtree) /*!< in: if it is an R-Tree page */ |
357 | { |
358 | page_t* page; |
359 | |
360 | compile_time_assert(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE |
361 | <= PAGE_DATA); |
362 | compile_time_assert(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE |
363 | <= PAGE_DATA); |
364 | |
365 | buf_block_modify_clock_inc(block); |
366 | |
367 | page = buf_block_get_frame(block); |
368 | |
369 | if (is_rtree) { |
370 | fil_page_set_type(page, FIL_PAGE_RTREE); |
371 | } else { |
372 | fil_page_set_type(page, FIL_PAGE_INDEX); |
373 | } |
374 | |
375 | memset(page + PAGE_HEADER, 0, PAGE_HEADER_PRIV_END); |
376 | page[PAGE_HEADER + PAGE_N_DIR_SLOTS + 1] = 2; |
377 | page[PAGE_HEADER + PAGE_INSTANT] = 0; |
378 | page[PAGE_HEADER + PAGE_DIRECTION_B] = PAGE_NO_DIRECTION; |
379 | |
380 | if (comp) { |
381 | page[PAGE_HEADER + PAGE_N_HEAP] = 0x80;/*page_is_comp()*/ |
382 | page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW; |
383 | page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_NEW_SUPREMUM_END; |
384 | memcpy(page + PAGE_DATA, infimum_supremum_compact, |
385 | sizeof infimum_supremum_compact); |
386 | memset(page |
387 | + PAGE_NEW_SUPREMUM_END, 0, |
388 | srv_page_size - PAGE_DIR - PAGE_NEW_SUPREMUM_END); |
389 | page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1] |
390 | = PAGE_NEW_SUPREMUM; |
391 | page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1] |
392 | = PAGE_NEW_INFIMUM; |
393 | } else { |
394 | page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW; |
395 | page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_OLD_SUPREMUM_END; |
396 | memcpy(page + PAGE_DATA, infimum_supremum_redundant, |
397 | sizeof infimum_supremum_redundant); |
398 | memset(page |
399 | + PAGE_OLD_SUPREMUM_END, 0, |
400 | srv_page_size - PAGE_DIR - PAGE_OLD_SUPREMUM_END); |
401 | page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1] |
402 | = PAGE_OLD_SUPREMUM; |
403 | page[srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1] |
404 | = PAGE_OLD_INFIMUM; |
405 | } |
406 | |
407 | return(page); |
408 | } |
409 | |
410 | /** Parses a redo log record of creating a page. |
411 | @param[in,out] block buffer block, or NULL |
412 | @param[in] comp nonzero=compact page format |
@param[in]	is_rtree	whether it is an R-tree page */
414 | void |
415 | page_parse_create( |
416 | buf_block_t* block, |
417 | ulint comp, |
418 | bool is_rtree) |
419 | { |
420 | if (block != NULL) { |
421 | page_create_low(block, comp, is_rtree); |
422 | } |
423 | } |
424 | |
425 | /**********************************************************//** |
426 | Create an uncompressed B-tree or R-tree index page. |
427 | @return pointer to the page */ |
428 | page_t* |
429 | page_create( |
430 | /*========*/ |
431 | buf_block_t* block, /*!< in: a buffer block where the |
432 | page is created */ |
433 | mtr_t* mtr, /*!< in: mini-transaction handle */ |
434 | ulint comp, /*!< in: nonzero=compact page format */ |
	bool		is_rtree) /*!< in: whether it is an R-tree page */
436 | { |
437 | ut_ad(mtr->is_named_space(block->page.id.space())); |
438 | page_create_write_log(buf_block_get_frame(block), mtr, comp, is_rtree); |
439 | return(page_create_low(block, comp, is_rtree)); |
440 | } |
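
/* Minimal usage sketch (block, index and mtr are placeholders): a
caller that has just allocated and X-latched a free page inside a
mini-transaction might initialize it with

	page_t*	page = page_create(block, &mtr,
				   dict_table_is_comp(index->table),
				   dict_index_is_spatial(index));

page_create_zip() below is the counterpart for ROW_FORMAT=COMPRESSED
pages. */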
441 | |
442 | /**********************************************************//** |
443 | Create a compressed B-tree index page. |
444 | @return pointer to the page */ |
445 | page_t* |
446 | page_create_zip( |
447 | /*============*/ |
448 | buf_block_t* block, /*!< in/out: a buffer frame |
449 | where the page is created */ |
450 | dict_index_t* index, /*!< in: the index of the |
451 | page, or NULL when applying |
452 | TRUNCATE log |
453 | record during recovery */ |
454 | ulint level, /*!< in: the B-tree level |
455 | of the page */ |
456 | trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */ |
457 | const redo_page_compress_t* page_comp_info, |
458 | /*!< in: used for applying |
459 | TRUNCATE log |
460 | record during recovery */ |
461 | mtr_t* mtr) /*!< in/out: mini-transaction |
462 | handle */ |
463 | { |
464 | page_t* page; |
465 | page_zip_des_t* page_zip = buf_block_get_page_zip(block); |
466 | bool is_spatial; |
467 | |
468 | ut_ad(block); |
469 | ut_ad(page_zip); |
470 | ut_ad(index == NULL || dict_table_is_comp(index->table)); |
471 | is_spatial = index ? dict_index_is_spatial(index) |
472 | : page_comp_info->type & DICT_SPATIAL; |
473 | |
474 | /* PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC are always 0 for |
475 | temporary tables. */ |
476 | ut_ad(max_trx_id == 0 || !index->table->is_temporary()); |
477 | /* In secondary indexes and the change buffer, PAGE_MAX_TRX_ID |
478 | must be zero on non-leaf pages. max_trx_id can be 0 when the |
479 | index consists of an empty root (leaf) page. */ |
480 | ut_ad(max_trx_id == 0 |
481 | || level == 0 |
482 | || !dict_index_is_sec_or_ibuf(index) |
483 | || index->table->is_temporary()); |
	/* In the clustered index, PAGE_ROOT_AUTO_INC or
	PAGE_MAX_TRX_ID must be 0 on other pages than the root. */
486 | ut_ad(level == 0 || max_trx_id == 0 |
487 | || !dict_index_is_sec_or_ibuf(index) |
488 | || index->table->is_temporary()); |
489 | |
490 | page = page_create_low(block, TRUE, is_spatial); |
491 | mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level); |
492 | mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id); |
493 | |
494 | if (truncate_t::s_fix_up_active) { |
495 | /* Compress the index page created when applying |
496 | TRUNCATE log during recovery */ |
497 | if (!page_zip_compress(page_zip, page, index, page_zip_level, |
498 | page_comp_info, NULL)) { |
499 | /* The compression of a newly created |
500 | page should always succeed. */ |
501 | ut_error; |
502 | } |
503 | |
504 | } else if (!page_zip_compress(page_zip, page, index, |
505 | page_zip_level, NULL, mtr)) { |
506 | /* The compression of a newly created |
507 | page should always succeed. */ |
508 | ut_error; |
509 | } |
510 | |
511 | return(page); |
512 | } |
513 | |
514 | /**********************************************************//** |
515 | Empty a previously created B-tree index page. */ |
516 | void |
517 | page_create_empty( |
518 | /*==============*/ |
519 | buf_block_t* block, /*!< in/out: B-tree block */ |
520 | dict_index_t* index, /*!< in: the index of the page */ |
521 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
522 | { |
523 | trx_id_t max_trx_id; |
524 | page_t* page = buf_block_get_frame(block); |
525 | page_zip_des_t* page_zip= buf_block_get_page_zip(block); |
526 | |
527 | ut_ad(fil_page_index_page_check(page)); |
528 | |
529 | /* Multiple transactions cannot simultaneously operate on the |
530 | same temp-table in parallel. |
	max_trx_id is ignored for temp tables because it is not required
	for MVCC. */
533 | if (dict_index_is_sec_or_ibuf(index) |
534 | && !index->table->is_temporary() |
535 | && page_is_leaf(page)) { |
536 | max_trx_id = page_get_max_trx_id(page); |
537 | ut_ad(max_trx_id); |
538 | } else if (page_is_root(page)) { |
539 | /* Preserve PAGE_ROOT_AUTO_INC. */ |
540 | max_trx_id = page_get_max_trx_id(page); |
541 | } else { |
542 | max_trx_id = 0; |
543 | } |
544 | |
545 | if (page_zip) { |
546 | ut_ad(!index->table->is_temporary()); |
547 | page_create_zip(block, index, |
548 | page_header_get_field(page, PAGE_LEVEL), |
549 | max_trx_id, NULL, mtr); |
550 | } else { |
551 | page_create(block, mtr, page_is_comp(page), |
552 | dict_index_is_spatial(index)); |
553 | |
554 | if (max_trx_id) { |
555 | mlog_write_ull(PAGE_HEADER + PAGE_MAX_TRX_ID + page, |
556 | max_trx_id, mtr); |
557 | } |
558 | } |
559 | } |
560 | |
561 | /*************************************************************//** |
Differs from page_copy_rec_list_end() in that this function does not
touch the lock table or PAGE_MAX_TRX_ID on the page, nor does it
compress the page.
564 | |
565 | IMPORTANT: The caller will have to update IBUF_BITMAP_FREE |
566 | if new_block is a compressed leaf page in a secondary index. |
567 | This has to be done either within the same mini-transaction, |
568 | or by invoking ibuf_reset_free_bits() before mtr_commit(). */ |
569 | void |
570 | page_copy_rec_list_end_no_locks( |
571 | /*============================*/ |
572 | buf_block_t* new_block, /*!< in: index page to copy to */ |
573 | buf_block_t* block, /*!< in: index page of rec */ |
574 | rec_t* rec, /*!< in: record on page */ |
575 | dict_index_t* index, /*!< in: record descriptor */ |
576 | mtr_t* mtr) /*!< in: mtr */ |
577 | { |
578 | page_t* new_page = buf_block_get_frame(new_block); |
579 | page_cur_t cur1; |
580 | rec_t* cur2; |
581 | mem_heap_t* heap = NULL; |
582 | ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
583 | ulint* offsets = offsets_; |
584 | rec_offs_init(offsets_); |
585 | |
586 | page_cur_position(rec, block, &cur1); |
587 | |
588 | if (page_cur_is_before_first(&cur1)) { |
589 | |
590 | page_cur_move_to_next(&cur1); |
591 | } |
592 | |
593 | btr_assert_not_corrupted(new_block, index); |
594 | ut_a(page_is_comp(new_page) == page_rec_is_comp(rec)); |
595 | ut_a(mach_read_from_2(new_page + srv_page_size - 10) == (ulint) |
596 | (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); |
597 | const bool is_leaf = page_is_leaf(block->frame); |
598 | |
599 | cur2 = page_get_infimum_rec(buf_block_get_frame(new_block)); |
600 | |
601 | /* Copy records from the original page to the new page */ |
602 | |
603 | while (!page_cur_is_after_last(&cur1)) { |
604 | rec_t* cur1_rec = page_cur_get_rec(&cur1); |
605 | rec_t* ins_rec; |
606 | offsets = rec_get_offsets(cur1_rec, index, offsets, is_leaf, |
607 | ULINT_UNDEFINED, &heap); |
608 | ins_rec = page_cur_insert_rec_low(cur2, index, |
609 | cur1_rec, offsets, mtr); |
610 | if (UNIV_UNLIKELY(!ins_rec)) { |
611 | ib::fatal() << "Rec offset " << page_offset(rec) |
612 | << ", cur1 offset " |
613 | << page_offset(page_cur_get_rec(&cur1)) |
614 | << ", cur2 offset " << page_offset(cur2); |
615 | } |
616 | |
617 | page_cur_move_to_next(&cur1); |
618 | cur2 = ins_rec; |
619 | } |
620 | |
621 | if (UNIV_LIKELY_NULL(heap)) { |
622 | mem_heap_free(heap); |
623 | } |
624 | } |
625 | |
626 | /*************************************************************//** |
627 | Copies records from page to new_page, from a given record onward, |
628 | including that record. Infimum and supremum records are not copied. |
629 | The records are copied to the start of the record list on new_page. |
630 | |
631 | IMPORTANT: The caller will have to update IBUF_BITMAP_FREE |
632 | if new_block is a compressed leaf page in a secondary index. |
633 | This has to be done either within the same mini-transaction, |
634 | or by invoking ibuf_reset_free_bits() before mtr_commit(). |
635 | |
636 | @return pointer to the original successor of the infimum record on |
637 | new_page, or NULL on zip overflow (new_block will be decompressed) */ |
638 | rec_t* |
639 | page_copy_rec_list_end( |
640 | /*===================*/ |
641 | buf_block_t* new_block, /*!< in/out: index page to copy to */ |
642 | buf_block_t* block, /*!< in: index page containing rec */ |
643 | rec_t* rec, /*!< in: record on page */ |
644 | dict_index_t* index, /*!< in: record descriptor */ |
645 | mtr_t* mtr) /*!< in: mtr */ |
646 | { |
647 | page_t* new_page = buf_block_get_frame(new_block); |
648 | page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); |
649 | page_t* page = page_align(rec); |
650 | rec_t* ret = page_rec_get_next( |
651 | page_get_infimum_rec(new_page)); |
652 | ulint num_moved = 0; |
653 | rtr_rec_move_t* rec_move = NULL; |
654 | mem_heap_t* heap = NULL; |
655 | |
656 | #ifdef UNIV_ZIP_DEBUG |
657 | if (new_page_zip) { |
658 | page_zip_des_t* page_zip = buf_block_get_page_zip(block); |
659 | ut_a(page_zip); |
660 | |
661 | /* Strict page_zip_validate() may fail here. |
662 | Furthermore, btr_compress() may set FIL_PAGE_PREV to |
663 | FIL_NULL on new_page while leaving it intact on |
664 | new_page_zip. So, we cannot validate new_page_zip. */ |
665 | ut_a(page_zip_validate_low(page_zip, page, index, TRUE)); |
666 | } |
667 | #endif /* UNIV_ZIP_DEBUG */ |
668 | ut_ad(buf_block_get_frame(block) == page); |
669 | ut_ad(page_is_leaf(page) == page_is_leaf(new_page)); |
670 | ut_ad(page_is_comp(page) == page_is_comp(new_page)); |
671 | /* Here, "ret" may be pointing to a user record or the |
672 | predefined supremum record. */ |
673 | |
674 | mtr_log_t log_mode = MTR_LOG_NONE; |
675 | |
676 | if (new_page_zip) { |
677 | log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); |
678 | } |
679 | |
680 | if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) { |
681 | page_copy_rec_list_end_to_created_page(new_page, rec, |
682 | index, mtr); |
683 | } else { |
684 | if (dict_index_is_spatial(index)) { |
685 | ulint max_to_move = page_get_n_recs( |
686 | buf_block_get_frame(block)); |
687 | heap = mem_heap_create(256); |
688 | |
689 | rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc( |
690 | heap, |
691 | sizeof (*rec_move) * max_to_move)); |
692 | |
693 | /* For spatial index, we need to insert recs one by one |
694 | to keep recs ordered. */ |
695 | rtr_page_copy_rec_list_end_no_locks(new_block, |
696 | block, rec, index, |
697 | heap, rec_move, |
698 | max_to_move, |
699 | &num_moved, |
700 | mtr); |
701 | } else { |
702 | page_copy_rec_list_end_no_locks(new_block, block, rec, |
703 | index, mtr); |
704 | } |
705 | } |
706 | |
707 | /* Update PAGE_MAX_TRX_ID on the uncompressed page. |
708 | Modifications will be redo logged and copied to the compressed |
709 | page in page_zip_compress() or page_zip_reorganize() below. |
710 | Multiple transactions cannot simultaneously operate on the |
711 | same temp-table in parallel. |
	max_trx_id is ignored for temp tables because it is not required
	for MVCC. */
714 | if (dict_index_is_sec_or_ibuf(index) |
715 | && page_is_leaf(page) |
716 | && !index->table->is_temporary()) { |
717 | page_update_max_trx_id(new_block, NULL, |
718 | page_get_max_trx_id(page), mtr); |
719 | } |
720 | |
721 | if (new_page_zip) { |
722 | mtr_set_log_mode(mtr, log_mode); |
723 | |
724 | if (!page_zip_compress(new_page_zip, |
725 | new_page, |
726 | index, |
727 | page_zip_level, |
728 | NULL, mtr)) { |
729 | /* Before trying to reorganize the page, |
730 | store the number of preceding records on the page. */ |
731 | ulint ret_pos |
732 | = page_rec_get_n_recs_before(ret); |
733 | /* Before copying, "ret" was the successor of |
734 | the predefined infimum record. It must still |
735 | have at least one predecessor (the predefined |
736 | infimum record, or a freshly copied record |
737 | that is smaller than "ret"). */ |
738 | ut_a(ret_pos > 0); |
739 | |
740 | if (!page_zip_reorganize(new_block, index, mtr)) { |
741 | |
742 | if (!page_zip_decompress(new_page_zip, |
743 | new_page, FALSE)) { |
744 | ut_error; |
745 | } |
746 | ut_ad(page_validate(new_page, index)); |
747 | |
748 | if (heap) { |
749 | mem_heap_free(heap); |
750 | } |
751 | |
752 | return(NULL); |
753 | } else { |
754 | /* The page was reorganized: |
755 | Seek to ret_pos. */ |
756 | ret = new_page + PAGE_NEW_INFIMUM; |
757 | |
758 | do { |
759 | ret = rec_get_next_ptr(ret, TRUE); |
760 | } while (--ret_pos); |
761 | } |
762 | } |
763 | } |
764 | |
765 | /* Update the lock table and possible hash index */ |
766 | |
767 | if (dict_table_is_locking_disabled(index->table)) { |
768 | } else if (rec_move && dict_index_is_spatial(index)) { |
769 | lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); |
770 | } else { |
771 | lock_move_rec_list_end(new_block, block, rec); |
772 | } |
773 | |
774 | if (heap) { |
775 | mem_heap_free(heap); |
776 | } |
777 | |
778 | btr_search_move_or_delete_hash_entries(new_block, block); |
779 | |
780 | return(ret); |
781 | } |
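
/* Illustrative sketch of the caller contract spelled out in the
comment above (new_block, block, split_rec, index and mtr are
placeholders; latching and page allocation are omitted):

	if (page_copy_rec_list_end(new_block, block, split_rec,
				   index, &mtr)) {
		if (buf_block_get_page_zip(new_block)
		    && page_is_leaf(buf_block_get_frame(new_block))
		    && !dict_index_is_clust(index)) {
			// the IBUF_BITMAP_FREE bits of a compressed
			// secondary index leaf page are no longer valid
			ibuf_reset_free_bits(new_block);
		}
	}
	mtr.commit();

Here the bits are simply reset before the mini-transaction commits,
which is one of the two options allowed by the comment above. */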
782 | |
783 | /*************************************************************//** |
784 | Copies records from page to new_page, up to the given record, |
785 | NOT including that record. Infimum and supremum records are not copied. |
786 | The records are copied to the end of the record list on new_page. |
787 | |
788 | IMPORTANT: The caller will have to update IBUF_BITMAP_FREE |
789 | if new_block is a compressed leaf page in a secondary index. |
790 | This has to be done either within the same mini-transaction, |
791 | or by invoking ibuf_reset_free_bits() before mtr_commit(). |
792 | |
793 | @return pointer to the original predecessor of the supremum record on |
794 | new_page, or NULL on zip overflow (new_block will be decompressed) */ |
795 | rec_t* |
796 | page_copy_rec_list_start( |
797 | /*=====================*/ |
798 | buf_block_t* new_block, /*!< in/out: index page to copy to */ |
799 | buf_block_t* block, /*!< in: index page containing rec */ |
800 | rec_t* rec, /*!< in: record on page */ |
801 | dict_index_t* index, /*!< in: record descriptor */ |
802 | mtr_t* mtr) /*!< in: mtr */ |
803 | { |
804 | page_t* new_page = buf_block_get_frame(new_block); |
805 | page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); |
806 | page_cur_t cur1; |
807 | rec_t* cur2; |
808 | mem_heap_t* heap = NULL; |
809 | ulint num_moved = 0; |
810 | rtr_rec_move_t* rec_move = NULL; |
811 | rec_t* ret |
812 | = page_rec_get_prev(page_get_supremum_rec(new_page)); |
813 | ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
814 | ulint* offsets = offsets_; |
815 | rec_offs_init(offsets_); |
816 | |
817 | /* Here, "ret" may be pointing to a user record or the |
818 | predefined infimum record. */ |
819 | |
820 | if (page_rec_is_infimum(rec)) { |
821 | |
822 | return(ret); |
823 | } |
824 | |
825 | mtr_log_t log_mode = MTR_LOG_NONE; |
826 | |
827 | if (new_page_zip) { |
828 | log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); |
829 | } |
830 | |
831 | page_cur_set_before_first(block, &cur1); |
832 | page_cur_move_to_next(&cur1); |
833 | |
834 | cur2 = ret; |
835 | |
836 | const bool is_leaf = page_rec_is_leaf(rec); |
837 | |
838 | /* Copy records from the original page to the new page */ |
839 | if (dict_index_is_spatial(index)) { |
840 | ulint max_to_move = page_get_n_recs( |
841 | buf_block_get_frame(block)); |
842 | heap = mem_heap_create(256); |
843 | |
844 | rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc( |
845 | heap, |
846 | sizeof (*rec_move) * max_to_move)); |
847 | |
848 | /* For spatial index, we need to insert recs one by one |
849 | to keep recs ordered. */ |
850 | rtr_page_copy_rec_list_start_no_locks(new_block, |
851 | block, rec, index, heap, |
852 | rec_move, max_to_move, |
853 | &num_moved, mtr); |
854 | } else { |
855 | |
856 | while (page_cur_get_rec(&cur1) != rec) { |
857 | rec_t* cur1_rec = page_cur_get_rec(&cur1); |
858 | offsets = rec_get_offsets(cur1_rec, index, offsets, |
859 | is_leaf, |
860 | ULINT_UNDEFINED, &heap); |
861 | cur2 = page_cur_insert_rec_low(cur2, index, |
862 | cur1_rec, offsets, mtr); |
863 | ut_a(cur2); |
864 | |
865 | page_cur_move_to_next(&cur1); |
866 | } |
867 | } |
868 | |
869 | /* Update PAGE_MAX_TRX_ID on the uncompressed page. |
870 | Modifications will be redo logged and copied to the compressed |
871 | page in page_zip_compress() or page_zip_reorganize() below. |
872 | Multiple transactions cannot simultaneously operate on the |
873 | same temp-table in parallel. |
	max_trx_id is ignored for temp tables because it is not required
	for MVCC. */
876 | if (is_leaf && dict_index_is_sec_or_ibuf(index) |
877 | && !index->table->is_temporary()) { |
878 | page_update_max_trx_id(new_block, NULL, |
879 | page_get_max_trx_id(page_align(rec)), |
880 | mtr); |
881 | } |
882 | |
883 | if (new_page_zip) { |
884 | mtr_set_log_mode(mtr, log_mode); |
885 | |
		DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
887 | goto zip_reorganize;); |
888 | |
889 | if (!page_zip_compress(new_page_zip, new_page, index, |
890 | page_zip_level, NULL, mtr)) { |
891 | ulint ret_pos; |
892 | #ifndef DBUG_OFF |
893 | zip_reorganize: |
894 | #endif /* DBUG_OFF */ |
895 | /* Before trying to reorganize the page, |
896 | store the number of preceding records on the page. */ |
897 | ret_pos = page_rec_get_n_recs_before(ret); |
898 | /* Before copying, "ret" was the predecessor |
899 | of the predefined supremum record. If it was |
900 | the predefined infimum record, then it would |
901 | still be the infimum, and we would have |
902 | ret_pos == 0. */ |
903 | |
904 | if (UNIV_UNLIKELY |
905 | (!page_zip_reorganize(new_block, index, mtr))) { |
906 | |
907 | if (UNIV_UNLIKELY |
908 | (!page_zip_decompress(new_page_zip, |
909 | new_page, FALSE))) { |
910 | ut_error; |
911 | } |
912 | ut_ad(page_validate(new_page, index)); |
913 | |
914 | if (UNIV_LIKELY_NULL(heap)) { |
915 | mem_heap_free(heap); |
916 | } |
917 | |
918 | return(NULL); |
919 | } |
920 | |
921 | /* The page was reorganized: Seek to ret_pos. */ |
922 | ret = page_rec_get_nth(new_page, ret_pos); |
923 | } |
924 | } |
925 | |
926 | /* Update the lock table and possible hash index */ |
927 | |
928 | if (dict_table_is_locking_disabled(index->table)) { |
929 | } else if (dict_index_is_spatial(index)) { |
930 | lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); |
931 | } else { |
932 | lock_move_rec_list_start(new_block, block, rec, ret); |
933 | } |
934 | |
935 | if (heap) { |
936 | mem_heap_free(heap); |
937 | } |
938 | |
939 | btr_search_move_or_delete_hash_entries(new_block, block); |
940 | |
941 | return(ret); |
942 | } |
943 | |
944 | /**********************************************************//** |
945 | Writes a log record of a record list end or start deletion. */ |
946 | UNIV_INLINE |
947 | void |
948 | page_delete_rec_list_write_log( |
949 | /*===========================*/ |
950 | rec_t* rec, /*!< in: record on page */ |
951 | dict_index_t* index, /*!< in: record descriptor */ |
952 | mlog_id_t type, /*!< in: operation type: |
953 | MLOG_LIST_END_DELETE, ... */ |
954 | mtr_t* mtr) /*!< in: mtr */ |
955 | { |
956 | byte* log_ptr; |
957 | ut_ad(type == MLOG_LIST_END_DELETE |
958 | || type == MLOG_LIST_START_DELETE |
959 | || type == MLOG_COMP_LIST_END_DELETE |
960 | || type == MLOG_COMP_LIST_START_DELETE); |
961 | |
962 | log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2); |
963 | if (log_ptr) { |
964 | /* Write the parameter as a 2-byte ulint */ |
965 | mach_write_to_2(log_ptr, page_offset(rec)); |
966 | mlog_close(mtr, log_ptr + 2); |
967 | } |
968 | } |
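
/* Note on the redo record produced above: besides the index
information written by mlog_open_and_write_index(), the body of an
MLOG_(COMP_)LIST_END_DELETE or MLOG_(COMP_)LIST_START_DELETE record
is just the 2-byte page offset of rec; recovery reads it back with
mach_read_from_2() in page_parse_delete_rec_list() below. */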
969 | |
970 | /**********************************************************//** |
971 | Parses a log record of a record list end or start deletion. |
972 | @return end of log record or NULL */ |
973 | byte* |
974 | page_parse_delete_rec_list( |
975 | /*=======================*/ |
976 | mlog_id_t type, /*!< in: MLOG_LIST_END_DELETE, |
977 | MLOG_LIST_START_DELETE, |
978 | MLOG_COMP_LIST_END_DELETE or |
979 | MLOG_COMP_LIST_START_DELETE */ |
980 | byte* ptr, /*!< in: buffer */ |
981 | byte* end_ptr,/*!< in: buffer end */ |
982 | buf_block_t* block, /*!< in/out: buffer block or NULL */ |
983 | dict_index_t* index, /*!< in: record descriptor */ |
984 | mtr_t* mtr) /*!< in: mtr or NULL */ |
985 | { |
986 | page_t* page; |
987 | ulint offset; |
988 | |
989 | ut_ad(type == MLOG_LIST_END_DELETE |
990 | || type == MLOG_LIST_START_DELETE |
991 | || type == MLOG_COMP_LIST_END_DELETE |
992 | || type == MLOG_COMP_LIST_START_DELETE); |
993 | |
994 | /* Read the record offset as a 2-byte ulint */ |
995 | |
996 | if (end_ptr < ptr + 2) { |
997 | |
998 | return(NULL); |
999 | } |
1000 | |
1001 | offset = mach_read_from_2(ptr); |
1002 | ptr += 2; |
1003 | |
1004 | if (!block) { |
1005 | |
1006 | return(ptr); |
1007 | } |
1008 | |
1009 | page = buf_block_get_frame(block); |
1010 | |
1011 | ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); |
1012 | |
1013 | if (type == MLOG_LIST_END_DELETE |
1014 | || type == MLOG_COMP_LIST_END_DELETE) { |
1015 | page_delete_rec_list_end(page + offset, block, index, |
1016 | ULINT_UNDEFINED, ULINT_UNDEFINED, |
1017 | mtr); |
1018 | } else { |
1019 | page_delete_rec_list_start(page + offset, block, index, mtr); |
1020 | } |
1021 | |
1022 | return(ptr); |
1023 | } |
1024 | |
1025 | /*************************************************************//** |
1026 | Deletes records from a page from a given record onward, including that record. |
1027 | The infimum and supremum records are not deleted. */ |
1028 | void |
1029 | page_delete_rec_list_end( |
1030 | /*=====================*/ |
1031 | rec_t* rec, /*!< in: pointer to record on page */ |
1032 | buf_block_t* block, /*!< in: buffer block of the page */ |
1033 | dict_index_t* index, /*!< in: record descriptor */ |
1034 | ulint n_recs, /*!< in: number of records to delete, |
1035 | or ULINT_UNDEFINED if not known */ |
1036 | ulint size, /*!< in: the sum of the sizes of the |
1037 | records in the end of the chain to |
1038 | delete, or ULINT_UNDEFINED if not known */ |
1039 | mtr_t* mtr) /*!< in: mtr */ |
1040 | { |
1041 | page_dir_slot_t*slot; |
1042 | ulint slot_index; |
1043 | rec_t* last_rec; |
1044 | rec_t* prev_rec; |
1045 | ulint n_owned; |
1046 | page_zip_des_t* page_zip = buf_block_get_page_zip(block); |
1047 | page_t* page = page_align(rec); |
1048 | mem_heap_t* heap = NULL; |
1049 | ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
1050 | ulint* offsets = offsets_; |
1051 | rec_offs_init(offsets_); |
1052 | |
1053 | ut_ad(size == ULINT_UNDEFINED || size < srv_page_size); |
1054 | ut_ad(!page_zip || page_rec_is_comp(rec)); |
1055 | #ifdef UNIV_ZIP_DEBUG |
1056 | ut_a(!page_zip || page_zip_validate(page_zip, page, index)); |
1057 | #endif /* UNIV_ZIP_DEBUG */ |
1058 | |
1059 | if (page_rec_is_supremum(rec)) { |
1060 | ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED); |
1061 | /* Nothing to do, there are no records bigger than the |
1062 | page supremum. */ |
1063 | return; |
1064 | } |
1065 | |
1066 | if (recv_recovery_is_on()) { |
1067 | /* If we are replaying a redo log record, we must |
1068 | replay it exactly. Since MySQL 5.6.11, we should be |
1069 | generating a redo log record for page creation if |
1070 | the page would become empty. Thus, this branch should |
1071 | only be executed when applying redo log that was |
1072 | generated by an older version of MySQL. */ |
1073 | } else if (page_rec_is_infimum(rec) |
1074 | || n_recs == page_get_n_recs(page)) { |
1075 | delete_all: |
1076 | /* We are deleting all records. */ |
1077 | page_create_empty(block, index, mtr); |
1078 | return; |
1079 | } else if (page_is_comp(page)) { |
1080 | if (page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1) == rec) { |
1081 | /* We are deleting everything from the first |
1082 | user record onwards. */ |
1083 | goto delete_all; |
1084 | } |
1085 | } else { |
1086 | if (page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0) == rec) { |
1087 | /* We are deleting everything from the first |
1088 | user record onwards. */ |
1089 | goto delete_all; |
1090 | } |
1091 | } |
1092 | |
1093 | /* Reset the last insert info in the page header and increment |
1094 | the modify clock for the frame */ |
1095 | |
1096 | page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL); |
1097 | |
1098 | /* The page gets invalid for optimistic searches: increment the |
1099 | frame modify clock */ |
1100 | |
1101 | buf_block_modify_clock_inc(block); |
1102 | |
1103 | page_delete_rec_list_write_log(rec, index, page_is_comp(page) |
1104 | ? MLOG_COMP_LIST_END_DELETE |
1105 | : MLOG_LIST_END_DELETE, mtr); |
1106 | |
1107 | const bool is_leaf = page_is_leaf(page); |
1108 | |
1109 | if (page_zip) { |
1110 | mtr_log_t log_mode; |
1111 | |
1112 | ut_a(page_is_comp(page)); |
1113 | /* Individual deletes are not logged */ |
1114 | |
1115 | log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); |
1116 | |
1117 | do { |
1118 | page_cur_t cur; |
1119 | page_cur_position(rec, block, &cur); |
1120 | |
1121 | offsets = rec_get_offsets(rec, index, offsets, is_leaf, |
1122 | ULINT_UNDEFINED, &heap); |
1123 | rec = rec_get_next_ptr(rec, TRUE); |
1124 | #ifdef UNIV_ZIP_DEBUG |
1125 | ut_a(page_zip_validate(page_zip, page, index)); |
1126 | #endif /* UNIV_ZIP_DEBUG */ |
1127 | page_cur_delete_rec(&cur, index, offsets, mtr); |
1128 | } while (page_offset(rec) != PAGE_NEW_SUPREMUM); |
1129 | |
1130 | if (UNIV_LIKELY_NULL(heap)) { |
1131 | mem_heap_free(heap); |
1132 | } |
1133 | |
1134 | /* Restore log mode */ |
1135 | |
1136 | mtr_set_log_mode(mtr, log_mode); |
1137 | return; |
1138 | } |
1139 | |
1140 | prev_rec = page_rec_get_prev(rec); |
1141 | |
1142 | last_rec = page_rec_get_prev(page_get_supremum_rec(page)); |
1143 | |
1144 | bool scrub = srv_immediate_scrub_data_uncompressed; |
1145 | if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED) || |
1146 | scrub) { |
1147 | rec_t* rec2 = rec; |
1148 | /* Calculate the sum of sizes and the number of records */ |
1149 | size = 0; |
1150 | n_recs = 0; |
1151 | |
1152 | do { |
1153 | ulint s; |
1154 | offsets = rec_get_offsets(rec2, index, offsets, |
1155 | is_leaf, |
1156 | ULINT_UNDEFINED, &heap); |
1157 | s = rec_offs_size(offsets); |
1158 | ut_ad(ulint(rec2 - page) + s |
1159 | - rec_offs_extra_size(offsets) |
1160 | < srv_page_size); |
1161 | ut_ad(size + s < srv_page_size); |
1162 | size += s; |
1163 | n_recs++; |
1164 | |
1165 | if (scrub) { |
1166 | /* scrub record */ |
1167 | memset(rec2, 0, rec_offs_data_size(offsets)); |
1168 | } |
1169 | |
1170 | rec2 = page_rec_get_next(rec2); |
1171 | } while (!page_rec_is_supremum(rec2)); |
1172 | |
1173 | if (UNIV_LIKELY_NULL(heap)) { |
1174 | mem_heap_free(heap); |
1175 | } |
1176 | } |
1177 | |
1178 | ut_ad(size < srv_page_size); |
1179 | |
1180 | /* Update the page directory; there is no need to balance the number |
1181 | of the records owned by the supremum record, as it is allowed to be |
1182 | less than PAGE_DIR_SLOT_MIN_N_OWNED */ |
1183 | |
1184 | if (page_is_comp(page)) { |
1185 | rec_t* rec2 = rec; |
1186 | ulint count = 0; |
1187 | |
1188 | while (rec_get_n_owned_new(rec2) == 0) { |
1189 | count++; |
1190 | |
1191 | rec2 = rec_get_next_ptr(rec2, TRUE); |
1192 | } |
1193 | |
1194 | ut_ad(rec_get_n_owned_new(rec2) > count); |
1195 | |
1196 | n_owned = rec_get_n_owned_new(rec2) - count; |
1197 | slot_index = page_dir_find_owner_slot(rec2); |
1198 | ut_ad(slot_index > 0); |
1199 | slot = page_dir_get_nth_slot(page, slot_index); |
1200 | } else { |
1201 | rec_t* rec2 = rec; |
1202 | ulint count = 0; |
1203 | |
1204 | while (rec_get_n_owned_old(rec2) == 0) { |
1205 | count++; |
1206 | |
1207 | rec2 = rec_get_next_ptr(rec2, FALSE); |
1208 | } |
1209 | |
1210 | ut_ad(rec_get_n_owned_old(rec2) > count); |
1211 | |
1212 | n_owned = rec_get_n_owned_old(rec2) - count; |
1213 | slot_index = page_dir_find_owner_slot(rec2); |
1214 | ut_ad(slot_index > 0); |
1215 | slot = page_dir_get_nth_slot(page, slot_index); |
1216 | } |
1217 | |
1218 | page_dir_slot_set_rec(slot, page_get_supremum_rec(page)); |
1219 | page_dir_slot_set_n_owned(slot, NULL, n_owned); |
1220 | |
1221 | page_dir_set_n_slots(page, NULL, slot_index + 1); |
1222 | |
1223 | /* Remove the record chain segment from the record chain */ |
1224 | page_rec_set_next(prev_rec, page_get_supremum_rec(page)); |
1225 | |
1226 | /* Catenate the deleted chain segment to the page free list */ |
1227 | |
1228 | page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE)); |
1229 | page_header_set_ptr(page, NULL, PAGE_FREE, rec); |
1230 | |
1231 | page_header_set_field(page, NULL, PAGE_GARBAGE, size |
1232 | + page_header_get_field(page, PAGE_GARBAGE)); |
1233 | |
1234 | page_header_set_field(page, NULL, PAGE_N_RECS, |
1235 | (ulint)(page_get_n_recs(page) - n_recs)); |
1236 | } |
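
/* Worked example of the directory fix-up above (invented numbers):
let the owner rec2 of rec's group have n_owned = 7, and let the walk
from rec reach rec2 after passing count = 2 non-owner records (rec
itself and one more).  The records from rec onwards, including rec2,
go to the free list; the owner's slot is repointed at the supremum,
and its n_owned becomes 7 - 2 = 5, i.e. the 4 surviving records of
the group plus the supremum itself.  All later slots are dropped by
setting PAGE_N_DIR_SLOTS to slot_index + 1. */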
1237 | |
1238 | /*************************************************************//** |
1239 | Deletes records from page, up to the given record, NOT including |
1240 | that record. Infimum and supremum records are not deleted. */ |
1241 | void |
1242 | page_delete_rec_list_start( |
1243 | /*=======================*/ |
1244 | rec_t* rec, /*!< in: record on page */ |
1245 | buf_block_t* block, /*!< in: buffer block of the page */ |
1246 | dict_index_t* index, /*!< in: record descriptor */ |
1247 | mtr_t* mtr) /*!< in: mtr */ |
1248 | { |
1249 | page_cur_t cur1; |
1250 | ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
1251 | ulint* offsets = offsets_; |
1252 | mem_heap_t* heap = NULL; |
1253 | |
1254 | rec_offs_init(offsets_); |
1255 | |
1256 | ut_ad((ibool) !!page_rec_is_comp(rec) |
1257 | == dict_table_is_comp(index->table)); |
1258 | #ifdef UNIV_ZIP_DEBUG |
1259 | { |
1260 | page_zip_des_t* page_zip= buf_block_get_page_zip(block); |
1261 | page_t* page = buf_block_get_frame(block); |
1262 | |
1263 | /* page_zip_validate() would detect a min_rec_mark mismatch |
1264 | in btr_page_split_and_insert() |
1265 | between btr_attach_half_pages() and insert_page = ... |
1266 | when btr_page_get_split_rec_to_left() holds |
1267 | (direction == FSP_DOWN). */ |
1268 | ut_a(!page_zip |
1269 | || page_zip_validate_low(page_zip, page, index, TRUE)); |
1270 | } |
1271 | #endif /* UNIV_ZIP_DEBUG */ |
1272 | |
1273 | if (page_rec_is_infimum(rec)) { |
1274 | return; |
1275 | } |
1276 | |
1277 | if (page_rec_is_supremum(rec)) { |
1278 | /* We are deleting all records. */ |
1279 | page_create_empty(block, index, mtr); |
1280 | return; |
1281 | } |
1282 | |
1283 | mlog_id_t type; |
1284 | |
1285 | if (page_rec_is_comp(rec)) { |
1286 | type = MLOG_COMP_LIST_START_DELETE; |
1287 | } else { |
1288 | type = MLOG_LIST_START_DELETE; |
1289 | } |
1290 | |
1291 | page_delete_rec_list_write_log(rec, index, type, mtr); |
1292 | |
1293 | page_cur_set_before_first(block, &cur1); |
1294 | page_cur_move_to_next(&cur1); |
1295 | |
1296 | /* Individual deletes are not logged */ |
1297 | |
1298 | mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); |
1299 | const bool is_leaf = page_rec_is_leaf(rec); |
1300 | |
1301 | while (page_cur_get_rec(&cur1) != rec) { |
1302 | offsets = rec_get_offsets(page_cur_get_rec(&cur1), index, |
1303 | offsets, is_leaf, |
1304 | ULINT_UNDEFINED, &heap); |
1305 | page_cur_delete_rec(&cur1, index, offsets, mtr); |
1306 | } |
1307 | |
1308 | if (UNIV_LIKELY_NULL(heap)) { |
1309 | mem_heap_free(heap); |
1310 | } |
1311 | |
1312 | /* Restore log mode */ |
1313 | |
1314 | mtr_set_log_mode(mtr, log_mode); |
1315 | } |
1316 | |
1317 | /*************************************************************//** |
1318 | Moves record list end to another page. Moved records include |
1319 | split_rec. |
1320 | |
1321 | IMPORTANT: The caller will have to update IBUF_BITMAP_FREE |
1322 | if new_block is a compressed leaf page in a secondary index. |
1323 | This has to be done either within the same mini-transaction, |
1324 | or by invoking ibuf_reset_free_bits() before mtr_commit(). |
1325 | |
1326 | @return TRUE on success; FALSE on compression failure (new_block will |
1327 | be decompressed) */ |
1328 | ibool |
1329 | page_move_rec_list_end( |
1330 | /*===================*/ |
1331 | buf_block_t* new_block, /*!< in/out: index page where to move */ |
1332 | buf_block_t* block, /*!< in: index page from where to move */ |
1333 | rec_t* split_rec, /*!< in: first record to move */ |
1334 | dict_index_t* index, /*!< in: record descriptor */ |
1335 | mtr_t* mtr) /*!< in: mtr */ |
1336 | { |
1337 | page_t* new_page = buf_block_get_frame(new_block); |
1338 | ulint old_data_size; |
1339 | ulint new_data_size; |
1340 | ulint old_n_recs; |
1341 | ulint new_n_recs; |
1342 | |
1343 | ut_ad(!dict_index_is_spatial(index)); |
1344 | |
1345 | old_data_size = page_get_data_size(new_page); |
1346 | old_n_recs = page_get_n_recs(new_page); |
1347 | #ifdef UNIV_ZIP_DEBUG |
1348 | { |
1349 | page_zip_des_t* new_page_zip |
1350 | = buf_block_get_page_zip(new_block); |
1351 | page_zip_des_t* page_zip |
1352 | = buf_block_get_page_zip(block); |
1353 | ut_a(!new_page_zip == !page_zip); |
1354 | ut_a(!new_page_zip |
1355 | || page_zip_validate(new_page_zip, new_page, index)); |
1356 | ut_a(!page_zip |
1357 | || page_zip_validate(page_zip, page_align(split_rec), |
1358 | index)); |
1359 | } |
1360 | #endif /* UNIV_ZIP_DEBUG */ |
1361 | |
1362 | if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block, |
1363 | split_rec, index, mtr))) { |
1364 | return(FALSE); |
1365 | } |
1366 | |
1367 | new_data_size = page_get_data_size(new_page); |
1368 | new_n_recs = page_get_n_recs(new_page); |
1369 | |
1370 | ut_ad(new_data_size >= old_data_size); |
1371 | |
1372 | page_delete_rec_list_end(split_rec, block, index, |
1373 | new_n_recs - old_n_recs, |
1374 | new_data_size - old_data_size, mtr); |
1375 | |
1376 | return(TRUE); |
1377 | } |
1378 | |
1379 | /*************************************************************//** |
1380 | Moves record list start to another page. Moved records do not include |
1381 | split_rec. |
1382 | |
1383 | IMPORTANT: The caller will have to update IBUF_BITMAP_FREE |
1384 | if new_block is a compressed leaf page in a secondary index. |
1385 | This has to be done either within the same mini-transaction, |
1386 | or by invoking ibuf_reset_free_bits() before mtr_commit(). |
1387 | |
1388 | @return TRUE on success; FALSE on compression failure */ |
1389 | ibool |
1390 | page_move_rec_list_start( |
1391 | /*=====================*/ |
1392 | buf_block_t* new_block, /*!< in/out: index page where to move */ |
1393 | buf_block_t* block, /*!< in/out: page containing split_rec */ |
1394 | rec_t* split_rec, /*!< in: first record not to move */ |
1395 | dict_index_t* index, /*!< in: record descriptor */ |
1396 | mtr_t* mtr) /*!< in: mtr */ |
1397 | { |
1398 | if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block, |
1399 | split_rec, index, mtr))) { |
1400 | return(FALSE); |
1401 | } |
1402 | |
1403 | page_delete_rec_list_start(split_rec, block, index, mtr); |
1404 | |
1405 | return(TRUE); |
1406 | } |
1407 | |
1408 | /**************************************************************//** |
Used to delete n slots from the directory. This function also updates
the n_owned fields in the records, so that the first slot after
the deleted ones inherits the records of the deleted slots.
1412 | UNIV_INLINE |
1413 | void |
1414 | page_dir_delete_slot( |
1415 | /*=================*/ |
1416 | page_t* page, /*!< in/out: the index page */ |
1417 | page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ |
1418 | ulint slot_no)/*!< in: slot to be deleted */ |
1419 | { |
1420 | page_dir_slot_t* slot; |
1421 | ulint n_owned; |
1422 | ulint i; |
1423 | ulint n_slots; |
1424 | |
1425 | ut_ad(!page_zip || page_is_comp(page)); |
1426 | ut_ad(slot_no > 0); |
1427 | ut_ad(slot_no + 1 < page_dir_get_n_slots(page)); |
1428 | |
1429 | n_slots = page_dir_get_n_slots(page); |
1430 | |
1431 | /* 1. Reset the n_owned fields of the slots to be |
1432 | deleted */ |
1433 | slot = page_dir_get_nth_slot(page, slot_no); |
1434 | n_owned = page_dir_slot_get_n_owned(slot); |
1435 | page_dir_slot_set_n_owned(slot, page_zip, 0); |
1436 | |
1437 | /* 2. Update the n_owned value of the first non-deleted slot */ |
1438 | |
1439 | slot = page_dir_get_nth_slot(page, slot_no + 1); |
1440 | page_dir_slot_set_n_owned(slot, page_zip, |
1441 | n_owned + page_dir_slot_get_n_owned(slot)); |
1442 | |
1443 | /* 3. Destroy the slot by copying slots */ |
1444 | for (i = slot_no + 1; i < n_slots; i++) { |
1445 | rec_t* rec = (rec_t*) |
1446 | page_dir_slot_get_rec(page_dir_get_nth_slot(page, i)); |
1447 | page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec); |
1448 | } |
1449 | |
1450 | /* 4. Zero out the last slot, which will be removed */ |
1451 | mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0); |
1452 | |
1453 | /* 5. Update the page header */ |
1454 | page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1); |
1455 | } |
1456 | |
1457 | /**************************************************************//** |
1458 | Used to add n slots to the directory. Does not set the record pointers |
1459 | in the added slots or update n_owned values: this is the responsibility |
1460 | of the caller. */ |
1461 | UNIV_INLINE |
1462 | void |
1463 | page_dir_add_slot( |
1464 | /*==============*/ |
1465 | page_t* page, /*!< in/out: the index page */ |
	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
1467 | ulint start) /*!< in: the slot above which the new slots |
1468 | are added */ |
1469 | { |
1470 | page_dir_slot_t* slot; |
1471 | ulint n_slots; |
1472 | |
1473 | n_slots = page_dir_get_n_slots(page); |
1474 | |
1475 | ut_ad(start < n_slots - 1); |
1476 | |
1477 | /* Update the page header */ |
1478 | page_dir_set_n_slots(page, page_zip, n_slots + 1); |
1479 | |
1480 | /* Move slots up */ |
1481 | slot = page_dir_get_nth_slot(page, n_slots); |
1482 | memmove(slot, slot + PAGE_DIR_SLOT_SIZE, |
1483 | (n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE); |
1484 | } |
1485 | |
1486 | /****************************************************************//** |
1487 | Splits a directory slot which owns too many records. */ |
1488 | void |
1489 | page_dir_split_slot( |
1490 | /*================*/ |
1491 | page_t* page, /*!< in/out: index page */ |
1492 | page_zip_des_t* page_zip,/*!< in/out: compressed page whose |
1493 | uncompressed part will be written, or NULL */ |
1494 | ulint slot_no)/*!< in: the directory slot */ |
1495 | { |
1496 | rec_t* rec; |
1497 | page_dir_slot_t* new_slot; |
1498 | page_dir_slot_t* prev_slot; |
1499 | page_dir_slot_t* slot; |
1500 | ulint i; |
1501 | ulint n_owned; |
1502 | |
1503 | ut_ad(!page_zip || page_is_comp(page)); |
1504 | ut_ad(slot_no > 0); |
1505 | |
1506 | slot = page_dir_get_nth_slot(page, slot_no); |
1507 | |
1508 | n_owned = page_dir_slot_get_n_owned(slot); |
1509 | ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1); |
1510 | |
1511 | /* 1. We loop to find a record approximately in the middle of the |
1512 | records owned by the slot. */ |
1513 | |
1514 | prev_slot = page_dir_get_nth_slot(page, slot_no - 1); |
1515 | rec = (rec_t*) page_dir_slot_get_rec(prev_slot); |
1516 | |
1517 | for (i = 0; i < n_owned / 2; i++) { |
1518 | rec = page_rec_get_next(rec); |
1519 | } |
1520 | |
1521 | ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED); |
1522 | |
1523 | /* 2. We add one directory slot immediately below the slot to be |
1524 | split. */ |
1525 | |
1526 | page_dir_add_slot(page, page_zip, slot_no - 1); |
1527 | |
1528 | /* The added slot is now number slot_no, and the old slot is |
1529 | now number slot_no + 1 */ |
1530 | |
1531 | new_slot = page_dir_get_nth_slot(page, slot_no); |
1532 | slot = page_dir_get_nth_slot(page, slot_no + 1); |
1533 | |
1534 | /* 3. We store the appropriate values to the new slot. */ |
1535 | |
1536 | page_dir_slot_set_rec(new_slot, rec); |
1537 | page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2); |
1538 | |
1539 | /* 4. Finally, we update the number of records field of the |
1540 | original slot */ |
1541 | |
1542 | page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2)); |
1543 | } |
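
/* Worked example (with PAGE_DIR_SLOT_MAX_N_OWNED = 8): a slot that
has come to own 9 records is split.  The loop above walks
n_owned / 2 = 4 records forward from the previous slot's record, the
new slot added just below takes ownership of those 4 records, and the
original slot keeps the remaining 9 - 4 = 5; both counts stay within
the 4..8 bounds that page_dir_slot_check() enforces for middle
slots. */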
1544 | |
1545 | /*************************************************************//** |
Tries to balance a directory slot that owns too few records against its
upper neighbor, so that the slot ends up owning at least the minimum
number of records; this may result in merging the two slots.
1549 | void |
1550 | page_dir_balance_slot( |
1551 | /*==================*/ |
1552 | page_t* page, /*!< in/out: index page */ |
1553 | page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ |
1554 | ulint slot_no)/*!< in: the directory slot */ |
1555 | { |
1556 | page_dir_slot_t* slot; |
1557 | page_dir_slot_t* up_slot; |
1558 | ulint n_owned; |
1559 | ulint up_n_owned; |
1560 | rec_t* old_rec; |
1561 | rec_t* new_rec; |
1562 | |
1563 | ut_ad(!page_zip || page_is_comp(page)); |
1564 | ut_ad(slot_no > 0); |
1565 | |
1566 | slot = page_dir_get_nth_slot(page, slot_no); |
1567 | |
1568 | /* The last directory slot cannot be balanced with the upper |
1569 | neighbor, as there is none. */ |
1570 | |
1571 | if (UNIV_UNLIKELY(slot_no + 1 == page_dir_get_n_slots(page))) { |
1572 | |
1573 | return; |
1574 | } |
1575 | |
1576 | up_slot = page_dir_get_nth_slot(page, slot_no + 1); |
1577 | |
1578 | n_owned = page_dir_slot_get_n_owned(slot); |
1579 | up_n_owned = page_dir_slot_get_n_owned(up_slot); |
1580 | |
1581 | ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1); |
1582 | |
1583 | /* If the upper slot has the minimum value of n_owned, we will merge |
1584 | the two slots, therefore we assert: */ |
1585 | ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED); |
1586 | |
1587 | if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) { |
1588 | |
		/* In this case we can just transfer one record owned
		by the upper slot to the ownership of the lower slot */
1591 | old_rec = (rec_t*) page_dir_slot_get_rec(slot); |
1592 | |
1593 | if (page_is_comp(page)) { |
1594 | new_rec = rec_get_next_ptr(old_rec, TRUE); |
1595 | |
1596 | rec_set_n_owned_new(old_rec, page_zip, 0); |
1597 | rec_set_n_owned_new(new_rec, page_zip, n_owned + 1); |
1598 | } else { |
1599 | new_rec = rec_get_next_ptr(old_rec, FALSE); |
1600 | |
1601 | rec_set_n_owned_old(old_rec, 0); |
1602 | rec_set_n_owned_old(new_rec, n_owned + 1); |
1603 | } |
1604 | |
1605 | page_dir_slot_set_rec(slot, new_rec); |
1606 | |
		page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned - 1);
1608 | } else { |
1609 | /* In this case we may merge the two slots */ |
1610 | page_dir_delete_slot(page, page_zip, slot_no); |
1611 | } |
1612 | } |
1613 | |
1614 | /************************************************************//** |
1615 | Returns the nth record of the record list. |
1616 | This is the inverse function of page_rec_get_n_recs_before(). |
1617 | @return nth record */ |
1618 | const rec_t* |
1619 | page_rec_get_nth_const( |
1620 | /*===================*/ |
1621 | const page_t* page, /*!< in: page */ |
1622 | ulint nth) /*!< in: nth record */ |
1623 | { |
1624 | const page_dir_slot_t* slot; |
1625 | ulint i; |
1626 | ulint n_owned; |
1627 | const rec_t* rec; |
1628 | |
1629 | if (nth == 0) { |
1630 | return(page_get_infimum_rec(page)); |
1631 | } |
1632 | |
1633 | ut_ad(nth < srv_page_size / (REC_N_NEW_EXTRA_BYTES + 1)); |
1634 | |
1635 | for (i = 0;; i++) { |
1636 | |
1637 | slot = page_dir_get_nth_slot(page, i); |
1638 | n_owned = page_dir_slot_get_n_owned(slot); |
1639 | |
1640 | if (n_owned > nth) { |
1641 | break; |
1642 | } else { |
1643 | nth -= n_owned; |
1644 | } |
1645 | } |
1646 | |
1647 | ut_ad(i > 0); |
1648 | slot = page_dir_get_nth_slot(page, i - 1); |
1649 | rec = page_dir_slot_get_rec(slot); |
1650 | |
1651 | if (page_is_comp(page)) { |
1652 | do { |
1653 | rec = page_rec_get_next_low(rec, TRUE); |
1654 | ut_ad(rec); |
1655 | } while (nth--); |
1656 | } else { |
1657 | do { |
1658 | rec = page_rec_get_next_low(rec, FALSE); |
1659 | ut_ad(rec); |
1660 | } while (nth--); |
1661 | } |
1662 | |
1663 | return(rec); |
1664 | } |
1665 | |
1666 | /***************************************************************//** |
1667 | Returns the number of records before the given record in chain. |
1668 | The number includes infimum and supremum records. |
1669 | @return number of records */ |
1670 | ulint |
1671 | page_rec_get_n_recs_before( |
1672 | /*=======================*/ |
1673 | const rec_t* rec) /*!< in: the physical record */ |
1674 | { |
1675 | const page_dir_slot_t* slot; |
1676 | const rec_t* slot_rec; |
1677 | const page_t* page; |
1678 | ulint i; |
1679 | lint n = 0; |
1680 | |
1681 | ut_ad(page_rec_check(rec)); |
1682 | |
1683 | page = page_align(rec); |
1684 | if (page_is_comp(page)) { |
1685 | while (rec_get_n_owned_new(rec) == 0) { |
1686 | |
1687 | rec = rec_get_next_ptr_const(rec, TRUE); |
1688 | n--; |
1689 | } |
1690 | |
1691 | for (i = 0; ; i++) { |
1692 | slot = page_dir_get_nth_slot(page, i); |
1693 | slot_rec = page_dir_slot_get_rec(slot); |
1694 | |
1695 | n += lint(rec_get_n_owned_new(slot_rec)); |
1696 | |
1697 | if (rec == slot_rec) { |
1698 | |
1699 | break; |
1700 | } |
1701 | } |
1702 | } else { |
1703 | while (rec_get_n_owned_old(rec) == 0) { |
1704 | |
1705 | rec = rec_get_next_ptr_const(rec, FALSE); |
1706 | n--; |
1707 | } |
1708 | |
1709 | for (i = 0; ; i++) { |
1710 | slot = page_dir_get_nth_slot(page, i); |
1711 | slot_rec = page_dir_slot_get_rec(slot); |
1712 | |
1713 | n += lint(rec_get_n_owned_old(slot_rec)); |
1714 | |
1715 | if (rec == slot_rec) { |
1716 | |
1717 | break; |
1718 | } |
1719 | } |
1720 | } |
1721 | |
1722 | n--; |
1723 | |
1724 | ut_ad(n >= 0); |
1725 | ut_ad((ulong) n < srv_page_size / (REC_N_NEW_EXTRA_BYTES + 1)); |
1726 | |
1727 | return((ulint) n); |
1728 | } |
1729 | |
1730 | /************************************************************//** |
1731 | Prints record contents including the data relevant only in |
1732 | the index page context. */ |
1733 | void |
1734 | page_rec_print( |
1735 | /*===========*/ |
1736 | const rec_t* rec, /*!< in: physical record */ |
1737 | const ulint* offsets)/*!< in: record descriptor */ |
1738 | { |
1739 | ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); |
1740 | rec_print_new(stderr, rec, offsets); |
1741 | if (page_rec_is_comp(rec)) { |
1742 | ib::info() << "n_owned: " << rec_get_n_owned_new(rec) |
1743 | << "; heap_no: " << rec_get_heap_no_new(rec) |
1744 | << "; next rec: " << rec_get_next_offs(rec, TRUE); |
1745 | } else { |
1746 | ib::info() << "n_owned: " << rec_get_n_owned_old(rec) |
1747 | << "; heap_no: " << rec_get_heap_no_old(rec) |
1748 | << "; next rec: " << rec_get_next_offs(rec, FALSE); |
1749 | } |
1750 | |
1751 | page_rec_check(rec); |
1752 | rec_validate(rec, offsets); |
1753 | } |
1754 | |
1755 | #ifdef UNIV_BTR_PRINT |
1756 | /***************************************************************//** |
1757 | This is used to print the contents of the directory for |
1758 | debugging purposes. */ |
1759 | void |
1760 | page_dir_print( |
1761 | /*===========*/ |
1762 | page_t* page, /*!< in: index page */ |
1763 | ulint pr_n) /*!< in: print n first and n last entries */ |
1764 | { |
1765 | ulint n; |
1766 | ulint i; |
1767 | page_dir_slot_t* slot; |
1768 | |
1769 | n = page_dir_get_n_slots(page); |
1770 | |
1771 | fprintf(stderr, "--------------------------------\n" |
1772 | "PAGE DIRECTORY\n" |
1773 | "Page address %p\n" |
1774 | "Directory stack top at offs: %lu; number of slots: %lu\n" , |
1775 | page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)), |
1776 | (ulong) n); |
1777 | for (i = 0; i < n; i++) { |
1778 | slot = page_dir_get_nth_slot(page, i); |
1779 | if ((i == pr_n) && (i < n - pr_n)) { |
1780 | fputs(" ... \n" , stderr); |
1781 | } |
1782 | if ((i < pr_n) || (i >= n - pr_n)) { |
1783 | fprintf(stderr, |
1784 | "Contents of slot: %lu: n_owned: %lu," |
1785 | " rec offs: %lu\n" , |
1786 | (ulong) i, |
1787 | (ulong) page_dir_slot_get_n_owned(slot), |
1788 | (ulong) |
1789 | page_offset(page_dir_slot_get_rec(slot))); |
1790 | } |
1791 | } |
1792 | fprintf(stderr, "Total of %lu records\n" |
1793 | "--------------------------------\n" , |
1794 | (ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page))); |
1795 | } |
1796 | |
1797 | /***************************************************************//** |
1798 | This is used to print the contents of the page record list for |
1799 | debugging purposes. */ |
1800 | void |
1801 | page_print_list( |
1802 | /*============*/ |
1803 | buf_block_t* block, /*!< in: index page */ |
1804 | dict_index_t* index, /*!< in: dictionary index of the page */ |
1805 | ulint pr_n) /*!< in: print n first and n last entries */ |
1806 | { |
1807 | page_t* page = block->frame; |
1808 | page_cur_t cur; |
1809 | ulint count; |
1810 | ulint n_recs; |
1811 | mem_heap_t* heap = NULL; |
1812 | ulint offsets_[REC_OFFS_NORMAL_SIZE]; |
1813 | ulint* offsets = offsets_; |
1814 | rec_offs_init(offsets_); |
1815 | |
1816 | ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); |
1817 | |
	fprintf(stderr,
		"--------------------------------\n"
		"PAGE RECORD LIST\n"
		"Page address %p\n", page);
1822 | |
1823 | n_recs = page_get_n_recs(page); |
1824 | |
1825 | page_cur_set_before_first(block, &cur); |
1826 | count = 0; |
1827 | for (;;) { |
		offsets = rec_get_offsets(cur.rec, index, offsets,
					  page_is_leaf(page),
					  ULINT_UNDEFINED, &heap);
1830 | page_rec_print(cur.rec, offsets); |
1831 | |
1832 | if (count == pr_n) { |
1833 | break; |
1834 | } |
1835 | if (page_cur_is_after_last(&cur)) { |
1836 | break; |
1837 | } |
1838 | page_cur_move_to_next(&cur); |
1839 | count++; |
1840 | } |
1841 | |
1842 | if (n_recs > 2 * pr_n) { |
1843 | fputs(" ... \n" , stderr); |
1844 | } |
1845 | |
1846 | while (!page_cur_is_after_last(&cur)) { |
1847 | page_cur_move_to_next(&cur); |
1848 | |
1849 | if (count + pr_n >= n_recs) { |
			offsets = rec_get_offsets(cur.rec, index, offsets,
						  page_is_leaf(page),
						  ULINT_UNDEFINED, &heap);
1852 | page_rec_print(cur.rec, offsets); |
1853 | } |
1854 | count++; |
1855 | } |
1856 | |
1857 | fprintf(stderr, |
1858 | "Total of %lu records \n" |
1859 | "--------------------------------\n" , |
1860 | (ulong) (count + 1)); |
1861 | |
1862 | if (UNIV_LIKELY_NULL(heap)) { |
1863 | mem_heap_free(heap); |
1864 | } |
1865 | } |
1866 | |
1867 | /***************************************************************//** |
1868 | Prints the info in a page header. */ |
1869 | void |
1870 | page_header_print( |
1871 | /*==============*/ |
1872 | const page_t* page) |
1873 | { |
1874 | fprintf(stderr, |
1875 | "--------------------------------\n" |
1876 | "PAGE HEADER INFO\n" |
1877 | "Page address %p, n records %u (%s)\n" |
1878 | "n dir slots %u, heap top %u\n" |
1879 | "Page n heap %u, free %u, garbage %u\n" |
1880 | "Page last insert %u, direction %u, n direction %u\n" , |
1881 | page, page_header_get_field(page, PAGE_N_RECS), |
1882 | page_is_comp(page) ? "compact format" : "original format" , |
1883 | page_header_get_field(page, PAGE_N_DIR_SLOTS), |
1884 | page_header_get_field(page, PAGE_HEAP_TOP), |
1885 | page_dir_get_n_heap(page), |
1886 | page_header_get_field(page, PAGE_FREE), |
1887 | page_header_get_field(page, PAGE_GARBAGE), |
1888 | page_header_get_field(page, PAGE_LAST_INSERT), |
1889 | page_get_direction(page), |
1890 | page_header_get_field(page, PAGE_N_DIRECTION)); |
1891 | } |
1892 | |
1893 | /***************************************************************//** |
1894 | This is used to print the contents of the page for |
1895 | debugging purposes. */ |
1896 | void |
1897 | page_print( |
1898 | /*=======*/ |
1899 | buf_block_t* block, /*!< in: index page */ |
1900 | dict_index_t* index, /*!< in: dictionary index of the page */ |
1901 | ulint dn, /*!< in: print dn first and last entries |
1902 | in directory */ |
1903 | ulint rn) /*!< in: print rn first and last records |
1904 | in directory */ |
1905 | { |
1906 | page_t* page = block->frame; |
1907 | |
1908 | page_header_print(page); |
1909 | page_dir_print(page, dn); |
1910 | page_print_list(block, index, rn); |
1911 | } |
1912 | #endif /* UNIV_BTR_PRINT */ |
1913 | |
1914 | /***************************************************************//** |
1915 | The following is used to validate a record on a page. This function |
1916 | differs from rec_validate as it can also check the n_owned field and |
1917 | the heap_no field. |
1918 | @return TRUE if ok */ |
1919 | ibool |
1920 | page_rec_validate( |
1921 | /*==============*/ |
1922 | const rec_t* rec, /*!< in: physical record */ |
1923 | const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ |
1924 | { |
1925 | ulint n_owned; |
1926 | ulint heap_no; |
1927 | const page_t* page; |
1928 | |
1929 | page = page_align(rec); |
1930 | ut_a(!page_is_comp(page) == !rec_offs_comp(offsets)); |
1931 | |
1932 | page_rec_check(rec); |
1933 | rec_validate(rec, offsets); |
1934 | |
1935 | if (page_rec_is_comp(rec)) { |
1936 | n_owned = rec_get_n_owned_new(rec); |
1937 | heap_no = rec_get_heap_no_new(rec); |
1938 | } else { |
1939 | n_owned = rec_get_n_owned_old(rec); |
1940 | heap_no = rec_get_heap_no_old(rec); |
1941 | } |
1942 | |
1943 | if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) { |
1944 | ib::warn() << "Dir slot of rec " << page_offset(rec) |
1945 | << ", n owned too big " << n_owned; |
1946 | return(FALSE); |
1947 | } |
1948 | |
1949 | if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) { |
1950 | ib::warn() << "Heap no of rec " << page_offset(rec) |
1951 | << " too big " << heap_no << " " |
1952 | << page_dir_get_n_heap(page); |
1953 | return(FALSE); |
1954 | } |
1955 | |
1956 | return(TRUE); |
1957 | } |
1958 | |
1959 | #ifdef UNIV_DEBUG |
1960 | /***************************************************************//** |
1961 | Checks that the first directory slot points to the infimum record and |
1962 | the last to the supremum. This function is intended to track if the |
1963 | bug fixed in 4.0.14 has caused corruption to users' databases. */ |
1964 | void |
1965 | page_check_dir( |
1966 | /*===========*/ |
1967 | const page_t* page) /*!< in: index page */ |
1968 | { |
1969 | ulint n_slots; |
1970 | ulint infimum_offs; |
1971 | ulint supremum_offs; |
1972 | |
1973 | n_slots = page_dir_get_n_slots(page); |
1974 | infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0)); |
1975 | supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page, |
1976 | n_slots - 1)); |
1977 | |
1978 | if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) { |
1979 | |
1980 | ib::fatal() << "Page directory corruption: infimum not" |
1981 | " pointed to" ; |
1982 | } |
1983 | |
1984 | if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) { |
1985 | |
1986 | ib::fatal() << "Page directory corruption: supremum not" |
1987 | " pointed to" ; |
1988 | } |
1989 | } |
1990 | #endif /* UNIV_DEBUG */ |
1991 | |
1992 | /***************************************************************//** |
1993 | This function checks the consistency of an index page when we do not |
1994 | know the index. This is also resilient so that this should never crash |
1995 | even if the page is total garbage. |
1996 | @return TRUE if ok */ |
1997 | ibool |
1998 | page_simple_validate_old( |
1999 | /*=====================*/ |
2000 | const page_t* page) /*!< in: index page in ROW_FORMAT=REDUNDANT */ |
2001 | { |
2002 | const page_dir_slot_t* slot; |
2003 | ulint slot_no; |
2004 | ulint n_slots; |
2005 | const rec_t* rec; |
2006 | const byte* rec_heap_top; |
2007 | ulint count; |
2008 | ulint own_count; |
2009 | ibool ret = FALSE; |
2010 | |
2011 | ut_a(!page_is_comp(page)); |
2012 | |
2013 | /* Check first that the record heap and the directory do not |
2014 | overlap. */ |
2015 | |
2016 | n_slots = page_dir_get_n_slots(page); |
2017 | |
2018 | if (UNIV_UNLIKELY(n_slots > srv_page_size / 4)) { |
2019 | ib::error() << "Nonsensical number " << n_slots |
2020 | << " of page dir slots" ; |
2021 | |
2022 | goto func_exit; |
2023 | } |
2024 | |
2025 | rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP); |
2026 | |
2027 | if (UNIV_UNLIKELY(rec_heap_top |
2028 | > page_dir_get_nth_slot(page, n_slots - 1))) { |
2029 | ib::error() |
2030 | << "Record heap and dir overlap on a page, heap top " |
2031 | << page_header_get_field(page, PAGE_HEAP_TOP) |
2032 | << ", dir " |
2033 | << page_offset(page_dir_get_nth_slot(page, |
2034 | n_slots - 1)); |
2035 | |
2036 | goto func_exit; |
2037 | } |
2038 | |
2039 | /* Validate the record list in a loop checking also that it is |
2040 | consistent with the page record directory. */ |
2041 | |
2042 | count = 0; |
2043 | own_count = 1; |
2044 | slot_no = 0; |
2045 | slot = page_dir_get_nth_slot(page, slot_no); |
2046 | |
2047 | rec = page_get_infimum_rec(page); |
2048 | |
2049 | for (;;) { |
2050 | if (UNIV_UNLIKELY(rec > rec_heap_top)) { |
2051 | ib::error() << "Record " << (rec - page) |
2052 | << " is above rec heap top " |
2053 | << (rec_heap_top - page); |
2054 | |
2055 | goto func_exit; |
2056 | } |
2057 | |
2058 | if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) != 0)) { |
2059 | /* This is a record pointed to by a dir slot */ |
2060 | if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) |
2061 | != own_count)) { |
2062 | |
2063 | ib::error() << "Wrong owned count " |
2064 | << rec_get_n_owned_old(rec) |
2065 | << ", " << own_count << ", rec " |
2066 | << (rec - page); |
2067 | |
2068 | goto func_exit; |
2069 | } |
2070 | |
2071 | if (UNIV_UNLIKELY |
2072 | (page_dir_slot_get_rec(slot) != rec)) { |
2073 | ib::error() << "Dir slot does not point" |
2074 | " to right rec " << (rec - page); |
2075 | |
2076 | goto func_exit; |
2077 | } |
2078 | |
2079 | own_count = 0; |
2080 | |
2081 | if (!page_rec_is_supremum(rec)) { |
2082 | slot_no++; |
2083 | slot = page_dir_get_nth_slot(page, slot_no); |
2084 | } |
2085 | } |
2086 | |
2087 | if (page_rec_is_supremum(rec)) { |
2088 | |
2089 | break; |
2090 | } |
2091 | |
2092 | if (UNIV_UNLIKELY |
2093 | (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA |
2094 | || rec_get_next_offs(rec, FALSE) >= srv_page_size)) { |
2095 | |
2096 | ib::error() << "Next record offset nonsensical " |
2097 | << rec_get_next_offs(rec, FALSE) << " for rec " |
2098 | << (rec - page); |
2099 | |
2100 | goto func_exit; |
2101 | } |
2102 | |
2103 | count++; |
2104 | |
2105 | if (UNIV_UNLIKELY(count > srv_page_size)) { |
2106 | ib::error() << "Page record list appears" |
2107 | " to be circular " << count; |
2108 | goto func_exit; |
2109 | } |
2110 | |
2111 | rec = page_rec_get_next_const(rec); |
2112 | own_count++; |
2113 | } |
2114 | |
2115 | if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { |
2116 | ib::error() << "n owned is zero in a supremum rec" ; |
2117 | |
2118 | goto func_exit; |
2119 | } |
2120 | |
2121 | if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { |
2122 | ib::error() << "n slots wrong " |
2123 | << slot_no << ", " << (n_slots - 1); |
2124 | goto func_exit; |
2125 | } |
2126 | |
2127 | if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS)) |
2128 | + PAGE_HEAP_NO_USER_LOW |
2129 | != count + 1)) { |
2130 | ib::error() << "n recs wrong " |
2131 | << page_header_get_field(page, PAGE_N_RECS) |
2132 | + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); |
2133 | |
2134 | goto func_exit; |
2135 | } |
2136 | |
2137 | /* Check then the free list */ |
2138 | rec = page_header_get_ptr(page, PAGE_FREE); |
2139 | |
2140 | while (rec != NULL) { |
2141 | if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA |
2142 | || rec >= page + srv_page_size)) { |
2143 | ib::error() << "Free list record has" |
2144 | " a nonsensical offset " << (rec - page); |
2145 | |
2146 | goto func_exit; |
2147 | } |
2148 | |
2149 | if (UNIV_UNLIKELY(rec > rec_heap_top)) { |
2150 | ib::error() << "Free list record " << (rec - page) |
2151 | << " is above rec heap top " |
2152 | << (rec_heap_top - page); |
2153 | |
2154 | goto func_exit; |
2155 | } |
2156 | |
2157 | count++; |
2158 | |
2159 | if (UNIV_UNLIKELY(count > srv_page_size)) { |
2160 | ib::error() << "Page free list appears" |
2161 | " to be circular " << count; |
2162 | goto func_exit; |
2163 | } |
2164 | |
2165 | rec = page_rec_get_next_const(rec); |
2166 | } |
2167 | |
2168 | if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { |
2169 | |
2170 | ib::error() << "N heap is wrong " |
2171 | << page_dir_get_n_heap(page) << ", " << (count + 1); |
2172 | |
2173 | goto func_exit; |
2174 | } |
2175 | |
2176 | ret = TRUE; |
2177 | |
2178 | func_exit: |
2179 | return(ret); |
2180 | } |
2181 | |
2182 | /***************************************************************//** |
2183 | This function checks the consistency of an index page when we do not |
2184 | know the index. This is also resilient so that this should never crash |
2185 | even if the page is total garbage. |
2186 | @return TRUE if ok */ |
2187 | ibool |
2188 | page_simple_validate_new( |
2189 | /*=====================*/ |
2190 | const page_t* page) /*!< in: index page in ROW_FORMAT!=REDUNDANT */ |
2191 | { |
2192 | const page_dir_slot_t* slot; |
2193 | ulint slot_no; |
2194 | ulint n_slots; |
2195 | const rec_t* rec; |
2196 | const byte* rec_heap_top; |
2197 | ulint count; |
2198 | ulint own_count; |
2199 | ibool ret = FALSE; |
2200 | |
2201 | ut_a(page_is_comp(page)); |
2202 | |
2203 | /* Check first that the record heap and the directory do not |
2204 | overlap. */ |
2205 | |
2206 | n_slots = page_dir_get_n_slots(page); |
2207 | |
2208 | if (UNIV_UNLIKELY(n_slots > srv_page_size / 4)) { |
2209 | ib::error() << "Nonsensical number " << n_slots |
2210 | << " of page dir slots" ; |
2211 | |
2212 | goto func_exit; |
2213 | } |
2214 | |
2215 | rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP); |
2216 | |
2217 | if (UNIV_UNLIKELY(rec_heap_top |
2218 | > page_dir_get_nth_slot(page, n_slots - 1))) { |
2219 | |
2220 | ib::error() << "Record heap and dir overlap on a page," |
2221 | " heap top " |
2222 | << page_header_get_field(page, PAGE_HEAP_TOP) |
2223 | << ", dir " << page_offset( |
2224 | page_dir_get_nth_slot(page, n_slots - 1)); |
2225 | |
2226 | goto func_exit; |
2227 | } |
2228 | |
2229 | /* Validate the record list in a loop checking also that it is |
2230 | consistent with the page record directory. */ |
2231 | |
2232 | count = 0; |
2233 | own_count = 1; |
2234 | slot_no = 0; |
2235 | slot = page_dir_get_nth_slot(page, slot_no); |
2236 | |
2237 | rec = page_get_infimum_rec(page); |
2238 | |
2239 | for (;;) { |
2240 | if (UNIV_UNLIKELY(rec > rec_heap_top)) { |
2241 | |
2242 | ib::error() << "Record " << page_offset(rec) |
2243 | << " is above rec heap top " |
2244 | << page_offset(rec_heap_top); |
2245 | |
2246 | goto func_exit; |
2247 | } |
2248 | |
2249 | if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) { |
2250 | /* This is a record pointed to by a dir slot */ |
2251 | if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) |
2252 | != own_count)) { |
2253 | |
2254 | ib::error() << "Wrong owned count " |
2255 | << rec_get_n_owned_new(rec) << ", " |
2256 | << own_count << ", rec " |
2257 | << page_offset(rec); |
2258 | |
2259 | goto func_exit; |
2260 | } |
2261 | |
2262 | if (UNIV_UNLIKELY |
2263 | (page_dir_slot_get_rec(slot) != rec)) { |
2264 | ib::error() << "Dir slot does not point" |
2265 | " to right rec " << page_offset(rec); |
2266 | |
2267 | goto func_exit; |
2268 | } |
2269 | |
2270 | own_count = 0; |
2271 | |
2272 | if (!page_rec_is_supremum(rec)) { |
2273 | slot_no++; |
2274 | slot = page_dir_get_nth_slot(page, slot_no); |
2275 | } |
2276 | } |
2277 | |
2278 | if (page_rec_is_supremum(rec)) { |
2279 | |
2280 | break; |
2281 | } |
2282 | |
2283 | if (UNIV_UNLIKELY |
2284 | (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA |
2285 | || rec_get_next_offs(rec, TRUE) >= srv_page_size)) { |
2286 | |
2287 | ib::error() << "Next record offset nonsensical " |
2288 | << rec_get_next_offs(rec, TRUE) |
2289 | << " for rec " << page_offset(rec); |
2290 | |
2291 | goto func_exit; |
2292 | } |
2293 | |
2294 | count++; |
2295 | |
2296 | if (UNIV_UNLIKELY(count > srv_page_size)) { |
2297 | ib::error() << "Page record list appears to be" |
2298 | " circular " << count; |
2299 | goto func_exit; |
2300 | } |
2301 | |
2302 | rec = page_rec_get_next_const(rec); |
2303 | own_count++; |
2304 | } |
2305 | |
2306 | if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) { |
2307 | ib::error() << "n owned is zero in a supremum rec" ; |
2308 | |
2309 | goto func_exit; |
2310 | } |
2311 | |
2312 | if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { |
2313 | ib::error() << "n slots wrong " << slot_no << ", " |
2314 | << (n_slots - 1); |
2315 | goto func_exit; |
2316 | } |
2317 | |
2318 | if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS)) |
2319 | + PAGE_HEAP_NO_USER_LOW |
2320 | != count + 1)) { |
2321 | ib::error() << "n recs wrong " |
2322 | << page_header_get_field(page, PAGE_N_RECS) |
2323 | + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); |
2324 | |
2325 | goto func_exit; |
2326 | } |
2327 | |
2328 | /* Check then the free list */ |
2329 | rec = page_header_get_ptr(page, PAGE_FREE); |
2330 | |
2331 | while (rec != NULL) { |
2332 | if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA |
2333 | || rec >= page + srv_page_size)) { |
2334 | |
2335 | ib::error() << "Free list record has" |
2336 | " a nonsensical offset " << page_offset(rec); |
2337 | |
2338 | goto func_exit; |
2339 | } |
2340 | |
2341 | if (UNIV_UNLIKELY(rec > rec_heap_top)) { |
2342 | ib::error() << "Free list record " << page_offset(rec) |
2343 | << " is above rec heap top " |
2344 | << page_offset(rec_heap_top); |
2345 | |
2346 | goto func_exit; |
2347 | } |
2348 | |
2349 | count++; |
2350 | |
2351 | if (UNIV_UNLIKELY(count > srv_page_size)) { |
2352 | ib::error() << "Page free list appears to be" |
2353 | " circular " << count; |
2354 | goto func_exit; |
2355 | } |
2356 | |
2357 | rec = page_rec_get_next_const(rec); |
2358 | } |
2359 | |
2360 | if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { |
2361 | |
2362 | ib::error() << "N heap is wrong " |
2363 | << page_dir_get_n_heap(page) << ", " << (count + 1); |
2364 | |
2365 | goto func_exit; |
2366 | } |
2367 | |
2368 | ret = TRUE; |
2369 | |
2370 | func_exit: |
2371 | return(ret); |
2372 | } |
2373 | |
2374 | /***************************************************************//** |
2375 | This function checks the consistency of an index page. |
2376 | @return TRUE if ok */ |
2377 | ibool |
2378 | page_validate( |
2379 | /*==========*/ |
2380 | const page_t* page, /*!< in: index page */ |
2381 | dict_index_t* index) /*!< in: data dictionary index containing |
2382 | the page record type definition */ |
2383 | { |
2384 | const page_dir_slot_t* slot; |
2385 | mem_heap_t* heap; |
2386 | byte* buf; |
2387 | ulint count; |
2388 | ulint own_count; |
2389 | ulint rec_own_count; |
2390 | ulint slot_no; |
2391 | ulint data_size; |
2392 | const rec_t* rec; |
2393 | const rec_t* old_rec = NULL; |
2394 | ulint offs; |
2395 | ulint n_slots; |
2396 | ibool ret = FALSE; |
2397 | ulint i; |
2398 | ulint* offsets = NULL; |
2399 | ulint* old_offsets = NULL; |
2400 | |
2401 | #ifdef UNIV_GIS_DEBUG |
2402 | if (dict_index_is_spatial(index)) { |
2403 | fprintf(stderr, "Page no: %lu\n" , page_get_page_no(page)); |
2404 | } |
#endif /* UNIV_GIS_DEBUG */
2406 | |
2407 | if (UNIV_UNLIKELY((ibool) !!page_is_comp(page) |
2408 | != dict_table_is_comp(index->table))) { |
2409 | ib::error() << "'compact format' flag mismatch" ; |
2410 | goto func_exit2; |
2411 | } |
2412 | if (page_is_comp(page)) { |
2413 | if (UNIV_UNLIKELY(!page_simple_validate_new(page))) { |
2414 | goto func_exit2; |
2415 | } |
2416 | } else { |
2417 | if (UNIV_UNLIKELY(!page_simple_validate_old(page))) { |
2418 | goto func_exit2; |
2419 | } |
2420 | } |
2421 | |
2422 | /* Multiple transactions cannot simultaneously operate on the |
2423 | same temp-table in parallel. |
	max_trx_id is ignored for temp tables because it is not required
	for MVCC. */
2426 | if (!page_is_leaf(page) || page_is_empty(page) |
2427 | || !dict_index_is_sec_or_ibuf(index) |
2428 | || index->table->is_temporary()) { |
2429 | } else if (trx_id_t sys_max_trx_id = trx_sys.get_max_trx_id()) { |
2430 | trx_id_t max_trx_id = page_get_max_trx_id(page); |
2431 | |
2432 | if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) { |
2433 | ib::error() << "PAGE_MAX_TRX_ID out of bounds: " |
2434 | << max_trx_id << ", " << sys_max_trx_id; |
2435 | goto func_exit2; |
2436 | } |
2437 | } else { |
2438 | ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN); |
2439 | } |
2440 | |
2441 | heap = mem_heap_create(srv_page_size + 200); |
2442 | |
2443 | /* The following buffer is used to check that the |
2444 | records in the page record heap do not overlap */ |
2445 | |
2446 | buf = static_cast<byte*>(mem_heap_zalloc(heap, srv_page_size)); |
2447 | |
2448 | /* Check first that the record heap and the directory do not |
2449 | overlap. */ |
2450 | |
2451 | n_slots = page_dir_get_n_slots(page); |
2452 | |
2453 | if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP) |
2454 | <= page_dir_get_nth_slot(page, n_slots - 1)))) { |
2455 | |
2456 | ib::warn() << "Record heap and dir overlap on space " |
2457 | << page_get_space_id(page) << " page " |
2458 | << page_get_page_no(page) << " index " << index->name |
2459 | << ", " << page_header_get_ptr(page, PAGE_HEAP_TOP) |
2460 | << ", " << page_dir_get_nth_slot(page, n_slots - 1); |
2461 | |
2462 | goto func_exit; |
2463 | } |
2464 | |
2465 | /* Validate the record list in a loop checking also that |
2466 | it is consistent with the directory. */ |
2467 | count = 0; |
2468 | data_size = 0; |
2469 | own_count = 1; |
2470 | slot_no = 0; |
2471 | slot = page_dir_get_nth_slot(page, slot_no); |
2472 | |
2473 | rec = page_get_infimum_rec(page); |
2474 | |
2475 | for (;;) { |
2476 | offsets = rec_get_offsets(rec, index, offsets, |
2477 | page_is_leaf(page), |
2478 | ULINT_UNDEFINED, &heap); |
2479 | |
2480 | if (page_is_comp(page) && page_rec_is_user_rec(rec) |
2481 | && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec) |
2482 | == page_is_leaf(page))) { |
2483 | ib::error() << "'node_ptr' flag mismatch" ; |
2484 | goto func_exit; |
2485 | } |
2486 | |
2487 | if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { |
2488 | goto func_exit; |
2489 | } |
2490 | |
2491 | /* Check that the records are in the ascending order */ |
2492 | if (count >= PAGE_HEAP_NO_USER_LOW |
2493 | && !page_rec_is_supremum(rec)) { |
2494 | |
2495 | int ret = cmp_rec_rec( |
2496 | rec, old_rec, offsets, old_offsets, index); |
2497 | |
			/* For a spatial index, on a non-leaf level, we
			allow records to be equal. */
2500 | bool rtr_equal_nodeptrs = |
2501 | (ret == 0 && dict_index_is_spatial(index) |
2502 | && !page_is_leaf(page)); |
2503 | |
2504 | if (ret <= 0 && !rtr_equal_nodeptrs) { |
2505 | |
2506 | ib::error() << "Records in wrong order on" |
2507 | " space " << page_get_space_id(page) |
2508 | << " page " << page_get_page_no(page) |
2509 | << " index " << index->name; |
2510 | |
2511 | fputs("\nInnoDB: previous record " , stderr); |
2512 | /* For spatial index, print the mbr info.*/ |
2513 | if (index->type & DICT_SPATIAL) { |
2514 | putc('\n', stderr); |
2515 | rec_print_mbr_rec(stderr, |
2516 | old_rec, old_offsets); |
2517 | fputs("\nInnoDB: record " , stderr); |
2518 | putc('\n', stderr); |
2519 | rec_print_mbr_rec(stderr, rec, offsets); |
2520 | putc('\n', stderr); |
2521 | putc('\n', stderr); |
2522 | |
2523 | } else { |
2524 | rec_print_new(stderr, old_rec, old_offsets); |
2525 | fputs("\nInnoDB: record " , stderr); |
2526 | rec_print_new(stderr, rec, offsets); |
2527 | putc('\n', stderr); |
2528 | } |
2529 | |
2530 | goto func_exit; |
2531 | } |
2532 | } |
2533 | |
2534 | if (page_rec_is_user_rec(rec)) { |
2535 | |
2536 | data_size += rec_offs_size(offsets); |
2537 | |
2538 | #if defined(UNIV_GIS_DEBUG) |
2539 | /* For spatial index, print the mbr info.*/ |
2540 | if (index->type & DICT_SPATIAL) { |
2541 | rec_print_mbr_rec(stderr, rec, offsets); |
2542 | putc('\n', stderr); |
2543 | } |
2544 | #endif /* UNIV_GIS_DEBUG */ |
2545 | } |
2546 | |
2547 | offs = page_offset(rec_get_start(rec, offsets)); |
2548 | i = rec_offs_size(offsets); |
2549 | if (UNIV_UNLIKELY(offs + i >= srv_page_size)) { |
2550 | ib::error() << "Record offset out of bounds" ; |
2551 | goto func_exit; |
2552 | } |
2553 | |
2554 | while (i--) { |
2555 | if (UNIV_UNLIKELY(buf[offs + i])) { |
2556 | /* No other record may overlap this */ |
2557 | ib::error() << "Record overlaps another" ; |
2558 | goto func_exit; |
2559 | } |
2560 | |
2561 | buf[offs + i] = 1; |
2562 | } |
2563 | |
2564 | if (page_is_comp(page)) { |
2565 | rec_own_count = rec_get_n_owned_new(rec); |
2566 | } else { |
2567 | rec_own_count = rec_get_n_owned_old(rec); |
2568 | } |
2569 | |
2570 | if (UNIV_UNLIKELY(rec_own_count != 0)) { |
2571 | /* This is a record pointed to by a dir slot */ |
2572 | if (UNIV_UNLIKELY(rec_own_count != own_count)) { |
2573 | ib::error() << "Wrong owned count " |
2574 | << rec_own_count << ", " << own_count; |
2575 | goto func_exit; |
2576 | } |
2577 | |
2578 | if (page_dir_slot_get_rec(slot) != rec) { |
2579 | ib::error() << "Dir slot does not" |
2580 | " point to right rec" ; |
2581 | goto func_exit; |
2582 | } |
2583 | |
2584 | page_dir_slot_check(slot); |
2585 | |
2586 | own_count = 0; |
2587 | if (!page_rec_is_supremum(rec)) { |
2588 | slot_no++; |
2589 | slot = page_dir_get_nth_slot(page, slot_no); |
2590 | } |
2591 | } |
2592 | |
2593 | if (page_rec_is_supremum(rec)) { |
2594 | break; |
2595 | } |
2596 | |
2597 | count++; |
2598 | own_count++; |
2599 | old_rec = rec; |
2600 | rec = page_rec_get_next_const(rec); |
2601 | |
2602 | /* set old_offsets to offsets; recycle offsets */ |
2603 | { |
2604 | ulint* offs = old_offsets; |
2605 | old_offsets = offsets; |
2606 | offsets = offs; |
2607 | } |
2608 | } |
2609 | |
2610 | if (page_is_comp(page)) { |
2611 | if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) { |
2612 | |
2613 | goto n_owned_zero; |
2614 | } |
2615 | } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { |
2616 | n_owned_zero: |
2617 | ib::error() << "n owned is zero" ; |
2618 | goto func_exit; |
2619 | } |
2620 | |
2621 | if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { |
2622 | ib::error() << "n slots wrong " << slot_no << " " |
2623 | << (n_slots - 1); |
2624 | goto func_exit; |
2625 | } |
2626 | |
2627 | if (UNIV_UNLIKELY(ulint(page_header_get_field(page, PAGE_N_RECS)) |
2628 | + PAGE_HEAP_NO_USER_LOW |
2629 | != count + 1)) { |
2630 | ib::error() << "n recs wrong " |
2631 | << page_header_get_field(page, PAGE_N_RECS) |
2632 | + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); |
2633 | goto func_exit; |
2634 | } |
2635 | |
2636 | if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) { |
2637 | ib::error() << "Summed data size " << data_size |
2638 | << ", returned by func " << page_get_data_size(page); |
2639 | goto func_exit; |
2640 | } |
2641 | |
2642 | /* Check then the free list */ |
2643 | rec = page_header_get_ptr(page, PAGE_FREE); |
2644 | |
2645 | while (rec != NULL) { |
2646 | offsets = rec_get_offsets(rec, index, offsets, |
2647 | page_is_leaf(page), |
2648 | ULINT_UNDEFINED, &heap); |
2649 | if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { |
2650 | |
2651 | goto func_exit; |
2652 | } |
2653 | |
2654 | count++; |
2655 | offs = page_offset(rec_get_start(rec, offsets)); |
2656 | i = rec_offs_size(offsets); |
2657 | if (UNIV_UNLIKELY(offs + i >= srv_page_size)) { |
2658 | ib::error() << "Record offset out of bounds" ; |
2659 | goto func_exit; |
2660 | } |
2661 | |
2662 | while (i--) { |
2663 | |
2664 | if (UNIV_UNLIKELY(buf[offs + i])) { |
2665 | ib::error() << "Record overlaps another" |
2666 | " in free list" ; |
2667 | goto func_exit; |
2668 | } |
2669 | |
2670 | buf[offs + i] = 1; |
2671 | } |
2672 | |
2673 | rec = page_rec_get_next_const(rec); |
2674 | } |
2675 | |
2676 | if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { |
2677 | ib::error() << "N heap is wrong " |
2678 | << page_dir_get_n_heap(page) << " " << count + 1; |
2679 | goto func_exit; |
2680 | } |
2681 | |
2682 | ret = TRUE; |
2683 | |
2684 | func_exit: |
2685 | mem_heap_free(heap); |
2686 | |
2687 | if (UNIV_UNLIKELY(ret == FALSE)) { |
2688 | func_exit2: |
2689 | ib::error() << "Apparent corruption in space " |
2690 | << page_get_space_id(page) << " page " |
2691 | << page_get_page_no(page) << " index " << index->name; |
2692 | } |
2693 | |
2694 | return(ret); |
2695 | } |
2696 | |
2697 | /***************************************************************//** |
2698 | Looks in the page record list for a record with the given heap number. |
2699 | @return record, NULL if not found */ |
2700 | const rec_t* |
2701 | page_find_rec_with_heap_no( |
2702 | /*=======================*/ |
2703 | const page_t* page, /*!< in: index page */ |
2704 | ulint heap_no)/*!< in: heap number */ |
2705 | { |
2706 | const rec_t* rec; |
2707 | |
2708 | if (page_is_comp(page)) { |
2709 | rec = page + PAGE_NEW_INFIMUM; |
2710 | |
2711 | for (;;) { |
2712 | ulint rec_heap_no = rec_get_heap_no_new(rec); |
2713 | |
2714 | if (rec_heap_no == heap_no) { |
2715 | |
2716 | return(rec); |
2717 | } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) { |
2718 | |
2719 | return(NULL); |
2720 | } |
2721 | |
2722 | rec = page + rec_get_next_offs(rec, TRUE); |
2723 | } |
2724 | } else { |
2725 | rec = page + PAGE_OLD_INFIMUM; |
2726 | |
2727 | for (;;) { |
2728 | ulint rec_heap_no = rec_get_heap_no_old(rec); |
2729 | |
2730 | if (rec_heap_no == heap_no) { |
2731 | |
2732 | return(rec); |
2733 | } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) { |
2734 | |
2735 | return(NULL); |
2736 | } |
2737 | |
2738 | rec = page + rec_get_next_offs(rec, FALSE); |
2739 | } |
2740 | } |
2741 | } |
2742 | |
2743 | /*******************************************************//** |
2744 | Removes the record from a leaf page. This function does not log |
2745 | any changes. It is used by the IMPORT tablespace functions. |
2746 | The cursor is moved to the next record after the deleted one. |
2747 | @return true if success, i.e., the page did not become too empty */ |
2748 | bool |
2749 | page_delete_rec( |
2750 | /*============*/ |
2751 | const dict_index_t* index, /*!< in: The index that the record |
2752 | belongs to */ |
2753 | page_cur_t* pcur, /*!< in/out: page cursor on record |
2754 | to delete */ |
2755 | page_zip_des_t* |
2756 | #ifdef UNIV_ZIP_DEBUG |
2757 | page_zip/*!< in: compressed page descriptor */ |
2758 | #endif |
2759 | , |
2760 | const ulint* offsets)/*!< in: offsets for record */ |
2761 | { |
2762 | bool no_compress_needed; |
2763 | buf_block_t* block = pcur->block; |
2764 | page_t* page = buf_block_get_frame(block); |
2765 | |
2766 | ut_ad(page_is_leaf(page)); |
2767 | |
2768 | if (!rec_offs_any_extern(offsets) |
2769 | && ((page_get_data_size(page) - rec_offs_size(offsets) |
2770 | < BTR_CUR_PAGE_COMPRESS_LIMIT(index)) |
2771 | || !page_has_siblings(page) |
2772 | || (page_get_n_recs(page) < 2))) { |
2773 | |
2774 | ulint root_page_no = dict_index_get_page(index); |
2775 | |
2776 | /* The page fillfactor will drop below a predefined |
2777 | minimum value, OR the level in the B-tree contains just |
2778 | one page, OR the page will become empty: we recommend |
2779 | compression if this is not the root page. */ |
2780 | |
2781 | no_compress_needed = page_get_page_no(page) == root_page_no; |
2782 | } else { |
2783 | no_compress_needed = true; |
2784 | } |
2785 | |
2786 | if (no_compress_needed) { |
2787 | #ifdef UNIV_ZIP_DEBUG |
2788 | ut_a(!page_zip || page_zip_validate(page_zip, page, index)); |
2789 | #endif /* UNIV_ZIP_DEBUG */ |
2790 | |
2791 | page_cur_delete_rec(pcur, index, offsets, 0); |
2792 | |
2793 | #ifdef UNIV_ZIP_DEBUG |
2794 | ut_a(!page_zip || page_zip_validate(page_zip, page, index)); |
2795 | #endif /* UNIV_ZIP_DEBUG */ |
2796 | } |
2797 | |
2798 | return(no_compress_needed); |
2799 | } |
2800 | |
2801 | /** Get the last non-delete-marked record on a page. |
2802 | @param[in] page index tree leaf page |
2803 | @return the last record, not delete-marked |
2804 | @retval infimum record if all records are delete-marked */ |
2805 | const rec_t* |
2806 | page_find_rec_max_not_deleted( |
2807 | const page_t* page) |
2808 | { |
2809 | const rec_t* rec = page_get_infimum_rec(page); |
2810 | const rec_t* prev_rec = NULL; // remove warning |
2811 | |
2812 | /* Because the page infimum is never delete-marked |
	and never the 'default row' pseudo-record (MIN_REC_FLAG),
2814 | prev_rec will always be assigned to it first. */ |
2815 | ut_ad(!rec_get_info_bits(rec, page_rec_is_comp(rec))); |
2816 | ut_ad(page_is_leaf(page)); |
2817 | |
2818 | if (page_is_comp(page)) { |
2819 | do { |
2820 | if (!(rec[-REC_NEW_INFO_BITS] |
2821 | & (REC_INFO_DELETED_FLAG |
2822 | | REC_INFO_MIN_REC_FLAG))) { |
2823 | prev_rec = rec; |
2824 | } |
2825 | rec = page_rec_get_next_low(rec, true); |
2826 | } while (rec != page + PAGE_NEW_SUPREMUM); |
2827 | } else { |
2828 | do { |
2829 | if (!(rec[-REC_OLD_INFO_BITS] |
2830 | & (REC_INFO_DELETED_FLAG |
2831 | | REC_INFO_MIN_REC_FLAG))) { |
2832 | prev_rec = rec; |
2833 | } |
2834 | rec = page_rec_get_next_low(rec, false); |
2835 | } while (rec != page + PAGE_OLD_SUPREMUM); |
2836 | } |
2837 | return(prev_rec); |
2838 | } |
2839 | |
2840 | /** Issue a warning when the checksum that is stored in the page is valid, |
2841 | but different than the global setting innodb_checksum_algorithm. |
@param[in] curr_algo current checksum algorithm
@param[in] page_checksum checksum algorithm that matches the page's stored checksum
2844 | @param[in] page_id page identifier */ |
2845 | void |
2846 | page_warn_strict_checksum( |
2847 | srv_checksum_algorithm_t curr_algo, |
2848 | srv_checksum_algorithm_t page_checksum, |
2849 | const page_id_t& page_id) |
2850 | { |
2851 | srv_checksum_algorithm_t curr_algo_nonstrict; |
2852 | switch (curr_algo) { |
2853 | case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: |
2854 | curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_CRC32; |
2855 | break; |
2856 | case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: |
2857 | curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_INNODB; |
2858 | break; |
2859 | case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: |
2860 | curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_NONE; |
2861 | break; |
2862 | default: |
2863 | ut_error; |
2864 | } |
2865 | |
2866 | ib::warn() << "innodb_checksum_algorithm is set to \"" |
2867 | << buf_checksum_algorithm_name(curr_algo) << "\"" |
2868 | << " but the page " << page_id << " contains a valid checksum \"" |
2869 | << buf_checksum_algorithm_name(page_checksum) << "\". " |
2870 | << " Accepting the page as valid. Change" |
2871 | << " innodb_checksum_algorithm to \"" |
2872 | << buf_checksum_algorithm_name(curr_algo_nonstrict) |
2873 | << "\" to silently accept such pages or rewrite all pages" |
2874 | << " so that they contain \"" |
2875 | << buf_checksum_algorithm_name(curr_algo_nonstrict) |
2876 | << "\" checksum." ; |
2877 | } |
2878 | |