| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | Copyright (c) 2012, Facebook Inc. |
| 5 | Copyright (c) 2014, 2018, MariaDB Corporation. |
| 6 | |
| 7 | This program is free software; you can redistribute it and/or modify it under |
| 8 | the terms of the GNU General Public License as published by the Free Software |
| 9 | Foundation; version 2 of the License. |
| 10 | |
| 11 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU General Public License along with |
| 16 | this program; if not, write to the Free Software Foundation, Inc., |
| 17 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 18 | |
| 19 | *****************************************************************************/ |
| 20 | |
| 21 | /**************************************************//** |
| 22 | @file include/btr0btr.h |
| 23 | The B-tree |
| 24 | |
| 25 | Created 6/2/1994 Heikki Tuuri |
| 26 | *******************************************************/ |
| 27 | |
| 28 | #ifndef btr0btr_h |
| 29 | #define btr0btr_h |
| 30 | |
| 31 | #include "univ.i" |
| 32 | |
| 33 | #include "dict0dict.h" |
| 34 | #include "data0data.h" |
| 35 | #include "page0cur.h" |
| 36 | #include "mtr0mtr.h" |
| 37 | #include "btr0types.h" |
| 38 | #include "gis0type.h" |
| 39 | |
| 40 | #define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level |
| 41 | (not really a hard limit). |
| 42 | Used in debug assertions |
| 43 | in btr_page_set_level and |
| 44 | btr_page_get_level */ |
| 45 | |
| 46 | /** Maximum record size which can be stored on a page, without using the |
| 47 | special big record storage structure */ |
| 48 | #define BTR_PAGE_MAX_REC_SIZE (srv_page_size / 2 - 200) |
| 49 | |
| 50 | /** @brief Maximum depth of a B-tree in InnoDB. |
| 51 | |
| 52 | Note that this isn't a maximum as such; none of the tree operations |
| 53 | avoid producing trees bigger than this. It is instead a "max depth |
| 54 | that other code must work with", useful for e.g. fixed-size arrays |
| 55 | that must store some information about each level in a tree. In other |
| 56 | words: if a B-tree with bigger depth than this is encountered, it is |
| 57 | not acceptable for it to lead to mysterious memory corruption, but it |
| 58 | is acceptable for the program to die with a clear assert failure. */ |
| 59 | #define BTR_MAX_LEVELS 100 |
| 60 | |
| 61 | /** Latching modes for btr_cur_search_to_nth_level(). */ |
| 62 | enum btr_latch_mode { |
| 63 | /** Search a record on a leaf page and S-latch it. */ |
| 64 | BTR_SEARCH_LEAF = RW_S_LATCH, |
| 65 | /** (Prepare to) modify a record on a leaf page and X-latch it. */ |
| 66 | BTR_MODIFY_LEAF = RW_X_LATCH, |
| 67 | /** Obtain no latches. */ |
| 68 | BTR_NO_LATCHES = RW_NO_LATCH, |
| 69 | /** Start modifying the entire B-tree. */ |
| 70 | BTR_MODIFY_TREE = 33, |
| 71 | /** Continue modifying the entire B-tree. */ |
| 72 | BTR_CONT_MODIFY_TREE = 34, |
| 73 | /** Search the previous record. */ |
| 74 | BTR_SEARCH_PREV = 35, |
| 75 | /** Modify the previous record. */ |
| 76 | BTR_MODIFY_PREV = 36, |
| 77 | /** Start searching the entire B-tree. */ |
| 78 | BTR_SEARCH_TREE = 37, |
| 79 | /** Continue searching the entire B-tree. */ |
| 80 | BTR_CONT_SEARCH_TREE = 38, |
| 81 | |
| 82 | /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually |
| 83 | exclusive. */ |
| 84 | /** The search tuple will be inserted to the secondary index |
| 85 | at the searched position. When the leaf page is not in the |
| 86 | buffer pool, try to use the change buffer. */ |
| 87 | BTR_INSERT = 512, |
| 88 | |
| 89 | /** Try to delete mark a secondary index leaf page record at |
| 90 | the searched position using the change buffer when the page is |
| 91 | not in the buffer pool. */ |
| 92 | BTR_DELETE_MARK = 4096, |
| 93 | |
| 94 | /** Try to purge the record using the change buffer when the |
| 95 | secondary index leaf page is not in the buffer pool. */ |
| 96 | BTR_DELETE = 8192, |
| 97 | |
| 98 | /** The caller is already holding dict_index_t::lock S-latch. */ |
| 99 | BTR_ALREADY_S_LATCHED = 16384, |
| 100 | /** Search and S-latch a leaf page, assuming that the |
| 101 | dict_index_t::lock S-latch is being held. */ |
| 102 | BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF |
| 103 | | BTR_ALREADY_S_LATCHED, |
| 104 | /** Search the entire index tree, assuming that the |
| 105 | dict_index_t::lock S-latch is being held. */ |
| 106 | BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE |
| 107 | | BTR_ALREADY_S_LATCHED, |
| 108 | /** Search and X-latch a leaf page, assuming that the |
| 109 | dict_index_t::lock S-latch is being held. */ |
| 110 | BTR_MODIFY_LEAF_ALREADY_S_LATCHED = BTR_MODIFY_LEAF |
| 111 | | BTR_ALREADY_S_LATCHED, |
| 112 | |
| 113 | /** Attempt to delete-mark a secondary index record. */ |
| 114 | BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK, |
| 115 | /** Attempt to delete-mark a secondary index record |
| 116 | while holding the dict_index_t::lock S-latch. */ |
| 117 | BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED = BTR_DELETE_MARK_LEAF |
| 118 | | BTR_ALREADY_S_LATCHED, |
| 119 | /** Attempt to purge a secondary index record. */ |
| 120 | BTR_PURGE_LEAF = BTR_MODIFY_LEAF | BTR_DELETE, |
| 121 | /** Attempt to purge a secondary index record |
| 122 | while holding the dict_index_t::lock S-latch. */ |
| 123 | BTR_PURGE_LEAF_ALREADY_S_LATCHED = BTR_PURGE_LEAF |
| 124 | | BTR_ALREADY_S_LATCHED |
| 125 | }; |
| 126 | |
| 127 | /** This flag ORed to btr_latch_mode says that we do the search in query |
| 128 | optimization */ |
| 129 | #define BTR_ESTIMATE 1024U |
| 130 | |
| 131 | /** This flag ORed to BTR_INSERT says that we can ignore possible |
| 132 | UNIQUE definition on secondary indexes when we decide if we can use |
| 133 | the insert buffer to speed up inserts */ |
| 134 | #define BTR_IGNORE_SEC_UNIQUE 2048U |
| 135 | |
| 136 | /** In the case of BTR_MODIFY_TREE, the caller specifies the intention |
| 137 | to insert record only. It is used to optimize block->lock range.*/ |
| 138 | #define BTR_LATCH_FOR_INSERT 32768U |
| 139 | |
| 140 | /** In the case of BTR_MODIFY_TREE, the caller specifies the intention |
| 141 | to delete record only. It is used to optimize block->lock range.*/ |
| 142 | #define BTR_LATCH_FOR_DELETE 65536U |
| 143 | |
| 144 | /** This flag is for undo insert of rtree. For rtree, we need this flag |
| 145 | to find proper rec to undo insert.*/ |
| 146 | #define BTR_RTREE_UNDO_INS 131072U |
| 147 | |
| 148 | /** In the case of BTR_MODIFY_LEAF, the caller intends to allocate or |
| 149 | free the pages of externally stored fields. */ |
| 150 | #define BTR_MODIFY_EXTERNAL 262144U |
| 151 | |
| 152 | /** Try to delete mark the record at the searched position when the |
| 153 | record is in spatial index */ |
| 154 | #define BTR_RTREE_DELETE_MARK 524288U |
| 155 | |
| 156 | #define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \ |
| 157 | ((latch_mode) & ulint(~(BTR_INSERT \ |
| 158 | | BTR_DELETE_MARK \ |
| 159 | | BTR_RTREE_UNDO_INS \ |
| 160 | | BTR_RTREE_DELETE_MARK \ |
| 161 | | BTR_DELETE \ |
| 162 | | BTR_ESTIMATE \ |
| 163 | | BTR_IGNORE_SEC_UNIQUE \ |
| 164 | | BTR_ALREADY_S_LATCHED \ |
| 165 | | BTR_LATCH_FOR_INSERT \ |
| 166 | | BTR_LATCH_FOR_DELETE \ |
| 167 | | BTR_MODIFY_EXTERNAL))) |
| 168 | |
| 169 | #define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \ |
| 170 | ((latch_mode) & ulint(~(BTR_LATCH_FOR_INSERT \ |
| 171 | | BTR_LATCH_FOR_DELETE \ |
| 172 | | BTR_MODIFY_EXTERNAL))) |
| 173 | |
| 174 | /**************************************************************//** |
| 175 | Report that an index page is corrupted. */ |
| 176 | void |
| 177 | btr_corruption_report( |
| 178 | /*==================*/ |
| 179 | const buf_block_t* block, /*!< in: corrupted block */ |
| 180 | const dict_index_t* index) /*!< in: index tree */ |
| 181 | ATTRIBUTE_COLD __attribute__((nonnull)); |
| 182 | |
| 183 | /** Assert that a B-tree page is not corrupted. |
| 184 | @param block buffer block containing a B-tree page |
| 185 | @param index the B-tree index */ |
| 186 | #define btr_assert_not_corrupted(block, index) \ |
| 187 | if ((ibool) !!page_is_comp(buf_block_get_frame(block)) \ |
| 188 | != dict_table_is_comp((index)->table)) { \ |
| 189 | btr_corruption_report(block, index); \ |
| 190 | ut_error; \ |
| 191 | } |
| 192 | |
| 193 | /**************************************************************//** |
| 194 | Gets the root node of a tree and sx-latches it for segment access. |
| 195 | @return root page, sx-latched */ |
| 196 | page_t* |
| 197 | btr_root_get( |
| 198 | /*=========*/ |
| 199 | const dict_index_t* index, /*!< in: index tree */ |
| 200 | mtr_t* mtr) /*!< in: mtr */ |
| 201 | MY_ATTRIBUTE((nonnull)); |
| 202 | |
| 203 | /**************************************************************//** |
| 204 | Checks and adjusts the root node of a tree during IMPORT TABLESPACE. |
| 205 | @return error code, or DB_SUCCESS */ |
| 206 | dberr_t |
| 207 | btr_root_adjust_on_import( |
| 208 | /*======================*/ |
| 209 | const dict_index_t* index) /*!< in: index tree */ |
| 210 | MY_ATTRIBUTE((warn_unused_result)); |
| 211 | |
| 212 | /**************************************************************//** |
| 213 | Gets the height of the B-tree (the level of the root, when the leaf |
| 214 | level is assumed to be 0). The caller must hold an S or X latch on |
| 215 | the index. |
| 216 | @return tree height (level of the root) */ |
| 217 | ulint |
| 218 | btr_height_get( |
| 219 | /*===========*/ |
| 220 | dict_index_t* index, /*!< in: index tree */ |
| 221 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
| 222 | MY_ATTRIBUTE((warn_unused_result)); |
| 223 | |
| 224 | /** Gets a buffer page and declares its latching order level. |
| 225 | @param[in] page_id page id |
| 226 | @param[in] mode latch mode |
| 227 | @param[in] file file name |
| 228 | @param[in] line line where called |
| 229 | @param[in] index index tree, may be NULL if it is not an insert buffer |
| 230 | tree |
| 231 | @param[in,out] mtr mini-transaction |
| 232 | @return block */ |
| 233 | UNIV_INLINE |
| 234 | buf_block_t* |
| 235 | btr_block_get_func( |
| 236 | const page_id_t& page_id, |
| 237 | const page_size_t& page_size, |
| 238 | ulint mode, |
| 239 | const char* file, |
| 240 | unsigned line, |
| 241 | dict_index_t* index, |
| 242 | mtr_t* mtr); |
| 243 | |
| 244 | # ifdef UNIV_DEBUG |
| 245 | /** Gets a buffer page and declares its latching order level. |
| 246 | @param page_id tablespace/page identifier |
| 247 | @param page_size page size |
| 248 | @param mode latch mode |
| 249 | @param index index tree, may be NULL if not the insert buffer tree |
| 250 | @param mtr mini-transaction handle |
| 251 | @return the block descriptor */ |
| 252 | # define btr_block_get(page_id, page_size, mode, index, mtr) \ |
| 253 | btr_block_get_func(page_id, page_size, mode, \ |
| 254 | __FILE__, __LINE__, (dict_index_t*)index, mtr) |
| 255 | # else /* UNIV_DEBUG */ |
| 256 | /** Gets a buffer page and declares its latching order level. |
| 257 | @param page_id tablespace/page identifier |
| 258 | @param page_size page size |
| 259 | @param mode latch mode |
| 260 | @param index index tree, may be NULL if not the insert buffer tree |
| 261 | @param mtr mini-transaction handle |
| 262 | @return the block descriptor */ |
| 263 | # define btr_block_get(page_id, page_size, mode, index, mtr) \ |
| 264 | btr_block_get_func(page_id, page_size, mode, __FILE__, __LINE__, (dict_index_t*)index, mtr) |
| 265 | # endif /* UNIV_DEBUG */ |
| 266 | /** Gets a buffer page and declares its latching order level. |
| 267 | @param page_id tablespace/page identifier |
| 268 | @param page_size page size |
| 269 | @param mode latch mode |
| 270 | @param index index tree, may be NULL if not the insert buffer tree |
| 271 | @param mtr mini-transaction handle |
| 272 | @return the uncompressed page frame */ |
| 273 | UNIV_INLINE |
| 274 | page_t* |
| 275 | btr_page_get( |
| 276 | /*=========*/ |
| 277 | const page_id_t& page_id, |
| 278 | const page_size_t& page_size, |
| 279 | ulint mode, |
| 280 | dict_index_t* index, |
| 281 | mtr_t* mtr) |
| 282 | MY_ATTRIBUTE((warn_unused_result)); |
| 283 | /**************************************************************//** |
| 284 | Gets the index id field of a page. |
| 285 | @return index id */ |
| 286 | UNIV_INLINE |
| 287 | index_id_t |
| 288 | btr_page_get_index_id( |
| 289 | /*==================*/ |
| 290 | const page_t* page) /*!< in: index page */ |
| 291 | MY_ATTRIBUTE((warn_unused_result)); |
| 292 | /********************************************************//** |
| 293 | Gets the node level field in an index page. |
| 294 | @param[in] page index page |
| 295 | @return level, leaf level == 0 */ |
| 296 | UNIV_INLINE |
| 297 | ulint |
| 298 | btr_page_get_level(const page_t* page) |
| 299 | { |
| 300 | ulint level; |
| 301 | |
| 302 | ut_ad(page); |
| 303 | |
| 304 | level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL); |
| 305 | |
| 306 | ut_ad(level <= BTR_MAX_NODE_LEVEL); |
| 307 | |
| 308 | return(level); |
| 309 | } MY_ATTRIBUTE((warn_unused_result)) |
| 310 | /********************************************************//** |
| 311 | Gets the next index page number. |
| 312 | @return next page number */ |
| 313 | UNIV_INLINE |
| 314 | ulint |
| 315 | btr_page_get_next( |
| 316 | /*==============*/ |
| 317 | const page_t* page, /*!< in: index page */ |
| 318 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
| 319 | MY_ATTRIBUTE((warn_unused_result)); |
| 320 | /********************************************************//** |
| 321 | Gets the previous index page number. |
| 322 | @return prev page number */ |
| 323 | UNIV_INLINE |
| 324 | ulint |
| 325 | btr_page_get_prev( |
| 326 | /*==============*/ |
| 327 | const page_t* page, /*!< in: index page */ |
| 328 | mtr_t* mtr) /*!< in: mini-transaction handle */ |
| 329 | MY_ATTRIBUTE((warn_unused_result)); |
| 330 | /**************************************************************//** |
| 331 | Releases the latch on a leaf page and bufferunfixes it. */ |
| 332 | UNIV_INLINE |
| 333 | void |
| 334 | btr_leaf_page_release( |
| 335 | /*==================*/ |
| 336 | buf_block_t* block, /*!< in: buffer block */ |
| 337 | ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or |
| 338 | BTR_MODIFY_LEAF */ |
| 339 | mtr_t* mtr) /*!< in: mtr */ |
| 340 | MY_ATTRIBUTE((nonnull)); |
| 341 | /**************************************************************//** |
| 342 | Gets the child node file address in a node pointer. |
| 343 | NOTE: the offsets array must contain all offsets for the record since |
| 344 | we read the last field according to offsets and assume that it contains |
| 345 | the child page number. In other words offsets must have been retrieved |
| 346 | with rec_get_offsets(n_fields=ULINT_UNDEFINED). |
| 347 | @return child node address */ |
| 348 | UNIV_INLINE |
| 349 | ulint |
| 350 | btr_node_ptr_get_child_page_no( |
| 351 | /*===========================*/ |
| 352 | const rec_t* rec, /*!< in: node pointer record */ |
| 353 | const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ |
| 354 | MY_ATTRIBUTE((warn_unused_result)); |
| 355 | |
| 356 | /** Create the root node for a new index tree. |
| 357 | @param[in] type type of the index |
| 358 | @param[in,out] space tablespace where created |
| 359 | @param[in] index_id index id |
| 360 | @param[in] index index, or NULL when applying TRUNCATE |
| 361 | log record during recovery |
| 362 | @param[in] btr_redo_create_info used for applying TRUNCATE log |
| 363 | @param[in] mtr mini-transaction handle |
| 364 | record during recovery |
| 365 | @return page number of the created root, FIL_NULL if did not succeed */ |
| 366 | ulint |
| 367 | btr_create( |
| 368 | ulint type, |
| 369 | fil_space_t* space, |
| 370 | index_id_t index_id, |
| 371 | dict_index_t* index, |
| 372 | const btr_create_t* btr_redo_create_info, |
| 373 | mtr_t* mtr); |
| 374 | |
| 375 | /** Free a persistent index tree if it exists. |
| 376 | @param[in] page_id root page id |
| 377 | @param[in] page_size page size |
| 378 | @param[in] index_id PAGE_INDEX_ID contents |
| 379 | @param[in,out] mtr mini-transaction */ |
| 380 | void |
| 381 | btr_free_if_exists( |
| 382 | const page_id_t& page_id, |
| 383 | const page_size_t& page_size, |
| 384 | index_id_t index_id, |
| 385 | mtr_t* mtr); |
| 386 | |
| 387 | /** Free an index tree in a temporary tablespace or during TRUNCATE TABLE. |
| 388 | @param[in] page_id root page id |
| 389 | @param[in] page_size page size */ |
| 390 | void |
| 391 | btr_free( |
| 392 | const page_id_t& page_id, |
| 393 | const page_size_t& page_size); |
| 394 | |
| 395 | /** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC. |
| 396 | @param[in,out] index clustered index |
| 397 | @return the last used AUTO_INCREMENT value |
| 398 | @retval 0 on error or if no AUTO_INCREMENT value was used yet */ |
| 399 | ib_uint64_t |
| 400 | btr_read_autoinc(dict_index_t* index) |
| 401 | MY_ATTRIBUTE((nonnull, warn_unused_result)); |
| 402 | |
| 403 | /** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC, |
| 404 | or fall back to MAX(auto_increment_column). |
| 405 | @param[in] table table containing an AUTO_INCREMENT column |
| 406 | @param[in] col_no index of the AUTO_INCREMENT column |
| 407 | @return the AUTO_INCREMENT value |
| 408 | @retval 0 on error or if no AUTO_INCREMENT value was used yet */ |
| 409 | ib_uint64_t |
| 410 | btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no) |
| 411 | MY_ATTRIBUTE((nonnull, warn_unused_result)); |
| 412 | |
| 413 | /** Write the next available AUTO_INCREMENT value to PAGE_ROOT_AUTO_INC. |
| 414 | @param[in,out] index clustered index |
| 415 | @param[in] autoinc the AUTO_INCREMENT value |
| 416 | @param[in] reset whether to reset the AUTO_INCREMENT |
| 417 | to a possibly smaller value than currently |
| 418 | exists in the page */ |
| 419 | void |
| 420 | btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) |
| 421 | MY_ATTRIBUTE((nonnull)); |
| 422 | |
| 423 | /*************************************************************//** |
| 424 | Makes tree one level higher by splitting the root, and inserts |
| 425 | the tuple. It is assumed that mtr contains an x-latch on the tree. |
| 426 | NOTE that the operation of this function must always succeed, |
| 427 | we cannot reverse it: therefore enough free disk space must be |
| 428 | guaranteed to be available before this function is called. |
| 429 | @return inserted record */ |
| 430 | rec_t* |
| 431 | btr_root_raise_and_insert( |
| 432 | /*======================*/ |
| 433 | ulint flags, /*!< in: undo logging and locking flags */ |
| 434 | btr_cur_t* cursor, /*!< in: cursor at which to insert: must be |
| 435 | on the root page; when the function returns, |
| 436 | the cursor is positioned on the predecessor |
| 437 | of the inserted record */ |
| 438 | ulint** offsets,/*!< out: offsets on inserted record */ |
| 439 | mem_heap_t** heap, /*!< in/out: pointer to memory heap |
| 440 | that can be emptied, or NULL */ |
| 441 | const dtuple_t* tuple, /*!< in: tuple to insert */ |
| 442 | ulint n_ext, /*!< in: number of externally stored columns */ |
| 443 | mtr_t* mtr) /*!< in: mtr */ |
| 444 | MY_ATTRIBUTE((warn_unused_result)); |
| 445 | /*************************************************************//** |
| 446 | Reorganizes an index page. |
| 447 | |
| 448 | IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE |
| 449 | if this is a compressed leaf page in a secondary index. This has to |
| 450 | be done either within the same mini-transaction, or by invoking |
| 451 | ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, |
| 452 | IBUF_BITMAP_FREE is unaffected by reorganization. |
| 453 | |
| 454 | @retval true if the operation was successful |
| 455 | @retval false if it is a compressed page, and recompression failed */ |
| 456 | bool |
| 457 | btr_page_reorganize_low( |
| 458 | /*====================*/ |
| 459 | bool recovery,/*!< in: true if called in recovery: |
| 460 | locks should not be updated, i.e., |
| 461 | there cannot exist locks on the |
| 462 | page, and a hash index should not be |
| 463 | dropped: it cannot exist */ |
| 464 | ulint z_level,/*!< in: compression level to be used |
| 465 | if dealing with compressed page */ |
| 466 | page_cur_t* cursor, /*!< in/out: page cursor */ |
| 467 | dict_index_t* index, /*!< in: the index tree of the page */ |
| 468 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
| 469 | MY_ATTRIBUTE((warn_unused_result)); |
| 470 | /*************************************************************//** |
| 471 | Reorganizes an index page. |
| 472 | |
| 473 | IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE |
| 474 | if this is a compressed leaf page in a secondary index. This has to |
| 475 | be done either within the same mini-transaction, or by invoking |
| 476 | ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, |
| 477 | IBUF_BITMAP_FREE is unaffected by reorganization. |
| 478 | |
| 479 | @retval true if the operation was successful |
| 480 | @retval false if it is a compressed page, and recompression failed */ |
| 481 | bool |
| 482 | btr_page_reorganize( |
| 483 | /*================*/ |
| 484 | page_cur_t* cursor, /*!< in/out: page cursor */ |
| 485 | dict_index_t* index, /*!< in: the index tree of the page */ |
| 486 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
| 487 | MY_ATTRIBUTE((nonnull)); |
| 488 | /*************************************************************//** |
| 489 | Decides if the page should be split at the convergence point of |
| 490 | inserts converging to left. |
| 491 | @return TRUE if split recommended */ |
| 492 | ibool |
| 493 | btr_page_get_split_rec_to_left( |
| 494 | /*===========================*/ |
| 495 | btr_cur_t* cursor, /*!< in: cursor at which to insert */ |
| 496 | rec_t** split_rec)/*!< out: if split recommended, |
| 497 | the first record on upper half page, |
| 498 | or NULL if tuple should be first */ |
| 499 | MY_ATTRIBUTE((warn_unused_result)); |
| 500 | /*************************************************************//** |
| 501 | Decides if the page should be split at the convergence point of |
| 502 | inserts converging to right. |
| 503 | @return TRUE if split recommended */ |
| 504 | ibool |
| 505 | btr_page_get_split_rec_to_right( |
| 506 | /*============================*/ |
| 507 | btr_cur_t* cursor, /*!< in: cursor at which to insert */ |
| 508 | rec_t** split_rec)/*!< out: if split recommended, |
| 509 | the first record on upper half page, |
| 510 | or NULL if tuple should be first */ |
| 511 | MY_ATTRIBUTE((warn_unused_result)); |
| 512 | |
| 513 | /*************************************************************//** |
| 514 | Splits an index page to halves and inserts the tuple. It is assumed |
| 515 | that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is |
| 516 | released within this function! NOTE that the operation of this |
| 517 | function must always succeed, we cannot reverse it: therefore enough |
| 518 | free disk space (2 pages) must be guaranteed to be available before |
| 519 | this function is called. |
| 520 | |
| 521 | @return inserted record */ |
| 522 | rec_t* |
| 523 | btr_page_split_and_insert( |
| 524 | /*======================*/ |
| 525 | ulint flags, /*!< in: undo logging and locking flags */ |
| 526 | btr_cur_t* cursor, /*!< in: cursor at which to insert; when the |
| 527 | function returns, the cursor is positioned |
| 528 | on the predecessor of the inserted record */ |
| 529 | ulint** offsets,/*!< out: offsets on inserted record */ |
| 530 | mem_heap_t** heap, /*!< in/out: pointer to memory heap |
| 531 | that can be emptied, or NULL */ |
| 532 | const dtuple_t* tuple, /*!< in: tuple to insert */ |
| 533 | ulint n_ext, /*!< in: number of externally stored columns */ |
| 534 | mtr_t* mtr) /*!< in: mtr */ |
| 535 | MY_ATTRIBUTE((warn_unused_result)); |
| 536 | /*******************************************************//** |
| 537 | Inserts a data tuple to a tree on a non-leaf level. It is assumed |
| 538 | that mtr holds an x-latch on the tree. */ |
| 539 | void |
| 540 | btr_insert_on_non_leaf_level_func( |
| 541 | /*==============================*/ |
| 542 | ulint flags, /*!< in: undo logging and locking flags */ |
| 543 | dict_index_t* index, /*!< in: index */ |
| 544 | ulint level, /*!< in: level, must be > 0 */ |
| 545 | dtuple_t* tuple, /*!< in: the record to be inserted */ |
| 546 | const char* file, /*!< in: file name */ |
| 547 | unsigned line, /*!< in: line where called */ |
| 548 | mtr_t* mtr); /*!< in: mtr */ |
| 549 | #define btr_insert_on_non_leaf_level(f,i,l,t,m) \ |
| 550 | btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m) |
| 551 | /****************************************************************//** |
| 552 | Sets a record as the predefined minimum record. */ |
| 553 | void |
| 554 | btr_set_min_rec_mark( |
| 555 | /*=================*/ |
| 556 | rec_t* rec, /*!< in/out: record */ |
| 557 | mtr_t* mtr) /*!< in: mtr */ |
| 558 | MY_ATTRIBUTE((nonnull)); |
| 559 | /*************************************************************//** |
| 560 | Deletes on the upper level the node pointer to a page. */ |
| 561 | void |
| 562 | btr_node_ptr_delete( |
| 563 | /*================*/ |
| 564 | dict_index_t* index, /*!< in: index tree */ |
| 565 | buf_block_t* block, /*!< in: page whose node pointer is deleted */ |
| 566 | mtr_t* mtr) /*!< in: mtr */ |
| 567 | MY_ATTRIBUTE((nonnull)); |
| 568 | #ifdef UNIV_DEBUG |
| 569 | /************************************************************//** |
| 570 | Checks that the node pointer to a page is appropriate. |
| 571 | @return TRUE */ |
| 572 | ibool |
| 573 | btr_check_node_ptr( |
| 574 | /*===============*/ |
| 575 | dict_index_t* index, /*!< in: index tree */ |
| 576 | buf_block_t* block, /*!< in: index page */ |
| 577 | mtr_t* mtr) /*!< in: mtr */ |
| 578 | MY_ATTRIBUTE((warn_unused_result)); |
| 579 | #endif /* UNIV_DEBUG */ |
| 580 | /*************************************************************//** |
| 581 | Tries to merge the page first to the left immediate brother if such a |
| 582 | brother exists, and the node pointers to the current page and to the |
| 583 | brother reside on the same page. If the left brother does not satisfy these |
| 584 | conditions, looks at the right brother. If the page is the only one on that |
| 585 | level lifts the records of the page to the father page, thus reducing the |
| 586 | tree height. It is assumed that mtr holds an x-latch on the tree and on the |
| 587 | page. If cursor is on the leaf level, mtr must also hold x-latches to |
| 588 | the brothers, if they exist. |
| 589 | @return TRUE on success */ |
| 590 | ibool |
| 591 | btr_compress( |
| 592 | /*=========*/ |
| 593 | btr_cur_t* cursor, /*!< in/out: cursor on the page to merge |
| 594 | or lift; the page must not be empty: |
| 595 | when deleting records, use btr_discard_page() |
| 596 | if the page would become empty */ |
| 597 | ibool adjust, /*!< in: TRUE if should adjust the |
| 598 | cursor position even if compression occurs */ |
| 599 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
| 600 | MY_ATTRIBUTE((nonnull)); |
| 601 | /*************************************************************//** |
| 602 | Discards a page from a B-tree. This is used to remove the last record from |
| 603 | a B-tree page: the whole page must be removed at the same time. This cannot |
| 604 | be used for the root page, which is allowed to be empty. */ |
| 605 | void |
| 606 | btr_discard_page( |
| 607 | /*=============*/ |
| 608 | btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on |
| 609 | the root page */ |
| 610 | mtr_t* mtr); /*!< in: mtr */ |
| 611 | /****************************************************************//** |
| 612 | Parses the redo log record for setting an index record as the predefined |
| 613 | minimum record. |
| 614 | @return end of log record or NULL */ |
| 615 | byte* |
| 616 | btr_parse_set_min_rec_mark( |
| 617 | /*=======================*/ |
| 618 | byte* ptr, /*!< in: buffer */ |
| 619 | byte* end_ptr,/*!< in: buffer end */ |
| 620 | ulint comp, /*!< in: nonzero=compact page format */ |
| 621 | page_t* page, /*!< in: page or NULL */ |
| 622 | mtr_t* mtr) /*!< in: mtr or NULL */ |
| 623 | MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); |
| 624 | /***********************************************************//** |
| 625 | Parses a redo log record of reorganizing a page. |
| 626 | @return end of log record or NULL */ |
| 627 | byte* |
| 628 | btr_parse_page_reorganize( |
| 629 | /*======================*/ |
| 630 | byte* ptr, /*!< in: buffer */ |
| 631 | byte* end_ptr,/*!< in: buffer end */ |
| 632 | dict_index_t* index, /*!< in: record descriptor */ |
| 633 | bool compressed,/*!< in: true if compressed page */ |
| 634 | buf_block_t* block, /*!< in: page to be reorganized, or NULL */ |
| 635 | mtr_t* mtr) /*!< in: mtr or NULL */ |
| 636 | MY_ATTRIBUTE((warn_unused_result)); |
| 637 | /**************************************************************//** |
| 638 | Gets the number of pages in a B-tree. |
| 639 | @return number of pages, or ULINT_UNDEFINED if the index is unavailable */ |
| 640 | ulint |
| 641 | btr_get_size( |
| 642 | /*=========*/ |
| 643 | dict_index_t* index, /*!< in: index */ |
| 644 | ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ |
| 645 | mtr_t* mtr) /*!< in/out: mini-transaction where index |
| 646 | is s-latched */ |
| 647 | MY_ATTRIBUTE((warn_unused_result)); |
| 648 | /**************************************************************//** |
| 649 | Gets the number of reserved and used pages in a B-tree. |
| 650 | @return number of pages reserved, or ULINT_UNDEFINED if the index |
| 651 | is unavailable */ |
| 652 | UNIV_INTERN |
| 653 | ulint |
| 654 | btr_get_size_and_reserved( |
| 655 | /*======================*/ |
| 656 | dict_index_t* index, /*!< in: index */ |
| 657 | ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ |
| 658 | ulint* used, /*!< out: number of pages used (<= reserved) */ |
| 659 | mtr_t* mtr) /*!< in/out: mini-transaction where index |
| 660 | is s-latched */ |
| 661 | __attribute__((nonnull)); |
| 662 | |
| 663 | /**************************************************************//** |
| 664 | Allocates a new file page to be used in an index tree. NOTE: we assume |
| 665 | that the caller has made the reservation for free extents! |
| 666 | @retval NULL if no page could be allocated |
| 667 | @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded |
| 668 | (init_mtr == mtr, or the page was not previously freed in mtr) |
| 669 | @retval block (not allocated or initialized) otherwise */ |
| 670 | buf_block_t* |
| 671 | btr_page_alloc( |
| 672 | /*===========*/ |
| 673 | dict_index_t* index, /*!< in: index tree */ |
| 674 | ulint hint_page_no, /*!< in: hint of a good page */ |
| 675 | byte file_direction, /*!< in: direction where a possible |
| 676 | page split is made */ |
| 677 | ulint level, /*!< in: level where the page is placed |
| 678 | in the tree */ |
| 679 | mtr_t* mtr, /*!< in/out: mini-transaction |
| 680 | for the allocation */ |
| 681 | mtr_t* init_mtr) /*!< in/out: mini-transaction |
| 682 | for x-latching and initializing |
| 683 | the page */ |
| 684 | MY_ATTRIBUTE((warn_unused_result)); |
| 685 | /**************************************************************//** |
| 686 | Frees a file page used in an index tree. NOTE: cannot free field external |
| 687 | storage pages because the page must contain info on its level. */ |
| 688 | void |
| 689 | btr_page_free( |
| 690 | /*==========*/ |
| 691 | dict_index_t* index, /*!< in: index tree */ |
| 692 | buf_block_t* block, /*!< in: block to be freed, x-latched */ |
| 693 | mtr_t* mtr) /*!< in: mtr */ |
| 694 | MY_ATTRIBUTE((nonnull)); |
| 695 | /** Empty an index page (possibly the root page). @see btr_page_create(). |
| 696 | @param[in,out] block page to be emptied |
| 697 | @param[in,out] page_zip compressed page frame, or NULL |
| 698 | @param[in] index index of the page |
| 699 | @param[in] level B-tree level of the page (0=leaf) |
| 700 | @param[in,out] mtr mini-transaction */ |
| 701 | void |
| 702 | btr_page_empty( |
| 703 | buf_block_t* block, |
| 704 | page_zip_des_t* page_zip, |
| 705 | dict_index_t* index, |
| 706 | ulint level, |
| 707 | mtr_t* mtr) |
| 708 | MY_ATTRIBUTE((nonnull(1, 3, 5))); |
| 709 | /**************************************************************//** |
| 710 | Creates a new index page (not the root, and also not |
| 711 | used in page reorganization). @see btr_page_empty(). */ |
| 712 | void |
| 713 | btr_page_create( |
| 714 | /*============*/ |
| 715 | buf_block_t* block, /*!< in/out: page to be created */ |
| 716 | page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ |
| 717 | dict_index_t* index, /*!< in: index */ |
| 718 | ulint level, /*!< in: the B-tree level of the page */ |
| 719 | mtr_t* mtr); /*!< in: mtr */ |
| 720 | /**************************************************************//** |
| 721 | Frees a file page used in an index tree. Can be used also to BLOB |
| 722 | external storage pages. */ |
| 723 | void |
| 724 | btr_page_free_low( |
| 725 | /*==============*/ |
| 726 | dict_index_t* index, /*!< in: index tree */ |
| 727 | buf_block_t* block, /*!< in: block to be freed, x-latched */ |
| 728 | ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */ |
| 729 | bool blob, /*!< in: blob page */ |
| 730 | mtr_t* mtr) /*!< in: mtr */ |
| 731 | MY_ATTRIBUTE((nonnull(1,2))); |
| 732 | /**************************************************************//** |
| 733 | Gets the root node of a tree and x- or s-latches it. |
| 734 | @return root page, x- or s-latched */ |
| 735 | buf_block_t* |
| 736 | btr_root_block_get( |
| 737 | /*===============*/ |
| 738 | const dict_index_t* index, /*!< in: index tree */ |
| 739 | ulint mode, /*!< in: either RW_S_LATCH |
| 740 | or RW_X_LATCH */ |
| 741 | mtr_t* mtr); /*!< in: mtr */ |
| 742 | |
| 743 | /*************************************************************//** |
| 744 | Reorganizes an index page. |
| 745 | |
| 746 | IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE |
| 747 | if this is a compressed leaf page in a secondary index. This has to |
| 748 | be done either within the same mini-transaction, or by invoking |
| 749 | ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, |
| 750 | IBUF_BITMAP_FREE is unaffected by reorganization. |
| 751 | |
| 752 | @retval true if the operation was successful |
| 753 | @retval false if it is a compressed page, and recompression failed */ |
| 754 | UNIV_INTERN |
| 755 | bool |
| 756 | btr_page_reorganize_block( |
| 757 | /*======================*/ |
| 758 | bool recovery,/*!< in: true if called in recovery: |
| 759 | locks should not be updated, i.e., |
| 760 | there cannot exist locks on the |
| 761 | page, and a hash index should not be |
| 762 | dropped: it cannot exist */ |
| 763 | ulint z_level,/*!< in: compression level to be used |
| 764 | if dealing with compressed page */ |
| 765 | buf_block_t* block, /*!< in/out: B-tree page */ |
| 766 | dict_index_t* index, /*!< in: the index tree of the page */ |
| 767 | mtr_t* mtr) /*!< in/out: mini-transaction */ |
| 768 | __attribute__((nonnull)); |
| 769 | |
| 770 | #ifdef UNIV_BTR_PRINT |
| 771 | /*************************************************************//** |
| 772 | Prints size info of a B-tree. */ |
| 773 | void |
| 774 | btr_print_size( |
| 775 | /*===========*/ |
| 776 | dict_index_t* index) /*!< in: index tree */ |
| 777 | MY_ATTRIBUTE((nonnull)); |
| 778 | /**************************************************************//** |
| 779 | Prints directories and other info of all nodes in the index. */ |
| 780 | void |
| 781 | btr_print_index( |
| 782 | /*============*/ |
| 783 | dict_index_t* index, /*!< in: index */ |
| 784 | ulint width) /*!< in: print this many entries from start |
| 785 | and end */ |
| 786 | MY_ATTRIBUTE((nonnull)); |
| 787 | #endif /* UNIV_BTR_PRINT */ |
| 788 | /************************************************************//** |
| 789 | Checks the size and number of fields in a record based on the definition of |
| 790 | the index. |
| 791 | @return TRUE if ok */ |
| 792 | ibool |
| 793 | btr_index_rec_validate( |
| 794 | /*===================*/ |
| 795 | const rec_t* rec, /*!< in: index record */ |
| 796 | const dict_index_t* index, /*!< in: index */ |
| 797 | ibool dump_on_error) /*!< in: TRUE if the function |
| 798 | should print hex dump of record |
| 799 | and page on error */ |
| 800 | MY_ATTRIBUTE((warn_unused_result)); |
| 801 | /**************************************************************//** |
| 802 | Checks the consistency of an index tree. |
| 803 | @return DB_SUCCESS if ok, error code if not */ |
| 804 | dberr_t |
| 805 | btr_validate_index( |
| 806 | /*===============*/ |
| 807 | dict_index_t* index, /*!< in: index */ |
| 808 | const trx_t* trx, /*!< in: transaction or 0 */ |
| 809 | bool lockout)/*!< in: true if X-latch index is intended */ |
| 810 | MY_ATTRIBUTE((warn_unused_result)); |
| 811 | |
| 812 | /*************************************************************//** |
| 813 | Removes a page from the level list of pages. */ |
| 814 | UNIV_INTERN |
| 815 | void |
| 816 | btr_level_list_remove_func( |
| 817 | /*=======================*/ |
| 818 | ulint space, /*!< in: space where removed */ |
| 819 | const page_size_t& page_size,/*!< in: page size */ |
| 820 | page_t* page, /*!< in/out: page to remove */ |
| 821 | dict_index_t* index, /*!< in: index tree */ |
| 822 | mtr_t* mtr); /*!< in/out: mini-transaction */ |
| 823 | /*************************************************************//** |
| 824 | Removes a page from the level list of pages. |
| 825 | @param space in: space where removed |
| 826 | @param zip_size in: compressed page size in bytes, or 0 for uncompressed |
| 827 | @param page in/out: page to remove |
| 828 | @param index in: index tree |
| 829 | @param mtr in/out: mini-transaction */ |
| 830 | # define btr_level_list_remove(space,zip_size,page,index,mtr) \ |
| 831 | btr_level_list_remove_func(space,zip_size,page,index,mtr) |
| 832 | |
| 833 | /*************************************************************//** |
| 834 | If page is the only on its level, this function moves its records to the |
| 835 | father page, thus reducing the tree height. |
| 836 | @return father block */ |
| 837 | UNIV_INTERN |
| 838 | buf_block_t* |
| 839 | btr_lift_page_up( |
| 840 | /*=============*/ |
| 841 | dict_index_t* index, /*!< in: index tree */ |
| 842 | buf_block_t* block, /*!< in: page which is the only on its level; |
| 843 | must not be empty: use |
| 844 | btr_discard_only_page_on_level if the last |
| 845 | record from the page should be removed */ |
| 846 | mtr_t* mtr) /*!< in: mtr */ |
| 847 | __attribute__((nonnull)); |
| 848 | |
| 849 | #define BTR_N_LEAF_PAGES 1 |
| 850 | #define BTR_TOTAL_SIZE 2 |
| 851 | |
| 852 | #include "btr0btr.ic" |
| 853 | |
| 854 | /**************************************************************** |
| 855 | Global variable controlling if scrubbing should be performed */ |
| 856 | extern my_bool srv_immediate_scrub_data_uncompressed; |
| 857 | |
| 858 | #endif |
| 859 | |