1/*****************************************************************************
2
3Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file include/btr0cur.h
22The index tree cursor
23
24Created 10/16/1994 Heikki Tuuri
25*******************************************************/
26
27#ifndef btr0cur_h
28#define btr0cur_h
29
30#include "univ.i"
31#include "my_base.h"
32#include "dict0dict.h"
33#include "page0cur.h"
34#include "btr0types.h"
35#include "gis0type.h"
36
37/** Mode flags for btr_cur operations; these can be ORed */
38enum {
39 /** do no undo logging */
40 BTR_NO_UNDO_LOG_FLAG = 1,
41 /** do no record lock checking */
42 BTR_NO_LOCKING_FLAG = 2,
43 /** sys fields will be found in the update vector or inserted
44 entry */
45 BTR_KEEP_SYS_FLAG = 4,
46
47 /** no rollback */
48 BTR_NO_ROLLBACK = BTR_NO_UNDO_LOG_FLAG
49 | BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG,
50
51 /** btr_cur_pessimistic_update() must keep cursor position
52 when moving columns to big_rec */
53 BTR_KEEP_POS_FLAG = 8,
54 /** the caller is creating the index or wants to bypass the
55 index->info.online creation log */
56 BTR_CREATE_FLAG = 16,
57 /** the caller of btr_cur_optimistic_update() or
58 btr_cur_update_in_place() will take care of
59 updating IBUF_BITMAP_FREE */
60 BTR_KEEP_IBUF_BITMAP = 32
61};
62
63/* btr_cur_latch_leaves() returns latched blocks and savepoints. */
64struct btr_latch_leaves_t {
65 /* left block, target block and right block */
66 buf_block_t* blocks[3];
67 ulint savepoints[3];
68};
69
70#include "que0types.h"
71#include "row0types.h"
72#include "ha0ha.h"
73
74#ifdef UNIV_DEBUG
75/*********************************************************//**
76Returns the page cursor component of a tree cursor.
77@return pointer to page cursor component */
78UNIV_INLINE
79page_cur_t*
80btr_cur_get_page_cur(
81/*=================*/
82 const btr_cur_t* cursor);/*!< in: tree cursor */
83/*********************************************************//**
84Returns the buffer block on which the tree cursor is positioned.
85@return pointer to buffer block */
86UNIV_INLINE
87buf_block_t*
88btr_cur_get_block(
89/*==============*/
90 const btr_cur_t* cursor);/*!< in: tree cursor */
91/*********************************************************//**
92Returns the record pointer of a tree cursor.
93@return pointer to record */
94UNIV_INLINE
95rec_t*
96btr_cur_get_rec(
97/*============*/
98 const btr_cur_t* cursor);/*!< in: tree cursor */
99#else /* UNIV_DEBUG */
100# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
101# define btr_cur_get_block(cursor) ((cursor)->page_cur.block)
102# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec)
103#endif /* UNIV_DEBUG */
104/*********************************************************//**
105Returns the compressed page on which the tree cursor is positioned.
106@return pointer to compressed page, or NULL if the page is not compressed */
107UNIV_INLINE
108page_zip_des_t*
109btr_cur_get_page_zip(
110/*=================*/
111 btr_cur_t* cursor);/*!< in: tree cursor */
112/*********************************************************//**
113Returns the page of a tree cursor.
114@return pointer to page */
115UNIV_INLINE
116page_t*
117btr_cur_get_page(
118/*=============*/
119 btr_cur_t* cursor);/*!< in: tree cursor */
120/*********************************************************//**
121Returns the index of a cursor.
122@param cursor b-tree cursor
123@return index */
124#define btr_cur_get_index(cursor) ((cursor)->index)
125/*********************************************************//**
126Positions a tree cursor at a given record. */
127UNIV_INLINE
128void
129btr_cur_position(
130/*=============*/
131 dict_index_t* index, /*!< in: index */
132 rec_t* rec, /*!< in: record in tree */
133 buf_block_t* block, /*!< in: buffer block of rec */
134 btr_cur_t* cursor);/*!< in: cursor */
135
136/** Load the instant ALTER TABLE metadata from the clustered index
137when loading a table definition.
138@param[in,out] table table definition from the data dictionary
139@return error code
140@retval DB_SUCCESS if no error occurred */
141dberr_t
142btr_cur_instant_init(dict_table_t* table)
143 ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
144
145/** Initialize the n_core_null_bytes on first access to a clustered
146index root page.
147@param[in] index clustered index that is on its first access
148@param[in] page clustered index root page
149@return whether the page is corrupted */
150bool
151btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
152 ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
153
154/** Optimistically latches the leaf page or pages requested.
155@param[in] block guessed buffer block
156@param[in] modify_clock modify clock value
157@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
158@param[in,out] cursor cursor
159@param[in] file file name
160@param[in] line line where called
161@param[in] mtr mini-transaction
162@return true if success */
163bool
164btr_cur_optimistic_latch_leaves(
165 buf_block_t* block,
166 ib_uint64_t modify_clock,
167 ulint* latch_mode,
168 btr_cur_t* cursor,
169 const char* file,
170 unsigned line,
171 mtr_t* mtr);
172
173/********************************************************************//**
174Searches an index tree and positions a tree cursor on a given level.
175NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
176to node pointer page number fields on the upper levels of the tree!
177Note that if mode is PAGE_CUR_LE, which is used in inserts, then
178cursor->up_match and cursor->low_match both will have sensible values.
179If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
180dberr_t
181btr_cur_search_to_nth_level_func(
182 dict_index_t* index, /*!< in: index */
183 ulint level, /*!< in: the tree level of search */
184 const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
185 tuple must be set so that it cannot get
186 compared to the node ptr page number field! */
187 page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
188 NOTE that if the search is made using a unique
189 prefix of a record, mode should be PAGE_CUR_LE,
190 not PAGE_CUR_GE, as the latter may end up on
191 the previous page of the record! Inserts
192 should always be made using PAGE_CUR_LE to
193 search the position! */
194 ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
195 at most one of BTR_INSERT, BTR_DELETE_MARK,
196 BTR_DELETE, or BTR_ESTIMATE;
197 cursor->left_block is used to store a pointer
198 to the left neighbor page, in the cases
199 BTR_SEARCH_PREV and BTR_MODIFY_PREV;
200 NOTE that if ahi_latch, we might not have a
201 cursor page latch, we assume that ahi_latch
202 protects the record! */
203 btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
204 s- or x-latched, but see also above! */
205#ifdef BTR_CUR_HASH_ADAPT
206 rw_lock_t* ahi_latch,
207 /*!< in: currently held btr_search_latch
208 (in RW_S_LATCH mode), or NULL */
209#endif /* BTR_CUR_HASH_ADAPT */
210 const char* file, /*!< in: file name */
211 unsigned line, /*!< in: line where called */
212 mtr_t* mtr, /*!< in/out: mini-transaction */
213 ib_uint64_t autoinc = 0);
214 /*!< in: PAGE_ROOT_AUTO_INC to be written
215 (0 if none) */
216#ifdef BTR_CUR_HASH_ADAPT
217# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
218 btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,a,fi,li,mtr)
219#else /* BTR_CUR_HASH_ADAPT */
220# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
221 btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,fi,li,mtr)
222#endif /* BTR_CUR_HASH_ADAPT */
223
224/*****************************************************************//**
225Opens a cursor at either end of an index.
226@return DB_SUCCESS or error code */
227dberr_t
228btr_cur_open_at_index_side_func(
229/*============================*/
230 bool from_left, /*!< in: true if open to the low end,
231 false if to the high end */
232 dict_index_t* index, /*!< in: index */
233 ulint latch_mode, /*!< in: latch mode */
234 btr_cur_t* cursor, /*!< in/out: cursor */
235 ulint level, /*!< in: level to search for
236 (0=leaf) */
237 const char* file, /*!< in: file name */
238 unsigned line, /*!< in: line where called */
239 mtr_t* mtr) /*!< in/out: mini-transaction */
240 MY_ATTRIBUTE((nonnull));
241
242#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
243 btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
244
245/**********************************************************************//**
246Positions a cursor at a randomly chosen position within a B-tree.
247@return true if the index is available and we have put the cursor, false
248if the index is unavailable */
249bool
250btr_cur_open_at_rnd_pos_func(
251/*=========================*/
252 dict_index_t* index, /*!< in: index */
253 ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
254 btr_cur_t* cursor, /*!< in/out: B-tree cursor */
255 const char* file, /*!< in: file name */
256 unsigned line, /*!< in: line where called */
257 mtr_t* mtr); /*!< in: mtr */
258#define btr_cur_open_at_rnd_pos(i,l,c,m) \
259 btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
260/*************************************************************//**
261Tries to perform an insert to a page in an index tree, next to cursor.
262It is assumed that mtr holds an x-latch on the page. The operation does
263not succeed if there is too little space on the page. If there is just
264one record on the page, the insert will always succeed; this is to
265prevent trying to split a page with just one record.
266@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
267dberr_t
268btr_cur_optimistic_insert(
269/*======================*/
270 ulint flags, /*!< in: undo logging and locking flags: if not
271 zero, the parameters index and thr should be
272 specified */
273 btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
274 cursor stays valid */
275 ulint** offsets,/*!< out: offsets on *rec */
276 mem_heap_t** heap, /*!< in/out: pointer to memory heap */
277 dtuple_t* entry, /*!< in/out: entry to insert */
278 rec_t** rec, /*!< out: pointer to inserted record if
279 succeed */
280 big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
281 be stored externally by the caller */
282 ulint n_ext, /*!< in: number of externally stored columns */
283 que_thr_t* thr, /*!< in/out: query thread; can be NULL if
284 !(~flags
285 & (BTR_NO_LOCKING_FLAG
286 | BTR_NO_UNDO_LOG_FLAG)) */
287 mtr_t* mtr) /*!< in/out: mini-transaction;
288 if this function returns DB_SUCCESS on
289 a leaf page of a secondary index in a
290 compressed tablespace, the caller must
291 mtr_commit(mtr) before latching
292 any further pages */
293 MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result));
294/*************************************************************//**
295Performs an insert on a page of an index tree. It is assumed that mtr
296holds an x-latch on the tree and on the cursor page. If the insert is
297made on the leaf level, to avoid deadlocks, mtr must also own x-latches
298to brothers of page, if those brothers exist.
299@return DB_SUCCESS or error number */
300dberr_t
301btr_cur_pessimistic_insert(
302/*=======================*/
303 ulint flags, /*!< in: undo logging and locking flags: if not
304 zero, the parameter thr should be
305 specified; if no undo logging is specified,
306 then the caller must have reserved enough
307 free extents in the file space so that the
308 insertion will certainly succeed */
309 btr_cur_t* cursor, /*!< in: cursor after which to insert;
310 cursor stays valid */
311 ulint** offsets,/*!< out: offsets on *rec */
312 mem_heap_t** heap, /*!< in/out: pointer to memory heap
313 that can be emptied */
314 dtuple_t* entry, /*!< in/out: entry to insert */
315 rec_t** rec, /*!< out: pointer to inserted record if
316 succeed */
317 big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
318 be stored externally by the caller */
319 ulint n_ext, /*!< in: number of externally stored columns */
320 que_thr_t* thr, /*!< in/out: query thread; can be NULL if
321 !(~flags
322 & (BTR_NO_LOCKING_FLAG
323 | BTR_NO_UNDO_LOG_FLAG)) */
324 mtr_t* mtr) /*!< in/out: mini-transaction */
325 MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result));
326/*************************************************************//**
327See if there is enough place in the page modification log to log
328an update-in-place.
329
330@retval false if out of space; IBUF_BITMAP_FREE will be reset
331outside mtr if the page was recompressed
332@retval true if enough place;
333
334IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
335a secondary index leaf page. This has to be done either within the
336same mini-transaction, or by invoking ibuf_reset_free_bits() before
337mtr_commit(mtr). */
338bool
339btr_cur_update_alloc_zip_func(
340/*==========================*/
341 page_zip_des_t* page_zip,/*!< in/out: compressed page */
342 page_cur_t* cursor, /*!< in/out: B-tree page cursor */
343 dict_index_t* index, /*!< in: the index corresponding to cursor */
344#ifdef UNIV_DEBUG
345 ulint* offsets,/*!< in/out: offsets of the cursor record */
346#endif /* UNIV_DEBUG */
347 ulint length, /*!< in: size needed */
348 bool create, /*!< in: true=delete-and-insert,
349 false=update-in-place */
350 mtr_t* mtr) /*!< in/out: mini-transaction */
351 MY_ATTRIBUTE((nonnull, warn_unused_result));
352#ifdef UNIV_DEBUG
353# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
354 btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr)
355#else /* UNIV_DEBUG */
356# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
357 btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr)
358#endif /* UNIV_DEBUG */
359/*************************************************************//**
360Updates a record when the update causes no size changes in its fields.
361@return locking or undo log related error code, or
362@retval DB_SUCCESS on success
363@retval DB_ZIP_OVERFLOW if there is not enough space left
364on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
365dberr_t
366btr_cur_update_in_place(
367/*====================*/
368 ulint flags, /*!< in: undo logging and locking flags */
369 btr_cur_t* cursor, /*!< in: cursor on the record to update;
370 cursor stays valid and positioned on the
371 same record */
372 ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
373 const upd_t* update, /*!< in: update vector */
374 ulint cmpl_info,/*!< in: compiler info on secondary index
375 updates */
376 que_thr_t* thr, /*!< in: query thread */
377 trx_id_t trx_id, /*!< in: transaction id */
378 mtr_t* mtr) /*!< in/out: mini-transaction; if this
379 is a secondary index, the caller must
380 mtr_commit(mtr) before latching any
381 further pages */
382 MY_ATTRIBUTE((warn_unused_result, nonnull));
383/***********************************************************//**
384Writes a redo log record of updating a record in-place. */
385void
386btr_cur_update_in_place_log(
387/*========================*/
388 ulint flags, /*!< in: flags */
389 const rec_t* rec, /*!< in: record */
390 dict_index_t* index, /*!< in: index of the record */
391 const upd_t* update, /*!< in: update vector */
392 trx_id_t trx_id, /*!< in: transaction id */
393 roll_ptr_t roll_ptr, /*!< in: roll ptr */
394 mtr_t* mtr) /*!< in: mtr */
395 MY_ATTRIBUTE((nonnull));
396/*************************************************************//**
397Tries to update a record on a page in an index tree. It is assumed that mtr
398holds an x-latch on the page. The operation does not succeed if there is too
399little space on the page or if the update would result in too empty a page,
400so that tree compression is recommended.
401@return error code, including
402@retval DB_SUCCESS on success
403@retval DB_OVERFLOW if the updated record does not fit
404@retval DB_UNDERFLOW if the page would become too empty
405@retval DB_ZIP_OVERFLOW if there is not enough space left
406on the compressed page */
407dberr_t
408btr_cur_optimistic_update(
409/*======================*/
410 ulint flags, /*!< in: undo logging and locking flags */
411 btr_cur_t* cursor, /*!< in: cursor on the record to update;
412 cursor stays valid and positioned on the
413 same record */
414 ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
415 mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
416 const upd_t* update, /*!< in: update vector; this must also
417 contain trx id and roll ptr fields */
418 ulint cmpl_info,/*!< in: compiler info on secondary index
419 updates */
420 que_thr_t* thr, /*!< in: query thread */
421 trx_id_t trx_id, /*!< in: transaction id */
422 mtr_t* mtr) /*!< in/out: mini-transaction; if this
423 is a secondary index, the caller must
424 mtr_commit(mtr) before latching any
425 further pages */
426 MY_ATTRIBUTE((warn_unused_result, nonnull));
427/*************************************************************//**
428Performs an update of a record on a page of a tree. It is assumed
429that mtr holds an x-latch on the tree and on the cursor page. If the
430update is made on the leaf level, to avoid deadlocks, mtr must also
431own x-latches to brothers of page, if those brothers exist.
432@return DB_SUCCESS or error code */
433dberr_t
434btr_cur_pessimistic_update(
435/*=======================*/
436 ulint flags, /*!< in: undo logging, locking, and rollback
437 flags */
438 btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
439 cursor may become invalid if *big_rec == NULL
440 || !(flags & BTR_KEEP_POS_FLAG) */
441 ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
442 mem_heap_t** offsets_heap,
443 /*!< in/out: pointer to memory heap
444 that can be emptied */
445 mem_heap_t* entry_heap,
446 /*!< in/out: memory heap for allocating
447 big_rec and the index tuple */
448 big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
449 be stored externally by the caller */
450 upd_t* update, /*!< in/out: update vector; this is allowed to
451 also contain trx id and roll ptr fields.
452 Non-updated columns that are moved offpage will
453 be appended to this. */
454 ulint cmpl_info,/*!< in: compiler info on secondary index
455 updates */
456 que_thr_t* thr, /*!< in: query thread */
457 trx_id_t trx_id, /*!< in: transaction id */
458 mtr_t* mtr) /*!< in/out: mini-transaction; must be committed
459 before latching any further pages */
460 MY_ATTRIBUTE((warn_unused_result, nonnull));
461/***********************************************************//**
462Marks a clustered index record deleted. Writes an undo log record to
463undo log on this delete marking. Writes in the trx id field the id
464of the deleting transaction, and in the roll ptr field pointer to the
465undo log record created.
466@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
467dberr_t
468btr_cur_del_mark_set_clust_rec(
469/*===========================*/
470 buf_block_t* block, /*!< in/out: buffer block of the record */
471 rec_t* rec, /*!< in/out: record */
472 dict_index_t* index, /*!< in: clustered index of the record */
473 const ulint* offsets,/*!< in: rec_get_offsets(rec) */
474 que_thr_t* thr, /*!< in: query thread */
475 const dtuple_t* entry, /*!< in: dtuple for the deleting record */
476 mtr_t* mtr) /*!< in/out: mini-transaction */
477 MY_ATTRIBUTE((nonnull, warn_unused_result));
478/***********************************************************//**
479Sets a secondary index record delete mark to TRUE or FALSE.
480@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
481dberr_t
482btr_cur_del_mark_set_sec_rec(
483/*=========================*/
484 ulint flags, /*!< in: locking flag */
485 btr_cur_t* cursor, /*!< in: cursor */
486 ibool val, /*!< in: value to set */
487 que_thr_t* thr, /*!< in: query thread */
488 mtr_t* mtr) /*!< in/out: mini-transaction */
489 MY_ATTRIBUTE((nonnull, warn_unused_result));
490/*************************************************************//**
491Tries to compress a page of the tree if it seems useful. It is assumed
492that mtr holds an x-latch on the tree and on the cursor page. To avoid
493deadlocks, mtr must also own x-latches to brothers of page, if those
494brothers exist. NOTE: it is assumed that the caller has reserved enough
495free extents so that the compression will always succeed if done!
496@return TRUE if compression occurred */
497ibool
498btr_cur_compress_if_useful(
499/*=======================*/
500 btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
501 cursor does not stay valid if compression
502 occurs */
503 ibool adjust, /*!< in: TRUE if should adjust the
504 cursor position even if compression occurs */
505 mtr_t* mtr) /*!< in/out: mini-transaction */
506 MY_ATTRIBUTE((nonnull));
507/*******************************************************//**
508Removes the record on which the tree cursor is positioned. It is assumed
509that the mtr has an x-latch on the page where the cursor is positioned,
510but no latch on the whole tree.
511@return TRUE if success, i.e., the page did not become too empty */
512ibool
513btr_cur_optimistic_delete_func(
514/*===========================*/
515 btr_cur_t* cursor, /*!< in: cursor on the record to delete;
516 cursor stays valid: if deletion succeeds,
517 on function exit it points to the successor
518 of the deleted record */
519# ifdef UNIV_DEBUG
520 ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
521# endif /* UNIV_DEBUG */
522 mtr_t* mtr) /*!< in: mtr; if this function returns
523 TRUE on a leaf page of a secondary
524 index, the mtr must be committed
525 before latching any further pages */
526 MY_ATTRIBUTE((nonnull, warn_unused_result));
527# ifdef UNIV_DEBUG
528# define btr_cur_optimistic_delete(cursor, flags, mtr) \
529 btr_cur_optimistic_delete_func(cursor, flags, mtr)
530# else /* UNIV_DEBUG */
531# define btr_cur_optimistic_delete(cursor, flags, mtr) \
532 btr_cur_optimistic_delete_func(cursor, mtr)
533# endif /* UNIV_DEBUG */
534/*************************************************************//**
535Removes the record on which the tree cursor is positioned. Tries
536to compress the page if its fillfactor drops below a threshold
537or if it is the only page on the level. It is assumed that mtr holds
538an x-latch on the tree and on the cursor page. To avoid deadlocks,
539mtr must also own x-latches to brothers of page, if those brothers
540exist.
541@return TRUE if compression occurred */
542ibool
543btr_cur_pessimistic_delete(
544/*=======================*/
545 dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
546 the latter may occur because we may have
547 to update node pointers on upper levels,
548 and in the case of variable length keys
549 these may actually grow in size */
550 ibool has_reserved_extents, /*!< in: TRUE if the
551 caller has already reserved enough free
552 extents so that he knows that the operation
553 will succeed */
554 btr_cur_t* cursor, /*!< in: cursor on the record to delete;
555 if compression does not occur, the cursor
556 stays valid: it points to successor of
557 deleted record on function exit */
558 ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
559 bool rollback,/*!< in: performing rollback? */
560 mtr_t* mtr) /*!< in: mtr */
561 MY_ATTRIBUTE((nonnull));
562/***********************************************************//**
563Parses a redo log record of updating a record in-place.
564@return end of log record or NULL */
565byte*
566btr_cur_parse_update_in_place(
567/*==========================*/
568 byte* ptr, /*!< in: buffer */
569 byte* end_ptr,/*!< in: buffer end */
570 page_t* page, /*!< in/out: page or NULL */
571 page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
572 dict_index_t* index); /*!< in: index corresponding to page */
573/****************************************************************//**
574Parses the redo log record for delete marking or unmarking of a clustered
575index record.
576@return end of log record or NULL */
577byte*
578btr_cur_parse_del_mark_set_clust_rec(
579/*=================================*/
580 byte* ptr, /*!< in: buffer */
581 byte* end_ptr,/*!< in: buffer end */
582 page_t* page, /*!< in/out: page or NULL */
583 page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
584 dict_index_t* index); /*!< in: index corresponding to page */
585/****************************************************************//**
586Parses the redo log record for delete marking or unmarking of a secondary
587index record.
588@return end of log record or NULL */
589byte*
590btr_cur_parse_del_mark_set_sec_rec(
591/*===============================*/
592 byte* ptr, /*!< in: buffer */
593 byte* end_ptr,/*!< in: buffer end */
594 page_t* page, /*!< in/out: page or NULL */
595 page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
596
597/** Estimates the number of rows in a given index range.
598@param[in] index index
599@param[in] tuple1 range start, may also be empty tuple
600@param[in] mode1 search mode for range start
601@param[in] tuple2 range end, may also be empty tuple
602@param[in] mode2 search mode for range end
603@return estimated number of rows */
604ha_rows
605btr_estimate_n_rows_in_range(
606 dict_index_t* index,
607 const dtuple_t* tuple1,
608 page_cur_mode_t mode1,
609 const dtuple_t* tuple2,
610 page_cur_mode_t mode2);
611
612/*******************************************************************//**
613Estimates the number of different key values in a given index, for
614each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
615The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
6160..n_uniq-1) and the number of pages that were sampled is saved in
617index->stat_n_sample_sizes[].
618If innodb_stats_method is nulls_ignored, we also record the number of
619non-null values for each prefix and stored the estimates in
620array index->stat_n_non_null_key_vals.
621@return true if the index is available and we get the estimated numbers,
622false if the index is unavailable. */
623bool
624btr_estimate_number_of_different_key_vals(
625/*======================================*/
626 dict_index_t* index); /*!< in: index */
627
628/** Gets the externally stored size of a record, in units of a database page.
629@param[in] rec record
630@param[in] offsets array returned by rec_get_offsets()
631@return externally stored part, in units of a database page */
632ulint
633btr_rec_get_externally_stored_len(
634 const rec_t* rec,
635 const ulint* offsets);
636
637/*******************************************************************//**
638Marks non-updated off-page fields as disowned by this record. The ownership
639must be transferred to the updated record which is inserted elsewhere in the
640index tree. In purge only the owner of externally stored field is allowed
641to free the field. */
642void
643btr_cur_disown_inherited_fields(
644/*============================*/
645 page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
646 part will be updated, or NULL */
647 rec_t* rec, /*!< in/out: record in a clustered index */
648 dict_index_t* index, /*!< in: index of the page */
649 const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
650 const upd_t* update, /*!< in: update vector */
651 mtr_t* mtr) /*!< in/out: mini-transaction */
652 MY_ATTRIBUTE((nonnull(2,3,4,5,6)));
653
654/** Operation code for btr_store_big_rec_extern_fields(). */
655enum blob_op {
656 /** Store off-page columns for a freshly inserted record */
657 BTR_STORE_INSERT = 0,
658 /** Store off-page columns for an insert by update */
659 BTR_STORE_INSERT_UPDATE,
660 /** Store off-page columns for an update */
661 BTR_STORE_UPDATE,
662 /** Store off-page columns for a freshly inserted record by bulk */
663 BTR_STORE_INSERT_BULK
664};
665
666/*******************************************************************//**
667Determine if an operation on off-page columns is an update.
668@return TRUE if op != BTR_STORE_INSERT */
669UNIV_INLINE
670ibool
671btr_blob_op_is_update(
672/*==================*/
673 enum blob_op op) /*!< in: operation */
674 MY_ATTRIBUTE((warn_unused_result));
675
676/*******************************************************************//**
677Stores the fields in big_rec_vec to the tablespace and puts pointers to
678them in rec. The extern flags in rec will have to be set beforehand.
679The fields are stored on pages allocated from leaf node
680file segment of the index tree.
681@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
682dberr_t
683btr_store_big_rec_extern_fields(
684/*============================*/
685 btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if
686 btr_mtr is restarted, then this can
687 be repositioned. */
688 ulint* offsets, /*!< in/out: rec_get_offsets() on
689 pcur. the "external storage" flags
690 in offsets will correctly correspond
691 to rec when this function returns */
692 const big_rec_t*big_rec_vec, /*!< in: vector containing fields
693 to be stored externally */
694 mtr_t* btr_mtr, /*!< in/out: mtr containing the
695 latches to the clustered index. can be
696 committed and restarted. */
697 enum blob_op op) /*! in: operation code */
698 MY_ATTRIBUTE((warn_unused_result));
699
700/*******************************************************************//**
701Frees the space in an externally stored field to the file space
702management if the field in data is owned the externally stored field,
703in a rollback we may have the additional condition that the field must
704not be inherited. */
705void
706btr_free_externally_stored_field(
707/*=============================*/
708 dict_index_t* index, /*!< in: index of the data, the index
709 tree MUST be X-latched; if the tree
710 height is 1, then also the root page
711 must be X-latched! (this is relevant
712 in the case this function is called
713 from purge where 'data' is located on
714 an undo log page, not an index
715 page) */
716 byte* field_ref, /*!< in/out: field reference */
717 const rec_t* rec, /*!< in: record containing field_ref, for
718 page_zip_write_blob_ptr(), or NULL */
719 const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
720 or NULL */
721 page_zip_des_t* page_zip, /*!< in: compressed page corresponding
722 to rec, or NULL if rec == NULL */
723 ulint i, /*!< in: field number of field_ref;
724 ignored if rec == NULL */
725 bool rollback, /*!< in: performing rollback? */
726 mtr_t* local_mtr); /*!< in: mtr containing the latch */
727/** Copies the prefix of an externally stored field of a record.
728The clustered index record must be protected by a lock or a page latch.
729@param[out] buf the field, or a prefix of it
730@param[in] len length of buf, in bytes
731@param[in] page_size BLOB page size
732@param[in] data 'internally' stored part of the field
733containing also the reference to the external part; must be protected by
734a lock or a page latch
735@param[in] local_len length of data, in bytes
736@return the length of the copied field, or 0 if the column was being
737or has been deleted */
738ulint
739btr_copy_externally_stored_field_prefix(
740 byte* buf,
741 ulint len,
742 const page_size_t& page_size,
743 const byte* data,
744 ulint local_len);
745
746/** Copies an externally stored field of a record to mem heap.
747The clustered index record must be protected by a lock or a page latch.
748@param[out] len length of the whole field
749@param[in] data 'internally' stored part of the field
750containing also the reference to the external part; must be protected by
751a lock or a page latch
752@param[in] page_size BLOB page size
753@param[in] local_len length of data
754@param[in,out] heap mem heap
755@return the whole field copied to heap */
756byte*
757btr_copy_externally_stored_field(
758 ulint* len,
759 const byte* data,
760 const page_size_t& page_size,
761 ulint local_len,
762 mem_heap_t* heap);
763
764/** Copies an externally stored field of a record to mem heap.
765@param[in] rec record in a clustered index; must be
766protected by a lock or a page latch
767@param[in] offset array returned by rec_get_offsets()
768@param[in] page_size BLOB page size
769@param[in] no field number
770@param[out] len length of the field
771@param[in,out] heap mem heap
772@return the field copied to heap, or NULL if the field is incomplete */
773byte*
774btr_rec_copy_externally_stored_field(
775 const rec_t* rec,
776 const ulint* offsets,
777 const page_size_t& page_size,
778 ulint no,
779 ulint* len,
780 mem_heap_t* heap);
781
782/*******************************************************************//**
783Flags the data tuple fields that are marked as extern storage in the
784update vector. We use this function to remember which fields we must
785mark as extern storage in a record inserted for an update.
786@return number of flagged external columns */
787ulint
788btr_push_update_extern_fields(
789/*==========================*/
790 dtuple_t* tuple, /*!< in/out: data tuple */
791 const upd_t* update, /*!< in: update vector */
792 mem_heap_t* heap) /*!< in: memory heap */
793 MY_ATTRIBUTE((nonnull));
794/***********************************************************//**
795Sets a secondary index record's delete mark to the given value. This
796function is only used by the insert buffer merge mechanism. */
797void
798btr_cur_set_deleted_flag_for_ibuf(
799/*==============================*/
800 rec_t* rec, /*!< in/out: record */
801 page_zip_des_t* page_zip, /*!< in/out: compressed page
802 corresponding to rec, or NULL
803 when the tablespace is uncompressed */
804 ibool val, /*!< in: value to set */
805 mtr_t* mtr); /*!< in/out: mini-transaction */
806
807/******************************************************//**
808The following function is used to set the deleted bit of a record. */
809UNIV_INLINE
810void
811btr_rec_set_deleted_flag(
812/*=====================*/
813 rec_t* rec, /*!< in/out: physical record */
814 page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
815 ulint flag); /*!< in: nonzero if delete marked */
816
817/** Latches the leaf page or pages requested.
818@param[in] block leaf page where the search converged
819@param[in] page_id page id of the leaf
820@param[in] latch_mode BTR_SEARCH_LEAF, ...
821@param[in] cursor cursor
822@param[in] mtr mini-transaction
823@return blocks and savepoints which actually latched. */
824btr_latch_leaves_t
825btr_cur_latch_leaves(
826 buf_block_t* block,
827 const page_id_t& page_id,
828 const page_size_t& page_size,
829 ulint latch_mode,
830 btr_cur_t* cursor,
831 mtr_t* mtr);
832
833/*######################################################################*/
834
835/** In the pessimistic delete, if the page data size drops below this
836limit, merging it to a neighbor is tried */
837#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
838 ((srv_page_size * (ulint)((index)->merge_threshold)) / 100)
839
840/** A slot in the path array. We store here info on a search path down the
841tree. Each slot contains data on a single level of the tree. */
842struct btr_path_t {
843 /* Assume a page like:
844 records: (inf, a, b, c, d, sup)
845 index of the record: 0, 1, 2, 3, 4, 5
846 */
847
848 /** Index of the record where the page cursor stopped on this level
849 (index in alphabetical order). Value ULINT_UNDEFINED denotes array
850 end. In the above example, if the search stopped on record 'c', then
851 nth_rec will be 3. */
852 ulint nth_rec;
853
854 /** Number of the records on the page, not counting inf and sup.
855 In the above example n_recs will be 4. */
856 ulint n_recs;
857
858 /** Number of the page containing the record. */
859 ulint page_no;
860
861 /** Level of the page. If later we fetch the page under page_no
862 and it is no different level then we know that the tree has been
863 reorganized. */
864 ulint page_level;
865};
866
867#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
868
869/** Values for the flag documenting the used search method */
870enum btr_cur_method {
871 BTR_CUR_HASH = 1, /*!< successful shortcut using
872 the hash index */
873 BTR_CUR_HASH_FAIL, /*!< failure using hash, success using
874 binary search: the misleading hash
875 reference is stored in the field
876 hash_node, and might be necessary to
877 update */
878 BTR_CUR_BINARY, /*!< success using the binary search */
879 BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
880 the insert buffer */
881 BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
882 mark in the insert/delete buffer */
883 BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
884 the insert/delete buffer */
885 BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
886};
887
888/** The tree cursor: the definition appears here only for the compiler
889to know struct size! */
890struct btr_cur_t {
891 dict_index_t* index; /*!< index where positioned */
892 page_cur_t page_cur; /*!< page cursor */
893 purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
894 buf_block_t* left_block; /*!< this field is used to store
895 a pointer to the left neighbor
896 page, in the cases
897 BTR_SEARCH_PREV and
898 BTR_MODIFY_PREV */
899 /*------------------------------*/
900 que_thr_t* thr; /*!< this field is only used
901 when btr_cur_search_to_nth_level
902 is called for an index entry
903 insertion: the calling query
904 thread is passed here to be
905 used in the insert buffer */
906 /*------------------------------*/
907 /** The following fields are used in
908 btr_cur_search_to_nth_level to pass information: */
909 /* @{ */
910 enum btr_cur_method flag; /*!< Search method used */
911 ulint tree_height; /*!< Tree height if the search is done
912 for a pessimistic insert or update
913 operation */
914 ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
915 the number of matched fields to the
916 the first user record to the right of
917 the cursor record after
918 btr_cur_search_to_nth_level;
919 for the mode PAGE_CUR_GE, the matched
920 fields to the first user record AT THE
921 CURSOR or to the right of it;
922 NOTE that the up_match and low_match
923 values may exceed the correct values
924 for comparison to the adjacent user
925 record if that record is on a
926 different leaf page! (See the note in
927 row_ins_duplicate_error_in_clust.) */
928 ulint up_bytes; /*!< number of matched bytes to the
929 right at the time cursor positioned;
930 only used internally in searches: not
931 defined after the search */
932 ulint low_match; /*!< if search mode was PAGE_CUR_LE,
933 the number of matched fields to the
934 first user record AT THE CURSOR or
935 to the left of it after
936 btr_cur_search_to_nth_level;
937 NOT defined for PAGE_CUR_GE or any
938 other search modes; see also the NOTE
939 in up_match! */
940 ulint low_bytes; /*!< number of matched bytes to the
941 left at the time cursor positioned;
942 only used internally in searches: not
943 defined after the search */
944 ulint n_fields; /*!< prefix length used in a hash
945 search if hash_node != NULL */
946 ulint n_bytes; /*!< hash prefix bytes if hash_node !=
947 NULL */
948 ulint fold; /*!< fold value used in the search if
949 flag is BTR_CUR_HASH */
950 /* @} */
951 btr_path_t* path_arr; /*!< in estimating the number of
952 rows in range, we store in this array
953 information of the path through
954 the tree */
955 rtr_info_t* rtr_info; /*!< rtree search info */
956 btr_cur_t():thr(NULL), rtr_info(NULL) {}
957 /* default values */
958};
959
960/******************************************************//**
961The following function is used to set the deleted bit of a record. */
962UNIV_INLINE
963void
964btr_rec_set_deleted_flag(
965/*=====================*/
966 rec_t* rec, /*!< in/out: physical record */
967 page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
968 ulint flag); /*!< in: nonzero if delete marked */
969
970/** If pessimistic delete fails because of lack of file space, there
971is still a good change of success a little later. Try this many
972times. */
973#define BTR_CUR_RETRY_DELETE_N_TIMES 100
974/** If pessimistic delete fails because of lack of file space, there
975is still a good change of success a little later. Sleep this many
976microseconds between retries. */
977#define BTR_CUR_RETRY_SLEEP_TIME 50000
978
979/** The reference in a field for which data is stored on a different page.
980The reference is at the end of the 'locally' stored part of the field.
981'Locally' means storage in the index record.
982We store locally a long enough prefix of each column so that we can determine
983the ordering parts of each index record without looking into the externally
984stored part. */
985/*-------------------------------------- @{ */
986#define BTR_EXTERN_SPACE_ID 0U /*!< space id where stored */
987#define BTR_EXTERN_PAGE_NO 4U /*!< page no where stored */
988#define BTR_EXTERN_OFFSET 8U /*!< offset of BLOB header
989 on that page */
990#define BTR_EXTERN_LEN 12U /*!< 8 bytes containing the
991 length of the externally
992 stored part of the BLOB.
993 The 2 highest bits are
994 reserved to the flags below. */
995/*-------------------------------------- @} */
996/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
997
998/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
999significant bit of the byte at smallest address) is set to 1 if this
1000field does not 'own' the externally stored field; only the owner field
1001is allowed to free the field in purge! */
1002#define BTR_EXTERN_OWNER_FLAG 128U
1003/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
1004second most significant bit of the byte at smallest address) is 1 then
1005it means that the externally stored field was inherited from an
1006earlier version of the row. In rollback we are not allowed to free an
1007inherited external field. */
1008#define BTR_EXTERN_INHERITED_FLAG 64U
1009
1010/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
1011extern ulint btr_cur_n_non_sea;
1012/** Old value of btr_cur_n_non_sea. Copied by
1013srv_refresh_innodb_monitor_stats(). Referenced by
1014srv_printf_innodb_monitor(). */
1015extern ulint btr_cur_n_non_sea_old;
1016#ifdef BTR_CUR_HASH_ADAPT
1017/** Number of successful adaptive hash index lookups in
1018btr_cur_search_to_nth_level(). */
1019extern ulint btr_cur_n_sea;
1020/** Old value of btr_cur_n_sea. Copied by
1021srv_refresh_innodb_monitor_stats(). Referenced by
1022srv_printf_innodb_monitor(). */
1023extern ulint btr_cur_n_sea_old;
1024#endif /* BTR_CUR_HASH_ADAPT */
1025
1026#ifdef UNIV_DEBUG
1027/* Flag to limit optimistic insert records */
1028extern uint btr_cur_limit_optimistic_insert_debug;
1029#endif /* UNIV_DEBUG */
1030
1031#include "btr0cur.ic"
1032
1033#endif
1034