/*****************************************************************************

Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2017, 2018, MariaDB Corporation.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/
26
27/********************************************************************//**
28@file btr/btr0sea.cc
29The index tree adaptive search
30
31Created 2/17/1996 Heikki Tuuri
32*************************************************************************/
33
34#include "btr0sea.h"
35#ifdef BTR_CUR_HASH_ADAPT
36#include "buf0buf.h"
37#include "page0page.h"
38#include "page0cur.h"
39#include "btr0cur.h"
40#include "btr0pcur.h"
41#include "btr0btr.h"
42#include "ha0ha.h"
43#include "srv0mon.h"
44#include "sync0sync.h"
45
/** Is the search system enabled?
The search system is protected by an array of latches. */
char btr_search_enabled;

/** Number of adaptive hash index partitions. */
ulong btr_ahi_parts;

#ifdef UNIV_SEARCH_PERF_STAT
/** Number of successful adaptive hash index lookups */
ulint btr_search_n_succ = 0;
/** Number of failed adaptive hash index lookups */
ulint btr_search_n_hash_fail = 0;
#endif /* UNIV_SEARCH_PERF_STAT */

/** padding to prevent other memory update
hotspots from residing on the same memory
cache line as btr_search_latches */
UNIV_INTERN byte btr_sea_pad1[CACHE_LINE_SIZE];

/** The latches protecting the adaptive search system: these latches protect
(1) the positions of records on those pages where a hash index has been built.
NOTE: They do not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches on
indexes. We allocate the latches from dynamic memory so that they end up on
the same DRAM page as other hotspot semaphores. */
rw_lock_t** btr_search_latches;

/** padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte btr_sea_pad2[CACHE_LINE_SIZE];

/** The adaptive hash index */
btr_search_sys_t* btr_search_sys;

/** If the number of consecutive potentially successful hash lookups on a
page exceeds the number of records on the page divided by this parameter,
a hash index is built on the page, assuming the global limit below has
also been reached */
#define BTR_SEARCH_PAGE_BUILD_LIMIT	16U

/** The global limit for consecutive potentially successful hash searches,
before hash index building is started */
#define BTR_SEARCH_BUILD_LIMIT		100U
/** Compute a hash value of a record in a page.
@param[in]	rec		index record
@param[in]	offsets		return value of rec_get_offsets()
@param[in]	n_fields	number of complete fields to fold
@param[in]	n_bytes		number of bytes to fold in the last field
@param[in]	tree_id		index tree ID
@return the hash value */
static inline
ulint
rec_fold(
	const rec_t*	rec,
	const ulint*	offsets,
	ulint		n_fields,
	ulint		n_bytes,
	index_id_t	tree_id)
{
	ulint		i;
	const byte*	data;
	ulint		len;
	ulint		fold;
	ulint		n_fields_rec;

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(rec_validate(rec, offsets));
	ut_ad(page_rec_is_leaf(rec));
	ut_ad(!page_rec_is_default_row(rec));
	ut_ad(n_fields > 0 || n_bytes > 0);

	n_fields_rec = rec_offs_n_fields(offsets);
	ut_ad(n_fields <= n_fields_rec);
	ut_ad(n_fields < n_fields_rec || n_bytes == 0);

	if (n_fields > n_fields_rec) {
		n_fields = n_fields_rec;
	}

	if (n_fields == n_fields_rec) {
		n_bytes = 0;
	}

	fold = ut_fold_ull(tree_id);

	for (i = 0; i < n_fields; i++) {
		data = rec_get_nth_field(rec, offsets, i, &len);

		if (len != UNIV_SQL_NULL) {
			fold = ut_fold_ulint_pair(fold,
						  ut_fold_binary(data, len));
		}
	}

	if (n_bytes > 0) {
		data = rec_get_nth_field(rec, offsets, i, &len);

		if (len != UNIV_SQL_NULL) {
			if (len > n_bytes) {
				len = n_bytes;
			}

			fold = ut_fold_ulint_pair(fold,
						  ut_fold_binary(data, len));
		}
	}

	return(fold);
}
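
/* Illustrative sketch (not part of the original source): for a prefix of
n_fields = 2 complete fields and n_bytes = 0, rec_fold() effectively
computes

	fold = ut_fold_ull(tree_id);
	fold = ut_fold_ulint_pair(fold, ut_fold_binary(field0, len0));
	fold = ut_fold_ulint_pair(fold, ut_fold_binary(field1, len1));

A search tuple must be folded with dtuple_fold() using the same n_fields,
n_bytes and tree_id to land in the same hash chain; that is what
btr_search_guess_on_hash() below relies on. */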

/** Determine the number of accessed key fields.
@param[in]	n_fields	number of complete fields
@param[in]	n_bytes		number of bytes in an incomplete last field
@return number of complete or incomplete fields */
inline MY_ATTRIBUTE((warn_unused_result))
ulint
btr_search_get_n_fields(
	ulint	n_fields,
	ulint	n_bytes)
{
	return(n_fields + (n_bytes > 0 ? 1 : 0));
}

/** Determine the number of accessed key fields.
@param[in]	cursor		b-tree cursor
@return number of complete or incomplete fields */
inline MY_ATTRIBUTE((warn_unused_result))
ulint
btr_search_get_n_fields(
	const btr_cur_t*	cursor)
{
	return(btr_search_get_n_fields(cursor->n_fields, cursor->n_bytes));
}

/** This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
latch, we cannot allocate a free frame from the buffer pool. Checks that
there is a free buffer frame allocated for the hash table heap in the btr
search system. If not, allocates a free frame for the heap. This check makes
it probable that, when we have reserved the btr search system latch and need
to allocate a new node for the hash table, the allocation will succeed.
However, the check does not guarantee success.
@param[in]	index	index handler */
static
void
btr_search_check_free_space_in_heap(const dict_index_t* index)
{
	hash_table_t*	table;
	mem_heap_t*	heap;

	ut_ad(!btr_search_own_any(RW_LOCK_S));
	ut_ad(!btr_search_own_any(RW_LOCK_X));

	table = btr_get_search_table(index);

	heap = table->heap;

	/* Note that we peek the value of heap->free_block without reserving
	the latch: this is ok, because we will not guarantee that there will
	be enough free space in the hash table. */

	if (heap->free_block == NULL) {
		buf_block_t*	block = buf_block_alloc(NULL);
		rw_lock_t*	ahi_latch = btr_get_search_latch(index);

		rw_lock_x_lock(ahi_latch);

		if (btr_search_enabled
		    && heap->free_block == NULL) {
			heap->free_block = block;
		} else {
			buf_block_free(block);
		}

		rw_lock_x_unlock(ahi_latch);
	}
}
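
/* Illustrative sketch (not part of the original source): the function above
is an instance of the "allocate outside the latch, double-check inside"
pattern. In pseudo-C++, with hypothetical names:

	Resource* r = allocate();	// may sleep; no latch held
	latch.x_lock();
	if (still_needed()) {
		publish(r);		// hand over ownership
	} else {
		free_resource(r);	// someone else was faster
	}
	latch.x_unlock();

This keeps the buffer pool allocation outside the AHI latch, respecting the
latching order described in the comment above. */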

/** Creates and initializes the adaptive search system at a database start.
@param[in]	hash_size	hash table size. */
void btr_search_sys_create(ulint hash_size)
{
	/* The search system is divided into n parts.
	Each part controls access to a distinct set of hash buckets from
	the hash table through its own latch. */

	/* Step-1: Allocate latches (1 per part). */
	btr_search_latches = reinterpret_cast<rw_lock_t**>(
		ut_malloc(sizeof(rw_lock_t*) * btr_ahi_parts, mem_key_ahi));

	for (ulint i = 0; i < btr_ahi_parts; ++i) {

		btr_search_latches[i] = reinterpret_cast<rw_lock_t*>(
			ut_malloc(sizeof(rw_lock_t), mem_key_ahi));

		rw_lock_create(btr_search_latch_key,
			       btr_search_latches[i], SYNC_SEARCH_SYS);
	}

	/* Step-2: Allocate hash tables. */
	btr_search_sys = reinterpret_cast<btr_search_sys_t*>(
		ut_malloc(sizeof(btr_search_sys_t), mem_key_ahi));

	btr_search_sys->hash_tables = reinterpret_cast<hash_table_t**>(
		ut_malloc(sizeof(hash_table_t*) * btr_ahi_parts, mem_key_ahi));

	for (ulint i = 0; i < btr_ahi_parts; ++i) {

		btr_search_sys->hash_tables[i] =
			ib_create((hash_size / btr_ahi_parts),
				  LATCH_ID_HASH_TABLE_MUTEX,
				  0, MEM_HEAP_FOR_BTR_SEARCH);

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
		btr_search_sys->hash_tables[i]->adaptive = TRUE;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	}
}
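
/* Illustrative sketch (not part of the original source): an index maps to
one of the btr_ahi_parts partitions by folding its id with its tablespace
id, as done in btr_search_drop_page_hash_index() below:

	ulint ahi_slot = ut_fold_ulint_pair(
		static_cast<ulint>(index_id),
		static_cast<ulint>(space_id)) % btr_ahi_parts;

btr_search_latches[ahi_slot] and btr_search_sys->hash_tables[ahi_slot]
then form the partition that serves this index. */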

/** Resize hash index hash table.
@param[in]	hash_size	hash index hash table size */
void btr_search_sys_resize(ulint hash_size)
{
	/* Step-1: Lock all search latches in exclusive mode. */
	btr_search_x_lock_all();

	if (btr_search_enabled) {

		btr_search_x_unlock_all();

		ib::error() << "btr_search_sys_resize failed because"
			" hash index hash table is not empty.";
		ut_ad(0);
		return;
	}

	/* Step-2: Recreate hash tables with new size. */
	for (ulint i = 0; i < btr_ahi_parts; ++i) {

		mem_heap_free(btr_search_sys->hash_tables[i]->heap);
		hash_table_free(btr_search_sys->hash_tables[i]);

		btr_search_sys->hash_tables[i] =
			ib_create((hash_size / btr_ahi_parts),
				  LATCH_ID_HASH_TABLE_MUTEX,
				  0, MEM_HEAP_FOR_BTR_SEARCH);

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
		btr_search_sys->hash_tables[i]->adaptive = TRUE;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	}

	/* Step-3: Unlock all search latches from exclusive mode. */
	btr_search_x_unlock_all();
}

/** Frees the adaptive search system at a database shutdown. */
void btr_search_sys_free()
{
	if (!btr_search_sys) {
		ut_ad(!btr_search_latches);
		return;
	}

	ut_ad(btr_search_latches);

	/* Step-1: Release the hash tables. */
	for (ulint i = 0; i < btr_ahi_parts; ++i) {

		mem_heap_free(btr_search_sys->hash_tables[i]->heap);
		hash_table_free(btr_search_sys->hash_tables[i]);

	}

	ut_free(btr_search_sys->hash_tables);
	ut_free(btr_search_sys);
	btr_search_sys = NULL;

	/* Step-2: Release all allocated latches. */
	for (ulint i = 0; i < btr_ahi_parts; ++i) {

		rw_lock_free(btr_search_latches[i]);
		ut_free(btr_search_latches[i]);
	}

	ut_free(btr_search_latches);
	btr_search_latches = NULL;
}

/** Set index->ref_count = 0 on all indexes of a table.
@param[in,out]	table	table handler */
static
void
btr_search_disable_ref_count(
	dict_table_t*	table)
{
	dict_index_t*	index;

	ut_ad(mutex_own(&dict_sys->mutex));

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {
		index->search_info->ref_count = 0;
	}
}

/** Disable the adaptive hash search system and empty the index.
@param[in]	need_mutex	need to acquire dict_sys->mutex */
void btr_search_disable(bool need_mutex)
{
	dict_table_t*	table;

	if (need_mutex) {
		mutex_enter(&dict_sys->mutex);
	}

	ut_ad(mutex_own(&dict_sys->mutex));
	btr_search_x_lock_all();

	if (!btr_search_enabled) {
		if (need_mutex) {
			mutex_exit(&dict_sys->mutex);
		}

		btr_search_x_unlock_all();
		return;
	}

	btr_search_enabled = false;

	/* Clear the index->search_info->ref_count of every index in
	the data dictionary cache. */
	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table;
	     table = UT_LIST_GET_NEXT(table_LRU, table)) {

		btr_search_disable_ref_count(table);
	}

	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); table;
	     table = UT_LIST_GET_NEXT(table_LRU, table)) {

		btr_search_disable_ref_count(table);
	}

	if (need_mutex) {
		mutex_exit(&dict_sys->mutex);
	}

	/* Set all block->index = NULL. */
	buf_pool_clear_hash_index();

	/* Clear the adaptive hash index. */
	for (ulint i = 0; i < btr_ahi_parts; ++i) {
		hash_table_clear(btr_search_sys->hash_tables[i]);
		mem_heap_empty(btr_search_sys->hash_tables[i]->heap);
	}

	btr_search_x_unlock_all();
}

/** Enable the adaptive hash search system. */
void btr_search_enable()
{
	buf_pool_mutex_enter_all();
	if (srv_buf_pool_old_size != srv_buf_pool_size) {
		buf_pool_mutex_exit_all();
		return;
	}
	buf_pool_mutex_exit_all();

	btr_search_x_lock_all();
	btr_search_enabled = true;
	btr_search_x_unlock_all();
}
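
/* Illustrative sketch (not part of the original source): a caller that
wants to resize the AHI hash tables must first disable and empty the search
system, because btr_search_sys_resize() above refuses to run while
btr_search_enabled is set:

	btr_search_disable(true);		// empties all hash tables
	btr_search_sys_resize(new_hash_size);	// only valid while disabled
	btr_search_enable();			// AHI is rebuilt lazily

new_hash_size is a hypothetical value here; in the server it is derived
from the buffer pool size. */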

/** Returns the value of ref_count. The value is protected by the
search latch.
@param[in]	info	search info
@param[in]	index	index identifier
@return ref_count value. */
ulint
btr_search_info_get_ref_count(
	btr_search_t*	info,
	dict_index_t*	index)
{
	ulint ret = 0;

	if (!btr_search_enabled || !index->table->space) {
		return(ret);
	}

	ut_ad(info);

	rw_lock_t*	ahi_latch = btr_get_search_latch(index);
	rw_lock_s_lock(ahi_latch);
	ret = info->ref_count;
	rw_lock_s_unlock(ahi_latch);

	return(ret);
}

/** Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
are consistent.
@param[in,out]	info	search info
@param[in]	cursor	cursor which was just positioned */
static
void
btr_search_info_update_hash(
	btr_search_t*	info,
	btr_cur_t*	cursor)
{
	dict_index_t*	index = cursor->index;
	ulint		n_unique;
	int		cmp;

	ut_ad(!btr_search_own_any(RW_LOCK_S));
	ut_ad(!btr_search_own_any(RW_LOCK_X));

	if (dict_index_is_ibuf(index)) {
		/* So many deletes are performed on an insert buffer tree
		that we do not consider a hash index useful on it: */

		return;
	}

	n_unique = dict_index_get_n_unique_in_tree(index);

	if (info->n_hash_potential == 0) {

		goto set_new_recomm;
	}

	/* Test if the search would have succeeded using the recommended
	hash prefix */

	if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
increment_potential:
		info->n_hash_potential++;

		return;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
			  cursor->low_match, cursor->low_bytes);

	if (info->left_side ? cmp <= 0 : cmp > 0) {

		goto set_new_recomm;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
			  cursor->up_match, cursor->up_bytes);

	if (info->left_side ? cmp <= 0 : cmp > 0) {

		goto increment_potential;
	}

set_new_recomm:
	/* We have to set a new recommendation; skip the hash analysis
	for a while to avoid unnecessary CPU time usage when there is no
	chance for success */

	info->hash_analysis = 0;

	cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
			  cursor->low_match, cursor->low_bytes);
	if (cmp == 0) {
		info->n_hash_potential = 0;

		/* For extra safety, we set some sensible values here */

		info->n_fields = 1;
		info->n_bytes = 0;

		info->left_side = TRUE;

	} else if (cmp > 0) {
		info->n_hash_potential = 1;

		if (cursor->up_match >= n_unique) {

			info->n_fields = n_unique;
			info->n_bytes = 0;

		} else if (cursor->low_match < cursor->up_match) {

			info->n_fields = cursor->low_match + 1;
			info->n_bytes = 0;
		} else {
			info->n_fields = cursor->low_match;
			info->n_bytes = cursor->low_bytes + 1;
		}

		info->left_side = TRUE;
	} else {
		info->n_hash_potential = 1;

		if (cursor->low_match >= n_unique) {

			info->n_fields = n_unique;
			info->n_bytes = 0;
		} else if (cursor->low_match > cursor->up_match) {

			info->n_fields = cursor->up_match + 1;
			info->n_bytes = 0;
		} else {
			info->n_fields = cursor->up_match;
			info->n_bytes = cursor->up_bytes + 1;
		}

		info->left_side = FALSE;
	}
}
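
/* Illustrative worked example (not part of the original source): suppose
n_unique = 5 and a search positioned the cursor with low_match = 1,
low_bytes = 0, up_match = 3, up_bytes = 0. Then in set_new_recomm,
ut_pair_cmp(3, 0, 1, 0) > 0, so the recommendation becomes

	info->n_fields = cursor->low_match + 1 = 2;
	info->n_bytes  = 0;
	info->left_side = TRUE;

i.e. a 2-field prefix distinguishes this tuple from its left neighbour, and
hash pointers should point to the leftmost record of each group of records
sharing an equal 2-field prefix. */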

/** Update the block search info on hash successes. NOTE that info and
block->n_hash_helps, n_fields, n_bytes, left_side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent.
@param[in,out]	info	search info
@param[in,out]	block	buffer block
@return TRUE if building a (new) hash index on the block is recommended */
static
bool
btr_search_update_block_hash_info(btr_search_t* info, buf_block_t* block)
{
	ut_ad(!btr_search_own_any(RW_LOCK_S));
	ut_ad(!btr_search_own_any(RW_LOCK_X));
	ut_ad(rw_lock_own(&block->lock, RW_LOCK_S)
	      || rw_lock_own(&block->lock, RW_LOCK_X));

	info->last_hash_succ = FALSE;

	ut_a(buf_block_state_valid(block));
	ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);

	if ((block->n_hash_helps > 0)
	    && (info->n_hash_potential > 0)
	    && (block->n_fields == info->n_fields)
	    && (block->n_bytes == info->n_bytes)
	    && (block->left_side == info->left_side)) {

		if ((block->index)
		    && (block->curr_n_fields == info->n_fields)
		    && (block->curr_n_bytes == info->n_bytes)
		    && (block->curr_left_side == info->left_side)) {

			/* The search would presumably have succeeded using
			the hash index */

			info->last_hash_succ = TRUE;
		}

		block->n_hash_helps++;
	} else {
		block->n_hash_helps = 1;
		block->n_fields = info->n_fields;
		block->n_bytes = info->n_bytes;
		block->left_side = info->left_side;
	}

	if ((block->n_hash_helps > page_get_n_recs(block->frame)
	     / BTR_SEARCH_PAGE_BUILD_LIMIT)
	    && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {

		if ((!block->index)
		    || (block->n_hash_helps
			> 2U * page_get_n_recs(block->frame))
		    || (block->n_fields != block->curr_n_fields)
		    || (block->n_bytes != block->curr_n_bytes)
		    || (block->left_side != block->curr_left_side)) {

			/* Build a new hash index on the page */

			return(true);
		}
	}

	return(false);
}
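
/* Illustrative worked example (not part of the original source): with the
default limits, for a page holding 160 records, building a hash index on
the page is recommended once

	block->n_hash_helps > 160 / BTR_SEARCH_PAGE_BUILD_LIMIT  (= 10)

consecutive searches would have benefited from the same prefix parameters,
and the index-wide counter has reached

	info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT         (= 100).

An already-hashed page is rebuilt only if the parameters changed or more
than 2 * 160 = 320 such searches have accumulated. */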

/** Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
what happens at page boundaries, and therefore there can be misleading
hash nodes. Also, collisions in the fold value can lead to misleading
references. This function lazily fixes these imperfections in the hash
index.
@param[in]	info	search info
@param[in]	block	buffer block where the cursor is positioned
@param[in]	cursor	cursor */
static
void
btr_search_update_hash_ref(
	const btr_search_t*	info,
	buf_block_t*		block,
	const btr_cur_t*	cursor)
{
	dict_index_t*	index;
	ulint		fold;
	rec_t*		rec;

	ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
	ut_ad(rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S)
	      || rw_lock_own(&(block->lock), RW_LOCK_X));
	ut_ad(page_align(btr_cur_get_rec(cursor)) == block->frame);
	ut_ad(page_is_leaf(block->frame));
	assert_block_ahi_valid(block);

	index = block->index;

	if (!index) {

		return;
	}

	ut_ad(block->page.id.space() == index->table->space->id);
	ut_ad(index == cursor->index);
	ut_ad(!dict_index_is_ibuf(index));

	if ((info->n_hash_potential > 0)
	    && (block->curr_n_fields == info->n_fields)
	    && (block->curr_n_bytes == info->n_bytes)
	    && (block->curr_left_side == info->left_side)) {
		mem_heap_t*	heap = NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		rec_offs_init(offsets_);

		rec = btr_cur_get_rec(cursor);

		if (!page_rec_is_user_rec(rec)) {

			return;
		}

		fold = rec_fold(rec,
				rec_get_offsets(rec, index, offsets_, true,
						ULINT_UNDEFINED, &heap),
				block->curr_n_fields,
				block->curr_n_bytes, index->id);
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}

		ha_insert_for_fold(btr_get_search_table(index), fold,
				   block, rec);

		MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);
	}
}

/** Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values.
@param[in,out]	cursor	guess cursor position
@param[in]	can_only_compare_to_cursor_rec
			if we do not have a latch on the page of cursor,
			but only a latch on the corresponding search system
			partition, then ONLY the columns of the record UNDER
			the cursor are protected, not the next or previous
			record in the chain: we cannot look at the next or
			previous record to check our guess!
@param[in]	tuple	data tuple
@param[in]	mode	PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_GE
@return whether a match was found */
static
bool
btr_search_check_guess(
	btr_cur_t*	cursor,
	bool		can_only_compare_to_cursor_rec,
	const dtuple_t*	tuple,
	ulint		mode)
{
	rec_t*		rec;
	ulint		n_unique;
	ulint		match;
	int		cmp;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	ibool		success		= FALSE;
	rec_offs_init(offsets_);

	n_unique = dict_index_get_n_unique_in_tree(cursor->index);

	rec = btr_cur_get_rec(cursor);

	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(page_rec_is_leaf(rec));

	match = 0;

	offsets = rec_get_offsets(rec, cursor->index, offsets, true,
				  n_unique, &heap);
	cmp = cmp_dtuple_rec_with_match(tuple, rec, offsets, &match);

	if (mode == PAGE_CUR_GE) {
		if (cmp > 0) {
			goto exit_func;
		}

		cursor->up_match = match;

		if (match >= n_unique) {
			success = TRUE;
			goto exit_func;
		}
	} else if (mode == PAGE_CUR_LE) {
		if (cmp < 0) {
			goto exit_func;
		}

		cursor->low_match = match;

	} else if (mode == PAGE_CUR_G) {
		if (cmp >= 0) {
			goto exit_func;
		}
	} else if (mode == PAGE_CUR_L) {
		if (cmp <= 0) {
			goto exit_func;
		}
	}

	if (can_only_compare_to_cursor_rec) {
		/* Since we could not determine if our guess is right just by
		looking at the record under the cursor, return FALSE */
		goto exit_func;
	}

	match = 0;

	if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
		ut_ad(!page_rec_is_infimum(rec));

		const rec_t* prev_rec = page_rec_get_prev(rec);

		if (page_rec_is_infimum(prev_rec)) {
			success = *reinterpret_cast<const uint32_t*>(
				page_align(prev_rec) + FIL_PAGE_PREV)
				== FIL_NULL;

			goto exit_func;
		}

		offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
					  true, n_unique, &heap);
		cmp = cmp_dtuple_rec_with_match(
			tuple, prev_rec, offsets, &match);
		if (mode == PAGE_CUR_GE) {
			success = cmp > 0;
		} else {
			success = cmp >= 0;
		}
	} else {
		ut_ad(!page_rec_is_supremum(rec));

		const rec_t* next_rec = page_rec_get_next(rec);

		if (page_rec_is_supremum(next_rec)) {
			if (*reinterpret_cast<const uint32_t*>(
				    page_align(next_rec) + FIL_PAGE_NEXT)
			    == FIL_NULL) {

				cursor->up_match = 0;
				success = TRUE;
			}

			goto exit_func;
		}

		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
					  true, n_unique, &heap);
		cmp = cmp_dtuple_rec_with_match(
			tuple, next_rec, offsets, &match);
		if (mode == PAGE_CUR_LE) {
			success = cmp < 0;
			cursor->up_match = match;
		} else {
			success = cmp <= 0;
		}
	}
exit_func:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(success);
}

/** Set the cursor and search info to indicate a failed adaptive hash
index lookup.
@param[in,out]	info	search info
@param[in,out]	cursor	cursor that failed the hash lookup */
static
void
btr_search_failure(btr_search_t* info, btr_cur_t* cursor)
{
	cursor->flag = BTR_CUR_HASH_FAIL;

#ifdef UNIV_SEARCH_PERF_STAT
	++info->n_hash_fail;

	if (info->n_hash_succ > 0) {
		--info->n_hash_succ;
	}
#endif /* UNIV_SEARCH_PERF_STAT */

	info->last_hash_succ = FALSE;
}

/** Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values.
@param[in,out]	index		index
@param[in,out]	info		index search info
@param[in]	tuple		logical record
@param[in]	mode		PAGE_CUR_L, ....
@param[in]	latch_mode	BTR_SEARCH_LEAF, ...;
				NOTE that only if ahi_latch is NULL, we will
				have a latch set on the cursor page, otherwise
				we assume the caller uses his search latch
				to protect the record!
@param[out]	cursor		tree cursor
@param[in]	ahi_latch	the adaptive hash index latch being held,
				or NULL
@param[in]	mtr		mini-transaction
@return whether the search succeeded */
bool
btr_search_guess_on_hash(
	dict_index_t*	index,
	btr_search_t*	info,
	const dtuple_t*	tuple,
	ulint		mode,
	ulint		latch_mode,
	btr_cur_t*	cursor,
	rw_lock_t*	ahi_latch,
	mtr_t*		mtr)
{
	const rec_t*	rec;
	ulint		fold;
	index_id_t	index_id;
#ifdef notdefined
	btr_cur_t	cursor2;
	btr_pcur_t	pcur;
#endif
	ut_ad(!ahi_latch || rw_lock_own(ahi_latch, RW_LOCK_S)
	      || rw_lock_own(ahi_latch, RW_LOCK_X));

	if (!btr_search_enabled) {
		return(FALSE);
	}

	ut_ad(index && info && tuple && cursor && mtr);
	ut_ad(!dict_index_is_ibuf(index));
	ut_ad(!ahi_latch || ahi_latch == btr_get_search_latch(index));
	ut_ad((latch_mode == BTR_SEARCH_LEAF)
	      || (latch_mode == BTR_MODIFY_LEAF));

	/* Not supported for spatial index */
	ut_ad(!dict_index_is_spatial(index));

	/* Note that, for efficiency, the struct info may not be protected by
	any latch here! */

	if (info->n_hash_potential == 0) {

		return(FALSE);
	}

	cursor->n_fields = info->n_fields;
	cursor->n_bytes = info->n_bytes;

	if (dtuple_get_n_fields(tuple) < btr_search_get_n_fields(cursor)) {

		return(FALSE);
	}

	index_id = index->id;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_succ++;
#endif
	fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);

	cursor->fold = fold;
	cursor->flag = BTR_CUR_HASH;

	rw_lock_t* use_latch = ahi_latch ? NULL : btr_get_search_latch(index);

	if (use_latch) {
		rw_lock_s_lock(use_latch);

		if (!btr_search_enabled) {
			goto fail;
		}
	} else {
		ut_ad(btr_search_enabled);
		ut_ad(rw_lock_own(ahi_latch, RW_LOCK_S));
	}

	rec = (rec_t*) ha_search_and_get_data(
		btr_get_search_table(index), fold);

	if (rec == NULL) {
		if (use_latch) {
fail:
			rw_lock_s_unlock(use_latch);
		}

		btr_search_failure(info, cursor);

		return(FALSE);
	}

	buf_block_t*	block = buf_block_from_ahi(rec);

	if (use_latch) {

		if (!buf_page_get_known_nowait(
			    latch_mode, block, BUF_MAKE_YOUNG,
			    __FILE__, __LINE__, mtr)) {
			goto fail;
		}

		rw_lock_s_unlock(use_latch);

		buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
	}

	if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

		ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);

		if (!ahi_latch) {

			btr_leaf_page_release(block, latch_mode, mtr);
		}

		btr_search_failure(info, cursor);

		return(FALSE);
	}

	ut_ad(page_rec_is_user_rec(rec));

	btr_cur_position(index, (rec_t*) rec, block, cursor);

	/* Check the validity of the guess within the page */

	/* If we only have the latch on the search system, not on the
	page, it only protects the columns of the record the cursor
	is positioned on. We cannot look at the next or the previous
	record to determine if our guess for the cursor position is
	right. */
	if (index_id != btr_page_get_index_id(block->frame)
	    || !btr_search_check_guess(cursor, !!ahi_latch, tuple, mode)) {

		if (!ahi_latch) {
			btr_leaf_page_release(block, latch_mode, mtr);
		}

		btr_search_failure(info, cursor);

		return(FALSE);
	}

	if (info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5) {

		info->n_hash_potential++;
	}

#ifdef notdefined
	/* These lines of code can be used in a debug version to check
	the correctness of the searched cursor position: */

	info->last_hash_succ = FALSE;

	/* Currently, does not work if the following fails: */
	ut_ad(!ahi_latch);

	btr_leaf_page_release(block, latch_mode, mtr);

	btr_cur_search_to_nth_level(
		index, 0, tuple, mode, latch_mode, &cursor2, 0, mtr);

	if (mode == PAGE_CUR_GE
	    && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {

		/* If mode is PAGE_CUR_GE, then the binary search
		in the index tree may actually take us to the supremum
		of the previous page */

		info->last_hash_succ = FALSE;

		btr_pcur_open_on_user_rec(
			index, tuple, mode, latch_mode, &pcur, mtr);

		ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
	} else {
		ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
	}

	/* NOTE that it is theoretically possible that the above assertions
	fail if the page of the cursor gets removed from the buffer pool
	meanwhile! Thus it might not be a bug. */
#endif
	info->last_hash_succ = TRUE;

#ifdef UNIV_SEARCH_PERF_STAT
	btr_search_n_succ++;
#endif
	if (!ahi_latch && buf_page_peek_if_too_old(&block->page)) {

		buf_page_make_young(&block->page);
	}

	/* Increment the page get statistics though we did not really
	fix the page: for user info only */
	{
		buf_pool_t*	buf_pool = buf_pool_from_bpage(&block->page);

		++buf_pool->stat.n_page_gets;
	}

	return(TRUE);
}

/** Drop any adaptive hash index entries that point to an index page.
@param[in,out]	block	block containing index page, s- or x-latched, or an
			index page for which we know that
			block->buf_fix_count == 0 or it is an index page which
			has already been removed from the buf_pool->page_hash
			i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
void btr_search_drop_page_hash_index(buf_block_t* block)
{
	ulint			n_fields;
	ulint			n_bytes;
	const page_t*		page;
	const rec_t*		rec;
	ulint			fold;
	ulint			prev_fold;
	ulint			n_cached;
	ulint			n_recs;
	ulint*			folds;
	ulint			i;
	mem_heap_t*		heap;
	const dict_index_t*	index;
	ulint*			offsets;
	rw_lock_t*		latch;
	btr_search_t*		info;

retry:
	/* Do a dirty check on block->index, return if the block is
	not in the adaptive hash index. */
	index = block->index;
	/* This debug check uses a dirty read that could theoretically cause
	false positives while buf_pool_clear_hash_index() is executing. */
	assert_block_ahi_valid(block);
	ut_ad(!btr_search_own_any(RW_LOCK_S));
	ut_ad(!btr_search_own_any(RW_LOCK_X));

	if (index == NULL) {
		return;
	}

	ut_ad(block->page.buf_fix_count == 0
	      || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH
	      || rw_lock_own(&block->lock, RW_LOCK_S)
	      || rw_lock_own(&block->lock, RW_LOCK_X));
	ut_ad(page_is_leaf(block->frame));

	/* We must not dereference index here, because it could be freed
	if (index->table->n_ref_count == 0 && !mutex_own(&dict_sys->mutex)).
	Determine the ahi_slot based on the block contents. */

	const index_id_t	index_id
		= btr_page_get_index_id(block->frame);
	const ulint		ahi_slot
		= ut_fold_ulint_pair(static_cast<ulint>(index_id),
				     static_cast<ulint>(
					     block->page.id.space()))
		% btr_ahi_parts;
	latch = btr_search_latches[ahi_slot];

	rw_lock_s_lock(latch);
	assert_block_ahi_valid(block);

	if (block->index == NULL) {
		rw_lock_s_unlock(latch);
		return;
	}

	/* The index associated with a block must remain the
	same, because we are holding block->lock or the block is
	not accessible by other threads (BUF_BLOCK_REMOVE_HASH),
	or the index is not accessible to other threads
	(buf_fix_count == 0 when DROP TABLE or similar is executing
	buf_LRU_drop_page_hash_for_tablespace()). */
	ut_a(index == block->index);
#ifdef MYSQL_INDEX_DISABLE_AHI
	ut_ad(!index->disable_ahi);
#endif
	ut_ad(btr_search_enabled);

	ut_ad(block->page.id.space() == index->table->space->id);
	ut_a(index_id == index->id);
	ut_a(!dict_index_is_ibuf(index));
#ifdef UNIV_DEBUG
	switch (dict_index_get_online_status(index)) {
	case ONLINE_INDEX_CREATION:
		/* The index is being created (bulk loaded). */
	case ONLINE_INDEX_COMPLETE:
		/* The index has been published. */
	case ONLINE_INDEX_ABORTED:
		/* Either the index creation was aborted due to an
		error observed by InnoDB (in which case there should
		not be any adaptive hash index entries), or it was
		completed and then flagged aborted in
		rollback_inplace_alter_table(). */
		break;
	case ONLINE_INDEX_ABORTED_DROPPED:
		/* The index should have been dropped from the tablespace
		already, and the adaptive hash index entries should have
		been dropped as well. */
		ut_error;
	}
#endif /* UNIV_DEBUG */

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;

	/* NOTE: The AHI fields of block must not be accessed after
	releasing search latch, as the index page might only be s-latched! */

	rw_lock_s_unlock(latch);

	ut_a(n_fields > 0 || n_bytes > 0);

	page = block->frame;
	n_recs = page_get_n_recs(page);

	/* Calculate and cache fold values into an array for fast deletion
	from the hash index */

	folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint));

	n_cached = 0;

	rec = page_get_infimum_rec(page);
	rec = page_rec_get_next_low(rec, page_is_comp(page));
	if (rec_is_default_row(rec, index)) {
		rec = page_rec_get_next_low(rec, page_is_comp(page));
	}

	prev_fold = 0;

	heap = NULL;
	offsets = NULL;

	while (!page_rec_is_supremum(rec)) {
		offsets = rec_get_offsets(
			rec, index, offsets, true,
			btr_search_get_n_fields(n_fields, n_bytes),
			&heap);
		fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

		if (fold == prev_fold && prev_fold != 0) {

			goto next_rec;
		}

		/* Remove all hash nodes pointing to this page from the
		hash chain */

		folds[n_cached] = fold;
		n_cached++;
next_rec:
		rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
		prev_fold = fold;
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	rw_lock_x_lock(latch);

	if (UNIV_UNLIKELY(!block->index)) {
		/* Someone else has meanwhile dropped the hash index */

		goto cleanup;
	}

	ut_a(block->index == index);

	if (block->curr_n_fields != n_fields
	    || block->curr_n_bytes != n_bytes) {

		/* Someone else has meanwhile built a new hash index on the
		page, with different parameters */

		rw_lock_x_unlock(latch);

		ut_free(folds);
		goto retry;
	}

	for (i = 0; i < n_cached; i++) {

		ha_remove_all_nodes_to_page(
			btr_search_sys->hash_tables[ahi_slot],
			folds[i], page);
	}

	info = btr_search_get_info(block->index);
	ut_a(info->ref_count > 0);
	info->ref_count--;

	block->index = NULL;

	MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_REMOVED);
	MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached);

cleanup:
	assert_block_ahi_valid(block);
	rw_lock_x_unlock(latch);

	ut_free(folds);
}
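
/* Illustrative note (not part of the original source): the fold array
built above is deduplicated only against the immediately preceding fold
(fold == prev_fold), which is sufficient because records sharing an equal
hashed prefix are adjacent on the page. For example, record prefixes
folding to

	f1 f1 f1 f2 f3 f3

yield the cached array {f1, f2, f3}, and each ha_remove_all_nodes_to_page()
call then removes every hash node with that fold that points into this
page. */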

/** Drop any adaptive hash index entries that may point to an index
page that may be in the buffer pool, when a page is evicted from the
buffer pool or freed in a file segment.
@param[in]	page_id		page id */
void btr_search_drop_page_hash_when_freed(const page_id_t& page_id)
{
	buf_block_t*	block;
	mtr_t		mtr;
	dberr_t		err = DB_SUCCESS;

	ut_d(export_vars.innodb_ahi_drop_lookups++);

	mtr_start(&mtr);

	/* If the caller has a latch on the page, then the caller must
	have a x-latch on the page and it must have already dropped
	the hash index for the page. Because of the x-latch that we
	are possibly holding, we cannot s-latch the page, but must
	(recursively) x-latch it, even though we are only reading. */

	block = buf_page_get_gen(page_id, univ_page_size, RW_X_LATCH, NULL,
				 BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
				 &mtr, &err);

	if (block) {

		/* If AHI is still valid, page can't be in free state.
		AHI is dropped when page is freed. */
		ut_ad(!block->page.file_page_was_freed);

		buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);

		dict_index_t*	index = block->index;
		if (index != NULL) {
			/* In all our callers, the table handle should
			be open, or we should be in the process of
			dropping the table (preventing eviction). */
			ut_ad(index->table->n_ref_count > 0
			      || mutex_own(&dict_sys->mutex));
			btr_search_drop_page_hash_index(block);
		}
	}

	mtr_commit(&mtr);
}

/** Build a hash index on a page with the given parameters. If the page
already has a hash index with different parameters, the old hash index is
removed. This function checks that n_fields and n_bytes are sensible for
the index, and does not build a hash index if they are not.
@param[in,out]	index		index for which to build
@param[in,out]	block		index page, s-/x- latched
@param[in,out]	ahi_latch	the adaptive search latch
@param[in]	n_fields	hash this many full fields
@param[in]	n_bytes		hash this many bytes of the next field
@param[in]	left_side	hash for searches from left side */
static
void
btr_search_build_page_hash_index(
	dict_index_t*	index,
	buf_block_t*	block,
	rw_lock_t*	ahi_latch,
	ulint		n_fields,
	ulint		n_bytes,
	ibool		left_side)
{
	const rec_t*	rec;
	const rec_t*	next_rec;
	ulint		fold;
	ulint		next_fold;
	ulint		n_cached;
	ulint		n_recs;
	ulint*		folds;
	const rec_t**	recs;
	ulint		i;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;

#ifdef MYSQL_INDEX_DISABLE_AHI
	if (index->disable_ahi) return;
#endif
	if (!btr_search_enabled) {
		return;
	}

	rec_offs_init(offsets_);
	ut_ad(ahi_latch == btr_get_search_latch(index));
	ut_ad(index);
	ut_ad(block->page.id.space() == index->table->space->id);
	ut_a(!dict_index_is_ibuf(index));
	ut_ad(page_is_leaf(block->frame));

	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S)
	      || rw_lock_own(&(block->lock), RW_LOCK_X));

	rw_lock_s_lock(ahi_latch);

	const bool rebuild = block->index
		&& (block->curr_n_fields != n_fields
		    || block->curr_n_bytes != n_bytes
		    || block->curr_left_side != left_side);

	rw_lock_s_unlock(ahi_latch);

	if (rebuild) {
		btr_search_drop_page_hash_index(block);
	}

	/* Check that the values for hash index build are sensible */

	if (n_fields == 0 && n_bytes == 0) {

		return;
	}

	if (dict_index_get_n_unique_in_tree(index)
	    < btr_search_get_n_fields(n_fields, n_bytes)) {
		return;
	}

	page_t*		page	= buf_block_get_frame(block);
	n_recs = page_get_n_recs(page);

	if (n_recs == 0) {

		return;
	}

	rec = page_rec_get_next_const(page_get_infimum_rec(page));

	if (rec_is_default_row(rec, index)) {
		rec = page_rec_get_next_const(rec);
		if (!--n_recs) return;
	}

	/* Calculate and cache fold values and corresponding records into
	an array for fast insertion to the hash index */

	folds = static_cast<ulint*>(ut_malloc_nokey(n_recs * sizeof *folds));
	recs = static_cast<const rec_t**>(
		ut_malloc_nokey(n_recs * sizeof *recs));

	n_cached = 0;

	ut_a(index->id == btr_page_get_index_id(page));

	offsets = rec_get_offsets(
		rec, index, offsets, true,
		btr_search_get_n_fields(n_fields, n_bytes),
		&heap);
	ut_ad(page_rec_is_supremum(rec)
	      || n_fields + (n_bytes > 0) == rec_offs_n_fields(offsets));

	fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);

	if (left_side) {

		folds[n_cached] = fold;
		recs[n_cached] = rec;
		n_cached++;
	}

	for (;;) {
		next_rec = page_rec_get_next_const(rec);

		if (page_rec_is_supremum(next_rec)) {

			if (!left_side) {

				folds[n_cached] = fold;
				recs[n_cached] = rec;
				n_cached++;
			}

			break;
		}

		offsets = rec_get_offsets(
			next_rec, index, offsets, true,
			btr_search_get_n_fields(n_fields, n_bytes), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
				     n_bytes, index->id);

		if (fold != next_fold) {
			/* Insert an entry into the hash index */

			if (left_side) {

				folds[n_cached] = next_fold;
				recs[n_cached] = next_rec;
				n_cached++;
			} else {
				folds[n_cached] = fold;
				recs[n_cached] = rec;
				n_cached++;
			}
		}

		rec = next_rec;
		fold = next_fold;
	}

	btr_search_check_free_space_in_heap(index);

	hash_table_t*	table	= btr_get_search_table(index);
	rw_lock_x_lock(ahi_latch);

	if (!btr_search_enabled) {
		goto exit_func;
	}

	if (block->index && ((block->curr_n_fields != n_fields)
			     || (block->curr_n_bytes != n_bytes)
			     || (block->curr_left_side != left_side))) {
		goto exit_func;
	}

	/* This counter is decremented every time we drop page
	hash index entries and is incremented here. Since we can
	rebuild hash index for a page that is already hashed, we
	have to take care not to increment the counter in that
	case. */
	if (!block->index) {
		assert_block_ahi_empty(block);
		index->search_info->ref_count++;
	}

	block->n_hash_helps = 0;

	block->curr_n_fields = unsigned(n_fields);
	block->curr_n_bytes = unsigned(n_bytes);
	block->curr_left_side = unsigned(left_side);
	block->index = index;

	for (i = 0; i < n_cached; i++) {

		ha_insert_for_fold(table, folds[i], block, recs[i]);
	}

	MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED);
	MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
exit_func:
	assert_block_ahi_valid(block);
	rw_lock_x_unlock(ahi_latch);

	ut_free(folds);
	ut_free(recs);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
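
/* Illustrative worked example (not part of the original source): consider
six records whose hashed prefixes fold to

	f1 f1 f2 f2 f2 f3

With left_side == TRUE the loop above caches the FIRST record of each equal
group ({f1, f2, f3} pointing at records 1, 3 and 6), which suits searches
that approach a group from the left (PAGE_CUR_GE and friends). With
left_side == FALSE it caches the LAST record of each group (records 2, 5
and 6), matching searches that approach from the right. */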

/** Updates the search info.
@param[in,out]	info	search info
@param[in,out]	cursor	cursor which was just positioned */
void
btr_search_info_update_slow(btr_search_t* info, btr_cur_t* cursor)
{
	rw_lock_t*	ahi_latch = btr_get_search_latch(cursor->index);

	ut_ad(!rw_lock_own(ahi_latch, RW_LOCK_S));
	ut_ad(!rw_lock_own(ahi_latch, RW_LOCK_X));

	buf_block_t*	block = btr_cur_get_block(cursor);

	/* NOTE that the following two function calls do NOT protect
	info or block->n_fields etc. with any semaphore, to save CPU time!
	We cannot assume the fields are consistent when we return from
	those functions! */

	btr_search_info_update_hash(info, cursor);

	bool build_index = btr_search_update_block_hash_info(info, block);

	if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {

		btr_search_check_free_space_in_heap(cursor->index);
	}

	if (cursor->flag == BTR_CUR_HASH_FAIL) {
		/* Update the hash node reference, if appropriate */

#ifdef UNIV_SEARCH_PERF_STAT
		btr_search_n_hash_fail++;
#endif /* UNIV_SEARCH_PERF_STAT */

		rw_lock_x_lock(ahi_latch);

		btr_search_update_hash_ref(info, block, cursor);

		rw_lock_x_unlock(ahi_latch);
	}

	if (build_index) {
		/* Note that since we did not protect block->n_fields etc.
		with any semaphore, the values can be inconsistent. We have
		to check inside the function call that they make sense. */
		btr_search_build_page_hash_index(cursor->index, block,
						 ahi_latch,
						 block->n_fields,
						 block->n_bytes,
						 block->left_side);
	}
}

/** Move or delete hash entries for moved records, usually in a page split.
If new_block is already hashed, then any hash index for block is dropped.
If new_block is not hashed, and block is hashed, then a new hash index is
built to new_block with the same parameters as block.
@param[in,out]	new_block	destination page
@param[in,out]	block		source page (subject to deletion later) */
void
btr_search_move_or_delete_hash_entries(
	buf_block_t*	new_block,
	buf_block_t*	block)
{
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
	ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_X));

	if (!btr_search_enabled) {
		return;
	}

	dict_index_t*	index = block->index;
	if (!index) {
		index = new_block->index;
	} else {
		ut_ad(!new_block->index || index == new_block->index);
	}
	assert_block_ahi_valid(block);
	assert_block_ahi_valid(new_block);

	rw_lock_t*	ahi_latch = index ? btr_get_search_latch(index) : NULL;

	if (new_block->index) {
		btr_search_drop_page_hash_index(block);
		return;
	}

	if (!index) {
		return;
	}

	rw_lock_s_lock(ahi_latch);

	if (block->index) {
		ulint	n_fields = block->curr_n_fields;
		ulint	n_bytes = block->curr_n_bytes;
		ibool	left_side = block->curr_left_side;

		new_block->n_fields = block->curr_n_fields;
		new_block->n_bytes = block->curr_n_bytes;
		new_block->left_side = left_side;

		rw_lock_s_unlock(ahi_latch);

		ut_a(n_fields > 0 || n_bytes > 0);

		btr_search_build_page_hash_index(
			index, new_block, ahi_latch,
			n_fields, n_bytes, left_side);
		ut_ad(n_fields == block->curr_n_fields);
		ut_ad(n_bytes == block->curr_n_bytes);
		ut_ad(left_side == block->curr_left_side);
		return;
	}

	rw_lock_s_unlock(ahi_latch);
}

/** Updates the page hash index when a single record is deleted from a page.
@param[in]	cursor	cursor which was positioned on the record to delete
			using btr_cur_search_..., the record is not yet
			deleted. */
void btr_search_update_hash_on_delete(btr_cur_t* cursor)
{
	hash_table_t*	table;
	buf_block_t*	block;
	const rec_t*	rec;
	ulint		fold;
	dict_index_t*	index;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	mem_heap_t*	heap		= NULL;
	rec_offs_init(offsets_);

	ut_ad(page_is_leaf(btr_cur_get_page(cursor)));
#ifdef MYSQL_INDEX_DISABLE_AHI
	if (cursor->index->disable_ahi) return;
#endif

	if (!btr_search_enabled) {
		return;
	}

	block = btr_cur_get_block(cursor);

	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));

	assert_block_ahi_valid(block);
	index = block->index;

	if (!index) {

		return;
	}

	ut_ad(block->page.id.space() == index->table->space->id);
	ut_a(index == cursor->index);
	ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0);
	ut_a(!dict_index_is_ibuf(index));

	table = btr_get_search_table(index);

	rec = btr_cur_get_rec(cursor);

	fold = rec_fold(rec, rec_get_offsets(rec, index, offsets_, true,
					     ULINT_UNDEFINED, &heap),
			block->curr_n_fields, block->curr_n_bytes, index->id);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	rw_lock_t*	ahi_latch = btr_get_search_latch(index);

	rw_lock_x_lock(ahi_latch);
	assert_block_ahi_valid(block);

	if (block->index) {
		ut_a(block->index == index);

		if (ha_search_and_delete_if_found(table, fold, rec)) {
			MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_REMOVED);
		} else {
			MONITOR_INC(
				MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND);
		}

		assert_block_ahi_valid(block);
	}

	rw_lock_x_unlock(ahi_latch);
}

/** Updates the page hash index when a single record is inserted on a page.
@param[in]	cursor	cursor which was positioned to the place to insert
			using btr_cur_search_..., and the new record has been
			inserted next to the cursor.
@param[in]	ahi_latch	the adaptive hash index latch */
void
btr_search_update_hash_node_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch)
{
	hash_table_t*	table;
	buf_block_t*	block;
	dict_index_t*	index;
	rec_t*		rec;

	ut_ad(ahi_latch == btr_get_search_latch(cursor->index));
	ut_ad(!btr_search_own_any(RW_LOCK_S));
	ut_ad(!btr_search_own_any(RW_LOCK_X));
#ifdef MYSQL_INDEX_DISABLE_AHI
	if (cursor->index->disable_ahi) return;
#endif
	if (!btr_search_enabled) {
		return;
	}

	rec = btr_cur_get_rec(cursor);

	block = btr_cur_get_block(cursor);

	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));

	index = block->index;

	if (!index) {

		return;
	}

	ut_a(cursor->index == index);
	ut_a(!dict_index_is_ibuf(index));
	rw_lock_x_lock(ahi_latch);

	if (!block->index) {

		goto func_exit;
	}

	ut_a(block->index == index);

	if ((cursor->flag == BTR_CUR_HASH)
	    && (cursor->n_fields == block->curr_n_fields)
	    && (cursor->n_bytes == block->curr_n_bytes)
	    && !block->curr_left_side) {

		table = btr_get_search_table(index);

		if (ha_search_and_update_if_found(
			    table, cursor->fold, rec, block,
			    page_rec_get_next(rec))) {
			MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_UPDATED);
		}

func_exit:
		assert_block_ahi_valid(block);
		rw_lock_x_unlock(ahi_latch);
	} else {
		rw_lock_x_unlock(ahi_latch);

		btr_search_update_hash_on_insert(cursor, ahi_latch);
	}
}

/** Updates the page hash index when a single record is inserted on a page.
@param[in,out]	cursor		cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor
@param[in]	ahi_latch	the adaptive hash index latch */
void
btr_search_update_hash_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch)
{
	hash_table_t*	table;
	buf_block_t*	block;
	dict_index_t*	index;
	const rec_t*	rec;
	const rec_t*	ins_rec;
	const rec_t*	next_rec;
	ulint		fold;
	ulint		ins_fold;
	ulint		next_fold = 0; /* remove warning (??? bug ???) */
	ulint		n_fields;
	ulint		n_bytes;
	ibool		left_side;
	bool		locked		= false;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	ut_ad(ahi_latch == btr_get_search_latch(cursor->index));
	ut_ad(page_is_leaf(btr_cur_get_page(cursor)));
	ut_ad(!btr_search_own_any(RW_LOCK_S));
	ut_ad(!btr_search_own_any(RW_LOCK_X));
#ifdef MYSQL_INDEX_DISABLE_AHI
	if (cursor->index->disable_ahi) return;
#endif
	if (!btr_search_enabled) {
		return;
	}

	block = btr_cur_get_block(cursor);

	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
	assert_block_ahi_valid(block);

	index = block->index;

	if (!index) {

		return;
	}

	ut_ad(block->page.id.space() == index->table->space->id);
	btr_search_check_free_space_in_heap(index);

	table = btr_get_search_table(index);

	rec = btr_cur_get_rec(cursor);

#ifdef MYSQL_INDEX_DISABLE_AHI
	ut_a(!index->disable_ahi);
#endif
	ut_a(index == cursor->index);
	ut_a(!dict_index_is_ibuf(index));

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	left_side = block->curr_left_side;

	ins_rec = page_rec_get_next_const(rec);
	next_rec = page_rec_get_next_const(ins_rec);

	offsets = rec_get_offsets(ins_rec, index, offsets, true,
				  ULINT_UNDEFINED, &heap);
	ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index->id);

	if (!page_rec_is_supremum(next_rec)) {
		offsets = rec_get_offsets(
			next_rec, index, offsets, true,
			btr_search_get_n_fields(n_fields, n_bytes), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
				     n_bytes, index->id);
	}

	if (!page_rec_is_infimum(rec) && !rec_is_default_row(rec, index)) {
		offsets = rec_get_offsets(
			rec, index, offsets, true,
			btr_search_get_n_fields(n_fields, n_bytes), &heap);
		fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
	} else {
		if (left_side) {
			locked = true;
			rw_lock_x_lock(ahi_latch);

			if (!btr_search_enabled) {
				goto function_exit;
			}

			ha_insert_for_fold(table, ins_fold, block, ins_rec);
		}

		goto check_next_rec;
	}

	if (fold != ins_fold) {

		if (!locked) {
			locked = true;
			rw_lock_x_lock(ahi_latch);

			if (!btr_search_enabled) {
				goto function_exit;
			}
		}

		if (!left_side) {
			ha_insert_for_fold(table, fold, block, rec);
		} else {
			ha_insert_for_fold(table, ins_fold, block, ins_rec);
		}
	}

check_next_rec:
	if (page_rec_is_supremum(next_rec)) {

		if (!left_side) {
			if (!locked) {
				locked = true;
				rw_lock_x_lock(ahi_latch);

				if (!btr_search_enabled) {
					goto function_exit;
				}
			}

			ha_insert_for_fold(table, ins_fold, block, ins_rec);
		}

		goto function_exit;
	}

	if (ins_fold != next_fold) {

		if (!locked) {
			locked = true;
			rw_lock_x_lock(ahi_latch);

			if (!btr_search_enabled) {
				goto function_exit;
			}
		}

		if (!left_side) {
			ha_insert_for_fold(table, ins_fold, block, ins_rec);
		} else {
			ha_insert_for_fold(table, next_fold, block, next_rec);
		}
	}

function_exit:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	if (locked) {
		rw_lock_x_unlock(ahi_latch);
	}
	ut_ad(!rw_lock_own(ahi_latch, RW_LOCK_X));
}

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG

/** Validates the search system for given hash table.
@param[in]	hash_table_id	hash table to validate
@return TRUE if ok */
static
ibool
btr_search_hash_table_validate(ulint hash_table_id)
{
	ha_node_t*	node;
	ibool		ok		= TRUE;
	ulint		i;
	ulint		cell_count;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;

	if (!btr_search_enabled) {
		return(TRUE);
	}

	/* How many cells to check before temporarily releasing
	search latches. */
	ulint		chunk_size = 10000;

	rec_offs_init(offsets_);

	btr_search_x_lock_all();
	buf_pool_mutex_enter_all();

	cell_count = hash_get_n_cells(
		btr_search_sys->hash_tables[hash_table_id]);

	for (i = 0; i < cell_count; i++) {
		/* We release search latches every once in a while to
		give other queries a chance to run. */
		if ((i != 0) && ((i % chunk_size) == 0)) {

			buf_pool_mutex_exit_all();
			btr_search_x_unlock_all();

			os_thread_yield();

			btr_search_x_lock_all();
			buf_pool_mutex_enter_all();

			ulint curr_cell_count = hash_get_n_cells(
				btr_search_sys->hash_tables[hash_table_id]);

			if (cell_count != curr_cell_count) {

				cell_count = curr_cell_count;

				if (i >= cell_count) {
					break;
				}
			}
		}

		node = (ha_node_t*) hash_get_nth_cell(
			btr_search_sys->hash_tables[hash_table_id], i)->node;

		for (; node != NULL; node = node->next) {
			const buf_block_t*	block
				= buf_block_from_ahi((byte*) node->data);
			const buf_block_t*	hash_block;
			buf_pool_t*		buf_pool;
			index_id_t		page_index_id;

			buf_pool = buf_pool_from_bpage((buf_page_t*) block);

			if (UNIV_LIKELY(buf_block_get_state(block)
					== BUF_BLOCK_FILE_PAGE)) {

				/* The space and offset are only valid
				for file blocks. It is possible that
				the block is being freed
				(BUF_BLOCK_REMOVE_HASH, see the
				assertion and the comment below) */
				hash_block = buf_block_hash_get(
					buf_pool,
					block->page.id);
			} else {
				hash_block = NULL;
			}

			if (hash_block) {
				ut_a(hash_block == block);
			} else {
				/* When a block is being freed,
				buf_LRU_search_and_free_block() first
				removes the block from
				buf_pool->page_hash by calling
				buf_LRU_block_remove_hashed_page().
				After that, it invokes
				btr_search_drop_page_hash_index() to
				remove the block from
				btr_search_sys->hash_tables[i]. */

				ut_a(buf_block_get_state(block)
				     == BUF_BLOCK_REMOVE_HASH);
			}

			ut_a(!dict_index_is_ibuf(block->index));
			ut_ad(block->page.id.space()
			      == block->index->table->space->id);

			page_index_id = btr_page_get_index_id(block->frame);

			offsets = rec_get_offsets(
				node->data, block->index, offsets, true,
				btr_search_get_n_fields(block->curr_n_fields,
							block->curr_n_bytes),
				&heap);

			const ulint	fold = rec_fold(
				node->data, offsets,
				block->curr_n_fields,
				block->curr_n_bytes,
				page_index_id);

			if (node->fold != fold) {
				const page_t*	page = block->frame;

				ok = FALSE;

				ib::error() << "Error in an adaptive hash"
					<< " index pointer to page "
					<< page_id_t(page_get_space_id(page),
						     page_get_page_no(page))
					<< ", ptr mem address "
					<< reinterpret_cast<const void*>(
						node->data)
					<< ", index id " << page_index_id
					<< ", node fold " << node->fold
					<< ", rec fold " << fold;

				fputs("InnoDB: Record ", stderr);
				rec_print_new(stderr, node->data, offsets);
				fprintf(stderr, "\nInnoDB: on that page."
					" Page mem address %p, is hashed %p,"
					" n fields %lu\n"
					"InnoDB: side %lu\n",
					(void*) page, (void*) block->index,
					(ulong) block->curr_n_fields,
					(ulong) block->curr_left_side);
				ut_ad(0);
			}
		}
	}

	for (i = 0; i < cell_count; i += chunk_size) {
		/* We release search latches every once in a while to
		give other queries a chance to run. */
		if (i != 0) {

			buf_pool_mutex_exit_all();
			btr_search_x_unlock_all();

			os_thread_yield();

			btr_search_x_lock_all();
			buf_pool_mutex_enter_all();

			ulint curr_cell_count = hash_get_n_cells(
				btr_search_sys->hash_tables[hash_table_id]);

			if (cell_count != curr_cell_count) {

				cell_count = curr_cell_count;

				if (i >= cell_count) {
					break;
				}
			}
		}

		ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);

		if (!ha_validate(btr_search_sys->hash_tables[hash_table_id],
				 i, end_index)) {
			ok = FALSE;
		}
	}

	buf_pool_mutex_exit_all();
	btr_search_x_unlock_all();

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ok);
}

/** Validate the search system.
@return true if ok. */
bool
btr_search_validate()
{
	for (ulint i = 0; i < btr_ahi_parts; ++i) {
		if (!btr_search_hash_table_validate(i)) {
			return(false);
		}
	}

	return(true);
}

#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
#endif /* BTR_CUR_HASH_ADAPT */