1/*****************************************************************************
2
3Copyright (c) 2009, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2015, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file dict/dict0stats.cc
22Code used for calculating and manipulating table statistics.
23
24Created Jan 06, 2010 Vasil Dimov
25*******************************************************/
26
27#include "univ.i"
28
29#include "ut0ut.h"
30#include "ut0rnd.h"
31#include "dyn0buf.h"
32#include "row0sel.h"
33#include "trx0trx.h"
34#include "pars0pars.h"
35#include "dict0stats.h"
36#include "ha_prototypes.h"
37#include "ut0new.h"
38#include <mysql_com.h>
39#include "btr0btr.h"
40
41#include <algorithm>
42#include <map>
43#include <vector>
44
45/* Sampling algorithm description @{
46
47The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
48let it be A, which is the number of leaf pages to analyze for a given index
49for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
50analyzed).
51
52Let the total number of leaf pages in the table be T.
53Level 0 - leaf pages, level H - root.
54
Definition: N-prefix-boring record is a record on a non-leaf page that equals
the next (to the right, cross page boundaries, skipping the supremum and
infimum) record on the same level when looking at the first n-prefix columns.
58The last (user) record on a level is not boring (it does not match the
59non-existent user record to the right). We call the records boring because all
60the records on the page below a boring record are equal to that boring record.
61
62We avoid diving below boring records when searching for a leaf page to
63estimate the number of distinct records because we know that such a leaf
64page will have number of distinct records == 1.
65
66For each n-prefix: start from the root level and full scan subsequent lower
67levels until a level that contains at least A*10 distinct records is found.
Let's call this level LA.
69As an optimization the search is canceled if it has reached level 1 (never
70descend to the level 0 (leaf)) and also if the next level to be scanned
71would contain more than A pages. The latter is because the user has asked
72to analyze A leaf pages and it does not make sense to scan much more than
73A non-leaf pages with the sole purpose of finding a good sample of A leaf
74pages.
75
76After finding the appropriate level LA with >A*10 distinct records (or less in
77the exceptions described above), divide it into groups of equal records and
78pick A such groups. Then pick the last record from each group. For example,
79let the level be:
80
81index: 0,1,2,3,4,5,6,7,8,9,10
82record: 1,1,1,2,2,7,7,7,7,7,9
83
84There are 4 groups of distinct records and if A=2 random ones are selected,
85e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
86
87After selecting A records as described above, dive below them to find A leaf
88pages and analyze them, finding the total number of distinct records. The
89dive to the leaf level is performed by selecting a non-boring record from
90each page and diving below it.
91
92This way, a total of A leaf pages are analyzed for the given n-prefix.
93
94Let the number of different key values found in each leaf page i be Pi (i=1..A).
95Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
96Let the number of different key values on level LA be N_DIFF_LA.
97Let the total number of records on level LA be TOTAL_LA.
98Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
99leaf level.
100Let the number of leaf pages be N.
101Then the total number of different key values on the leaf level is:
102N * R * N_DIFF_AVG_LEAF.
103See REF01 for the implementation.
104
105The above describes how to calculate the cardinality of an index.
106This algorithm is executed for each n-prefix of a multi-column index
107where n=1..n_uniq.
108@} */
109
/* Names of the tables from the persistent statistics storage.
The plain names use the "database/table" form, the *_PRINT variants use
the "database.table" form. */
#define TABLE_STATS_NAME "mysql/innodb_table_stats"
#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
#define INDEX_STATS_NAME "mysql/innodb_index_stats"
#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"

/* DEBUG_PRINTF() forwards to printf() when UNIV_STATS_DEBUG is defined
and expands to nothing otherwise. */
#ifdef UNIV_STATS_DEBUG
#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
#else /* UNIV_STATS_DEBUG */
#define DEBUG_PRINTF(fmt, ...) /* noop */
#endif /* UNIV_STATS_DEBUG */

/* Gets the number of leaf pages to sample in persistent stats estimation.
The per-table setting (index)->table->stats_sample_pages is used when it is
non-zero, otherwise the global srv_stats_persistent_sample_pages is used.
The result is converted to ib_uint64_t. */
#define N_SAMPLE_PAGES(index) \
	static_cast<ib_uint64_t>( \
		(index)->table->stats_sample_pages != 0 \
		? (index)->table->stats_sample_pages \
		: srv_stats_persistent_sample_pages)

/* number of distinct records on a given level that are required to stop
descending to lower levels and fetch N_SAMPLE_PAGES(index) records
from that level; defined as ten times N_SAMPLE_PAGES(index) */
#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)

/* A dynamic array where we store the boundaries of each distinct group
of keys, i.e. the position of the last record of each group.
For example if a btree level is:
index: 0,1,2,3,4,5,6,7,8,9,10,11,12
data: b,b,b,b,b,b,g,g,j,j,j, x, y
then we would store 5,7,10,11,12 in the array. */
typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> > boundaries_t;

/** Allocator type used for index_map_t. */
typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
	index_map_t_allocator;

/** Auxiliary map used for sorting indexes by name in dict_stats_save().
Keys are C strings ordered by ut_strcmp_functor, values are index
pointers. */
typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
	index_map_t_allocator> index_map_t;
148
149/*********************************************************************//**
150Checks whether an index should be ignored in stats manipulations:
151* stats fetch
152* stats recalc
153* stats save
154@return true if exists and all tables are ok */
155UNIV_INLINE
156bool
157dict_stats_should_ignore_index(
158/*===========================*/
159 const dict_index_t* index) /*!< in: index */
160{
161 return((index->type & (DICT_FTS | DICT_SPATIAL))
162 || index->is_corrupted()
163 || index->to_be_dropped
164 || !index->is_committed());
165}
166
167/*********************************************************************//**
168Checks whether the persistent statistics storage exists and that all
169tables have the proper structure.
170@return true if exists and all tables are ok */
171static
172bool
173dict_stats_persistent_storage_check(
174/*================================*/
175 bool caller_has_dict_sys_mutex) /*!< in: true if the caller
176 owns dict_sys->mutex */
177{
178 /* definition for the table TABLE_STATS_NAME */
179 dict_col_meta_t table_stats_columns[] = {
180 {"database_name", DATA_VARMYSQL,
181 DATA_NOT_NULL, 192},
182
183 {"table_name", DATA_VARMYSQL,
184 DATA_NOT_NULL, 192},
185
186 {"last_update", DATA_FIXBINARY,
187 DATA_NOT_NULL, 4},
188
189 {"n_rows", DATA_INT,
190 DATA_NOT_NULL | DATA_UNSIGNED, 8},
191
192 {"clustered_index_size", DATA_INT,
193 DATA_NOT_NULL | DATA_UNSIGNED, 8},
194
195 {"sum_of_other_index_sizes", DATA_INT,
196 DATA_NOT_NULL | DATA_UNSIGNED, 8}
197 };
198 dict_table_schema_t table_stats_schema = {
199 TABLE_STATS_NAME,
200 UT_ARR_SIZE(table_stats_columns),
201 table_stats_columns,
202 0 /* n_foreign */,
203 0 /* n_referenced */
204 };
205
206 /* definition for the table INDEX_STATS_NAME */
207 dict_col_meta_t index_stats_columns[] = {
208 {"database_name", DATA_VARMYSQL,
209 DATA_NOT_NULL, 192},
210
211 {"table_name", DATA_VARMYSQL,
212 DATA_NOT_NULL, 192},
213
214 {"index_name", DATA_VARMYSQL,
215 DATA_NOT_NULL, 192},
216
217 {"last_update", DATA_FIXBINARY,
218 DATA_NOT_NULL, 4},
219
220 {"stat_name", DATA_VARMYSQL,
221 DATA_NOT_NULL, 64*3},
222
223 {"stat_value", DATA_INT,
224 DATA_NOT_NULL | DATA_UNSIGNED, 8},
225
226 {"sample_size", DATA_INT,
227 DATA_UNSIGNED, 8},
228
229 {"stat_description", DATA_VARMYSQL,
230 DATA_NOT_NULL, 1024*3}
231 };
232 dict_table_schema_t index_stats_schema = {
233 INDEX_STATS_NAME,
234 UT_ARR_SIZE(index_stats_columns),
235 index_stats_columns,
236 0 /* n_foreign */,
237 0 /* n_referenced */
238 };
239
240 char errstr[512];
241 dberr_t ret;
242
243 if (!caller_has_dict_sys_mutex) {
244 mutex_enter(&dict_sys->mutex);
245 }
246
247 ut_ad(mutex_own(&dict_sys->mutex));
248
249 /* first check table_stats */
250 ret = dict_table_schema_check(&table_stats_schema, errstr,
251 sizeof(errstr));
252 if (ret == DB_SUCCESS) {
253 /* if it is ok, then check index_stats */
254 ret = dict_table_schema_check(&index_stats_schema, errstr,
255 sizeof(errstr));
256 }
257
258 if (!caller_has_dict_sys_mutex) {
259 mutex_exit(&dict_sys->mutex);
260 }
261
262 if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
263 ib::error() << errstr;
264 return(false);
265 } else if (ret == DB_STATS_DO_NOT_EXIST) {
266 return false;
267 }
268 /* else */
269
270 return(true);
271}
272
273/** Executes a given SQL statement using the InnoDB internal SQL parser.
274This function will free the pinfo object.
275@param[in,out] pinfo pinfo to pass to que_eval_sql() must already
276have any literals bound to it
277@param[in] sql SQL string to execute
278@param[in,out] trx in case of NULL the function will allocate and
279free the trx object. If it is not NULL then it will be rolled back
280only in the case of error, but not freed.
281@return DB_SUCCESS or error code */
282static
283dberr_t
284dict_stats_exec_sql(
285 pars_info_t* pinfo,
286 const char* sql,
287 trx_t* trx)
288{
289 dberr_t err;
290 bool trx_started = false;
291
292 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
293 ut_ad(mutex_own(&dict_sys->mutex));
294
295 if (!dict_stats_persistent_storage_check(true)) {
296 pars_info_free(pinfo);
297 return(DB_STATS_DO_NOT_EXIST);
298 }
299
300 if (trx == NULL) {
301 trx = trx_create();
302 trx_started = true;
303
304 if (srv_read_only_mode) {
305 trx_start_internal_read_only(trx);
306 } else {
307 trx_start_internal(trx);
308 }
309 }
310
311 err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */
312
313 DBUG_EXECUTE_IF("stats_index_error",
314 if (!trx_started) {
315 err = DB_STATS_DO_NOT_EXIST;
316 trx->error_state = DB_STATS_DO_NOT_EXIST;
317 });
318
319 if (!trx_started && err == DB_SUCCESS) {
320 return(DB_SUCCESS);
321 }
322
323 if (err == DB_SUCCESS) {
324 trx_commit_for_mysql(trx);
325 } else {
326 trx->op_info = "rollback of internal trx on stats tables";
327 trx->dict_operation_lock_mode = RW_X_LATCH;
328 trx_rollback_to_savepoint(trx, NULL);
329 trx->dict_operation_lock_mode = 0;
330 trx->op_info = "";
331 ut_a(trx->error_state == DB_SUCCESS);
332 }
333
334 if (trx_started) {
335 trx_free(trx);
336 }
337
338 return(err);
339}
340
341/*********************************************************************//**
342Duplicate a table object and its indexes.
343This function creates a dummy dict_table_t object and initializes the
344following table and index members:
345dict_table_t::id (copied)
346dict_table_t::heap (newly created)
347dict_table_t::name (copied)
348dict_table_t::corrupted (copied)
349dict_table_t::indexes<> (newly created)
350dict_table_t::magic_n
351for each entry in dict_table_t::indexes, the following are initialized:
352(indexes that have DICT_FTS set in index->type are skipped)
353dict_index_t::id (copied)
354dict_index_t::name (copied)
355dict_index_t::table_name (points to the copied table name)
356dict_index_t::table (points to the above semi-initialized object)
357dict_index_t::type (copied)
358dict_index_t::to_be_dropped (copied)
359dict_index_t::online_status (copied)
360dict_index_t::n_uniq (copied)
361dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
362dict_index_t::indexes<> (newly created)
363dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
364dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
365dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
366dict_index_t::magic_n
367The returned object should be freed with dict_stats_table_clone_free()
368when no longer needed.
369@return incomplete table object */
370static
371dict_table_t*
372dict_stats_table_clone_create(
373/*==========================*/
374 const dict_table_t* table) /*!< in: table whose stats to copy */
375{
376 size_t heap_size;
377 dict_index_t* index;
378
379 /* Estimate the size needed for the table and all of its indexes */
380
381 heap_size = 0;
382 heap_size += sizeof(dict_table_t);
383 heap_size += strlen(table->name.m_name) + 1;
384
385 for (index = dict_table_get_first_index(table);
386 index != NULL;
387 index = dict_table_get_next_index(index)) {
388
389 if (dict_stats_should_ignore_index(index)) {
390 continue;
391 }
392
393 ut_ad(!dict_index_is_ibuf(index));
394
395 ulint n_uniq = dict_index_get_n_unique(index);
396
397 heap_size += sizeof(dict_index_t);
398 heap_size += strlen(index->name) + 1;
399 heap_size += n_uniq * sizeof(index->fields[0]);
400 for (ulint i = 0; i < n_uniq; i++) {
401 heap_size += strlen(index->fields[i].name) + 1;
402 }
403 heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
404 heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
405 heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
406 }
407
408 /* Allocate the memory and copy the members */
409
410 mem_heap_t* heap;
411
412 heap = mem_heap_create(heap_size);
413
414 dict_table_t* t;
415
416 t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
417
418 UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
419 t->id = table->id;
420
421 t->heap = heap;
422
423 t->name.m_name = mem_heap_strdup(heap, table->name.m_name);
424
425 t->corrupted = table->corrupted;
426
427 /* This private object "t" is not shared with other threads, so
428 we do not need the stats_latch (thus we pass false below). The
429 dict_table_stats_lock()/unlock() routines will do nothing. */
430 dict_table_stats_latch_create(t, false);
431
432 UT_LIST_INIT(t->indexes, &dict_index_t::indexes);
433
434 for (index = dict_table_get_first_index(table);
435 index != NULL;
436 index = dict_table_get_next_index(index)) {
437
438 if (dict_stats_should_ignore_index(index)) {
439 continue;
440 }
441
442 ut_ad(!dict_index_is_ibuf(index));
443
444 dict_index_t* idx;
445
446 idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
447
448 UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
449 idx->id = index->id;
450
451 idx->name = mem_heap_strdup(heap, index->name);
452
453 idx->table = t;
454
455 idx->type = index->type;
456
457 idx->to_be_dropped = 0;
458
459 idx->online_status = ONLINE_INDEX_COMPLETE;
460 idx->set_committed(true);
461
462 idx->n_uniq = index->n_uniq;
463
464 idx->fields = (dict_field_t*) mem_heap_alloc(
465 heap, idx->n_uniq * sizeof(idx->fields[0]));
466
467 for (ulint i = 0; i < idx->n_uniq; i++) {
468 idx->fields[i].name = mem_heap_strdup(
469 heap, index->fields[i].name);
470 }
471
472 /* hook idx into t->indexes */
473 UT_LIST_ADD_LAST(t->indexes, idx);
474
475 idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
476 heap,
477 idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
478
479 idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
480 heap,
481 idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
482
483 idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
484 heap,
485 idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
486 ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
487
488 idx->stat_defrag_n_page_split = 0;
489 idx->stat_defrag_n_pages_freed = 0;
490 }
491
492 ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
493
494 return(t);
495}
496
/*********************************************************************//**
Free the resources occupied by an object returned by
dict_stats_table_clone_create(). The stats latch of the object is
destroyed first and then the heap referenced by t->heap is freed. */
static
void
dict_stats_table_clone_free(
/*========================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	dict_table_stats_latch_destroy(t);
	/* NOTE(review): t itself is allocated from t->heap by
	dict_stats_table_clone_create(), so t presumably must not be
	used after this call - confirm against mem_heap_free() */
	mem_heap_free(t->heap);
}
509
510/*********************************************************************//**
511Write all zeros (or 1 where it makes sense) into an index
512statistics members. The resulting stats correspond to an empty index.
513The caller must own index's table stats latch in X mode
514(dict_table_stats_lock(table, RW_X_LATCH)) */
515static
516void
517dict_stats_empty_index(
518/*===================*/
519 dict_index_t* index, /*!< in/out: index */
520 bool empty_defrag_stats)
521 /*!< in: whether to empty defrag stats */
522{
523 ut_ad(!(index->type & DICT_FTS));
524 ut_ad(!dict_index_is_ibuf(index));
525
526 ulint n_uniq = index->n_uniq;
527
528 for (ulint i = 0; i < n_uniq; i++) {
529 index->stat_n_diff_key_vals[i] = 0;
530 index->stat_n_sample_sizes[i] = 1;
531 index->stat_n_non_null_key_vals[i] = 0;
532 }
533
534 index->stat_index_size = 1;
535 index->stat_n_leaf_pages = 1;
536
537 if (empty_defrag_stats) {
538 dict_stats_empty_defrag_stats(index);
539 dict_stats_empty_defrag_summary(index);
540 }
541}
542
543/*********************************************************************//**
544Write all zeros (or 1 where it makes sense) into a table and its indexes'
545statistics members. The resulting stats correspond to an empty table. */
546static
547void
548dict_stats_empty_table(
549/*===================*/
550 dict_table_t* table, /*!< in/out: table */
551 bool empty_defrag_stats)
552 /*!< in: whether to empty defrag stats */
553{
554 /* Zero the stats members */
555
556 dict_table_stats_lock(table, RW_X_LATCH);
557
558 table->stat_n_rows = 0;
559 table->stat_clustered_index_size = 1;
560 /* 1 page for each index, not counting the clustered */
561 table->stat_sum_of_other_index_sizes
562 = UT_LIST_GET_LEN(table->indexes) - 1;
563 table->stat_modified_counter = 0;
564
565 dict_index_t* index;
566
567 for (index = dict_table_get_first_index(table);
568 index != NULL;
569 index = dict_table_get_next_index(index)) {
570
571 if (index->type & DICT_FTS) {
572 continue;
573 }
574
575 ut_ad(!dict_index_is_ibuf(index));
576
577 dict_stats_empty_index(index, empty_defrag_stats);
578 }
579
580 table->stat_initialized = TRUE;
581
582 dict_table_stats_unlock(table, RW_X_LATCH);
583}
584
/*********************************************************************//**
Check whether index's stats are initialized (assert if they are not).
The check is applied to the n_uniq elements of each of the three
per-prefix stats arrays and to the two size members.
NOTE(review): UNIV_MEM_ASSERT_RW_ABORT is presumably a memory-checker
assertion that may expand to nothing in regular builds - confirm against
its definition. */
static
void
dict_stats_assert_initialized_index(
/*================================*/
	const dict_index_t*	index)	/*!< in: index */
{
	/* per-prefix arrays, n_uniq elements each */
	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_diff_key_vals,
		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_sample_sizes,
		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_non_null_key_vals,
		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));

	/* scalar size members */
	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_index_size,
		sizeof(index->stat_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_n_leaf_pages,
		sizeof(index->stat_n_leaf_pages));
}
613
/*********************************************************************//**
Check whether table's stats are initialized (assert if they are not).
table->stat_initialized is asserted unconditionally with ut_a(); the
individual stats members are then checked, followed by the stats of every
index that dict_stats_should_ignore_index() does not exclude. */
static
void
dict_stats_assert_initialized(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_a(table->stat_initialized);

	/* table-level stats settings and counters */
	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
				 sizeof(table->stats_last_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
				 sizeof(table->stat_persistent));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
				 sizeof(table->stats_auto_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
				 sizeof(table->stats_sample_pages));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
				 sizeof(table->stat_n_rows));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
				 sizeof(table->stat_clustered_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
				 sizeof(table->stat_sum_of_other_index_sizes));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
				 sizeof(table->stat_modified_counter));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
				 sizeof(table->stats_bg_flag));

	/* index-level stats; ignored indexes are not checked */
	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (!dict_stats_should_ignore_index(index)) {
			dict_stats_assert_initialized_index(index);
		}
	}
}
660
661#define INDEX_EQ(i1, i2) \
662 ((i1) != NULL \
663 && (i2) != NULL \
664 && (i1)->id == (i2)->id \
665 && strcmp((i1)->name, (i2)->name) == 0)
666
667/*********************************************************************//**
668Copy table and index statistics from one table to another, including index
669stats. Extra indexes in src are ignored and extra indexes in dst are
670initialized to correspond to an empty index. */
671static
672void
673dict_stats_copy(
674/*============*/
675 dict_table_t* dst, /*!< in/out: destination table */
676 const dict_table_t* src, /*!< in: source table */
677 bool reset_ignored_indexes) /*!< in: if true, set ignored indexes
678 to have the same statistics as if
679 the table was empty */
680{
681 dst->stats_last_recalc = src->stats_last_recalc;
682 dst->stat_n_rows = src->stat_n_rows;
683 dst->stat_clustered_index_size = src->stat_clustered_index_size;
684 dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
685 dst->stat_modified_counter = src->stat_modified_counter;
686
687 dict_index_t* dst_idx;
688 dict_index_t* src_idx;
689
690 for (dst_idx = dict_table_get_first_index(dst),
691 src_idx = dict_table_get_first_index(src);
692 dst_idx != NULL;
693 dst_idx = dict_table_get_next_index(dst_idx),
694 (src_idx != NULL
695 && (src_idx = dict_table_get_next_index(src_idx)))) {
696
697 if (dict_stats_should_ignore_index(dst_idx)) {
698 if (reset_ignored_indexes) {
699 /* Reset index statistics for all ignored indexes,
700 unless they are FT indexes (these have no statistics)*/
701 if (dst_idx->type & DICT_FTS) {
702 continue;
703 }
704 dict_stats_empty_index(dst_idx, true);
705 } else {
706 continue;
707 }
708 }
709
710 ut_ad(!dict_index_is_ibuf(dst_idx));
711
712 if (!INDEX_EQ(src_idx, dst_idx)) {
713 for (src_idx = dict_table_get_first_index(src);
714 src_idx != NULL;
715 src_idx = dict_table_get_next_index(src_idx)) {
716
717 if (INDEX_EQ(src_idx, dst_idx)) {
718 break;
719 }
720 }
721 }
722
723 if (!INDEX_EQ(src_idx, dst_idx)) {
724 dict_stats_empty_index(dst_idx, true);
725 continue;
726 }
727
728 ulint n_copy_el;
729
730 if (dst_idx->n_uniq > src_idx->n_uniq) {
731 n_copy_el = src_idx->n_uniq;
732 /* Since src is smaller some elements in dst
733 will remain untouched by the following memmove(),
734 thus we init all of them here. */
735 dict_stats_empty_index(dst_idx, true);
736 } else {
737 n_copy_el = dst_idx->n_uniq;
738 }
739
740 memmove(dst_idx->stat_n_diff_key_vals,
741 src_idx->stat_n_diff_key_vals,
742 n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));
743
744 memmove(dst_idx->stat_n_sample_sizes,
745 src_idx->stat_n_sample_sizes,
746 n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));
747
748 memmove(dst_idx->stat_n_non_null_key_vals,
749 src_idx->stat_n_non_null_key_vals,
750 n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));
751
752 dst_idx->stat_index_size = src_idx->stat_index_size;
753
754 dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
755
756 dst_idx->stat_defrag_modified_counter =
757 src_idx->stat_defrag_modified_counter;
758 dst_idx->stat_defrag_n_pages_freed =
759 src_idx->stat_defrag_n_pages_freed;
760 dst_idx->stat_defrag_n_page_split =
761 src_idx->stat_defrag_n_page_split;
762 }
763
764 dst->stat_initialized = TRUE;
765}
766
767/** Duplicate the stats of a table and its indexes.
768This function creates a dummy dict_table_t object and copies the input
769table's stats into it. The returned table object is not in the dictionary
770cache and cannot be accessed by any other threads. In addition to the
771members copied in dict_stats_table_clone_create() this function initializes
772the following:
773dict_table_t::stat_initialized
774dict_table_t::stat_persistent
775dict_table_t::stat_n_rows
776dict_table_t::stat_clustered_index_size
777dict_table_t::stat_sum_of_other_index_sizes
778dict_table_t::stat_modified_counter
779dict_index_t::stat_n_diff_key_vals[]
780dict_index_t::stat_n_sample_sizes[]
781dict_index_t::stat_n_non_null_key_vals[]
782dict_index_t::stat_index_size
783dict_index_t::stat_n_leaf_pages
784dict_index_t::stat_defrag_modified_counter
785dict_index_t::stat_defrag_n_pages_freed
786dict_index_t::stat_defrag_n_page_split
787The returned object should be freed with dict_stats_snapshot_free()
788when no longer needed.
789@param[in] table table whose stats to copy
790@return incomplete table object */
791static
792dict_table_t*
793dict_stats_snapshot_create(
794 dict_table_t* table)
795{
796 mutex_enter(&dict_sys->mutex);
797
798 dict_table_stats_lock(table, RW_S_LATCH);
799
800 dict_stats_assert_initialized(table);
801
802 dict_table_t* t;
803
804 t = dict_stats_table_clone_create(table);
805
806 dict_stats_copy(t, table, false);
807
808 t->stat_persistent = table->stat_persistent;
809 t->stats_auto_recalc = table->stats_auto_recalc;
810 t->stats_sample_pages = table->stats_sample_pages;
811 t->stats_bg_flag = table->stats_bg_flag;
812
813 dict_table_stats_unlock(table, RW_S_LATCH);
814
815 mutex_exit(&dict_sys->mutex);
816
817 return(t);
818}
819
/*********************************************************************//**
Free the resources occupied by an object returned by
dict_stats_snapshot_create(). This simply forwards to
dict_stats_table_clone_free(). */
static
void
dict_stats_snapshot_free(
/*=====================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	dict_stats_table_clone_free(t);
}
831
832/*********************************************************************//**
833Calculates new estimates for index statistics. This function is
834relatively quick and is used to calculate transient statistics that
835are not saved on disk. This was the only way to calculate statistics
836before the Persistent Statistics feature was introduced.
837This function doesn't update the defragmentation related stats.
838Only persistent statistics supports defragmentation stats. */
839static
840void
841dict_stats_update_transient_for_index(
842/*==================================*/
843 dict_index_t* index) /*!< in/out: index */
844{
845 if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
846 && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
847 || !dict_index_is_clust(index))) {
848 /* If we have set a high innodb_force_recovery
849 level, do not calculate statistics, as a badly
850 corrupted index can cause a crash in it.
851 Initialize some bogus index cardinality
852 statistics, so that the data can be queried in
853 various means, also via secondary indexes. */
854 dict_stats_empty_index(index, false);
855#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
856 } else if (ibuf_debug && !dict_index_is_clust(index)) {
857 dict_stats_empty_index(index, false);
858#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
859 } else {
860 mtr_t mtr;
861 ulint size;
862
863 mtr_start(&mtr);
864
865 mtr_s_lock(dict_index_get_lock(index), &mtr);
866
867 size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
868
869 if (size != ULINT_UNDEFINED) {
870 index->stat_index_size = size;
871
872 size = btr_get_size(
873 index, BTR_N_LEAF_PAGES, &mtr);
874 }
875
876 mtr_commit(&mtr);
877
878 switch (size) {
879 case ULINT_UNDEFINED:
880 dict_stats_empty_index(index, false);
881 return;
882 case 0:
883 /* The root node of the tree is a leaf */
884 size = 1;
885 }
886
887 index->stat_n_leaf_pages = size;
888
889 /* Do not continue if table decryption has failed or
890 table is already marked as corrupted. */
891 if (index->is_readable()) {
892 /* We don't handle the return value since it
893 will be false only when some thread is
894 dropping the table and we don't have to empty
895 the statistics of the to be dropped index */
896 btr_estimate_number_of_different_key_vals(index);
897 }
898 }
899}
900
901/*********************************************************************//**
902Calculates new estimates for table and index statistics. This function
903is relatively quick and is used to calculate transient statistics that
904are not saved on disk.
905This was the only way to calculate statistics before the
906Persistent Statistics feature was introduced. */
907static
908void
909dict_stats_update_transient(
910/*========================*/
911 dict_table_t* table) /*!< in/out: table */
912{
913 dict_index_t* index;
914 ulint sum_of_index_sizes = 0;
915
916 /* Find out the sizes of the indexes and how many different values
917 for the key they approximately have */
918
919 index = dict_table_get_first_index(table);
920
921 if (!table->space) {
922 /* Nothing to do. */
923 dict_stats_empty_table(table, true);
924 return;
925 } else if (index == NULL) {
926 /* Table definition is corrupt */
927
928 ib::warn() << "Table " << table->name
929 << " has no indexes. Cannot calculate statistics.";
930 dict_stats_empty_table(table, true);
931 return;
932 }
933
934 for (; index != NULL; index = dict_table_get_next_index(index)) {
935
936 ut_ad(!dict_index_is_ibuf(index));
937
938 if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
939 continue;
940 }
941
942 dict_stats_empty_index(index, false);
943
944 if (dict_stats_should_ignore_index(index)) {
945 continue;
946 }
947
948 /* Do not continue if table decryption has failed or
949 table is already marked as corrupted. */
950 if (!index->is_readable()) {
951 break;
952 }
953
954 dict_stats_update_transient_for_index(index);
955
956 sum_of_index_sizes += index->stat_index_size;
957 }
958
959 index = dict_table_get_first_index(table);
960
961 table->stat_n_rows = index->stat_n_diff_key_vals[
962 dict_index_get_n_unique(index) - 1];
963
964 table->stat_clustered_index_size = index->stat_index_size;
965
966 table->stat_sum_of_other_index_sizes = sum_of_index_sizes
967 - index->stat_index_size;
968
969 table->stats_last_recalc = ut_time();
970
971 table->stat_modified_counter = 0;
972
973 table->stat_initialized = TRUE;
974}
975
976/* @{ Pseudo code about the relation between the following functions
977
978let N = N_SAMPLE_PAGES(index)
979
980dict_stats_analyze_index()
981 for each n_prefix
982 search for good enough level:
983 dict_stats_analyze_index_level() // only called if level has <= N pages
984 // full scan of the level in one mtr
985 collect statistics about the given level
986 if we are not satisfied with the level, search next lower level
987 we have found a good enough level here
988 dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
989 // full scan of the level in one mtr
990 dive below some records and analyze the leaf page there:
991 dict_stats_analyze_index_below_cur()
992@} */
993
994/*********************************************************************//**
995Find the total number and the number of distinct keys on a given level in
996an index. Each of the 1..n_uniq prefixes are looked up and the results are
997saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
998records on the level is saved in total_recs.
999Also, the index of the last record in each group of equal records is saved
1000in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
1001record on the level and continues cross pages boundaries, counting from 0. */
1002static
1003void
1004dict_stats_analyze_index_level(
1005/*===========================*/
1006 dict_index_t* index, /*!< in: index */
1007 ulint level, /*!< in: level */
1008 ib_uint64_t* n_diff, /*!< out: array for number of
1009 distinct keys for all prefixes */
1010 ib_uint64_t* total_recs, /*!< out: total number of records */
1011 ib_uint64_t* total_pages, /*!< out: total number of pages */
1012 boundaries_t* n_diff_boundaries,/*!< out: boundaries of the groups
1013 of distinct keys */
1014 mtr_t* mtr) /*!< in/out: mini-transaction */
1015{
1016 ulint n_uniq;
1017 mem_heap_t* heap;
1018 btr_pcur_t pcur;
1019 const page_t* page;
1020 const rec_t* rec;
1021 const rec_t* prev_rec;
1022 bool prev_rec_is_copied;
1023 byte* prev_rec_buf = NULL;
1024 ulint prev_rec_buf_size = 0;
1025 ulint* rec_offsets;
1026 ulint* prev_rec_offsets;
1027 ulint i;
1028
1029 DEBUG_PRINTF(" %s(table=%s, index=%s, level=" ULINTPF ")\n",
1030 __func__, index->table->name, index->name, level);
1031
1032 ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1033 MTR_MEMO_SX_LOCK));
1034
1035 n_uniq = dict_index_get_n_unique(index);
1036
1037 /* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
1038 memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
1039
1040 /* Allocate space for the offsets header (the allocation size at
1041 offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_uniq + 1,
1042 so that this will never be less than the size calculated in
1043 rec_get_offsets_func(). */
1044 i = (REC_OFFS_HEADER_SIZE + 1 + 1) + n_uniq;
1045
1046 heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
1047 rec_offsets = static_cast<ulint*>(
1048 mem_heap_alloc(heap, i * sizeof *rec_offsets));
1049 prev_rec_offsets = static_cast<ulint*>(
1050 mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
1051 rec_offs_set_n_alloc(rec_offsets, i);
1052 rec_offs_set_n_alloc(prev_rec_offsets, i);
1053
1054 /* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
1055 if (n_diff_boundaries != NULL) {
1056 for (i = 0; i < n_uniq; i++) {
1057 n_diff_boundaries[i].erase(
1058 n_diff_boundaries[i].begin(),
1059 n_diff_boundaries[i].end());
1060 }
1061 }
1062
1063 /* Position pcur on the leftmost record on the leftmost page
1064 on the desired level. */
1065
1066 btr_pcur_open_at_index_side(
1067 true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
1068 &pcur, true, level, mtr);
1069 btr_pcur_move_to_next_on_page(&pcur);
1070
1071 page = btr_pcur_get_page(&pcur);
1072
1073 /* The page must not be empty, except when
1074 it is the root page (and the whole index is empty). */
1075 ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
1076 ut_ad(btr_pcur_get_rec(&pcur)
1077 == page_rec_get_next_const(page_get_infimum_rec(page)));
1078
1079 /* check that we are indeed on the desired level */
1080 ut_a(btr_page_get_level(page) == level);
1081
1082 /* there should not be any pages on the left */
1083 ut_a(!page_has_prev(page));
1084
1085 if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
1086 btr_pcur_get_rec(&pcur), page_is_comp(page))) {
1087 ut_ad(btr_pcur_is_on_user_rec(&pcur));
1088 if (level == 0) {
1089 /* Skip the 'default row' pseudo-record */
1090 ut_ad(index->is_instant());
1091 btr_pcur_move_to_next_user_rec(&pcur, mtr);
1092 }
1093 } else {
1094 /* The first record on the leftmost page must be
1095 marked as such on each level except the leaf level. */
1096 ut_a(level == 0);
1097 }
1098
1099 prev_rec = NULL;
1100 prev_rec_is_copied = false;
1101
1102 /* no records by default */
1103 *total_recs = 0;
1104
1105 *total_pages = 0;
1106
1107 /* iterate over all user records on this level
1108 and compare each two adjacent ones, even the last on page
1109 X and the fist on page X+1 */
1110 for (;
1111 btr_pcur_is_on_user_rec(&pcur);
1112 btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
1113
1114 bool rec_is_last_on_page;
1115
1116 rec = btr_pcur_get_rec(&pcur);
1117
1118 /* If rec and prev_rec are on different pages, then prev_rec
1119 must have been copied, because we hold latch only on the page
1120 where rec resides. */
1121 if (prev_rec != NULL
1122 && page_align(rec) != page_align(prev_rec)) {
1123
1124 ut_a(prev_rec_is_copied);
1125 }
1126
1127 rec_is_last_on_page =
1128 page_rec_is_supremum(page_rec_get_next_const(rec));
1129
1130 /* increment the pages counter at the end of each page */
1131 if (rec_is_last_on_page) {
1132
1133 (*total_pages)++;
1134 }
1135
1136 /* Skip delete-marked records on the leaf level. If we
1137 do not skip them, then ANALYZE quickly after DELETE
1138 could count them or not (purge may have already wiped
1139 them away) which brings non-determinism. We skip only
1140 leaf-level delete marks because delete marks on
1141 non-leaf level do not make sense. */
1142
1143 if (level == 0
1144 && !srv_stats_include_delete_marked
1145 && rec_get_deleted_flag(
1146 rec,
1147 page_is_comp(btr_pcur_get_page(&pcur)))) {
1148
1149 if (rec_is_last_on_page
1150 && !prev_rec_is_copied
1151 && prev_rec != NULL) {
1152 /* copy prev_rec */
1153
1154 prev_rec_offsets = rec_get_offsets(
1155 prev_rec, index, prev_rec_offsets,
1156 true,
1157 n_uniq, &heap);
1158
1159 prev_rec = rec_copy_prefix_to_buf(
1160 prev_rec, index, n_uniq,
1161 &prev_rec_buf, &prev_rec_buf_size);
1162
1163 prev_rec_is_copied = true;
1164 }
1165
1166 continue;
1167 }
1168 rec_offsets = rec_get_offsets(
1169 rec, index, rec_offsets, !level, n_uniq, &heap);
1170
1171 (*total_recs)++;
1172
1173 if (prev_rec != NULL) {
1174 ulint matched_fields;
1175
1176 prev_rec_offsets = rec_get_offsets(
1177 prev_rec, index, prev_rec_offsets, !level,
1178 n_uniq, &heap);
1179
1180 cmp_rec_rec_with_match(rec,
1181 prev_rec,
1182 rec_offsets,
1183 prev_rec_offsets,
1184 index,
1185 FALSE,
1186 &matched_fields);
1187
1188 for (i = matched_fields; i < n_uniq; i++) {
1189
1190 if (n_diff_boundaries != NULL) {
1191 /* push the index of the previous
1192 record, that is - the last one from
1193 a group of equal keys */
1194
1195 ib_uint64_t idx;
1196
1197 /* the index of the current record
1198 is total_recs - 1, the index of the
1199 previous record is total_recs - 2;
1200 we know that idx is not going to
1201 become negative here because if we
1202 are in this branch then there is a
1203 previous record and thus
1204 total_recs >= 2 */
1205 idx = *total_recs - 2;
1206
1207 n_diff_boundaries[i].push_back(idx);
1208 }
1209
1210 /* increment the number of different keys
1211 for n_prefix=i+1 (e.g. if i=0 then we increment
1212 for n_prefix=1 which is stored in n_diff[0]) */
1213 n_diff[i]++;
1214 }
1215 } else {
1216 /* this is the first non-delete marked record */
1217 for (i = 0; i < n_uniq; i++) {
1218 n_diff[i] = 1;
1219 }
1220 }
1221
1222 if (rec_is_last_on_page) {
1223 /* end of a page has been reached */
1224
1225 /* we need to copy the record instead of assigning
1226 like prev_rec = rec; because when we traverse the
1227 records on this level at some point we will jump from
1228 one page to the next and then rec and prev_rec will
1229 be on different pages and
1230 btr_pcur_move_to_next_user_rec() will release the
1231 latch on the page that prev_rec is on */
1232 prev_rec = rec_copy_prefix_to_buf(
1233 rec, index, n_uniq,
1234 &prev_rec_buf, &prev_rec_buf_size);
1235 prev_rec_is_copied = true;
1236
1237 } else {
1238 /* still on the same page, the next call to
1239 btr_pcur_move_to_next_user_rec() will not jump
1240 on the next page, we can simply assign pointers
1241 instead of copying the records like above */
1242
1243 prev_rec = rec;
1244 prev_rec_is_copied = false;
1245 }
1246 }
1247
1248 /* if *total_pages is left untouched then the above loop was not
1249 entered at all and there is one page in the whole tree which is
1250 empty or the loop was entered but this is level 0, contains one page
1251 and all records are delete-marked */
1252 if (*total_pages == 0) {
1253
1254 ut_ad(level == 0);
1255 ut_ad(*total_recs == 0);
1256
1257 *total_pages = 1;
1258 }
1259
1260 /* if there are records on this level and boundaries
1261 should be saved */
1262 if (*total_recs > 0 && n_diff_boundaries != NULL) {
1263
1264 /* remember the index of the last record on the level as the
1265 last one from the last group of equal keys; this holds for
1266 all possible prefixes */
1267 for (i = 0; i < n_uniq; i++) {
1268 ib_uint64_t idx;
1269
1270 idx = *total_recs - 1;
1271
1272 n_diff_boundaries[i].push_back(idx);
1273 }
1274 }
1275
1276 /* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
1277 for i=0..n_uniq-1 */
1278
1279#ifdef UNIV_STATS_DEBUG
1280 for (i = 0; i < n_uniq; i++) {
1281
1282 DEBUG_PRINTF(" %s(): total recs: " UINT64PF
1283 ", total pages: " UINT64PF
1284 ", n_diff[" ULINTPF "]: " UINT64PF "\n",
1285 __func__, *total_recs,
1286 *total_pages,
1287 i, n_diff[i]);
1288
1289#if 0
1290 if (n_diff_boundaries != NULL) {
1291 ib_uint64_t j;
1292
1293 DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
1294 __func__, i);
1295
1296 for (j = 0; j < n_diff[i]; j++) {
1297 ib_uint64_t idx;
1298
1299 idx = n_diff_boundaries[i][j];
1300
1301 DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
1302 j, idx);
1303 }
1304 DEBUG_PRINTF("\n");
1305 }
1306#endif
1307 }
1308#endif /* UNIV_STATS_DEBUG */
1309
1310 /* Release the latch on the last page, because that is not done by
1311 btr_pcur_close(). This function works also for non-leaf pages. */
1312 btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
1313
1314 btr_pcur_close(&pcur);
1315 ut_free(prev_rec_buf);
1316 mem_heap_free(heap);
1317}
1318
1319/** Scan a page, reading records from left to right and counting the number
1320of distinct records (looking only at the first n_prefix
1321columns) and the number of external pages pointed by records from this page.
1322If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1323will return as soon as it finds a record that does not match its neighbor
1324to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1325returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1326equal) or 2 (the function found a non-boring record and returned).
1327@param[out] out_rec record, or NULL
1328@param[out] offsets1 rec_get_offsets() working space (must
1329be big enough)
1330@param[out] offsets2 rec_get_offsets() working space (must
1331be big enough)
1332@param[in] index index of the page
1333@param[in] page the page to scan
1334@param[in] n_prefix look at the first n_prefix columns
1335@param[in] is_leaf whether this is the leaf page
1336@param[out] n_diff number of distinct records encountered
1337@param[out] n_external_pages if this is non-NULL then it will be set
1338to the number of externally stored pages which were encountered
1339@return offsets1 or offsets2 (the offsets of *out_rec),
1340or NULL if the page is empty and does not contain user records. */
1341UNIV_INLINE
1342ulint*
1343dict_stats_scan_page(
1344 const rec_t** out_rec,
1345 ulint* offsets1,
1346 ulint* offsets2,
1347 const dict_index_t* index,
1348 const page_t* page,
1349 ulint n_prefix,
1350 bool is_leaf,
1351 ib_uint64_t* n_diff,
1352 ib_uint64_t* n_external_pages)
1353{
1354 ulint* offsets_rec = offsets1;
1355 ulint* offsets_next_rec = offsets2;
1356 const rec_t* rec;
1357 const rec_t* next_rec;
1358 /* A dummy heap, to be passed to rec_get_offsets().
1359 Because offsets1,offsets2 should be big enough,
1360 this memory heap should never be used. */
1361 mem_heap_t* heap = NULL;
1362 ut_ad(is_leaf == page_is_leaf(page));
1363 const rec_t* (*get_next)(const rec_t*)
1364 = !is_leaf || srv_stats_include_delete_marked
1365 ? page_rec_get_next_const
1366 : page_rec_get_next_non_del_marked;
1367
1368 const bool should_count_external_pages = n_external_pages != NULL;
1369
1370 if (should_count_external_pages) {
1371 *n_external_pages = 0;
1372 }
1373
1374 rec = get_next(page_get_infimum_rec(page));
1375
1376 if (page_rec_is_supremum(rec)) {
1377 /* the page is empty or contains only delete-marked records */
1378 *n_diff = 0;
1379 *out_rec = NULL;
1380 return(NULL);
1381 }
1382
1383 offsets_rec = rec_get_offsets(rec, index, offsets_rec, is_leaf,
1384 ULINT_UNDEFINED, &heap);
1385
1386 if (should_count_external_pages) {
1387 *n_external_pages += btr_rec_get_externally_stored_len(
1388 rec, offsets_rec);
1389 }
1390
1391 next_rec = get_next(rec);
1392
1393 *n_diff = 1;
1394
1395 while (!page_rec_is_supremum(next_rec)) {
1396
1397 ulint matched_fields;
1398
1399 offsets_next_rec = rec_get_offsets(next_rec, index,
1400 offsets_next_rec, is_leaf,
1401 ULINT_UNDEFINED,
1402 &heap);
1403
1404 /* check whether rec != next_rec when looking at
1405 the first n_prefix fields */
1406 cmp_rec_rec_with_match(rec, next_rec,
1407 offsets_rec, offsets_next_rec,
1408 index, FALSE, &matched_fields);
1409
1410 if (matched_fields < n_prefix) {
1411 /* rec != next_rec, => rec is non-boring */
1412
1413 (*n_diff)++;
1414
1415 if (!is_leaf) {
1416 break;
1417 }
1418 }
1419
1420 rec = next_rec;
1421 {
1422 /* Assign offsets_rec = offsets_next_rec
1423 so that offsets_rec matches with rec which
1424 was just assigned rec = next_rec above.
1425 Also need to point offsets_next_rec to the
1426 place where offsets_rec was pointing before
1427 because we have just 2 placeholders where
1428 data is actually stored:
1429 offsets1 and offsets2 and we
1430 are using them in circular fashion
1431 (offsets[_next]_rec are just pointers to
1432 those placeholders). */
1433 ulint* offsets_tmp;
1434 offsets_tmp = offsets_rec;
1435 offsets_rec = offsets_next_rec;
1436 offsets_next_rec = offsets_tmp;
1437 }
1438
1439 if (should_count_external_pages) {
1440 *n_external_pages += btr_rec_get_externally_stored_len(
1441 rec, offsets_rec);
1442 }
1443
1444 next_rec = get_next(next_rec);
1445 }
1446
1447 /* offsets1,offsets2 should have been big enough */
1448 ut_a(heap == NULL);
1449 *out_rec = rec;
1450 return(offsets_rec);
1451}
1452
1453/** Dive below the current position of a cursor and calculate the number of
1454distinct records on the leaf page, when looking at the fist n_prefix
1455columns. Also calculate the number of external pages pointed by records
1456on the leaf page.
1457@param[in] cur cursor
1458@param[in] n_prefix look at the first n_prefix columns
1459when comparing records
1460@param[out] n_diff number of distinct records
1461@param[out] n_external_pages number of external pages
1462@return number of distinct records on the leaf page */
1463static
1464void
1465dict_stats_analyze_index_below_cur(
1466 const btr_cur_t* cur,
1467 ulint n_prefix,
1468 ib_uint64_t* n_diff,
1469 ib_uint64_t* n_external_pages)
1470{
1471 dict_index_t* index;
1472 buf_block_t* block;
1473 const page_t* page;
1474 mem_heap_t* heap;
1475 const rec_t* rec;
1476 ulint* offsets1;
1477 ulint* offsets2;
1478 ulint* offsets_rec;
1479 ulint size;
1480 mtr_t mtr;
1481
1482 index = btr_cur_get_index(cur);
1483
1484 /* Allocate offsets for the record and the node pointer, for
1485 node pointer records. In a secondary index, the node pointer
1486 record will consist of all index fields followed by a child
1487 page number.
1488 Allocate space for the offsets header (the allocation size at
1489 offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
1490 so that this will never be less than the size calculated in
1491 rec_get_offsets_func(). */
1492 size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);
1493
1494 heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));
1495
1496 offsets1 = static_cast<ulint*>(mem_heap_alloc(
1497 heap, size * sizeof *offsets1));
1498
1499 offsets2 = static_cast<ulint*>(mem_heap_alloc(
1500 heap, size * sizeof *offsets2));
1501
1502 rec_offs_set_n_alloc(offsets1, size);
1503 rec_offs_set_n_alloc(offsets2, size);
1504
1505 rec = btr_cur_get_rec(cur);
1506 ut_ad(!page_rec_is_leaf(rec));
1507
1508 offsets_rec = rec_get_offsets(rec, index, offsets1, false,
1509 ULINT_UNDEFINED, &heap);
1510
1511 page_id_t page_id(index->table->space->id,
1512 btr_node_ptr_get_child_page_no(
1513 rec, offsets_rec));
1514 const page_size_t page_size(index->table->space->flags);
1515
1516 /* assume no external pages by default - in case we quit from this
1517 function without analyzing any leaf pages */
1518 *n_external_pages = 0;
1519
1520 mtr_start(&mtr);
1521
1522 /* descend to the leaf level on the B-tree */
1523 for (;;) {
1524
1525 dberr_t err = DB_SUCCESS;
1526
1527 block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
1528 NULL /* no guessed block */,
1529 BUF_GET, __FILE__, __LINE__, &mtr, &err);
1530
1531 page = buf_block_get_frame(block);
1532
1533 if (page_is_leaf(page)) {
1534 /* leaf level */
1535 break;
1536 }
1537 /* else */
1538
1539 /* search for the first non-boring record on the page */
1540 offsets_rec = dict_stats_scan_page(
1541 &rec, offsets1, offsets2, index, page, n_prefix,
1542 false, n_diff, NULL);
1543
1544 /* pages on level > 0 are not allowed to be empty */
1545 ut_a(offsets_rec != NULL);
1546 /* if page is not empty (offsets_rec != NULL) then n_diff must
1547 be > 0, otherwise there is a bug in dict_stats_scan_page() */
1548 ut_a(*n_diff > 0);
1549
1550 if (*n_diff == 1) {
1551 mtr_commit(&mtr);
1552
1553 /* page has all keys equal and the end of the page
1554 was reached by dict_stats_scan_page(), no need to
1555 descend to the leaf level */
1556 mem_heap_free(heap);
1557 /* can't get an estimate for n_external_pages here
1558 because we do not dive to the leaf level, assume no
1559 external pages (*n_external_pages was assigned to 0
1560 above). */
1561 return;
1562 }
1563 /* else */
1564
1565 /* when we instruct dict_stats_scan_page() to quit on the
1566 first non-boring record it finds, then the returned n_diff
1567 can either be 0 (empty page), 1 (page has all keys equal) or
1568 2 (non-boring record was found) */
1569 ut_a(*n_diff == 2);
1570
1571 /* we have a non-boring record in rec, descend below it */
1572
1573 page_id.set_page_no(
1574 btr_node_ptr_get_child_page_no(rec, offsets_rec));
1575 }
1576
1577 /* make sure we got a leaf page as a result from the above loop */
1578 ut_ad(page_is_leaf(page));
1579
1580 /* scan the leaf page and find the number of distinct keys,
1581 when looking only at the first n_prefix columns; also estimate
1582 the number of externally stored pages pointed by records on this
1583 page */
1584
1585 offsets_rec = dict_stats_scan_page(
1586 &rec, offsets1, offsets2, index, page, n_prefix,
1587 true, n_diff,
1588 n_external_pages);
1589
1590#if 0
1591 DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
1592 __func__, page_no, n_diff);
1593#endif
1594
1595 mtr_commit(&mtr);
1596 mem_heap_free(heap);
1597}
1598
1599/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
1600for each n-columns prefix (n from 1 to n_uniq). */
1601struct n_diff_data_t {
1602 /** Index of the level on which the descent through the btree
1603 stopped. level 0 is the leaf level. This is >= 1 because we
1604 avoid scanning the leaf level because it may contain too many
1605 pages and doing so is useless when combined with the random dives -
1606 if we are to scan the leaf level, this means a full scan and we can
1607 simply do that instead of fiddling with picking random records higher
1608 in the tree and to dive below them. At the start of the analyzing
1609 we may decide to do full scan of the leaf level, but then this
1610 structure is not used in that code path. */
1611 ulint level;
1612
1613 /** Number of records on the level where the descend through the btree
1614 stopped. When we scan the btree from the root, we stop at some mid
1615 level, choose some records from it and dive below them towards a leaf
1616 page to analyze. */
1617 ib_uint64_t n_recs_on_level;
1618
1619 /** Number of different key values that were found on the mid level. */
1620 ib_uint64_t n_diff_on_level;
1621
1622 /** Number of leaf pages that are analyzed. This is also the same as
1623 the number of records that we pick from the mid level and dive below
1624 them. */
1625 ib_uint64_t n_leaf_pages_to_analyze;
1626
1627 /** Cumulative sum of the number of different key values that were
1628 found on all analyzed pages. */
1629 ib_uint64_t n_diff_all_analyzed_pages;
1630
1631 /** Cumulative sum of the number of external pages (stored outside of
1632 the btree but in the same file segment). */
1633 ib_uint64_t n_external_pages_sum;
1634};
1635
1636/** Estimate the number of different key values in an index when looking at
1637the first n_prefix columns. For a given level in an index select
1638n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
1639them to the corresponding leaf pages, then scan those leaf pages and save the
1640sampling results in n_diff_data->n_diff_all_analyzed_pages.
1641@param[in] index index
1642@param[in] n_prefix look at first 'n_prefix' columns when
1643comparing records
1644@param[in] boundaries a vector that contains
1645n_diff_data->n_diff_on_level integers each of which represents the index (on
1646level 'level', counting from left/smallest to right/biggest from 0) of the
1647last record from each group of distinct keys
1648@param[in,out] n_diff_data n_diff_all_analyzed_pages and
1649n_external_pages_sum in this structure will be set by this function. The
1650members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
1651caller in advance - they are used by some calculations inside this function
1652@param[in,out] mtr mini-transaction */
1653static
1654void
1655dict_stats_analyze_index_for_n_prefix(
1656 dict_index_t* index,
1657 ulint n_prefix,
1658 const boundaries_t* boundaries,
1659 n_diff_data_t* n_diff_data,
1660 mtr_t* mtr)
1661{
1662 btr_pcur_t pcur;
1663 const page_t* page;
1664 ib_uint64_t rec_idx;
1665 ib_uint64_t i;
1666
1667#if 0
1668 DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
1669 " n_diff_on_level=" UINT64PF ")\n",
1670 __func__, index->table->name, index->name, level,
1671 n_prefix, n_diff_data->n_diff_on_level);
1672#endif
1673
1674 ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1675 MTR_MEMO_SX_LOCK));
1676
1677 /* Position pcur on the leftmost record on the leftmost page
1678 on the desired level. */
1679
1680 btr_pcur_open_at_index_side(
1681 true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
1682 &pcur, true, n_diff_data->level, mtr);
1683 btr_pcur_move_to_next_on_page(&pcur);
1684
1685 page = btr_pcur_get_page(&pcur);
1686
1687 const rec_t* first_rec = btr_pcur_get_rec(&pcur);
1688
1689 /* We shouldn't be scanning the leaf level. The caller of this function
1690 should have stopped the descend on level 1 or higher. */
1691 ut_ad(n_diff_data->level > 0);
1692 ut_ad(!page_is_leaf(page));
1693
1694 /* The page must not be empty, except when
1695 it is the root page (and the whole index is empty). */
1696 ut_ad(btr_pcur_is_on_user_rec(&pcur));
1697 ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
1698
1699 /* check that we are indeed on the desired level */
1700 ut_a(btr_page_get_level(page) == n_diff_data->level);
1701
1702 /* there should not be any pages on the left */
1703 ut_a(!page_has_prev(page));
1704
1705 /* check whether the first record on the leftmost page is marked
1706 as such; we are on a non-leaf level */
1707 ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
1708 & REC_INFO_MIN_REC_FLAG);
1709
1710 const ib_uint64_t last_idx_on_level = boundaries->at(
1711 static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));
1712
1713 rec_idx = 0;
1714
1715 n_diff_data->n_diff_all_analyzed_pages = 0;
1716 n_diff_data->n_external_pages_sum = 0;
1717
1718 for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
1719 /* there are n_diff_on_level elements
1720 in 'boundaries' and we divide those elements
1721 into n_leaf_pages_to_analyze segments, for example:
1722
1723 let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
1724 segment i=0: [0, 24]
1725 segment i=1: [25, 49]
1726 segment i=2: [50, 74]
1727 segment i=3: [75, 99] or
1728
1729 let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
1730 segment i=0: [0, 0] or
1731
1732 let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
1733 segment i=0: [0, 0]
1734 segment i=1: [1, 1] or
1735
1736 let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
1737 segment i=0: [0, 0]
1738 segment i=1: [1, 2]
1739 segment i=2: [3, 4]
1740 segment i=3: [5, 6]
1741 segment i=4: [7, 8]
1742 segment i=5: [9, 10]
1743 segment i=6: [11, 12]
1744
1745 then we select a random record from each segment and dive
1746 below it */
1747 const ib_uint64_t n_diff = n_diff_data->n_diff_on_level;
1748 const ib_uint64_t n_pick
1749 = n_diff_data->n_leaf_pages_to_analyze;
1750
1751 const ib_uint64_t left = n_diff * i / n_pick;
1752 const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1;
1753
1754 ut_a(left <= right);
1755 ut_a(right <= last_idx_on_level);
1756
1757 const ulint rnd = right == left ? 0 :
1758 ut_rnd_gen_ulint() % (right - left);
1759
1760 const ib_uint64_t dive_below_idx
1761 = boundaries->at(static_cast<unsigned>(left + rnd));
1762
1763#if 0
1764 DEBUG_PRINTF(" %s(): dive below record with index="
1765 UINT64PF "\n", __func__, dive_below_idx);
1766#endif
1767
1768 /* seek to the record with index dive_below_idx */
1769 while (rec_idx < dive_below_idx
1770 && btr_pcur_is_on_user_rec(&pcur)) {
1771
1772 btr_pcur_move_to_next_user_rec(&pcur, mtr);
1773 rec_idx++;
1774 }
1775
1776 /* if the level has finished before the record we are
1777 searching for, this means that the B-tree has changed in
1778 the meantime, quit our sampling and use whatever stats
1779 we have collected so far */
1780 if (rec_idx < dive_below_idx) {
1781
1782 ut_ad(!btr_pcur_is_on_user_rec(&pcur));
1783 break;
1784 }
1785
1786 /* it could be that the tree has changed in such a way that
1787 the record under dive_below_idx is the supremum record, in
1788 this case rec_idx == dive_below_idx and pcur is positioned
1789 on the supremum, we do not want to dive below it */
1790 if (!btr_pcur_is_on_user_rec(&pcur)) {
1791 break;
1792 }
1793
1794 ut_a(rec_idx == dive_below_idx);
1795
1796 ib_uint64_t n_diff_on_leaf_page;
1797 ib_uint64_t n_external_pages;
1798
1799 dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
1800 n_prefix,
1801 &n_diff_on_leaf_page,
1802 &n_external_pages);
1803
1804 /* We adjust n_diff_on_leaf_page here to avoid counting
1805 one value twice - once as the last on some page and once
1806 as the first on another page. Consider the following example:
1807 Leaf level:
1808 page: (2,2,2,2,3,3)
1809 ... many pages like (3,3,3,3,3,3) ...
1810 page: (3,3,3,3,5,5)
1811 ... many pages like (5,5,5,5,5,5) ...
1812 page: (5,5,5,5,8,8)
1813 page: (8,8,8,8,9,9)
1814 our algo would (correctly) get an estimate that there are
1815 2 distinct records per page (average). Having 4 pages below
1816 non-boring records, it would (wrongly) estimate the number
1817 of distinct records to 8. */
1818 if (n_diff_on_leaf_page > 0) {
1819 n_diff_on_leaf_page--;
1820 }
1821
1822 n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;
1823
1824 n_diff_data->n_external_pages_sum += n_external_pages;
1825 }
1826
1827 btr_pcur_close(&pcur);
1828}
1829
1830/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
1831@param[in] n_diff_data input data to use to derive the results
1832@param[in,out] index index whose stat_n_diff_key_vals[] to set */
1833UNIV_INLINE
1834void
1835dict_stats_index_set_n_diff(
1836 const n_diff_data_t* n_diff_data,
1837 dict_index_t* index)
1838{
1839 for (ulint n_prefix = dict_index_get_n_unique(index);
1840 n_prefix >= 1;
1841 n_prefix--) {
1842 /* n_diff_all_analyzed_pages can be 0 here if
1843 all the leaf pages sampled contained only
1844 delete-marked records. In this case we should assign
1845 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
1846 the formula below does. */
1847
1848 const n_diff_data_t* data = &n_diff_data[n_prefix - 1];
1849
1850 ut_ad(data->n_leaf_pages_to_analyze > 0);
1851 ut_ad(data->n_recs_on_level > 0);
1852
1853 ib_uint64_t n_ordinary_leaf_pages;
1854
1855 if (data->level == 1) {
1856 /* If we know the number of records on level 1, then
1857 this number is the same as the number of pages on
1858 level 0 (leaf). */
1859 n_ordinary_leaf_pages = data->n_recs_on_level;
1860 } else {
1861 /* If we analyzed D ordinary leaf pages and found E
1862 external pages in total linked from those D ordinary
1863 leaf pages, then this means that the ratio
1864 ordinary/external is D/E. Then the ratio ordinary/total
1865 is D / (D + E). Knowing that the total number of pages
1866 is T (including ordinary and external) then we estimate
1867 that the total number of ordinary leaf pages is
1868 T * D / (D + E). */
1869 n_ordinary_leaf_pages
1870 = index->stat_n_leaf_pages
1871 * data->n_leaf_pages_to_analyze
1872 / (data->n_leaf_pages_to_analyze
1873 + data->n_external_pages_sum);
1874 }
1875
1876 /* See REF01 for an explanation of the algorithm */
1877 index->stat_n_diff_key_vals[n_prefix - 1]
1878 = n_ordinary_leaf_pages
1879
1880 * data->n_diff_on_level
1881 / data->n_recs_on_level
1882
1883 * data->n_diff_all_analyzed_pages
1884 / data->n_leaf_pages_to_analyze;
1885
1886 index->stat_n_sample_sizes[n_prefix - 1]
1887 = data->n_leaf_pages_to_analyze;
1888
1889 DEBUG_PRINTF(" %s(): n_diff=" UINT64PF
1890 " for n_prefix=" ULINTPF
1891 " (" ULINTPF
1892 " * " UINT64PF " / " UINT64PF
1893 " * " UINT64PF " / " UINT64PF ")\n",
1894 __func__,
1895 index->stat_n_diff_key_vals[n_prefix - 1],
1896 n_prefix,
1897 index->stat_n_leaf_pages,
1898 data->n_diff_on_level,
1899 data->n_recs_on_level,
1900 data->n_diff_all_analyzed_pages,
1901 data->n_leaf_pages_to_analyze);
1902 }
1903}
1904
/*********************************************************************//**
Calculates new statistics for a given index and saves them to the index
members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
stat_n_leaf_pages. This function could be slow.

Outline of the work done here:
1. Spatial indexes are skipped without touching anything.
2. dict_stats_empty_index() is called, then stat_index_size and
stat_n_leaf_pages are filled in from btr_get_size() inside a short
mini-transaction that holds the index lock in S mode.
3. If the tree consists of the root only, or if
N_SAMPLE_PAGES(index) * n_uniq exceeds the number of leaf pages, the
whole leaf level is scanned and the results are stored directly in the
index.
4. Otherwise, for n_prefix = n_uniq down to 1, a non-leaf level is
chosen by descending from the root and dict_stats_analyze_index_for_n_prefix()
is called for that level; the collected n_diff_data[] is finally turned
into index statistics by dict_stats_index_set_n_diff(). If the height of
the tree changes in the middle of this, the loop is abandoned and
dict_stats_index_set_n_diff() is not called. */
static
void
dict_stats_analyze_index(
/*=====================*/
	dict_index_t*	index)	/*!< in/out: index to analyze */
{
	ulint		root_level;	/* height of the tree as returned by
					btr_height_get(); 0 means that the
					root is a leaf */
	ulint		level;		/* level that is currently being
					considered in the sampling loop */
	bool		level_is_analyzed; /* true if n_diff_on_level[],
					total_recs and n_diff_boundaries[]
					describe "level" */
	ulint		n_uniq;
	ulint		n_prefix;
	ib_uint64_t	total_recs;
	ib_uint64_t	total_pages;
	mtr_t		mtr;
	ulint		size;
	DBUG_ENTER("dict_stats_analyze_index");

	DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
			    dict_index_get_online_status(index)));

	/* Disable update statistic for Rtree */
	if (dict_index_is_spatial(index)) {
		DBUG_VOID_RETURN;
	}

	DEBUG_PRINTF("  %s(index=%s)\n", __func__, index->name());

	dict_stats_empty_index(index, false);

	mtr_start(&mtr);

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	/* Ask for the number of leaf pages only if the total size could
	be obtained; "size" is reused for the second result. */
	if (size != ULINT_UNDEFINED) {
		index->stat_index_size = size;
		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	}

	/* Release the X locks on the root page taken by btr_get_size() */
	mtr_commit(&mtr);

	switch (size) {
	case ULINT_UNDEFINED:
		/* One of the btr_get_size() calls above returned
		ULINT_UNDEFINED; give up and keep whatever the index
		members contain at this point. */
		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	case 0:
		/* The root node of the tree is a leaf */
		size = 1;
	}

	index->stat_n_leaf_pages = size;

	mtr_start(&mtr);

	mtr_sx_lock(dict_index_get_lock(index), &mtr);

	root_level = btr_height_get(index, &mtr);

	n_uniq = dict_index_get_n_unique(index);

	/* If the tree has just one level (and one page) or if the user
	has requested to sample too many pages then do full scan.

	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
	pages will be sampled. If that number is bigger than the total
	number of leaf pages then do full scan of the leaf level instead
	since it will be faster and will give better results. */

	if (root_level == 0
	    || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {

		if (root_level == 0) {
			DEBUG_PRINTF("  %s(): just one page,"
				     " doing full scan\n", __func__);
		} else {
			DEBUG_PRINTF("  %s(): too many pages requested for"
				     " sampling, doing full scan\n", __func__);
		}

		/* do full scan of level 0; save results directly
		into the index */

		dict_stats_analyze_index_level(index,
					       0 /* leaf level */,
					       index->stat_n_diff_key_vals,
					       &total_recs,
					       &total_pages,
					       NULL /* boundaries not needed */,
					       &mtr);

		/* Every prefix was computed from the same full scan, so
		all of them get the same sample size. */
		for (ulint i = 0; i < n_uniq; i++) {
			index->stat_n_sample_sizes[i] = total_pages;
		}

		mtr_commit(&mtr);

		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	}

	/* For each level that is being scanned in the btree, this contains the
	number of different key values for all possible n-column prefixes. */
	ib_uint64_t*	n_diff_on_level = UT_NEW_ARRAY(
		ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);

	/* For each level that is being scanned in the btree, this contains the
	index of the last record from each group of equal records (when
	comparing only the first n columns, n=1..n_uniq). */
	boundaries_t*	n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
							       n_uniq);

	/* For each n-column prefix this array contains the input data that is
	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
	n_diff_data_t*	n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);

	/* total_recs is also used to estimate the number of pages on one
	level below, so at the start we have 1 page (the root) */
	total_recs = 1;

	/* Here we use the following optimization:
	If we find that level L is the first one (searching from the
	root) that contains at least D distinct keys when looking at
	the first n_prefix columns, then:
	if we look at the first n_prefix-1 columns then the first
	level that contains D distinct keys will be either L or a
	lower one.
	So if we find that the first level containing D distinct
	keys (on n_prefix columns) is L, we continue from L when
	searching for D distinct keys on n_prefix-1 columns. */
	level = root_level;
	level_is_analyzed = false;

	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {

		DEBUG_PRINTF("  %s(): searching level with >=%llu "
			     "distinct records, n_prefix=" ULINTPF "\n",
			     __func__, N_DIFF_REQUIRED(index), n_prefix);

		/* Commit the mtr to release the tree SX lock to allow
		other threads to do some work too. */
		mtr_commit(&mtr);
		mtr_start(&mtr);
		mtr_sx_lock(dict_index_get_lock(index), &mtr);
		if (root_level != btr_height_get(index, &mtr)) {
			/* Just quit if the tree has changed beyond
			recognition here. The old stats from previous
			runs will remain in the values that we have
			not calculated yet. Initially when the index
			object is created the stats members are given
			some sensible values so leaving them untouched
			here even the first time will not cause us to
			read uninitialized memory later. */
			break;
		}

		/* check whether we should pick the current level;
		we pick level 1 even if it does not have enough
		distinct records because we do not want to scan the
		leaf level because it may contain too many records */
		if (level_is_analyzed
		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
			|| level == 1)) {

			goto found_level;
		}

		/* search for a level that contains enough distinct records */

		if (level_is_analyzed && level > 1) {

			/* if this does not hold we should be on
			"found_level" instead of here */
			ut_ad(n_diff_on_level[n_prefix - 1]
			      < N_DIFF_REQUIRED(index));

			level--;
			level_is_analyzed = false;
		}

		/* descend into the tree, searching for "good enough" level */
		for (;;) {

			/* make sure we do not scan the leaf level
			accidentally, it may contain too many pages */
			ut_ad(level > 0);

			/* scanning the same level twice is an optimization
			bug */
			ut_ad(!level_is_analyzed);

			/* Do not scan if this would read too many pages.
			Here we use the following fact:
			the number of pages on level L equals the number
			of records on level L+1, thus we deduce that the
			following call would scan total_recs pages, because
			total_recs is left from the previous iteration when
			we scanned one level upper or we have not scanned any
			levels yet in which case total_recs is 1. */
			if (total_recs > N_SAMPLE_PAGES(index)) {

				/* if the above cond is true then we are
				not at the root level since on the root
				level total_recs == 1 (set before we
				enter the n-prefix loop) and cannot
				be > N_SAMPLE_PAGES(index) */
				ut_a(level != root_level);

				/* step one level back and be satisfied with
				whatever it contains; nothing has been
				scanned since that level was analyzed, so
				n_diff_on_level[], n_diff_boundaries[] and
				total_recs still describe it */
				level++;
				level_is_analyzed = true;

				break;
			}

			dict_stats_analyze_index_level(index,
						       level,
						       n_diff_on_level,
						       &total_recs,
						       &total_pages,
						       n_diff_boundaries,
						       &mtr);

			level_is_analyzed = true;

			if (level == 1
			    || n_diff_on_level[n_prefix - 1]
			    >= N_DIFF_REQUIRED(index)) {
				/* we have reached the last level we could scan
				or we found a good level with many distinct
				records */
				break;
			}

			level--;
			level_is_analyzed = false;
		}
found_level:

		DEBUG_PRINTF("  %s(): found level " ULINTPF
			     " that has " UINT64PF
			     " distinct records for n_prefix=" ULINTPF "\n",
			     __func__, level, n_diff_on_level[n_prefix - 1],
			     n_prefix);
		/* here we are either on level 1 or the level that we are on
		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
		deeper levels because they would contain too many pages */

		ut_ad(level > 0);

		ut_ad(level_is_analyzed);

		/* if any of these is 0 then there is exactly one page in the
		B-tree and it is empty and we should have done full scan and
		should not be here */
		ut_ad(total_recs > 0);
		ut_ad(n_diff_on_level[n_prefix - 1] > 0);

		ut_ad(N_SAMPLE_PAGES(index) > 0);

		/* Record the input for dict_stats_index_set_n_diff() for
		this prefix length. */
		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		data->level = level;

		data->n_recs_on_level = total_recs;

		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];

		/* Never request more dives than there are distinct keys
		on the chosen level. */
		data->n_leaf_pages_to_analyze = std::min(
			N_SAMPLE_PAGES(index),
			n_diff_on_level[n_prefix - 1]);

		/* pick some records from this level and dive below them for
		the given n_prefix */

		dict_stats_analyze_index_for_n_prefix(
			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
			data, &mtr);
	}

	/* Commits the mini-transaction that was (re)started last, both
	when the loop ran to completion and when it was left via break. */
	mtr_commit(&mtr);

	UT_DELETE_ARRAY(n_diff_boundaries);

	UT_DELETE_ARRAY(n_diff_on_level);

	/* n_prefix == 0 means that the above loop did not end up prematurely
	due to tree being changed and so n_diff_data[] is set up. */
	if (n_prefix == 0) {
		dict_stats_index_set_n_diff(n_diff_data, index);
	}

	UT_DELETE_ARRAY(n_diff_data);

	dict_stats_assert_initialized_index(index);
	DBUG_VOID_RETURN;
}
2209
2210/*********************************************************************//**
2211Calculates new estimates for table and index statistics. This function
2212is relatively slow and is used to calculate persistent statistics that
2213will be saved on disk.
2214@return DB_SUCCESS or error code */
2215static
2216dberr_t
2217dict_stats_update_persistent(
2218/*=========================*/
2219 dict_table_t* table) /*!< in/out: table */
2220{
2221 dict_index_t* index;
2222
2223 DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
2224
2225 dict_table_stats_lock(table, RW_X_LATCH);
2226
2227 /* analyze the clustered index first */
2228
2229 index = dict_table_get_first_index(table);
2230
2231 if (index == NULL
2232 || index->is_corrupted()
2233 || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
2234
2235 /* Table definition is corrupt */
2236 dict_table_stats_unlock(table, RW_X_LATCH);
2237 dict_stats_empty_table(table, true);
2238
2239 return(DB_CORRUPTION);
2240 }
2241
2242 ut_ad(!dict_index_is_ibuf(index));
2243
2244 dict_stats_analyze_index(index);
2245
2246 ulint n_unique = dict_index_get_n_unique(index);
2247
2248 table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];
2249
2250 table->stat_clustered_index_size = index->stat_index_size;
2251
2252 /* analyze other indexes from the table, if any */
2253
2254 table->stat_sum_of_other_index_sizes = 0;
2255
2256 for (index = dict_table_get_next_index(index);
2257 index != NULL;
2258 index = dict_table_get_next_index(index)) {
2259
2260 ut_ad(!dict_index_is_ibuf(index));
2261
2262 if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
2263 continue;
2264 }
2265
2266 dict_stats_empty_index(index, false);
2267
2268 if (dict_stats_should_ignore_index(index)) {
2269 continue;
2270 }
2271
2272 if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
2273 dict_stats_analyze_index(index);
2274 }
2275
2276 table->stat_sum_of_other_index_sizes
2277 += index->stat_index_size;
2278 }
2279
2280 table->stats_last_recalc = ut_time();
2281
2282 table->stat_modified_counter = 0;
2283
2284 table->stat_initialized = TRUE;
2285
2286 dict_stats_assert_initialized(table);
2287
2288 dict_table_stats_unlock(table, RW_X_LATCH);
2289
2290 return(DB_SUCCESS);
2291}
2292
2293#include "mysql_com.h"
2294/** Save an individual index's statistic into the persistent statistics
2295storage.
2296@param[in] index index to be updated
2297@param[in] last_update timestamp of the stat
2298@param[in] stat_name name of the stat
2299@param[in] stat_value value of the stat
2300@param[in] sample_size n pages sampled or NULL
2301@param[in] stat_description description of the stat
2302@param[in,out] trx in case of NULL the function will
2303allocate and free the trx object. If it is not NULL then it will be
2304rolled back only in the case of error, but not freed.
2305@return DB_SUCCESS or error code */
2306dberr_t
2307dict_stats_save_index_stat(
2308 dict_index_t* index,
2309 ib_time_t last_update,
2310 const char* stat_name,
2311 ib_uint64_t stat_value,
2312 ib_uint64_t* sample_size,
2313 const char* stat_description,
2314 trx_t* trx)
2315{
2316 dberr_t ret;
2317 pars_info_t* pinfo;
2318 char db_utf8[MAX_DB_UTF8_LEN];
2319 char table_utf8[MAX_TABLE_UTF8_LEN];
2320
2321 ut_ad(!trx || trx->internal || trx->mysql_thd);
2322 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
2323 ut_ad(mutex_own(&dict_sys->mutex));
2324
2325 dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
2326 table_utf8, sizeof(table_utf8));
2327
2328 pinfo = pars_info_create();
2329 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2330 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2331 pars_info_add_str_literal(pinfo, "index_name", index->name);
2332 UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
2333 pars_info_add_int4_literal(pinfo, "last_update", uint32(last_update));
2334 UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
2335 pars_info_add_str_literal(pinfo, "stat_name", stat_name);
2336 UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
2337 pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
2338 if (sample_size != NULL) {
2339 UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
2340 pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
2341 } else {
2342 pars_info_add_literal(pinfo, "sample_size", NULL,
2343 UNIV_SQL_NULL, DATA_FIXBINARY, 0);
2344 }
2345 UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
2346 pars_info_add_str_literal(pinfo, "stat_description",
2347 stat_description);
2348
2349 ret = dict_stats_exec_sql(
2350 pinfo,
2351 "PROCEDURE INDEX_STATS_SAVE () IS\n"
2352 "BEGIN\n"
2353
2354 "DELETE FROM \"" INDEX_STATS_NAME "\"\n"
2355 "WHERE\n"
2356 "database_name = :database_name AND\n"
2357 "table_name = :table_name AND\n"
2358 "index_name = :index_name AND\n"
2359 "stat_name = :stat_name;\n"
2360
2361 "INSERT INTO \"" INDEX_STATS_NAME "\"\n"
2362 "VALUES\n"
2363 "(\n"
2364 ":database_name,\n"
2365 ":table_name,\n"
2366 ":index_name,\n"
2367 ":last_update,\n"
2368 ":stat_name,\n"
2369 ":stat_value,\n"
2370 ":sample_size,\n"
2371 ":stat_description\n"
2372 ");\n"
2373 "END;", trx);
2374
2375 if (ret != DB_SUCCESS) {
2376 if (innodb_index_stats_not_found == false &&
2377 index->stats_error_printed == false) {
2378 ib::error() << "Cannot save index statistics for table "
2379 << index->table->name
2380 << ", index " << index->name
2381 << ", stat name \"" << stat_name << "\": "
2382 << ut_strerr(ret);
2383 index->stats_error_printed = true;
2384 }
2385 }
2386
2387 return(ret);
2388}
2389
2390/** Report an error if updating table statistics failed because
2391.ibd file is missing, table decryption failed or table is corrupted.
2392@param[in,out] table Table
2393@param[in] defragment true if statistics is for defragment
2394@retval DB_DECRYPTION_FAILED if decryption of the table failed
2395@retval DB_TABLESPACE_DELETED if .ibd file is missing
2396@retval DB_CORRUPTION if table is marked as corrupted */
2397dberr_t
2398dict_stats_report_error(dict_table_t* table, bool defragment)
2399{
2400 dberr_t err;
2401
2402 const char* df = defragment ? " defragment" : "";
2403
2404 if (!table->space) {
2405 ib::warn() << "Cannot save" << df << " statistics for table "
2406 << table->name
2407 << " because the .ibd file is missing. "
2408 << TROUBLESHOOTING_MSG;
2409 err = DB_TABLESPACE_DELETED;
2410 } else {
2411 ib::warn() << "Cannot save" << df << " statistics for table "
2412 << table->name
2413 << " because file "
2414 << table->space->chain.start->name
2415 << (table->corrupted
2416 ? " is corrupted."
2417 : " cannot be decrypted.");
2418 err = table->corrupted ? DB_CORRUPTION : DB_DECRYPTION_FAILED;
2419 }
2420
2421 dict_stats_empty_table(table, defragment);
2422 return err;
2423}
2424
2425/** Save the table's statistics into the persistent statistics storage.
2426@param[in] table_orig table whose stats to save
2427@param[in] only_for_index if this is non-NULL, then stats for indexes
2428that are not equal to it will not be saved, if NULL, then all indexes' stats
2429are saved
2430@return DB_SUCCESS or error code */
2431static
2432dberr_t
2433dict_stats_save(
2434 dict_table_t* table_orig,
2435 const index_id_t* only_for_index)
2436{
2437 pars_info_t* pinfo;
2438 ib_time_t now;
2439 dberr_t ret;
2440 dict_table_t* table;
2441 char db_utf8[MAX_DB_UTF8_LEN];
2442 char table_utf8[MAX_TABLE_UTF8_LEN];
2443
2444 if (high_level_read_only) {
2445 return DB_READ_ONLY;
2446 }
2447
2448 if (!table_orig->is_readable()) {
2449 return (dict_stats_report_error(table_orig));
2450 }
2451
2452 table = dict_stats_snapshot_create(table_orig);
2453
2454 dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
2455 table_utf8, sizeof(table_utf8));
2456
2457 now = ut_time();
2458 rw_lock_x_lock(dict_operation_lock);
2459 mutex_enter(&dict_sys->mutex);
2460
2461 pinfo = pars_info_create();
2462
2463 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2464 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2465 pars_info_add_int4_literal(pinfo, "last_update", uint32(now));
2466 pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
2467 pars_info_add_ull_literal(pinfo, "clustered_index_size",
2468 table->stat_clustered_index_size);
2469 pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
2470 table->stat_sum_of_other_index_sizes);
2471
2472 ret = dict_stats_exec_sql(
2473 pinfo,
2474 "PROCEDURE TABLE_STATS_SAVE () IS\n"
2475 "BEGIN\n"
2476
2477 "DELETE FROM \"" TABLE_STATS_NAME "\"\n"
2478 "WHERE\n"
2479 "database_name = :database_name AND\n"
2480 "table_name = :table_name;\n"
2481
2482 "INSERT INTO \"" TABLE_STATS_NAME "\"\n"
2483 "VALUES\n"
2484 "(\n"
2485 ":database_name,\n"
2486 ":table_name,\n"
2487 ":last_update,\n"
2488 ":n_rows,\n"
2489 ":clustered_index_size,\n"
2490 ":sum_of_other_index_sizes\n"
2491 ");\n"
2492 "END;", NULL);
2493
2494 if (ret != DB_SUCCESS) {
2495 ib::error() << "Cannot save table statistics for table "
2496 << table->name << ": " << ut_strerr(ret);
2497
2498 mutex_exit(&dict_sys->mutex);
2499 rw_lock_x_unlock(dict_operation_lock);
2500
2501 dict_stats_snapshot_free(table);
2502
2503 return(ret);
2504 }
2505
2506 trx_t* trx = trx_create();
2507 trx_start_internal(trx);
2508
2509 dict_index_t* index;
2510 index_map_t indexes(
2511 (ut_strcmp_functor()),
2512 index_map_t_allocator(mem_key_dict_stats_index_map_t));
2513
2514 /* Below we do all the modifications in innodb_index_stats in a single
2515 transaction for performance reasons. Modifying more than one row in a
2516 single transaction may deadlock with other transactions if they
2517 lock the rows in different order. Other transaction could be for
2518 example when we DROP a table and do
2519 DELETE FROM innodb_index_stats WHERE database_name = '...'
2520 AND table_name = '...'; which will affect more than one row. To
2521 prevent deadlocks we always lock the rows in the same order - the
2522 order of the PK, which is (database_name, table_name, index_name,
2523 stat_name). This is why below we sort the indexes by name and then
2524 for each index, do the mods ordered by stat_name. */
2525
2526 for (index = dict_table_get_first_index(table);
2527 index != NULL;
2528 index = dict_table_get_next_index(index)) {
2529
2530 indexes[index->name] = index;
2531 }
2532
2533 index_map_t::const_iterator it;
2534
2535 for (it = indexes.begin(); it != indexes.end(); ++it) {
2536
2537 index = it->second;
2538
2539 if (only_for_index != NULL && index->id != *only_for_index) {
2540 continue;
2541 }
2542
2543 if (dict_stats_should_ignore_index(index)) {
2544 continue;
2545 }
2546
2547 ut_ad(!dict_index_is_ibuf(index));
2548
2549 for (unsigned i = 0; i < index->n_uniq; i++) {
2550
2551 char stat_name[16];
2552 char stat_description[1024];
2553
2554 snprintf(stat_name, sizeof(stat_name),
2555 "n_diff_pfx%02u", i + 1);
2556
2557 /* craft a string that contains the column names */
2558 snprintf(stat_description, sizeof(stat_description),
2559 "%s", index->fields[0].name());
2560 for (unsigned j = 1; j <= i; j++) {
2561 size_t len;
2562
2563 len = strlen(stat_description);
2564
2565 snprintf(stat_description + len,
2566 sizeof(stat_description) - len,
2567 ",%s", index->fields[j].name());
2568 }
2569
2570 ret = dict_stats_save_index_stat(
2571 index, now, stat_name,
2572 index->stat_n_diff_key_vals[i],
2573 &index->stat_n_sample_sizes[i],
2574 stat_description, trx);
2575
2576 if (ret != DB_SUCCESS) {
2577 goto end;
2578 }
2579 }
2580
2581 ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
2582 index->stat_n_leaf_pages,
2583 NULL,
2584 "Number of leaf pages "
2585 "in the index", trx);
2586 if (ret != DB_SUCCESS) {
2587 goto end;
2588 }
2589
2590 ret = dict_stats_save_index_stat(index, now, "size",
2591 index->stat_index_size,
2592 NULL,
2593 "Number of pages "
2594 "in the index", trx);
2595 if (ret != DB_SUCCESS) {
2596 goto end;
2597 }
2598 }
2599
2600 trx_commit_for_mysql(trx);
2601
2602end:
2603 trx_free(trx);
2604
2605 mutex_exit(&dict_sys->mutex);
2606 rw_lock_x_unlock(dict_operation_lock);
2607
2608 dict_stats_snapshot_free(table);
2609
2610 return(ret);
2611}
2612
2613/*********************************************************************//**
2614Called for the row that is selected by
2615SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2616The second argument is a pointer to the table and the fetched stats are
2617written to it.
2618@return non-NULL dummy */
2619static
2620ibool
2621dict_stats_fetch_table_stats_step(
2622/*==============================*/
2623 void* node_void, /*!< in: select node */
2624 void* table_void) /*!< out: table */
2625{
2626 sel_node_t* node = (sel_node_t*) node_void;
2627 dict_table_t* table = (dict_table_t*) table_void;
2628 que_common_t* cnode;
2629 int i;
2630
2631 /* this should loop exactly 3 times - for
2632 n_rows,clustered_index_size,sum_of_other_index_sizes */
2633 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2634 cnode != NULL;
2635 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2636 i++) {
2637
2638 const byte* data;
2639 dfield_t* dfield = que_node_get_val(cnode);
2640 dtype_t* type = dfield_get_type(dfield);
2641 ulint len = dfield_get_len(dfield);
2642
2643 data = static_cast<const byte*>(dfield_get_data(dfield));
2644
2645 switch (i) {
2646 case 0: /* mysql.innodb_table_stats.n_rows */
2647
2648 ut_a(dtype_get_mtype(type) == DATA_INT);
2649 ut_a(len == 8);
2650
2651 table->stat_n_rows = mach_read_from_8(data);
2652
2653 break;
2654
2655 case 1: /* mysql.innodb_table_stats.clustered_index_size */
2656
2657 ut_a(dtype_get_mtype(type) == DATA_INT);
2658 ut_a(len == 8);
2659
2660 table->stat_clustered_index_size
2661 = (ulint) mach_read_from_8(data);
2662
2663 break;
2664
2665 case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2666
2667 ut_a(dtype_get_mtype(type) == DATA_INT);
2668 ut_a(len == 8);
2669
2670 table->stat_sum_of_other_index_sizes
2671 = (ulint) mach_read_from_8(data);
2672
2673 break;
2674
2675 default:
2676
2677 /* someone changed SELECT
2678 n_rows,clustered_index_size,sum_of_other_index_sizes
2679 to select more columns from innodb_table_stats without
2680 adjusting here */
2681 ut_error;
2682 }
2683 }
2684
2685 /* if i < 3 this means someone changed the
2686 SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2687 to select less columns from innodb_table_stats without adjusting here;
2688 if i > 3 we would have ut_error'ed earlier */
2689 ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2690
2691 /* XXX this is not used but returning non-NULL is necessary */
2692 return(TRUE);
2693}
2694
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). The callback receives a pointer to
this struct as its opaque second argument. */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index stats were modified; the
				callback never resets it to false */
};
2702
2703/*********************************************************************//**
2704Called for the rows that are selected by
2705SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2706The second argument is a pointer to the table and the fetched stats are
2707written to its indexes.
2708Let a table has N indexes and each index has Ui unique columns for i=1..N,
2709then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2710So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2711N*AVG(Ui). In each call it searches for the currently fetched index into
2712table->indexes linearly, assuming this list is not sorted. Thus, overall,
2713fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2714is the number of indexes.
2715This can be improved if we sort table->indexes in a temporary area just once
2716and then search in that sorted list. Then the complexity will be O(N*log(N)).
2717We assume a table will not have more than 100 indexes, so we go with the
2718simpler N^2 algorithm.
2719@return non-NULL dummy */
2720static
2721ibool
2722dict_stats_fetch_index_stats_step(
2723/*==============================*/
2724 void* node_void, /*!< in: select node */
2725 void* arg_void) /*!< out: table + a flag that tells if we
2726 modified anything */
2727{
2728 sel_node_t* node = (sel_node_t*) node_void;
2729 index_fetch_t* arg = (index_fetch_t*) arg_void;
2730 dict_table_t* table = arg->table;
2731 dict_index_t* index = NULL;
2732 que_common_t* cnode;
2733 const char* stat_name = NULL;
2734 ulint stat_name_len = ULINT_UNDEFINED;
2735 ib_uint64_t stat_value = UINT64_UNDEFINED;
2736 ib_uint64_t sample_size = UINT64_UNDEFINED;
2737 int i;
2738
2739 /* this should loop exactly 4 times - for the columns that
2740 were selected: index_name,stat_name,stat_value,sample_size */
2741 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2742 cnode != NULL;
2743 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2744 i++) {
2745
2746 const byte* data;
2747 dfield_t* dfield = que_node_get_val(cnode);
2748 dtype_t* type = dfield_get_type(dfield);
2749 ulint len = dfield_get_len(dfield);
2750
2751 data = static_cast<const byte*>(dfield_get_data(dfield));
2752
2753 switch (i) {
2754 case 0: /* mysql.innodb_index_stats.index_name */
2755
2756 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2757
2758 /* search for index in table's indexes whose name
2759 matches data; the fetched index name is in data,
2760 has no terminating '\0' and has length len */
2761 for (index = dict_table_get_first_index(table);
2762 index != NULL;
2763 index = dict_table_get_next_index(index)) {
2764
2765 if (index->is_committed()
2766 && strlen(index->name) == len
2767 && memcmp(index->name, data, len) == 0) {
2768 /* the corresponding index was found */
2769 break;
2770 }
2771 }
2772
2773 /* if index is NULL here this means that
2774 mysql.innodb_index_stats contains more rows than the
2775 number of indexes in the table; this is ok, we just
2776 return ignoring those extra rows; in other words
2777 dict_stats_fetch_index_stats_step() has been called
2778 for a row from index_stats with unknown index_name
2779 column */
2780 if (index == NULL) {
2781
2782 return(TRUE);
2783 }
2784
2785 break;
2786
2787 case 1: /* mysql.innodb_index_stats.stat_name */
2788
2789 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2790
2791 ut_a(index != NULL);
2792
2793 stat_name = (const char*) data;
2794 stat_name_len = len;
2795
2796 break;
2797
2798 case 2: /* mysql.innodb_index_stats.stat_value */
2799
2800 ut_a(dtype_get_mtype(type) == DATA_INT);
2801 ut_a(len == 8);
2802
2803 ut_a(index != NULL);
2804 ut_a(stat_name != NULL);
2805 ut_a(stat_name_len != ULINT_UNDEFINED);
2806
2807 stat_value = mach_read_from_8(data);
2808
2809 break;
2810
2811 case 3: /* mysql.innodb_index_stats.sample_size */
2812
2813 ut_a(dtype_get_mtype(type) == DATA_INT);
2814 ut_a(len == 8 || len == UNIV_SQL_NULL);
2815
2816 ut_a(index != NULL);
2817 ut_a(stat_name != NULL);
2818 ut_a(stat_name_len != ULINT_UNDEFINED);
2819 ut_a(stat_value != UINT64_UNDEFINED);
2820
2821 if (len == UNIV_SQL_NULL) {
2822 break;
2823 }
2824 /* else */
2825
2826 sample_size = mach_read_from_8(data);
2827
2828 break;
2829
2830 default:
2831
2832 /* someone changed
2833 SELECT index_name,stat_name,stat_value,sample_size
2834 to select more columns from innodb_index_stats without
2835 adjusting here */
2836 ut_error;
2837 }
2838 }
2839
2840 /* if i < 4 this means someone changed the
2841 SELECT index_name,stat_name,stat_value,sample_size
2842 to select less columns from innodb_index_stats without adjusting here;
2843 if i > 4 we would have ut_error'ed earlier */
2844 ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
2845
2846 ut_a(index != NULL);
2847 ut_a(stat_name != NULL);
2848 ut_a(stat_name_len != ULINT_UNDEFINED);
2849 ut_a(stat_value != UINT64_UNDEFINED);
2850 /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
2851
2852#define PFX "n_diff_pfx"
2853#define PFX_LEN 10
2854
2855 if (stat_name_len == 4 /* strlen("size") */
2856 && strncasecmp("size", stat_name, stat_name_len) == 0) {
2857 index->stat_index_size = (ulint) stat_value;
2858 arg->stats_were_modified = true;
2859 } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
2860 && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
2861 == 0) {
2862 index->stat_n_leaf_pages = (ulint) stat_value;
2863 arg->stats_were_modified = true;
2864 } else if (stat_name_len == 12 /* strlen("n_page_split") */
2865 && strncasecmp("n_page_split", stat_name, stat_name_len)
2866 == 0) {
2867 index->stat_defrag_n_page_split = (ulint) stat_value;
2868 arg->stats_were_modified = true;
2869 } else if (stat_name_len == 13 /* strlen("n_pages_freed") */
2870 && strncasecmp("n_pages_freed", stat_name, stat_name_len)
2871 == 0) {
2872 index->stat_defrag_n_pages_freed = (ulint) stat_value;
2873 arg->stats_were_modified = true;
2874 } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
2875 && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
2876
2877 const char* num_ptr;
2878 unsigned long n_pfx;
2879
2880 /* point num_ptr into "1" from "n_diff_pfx12..." */
2881 num_ptr = stat_name + PFX_LEN;
2882
2883 /* stat_name should have exactly 2 chars appended to PFX
2884 and they should be digits */
2885 if (stat_name_len != PFX_LEN + 2
2886 || num_ptr[0] < '0' || num_ptr[0] > '9'
2887 || num_ptr[1] < '0' || num_ptr[1] > '9') {
2888
2889 char db_utf8[MAX_DB_UTF8_LEN];
2890 char table_utf8[MAX_TABLE_UTF8_LEN];
2891
2892 dict_fs2utf8(table->name.m_name,
2893 db_utf8, sizeof(db_utf8),
2894 table_utf8, sizeof(table_utf8));
2895
2896 ib::info out;
2897 out << "Ignoring strange row from "
2898 << INDEX_STATS_NAME_PRINT << " WHERE"
2899 " database_name = '" << db_utf8
2900 << "' AND table_name = '" << table_utf8
2901 << "' AND index_name = '" << index->name()
2902 << "' AND stat_name = '";
2903 out.write(stat_name, stat_name_len);
2904 out << "'; because stat_name is malformed";
2905 return(TRUE);
2906 }
2907 /* else */
2908
2909 /* extract 12 from "n_diff_pfx12..." into n_pfx
2910 note that stat_name does not have a terminating '\0' */
2911 n_pfx = ulong(num_ptr[0] - '0') * 10 + ulong(num_ptr[1] - '0');
2912
2913 ulint n_uniq = index->n_uniq;
2914
2915 if (n_pfx == 0 || n_pfx > n_uniq) {
2916
2917 char db_utf8[MAX_DB_UTF8_LEN];
2918 char table_utf8[MAX_TABLE_UTF8_LEN];
2919
2920 dict_fs2utf8(table->name.m_name,
2921 db_utf8, sizeof(db_utf8),
2922 table_utf8, sizeof(table_utf8));
2923
2924 ib::info out;
2925 out << "Ignoring strange row from "
2926 << INDEX_STATS_NAME_PRINT << " WHERE"
2927 " database_name = '" << db_utf8
2928 << "' AND table_name = '" << table_utf8
2929 << "' AND index_name = '" << index->name()
2930 << "' AND stat_name = '";
2931 out.write(stat_name, stat_name_len);
2932 out << "'; because stat_name is out of range, the index"
2933 " has " << n_uniq << " unique columns";
2934
2935 return(TRUE);
2936 }
2937 /* else */
2938
2939 index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
2940
2941 if (sample_size != UINT64_UNDEFINED) {
2942 index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
2943 } else {
2944 /* hmm, strange... the user must have UPDATEd the
2945 table manually and SET sample_size = NULL */
2946 index->stat_n_sample_sizes[n_pfx - 1] = 0;
2947 }
2948
2949 index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
2950
2951 arg->stats_were_modified = true;
2952 } else {
2953 /* silently ignore rows with unknown stat_name, the
2954 user may have developed her own stats */
2955 }
2956
2957 /* XXX this is not used but returning non-NULL is necessary */
2958 return(TRUE);
2959}
2960
2961/*********************************************************************//**
2962Read table's statistics from the persistent statistics storage.
2963@return DB_SUCCESS or error code */
2964static
2965dberr_t
2966dict_stats_fetch_from_ps(
2967/*=====================*/
2968 dict_table_t* table) /*!< in/out: table */
2969{
2970 index_fetch_t index_fetch_arg;
2971 trx_t* trx;
2972 pars_info_t* pinfo;
2973 dberr_t ret;
2974 char db_utf8[MAX_DB_UTF8_LEN];
2975 char table_utf8[MAX_TABLE_UTF8_LEN];
2976
2977 ut_ad(!mutex_own(&dict_sys->mutex));
2978
2979 /* Initialize all stats to dummy values before fetching because if
2980 the persistent storage contains incomplete stats (e.g. missing stats
2981 for some index) then we would end up with (partially) uninitialized
2982 stats. */
2983 dict_stats_empty_table(table, true);
2984
2985 trx = trx_create();
2986
2987 /* Use 'read-uncommitted' so that the SELECTs we execute
2988 do not get blocked in case some user has locked the rows we
2989 are SELECTing */
2990
2991 trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
2992
2993 if (srv_read_only_mode) {
2994 trx_start_internal_read_only(trx);
2995 } else {
2996 trx_start_internal(trx);
2997 }
2998
2999 dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
3000 table_utf8, sizeof(table_utf8));
3001
3002 pinfo = pars_info_create();
3003
3004 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3005
3006 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3007
3008 pars_info_bind_function(pinfo,
3009 "fetch_table_stats_step",
3010 dict_stats_fetch_table_stats_step,
3011 table);
3012
3013 index_fetch_arg.table = table;
3014 index_fetch_arg.stats_were_modified = false;
3015 pars_info_bind_function(pinfo,
3016 "fetch_index_stats_step",
3017 dict_stats_fetch_index_stats_step,
3018 &index_fetch_arg);
3019
3020 ret = que_eval_sql(pinfo,
3021 "PROCEDURE FETCH_STATS () IS\n"
3022 "found INT;\n"
3023 "DECLARE FUNCTION fetch_table_stats_step;\n"
3024 "DECLARE FUNCTION fetch_index_stats_step;\n"
3025 "DECLARE CURSOR table_stats_cur IS\n"
3026 " SELECT\n"
3027 /* if you change the selected fields, be
3028 sure to adjust
3029 dict_stats_fetch_table_stats_step() */
3030 " n_rows,\n"
3031 " clustered_index_size,\n"
3032 " sum_of_other_index_sizes\n"
3033 " FROM \"" TABLE_STATS_NAME "\"\n"
3034 " WHERE\n"
3035 " database_name = :database_name AND\n"
3036 " table_name = :table_name;\n"
3037 "DECLARE CURSOR index_stats_cur IS\n"
3038 " SELECT\n"
3039 /* if you change the selected fields, be
3040 sure to adjust
3041 dict_stats_fetch_index_stats_step() */
3042 " index_name,\n"
3043 " stat_name,\n"
3044 " stat_value,\n"
3045 " sample_size\n"
3046 " FROM \"" INDEX_STATS_NAME "\"\n"
3047 " WHERE\n"
3048 " database_name = :database_name AND\n"
3049 " table_name = :table_name;\n"
3050
3051 "BEGIN\n"
3052
3053 "OPEN table_stats_cur;\n"
3054 "FETCH table_stats_cur INTO\n"
3055 " fetch_table_stats_step();\n"
3056 "IF (SQL % NOTFOUND) THEN\n"
3057 " CLOSE table_stats_cur;\n"
3058 " RETURN;\n"
3059 "END IF;\n"
3060 "CLOSE table_stats_cur;\n"
3061
3062 "OPEN index_stats_cur;\n"
3063 "found := 1;\n"
3064 "WHILE found = 1 LOOP\n"
3065 " FETCH index_stats_cur INTO\n"
3066 " fetch_index_stats_step();\n"
3067 " IF (SQL % NOTFOUND) THEN\n"
3068 " found := 0;\n"
3069 " END IF;\n"
3070 "END LOOP;\n"
3071 "CLOSE index_stats_cur;\n"
3072
3073 "END;",
3074 TRUE, trx);
3075 /* pinfo is freed by que_eval_sql() */
3076
3077 trx_commit_for_mysql(trx);
3078
3079 trx_free(trx);
3080
3081 if (!index_fetch_arg.stats_were_modified) {
3082 return(DB_STATS_DO_NOT_EXIST);
3083 }
3084
3085 return(ret);
3086}
3087
3088/*********************************************************************//**
3089Clear defragmentation stats modified counter for all indices in table. */
3090static
3091void
3092dict_stats_empty_defrag_modified_counter(
3093 dict_table_t* table) /*!< in: table */
3094{
3095 dict_index_t* index;
3096 ut_a(table);
3097 for (index = dict_table_get_first_index(table);
3098 index != NULL;
3099 index = dict_table_get_next_index(index)) {
3100 index->stat_defrag_modified_counter = 0;
3101 }
3102}
3103
3104/*********************************************************************//**
3105Fetches or calculates new estimates for index statistics. */
3106void
3107dict_stats_update_for_index(
3108/*========================*/
3109 dict_index_t* index) /*!< in/out: index */
3110{
3111 DBUG_ENTER("dict_stats_update_for_index");
3112
3113 ut_ad(!mutex_own(&dict_sys->mutex));
3114
3115 if (dict_stats_is_persistent_enabled(index->table)) {
3116
3117 if (dict_stats_persistent_storage_check(false)) {
3118 dict_table_stats_lock(index->table, RW_X_LATCH);
3119 dict_stats_analyze_index(index);
3120 dict_table_stats_unlock(index->table, RW_X_LATCH);
3121 dict_stats_save(index->table, &index->id);
3122 DBUG_VOID_RETURN;
3123 }
3124 /* else */
3125
3126 if (innodb_index_stats_not_found == false &&
3127 index->stats_error_printed == false) {
3128 /* Fall back to transient stats since the persistent
3129 storage is not present or is corrupted */
3130
3131 ib::info() << "Recalculation of persistent statistics"
3132 " requested for table " << index->table->name
3133 << " index " << index->name
3134 << " but the required"
3135 " persistent statistics storage is not present or is"
3136 " corrupted. Using transient stats instead.";
3137 index->stats_error_printed = false;
3138 }
3139 }
3140
3141 dict_table_stats_lock(index->table, RW_X_LATCH);
3142 dict_stats_update_transient_for_index(index);
3143 dict_table_stats_unlock(index->table, RW_X_LATCH);
3144
3145 DBUG_VOID_RETURN;
3146}
3147
3148/*********************************************************************//**
3149Calculates new estimates for table and index statistics. The statistics
3150are used in query optimization.
3151@return DB_SUCCESS or error code */
3152dberr_t
3153dict_stats_update(
3154/*==============*/
3155 dict_table_t* table, /*!< in/out: table */
3156 dict_stats_upd_option_t stats_upd_option)
3157 /*!< in: whether to (re) calc
3158 the stats or to fetch them from
3159 the persistent statistics
3160 storage */
3161{
3162 ut_ad(!mutex_own(&dict_sys->mutex));
3163
3164 if (!table->is_readable()) {
3165 return (dict_stats_report_error(table));
3166 } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
3167 /* If we have set a high innodb_force_recovery level, do
3168 not calculate statistics, as a badly corrupted index can
3169 cause a crash in it. */
3170 dict_stats_empty_table(table, false);
3171 return(DB_SUCCESS);
3172 }
3173
3174 switch (stats_upd_option) {
3175 case DICT_STATS_RECALC_PERSISTENT:
3176
3177 if (srv_read_only_mode) {
3178 goto transient;
3179 }
3180
3181 /* Persistent recalculation requested, called from
3182 1) ANALYZE TABLE, or
3183 2) the auto recalculation background thread, or
3184 3) open table if stats do not exist on disk and auto recalc
3185 is enabled */
3186
3187 /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3188 persistent stats enabled */
3189 ut_a(strchr(table->name.m_name, '/') != NULL);
3190
3191 /* check if the persistent statistics storage exists
3192 before calling the potentially slow function
3193 dict_stats_update_persistent(); that is a
3194 prerequisite for dict_stats_save() succeeding */
3195 if (dict_stats_persistent_storage_check(false)) {
3196
3197 dberr_t err;
3198
3199 err = dict_stats_update_persistent(table);
3200
3201 if (err != DB_SUCCESS) {
3202 return(err);
3203 }
3204
3205 err = dict_stats_save(table, NULL);
3206
3207 return(err);
3208 }
3209
3210 /* Fall back to transient stats since the persistent
3211 storage is not present or is corrupted */
3212
3213 if (innodb_table_stats_not_found == false &&
3214 table->stats_error_printed == false) {
3215 ib::warn() << "Recalculation of persistent statistics"
3216 " requested for table "
3217 << table->name
3218 << " but the required persistent"
3219 " statistics storage is not present or is corrupted."
3220 " Using transient stats instead.";
3221 table->stats_error_printed = true;
3222 }
3223
3224 goto transient;
3225
3226 case DICT_STATS_RECALC_TRANSIENT:
3227
3228 goto transient;
3229
3230 case DICT_STATS_EMPTY_TABLE:
3231
3232 dict_stats_empty_table(table, true);
3233
3234 /* If table is using persistent stats,
3235 then save the stats on disk */
3236
3237 if (dict_stats_is_persistent_enabled(table)) {
3238
3239 if (dict_stats_persistent_storage_check(false)) {
3240
3241 return(dict_stats_save(table, NULL));
3242 }
3243
3244 return(DB_STATS_DO_NOT_EXIST);
3245 }
3246
3247 return(DB_SUCCESS);
3248
3249 case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
3250
3251 /* fetch requested, either fetch from persistent statistics
3252 storage or use the old method */
3253
3254 if (table->stat_initialized) {
3255 return(DB_SUCCESS);
3256 }
3257
3258 /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3259 persistent stats enabled */
3260 ut_a(strchr(table->name.m_name, '/') != NULL);
3261
3262 if (!dict_stats_persistent_storage_check(false)) {
3263 /* persistent statistics storage does not exist
3264 or is corrupted, calculate the transient stats */
3265
3266 if (innodb_table_stats_not_found == false &&
3267 table->stats_error_printed == false) {
3268 ib::error() << "Fetch of persistent statistics"
3269 " requested for table "
3270 << table->name
3271 << " but the required system tables "
3272 << TABLE_STATS_NAME_PRINT
3273 << " and " << INDEX_STATS_NAME_PRINT
3274 << " are not present or have unexpected"
3275 " structure. Using transient stats instead.";
3276 table->stats_error_printed = true;
3277 }
3278
3279 goto transient;
3280 }
3281
3282 dict_table_t* t;
3283
3284 /* Create a dummy table object with the same name and
3285 indexes, suitable for fetching the stats into it. */
3286 t = dict_stats_table_clone_create(table);
3287
3288 dberr_t err = dict_stats_fetch_from_ps(t);
3289
3290 t->stats_last_recalc = table->stats_last_recalc;
3291 t->stat_modified_counter = 0;
3292 dict_stats_empty_defrag_modified_counter(t);
3293
3294 switch (err) {
3295 case DB_SUCCESS:
3296
3297 dict_table_stats_lock(table, RW_X_LATCH);
3298
3299 /* Pass reset_ignored_indexes=true as parameter
3300 to dict_stats_copy. This will cause statictics
3301 for corrupted indexes to be set to empty values */
3302 dict_stats_copy(table, t, true);
3303
3304 dict_stats_assert_initialized(table);
3305
3306 dict_table_stats_unlock(table, RW_X_LATCH);
3307
3308 dict_stats_table_clone_free(t);
3309
3310 return(DB_SUCCESS);
3311 case DB_STATS_DO_NOT_EXIST:
3312
3313 dict_stats_table_clone_free(t);
3314
3315 if (srv_read_only_mode) {
3316 goto transient;
3317 }
3318
3319 if (dict_stats_auto_recalc_is_enabled(table)) {
3320 return(dict_stats_update(
3321 table,
3322 DICT_STATS_RECALC_PERSISTENT));
3323 }
3324
3325 ib::info() << "Trying to use table " << table->name
3326 << " which has persistent statistics enabled,"
3327 " but auto recalculation turned off and the"
3328 " statistics do not exist in "
3329 TABLE_STATS_NAME_PRINT
3330 " and " INDEX_STATS_NAME_PRINT
3331 ". Please either run \"ANALYZE TABLE "
3332 << table->name << ";\" manually or enable the"
3333 " auto recalculation with \"ALTER TABLE "
3334 << table->name << " STATS_AUTO_RECALC=1;\"."
3335 " InnoDB will now use transient statistics for "
3336 << table->name << ".";
3337
3338 goto transient;
3339 default:
3340
3341 dict_stats_table_clone_free(t);
3342
3343 if (innodb_table_stats_not_found == false &&
3344 table->stats_error_printed == false) {
3345 ib::error() << "Error fetching persistent statistics"
3346 " for table "
3347 << table->name
3348 << " from " TABLE_STATS_NAME_PRINT " and "
3349 INDEX_STATS_NAME_PRINT ": " << ut_strerr(err)
3350 << ". Using transient stats method instead.";
3351 }
3352
3353 goto transient;
3354 }
3355 /* no "default:" in order to produce a compilation warning
3356 about unhandled enumeration value */
3357 }
3358
3359transient:
3360
3361 dict_table_stats_lock(table, RW_X_LATCH);
3362
3363 dict_stats_update_transient(table);
3364
3365 dict_table_stats_unlock(table, RW_X_LATCH);
3366
3367 return(DB_SUCCESS);
3368}
3369
3370/*********************************************************************//**
3371Removes the information for a particular index's stats from the persistent
3372storage if it exists and if there is data stored for this index.
3373This function creates its own trx and commits it.
3374A note from Marko why we cannot edit user and sys_* tables in one trx:
3375marko: The problem is that ibuf merges should be disabled while we are
3376rolling back dict transactions.
3377marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
3378But we shouldn't open *.ibd files before we have rolled back dict
3379transactions and opened the SYS_* records for the *.ibd files.
3380@return DB_SUCCESS or error code */
3381dberr_t
3382dict_stats_drop_index(
3383/*==================*/
3384 const char* db_and_table,/*!< in: db and table, e.g. 'db/table' */
3385 const char* iname, /*!< in: index name */
3386 char* errstr, /*!< out: error message if != DB_SUCCESS
3387 is returned */
3388 ulint errstr_sz)/*!< in: size of the errstr buffer */
3389{
3390 char db_utf8[MAX_DB_UTF8_LEN];
3391 char table_utf8[MAX_TABLE_UTF8_LEN];
3392 pars_info_t* pinfo;
3393 dberr_t ret;
3394
3395 ut_ad(!mutex_own(&dict_sys->mutex));
3396
3397 /* skip indexes whose table names do not contain a database name
3398 e.g. if we are dropping an index from SYS_TABLES */
3399 if (strchr(db_and_table, '/') == NULL) {
3400
3401 return(DB_SUCCESS);
3402 }
3403
3404 dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3405 table_utf8, sizeof(table_utf8));
3406
3407 pinfo = pars_info_create();
3408
3409 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3410
3411 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3412
3413 pars_info_add_str_literal(pinfo, "index_name", iname);
3414
3415 rw_lock_x_lock(dict_operation_lock);
3416 mutex_enter(&dict_sys->mutex);
3417
3418 ret = dict_stats_exec_sql(
3419 pinfo,
3420 "PROCEDURE DROP_INDEX_STATS () IS\n"
3421 "BEGIN\n"
3422 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3423 "database_name = :database_name AND\n"
3424 "table_name = :table_name AND\n"
3425 "index_name = :index_name;\n"
3426 "END;\n", NULL);
3427
3428 mutex_exit(&dict_sys->mutex);
3429 rw_lock_x_unlock(dict_operation_lock);
3430
3431 if (ret == DB_STATS_DO_NOT_EXIST) {
3432 ret = DB_SUCCESS;
3433 }
3434
3435 if (ret != DB_SUCCESS) {
3436 snprintf(errstr, errstr_sz,
3437 "Unable to delete statistics for index %s"
3438 " from %s%s: %s. They can be deleted later using"
3439 " DELETE FROM %s WHERE"
3440 " database_name = '%s' AND"
3441 " table_name = '%s' AND"
3442 " index_name = '%s';",
3443 iname,
3444 INDEX_STATS_NAME_PRINT,
3445 (ret == DB_LOCK_WAIT_TIMEOUT
3446 ? " because the rows are locked"
3447 : ""),
3448 ut_strerr(ret),
3449 INDEX_STATS_NAME_PRINT,
3450 db_utf8,
3451 table_utf8,
3452 iname);
3453
3454 ut_print_timestamp(stderr);
3455 fprintf(stderr, " InnoDB: %s\n", errstr);
3456 }
3457
3458 return(ret);
3459}
3460
3461/*********************************************************************//**
3462Executes
3463DELETE FROM mysql.innodb_table_stats
3464WHERE database_name = '...' AND table_name = '...';
3465Creates its own transaction and commits it.
3466@return DB_SUCCESS or error code */
3467UNIV_INLINE
3468dberr_t
3469dict_stats_delete_from_table_stats(
3470/*===============================*/
3471 const char* database_name, /*!< in: database name, e.g. 'db' */
3472 const char* table_name) /*!< in: table name, e.g. 'table' */
3473{
3474 pars_info_t* pinfo;
3475 dberr_t ret;
3476
3477 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3478 ut_ad(mutex_own(&dict_sys->mutex));
3479
3480 pinfo = pars_info_create();
3481
3482 pars_info_add_str_literal(pinfo, "database_name", database_name);
3483 pars_info_add_str_literal(pinfo, "table_name", table_name);
3484
3485 ret = dict_stats_exec_sql(
3486 pinfo,
3487 "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3488 "BEGIN\n"
3489 "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3490 "database_name = :database_name AND\n"
3491 "table_name = :table_name;\n"
3492 "END;\n", NULL);
3493
3494 return(ret);
3495}
3496
3497/*********************************************************************//**
3498Executes
3499DELETE FROM mysql.innodb_index_stats
3500WHERE database_name = '...' AND table_name = '...';
3501Creates its own transaction and commits it.
3502@return DB_SUCCESS or error code */
3503UNIV_INLINE
3504dberr_t
3505dict_stats_delete_from_index_stats(
3506/*===============================*/
3507 const char* database_name, /*!< in: database name, e.g. 'db' */
3508 const char* table_name) /*!< in: table name, e.g. 'table' */
3509{
3510 pars_info_t* pinfo;
3511 dberr_t ret;
3512
3513 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3514 ut_ad(mutex_own(&dict_sys->mutex));
3515
3516 pinfo = pars_info_create();
3517
3518 pars_info_add_str_literal(pinfo, "database_name", database_name);
3519 pars_info_add_str_literal(pinfo, "table_name", table_name);
3520
3521 ret = dict_stats_exec_sql(
3522 pinfo,
3523 "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3524 "BEGIN\n"
3525 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3526 "database_name = :database_name AND\n"
3527 "table_name = :table_name;\n"
3528 "END;\n", NULL);
3529
3530 return(ret);
3531}
3532
3533/*********************************************************************//**
3534Removes the statistics for a table and all of its indexes from the
3535persistent statistics storage if it exists and if there is data stored for
3536the table. This function creates its own transaction and commits it.
3537@return DB_SUCCESS or error code */
3538dberr_t
3539dict_stats_drop_table(
3540/*==================*/
3541 const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */
3542 char* errstr, /*!< out: error message
3543 if != DB_SUCCESS is returned */
3544 ulint errstr_sz) /*!< in: size of errstr buffer */
3545{
3546 char db_utf8[MAX_DB_UTF8_LEN];
3547 char table_utf8[MAX_TABLE_UTF8_LEN];
3548 dberr_t ret;
3549
3550 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3551 ut_ad(mutex_own(&dict_sys->mutex));
3552
3553 /* skip tables that do not contain a database name
3554 e.g. if we are dropping SYS_TABLES */
3555 if (strchr(db_and_table, '/') == NULL) {
3556
3557 return(DB_SUCCESS);
3558 }
3559
3560 /* skip innodb_table_stats and innodb_index_stats themselves */
3561 if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
3562 || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
3563
3564 return(DB_SUCCESS);
3565 }
3566
3567 dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3568 table_utf8, sizeof(table_utf8));
3569
3570 ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3571
3572 if (ret == DB_SUCCESS) {
3573 ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3574 }
3575
3576 if (ret == DB_STATS_DO_NOT_EXIST) {
3577 ret = DB_SUCCESS;
3578 }
3579
3580 if (ret != DB_SUCCESS) {
3581
3582 snprintf(errstr, errstr_sz,
3583 "Unable to delete statistics for table %s.%s: %s."
3584 " They can be deleted later using"
3585
3586 " DELETE FROM %s WHERE"
3587 " database_name = '%s' AND"
3588 " table_name = '%s';"
3589
3590 " DELETE FROM %s WHERE"
3591 " database_name = '%s' AND"
3592 " table_name = '%s';",
3593
3594 db_utf8, table_utf8,
3595 ut_strerr(ret),
3596
3597 INDEX_STATS_NAME_PRINT,
3598 db_utf8, table_utf8,
3599
3600 TABLE_STATS_NAME_PRINT,
3601 db_utf8, table_utf8);
3602 }
3603
3604 return(ret);
3605}
3606
3607/*********************************************************************//**
3608Executes
3609UPDATE mysql.innodb_table_stats SET
3610database_name = '...', table_name = '...'
3611WHERE database_name = '...' AND table_name = '...';
3612Creates its own transaction and commits it.
3613@return DB_SUCCESS or error code */
3614UNIV_INLINE
3615dberr_t
3616dict_stats_rename_table_in_table_stats(
3617/*===================================*/
3618 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3619 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3620 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3621 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3622{
3623 pars_info_t* pinfo;
3624 dberr_t ret;
3625
3626 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3627 ut_ad(mutex_own(&dict_sys->mutex));
3628
3629 pinfo = pars_info_create();
3630
3631 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3632 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3633 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3634 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3635
3636 ret = dict_stats_exec_sql(
3637 pinfo,
3638 "PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
3639 "BEGIN\n"
3640 "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3641 "database_name = :new_dbname_utf8,\n"
3642 "table_name = :new_tablename_utf8\n"
3643 "WHERE\n"
3644 "database_name = :old_dbname_utf8 AND\n"
3645 "table_name = :old_tablename_utf8;\n"
3646 "END;\n", NULL);
3647
3648 return(ret);
3649}
3650
3651/*********************************************************************//**
3652Executes
3653UPDATE mysql.innodb_index_stats SET
3654database_name = '...', table_name = '...'
3655WHERE database_name = '...' AND table_name = '...';
3656Creates its own transaction and commits it.
3657@return DB_SUCCESS or error code */
3658UNIV_INLINE
3659dberr_t
3660dict_stats_rename_table_in_index_stats(
3661/*===================================*/
3662 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3663 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3664 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3665 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3666{
3667 pars_info_t* pinfo;
3668 dberr_t ret;
3669
3670 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3671 ut_ad(mutex_own(&dict_sys->mutex));
3672
3673 pinfo = pars_info_create();
3674
3675 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3676 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3677 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3678 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3679
3680 ret = dict_stats_exec_sql(
3681 pinfo,
3682 "PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
3683 "BEGIN\n"
3684 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3685 "database_name = :new_dbname_utf8,\n"
3686 "table_name = :new_tablename_utf8\n"
3687 "WHERE\n"
3688 "database_name = :old_dbname_utf8 AND\n"
3689 "table_name = :old_tablename_utf8;\n"
3690 "END;\n", NULL);
3691
3692 return(ret);
3693}
3694
3695/*********************************************************************//**
3696Renames a table in InnoDB persistent stats storage.
3697This function creates its own transaction and commits it.
3698@return DB_SUCCESS or error code */
3699dberr_t
3700dict_stats_rename_table(
3701/*====================*/
3702 const char* old_name, /*!< in: old name, e.g. 'db/table' */
3703 const char* new_name, /*!< in: new name, e.g. 'db/table' */
3704 char* errstr, /*!< out: error string if != DB_SUCCESS
3705 is returned */
3706 size_t errstr_sz) /*!< in: errstr size */
3707{
3708 char old_db_utf8[MAX_DB_UTF8_LEN];
3709 char new_db_utf8[MAX_DB_UTF8_LEN];
3710 char old_table_utf8[MAX_TABLE_UTF8_LEN];
3711 char new_table_utf8[MAX_TABLE_UTF8_LEN];
3712 dberr_t ret;
3713
3714 ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X));
3715 ut_ad(!mutex_own(&dict_sys->mutex));
3716
3717 /* skip innodb_table_stats and innodb_index_stats themselves */
3718 if (strcmp(old_name, TABLE_STATS_NAME) == 0
3719 || strcmp(old_name, INDEX_STATS_NAME) == 0
3720 || strcmp(new_name, TABLE_STATS_NAME) == 0
3721 || strcmp(new_name, INDEX_STATS_NAME) == 0) {
3722
3723 return(DB_SUCCESS);
3724 }
3725
3726 dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
3727 old_table_utf8, sizeof(old_table_utf8));
3728
3729 dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
3730 new_table_utf8, sizeof(new_table_utf8));
3731
3732 rw_lock_x_lock(dict_operation_lock);
3733 mutex_enter(&dict_sys->mutex);
3734
3735 ulint n_attempts = 0;
3736 do {
3737 n_attempts++;
3738
3739 ret = dict_stats_rename_table_in_table_stats(
3740 old_db_utf8, old_table_utf8,
3741 new_db_utf8, new_table_utf8);
3742
3743 if (ret == DB_DUPLICATE_KEY) {
3744 dict_stats_delete_from_table_stats(
3745 new_db_utf8, new_table_utf8);
3746 }
3747
3748 if (ret == DB_STATS_DO_NOT_EXIST) {
3749 ret = DB_SUCCESS;
3750 }
3751
3752 if (ret != DB_SUCCESS) {
3753 mutex_exit(&dict_sys->mutex);
3754 rw_lock_x_unlock(dict_operation_lock);
3755 os_thread_sleep(200000 /* 0.2 sec */);
3756 rw_lock_x_lock(dict_operation_lock);
3757 mutex_enter(&dict_sys->mutex);
3758 }
3759 } while ((ret == DB_DEADLOCK
3760 || ret == DB_DUPLICATE_KEY
3761 || ret == DB_LOCK_WAIT_TIMEOUT)
3762 && n_attempts < 5);
3763
3764 if (ret != DB_SUCCESS) {
3765 snprintf(errstr, errstr_sz,
3766 "Unable to rename statistics from"
3767 " %s.%s to %s.%s in %s: %s."
3768 " They can be renamed later using"
3769
3770 " UPDATE %s SET"
3771 " database_name = '%s',"
3772 " table_name = '%s'"
3773 " WHERE"
3774 " database_name = '%s' AND"
3775 " table_name = '%s';",
3776
3777 old_db_utf8, old_table_utf8,
3778 new_db_utf8, new_table_utf8,
3779 TABLE_STATS_NAME_PRINT,
3780 ut_strerr(ret),
3781
3782 TABLE_STATS_NAME_PRINT,
3783 new_db_utf8, new_table_utf8,
3784 old_db_utf8, old_table_utf8);
3785 mutex_exit(&dict_sys->mutex);
3786 rw_lock_x_unlock(dict_operation_lock);
3787 return(ret);
3788 }
3789 /* else */
3790
3791 n_attempts = 0;
3792 do {
3793 n_attempts++;
3794
3795 ret = dict_stats_rename_table_in_index_stats(
3796 old_db_utf8, old_table_utf8,
3797 new_db_utf8, new_table_utf8);
3798
3799 if (ret == DB_DUPLICATE_KEY) {
3800 dict_stats_delete_from_index_stats(
3801 new_db_utf8, new_table_utf8);
3802 }
3803
3804 if (ret == DB_STATS_DO_NOT_EXIST) {
3805 ret = DB_SUCCESS;
3806 }
3807
3808 if (ret != DB_SUCCESS) {
3809 mutex_exit(&dict_sys->mutex);
3810 rw_lock_x_unlock(dict_operation_lock);
3811 os_thread_sleep(200000 /* 0.2 sec */);
3812 rw_lock_x_lock(dict_operation_lock);
3813 mutex_enter(&dict_sys->mutex);
3814 }
3815 } while ((ret == DB_DEADLOCK
3816 || ret == DB_DUPLICATE_KEY
3817 || ret == DB_LOCK_WAIT_TIMEOUT)
3818 && n_attempts < 5);
3819
3820 mutex_exit(&dict_sys->mutex);
3821 rw_lock_x_unlock(dict_operation_lock);
3822
3823 if (ret != DB_SUCCESS) {
3824 snprintf(errstr, errstr_sz,
3825 "Unable to rename statistics from"
3826 " %s.%s to %s.%s in %s: %s."
3827 " They can be renamed later using"
3828
3829 " UPDATE %s SET"
3830 " database_name = '%s',"
3831 " table_name = '%s'"
3832 " WHERE"
3833 " database_name = '%s' AND"
3834 " table_name = '%s';",
3835
3836 old_db_utf8, old_table_utf8,
3837 new_db_utf8, new_table_utf8,
3838 INDEX_STATS_NAME_PRINT,
3839 ut_strerr(ret),
3840
3841 INDEX_STATS_NAME_PRINT,
3842 new_db_utf8, new_table_utf8,
3843 old_db_utf8, old_table_utf8);
3844 }
3845
3846 return(ret);
3847}
3848
3849#ifdef MYSQL_RENAME_INDEX
3850/*********************************************************************//**
3851Renames an index in InnoDB persistent stats storage.
3852This function creates its own transaction and commits it.
3853@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
3854if the persistent stats do not exist. */
3855dberr_t
3856dict_stats_rename_index(
3857/*====================*/
3858 const dict_table_t* table, /*!< in: table whose index
3859 is renamed */
3860 const char* old_index_name, /*!< in: old index name */
3861 const char* new_index_name) /*!< in: new index name */
3862{
3863 rw_lock_x_lock(dict_operation_lock);
3864 mutex_enter(&dict_sys->mutex);
3865
3866 if (!dict_stats_persistent_storage_check(true)) {
3867 mutex_exit(&dict_sys->mutex);
3868 rw_lock_x_unlock(dict_operation_lock);
3869 return(DB_STATS_DO_NOT_EXIST);
3870 }
3871
3872 char dbname_utf8[MAX_DB_UTF8_LEN];
3873 char tablename_utf8[MAX_TABLE_UTF8_LEN];
3874
3875 dict_fs2utf8(table->name.m_name, dbname_utf8, sizeof(dbname_utf8),
3876 tablename_utf8, sizeof(tablename_utf8));
3877
3878 pars_info_t* pinfo;
3879
3880 pinfo = pars_info_create();
3881
3882 pars_info_add_str_literal(pinfo, "dbname_utf8", dbname_utf8);
3883 pars_info_add_str_literal(pinfo, "tablename_utf8", tablename_utf8);
3884 pars_info_add_str_literal(pinfo, "new_index_name", new_index_name);
3885 pars_info_add_str_literal(pinfo, "old_index_name", old_index_name);
3886
3887 dberr_t ret;
3888
3889 ret = dict_stats_exec_sql(
3890 pinfo,
3891 "PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
3892 "BEGIN\n"
3893 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3894 "index_name = :new_index_name\n"
3895 "WHERE\n"
3896 "database_name = :dbname_utf8 AND\n"
3897 "table_name = :tablename_utf8 AND\n"
3898 "index_name = :old_index_name;\n"
3899 "END;\n", NULL);
3900
3901 mutex_exit(&dict_sys->mutex);
3902 rw_lock_x_unlock(dict_operation_lock);
3903
3904 return(ret);
3905}
3906#endif /* MYSQL_RENAME_INDEX */
3907
3908/* tests @{ */
3909#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
3910
3911/* The following unit tests test some of the functions in this file
3912individually, such testing cannot be performed by the mysql-test framework
3913via SQL. */
3914
3915/* test_dict_table_schema_check() @{ */
3916void
3917test_dict_table_schema_check()
3918{
3919 /*
3920 CREATE TABLE tcheck (
3921 c01 VARCHAR(123),
3922 c02 INT,
3923 c03 INT NOT NULL,
3924 c04 INT UNSIGNED,
3925 c05 BIGINT,
3926 c06 BIGINT UNSIGNED NOT NULL,
3927 c07 TIMESTAMP
3928 ) ENGINE=INNODB;
3929 */
3930 /* definition for the table 'test/tcheck' */
3931 dict_col_meta_t columns[] = {
3932 {"c01", DATA_VARCHAR, 0, 123},
3933 {"c02", DATA_INT, 0, 4},
3934 {"c03", DATA_INT, DATA_NOT_NULL, 4},
3935 {"c04", DATA_INT, DATA_UNSIGNED, 4},
3936 {"c05", DATA_INT, 0, 8},
3937 {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3938 {"c07", DATA_INT, 0, 4},
3939 {"c_extra", DATA_INT, 0, 4}
3940 };
3941 dict_table_schema_t schema = {
3942 "test/tcheck",
3943 0 /* will be set individually for each test below */,
3944 columns
3945 };
3946 char errstr[512];
3947
3948 snprintf(errstr, sizeof(errstr), "Table not found");
3949
3950 /* prevent any data dictionary modifications while we are checking
3951 the tables' structure */
3952
3953 mutex_enter(&dict_sys->mutex);
3954
3955 /* check that a valid table is reported as valid */
3956 schema.n_cols = 7;
3957 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3958 == DB_SUCCESS) {
3959 printf("OK: test.tcheck ok\n");
3960 } else {
3961 printf("ERROR: %s\n", errstr);
3962 printf("ERROR: test.tcheck not present or corrupted\n");
3963 goto test_dict_table_schema_check_end;
3964 }
3965
3966 /* check columns with wrong length */
3967 schema.columns[1].len = 8;
3968 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3969 != DB_SUCCESS) {
3970 printf("OK: test.tcheck.c02 has different length and is"
3971 " reported as corrupted\n");
3972 } else {
3973 printf("OK: test.tcheck.c02 has different length but is"
3974 " reported as ok\n");
3975 goto test_dict_table_schema_check_end;
3976 }
3977 schema.columns[1].len = 4;
3978
3979 /* request that c02 is NOT NULL while actually it does not have
3980 this flag set */
3981 schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3982 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3983 != DB_SUCCESS) {
3984 printf("OK: test.tcheck.c02 does not have NOT NULL while"
3985 " it should and is reported as corrupted\n");
3986 } else {
3987 printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
3988 " it should and is not reported as corrupted\n");
3989 goto test_dict_table_schema_check_end;
3990 }
3991 schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3992
3993 /* check a table that contains some extra columns */
3994 schema.n_cols = 6;
3995 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3996 == DB_SUCCESS) {
3997 printf("ERROR: test.tcheck has more columns but is not"
3998 " reported as corrupted\n");
3999 goto test_dict_table_schema_check_end;
4000 } else {
4001 printf("OK: test.tcheck has more columns and is"
4002 " reported as corrupted\n");
4003 }
4004
4005 /* check a table that has some columns missing */
4006 schema.n_cols = 8;
4007 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4008 != DB_SUCCESS) {
4009 printf("OK: test.tcheck has missing columns and is"
4010 " reported as corrupted\n");
4011 } else {
4012 printf("ERROR: test.tcheck has missing columns but is"
4013 " reported as ok\n");
4014 goto test_dict_table_schema_check_end;
4015 }
4016
4017 /* check non-existent table */
4018 schema.table_name = "test/tcheck_nonexistent";
4019 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4020 != DB_SUCCESS) {
4021 printf("OK: test.tcheck_nonexistent is not present\n");
4022 } else {
4023 printf("ERROR: test.tcheck_nonexistent is present!?\n");
4024 goto test_dict_table_schema_check_end;
4025 }
4026
4027test_dict_table_schema_check_end:
4028
4029 mutex_exit(&dict_sys->mutex);
4030}
4031/* @} */
4032
/* save/fetch aux macros @{ */
/* Fixture values shared by test_dict_stats_save(), which stores them in a
hand-crafted table/index pair and prints them into verification SQL, and
test_dict_stats_fetch_from_ps(), which asserts that the same values are
read back. They are kept as object-like macros because the string ones are
glued together by string-literal concatenation (database "/" table) and the
numeric ones are passed to printf() with %d. */

/* Database and table name parts of the dummy table's name. */
#define TEST_DATABASE_NAME "foobardb"
#define TEST_TABLE_NAME "test_dict_stats"

/* Table-level statistics (stat_n_rows, stat_clustered_index_size,
stat_sum_of_other_index_sizes). */
#define TEST_N_ROWS 111
#define TEST_CLUSTERED_INDEX_SIZE 222
#define TEST_SUM_OF_OTHER_INDEX_SIZES 333

/* First dummy index: a single unique column (n_uniq = 1), hence one
n_diff / sample size pair. */
#define TEST_IDX1_NAME "tidx1"
#define TEST_IDX1_COL1_NAME "tidx1_col1"
#define TEST_IDX1_INDEX_SIZE 123
#define TEST_IDX1_N_LEAF_PAGES 234
#define TEST_IDX1_N_DIFF1 50
#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500

/* Second dummy index: four unique columns (n_uniq = 4). N_DIFFk and
N_DIFFk_SAMPLE_SIZE go to stat_n_diff_key_vals[k - 1] and
stat_n_sample_sizes[k - 1] respectively. */
#define TEST_IDX2_NAME "tidx2"
#define TEST_IDX2_COL1_NAME "tidx2_col1"
#define TEST_IDX2_COL2_NAME "tidx2_col2"
#define TEST_IDX2_COL3_NAME "tidx2_col3"
#define TEST_IDX2_COL4_NAME "tidx2_col4"
#define TEST_IDX2_INDEX_SIZE 321
#define TEST_IDX2_N_LEAF_PAGES 432
#define TEST_IDX2_N_DIFF1 60
#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
#define TEST_IDX2_N_DIFF2 61
#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
#define TEST_IDX2_N_DIFF3 62
#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
#define TEST_IDX2_N_DIFF4 63
#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
/* @} */
4064
4065/* test_dict_stats_save() @{ */
4066void
4067test_dict_stats_save()
4068{
4069 dict_table_t table;
4070 dict_index_t index1;
4071 dict_field_t index1_fields[1];
4072 ib_uint64_t index1_stat_n_diff_key_vals[1];
4073 ib_uint64_t index1_stat_n_sample_sizes[1];
4074 dict_index_t index2;
4075 dict_field_t index2_fields[4];
4076 ib_uint64_t index2_stat_n_diff_key_vals[4];
4077 ib_uint64_t index2_stat_n_sample_sizes[4];
4078 dberr_t ret;
4079
4080 /* craft a dummy dict_table_t */
4081 table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4082 table.stat_n_rows = TEST_N_ROWS;
4083 table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
4084 table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
4085 UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
4086 UT_LIST_ADD_LAST(table.indexes, &index1);
4087 UT_LIST_ADD_LAST(table.indexes, &index2);
4088 ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4089 ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4090
4091 index1.name = TEST_IDX1_NAME;
4092 index1.table = &table;
4093 index1.cached = 1;
4094 index1.n_uniq = 1;
4095 index1.fields = index1_fields;
4096 index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4097 index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4098 index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
4099 index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
4100 index1_fields[0].name = TEST_IDX1_COL1_NAME;
4101 index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
4102 index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
4103
4104 ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4105 index2.name = TEST_IDX2_NAME;
4106 index2.table = &table;
4107 index2.cached = 1;
4108 index2.n_uniq = 4;
4109 index2.fields = index2_fields;
4110 index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4111 index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4112 index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
4113 index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
4114 index2_fields[0].name = TEST_IDX2_COL1_NAME;
4115 index2_fields[1].name = TEST_IDX2_COL2_NAME;
4116 index2_fields[2].name = TEST_IDX2_COL3_NAME;
4117 index2_fields[3].name = TEST_IDX2_COL4_NAME;
4118 index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
4119 index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
4120 index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
4121 index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
4122 index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
4123 index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
4124 index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
4125 index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
4126
4127 ret = dict_stats_save(&table, NULL);
4128
4129 ut_a(ret == DB_SUCCESS);
4130
4131 printf("\nOK: stats saved successfully, now go ahead and read"
4132 " what's inside %s and %s:\n\n",
4133 TABLE_STATS_NAME_PRINT,
4134 INDEX_STATS_NAME_PRINT);
4135
4136 printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
4137 "FROM %s\n"
4138 "WHERE\n"
4139 "database_name = '%s' AND\n"
4140 "table_name = '%s' AND\n"
4141 "n_rows = %d AND\n"
4142 "clustered_index_size = %d AND\n"
4143 "sum_of_other_index_sizes = %d;\n"
4144 "\n",
4145 TABLE_STATS_NAME_PRINT,
4146 TEST_DATABASE_NAME,
4147 TEST_TABLE_NAME,
4148 TEST_N_ROWS,
4149 TEST_CLUSTERED_INDEX_SIZE,
4150 TEST_SUM_OF_OTHER_INDEX_SIZES);
4151
4152 printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
4153 "FROM %s\n"
4154 "WHERE\n"
4155 "database_name = '%s' AND\n"
4156 "table_name = '%s' AND\n"
4157 "index_name = '%s' AND\n"
4158 "(\n"
4159 " (stat_name = 'size' AND stat_value = %d AND"
4160 " sample_size IS NULL) OR\n"
4161 " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4162 " sample_size IS NULL) OR\n"
4163 " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4164 " sample_size = '%d' AND stat_description = '%s')\n"
4165 ");\n"
4166 "\n",
4167 INDEX_STATS_NAME_PRINT,
4168 TEST_DATABASE_NAME,
4169 TEST_TABLE_NAME,
4170 TEST_IDX1_NAME,
4171 TEST_IDX1_INDEX_SIZE,
4172 TEST_IDX1_N_LEAF_PAGES,
4173 TEST_IDX1_N_DIFF1,
4174 TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
4175 TEST_IDX1_COL1_NAME);
4176
4177 printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
4178 "FROM %s\n"
4179 "WHERE\n"
4180 "database_name = '%s' AND\n"
4181 "table_name = '%s' AND\n"
4182 "index_name = '%s' AND\n"
4183 "(\n"
4184 " (stat_name = 'size' AND stat_value = %d AND"
4185 " sample_size IS NULL) OR\n"
4186 " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4187 " sample_size IS NULL) OR\n"
4188 " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4189 " sample_size = '%d' AND stat_description = '%s') OR\n"
4190 " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
4191 " sample_size = '%d' AND stat_description = '%s,%s') OR\n"
4192 " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
4193 " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
4194 " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
4195 " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
4196 ");\n"
4197 "\n",
4198 INDEX_STATS_NAME_PRINT,
4199 TEST_DATABASE_NAME,
4200 TEST_TABLE_NAME,
4201 TEST_IDX2_NAME,
4202 TEST_IDX2_INDEX_SIZE,
4203 TEST_IDX2_N_LEAF_PAGES,
4204 TEST_IDX2_N_DIFF1,
4205 TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
4206 TEST_IDX2_N_DIFF2,
4207 TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
4208 TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
4209 TEST_IDX2_N_DIFF3,
4210 TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
4211 TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4212 TEST_IDX2_N_DIFF4,
4213 TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
4214 TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4215 TEST_IDX2_COL4_NAME);
4216}
4217/* @} */
4218
4219/* test_dict_stats_fetch_from_ps() @{ */
4220void
4221test_dict_stats_fetch_from_ps()
4222{
4223 dict_table_t table;
4224 dict_index_t index1;
4225 ib_uint64_t index1_stat_n_diff_key_vals[1];
4226 ib_uint64_t index1_stat_n_sample_sizes[1];
4227 dict_index_t index2;
4228 ib_uint64_t index2_stat_n_diff_key_vals[4];
4229 ib_uint64_t index2_stat_n_sample_sizes[4];
4230 dberr_t ret;
4231
4232 /* craft a dummy dict_table_t */
4233 table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4234 UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
4235 UT_LIST_ADD_LAST(table.indexes, &index1);
4236 UT_LIST_ADD_LAST(table.indexes, &index2);
4237 ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4238
4239 index1.name = TEST_IDX1_NAME;
4240 ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4241 index1.cached = 1;
4242 index1.n_uniq = 1;
4243 index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4244 index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4245
4246 index2.name = TEST_IDX2_NAME;
4247 ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4248 index2.cached = 1;
4249 index2.n_uniq = 4;
4250 index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4251 index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4252
4253 ret = dict_stats_fetch_from_ps(&table);
4254
4255 ut_a(ret == DB_SUCCESS);
4256
4257 ut_a(table.stat_n_rows == TEST_N_ROWS);
4258 ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
4259 ut_a(table.stat_sum_of_other_index_sizes
4260 == TEST_SUM_OF_OTHER_INDEX_SIZES);
4261
4262 ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
4263 ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
4264 ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
4265 ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
4266
4267 ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
4268 ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
4269 ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
4270 ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
4271 ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
4272 ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
4273 ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
4274 ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
4275 ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
4276 ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
4277
4278 printf("OK: fetch successful\n");
4279}
4280/* @} */
4281
4282/* test_dict_stats_all() @{ */
4283void
4284test_dict_stats_all()
4285{
4286 test_dict_table_schema_check();
4287
4288 test_dict_stats_save();
4289
4290 test_dict_stats_fetch_from_ps();
4291}
4292/* @} */
4293
4294#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
4295/* @} */
4296