dict0stats.cc source code [MariaDB/storage/innobase/dict/dict0stats.cc]

1	/*****************************************************************************
2
3	Copyright (c) 2009, 2017, Oracle and/or its affiliates. All Rights Reserved.
4	Copyright (c) 2015, 2018, MariaDB Corporation.
5
6	This program is free software; you can redistribute it and/or modify it under
7	the terms of the GNU General Public License as published by the Free Software
8	Foundation; version 2 of the License.
9
10	This program is distributed in the hope that it will be useful, but WITHOUT
11	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14	You should have received a copy of the GNU General Public License along with
15	this program; if not, write to the Free Software Foundation, Inc.,
16	51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18	*****************************************************************************/
19
20	/************************************************//**
21	@file dict/dict0stats.cc
22	Code used for calculating and manipulating table statistics.
23
24	Created Jan 06, 2010 Vasil Dimov
25	*******************************************************/
26
27	#include "univ.i"
28
29	#include "ut0ut.h"
30	#include "ut0rnd.h"
31	#include "dyn0buf.h"
32	#include "row0sel.h"
33	#include "trx0trx.h"
34	#include "pars0pars.h"
35	#include "dict0stats.h"
36	#include "ha_prototypes.h"
37	#include "ut0new.h"
38	#include <mysql_com.h>
39	#include "btr0btr.h"
40
41	#include <algorithm>
42	#include <map>
43	#include <vector>
44
/* Sampling algorithm description @{

The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
let it be A, which is the number of leaf pages to analyze for a given index
for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
analyzed).

Let the total number of leaf pages in the table be T.
Level 0 - leaf pages, level H - root.

Definition: N-prefix-boring record is a record on a non-leaf page that equals
the next (to the right, crossing page boundaries, skipping the supremum and
infimum) record on the same level when looking at the first n-prefix columns.
The last (user) record on a level is not boring (it does not match the
non-existent user record to the right). We call the records boring because all
the records on the page below a boring record are equal to that boring record.

We avoid diving below boring records when searching for a leaf page to
estimate the number of distinct records because we know that such a leaf
page will have number of distinct records == 1.

For each n-prefix: start from the root level and full scan subsequent lower
levels until a level that contains at least A*10 distinct records is found.
Let's call this level LA.
As an optimization the search is canceled if it has reached level 1 (never
descend to the level 0 (leaf)) and also if the next level to be scanned
would contain more than A pages. The latter is because the user has asked
to analyze A leaf pages and it does not make sense to scan much more than
A non-leaf pages with the sole purpose of finding a good sample of A leaf
pages.

After finding the appropriate level LA with at least A*10 distinct records
(or less in the exceptions described above), divide it into groups of equal
records and pick A such groups. Then pick the last record from each group.
For example, let the level be:

index:  0,1,2,3,4,5,6,7,8,9,10
record: 1,1,1,2,2,7,7,7,7,7,9

There are 4 groups of distinct records and if A=2 random ones are selected,
e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.

After selecting A records as described above, dive below them to find A leaf
pages and analyze them, finding the total number of distinct records. The
dive to the leaf level is performed by selecting a non-boring record from
each page and diving below it.

This way, a total of A leaf pages are analyzed for the given n-prefix.

Let the number of different key values found in each leaf page i be Pi (i=1..A).
Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
Let the number of different key values on level LA be N_DIFF_LA.
Let the total number of records on level LA be TOTAL_LA.
Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
leaf level.
Let the number of leaf pages be N.
Then the total number of different key values on the leaf level is:
N * R * N_DIFF_AVG_LEAF.
See REF01 for the implementation.

The above describes how to calculate the cardinality of an index.
This algorithm is executed for each n-prefix of a multi-column index
where n=1..n_uniq.
@} */
109
/* names of the tables from the persistent statistics storage;
the "/" form is the internal InnoDB name, the "." form is for messages */
#define TABLE_STATS_NAME	"mysql/innodb_table_stats"
#define TABLE_STATS_NAME_PRINT	"mysql.innodb_table_stats"
#define INDEX_STATS_NAME	"mysql/innodb_index_stats"
#define INDEX_STATS_NAME_PRINT	"mysql.innodb_index_stats"

/* DEBUG_PRINTF() forwards to printf() only when UNIV_STATS_DEBUG is defined,
otherwise it expands to nothing */
#ifdef UNIV_STATS_DEBUG
#define DEBUG_PRINTF(fmt, ...)	printf(fmt, ## __VA_ARGS__)
#else /* UNIV_STATS_DEBUG */
#define DEBUG_PRINTF(fmt, ...)	/* noop */
#endif /* UNIV_STATS_DEBUG */

/* Gets the number of leaf pages to sample in persistent stats estimation:
the per-table setting if it is non-zero, otherwise the global
srv_stats_persistent_sample_pages */
#define N_SAMPLE_PAGES(index)					\
	static_cast<ib_uint64_t>(				\
		(index)->table->stats_sample_pages != 0		\
		? (index)->table->stats_sample_pages		\
		: srv_stats_persistent_sample_pages)

/* number of distinct records on a given level that are required to stop
descending to lower levels and fetch N_SAMPLE_PAGES(index) records
from that level */
#define N_DIFF_REQUIRED(index)	(N_SAMPLE_PAGES(index) * 10)
133
134	/ A dynamic array where we store the boundaries of each distinct group*
135	of keys. For example if a btree level is:
136	index: 0,1,2,3,4,5,6,7,8,9,10,11,12
137	data: b,b,b,b,b,b,g,g,j,j,j, x, y
138	then we would store 5,7,10,11,12 in the array. /*
139	typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> > boundaries_t;
140
141	/* Allocator type used for index_map_t. /
142	typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
143	index_map_t_allocator;
144
145	/* Auxiliary map used for sorting indexes by name in dict_stats_save(). /
146	typedef std::map<const char, dict_index_t, ut_strcmp_functor,
147	index_map_t_allocator> index_map_t;
148
149	/*******************************************************************//**
150	Checks whether an index should be ignored in stats manipulations:
151	* stats fetch
152	* stats recalc
153	* stats save
154	@return true if exists and all tables are ok /*
155	UNIV_INLINE
156	bool
157	dict_stats_should_ignore_index(
158	/===========================/
159	const dict_index_t* index) /!< in: index /
160	{
161	return((index->type & (DICT_FTS \| DICT_SPATIAL))
162	\|\| index->is_corrupted()
163	\|\| index->to_be_dropped
164	\|\| !index->is_committed());
165	}
166
167	/*******************************************************************//**
168	Checks whether the persistent statistics storage exists and that all
169	tables have the proper structure.
170	@return true if exists and all tables are ok /*
171	static
172	bool
173	dict_stats_persistent_storage_check(
174	/================================/
175	bool caller_has_dict_sys_mutex) /!< in: true if the caller*
176	owns dict_sys->mutex /*
177	{
178	/ definition for the table TABLE_STATS_NAME /
179	dict_col_meta_t table_stats_columns[] = {
180	{"database_name", DATA_VARMYSQL,
181	DATA_NOT_NULL, `192`},
182
183	{"table_name", DATA_VARMYSQL,
184	DATA_NOT_NULL, `192`},
185
186	{"last_update", DATA_FIXBINARY,
187	DATA_NOT_NULL, `4`},
188
189	{"n_rows", DATA_INT,
190	DATA_NOT_NULL \| DATA_UNSIGNED, `8`},
191
192	{"clustered_index_size", DATA_INT,
193	DATA_NOT_NULL \| DATA_UNSIGNED, `8`},
194
195	{"sum_of_other_index_sizes", DATA_INT,
196	DATA_NOT_NULL \| DATA_UNSIGNED, `8`}
197	};
198	dict_table_schema_t table_stats_schema = {
199	TABLE_STATS_NAME,
200	UT_ARR_SIZE(table_stats_columns),
201	table_stats_columns,
202	`0` / n_foreign /,
203	`0` / n_referenced /
204	};
205
206	/ definition for the table INDEX_STATS_NAME /
207	dict_col_meta_t index_stats_columns[] = {
208	{"database_name", DATA_VARMYSQL,
209	DATA_NOT_NULL, `192`},
210
211	{"table_name", DATA_VARMYSQL,
212	DATA_NOT_NULL, `192`},
213
214	{"index_name", DATA_VARMYSQL,
215	DATA_NOT_NULL, `192`},
216
217	{"last_update", DATA_FIXBINARY,
218	DATA_NOT_NULL, `4`},
219
220	{"stat_name", DATA_VARMYSQL,
221	DATA_NOT_NULL, `64`*`3`},
222
223	{"stat_value", DATA_INT,
224	DATA_NOT_NULL \| DATA_UNSIGNED, `8`},
225
226	{"sample_size", DATA_INT,
227	DATA_UNSIGNED, `8`},
228
229	{"stat_description", DATA_VARMYSQL,
230	DATA_NOT_NULL, `1024`*`3`}
231	};
232	dict_table_schema_t index_stats_schema = {
233	INDEX_STATS_NAME,
234	UT_ARR_SIZE(index_stats_columns),
235	index_stats_columns,
236	`0` / n_foreign /,
237	`0` / n_referenced /
238	};
239
240	char errstr[`512`];
241	dberr_t ret;
242
243	if (!caller_has_dict_sys_mutex) {
244	mutex_enter(&dict_sys->mutex);
245	}
246
247	ut_ad(mutex_own(&dict_sys->mutex));
248
249	/ first check table_stats /
250	ret = dict_table_schema_check(&table_stats_schema, errstr,
251	sizeof(errstr));
252	if (ret == DB_SUCCESS) {
253	/ if it is ok, then check index_stats /
254	ret = dict_table_schema_check(&index_stats_schema, errstr,
255	sizeof(errstr));
256	}
257
258	if (!caller_has_dict_sys_mutex) {
259	mutex_exit(&dict_sys->mutex);
260	}
261
262	if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
263	ib::error () << errstr;
264	return(false);
265	} else if (ret == DB_STATS_DO_NOT_EXIST) {
266	return false;
267	}
268	/ else /
269
270	return(true);
271	}
272
273	/* Executes a given SQL statement using the InnoDB internal SQL parser.*
274	This function will free the pinfo object.
275	@param[in,out] pinfo pinfo to pass to que_eval_sql() must already
276	have any literals bound to it
277	@param[in] sql SQL string to execute
278	@param[in,out] trx in case of NULL the function will allocate and
279	free the trx object. If it is not NULL then it will be rolled back
280	only in the case of error, but not freed.
281	@return DB_SUCCESS or error code /*
282	static
283	dberr_t
284	dict_stats_exec_sql(
285	pars_info_t* pinfo,
286	const char* sql,
287	trx_t* trx)
288	{
289	dberr_t err;
290	bool trx_started = false;
291
292	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
293	ut_ad(mutex_own(&dict_sys->mutex));
294
295	if (!dict_stats_persistent_storage_check(true)) {
296	pars_info_free(pinfo);
297	return(DB_STATS_DO_NOT_EXIST);
298	}
299
300	if (trx == NULL) {
301	trx = trx_create();
302	trx_started = true;
303
304	if (srv_read_only_mode) {
305	trx_start_internal_read_only(trx);
306	} else {
307	trx_start_internal(trx);
308	}
309	}
310
311	err = que_eval_sql(pinfo, sql, FALSE, trx); / pinfo is freed here /
312
313	DBUG_EXECUTE_IF("stats_index_error",
314	if (!trx_started) {
315	err = DB_STATS_DO_NOT_EXIST;
316	trx->error_state = DB_STATS_DO_NOT_EXIST;
317	});
318
319	if (!trx_started && err == DB_SUCCESS) {
320	return(DB_SUCCESS);
321	}
322
323	if (err == DB_SUCCESS) {
324	trx_commit_for_mysql(trx);
325	} else {
326	trx->op_info = "rollback of internal trx on stats tables";
327	trx->dict_operation_lock_mode = RW_X_LATCH;
328	trx_rollback_to_savepoint(trx, NULL);
329	trx->dict_operation_lock_mode = `0`;
330	trx->op_info = "";
331	ut_a(trx->error_state == DB_SUCCESS);
332	}
333
334	if (trx_started) {
335	trx_free(trx);
336	}
337
338	return(err);
339	}
340
341	/*******************************************************************//**
342	Duplicate a table object and its indexes.
343	This function creates a dummy dict_table_t object and initializes the
344	following table and index members:
345	dict_table_t::id (copied)
346	dict_table_t::heap (newly created)
347	dict_table_t::name (copied)
348	dict_table_t::corrupted (copied)
349	dict_table_t::indexes<> (newly created)
350	dict_table_t::magic_n
351	for each entry in dict_table_t::indexes, the following are initialized:
352	(indexes that have DICT_FTS set in index->type are skipped)
353	dict_index_t::id (copied)
354	dict_index_t::name (copied)
355	dict_index_t::table_name (points to the copied table name)
356	dict_index_t::table (points to the above semi-initialized object)
357	dict_index_t::type (copied)
358	dict_index_t::to_be_dropped (copied)
359	dict_index_t::online_status (copied)
360	dict_index_t::n_uniq (copied)
361	dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
362	dict_index_t::indexes<> (newly created)
363	dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
364	dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
365	dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
366	dict_index_t::magic_n
367	The returned object should be freed with dict_stats_table_clone_free()
368	when no longer needed.
369	@return incomplete table object /*
370	static
371	dict_table_t*
372	dict_stats_table_clone_create(
373	/==========================/
374	const dict_table_t* table) /!< in: table whose stats to copy /
375	{
376	size_t heap_size;
377	dict_index_t* index;
378
379	/ Estimate the size needed for the table and all of its indexes /
380
381	heap_size = `0`;
382	heap_size += sizeof(dict_table_t);
383	heap_size += strlen(table->name.m_name) + `1`;
384
385	for (index = dict_table_get_first_index(table);
386	index != NULL;
387	index = dict_table_get_next_index(index)) {
388
389	if (dict_stats_should_ignore_index(index)) {
390	continue;
391	}
392
393	ut_ad(!dict_index_is_ibuf(index));
394
395	ulint n_uniq = dict_index_get_n_unique(index);
396
397	heap_size += sizeof(dict_index_t);
398	heap_size += strlen(index->name) + `1`;
399	heap_size += n_uniq * sizeof(index->fields[`0`]);
400	for (ulint i = `0`; i < n_uniq; i++) {
401	heap_size += strlen(index->fields[i].name) + `1`;
402	}
403	heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[`0`]);
404	heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[`0`]);
405	heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[`0`]);
406	}
407
408	/ Allocate the memory and copy the members /
409
410	mem_heap_t* heap;
411
412	heap = mem_heap_create(heap_size);
413
414	dict_table_t* t;
415
416	t = (dict_table_t) mem_heap_alloc(heap, sizeof(t));
417
418	UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
419	t->id = table->id;
420
421	t->heap = heap;
422
423	t->name.m_name = mem_heap_strdup(heap, table->name.m_name);
424
425	t->corrupted = table->corrupted;
426
427	/ This private object "t" is not shared with other threads, so*
428	we do not need the stats_latch (thus we pass false below). The
429	dict_table_stats_lock()/unlock() routines will do nothing. /*
430	dict_table_stats_latch_create(t, false);
431
432	UT_LIST_INIT(t->indexes, &dict_index_t::indexes);
433
434	for (index = dict_table_get_first_index(table);
435	index != NULL;
436	index = dict_table_get_next_index(index)) {
437
438	if (dict_stats_should_ignore_index(index)) {
439	continue;
440	}
441
442	ut_ad(!dict_index_is_ibuf(index));
443
444	dict_index_t* idx;
445
446	idx = (dict_index_t) mem_heap_alloc(heap, sizeof(idx));
447
448	UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
449	idx->id = index->id;
450
451	idx->name = mem_heap_strdup(heap, index->name);
452
453	idx->table = t;
454
455	idx->type = index->type;
456
457	idx->to_be_dropped = `0`;
458
459	idx->online_status = ONLINE_INDEX_COMPLETE;
460	idx->set_committed(true);
461
462	idx->n_uniq = index->n_uniq;
463
464	idx->fields = (dict_field_t*) mem_heap_alloc(
465	heap, idx->n_uniq * sizeof(idx->fields[`0`]));
466
467	for (ulint i = `0`; i < idx->n_uniq; i++) {
468	idx->fields[i].name = mem_heap_strdup(
469	heap, index->fields[i].name);
470	}
471
472	/ hook idx into t->indexes /
473	UT_LIST_ADD_LAST(t->indexes, idx);
474
475	idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
476	heap,
477	idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[`0`]));
478
479	idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
480	heap,
481	idx->n_uniq * sizeof(idx->stat_n_sample_sizes[`0`]));
482
483	idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
484	heap,
485	idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[`0`]));
486	ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
487
488	idx->stat_defrag_n_page_split = `0`;
489	idx->stat_defrag_n_pages_freed = `0`;
490	}
491
492	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
493
494	return(t);
495	}
496
497	/*******************************************************************//**
498	Free the resources occupied by an object returned by
499	dict_stats_table_clone_create(). /*
500	static
501	void
502	dict_stats_table_clone_free(
503	/========================/
504	dict_table_t* t) /!< in: dummy table object to free /
505	{
506	dict_table_stats_latch_destroy(t);
507	mem_heap_free(t->heap);
508	}
509
510	/*******************************************************************//**
511	Write all zeros (or 1 where it makes sense) into an index
512	statistics members. The resulting stats correspond to an empty index.
513	The caller must own index's table stats latch in X mode
514	(dict_table_stats_lock(table, RW_X_LATCH)) /*
515	static
516	void
517	dict_stats_empty_index(
518	/===================/
519	dict_index_t* index, /!< in/out: index /
520	bool empty_defrag_stats)
521	/!< in: whether to empty defrag stats /
522	{
523	ut_ad(!(index->type & DICT_FTS));
524	ut_ad(!dict_index_is_ibuf(index));
525
526	ulint n_uniq = index->n_uniq;
527
528	for (ulint i = `0`; i < n_uniq; i++) {
529	index->stat_n_diff_key_vals[i] = `0`;
530	index->stat_n_sample_sizes[i] = `1`;
531	index->stat_n_non_null_key_vals[i] = `0`;
532	}
533
534	index->stat_index_size = `1`;
535	index->stat_n_leaf_pages = `1`;
536
537	if (empty_defrag_stats) {
538	dict_stats_empty_defrag_stats(index);
539	dict_stats_empty_defrag_summary(index);
540	}
541	}
542
543	/*******************************************************************//**
544	Write all zeros (or 1 where it makes sense) into a table and its indexes'
545	statistics members. The resulting stats correspond to an empty table. /*
546	static
547	void
548	dict_stats_empty_table(
549	/===================/
550	dict_table_t* table, /!< in/out: table /
551	bool empty_defrag_stats)
552	/!< in: whether to empty defrag stats /
553	{
554	/ Zero the stats members /
555
556	dict_table_stats_lock(table, RW_X_LATCH);
557
558	table->stat_n_rows = `0`;
559	table->stat_clustered_index_size = `1`;
560	/ 1 page for each index, not counting the clustered /
561	table->stat_sum_of_other_index_sizes
562	= UT_LIST_GET_LEN(table->indexes) - `1`;
563	table->stat_modified_counter = `0`;
564
565	dict_index_t* index;
566
567	for (index = dict_table_get_first_index(table);
568	index != NULL;
569	index = dict_table_get_next_index(index)) {
570
571	if (index->type & DICT_FTS) {
572	continue;
573	}
574
575	ut_ad(!dict_index_is_ibuf(index));
576
577	dict_stats_empty_index(index, empty_defrag_stats);
578	}
579
580	table->stat_initialized = TRUE;
581
582	dict_table_stats_unlock(table, RW_X_LATCH);
583	}
584
585	/*******************************************************************//**
586	Check whether index's stats are initialized (assert if they are not). /*
587	static
588	void
589	dict_stats_assert_initialized_index(
590	/================================/
591	const dict_index_t* index) /!< in: index /
592	{
593	UNIV_MEM_ASSERT_RW_ABORT(
594	index->stat_n_diff_key_vals,
595	index->n_uniq * sizeof(index->stat_n_diff_key_vals[`0`]));
596
597	UNIV_MEM_ASSERT_RW_ABORT(
598	index->stat_n_sample_sizes,
599	index->n_uniq * sizeof(index->stat_n_sample_sizes[`0`]));
600
601	UNIV_MEM_ASSERT_RW_ABORT(
602	index->stat_n_non_null_key_vals,
603	index->n_uniq * sizeof(index->stat_n_non_null_key_vals[`0`]));
604
605	UNIV_MEM_ASSERT_RW_ABORT(
606	&index->stat_index_size,
607	sizeof(index->stat_index_size));
608
609	UNIV_MEM_ASSERT_RW_ABORT(
610	&index->stat_n_leaf_pages,
611	sizeof(index->stat_n_leaf_pages));
612	}
613
614	/*******************************************************************//**
615	Check whether table's stats are initialized (assert if they are not). /*
616	static
617	void
618	dict_stats_assert_initialized(
619	/==========================/
620	const dict_table_t* table) /!< in: table /
621	{
622	ut_a(table->stat_initialized);
623
624	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
625	sizeof(table->stats_last_recalc));
626
627	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
628	sizeof(table->stat_persistent));
629
630	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
631	sizeof(table->stats_auto_recalc));
632
633	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
634	sizeof(table->stats_sample_pages));
635
636	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
637	sizeof(table->stat_n_rows));
638
639	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
640	sizeof(table->stat_clustered_index_size));
641
642	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
643	sizeof(table->stat_sum_of_other_index_sizes));
644
645	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
646	sizeof(table->stat_modified_counter));
647
648	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
649	sizeof(table->stats_bg_flag));
650
651	for (dict_index_t* index = dict_table_get_first_index(table);
652	index != NULL;
653	index = dict_table_get_next_index(index)) {
654
655	if (!dict_stats_should_ignore_index(index)) {
656	dict_stats_assert_initialized_index(index);
657	}
658	}
659	}
660
/* Evaluates to true if both index pointers are non-NULL and the two
indexes have the same id and the same name. Both arguments are evaluated
more than once, so they must not have side effects. */
#define INDEX_EQ(i1, i2) \
	((i1) != NULL \
	 && (i2) != NULL \
	 && (i1)->id == (i2)->id \
	 && strcmp((i1)->name, (i2)->name) == 0)
666
667	/*******************************************************************//**
668	Copy table and index statistics from one table to another, including index
669	stats. Extra indexes in src are ignored and extra indexes in dst are
670	initialized to correspond to an empty index. /*
671	static
672	void
673	dict_stats_copy(
674	/============/
675	dict_table_t* dst, /!< in/out: destination table /
676	const dict_table_t* src, /!< in: source table /
677	bool reset_ignored_indexes) /!< in: if true, set ignored indexes*
678	to have the same statistics as if
679	the table was empty /*
680	{
681	dst->stats_last_recalc = src->stats_last_recalc;
682	dst->stat_n_rows = src->stat_n_rows;
683	dst->stat_clustered_index_size = src->stat_clustered_index_size;
684	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
685	dst->stat_modified_counter = src->stat_modified_counter;
686
687	dict_index_t* dst_idx;
688	dict_index_t* src_idx;
689
690	for (dst_idx = dict_table_get_first_index(dst),
691	src_idx = dict_table_get_first_index(src);
692	dst_idx != NULL;
693	dst_idx = dict_table_get_next_index(dst_idx),
694	(src_idx != NULL
695	&& (src_idx = dict_table_get_next_index(src_idx)))) {
696
697	if (dict_stats_should_ignore_index(dst_idx)) {
698	if (reset_ignored_indexes) {
699	/ Reset index statistics for all ignored indexes,*
700	unless they are FT indexes (these have no statistics)/*
701	if (dst_idx->type & DICT_FTS) {
702	continue;
703	}
704	dict_stats_empty_index(dst_idx, true);
705	} else {
706	continue;
707	}
708	}
709
710	ut_ad(!dict_index_is_ibuf(dst_idx));
711
712	if (!INDEX_EQ(src_idx, dst_idx)) {
713	for (src_idx = dict_table_get_first_index(src);
714	src_idx != NULL;
715	src_idx = dict_table_get_next_index(src_idx)) {
716
717	if (INDEX_EQ(src_idx, dst_idx)) {
718	break;
719	}
720	}
721	}
722
723	if (!INDEX_EQ(src_idx, dst_idx)) {
724	dict_stats_empty_index(dst_idx, true);
725	continue;
726	}
727
728	ulint n_copy_el;
729
730	if (dst_idx->n_uniq > src_idx->n_uniq) {
731	n_copy_el = src_idx->n_uniq;
732	/ Since src is smaller some elements in dst*
733	will remain untouched by the following memmove(),
734	thus we init all of them here. /*
735	dict_stats_empty_index(dst_idx, true);
736	} else {
737	n_copy_el = dst_idx->n_uniq;
738	}
739
740	memmove(dst_idx->stat_n_diff_key_vals,
741	src_idx->stat_n_diff_key_vals,
742	n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[`0`]));
743
744	memmove(dst_idx->stat_n_sample_sizes,
745	src_idx->stat_n_sample_sizes,
746	n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[`0`]));
747
748	memmove(dst_idx->stat_n_non_null_key_vals,
749	src_idx->stat_n_non_null_key_vals,
750	n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[`0`]));
751
752	dst_idx->stat_index_size = src_idx->stat_index_size;
753
754	dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
755
756	dst_idx->stat_defrag_modified_counter =
757	src_idx->stat_defrag_modified_counter;
758	dst_idx->stat_defrag_n_pages_freed =
759	src_idx->stat_defrag_n_pages_freed;
760	dst_idx->stat_defrag_n_page_split =
761	src_idx->stat_defrag_n_page_split;
762	}
763
764	dst->stat_initialized = TRUE;
765	}
766
767	/* Duplicate the stats of a table and its indexes.*
768	This function creates a dummy dict_table_t object and copies the input
769	table's stats into it. The returned table object is not in the dictionary
770	cache and cannot be accessed by any other threads. In addition to the
771	members copied in dict_stats_table_clone_create() this function initializes
772	the following:
773	dict_table_t::stat_initialized
774	dict_table_t::stat_persistent
775	dict_table_t::stat_n_rows
776	dict_table_t::stat_clustered_index_size
777	dict_table_t::stat_sum_of_other_index_sizes
778	dict_table_t::stat_modified_counter
779	dict_index_t::stat_n_diff_key_vals[]
780	dict_index_t::stat_n_sample_sizes[]
781	dict_index_t::stat_n_non_null_key_vals[]
782	dict_index_t::stat_index_size
783	dict_index_t::stat_n_leaf_pages
784	dict_index_t::stat_defrag_modified_counter
785	dict_index_t::stat_defrag_n_pages_freed
786	dict_index_t::stat_defrag_n_page_split
787	The returned object should be freed with dict_stats_snapshot_free()
788	when no longer needed.
789	@param[in] table table whose stats to copy
790	@return incomplete table object /*
791	static
792	dict_table_t*
793	dict_stats_snapshot_create(
794	dict_table_t* table)
795	{
796	mutex_enter(&dict_sys->mutex);
797
798	dict_table_stats_lock(table, RW_S_LATCH);
799
800	dict_stats_assert_initialized(table);
801
802	dict_table_t* t;
803
804	t = dict_stats_table_clone_create(table);
805
806	dict_stats_copy(t, table, false);
807
808	t->stat_persistent = table->stat_persistent;
809	t->stats_auto_recalc = table->stats_auto_recalc;
810	t->stats_sample_pages = table->stats_sample_pages;
811	t->stats_bg_flag = table->stats_bg_flag;
812
813	dict_table_stats_unlock(table, RW_S_LATCH);
814
815	mutex_exit(&dict_sys->mutex);
816
817	return(t);
818	}
819
820	/*******************************************************************//**
821	Free the resources occupied by an object returned by
822	dict_stats_snapshot_create(). /*
823	static
824	void
825	dict_stats_snapshot_free(
826	/=====================/
827	dict_table_t* t) /!< in: dummy table object to free /
828	{
829	dict_stats_table_clone_free(t);
830	}
831
832	/*******************************************************************//**
833	Calculates new estimates for index statistics. This function is
834	relatively quick and is used to calculate transient statistics that
835	are not saved on disk. This was the only way to calculate statistics
836	before the Persistent Statistics feature was introduced.
837	This function doesn't update the defragmentation related stats.
838	Only persistent statistics supports defragmentation stats. /*
839	static
840	void
841	dict_stats_update_transient_for_index(
842	/==================================/
843	dict_index_t* index) /!< in/out: index /
844	{
845	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
846	&& (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
847	\|\| !dict_index_is_clust(index))) {
848	/ If we have set a high innodb_force_recovery*
849	level, do not calculate statistics, as a badly
850	corrupted index can cause a crash in it.
851	Initialize some bogus index cardinality
852	statistics, so that the data can be queried in
853	various means, also via secondary indexes. /*
854	dict_stats_empty_index(index, false);
855	#if defined UNIV_DEBUG \|\| defined UNIV_IBUF_DEBUG
856	} else if (ibuf_debug && !dict_index_is_clust(index)) {
857	dict_stats_empty_index(index, false);
858	#endif /* UNIV_DEBUG \|\| UNIV_IBUF_DEBUG */
859	} else {
860	mtr_t mtr;
861	ulint size;
862
863	mtr_start(&mtr);
864
865	mtr_s_lock(dict_index_get_lock(index), &mtr);
866
867	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
868
869	if (size != ULINT_UNDEFINED) {
870	index->stat_index_size = size;
871
872	size = btr_get_size(
873	index, BTR_N_LEAF_PAGES, &mtr);
874	}
875
876	mtr_commit(&mtr);
877
878	switch (size) {
879	case ULINT_UNDEFINED:
880	dict_stats_empty_index(index, false);
881	return;
882	case `0`:
883	/ The root node of the tree is a leaf /
884	size = `1`;
885	}
886
887	index->stat_n_leaf_pages = size;
888
889	/ Do not continue if table decryption has failed or*
890	table is already marked as corrupted. /*
891	if (index->is_readable()) {
892	/ We don't handle the return value since it*
893	will be false only when some thread is
894	dropping the table and we don't have to empty
895	the statistics of the to be dropped index /*
896	btr_estimate_number_of_different_key_vals(index);
897	}
898	}
899	}
900
901	/*******************************************************************//**
902	Calculates new estimates for table and index statistics. This function
903	is relatively quick and is used to calculate transient statistics that
904	are not saved on disk.
905	This was the only way to calculate statistics before the
906	Persistent Statistics feature was introduced. /*
907	static
908	void
909	dict_stats_update_transient(
910	/========================/
911	dict_table_t* table) /!< in/out: table /
912	{
913	dict_index_t* index;
914	ulint sum_of_index_sizes = `0`;
915
916	/ Find out the sizes of the indexes and how many different values*
917	for the key they approximately have /*
918
919	index = dict_table_get_first_index(table);
920
921	if (!table->space) {
922	/ Nothing to do. /
923	dict_stats_empty_table(table, true);
924	return;
925	} else if (index == NULL) {
926	/ Table definition is corrupt /
927
928	ib::warn () << "Table " << table->name
929	<< " has no indexes. Cannot calculate statistics.";
930	dict_stats_empty_table(table, true);
931	return;
932	}
933
934	for (; index != NULL; index = dict_table_get_next_index(index)) {
935
936	ut_ad(!dict_index_is_ibuf(index));
937
938	if (index->type & DICT_FTS \|\| dict_index_is_spatial(index)) {
939	continue;
940	}
941
942	dict_stats_empty_index(index, false);
943
944	if (dict_stats_should_ignore_index(index)) {
945	continue;
946	}
947
948	/ Do not continue if table decryption has failed or*
949	table is already marked as corrupted. /*
950	if (!index->is_readable()) {
951	break;
952	}
953
954	dict_stats_update_transient_for_index(index);
955
956	sum_of_index_sizes += index->stat_index_size;
957	}
958
959	index = dict_table_get_first_index(table);
960
961	table->stat_n_rows = index->stat_n_diff_key_vals[
962	dict_index_get_n_unique(index) - `1`];
963
964	table->stat_clustered_index_size = index->stat_index_size;
965
966	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
967	- index->stat_index_size;
968
969	table->stats_last_recalc = ut_time();
970
971	table->stat_modified_counter = `0`;
972
973	table->stat_initialized = TRUE;
974	}
975
/* @{ Pseudo code about the relation between the following functions

let N = N_SAMPLE_PAGES(index)

dict_stats_analyze_index()
  for each n_prefix
    search for good enough level:
      dict_stats_analyze_index_level() // only called if level has <= N pages
        // full scan of the level in one mtr
        collect statistics about the given level
      if we are not satisfied with the level, search next lower level
    we have found a good enough level here
    dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
      // full scan of the level in one mtr
      dive below some records and analyze the leaf page there:
      dict_stats_analyze_index_below_cur()
@} */
993
994	/*******************************************************************//**
995	Find the total number and the number of distinct keys on a given level in
996	an index. Each of the 1..n_uniq prefixes are looked up and the results are
997	saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
998	records on the level is saved in total_recs.
999	Also, the index of the last record in each group of equal records is saved
1000	in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
1001	record on the level and continues cross pages boundaries, counting from 0. /*
1002	static
1003	void
1004	dict_stats_analyze_index_level(
1005	/===========================/
1006	dict_index_t* index, /!< in: index /
1007	ulint level, /!< in: level /
1008	ib_uint64_t* n_diff, /!< out: array for number of*
1009	distinct keys for all prefixes /*
1010	ib_uint64_t* total_recs, /!< out: total number of records /
1011	ib_uint64_t* total_pages, /!< out: total number of pages /
1012	boundaries_t* n_diff_boundaries,/!< out: boundaries of the groups*
1013	of distinct keys /*
1014	mtr_t* mtr) /!< in/out: mini-transaction /
1015	{
1016	ulint n_uniq;
1017	mem_heap_t* heap;
1018	btr_pcur_t pcur;
1019	const page_t* page;
1020	const rec_t* rec;
1021	const rec_t* prev_rec;
1022	bool prev_rec_is_copied;
1023	byte* prev_rec_buf = NULL;
1024	ulint prev_rec_buf_size = `0`;
1025	ulint* rec_offsets;
1026	ulint* prev_rec_offsets;
1027	ulint i;
1028
1029	DEBUG_PRINTF(" %s(table=%s, index=%s, level=" ULINTPF ")\n",
1030	__func__, index->table->name, index->name, level);
1031
1032	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1033	MTR_MEMO_SX_LOCK));
1034
1035	n_uniq = dict_index_get_n_unique(index);
1036
1037	/ elements in the n_diff array are 0..n_uniq-1 (inclusive) /
1038	memset(n_diff, `0x0`, n_uniq * sizeof(n_diff[`0`]));
1039
1040	/ Allocate space for the offsets header (the allocation size at*
1041	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_uniq + 1,
1042	so that this will never be less than the size calculated in
1043	rec_get_offsets_func(). /*
1044	i = (REC_OFFS_HEADER_SIZE + `1` + `1`) + n_uniq;
1045
1046	heap = mem_heap_create((`2` * sizeof rec_offsets) i);
1047	rec_offsets = static_cast<ulint*>(
1048	mem_heap_alloc(heap, i * sizeof *rec_offsets));
1049	prev_rec_offsets = static_cast<ulint*>(
1050	mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
1051	rec_offs_set_n_alloc(rec_offsets, i);
1052	rec_offs_set_n_alloc(prev_rec_offsets, i);
1053
1054	/ reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] /
1055	if (n_diff_boundaries != NULL) {
1056	for (i = `0`; i < n_uniq; i++) {
1057	n_diff_boundaries[i].erase(
1058	n_diff_boundaries[i].begin(),
1059	n_diff_boundaries[i].end());
1060	}
1061	}
1062
1063	/ Position pcur on the leftmost record on the leftmost page*
1064	on the desired level. /*
1065
1066	btr_pcur_open_at_index_side(
1067	true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
1068	&pcur, true, level, mtr);
1069	btr_pcur_move_to_next_on_page(&pcur);
1070
1071	page = btr_pcur_get_page(&pcur);
1072
1073	/ The page must not be empty, except when*
1074	it is the root page (and the whole index is empty). /*
1075	ut_ad(btr_pcur_is_on_user_rec(&pcur) \|\| page_is_leaf(page));
1076	ut_ad(btr_pcur_get_rec(&pcur)
1077	== page_rec_get_next_const(page_get_infimum_rec(page)));
1078
1079	/ check that we are indeed on the desired level /
1080	ut_a(btr_page_get_level(page) == level);
1081
1082	/ there should not be any pages on the left /
1083	ut_a(!page_has_prev(page));
1084
1085	if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
1086	btr_pcur_get_rec(&pcur), page_is_comp(page))) {
1087	ut_ad(btr_pcur_is_on_user_rec(&pcur));
1088	if (level == `0`) {
1089	/ Skip the 'default row' pseudo-record /
1090	ut_ad(index->is_instant());
1091	btr_pcur_move_to_next_user_rec(&pcur, mtr);
1092	}
1093	} else {
1094	/ The first record on the leftmost page must be*
1095	marked as such on each level except the leaf level. /*
1096	ut_a(level == `0`);
1097	}
1098
1099	prev_rec = NULL;
1100	prev_rec_is_copied = false;
1101
1102	/ no records by default /
1103	*total_recs = `0`;
1104
1105	*total_pages = `0`;
1106
1107	/ iterate over all user records on this level*
1108	and compare each two adjacent ones, even the last on page
1109	X and the fist on page X+1 /*
1110	for (;
1111	btr_pcur_is_on_user_rec(&pcur);
1112	btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
1113
1114	bool rec_is_last_on_page;
1115
1116	rec = btr_pcur_get_rec(&pcur);
1117
1118	/ If rec and prev_rec are on different pages, then prev_rec*
1119	must have been copied, because we hold latch only on the page
1120	where rec resides. /*
1121	if (prev_rec != NULL
1122	&& page_align(rec) != page_align(prev_rec)) {
1123
1124	ut_a(prev_rec_is_copied);
1125	}
1126
1127	rec_is_last_on_page =
1128	page_rec_is_supremum(page_rec_get_next_const(rec));
1129
1130	/ increment the pages counter at the end of each page /
1131	if (rec_is_last_on_page) {
1132
1133	(*total_pages)++;
1134	}
1135
1136	/ Skip delete-marked records on the leaf level. If we*
1137	do not skip them, then ANALYZE quickly after DELETE
1138	could count them or not (purge may have already wiped
1139	them away) which brings non-determinism. We skip only
1140	leaf-level delete marks because delete marks on
1141	non-leaf level do not make sense. /*
1142
1143	if (level == `0`
1144	&& !srv_stats_include_delete_marked
1145	&& rec_get_deleted_flag(
1146	rec,
1147	page_is_comp(btr_pcur_get_page(&pcur)))) {
1148
1149	if (rec_is_last_on_page
1150	&& !prev_rec_is_copied
1151	&& prev_rec != NULL) {
1152	/ copy prev_rec /
1153
1154	prev_rec_offsets = rec_get_offsets(
1155	prev_rec, index, prev_rec_offsets,
1156	true,
1157	n_uniq, &heap);
1158
1159	prev_rec = rec_copy_prefix_to_buf(
1160	prev_rec, index, n_uniq,
1161	&prev_rec_buf, &prev_rec_buf_size);
1162
1163	prev_rec_is_copied = true;
1164	}
1165
1166	continue;
1167	}
1168	rec_offsets = rec_get_offsets(
1169	rec, index, rec_offsets, !level, n_uniq, &heap);
1170
1171	(*total_recs)++;
1172
1173	if (prev_rec != NULL) {
1174	ulint matched_fields;
1175
1176	prev_rec_offsets = rec_get_offsets(
1177	prev_rec, index, prev_rec_offsets, !level,
1178	n_uniq, &heap);
1179
1180	cmp_rec_rec_with_match(rec,
1181	prev_rec,
1182	rec_offsets,
1183	prev_rec_offsets,
1184	index,
1185	FALSE,
1186	&matched_fields);
1187
1188	for (i = matched_fields; i < n_uniq; i++) {
1189
1190	if (n_diff_boundaries != NULL) {
1191	/ push the index of the previous*
1192	record, that is - the last one from
1193	a group of equal keys /*
1194
1195	ib_uint64_t idx;
1196
1197	/ the index of the current record*
1198	is total_recs - 1, the index of the
1199	previous record is total_recs - 2;
1200	we know that idx is not going to
1201	become negative here because if we
1202	are in this branch then there is a
1203	previous record and thus
1204	total_recs >= 2 /*
1205	idx = *total_recs - `2`;
1206
1207	n_diff_boundaries[i].push_back(idx);
1208	}
1209
1210	/ increment the number of different keys*
1211	for n_prefix=i+1 (e.g. if i=0 then we increment
1212	for n_prefix=1 which is stored in n_diff[0]) /*
1213	n_diff[i]++;
1214	}
1215	} else {
1216	/ this is the first non-delete marked record /
1217	for (i = `0`; i < n_uniq; i++) {
1218	n_diff[i] = `1`;
1219	}
1220	}
1221
1222	if (rec_is_last_on_page) {
1223	/ end of a page has been reached /
1224
1225	/ we need to copy the record instead of assigning*
1226	like prev_rec = rec; because when we traverse the
1227	records on this level at some point we will jump from
1228	one page to the next and then rec and prev_rec will
1229	be on different pages and
1230	btr_pcur_move_to_next_user_rec() will release the
1231	latch on the page that prev_rec is on /*
1232	prev_rec = rec_copy_prefix_to_buf(
1233	rec, index, n_uniq,
1234	&prev_rec_buf, &prev_rec_buf_size);
1235	prev_rec_is_copied = true;
1236
1237	} else {
1238	/ still on the same page, the next call to*
1239	btr_pcur_move_to_next_user_rec() will not jump
1240	on the next page, we can simply assign pointers
1241	instead of copying the records like above /*
1242
1243	prev_rec = rec;
1244	prev_rec_is_copied = false;
1245	}
1246	}
1247
1248	/ if total_pages is left untouched then the above loop was not
1249	entered at all and there is one page in the whole tree which is
1250	empty or the loop was entered but this is level 0, contains one page
1251	and all records are delete-marked /*
1252	if (*total_pages == `0`) {
1253
1254	ut_ad(level == `0`);
1255	ut_ad(*total_recs == `0`);
1256
1257	*total_pages = `1`;
1258	}
1259
1260	/ if there are records on this level and boundaries*
1261	should be saved /*
1262	if (*total_recs > `0` && n_diff_boundaries != NULL) {
1263
1264	/ remember the index of the last record on the level as the*
1265	last one from the last group of equal keys; this holds for
1266	all possible prefixes /*
1267	for (i = `0`; i < n_uniq; i++) {
1268	ib_uint64_t idx;
1269
1270	idx = *total_recs - `1`;
1271
1272	n_diff_boundaries[i].push_back(idx);
1273	}
1274	}
1275
1276	/ now in n_diff_boundaries[i] there are exactly n_diff[i] integers,*
1277	for i=0..n_uniq-1 /*
1278
1279	#ifdef UNIV_STATS_DEBUG
1280	for (i = `0`; i < n_uniq; i++) {
1281
1282	DEBUG_PRINTF(" %s(): total recs: " UINT64PF
1283	", total pages: " UINT64PF
1284	", n_diff[" ULINTPF "]: " UINT64PF "\n",
1285	__func__, *total_recs,
1286	*total_pages,
1287	i, n_diff[i]);
1288
1289	#if 0
1290	if (n_diff_boundaries != NULL) {
1291	ib_uint64_t j;
1292
1293	DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
1294	__func__, i);
1295
1296	for (j = `0`; j < n_diff[i]; j++) {
1297	ib_uint64_t idx;
1298
1299	idx = n_diff_boundaries[i][j];
1300
1301	DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
1302	j, idx);
1303	}
1304	DEBUG_PRINTF("\n");
1305	}
1306	#endif
1307	}
1308	#endif /* UNIV_STATS_DEBUG */
1309
1310	/ Release the latch on the last page, because that is not done by*
1311	btr_pcur_close(). This function works also for non-leaf pages. /*
1312	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
1313
1314	btr_pcur_close(&pcur);
1315	ut_free(prev_rec_buf);
1316	mem_heap_free(heap);
1317	}
1318
1319	/* Scan a page, reading records from left to right and counting the number*
1320	of distinct records (looking only at the first n_prefix
1321	columns) and the number of external pages pointed by records from this page.
1322	If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1323	will return as soon as it finds a record that does not match its neighbor
1324	to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1325	returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1326	equal) or 2 (the function found a non-boring record and returned).
1327	@param[out] out_rec record, or NULL
1328	@param[out] offsets1 rec_get_offsets() working space (must
1329	be big enough)
1330	@param[out] offsets2 rec_get_offsets() working space (must
1331	be big enough)
1332	@param[in] index index of the page
1333	@param[in] page the page to scan
1334	@param[in] n_prefix look at the first n_prefix columns
1335	@param[in] is_leaf whether this is the leaf page
1336	@param[out] n_diff number of distinct records encountered
1337	@param[out] n_external_pages if this is non-NULL then it will be set
1338	to the number of externally stored pages which were encountered
1339	@return offsets1 or offsets2 (the offsets of out_rec),*
1340	or NULL if the page is empty and does not contain user records. /*
1341	UNIV_INLINE
1342	ulint*
1343	dict_stats_scan_page(
1344	const rec_t** out_rec,
1345	ulint* offsets1,
1346	ulint* offsets2,
1347	const dict_index_t* index,
1348	const page_t* page,
1349	ulint n_prefix,
1350	bool is_leaf,
1351	ib_uint64_t* n_diff,
1352	ib_uint64_t* n_external_pages)
1353	{
1354	ulint* offsets_rec = offsets1;
1355	ulint* offsets_next_rec = offsets2;
1356	const rec_t* rec;
1357	const rec_t* next_rec;
1358	/ A dummy heap, to be passed to rec_get_offsets().*
1359	Because offsets1,offsets2 should be big enough,
1360	this memory heap should never be used. /*
1361	mem_heap_t* heap = NULL;
1362	ut_ad(is_leaf == page_is_leaf(page));
1363	const rec_t* (get_next)(const* rec_t*)
1364	= !is_leaf \|\| srv_stats_include_delete_marked
1365	? page_rec_get_next_const
1366	: page_rec_get_next_non_del_marked;
1367
1368	const bool should_count_external_pages = n_external_pages != NULL;
1369
1370	if (should_count_external_pages) {
1371	*n_external_pages = `0`;
1372	}
1373
1374	rec = get_next(page_get_infimum_rec(page));
1375
1376	if (page_rec_is_supremum(rec)) {
1377	/ the page is empty or contains only delete-marked records /
1378	*n_diff = `0`;
1379	*out_rec = NULL;
1380	return(NULL);
1381	}
1382
1383	offsets_rec = rec_get_offsets(rec, index, offsets_rec, is_leaf,
1384	ULINT_UNDEFINED, &heap);
1385
1386	if (should_count_external_pages) {
1387	*n_external_pages += btr_rec_get_externally_stored_len(
1388	rec, offsets_rec);
1389	}
1390
1391	next_rec = get_next(rec);
1392
1393	*n_diff = `1`;
1394
1395	while (!page_rec_is_supremum(next_rec)) {
1396
1397	ulint matched_fields;
1398
1399	offsets_next_rec = rec_get_offsets(next_rec, index,
1400	offsets_next_rec, is_leaf,
1401	ULINT_UNDEFINED,
1402	&heap);
1403
1404	/ check whether rec != next_rec when looking at*
1405	the first n_prefix fields /*
1406	cmp_rec_rec_with_match(rec, next_rec,
1407	offsets_rec, offsets_next_rec,
1408	index, FALSE, &matched_fields);
1409
1410	if (matched_fields < n_prefix) {
1411	/ rec != next_rec, => rec is non-boring /
1412
1413	(*n_diff)++;
1414
1415	if (!is_leaf) {
1416	break;
1417	}
1418	}
1419
1420	rec = next_rec;
1421	{
1422	/ Assign offsets_rec = offsets_next_rec*
1423	so that offsets_rec matches with rec which
1424	was just assigned rec = next_rec above.
1425	Also need to point offsets_next_rec to the
1426	place where offsets_rec was pointing before
1427	because we have just 2 placeholders where
1428	data is actually stored:
1429	offsets1 and offsets2 and we
1430	are using them in circular fashion
1431	(offsets[_next]_rec are just pointers to
1432	those placeholders). /*
1433	ulint* offsets_tmp;
1434	offsets_tmp = offsets_rec;
1435	offsets_rec = offsets_next_rec;
1436	offsets_next_rec = offsets_tmp;
1437	}
1438
1439	if (should_count_external_pages) {
1440	*n_external_pages += btr_rec_get_externally_stored_len(
1441	rec, offsets_rec);
1442	}
1443
1444	next_rec = get_next(next_rec);
1445	}
1446
1447	/ offsets1,offsets2 should have been big enough /
1448	ut_a(heap == NULL);
1449	*out_rec = rec;
1450	return(offsets_rec);
1451	}
1452
1453	/* Dive below the current position of a cursor and calculate the number of*
1454	distinct records on the leaf page, when looking at the fist n_prefix
1455	columns. Also calculate the number of external pages pointed by records
1456	on the leaf page.
1457	@param[in] cur cursor
1458	@param[in] n_prefix look at the first n_prefix columns
1459	when comparing records
1460	@param[out] n_diff number of distinct records
1461	@param[out] n_external_pages number of external pages
1462	@return number of distinct records on the leaf page /*
1463	static
1464	void
1465	dict_stats_analyze_index_below_cur(
1466	const btr_cur_t* cur,
1467	ulint n_prefix,
1468	ib_uint64_t* n_diff,
1469	ib_uint64_t* n_external_pages)
1470	{
1471	dict_index_t* index;
1472	buf_block_t* block;
1473	const page_t* page;
1474	mem_heap_t* heap;
1475	const rec_t* rec;
1476	ulint* offsets1;
1477	ulint* offsets2;
1478	ulint* offsets_rec;
1479	ulint size;
1480	mtr_t mtr;
1481
1482	index = btr_cur_get_index(cur);
1483
1484	/ Allocate offsets for the record and the node pointer, for*
1485	node pointer records. In a secondary index, the node pointer
1486	record will consist of all index fields followed by a child
1487	page number.
1488	Allocate space for the offsets header (the allocation size at
1489	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
1490	so that this will never be less than the size calculated in
1491	rec_get_offsets_func(). /*
1492	size = (`1` + REC_OFFS_HEADER_SIZE) + `1` + dict_index_get_n_fields(index);
1493
1494	heap = mem_heap_create(size * (sizeof offsets1 + sizeof* *offsets2));
1495
1496	offsets1 = static_cast<ulint*>(mem_heap_alloc(
1497	heap, size * sizeof *offsets1));
1498
1499	offsets2 = static_cast<ulint*>(mem_heap_alloc(
1500	heap, size * sizeof *offsets2));
1501
1502	rec_offs_set_n_alloc(offsets1, size);
1503	rec_offs_set_n_alloc(offsets2, size);
1504
1505	rec = btr_cur_get_rec(cur);
1506	ut_ad(!page_rec_is_leaf(rec));
1507
1508	offsets_rec = rec_get_offsets(rec, index, offsets1, false,
1509	ULINT_UNDEFINED, &heap);
1510
1511	page_id_t page_id(index->table->space->id,
1512	btr_node_ptr_get_child_page_no(
1513	rec, offsets_rec));
1514	const page_size_t page_size(index->table->space->flags);
1515
1516	/ assume no external pages by default - in case we quit from this*
1517	function without analyzing any leaf pages /*
1518	*n_external_pages = `0`;
1519
1520	mtr_start(&mtr);
1521
1522	/ descend to the leaf level on the B-tree /
1523	for (;;) {
1524
1525	dberr_t err = DB_SUCCESS;
1526
1527	block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
1528	NULL / no guessed block /,
1529	BUF_GET, __FILE__, __LINE__, &mtr, &err);
1530
1531	page = buf_block_get_frame(block);
1532
1533	if (page_is_leaf(page)) {
1534	/ leaf level /
1535	break;
1536	}
1537	/ else /
1538
1539	/ search for the first non-boring record on the page /
1540	offsets_rec = dict_stats_scan_page(
1541	&rec, offsets1, offsets2, index, page, n_prefix,
1542	false, n_diff, NULL);
1543
1544	/ pages on level > 0 are not allowed to be empty /
1545	ut_a(offsets_rec != NULL);
1546	/ if page is not empty (offsets_rec != NULL) then n_diff must*
1547	be > 0, otherwise there is a bug in dict_stats_scan_page() /*
1548	ut_a(*n_diff > `0`);
1549
1550	if (*n_diff == `1`) {
1551	mtr_commit(&mtr);
1552
1553	/ page has all keys equal and the end of the page*
1554	was reached by dict_stats_scan_page(), no need to
1555	descend to the leaf level /*
1556	mem_heap_free(heap);
1557	/ can't get an estimate for n_external_pages here*
1558	because we do not dive to the leaf level, assume no
1559	external pages (n_external_pages was assigned to 0*
1560	above). /*
1561	return;
1562	}
1563	/ else /
1564
1565	/ when we instruct dict_stats_scan_page() to quit on the*
1566	first non-boring record it finds, then the returned n_diff
1567	can either be 0 (empty page), 1 (page has all keys equal) or
1568	2 (non-boring record was found) /*
1569	ut_a(*n_diff == `2`);
1570
1571	/ we have a non-boring record in rec, descend below it /
1572
1573	page_id.set_page_no(
1574	btr_node_ptr_get_child_page_no(rec, offsets_rec));
1575	}
1576
1577	/ make sure we got a leaf page as a result from the above loop /
1578	ut_ad(page_is_leaf(page));
1579
1580	/ scan the leaf page and find the number of distinct keys,*
1581	when looking only at the first n_prefix columns; also estimate
1582	the number of externally stored pages pointed by records on this
1583	page /*
1584
1585	offsets_rec = dict_stats_scan_page(
1586	&rec, offsets1, offsets2, index, page, n_prefix,
1587	true, n_diff,
1588	n_external_pages);
1589
1590	#if 0
1591	DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
1592	__func__, page_no, n_diff);
1593	#endif
1594
1595	mtr_commit(&mtr);
1596	mem_heap_free(heap);
1597	}
1598
1599	/* Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]*
1600	for each n-columns prefix (n from 1 to n_uniq). /*
1601	struct n_diff_data_t {
1602	/* Index of the level on which the descent through the btree*
1603	stopped. level 0 is the leaf level. This is >= 1 because we
1604	avoid scanning the leaf level because it may contain too many
1605	pages and doing so is useless when combined with the random dives -
1606	if we are to scan the leaf level, this means a full scan and we can
1607	simply do that instead of fiddling with picking random records higher
1608	in the tree and to dive below them. At the start of the analyzing
1609	we may decide to do full scan of the leaf level, but then this
1610	structure is not used in that code path. /*
1611	ulint level;
1612
1613	/* Number of records on the level where the descend through the btree*
1614	stopped. When we scan the btree from the root, we stop at some mid
1615	level, choose some records from it and dive below them towards a leaf
1616	page to analyze. /*
1617	ib_uint64_t n_recs_on_level;
1618
1619	/* Number of different key values that were found on the mid level. /
1620	ib_uint64_t n_diff_on_level;
1621
1622	/* Number of leaf pages that are analyzed. This is also the same as*
1623	the number of records that we pick from the mid level and dive below
1624	them. /*
1625	ib_uint64_t n_leaf_pages_to_analyze;
1626
1627	/* Cumulative sum of the number of different key values that were*
1628	found on all analyzed pages. /*
1629	ib_uint64_t n_diff_all_analyzed_pages;
1630
1631	/* Cumulative sum of the number of external pages (stored outside of*
1632	the btree but in the same file segment). /*
1633	ib_uint64_t n_external_pages_sum;
1634	};
1635
1636	/* Estimate the number of different key values in an index when looking at*
1637	the first n_prefix columns. For a given level in an index select
1638	n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
1639	them to the corresponding leaf pages, then scan those leaf pages and save the
1640	sampling results in n_diff_data->n_diff_all_analyzed_pages.
1641	@param[in] index index
1642	@param[in] n_prefix look at first 'n_prefix' columns when
1643	comparing records
1644	@param[in] boundaries a vector that contains
1645	n_diff_data->n_diff_on_level integers each of which represents the index (on
1646	level 'level', counting from left/smallest to right/biggest from 0) of the
1647	last record from each group of distinct keys
1648	@param[in,out] n_diff_data n_diff_all_analyzed_pages and
1649	n_external_pages_sum in this structure will be set by this function. The
1650	members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
1651	caller in advance - they are used by some calculations inside this function
1652	@param[in,out] mtr mini-transaction /*
1653	static
1654	void
1655	dict_stats_analyze_index_for_n_prefix(
1656	dict_index_t* index,
1657	ulint n_prefix,
1658	const boundaries_t* boundaries,
1659	n_diff_data_t* n_diff_data,
1660	mtr_t* mtr)
1661	{
1662	btr_pcur_t pcur;
1663	const page_t* page;
1664	ib_uint64_t rec_idx;
1665	ib_uint64_t i;
1666
1667	#if 0
1668	DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
1669	" n_diff_on_level=" UINT64PF ")\n",
1670	__func__, index->table->name, index->name, level,
1671	n_prefix, n_diff_data->n_diff_on_level);
1672	#endif
1673
1674	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1675	MTR_MEMO_SX_LOCK));
1676
1677	/ Position pcur on the leftmost record on the leftmost page*
1678	on the desired level. /*
1679
1680	btr_pcur_open_at_index_side(
1681	true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
1682	&pcur, true, n_diff_data->level, mtr);
1683	btr_pcur_move_to_next_on_page(&pcur);
1684
1685	page = btr_pcur_get_page(&pcur);
1686
1687	const rec_t* first_rec = btr_pcur_get_rec(&pcur);
1688
1689	/ We shouldn't be scanning the leaf level. The caller of this function*
1690	should have stopped the descend on level 1 or higher. /*
1691	ut_ad(n_diff_data->level > `0`);
1692	ut_ad(!page_is_leaf(page));
1693
1694	/ The page must not be empty, except when*
1695	it is the root page (and the whole index is empty). /*
1696	ut_ad(btr_pcur_is_on_user_rec(&pcur));
1697	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
1698
1699	/ check that we are indeed on the desired level /
1700	ut_a(btr_page_get_level(page) == n_diff_data->level);
1701
1702	/ there should not be any pages on the left /
1703	ut_a(!page_has_prev(page));
1704
1705	/ check whether the first record on the leftmost page is marked*
1706	as such; we are on a non-leaf level /*
1707	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
1708	& REC_INFO_MIN_REC_FLAG);
1709
1710	const ib_uint64_t last_idx_on_level = boundaries->at(
1711	static_cast<unsigned>(n_diff_data->n_diff_on_level - `1`));
1712
1713	rec_idx = `0`;
1714
1715	n_diff_data->n_diff_all_analyzed_pages = `0`;
1716	n_diff_data->n_external_pages_sum = `0`;
1717
1718	for (i = `0`; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
1719	/ there are n_diff_on_level elements*
1720	in 'boundaries' and we divide those elements
1721	into n_leaf_pages_to_analyze segments, for example:
1722
1723	let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
1724	segment i=0: [0, 24]
1725	segment i=1: [25, 49]
1726	segment i=2: [50, 74]
1727	segment i=3: [75, 99] or
1728
1729	let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
1730	segment i=0: [0, 0] or
1731
1732	let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
1733	segment i=0: [0, 0]
1734	segment i=1: [1, 1] or
1735
1736	let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
1737	segment i=0: [0, 0]
1738	segment i=1: [1, 2]
1739	segment i=2: [3, 4]
1740	segment i=3: [5, 6]
1741	segment i=4: [7, 8]
1742	segment i=5: [9, 10]
1743	segment i=6: [11, 12]
1744
1745	then we select a random record from each segment and dive
1746	below it /*
1747	const ib_uint64_t n_diff = n_diff_data->n_diff_on_level;
1748	const ib_uint64_t n_pick
1749	= n_diff_data->n_leaf_pages_to_analyze;
1750
1751	const ib_uint64_t left = n_diff * i / n_pick;
1752	const ib_uint64_t right = n_diff * (i + `1`) / n_pick - `1`;
1753
1754	ut_a(left <= right);
1755	ut_a(right <= last_idx_on_level);
1756
1757	const ulint rnd = right == left ? `0` :
1758	ut_rnd_gen_ulint() % (right - left);
1759
1760	const ib_uint64_t dive_below_idx
1761	= boundaries->at(static_cast<unsigned>(left + rnd));
1762
1763	#if 0
1764	DEBUG_PRINTF(" %s(): dive below record with index="
1765	UINT64PF "\n", __func__, dive_below_idx);
1766	#endif
1767
1768	/ seek to the record with index dive_below_idx /
1769	while (rec_idx < dive_below_idx
1770	&& btr_pcur_is_on_user_rec(&pcur)) {
1771
1772	btr_pcur_move_to_next_user_rec(&pcur, mtr);
1773	rec_idx++;
1774	}
1775
1776	/ if the level has finished before the record we are*
1777	searching for, this means that the B-tree has changed in
1778	the meantime, quit our sampling and use whatever stats
1779	we have collected so far /*
1780	if (rec_idx < dive_below_idx) {
1781
1782	ut_ad(!btr_pcur_is_on_user_rec(&pcur));
1783	break;
1784	}
1785
1786	/ it could be that the tree has changed in such a way that*
1787	the record under dive_below_idx is the supremum record, in
1788	this case rec_idx == dive_below_idx and pcur is positioned
1789	on the supremum, we do not want to dive below it /*
1790	if (!btr_pcur_is_on_user_rec(&pcur)) {
1791	break;
1792	}
1793
1794	ut_a(rec_idx == dive_below_idx);
1795
1796	ib_uint64_t n_diff_on_leaf_page;
1797	ib_uint64_t n_external_pages;
1798
1799	dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
1800	n_prefix,
1801	&n_diff_on_leaf_page,
1802	&n_external_pages);
1803
1804	/ We adjust n_diff_on_leaf_page here to avoid counting*
1805	one value twice - once as the last on some page and once
1806	as the first on another page. Consider the following example:
1807	Leaf level:
1808	page: (2,2,2,2,3,3)
1809	... many pages like (3,3,3,3,3,3) ...
1810	page: (3,3,3,3,5,5)
1811	... many pages like (5,5,5,5,5,5) ...
1812	page: (5,5,5,5,8,8)
1813	page: (8,8,8,8,9,9)
1814	our algo would (correctly) get an estimate that there are
1815	2 distinct records per page (average). Having 4 pages below
1816	non-boring records, it would (wrongly) estimate the number
1817	of distinct records to 8. /*
1818	if (n_diff_on_leaf_page > `0`) {
1819	n_diff_on_leaf_page--;
1820	}
1821
1822	n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;
1823
1824	n_diff_data->n_external_pages_sum += n_external_pages;
1825	}
1826
1827	btr_pcur_close(&pcur);
1828	}
1829
1830	/* Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].*
1831	@param[in] n_diff_data input data to use to derive the results
1832	@param[in,out] index index whose stat_n_diff_key_vals[] to set /*
1833	UNIV_INLINE
1834	void
1835	dict_stats_index_set_n_diff(
1836	const n_diff_data_t* n_diff_data,
1837	dict_index_t* index)
1838	{
1839	for (ulint n_prefix = dict_index_get_n_unique(index);
1840	n_prefix >= `1`;
1841	n_prefix--) {
1842	/ n_diff_all_analyzed_pages can be 0 here if*
1843	all the leaf pages sampled contained only
1844	delete-marked records. In this case we should assign
1845	0 to index->stat_n_diff_key_vals[n_prefix - 1], which
1846	the formula below does. /*
1847
1848	const n_diff_data_t* data = &n_diff_data[n_prefix - `1`];
1849
1850	ut_ad(data->n_leaf_pages_to_analyze > `0`);
1851	ut_ad(data->n_recs_on_level > `0`);
1852
1853	ib_uint64_t n_ordinary_leaf_pages;
1854
1855	if (data->level == `1`) {
1856	/ If we know the number of records on level 1, then*
1857	this number is the same as the number of pages on
1858	level 0 (leaf). /*
1859	n_ordinary_leaf_pages = data->n_recs_on_level;
1860	} else {
1861	/ If we analyzed D ordinary leaf pages and found E*
1862	external pages in total linked from those D ordinary
1863	leaf pages, then this means that the ratio
1864	ordinary/external is D/E. Then the ratio ordinary/total
1865	is D / (D + E). Knowing that the total number of pages
1866	is T (including ordinary and external) then we estimate
1867	that the total number of ordinary leaf pages is
1868	T D / (D + E). /
1869	n_ordinary_leaf_pages
1870	= index->stat_n_leaf_pages
1871	* data->n_leaf_pages_to_analyze
1872	/ (data->n_leaf_pages_to_analyze
1873	+ data->n_external_pages_sum);
1874	}
1875
1876	/ See REF01 for an explanation of the algorithm /
1877	index->stat_n_diff_key_vals[n_prefix - `1`]
1878	= n_ordinary_leaf_pages
1879
1880	* data->n_diff_on_level
1881	/ data->n_recs_on_level
1882
1883	* data->n_diff_all_analyzed_pages
1884	/ data->n_leaf_pages_to_analyze;
1885
1886	index->stat_n_sample_sizes[n_prefix - `1`]
1887	= data->n_leaf_pages_to_analyze;
1888
1889	DEBUG_PRINTF(" %s(): n_diff=" UINT64PF
1890	" for n_prefix=" ULINTPF
1891	" (" ULINTPF
1892	" * " UINT64PF " / " UINT64PF
1893	" * " UINT64PF " / " UINT64PF ")\n",
1894	__func__,
1895	index->stat_n_diff_key_vals[n_prefix - `1`],
1896	n_prefix,
1897	index->stat_n_leaf_pages,
1898	data->n_diff_on_level,
1899	data->n_recs_on_level,
1900	data->n_diff_all_analyzed_pages,
1901	data->n_leaf_pages_to_analyze);
1902	}
1903	}
1904
1905	/*******************************************************************//**
1906	Calculates new statistics for a given index and saves them to the index
1907	members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
1908	stat_n_leaf_pages. This function could be slow. /*
1909	static
1910	void
1911	dict_stats_analyze_index(
1912	/=====================/
1913	dict_index_t* index) /!< in/out: index to analyze /
1914	{
1915	ulint root_level;
1916	ulint level;
1917	bool level_is_analyzed;
1918	ulint n_uniq;
1919	ulint n_prefix;
1920	ib_uint64_t total_recs;
1921	ib_uint64_t total_pages;
1922	mtr_t mtr;
1923	ulint size;
1924	DBUG_ENTER("dict_stats_analyze_index");
1925
1926	DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
1927	dict_index_get_online_status(index)));
1928
1929	/ Disable update statistic for Rtree /
1930	if (dict_index_is_spatial(index)) {
1931	DBUG_VOID_RETURN;
1932	}
1933
1934	DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name());
1935
1936	dict_stats_empty_index(index, false);
1937
1938	mtr_start(&mtr);
1939
1940	mtr_s_lock(dict_index_get_lock(index), &mtr);
1941
1942	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
1943
1944	if (size != ULINT_UNDEFINED) {
1945	index->stat_index_size = size;
1946	size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
1947	}
1948
1949	/ Release the X locks on the root page taken by btr_get_size() /
1950	mtr_commit(&mtr);
1951
1952	switch (size) {
1953	case ULINT_UNDEFINED:
1954	dict_stats_assert_initialized_index(index);
1955	DBUG_VOID_RETURN;
1956	case `0`:
1957	/ The root node of the tree is a leaf /
1958	size = `1`;
1959	}
1960
1961	index->stat_n_leaf_pages = size;
1962
1963	mtr_start(&mtr);
1964
1965	mtr_sx_lock(dict_index_get_lock(index), &mtr);
1966
1967	root_level = btr_height_get(index, &mtr);
1968
1969	n_uniq = dict_index_get_n_unique(index);
1970
1971	/ If the tree has just one level (and one page) or if the user*
1972	has requested to sample too many pages then do full scan.
1973
1974	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
1975	will be sampled, so in total N_SAMPLE_PAGES(index) n_uniq leaf*
1976	pages will be sampled. If that number is bigger than the total
1977	number of leaf pages then do full scan of the leaf level instead
1978	since it will be faster and will give better results. /*
1979
1980	if (root_level == `0`
1981	\|\| N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
1982
1983	if (root_level == `0`) {
1984	DEBUG_PRINTF(" %s(): just one page,"
1985	" doing full scan\n", __func__);
1986	} else {
1987	DEBUG_PRINTF(" %s(): too many pages requested for"
1988	" sampling, doing full scan\n", __func__);
1989	}
1990
1991	/ do full scan of level 0; save results directly*
1992	into the index /*
1993
1994	dict_stats_analyze_index_level(index,
1995	`0` / leaf level /,
1996	index->stat_n_diff_key_vals,
1997	&total_recs,
1998	&total_pages,
1999	NULL / boundaries not needed /,
2000	&mtr);
2001
2002	for (ulint i = `0`; i < n_uniq; i++) {
2003	index->stat_n_sample_sizes[i] = total_pages;
2004	}
2005
2006	mtr_commit(&mtr);
2007
2008	dict_stats_assert_initialized_index(index);
2009	DBUG_VOID_RETURN;
2010	}
2011
2012	/ For each level that is being scanned in the btree, this contains the*
2013	number of different key values for all possible n-column prefixes. /*
2014	ib_uint64_t* n_diff_on_level = UT_NEW_ARRAY(
2015	ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);
2016
2017	/ For each level that is being scanned in the btree, this contains the*
2018	index of the last record from each group of equal records (when
2019	comparing only the first n columns, n=1..n_uniq). /*
2020	boundaries_t* n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
2021	n_uniq);
2022
2023	/ For each n-column prefix this array contains the input data that is*
2024	used to calculate dict_index_t::stat_n_diff_key_vals[]. /*
2025	n_diff_data_t* n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);
2026
2027	/ total_recs is also used to estimate the number of pages on one*
2028	level below, so at the start we have 1 page (the root) /*
2029	total_recs = `1`;
2030
2031	/ Here we use the following optimization:*
2032	If we find that level L is the first one (searching from the
2033	root) that contains at least D distinct keys when looking at
2034	the first n_prefix columns, then:
2035	if we look at the first n_prefix-1 columns then the first
2036	level that contains D distinct keys will be either L or a
2037	lower one.
2038	So if we find that the first level containing D distinct
2039	keys (on n_prefix columns) is L, we continue from L when
2040	searching for D distinct keys on n_prefix-1 columns. /*
2041	level = root_level;
2042	level_is_analyzed = false;
2043
2044	for (n_prefix = n_uniq; n_prefix >= `1`; n_prefix--) {
2045
2046	DEBUG_PRINTF(" %s(): searching level with >=%llu "
2047	"distinct records, n_prefix=" ULINTPF "\n",
2048	__func__, N_DIFF_REQUIRED(index), n_prefix);
2049
2050	/ Commit the mtr to release the tree S lock to allow*
2051	other threads to do some work too. /*
2052	mtr_commit(&mtr);
2053	mtr_start(&mtr);
2054	mtr_sx_lock(dict_index_get_lock(index), &mtr);
2055	if (root_level != btr_height_get(index, &mtr)) {
2056	/ Just quit if the tree has changed beyond*
2057	recognition here. The old stats from previous
2058	runs will remain in the values that we have
2059	not calculated yet. Initially when the index
2060	object is created the stats members are given
2061	some sensible values so leaving them untouched
2062	here even the first time will not cause us to
2063	read uninitialized memory later. /*
2064	break;
2065	}
2066
2067	/ check whether we should pick the current level;*
2068	we pick level 1 even if it does not have enough
2069	distinct records because we do not want to scan the
2070	leaf level because it may contain too many records /*
2071	if (level_is_analyzed
2072	&& (n_diff_on_level[n_prefix - `1`] >= N_DIFF_REQUIRED(index)
2073	\|\| level == `1`)) {
2074
2075	goto found_level;
2076	}
2077
2078	/ search for a level that contains enough distinct records /
2079
2080	if (level_is_analyzed && level > `1`) {
2081
2082	/ if this does not hold we should be on*
2083	"found_level" instead of here /*
2084	ut_ad(n_diff_on_level[n_prefix - `1`]
2085	< N_DIFF_REQUIRED(index));
2086
2087	level--;
2088	level_is_analyzed = false;
2089	}
2090
2091	/ descend into the tree, searching for "good enough" level /
2092	for (;;) {
2093
2094	/ make sure we do not scan the leaf level*
2095	accidentally, it may contain too many pages /*
2096	ut_ad(level > `0`);
2097
2098	/ scanning the same level twice is an optimization*
2099	bug /*
2100	ut_ad(!level_is_analyzed);
2101
2102	/ Do not scan if this would read too many pages.*
2103	Here we use the following fact:
2104	the number of pages on level L equals the number
2105	of records on level L+1, thus we deduce that the
2106	following call would scan total_recs pages, because
2107	total_recs is left from the previous iteration when
2108	we scanned one level upper or we have not scanned any
2109	levels yet in which case total_recs is 1. /*
2110	if (total_recs > N_SAMPLE_PAGES(index)) {
2111
2112	/ if the above cond is true then we are*
2113	not at the root level since on the root
2114	level total_recs == 1 (set before we
2115	enter the n-prefix loop) and cannot
2116	be > N_SAMPLE_PAGES(index) /*
2117	ut_a(level != root_level);
2118
2119	/ step one level back and be satisfied with*
2120	whatever it contains /*
2121	level++;
2122	level_is_analyzed = true;
2123
2124	break;
2125	}
2126
2127	dict_stats_analyze_index_level(index,
2128	level,
2129	n_diff_on_level,
2130	&total_recs,
2131	&total_pages,
2132	n_diff_boundaries,
2133	&mtr);
2134
2135	level_is_analyzed = true;
2136
2137	if (level == `1`
2138	\|\| n_diff_on_level[n_prefix - `1`]
2139	>= N_DIFF_REQUIRED(index)) {
2140	/ we have reached the last level we could scan*
2141	or we found a good level with many distinct
2142	records /*
2143	break;
2144	}
2145
2146	level--;
2147	level_is_analyzed = false;
2148	}
2149	found_level:
2150
2151	DEBUG_PRINTF(" %s(): found level " ULINTPF
2152	" that has " UINT64PF
2153	" distinct records for n_prefix=" ULINTPF "\n",
2154	__func__, level, n_diff_on_level[n_prefix - `1`],
2155	n_prefix);
2156	/ here we are either on level 1 or the level that we are on*
2157	contains >= N_DIFF_REQUIRED distinct keys or we did not scan
2158	deeper levels because they would contain too many pages /*
2159
2160	ut_ad(level > `0`);
2161
2162	ut_ad(level_is_analyzed);
2163
2164	/ if any of these is 0 then there is exactly one page in the*
2165	B-tree and it is empty and we should have done full scan and
2166	should not be here /*
2167	ut_ad(total_recs > `0`);
2168	ut_ad(n_diff_on_level[n_prefix - `1`] > `0`);
2169
2170	ut_ad(N_SAMPLE_PAGES(index) > `0`);
2171
2172	n_diff_data_t* data = &n_diff_data[n_prefix - `1`];
2173
2174	data->level = level;
2175
2176	data->n_recs_on_level = total_recs;
2177
2178	data->n_diff_on_level = n_diff_on_level[n_prefix - `1`];
2179
2180	data->n_leaf_pages_to_analyze = std::min(
2181	N_SAMPLE_PAGES(index),
2182	n_diff_on_level[n_prefix - `1`]);
2183
2184	/ pick some records from this level and dive below them for*
2185	the given n_prefix /*
2186
2187	dict_stats_analyze_index_for_n_prefix(
2188	index, n_prefix, &n_diff_boundaries[n_prefix - `1`],
2189	data, &mtr);
2190	}
2191
2192	mtr_commit(&mtr);
2193
2194	UT_DELETE_ARRAY(n_diff_boundaries);
2195
2196	UT_DELETE_ARRAY(n_diff_on_level);
2197
2198	/ n_prefix == 0 means that the above loop did not end up prematurely*
2199	due to tree being changed and so n_diff_data[] is set up. /*
2200	if (n_prefix == `0`) {
2201	dict_stats_index_set_n_diff(n_diff_data, index);
2202	}
2203
2204	UT_DELETE_ARRAY(n_diff_data);
2205
2206	dict_stats_assert_initialized_index(index);
2207	DBUG_VOID_RETURN;
2208	}
2209
2210	/*******************************************************************//**
2211	Calculates new estimates for table and index statistics. This function
2212	is relatively slow and is used to calculate persistent statistics that
2213	will be saved on disk.
2214	@return DB_SUCCESS or error code /*
2215	static
2216	dberr_t
2217	dict_stats_update_persistent(
2218	/=========================/
2219	dict_table_t* table) /!< in/out: table /
2220	{
2221	dict_index_t* index;
2222
2223	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
2224
2225	dict_table_stats_lock(table, RW_X_LATCH);
2226
2227	/ analyze the clustered index first /
2228
2229	index = dict_table_get_first_index(table);
2230
2231	if (index == NULL
2232	\|\| index->is_corrupted()
2233	\|\| (index->type \| DICT_UNIQUE) != (DICT_CLUSTERED \| DICT_UNIQUE)) {
2234
2235	/ Table definition is corrupt /
2236	dict_table_stats_unlock(table, RW_X_LATCH);
2237	dict_stats_empty_table(table, true);
2238
2239	return(DB_CORRUPTION);
2240	}
2241
2242	ut_ad(!dict_index_is_ibuf(index));
2243
2244	dict_stats_analyze_index(index);
2245
2246	ulint n_unique = dict_index_get_n_unique(index);
2247
2248	table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - `1`];
2249
2250	table->stat_clustered_index_size = index->stat_index_size;
2251
2252	/ analyze other indexes from the table, if any /
2253
2254	table->stat_sum_of_other_index_sizes = `0`;
2255
2256	for (index = dict_table_get_next_index(index);
2257	index != NULL;
2258	index = dict_table_get_next_index(index)) {
2259
2260	ut_ad(!dict_index_is_ibuf(index));
2261
2262	if (index->type & DICT_FTS \|\| dict_index_is_spatial(index)) {
2263	continue;
2264	}
2265
2266	dict_stats_empty_index(index, false);
2267
2268	if (dict_stats_should_ignore_index(index)) {
2269	continue;
2270	}
2271
2272	if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
2273	dict_stats_analyze_index(index);
2274	}
2275
2276	table->stat_sum_of_other_index_sizes
2277	+= index->stat_index_size;
2278	}
2279
2280	table->stats_last_recalc = ut_time();
2281
2282	table->stat_modified_counter = `0`;
2283
2284	table->stat_initialized = TRUE;
2285
2286	dict_stats_assert_initialized(table);
2287
2288	dict_table_stats_unlock(table, RW_X_LATCH);
2289
2290	return(DB_SUCCESS);
2291	}
2292
2293	#include "mysql_com.h"
2294	/* Save an individual index's statistic into the persistent statistics*
2295	storage.
2296	@param[in] index index to be updated
2297	@param[in] last_update timestamp of the stat
2298	@param[in] stat_name name of the stat
2299	@param[in] stat_value value of the stat
2300	@param[in] sample_size n pages sampled or NULL
2301	@param[in] stat_description description of the stat
2302	@param[in,out] trx in case of NULL the function will
2303	allocate and free the trx object. If it is not NULL then it will be
2304	rolled back only in the case of error, but not freed.
2305	@return DB_SUCCESS or error code /*
2306	dberr_t
2307	dict_stats_save_index_stat(
2308	dict_index_t* index,
2309	ib_time_t last_update,
2310	const char* stat_name,
2311	ib_uint64_t stat_value,
2312	ib_uint64_t* sample_size,
2313	const char* stat_description,
2314	trx_t* trx)
2315	{
2316	dberr_t ret;
2317	pars_info_t* pinfo;
2318	char db_utf8[MAX_DB_UTF8_LEN];
2319	char table_utf8[MAX_TABLE_UTF8_LEN];
2320
2321	ut_ad(!trx \|\| trx->internal \|\| trx->mysql_thd);
2322	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
2323	ut_ad(mutex_own(&dict_sys->mutex));
2324
2325	dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
2326	table_utf8, sizeof(table_utf8));
2327
2328	pinfo = pars_info_create();
2329	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2330	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2331	pars_info_add_str_literal(pinfo, "index_name", index->name);
2332	UNIV_MEM_ASSERT_RW_ABORT(&last_update, `4`);
2333	pars_info_add_int4_literal(pinfo, "last_update", uint32(last_update));
2334	UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
2335	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
2336	UNIV_MEM_ASSERT_RW_ABORT(&stat_value, `8`);
2337	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
2338	if (sample_size != NULL) {
2339	UNIV_MEM_ASSERT_RW_ABORT(sample_size, `8`);
2340	pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
2341	} else {
2342	pars_info_add_literal(pinfo, "sample_size", NULL,
2343	UNIV_SQL_NULL, DATA_FIXBINARY, `0`);
2344	}
2345	UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
2346	pars_info_add_str_literal(pinfo, "stat_description",
2347	stat_description);
2348
2349	ret = dict_stats_exec_sql(
2350	pinfo,
2351	"PROCEDURE INDEX_STATS_SAVE () IS\n"
2352	"BEGIN\n"
2353
2354	"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
2355	"WHERE\n"
2356	"database_name = :database_name AND\n"
2357	"table_name = :table_name AND\n"
2358	"index_name = :index_name AND\n"
2359	"stat_name = :stat_name;\n"
2360
2361	"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
2362	"VALUES\n"
2363	"(\n"
2364	":database_name,\n"
2365	":table_name,\n"
2366	":index_name,\n"
2367	":last_update,\n"
2368	":stat_name,\n"
2369	":stat_value,\n"
2370	":sample_size,\n"
2371	":stat_description\n"
2372	");\n"
2373	"END;", trx);
2374
2375	if (ret != DB_SUCCESS) {
2376	if (innodb_index_stats_not_found == false &&
2377	index->stats_error_printed == false) {
2378	ib::error () << "Cannot save index statistics for table "
2379	<< index->table->name
2380	<< ", index " << index->name
2381	<< ", stat name \"" << stat_name << "\": "
2382	<< ut_strerr(ret);
2383	index->stats_error_printed = true;
2384	}
2385	}
2386
2387	return(ret);
2388	}
2389
2390	/* Report an error if updating table statistics failed because*
2391	.ibd file is missing, table decryption failed or table is corrupted.
2392	@param[in,out] table Table
2393	@param[in] defragment true if statistics is for defragment
2394	@retval DB_DECRYPTION_FAILED if decryption of the table failed
2395	@retval DB_TABLESPACE_DELETED if .ibd file is missing
2396	@retval DB_CORRUPTION if table is marked as corrupted /*
2397	dberr_t
2398	dict_stats_report_error(dict_table_t* table, bool defragment)
2399	{
2400	dberr_t err;
2401
2402	const char* df = defragment ? " defragment" : "";
2403
2404	if (!table->space) {
2405	ib::warn () << "Cannot save" << df << " statistics for table "
2406	<< table->name
2407	<< " because the .ibd file is missing. "
2408	<< TROUBLESHOOTING_MSG;
2409	err = DB_TABLESPACE_DELETED;
2410	} else {
2411	ib::warn () << "Cannot save" << df << " statistics for table "
2412	<< table->name
2413	<< " because file "
2414	<< table->space->chain.start->name
2415	<< (table->corrupted
2416	? " is corrupted."
2417	: " cannot be decrypted.");
2418	err = table->corrupted ? DB_CORRUPTION : DB_DECRYPTION_FAILED;
2419	}
2420
2421	dict_stats_empty_table(table, defragment);
2422	return err;
2423	}
2424
2425	/* Save the table's statistics into the persistent statistics storage.*
2426	@param[in] table_orig table whose stats to save
2427	@param[in] only_for_index if this is non-NULL, then stats for indexes
2428	that are not equal to it will not be saved, if NULL, then all indexes' stats
2429	are saved
2430	@return DB_SUCCESS or error code /*
2431	static
2432	dberr_t
2433	dict_stats_save(
2434	dict_table_t* table_orig,
2435	const index_id_t* only_for_index)
2436	{
2437	pars_info_t* pinfo;
2438	ib_time_t now;
2439	dberr_t ret;
2440	dict_table_t* table;
2441	char db_utf8[MAX_DB_UTF8_LEN];
2442	char table_utf8[MAX_TABLE_UTF8_LEN];
2443
2444	if (high_level_read_only) {
2445	return DB_READ_ONLY;
2446	}
2447
2448	if (!table_orig->is_readable()) {
2449	return (dict_stats_report_error(table_orig));
2450	}
2451
2452	table = dict_stats_snapshot_create(table_orig);
2453
2454	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
2455	table_utf8, sizeof(table_utf8));
2456
2457	now = ut_time();
2458	rw_lock_x_lock(dict_operation_lock);
2459	mutex_enter(&dict_sys->mutex);
2460
2461	pinfo = pars_info_create();
2462
2463	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2464	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2465	pars_info_add_int4_literal(pinfo, "last_update", uint32(now));
2466	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
2467	pars_info_add_ull_literal(pinfo, "clustered_index_size",
2468	table->stat_clustered_index_size);
2469	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
2470	table->stat_sum_of_other_index_sizes);
2471
2472	ret = dict_stats_exec_sql(
2473	pinfo,
2474	"PROCEDURE TABLE_STATS_SAVE () IS\n"
2475	"BEGIN\n"
2476
2477	"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
2478	"WHERE\n"
2479	"database_name = :database_name AND\n"
2480	"table_name = :table_name;\n"
2481
2482	"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
2483	"VALUES\n"
2484	"(\n"
2485	":database_name,\n"
2486	":table_name,\n"
2487	":last_update,\n"
2488	":n_rows,\n"
2489	":clustered_index_size,\n"
2490	":sum_of_other_index_sizes\n"
2491	");\n"
2492	"END;", NULL);
2493
2494	if (ret != DB_SUCCESS) {
2495	ib::error () << "Cannot save table statistics for table "
2496	<< table->name << ": " << ut_strerr(ret);
2497
2498	mutex_exit(&dict_sys->mutex);
2499	rw_lock_x_unlock(dict_operation_lock);
2500
2501	dict_stats_snapshot_free(table);
2502
2503	return(ret);
2504	}
2505
2506	trx_t* trx = trx_create();
2507	trx_start_internal(trx);
2508
2509	dict_index_t* index;
2510	index_map_t indexes(
2511	(ut_strcmp_functor ()),
2512	index_map_t_allocator (mem_key_dict_stats_index_map_t));
2513
2514	/ Below we do all the modifications in innodb_index_stats in a single*
2515	transaction for performance reasons. Modifying more than one row in a
2516	single transaction may deadlock with other transactions if they
2517	lock the rows in different order. Other transaction could be for
2518	example when we DROP a table and do
2519	DELETE FROM innodb_index_stats WHERE database_name = '...'
2520	AND table_name = '...'; which will affect more than one row. To
2521	prevent deadlocks we always lock the rows in the same order - the
2522	order of the PK, which is (database_name, table_name, index_name,
2523	stat_name). This is why below we sort the indexes by name and then
2524	for each index, do the mods ordered by stat_name. /*
2525
2526	for (index = dict_table_get_first_index(table);
2527	index != NULL;
2528	index = dict_table_get_next_index(index)) {
2529
2530	indexes [index->name] = index;
2531	}
2532
2533	index_map_t::const_iterator it;
2534
2535	for (it = indexes.begin(); it != indexes.end(); ++it) {
2536
2537	index = it ->second;
2538
2539	if (only_for_index != NULL && index->id != *only_for_index) {
2540	continue;
2541	}
2542
2543	if (dict_stats_should_ignore_index(index)) {
2544	continue;
2545	}
2546
2547	ut_ad(!dict_index_is_ibuf(index));
2548
2549	for (unsigned i = `0`; i < index->n_uniq; i++) {
2550
2551	char stat_name[`16`];
2552	char stat_description[`1024`];
2553
2554	snprintf(stat_name, sizeof(stat_name),
2555	"n_diff_pfx%02u", i + `1`);
2556
2557	/ craft a string that contains the column names /
2558	snprintf(stat_description, sizeof(stat_description),
2559	"%s", index->fields[`0`].name ());
2560	for (unsigned j = `1`; j <= i; j++) {
2561	size_t len;
2562
2563	len = strlen(stat_description);
2564
2565	snprintf(stat_description + len,
2566	sizeof(stat_description) - len,
2567	",%s", index->fields[j].name ());
2568	}
2569
2570	ret = dict_stats_save_index_stat(
2571	index, now, stat_name,
2572	index->stat_n_diff_key_vals[i],
2573	&index->stat_n_sample_sizes[i],
2574	stat_description, trx);
2575
2576	if (ret != DB_SUCCESS) {
2577	goto end;
2578	}
2579	}
2580
2581	ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
2582	index->stat_n_leaf_pages,
2583	NULL,
2584	"Number of leaf pages "
2585	"in the index", trx);
2586	if (ret != DB_SUCCESS) {
2587	goto end;
2588	}
2589
2590	ret = dict_stats_save_index_stat(index, now, "size",
2591	index->stat_index_size,
2592	NULL,
2593	"Number of pages "
2594	"in the index", trx);
2595	if (ret != DB_SUCCESS) {
2596	goto end;
2597	}
2598	}
2599
2600	trx_commit_for_mysql(trx);
2601
2602	end:
2603	trx_free(trx);
2604
2605	mutex_exit(&dict_sys->mutex);
2606	rw_lock_x_unlock(dict_operation_lock);
2607
2608	dict_stats_snapshot_free(table);
2609
2610	return(ret);
2611	}
2612
2613	/*******************************************************************//**
2614	Called for the row that is selected by
2615	SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2616	The second argument is a pointer to the table and the fetched stats are
2617	written to it.
2618	@return non-NULL dummy /*
2619	static
2620	ibool
2621	dict_stats_fetch_table_stats_step(
2622	/==============================/
2623	void* node_void, /!< in: select node /
2624	void* table_void) /!< out: table /
2625	{
2626	sel_node_t* node = (sel_node_t*) node_void;
2627	dict_table_t* table = (dict_table_t*) table_void;
2628	que_common_t* cnode;
2629	int i;
2630
2631	/ this should loop exactly 3 times - for*
2632	n_rows,clustered_index_size,sum_of_other_index_sizes /*
2633	for (cnode = static_cast<que_common_t*>(node->select_list), i = `0`;
2634	cnode != NULL;
2635	cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2636	i++) {
2637
2638	const byte* data;
2639	dfield_t* dfield = que_node_get_val(cnode);
2640	dtype_t* type = dfield_get_type(dfield);
2641	ulint len = dfield_get_len(dfield);
2642
2643	data = static_cast<const byte*>(dfield_get_data(dfield));
2644
2645	switch (i) {
2646	case `0`: / mysql.innodb_table_stats.n_rows /
2647
2648	ut_a(dtype_get_mtype(type) == DATA_INT);
2649	ut_a(len == `8`);
2650
2651	table->stat_n_rows = mach_read_from_8(data);
2652
2653	break;
2654
2655	case `1`: / mysql.innodb_table_stats.clustered_index_size /
2656
2657	ut_a(dtype_get_mtype(type) == DATA_INT);
2658	ut_a(len == `8`);
2659
2660	table->stat_clustered_index_size
2661	= (ulint) mach_read_from_8(data);
2662
2663	break;
2664
2665	case `2`: / mysql.innodb_table_stats.sum_of_other_index_sizes /
2666
2667	ut_a(dtype_get_mtype(type) == DATA_INT);
2668	ut_a(len == `8`);
2669
2670	table->stat_sum_of_other_index_sizes
2671	= (ulint) mach_read_from_8(data);
2672
2673	break;
2674
2675	default:
2676
2677	/ someone changed SELECT*
2678	n_rows,clustered_index_size,sum_of_other_index_sizes
2679	to select more columns from innodb_table_stats without
2680	adjusting here /*
2681	ut_error;
2682	}
2683	}
2684
2685	/ if i < 3 this means someone changed the*
2686	SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2687	to select less columns from innodb_table_stats without adjusting here;
2688	if i > 3 we would have ut_error'ed earlier /*
2689	ut_a(i == `3` /n_rows,clustered_index_size,sum_of_other_index_sizes/);
2690
2691	/ XXX this is not used but returning non-NULL is necessary /
2692	return(TRUE);
2693	}
2694
2695	/* Aux struct used to pass a table and a boolean to*
2696	dict_stats_fetch_index_stats_step(). /*
2697	struct index_fetch_t {
2698	dict_table_t* table; /!< table whose indexes are to be modified /
2699	bool stats_were_modified; /!< will be set to true if at*
2700	least one index stats were modified /*
2701	};
2702
2703	/*******************************************************************//**
2704	Called for the rows that are selected by
2705	SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2706	The second argument is a pointer to the table and the fetched stats are
2707	written to its indexes.
2708	Let a table has N indexes and each index has Ui unique columns for i=1..N,
2709	then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2710	So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2711	NAVG(Ui). In each call it searches for the currently fetched index into*
2712	table->indexes linearly, assuming this list is not sorted. Thus, overall,
2713	fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2714	is the number of indexes.
2715	This can be improved if we sort table->indexes in a temporary area just once
2716	and then search in that sorted list. Then the complexity will be O(Nlog(N)).*
2717	We assume a table will not have more than 100 indexes, so we go with the
2718	simpler N^2 algorithm.
2719	@return non-NULL dummy /*
2720	static
2721	ibool
2722	dict_stats_fetch_index_stats_step(
2723	/==============================/
2724	void* node_void, /!< in: select node /
2725	void* arg_void) /!< out: table + a flag that tells if we*
2726	modified anything /*
2727	{
2728	sel_node_t* node = (sel_node_t*) node_void;
2729	index_fetch_t* arg = (index_fetch_t*) arg_void;
2730	dict_table_t* table = arg->table;
2731	dict_index_t* index = NULL;
2732	que_common_t* cnode;
2733	const char* stat_name = NULL;
2734	ulint stat_name_len = ULINT_UNDEFINED;
2735	ib_uint64_t stat_value = UINT64_UNDEFINED;
2736	ib_uint64_t sample_size = UINT64_UNDEFINED;
2737	int i;
2738
2739	/ this should loop exactly 4 times - for the columns that*
2740	were selected: index_name,stat_name,stat_value,sample_size /*
2741	for (cnode = static_cast<que_common_t*>(node->select_list), i = `0`;
2742	cnode != NULL;
2743	cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2744	i++) {
2745
2746	const byte* data;
2747	dfield_t* dfield = que_node_get_val(cnode);
2748	dtype_t* type = dfield_get_type(dfield);
2749	ulint len = dfield_get_len(dfield);
2750
2751	data = static_cast<const byte*>(dfield_get_data(dfield));
2752
2753	switch (i) {
2754	case `0`: / mysql.innodb_index_stats.index_name /
2755
2756	ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2757
2758	/ search for index in table's indexes whose name*
2759	matches data; the fetched index name is in data,
2760	has no terminating '\0' and has length len /*
2761	for (index = dict_table_get_first_index(table);
2762	index != NULL;
2763	index = dict_table_get_next_index(index)) {
2764
2765	if (index->is_committed()
2766	&& strlen(index->name) == len
2767	&& memcmp(index->name, data, len) == `0`) {
2768	/ the corresponding index was found /
2769	break;
2770	}
2771	}
2772
2773	/ if index is NULL here this means that*
2774	mysql.innodb_index_stats contains more rows than the
2775	number of indexes in the table; this is ok, we just
2776	return ignoring those extra rows; in other words
2777	dict_stats_fetch_index_stats_step() has been called
2778	for a row from index_stats with unknown index_name
2779	column /*
2780	if (index == NULL) {
2781
2782	return(TRUE);
2783	}
2784
2785	break;
2786
2787	case `1`: / mysql.innodb_index_stats.stat_name /
2788
2789	ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2790
2791	ut_a(index != NULL);
2792
2793	stat_name = (const char*) data;
2794	stat_name_len = len;
2795
2796	break;
2797
2798	case `2`: / mysql.innodb_index_stats.stat_value /
2799
2800	ut_a(dtype_get_mtype(type) == DATA_INT);
2801	ut_a(len == `8`);
2802
2803	ut_a(index != NULL);
2804	ut_a(stat_name != NULL);
2805	ut_a(stat_name_len != ULINT_UNDEFINED);
2806
2807	stat_value = mach_read_from_8(data);
2808
2809	break;
2810
2811	case `3`: / mysql.innodb_index_stats.sample_size /
2812
2813	ut_a(dtype_get_mtype(type) == DATA_INT);
2814	ut_a(len == `8` \|\| len == UNIV_SQL_NULL);
2815
2816	ut_a(index != NULL);
2817	ut_a(stat_name != NULL);
2818	ut_a(stat_name_len != ULINT_UNDEFINED);
2819	ut_a(stat_value != UINT64_UNDEFINED);
2820
2821	if (len == UNIV_SQL_NULL) {
2822	break;
2823	}
2824	/ else /
2825
2826	sample_size = mach_read_from_8(data);
2827
2828	break;
2829
2830	default:
2831
2832	/ someone changed*
2833	SELECT index_name,stat_name,stat_value,sample_size
2834	to select more columns from innodb_index_stats without
2835	adjusting here /*
2836	ut_error;
2837	}
2838	}
2839
2840	/ if i < 4 this means someone changed the*
2841	SELECT index_name,stat_name,stat_value,sample_size
2842	to select less columns from innodb_index_stats without adjusting here;
2843	if i > 4 we would have ut_error'ed earlier /*
2844	ut_a(i == `4` / index_name,stat_name,stat_value,sample_size /);
2845
2846	ut_a(index != NULL);
2847	ut_a(stat_name != NULL);
2848	ut_a(stat_name_len != ULINT_UNDEFINED);
2849	ut_a(stat_value != UINT64_UNDEFINED);
2850	/ sample_size could be UINT64_UNDEFINED here, if it is NULL /
2851
2852	#define PFX "n_diff_pfx"
2853	#define PFX_LEN 10
2854
2855	if (stat_name_len == `4` / strlen("size") /
2856	&& strncasecmp("size", stat_name, stat_name_len) == `0`) {
2857	index->stat_index_size = (ulint) stat_value;
2858	arg->stats_were_modified = true;
2859	} else if (stat_name_len == `12` / strlen("n_leaf_pages") /
2860	&& strncasecmp("n_leaf_pages", stat_name, stat_name_len)
2861	== `0`) {
2862	index->stat_n_leaf_pages = (ulint) stat_value;
2863	arg->stats_were_modified = true;
2864	} else if (stat_name_len == `12` / strlen("n_page_split") /
2865	&& strncasecmp("n_page_split", stat_name, stat_name_len)
2866	== `0`) {
2867	index->stat_defrag_n_page_split = (ulint) stat_value;
2868	arg->stats_were_modified = true;
2869	} else if (stat_name_len == `13` / strlen("n_pages_freed") /
2870	&& strncasecmp("n_pages_freed", stat_name, stat_name_len)
2871	== `0`) {
2872	index->stat_defrag_n_pages_freed = (ulint) stat_value;
2873	arg->stats_were_modified = true;
2874	} else if (stat_name_len > PFX_LEN / e.g. stat_name=="n_diff_pfx01" /
2875	&& strncasecmp(PFX, stat_name, PFX_LEN) == `0`) {
2876
2877	const char* num_ptr;
2878	unsigned long n_pfx;
2879
2880	/ point num_ptr into "1" from "n_diff_pfx12..." /
2881	num_ptr = stat_name + PFX_LEN;
2882
2883	/ stat_name should have exactly 2 chars appended to PFX*
2884	and they should be digits /*
2885	if (stat_name_len != PFX_LEN + `2`
2886	\|\| num_ptr[`0`] < `'0'` \|\| num_ptr[`0`] > `'9'`
2887	\|\| num_ptr[`1`] < `'0'` \|\| num_ptr[`1`] > `'9'`) {
2888
2889	char db_utf8[MAX_DB_UTF8_LEN];
2890	char table_utf8[MAX_TABLE_UTF8_LEN];
2891
2892	dict_fs2utf8(table->name.m_name,
2893	db_utf8, sizeof(db_utf8),
2894	table_utf8, sizeof(table_utf8));
2895
2896	ib::info out;
2897	out << "Ignoring strange row from "
2898	<< INDEX_STATS_NAME_PRINT << " WHERE"
2899	" database_name = '" << db_utf8
2900	<< "' AND table_name = '" << table_utf8
2901	<< "' AND index_name = '" << index->name ()
2902	<< "' AND stat_name = '";
2903	out.write(stat_name, stat_name_len);
2904	out << "'; because stat_name is malformed";
2905	return(TRUE);
2906	}
2907	/ else /
2908
2909	/ extract 12 from "n_diff_pfx12..." into n_pfx*
2910	note that stat_name does not have a terminating '\0' /*
2911	n_pfx = ulong(num_ptr[`0`] - `'0'`) * `10` + ulong(num_ptr[`1`] - `'0'`);
2912
2913	ulint n_uniq = index->n_uniq;
2914
2915	if (n_pfx == `0` \|\| n_pfx > n_uniq) {
2916
2917	char db_utf8[MAX_DB_UTF8_LEN];
2918	char table_utf8[MAX_TABLE_UTF8_LEN];
2919
2920	dict_fs2utf8(table->name.m_name,
2921	db_utf8, sizeof(db_utf8),
2922	table_utf8, sizeof(table_utf8));
2923
2924	ib::info out;
2925	out << "Ignoring strange row from "
2926	<< INDEX_STATS_NAME_PRINT << " WHERE"
2927	" database_name = '" << db_utf8
2928	<< "' AND table_name = '" << table_utf8
2929	<< "' AND index_name = '" << index->name ()
2930	<< "' AND stat_name = '";
2931	out.write(stat_name, stat_name_len);
2932	out << "'; because stat_name is out of range, the index"
2933	" has " << n_uniq << " unique columns";
2934
2935	return(TRUE);
2936	}
2937	/ else /
2938
2939	index->stat_n_diff_key_vals[n_pfx - `1`] = stat_value;
2940
2941	if (sample_size != UINT64_UNDEFINED) {
2942	index->stat_n_sample_sizes[n_pfx - `1`] = sample_size;
2943	} else {
2944	/ hmm, strange... the user must have UPDATEd the*
2945	table manually and SET sample_size = NULL /*
2946	index->stat_n_sample_sizes[n_pfx - `1`] = `0`;
2947	}
2948
2949	index->stat_n_non_null_key_vals[n_pfx - `1`] = `0`;
2950
2951	arg->stats_were_modified = true;
2952	} else {
2953	/ silently ignore rows with unknown stat_name, the*
2954	user may have developed her own stats /*
2955	}
2956
2957	/ XXX this is not used but returning non-NULL is necessary /
2958	return(TRUE);
2959	}
2960
2961	/*******************************************************************//**
2962	Read table's statistics from the persistent statistics storage.
2963	@return DB_SUCCESS or error code /*
2964	static
2965	dberr_t
2966	dict_stats_fetch_from_ps(
2967	/=====================/
2968	dict_table_t* table) /!< in/out: table /
2969	{
2970	index_fetch_t index_fetch_arg;
2971	trx_t* trx;
2972	pars_info_t* pinfo;
2973	dberr_t ret;
2974	char db_utf8[MAX_DB_UTF8_LEN];
2975	char table_utf8[MAX_TABLE_UTF8_LEN];
2976
2977	ut_ad(!mutex_own(&dict_sys->mutex));
2978
2979	/ Initialize all stats to dummy values before fetching because if*
2980	the persistent storage contains incomplete stats (e.g. missing stats
2981	for some index) then we would end up with (partially) uninitialized
2982	stats. /*
2983	dict_stats_empty_table(table, true);
2984
2985	trx = trx_create();
2986
2987	/ Use 'read-uncommitted' so that the SELECTs we execute*
2988	do not get blocked in case some user has locked the rows we
2989	are SELECTing /*
2990
2991	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
2992
2993	if (srv_read_only_mode) {
2994	trx_start_internal_read_only(trx);
2995	} else {
2996	trx_start_internal(trx);
2997	}
2998
2999	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
3000	table_utf8, sizeof(table_utf8));
3001
3002	pinfo = pars_info_create();
3003
3004	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3005
3006	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3007
3008	pars_info_bind_function(pinfo,
3009	"fetch_table_stats_step",
3010	dict_stats_fetch_table_stats_step,
3011	table);
3012
3013	index_fetch_arg.table = table;
3014	index_fetch_arg.stats_were_modified = false;
3015	pars_info_bind_function(pinfo,
3016	"fetch_index_stats_step",
3017	dict_stats_fetch_index_stats_step,
3018	&index_fetch_arg);
3019
3020	ret = que_eval_sql(pinfo,
3021	"PROCEDURE FETCH_STATS () IS\n"
3022	"found INT;\n"
3023	"DECLARE FUNCTION fetch_table_stats_step;\n"
3024	"DECLARE FUNCTION fetch_index_stats_step;\n"
3025	"DECLARE CURSOR table_stats_cur IS\n"
3026	" SELECT\n"
3027	/ if you change the selected fields, be*
3028	sure to adjust
3029	dict_stats_fetch_table_stats_step() /*
3030	" n_rows,\n"
3031	" clustered_index_size,\n"
3032	" sum_of_other_index_sizes\n"
3033	" FROM \"" TABLE_STATS_NAME "\"\n"
3034	" WHERE\n"
3035	" database_name = :database_name AND\n"
3036	" table_name = :table_name;\n"
3037	"DECLARE CURSOR index_stats_cur IS\n"
3038	" SELECT\n"
3039	/ if you change the selected fields, be*
3040	sure to adjust
3041	dict_stats_fetch_index_stats_step() /*
3042	" index_name,\n"
3043	" stat_name,\n"
3044	" stat_value,\n"
3045	" sample_size\n"
3046	" FROM \"" INDEX_STATS_NAME "\"\n"
3047	" WHERE\n"
3048	" database_name = :database_name AND\n"
3049	" table_name = :table_name;\n"
3050
3051	"BEGIN\n"
3052
3053	"OPEN table_stats_cur;\n"
3054	"FETCH table_stats_cur INTO\n"
3055	" fetch_table_stats_step();\n"
3056	"IF (SQL % NOTFOUND) THEN\n"
3057	" CLOSE table_stats_cur;\n"
3058	" RETURN;\n"
3059	"END IF;\n"
3060	"CLOSE table_stats_cur;\n"
3061
3062	"OPEN index_stats_cur;\n"
3063	"found := 1;\n"
3064	"WHILE found = 1 LOOP\n"
3065	" FETCH index_stats_cur INTO\n"
3066	" fetch_index_stats_step();\n"
3067	" IF (SQL % NOTFOUND) THEN\n"
3068	" found := 0;\n"
3069	" END IF;\n"
3070	"END LOOP;\n"
3071	"CLOSE index_stats_cur;\n"
3072
3073	"END;",
3074	TRUE, trx);
3075	/ pinfo is freed by que_eval_sql() /
3076
3077	trx_commit_for_mysql(trx);
3078
3079	trx_free(trx);
3080
3081	if (!index_fetch_arg.stats_were_modified) {
3082	return(DB_STATS_DO_NOT_EXIST);
3083	}
3084
3085	return(ret);
3086	}
3087
3088	/*******************************************************************//**
3089	Clear defragmentation stats modified counter for all indices in table. /*
3090	static
3091	void
3092	dict_stats_empty_defrag_modified_counter(
3093	dict_table_t* table) /!< in: table /
3094	{
3095	dict_index_t* index;
3096	ut_a(table);
3097	for (index = dict_table_get_first_index(table);
3098	index != NULL;
3099	index = dict_table_get_next_index(index)) {
3100	index->stat_defrag_modified_counter = `0`;
3101	}
3102	}
3103
3104	/*******************************************************************//**
3105	Fetches or calculates new estimates for index statistics. /*
3106	void
3107	dict_stats_update_for_index(
3108	/========================/
3109	dict_index_t* index) /!< in/out: index /
3110	{
3111	DBUG_ENTER("dict_stats_update_for_index");
3112
3113	ut_ad(!mutex_own(&dict_sys->mutex));
3114
3115	if (dict_stats_is_persistent_enabled(index->table)) {
3116
3117	if (dict_stats_persistent_storage_check(false)) {
3118	dict_table_stats_lock(index->table, RW_X_LATCH);
3119	dict_stats_analyze_index(index);
3120	dict_table_stats_unlock(index->table, RW_X_LATCH);
3121	dict_stats_save(index->table, &index->id);
3122	DBUG_VOID_RETURN;
3123	}
3124	/ else /
3125
3126	if (innodb_index_stats_not_found == false &&
3127	index->stats_error_printed == false) {
3128	/ Fall back to transient stats since the persistent*
3129	storage is not present or is corrupted /*
3130
3131	ib::info () << "Recalculation of persistent statistics"
3132	" requested for table " << index->table->name
3133	<< " index " << index->name
3134	<< " but the required"
3135	" persistent statistics storage is not present or is"
3136	" corrupted. Using transient stats instead.";
3137	index->stats_error_printed = false;
3138	}
3139	}
3140
3141	dict_table_stats_lock(index->table, RW_X_LATCH);
3142	dict_stats_update_transient_for_index(index);
3143	dict_table_stats_unlock(index->table, RW_X_LATCH);
3144
3145	DBUG_VOID_RETURN;
3146	}
3147
3148	/*******************************************************************//**
3149	Calculates new estimates for table and index statistics. The statistics
3150	are used in query optimization.
3151	@return DB_SUCCESS or error code /*
3152	dberr_t
3153	dict_stats_update(
3154	/==============/
3155	dict_table_t* table, /!< in/out: table /
3156	dict_stats_upd_option_t stats_upd_option)
3157	/!< in: whether to (re) calc*
3158	the stats or to fetch them from
3159	the persistent statistics
3160	storage /*
3161	{
3162	ut_ad(!mutex_own(&dict_sys->mutex));
3163
3164	if (!table->is_readable()) {
3165	return (dict_stats_report_error(table));
3166	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
3167	/ If we have set a high innodb_force_recovery level, do*
3168	not calculate statistics, as a badly corrupted index can
3169	cause a crash in it. /*
3170	dict_stats_empty_table(table, false);
3171	return(DB_SUCCESS);
3172	}
3173
3174	switch (stats_upd_option) {
3175	case DICT_STATS_RECALC_PERSISTENT:
3176
3177	if (srv_read_only_mode) {
3178	goto transient;
3179	}
3180
3181	/ Persistent recalculation requested, called from*
3182	1) ANALYZE TABLE, or
3183	2) the auto recalculation background thread, or
3184	3) open table if stats do not exist on disk and auto recalc
3185	is enabled /*
3186
3187	/ InnoDB internal tables (e.g. SYS_TABLES) cannot have*
3188	persistent stats enabled /*
3189	ut_a(strchr(table->name.m_name, `'/'`) != NULL);
3190
3191	/ check if the persistent statistics storage exists*
3192	before calling the potentially slow function
3193	dict_stats_update_persistent(); that is a
3194	prerequisite for dict_stats_save() succeeding /*
3195	if (dict_stats_persistent_storage_check(false)) {
3196
3197	dberr_t err;
3198
3199	err = dict_stats_update_persistent(table);
3200
3201	if (err != DB_SUCCESS) {
3202	return(err);
3203	}
3204
3205	err = dict_stats_save(table, NULL);
3206
3207	return(err);
3208	}
3209
3210	/ Fall back to transient stats since the persistent*
3211	storage is not present or is corrupted /*
3212
3213	if (innodb_table_stats_not_found == false &&
3214	table->stats_error_printed == false) {
3215	ib::warn () << "Recalculation of persistent statistics"
3216	" requested for table "
3217	<< table->name
3218	<< " but the required persistent"
3219	" statistics storage is not present or is corrupted."
3220	" Using transient stats instead.";
3221	table->stats_error_printed = true;
3222	}
3223
3224	goto transient;
3225
3226	case DICT_STATS_RECALC_TRANSIENT:
3227
3228	goto transient;
3229
3230	case DICT_STATS_EMPTY_TABLE:
3231
3232	dict_stats_empty_table(table, true);
3233
3234	/ If table is using persistent stats,*
3235	then save the stats on disk /*
3236
3237	if (dict_stats_is_persistent_enabled(table)) {
3238
3239	if (dict_stats_persistent_storage_check(false)) {
3240
3241	return(dict_stats_save(table, NULL));
3242	}
3243
3244	return(DB_STATS_DO_NOT_EXIST);
3245	}
3246
3247	return(DB_SUCCESS);
3248
3249	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
3250
3251	/ fetch requested, either fetch from persistent statistics*
3252	storage or use the old method /*
3253
3254	if (table->stat_initialized) {
3255	return(DB_SUCCESS);
3256	}
3257
3258	/ InnoDB internal tables (e.g. SYS_TABLES) cannot have*
3259	persistent stats enabled /*
3260	ut_a(strchr(table->name.m_name, `'/'`) != NULL);
3261
3262	if (!dict_stats_persistent_storage_check(false)) {
3263	/ persistent statistics storage does not exist*
3264	or is corrupted, calculate the transient stats /*
3265
3266	if (innodb_table_stats_not_found == false &&
3267	table->stats_error_printed == false) {
3268	ib::error () << "Fetch of persistent statistics"
3269	" requested for table "
3270	<< table->name
3271	<< " but the required system tables "
3272	<< TABLE_STATS_NAME_PRINT
3273	<< " and " << INDEX_STATS_NAME_PRINT
3274	<< " are not present or have unexpected"
3275	" structure. Using transient stats instead.";
3276	table->stats_error_printed = true;
3277	}
3278
3279	goto transient;
3280	}
3281
3282	dict_table_t* t;
3283
3284	/ Create a dummy table object with the same name and*
3285	indexes, suitable for fetching the stats into it. /*
3286	t = dict_stats_table_clone_create(table);
3287
3288	dberr_t err = dict_stats_fetch_from_ps(t);
3289
3290	t->stats_last_recalc = table->stats_last_recalc;
3291	t->stat_modified_counter = `0`;
3292	dict_stats_empty_defrag_modified_counter(t);
3293
3294	switch (err) {
3295	case DB_SUCCESS:
3296
3297	dict_table_stats_lock(table, RW_X_LATCH);
3298
3299	/ Pass reset_ignored_indexes=true as parameter*
3300	to dict_stats_copy. This will cause statictics
3301	for corrupted indexes to be set to empty values /*
3302	dict_stats_copy(table, t, true);
3303
3304	dict_stats_assert_initialized(table);
3305
3306	dict_table_stats_unlock(table, RW_X_LATCH);
3307
3308	dict_stats_table_clone_free(t);
3309
3310	return(DB_SUCCESS);
3311	case DB_STATS_DO_NOT_EXIST:
3312
3313	dict_stats_table_clone_free(t);
3314
3315	if (srv_read_only_mode) {
3316	goto transient;
3317	}
3318
3319	if (dict_stats_auto_recalc_is_enabled(table)) {
3320	return(dict_stats_update(
3321	table,
3322	DICT_STATS_RECALC_PERSISTENT));
3323	}
3324
3325	ib::info () << "Trying to use table " << table->name
3326	<< " which has persistent statistics enabled,"
3327	" but auto recalculation turned off and the"
3328	" statistics do not exist in "
3329	TABLE_STATS_NAME_PRINT
3330	" and " INDEX_STATS_NAME_PRINT
3331	". Please either run \"ANALYZE TABLE "
3332	<< table->name << ";\" manually or enable the"
3333	" auto recalculation with \"ALTER TABLE "
3334	<< table->name << " STATS_AUTO_RECALC=1;\"."
3335	" InnoDB will now use transient statistics for "
3336	<< table->name << ".";
3337
3338	goto transient;
3339	default:
3340
3341	dict_stats_table_clone_free(t);
3342
3343	if (innodb_table_stats_not_found == false &&
3344	table->stats_error_printed == false) {
3345	ib::error () << "Error fetching persistent statistics"
3346	" for table "
3347	<< table->name
3348	<< " from " TABLE_STATS_NAME_PRINT " and "
3349	INDEX_STATS_NAME_PRINT ": " << ut_strerr(err)
3350	<< ". Using transient stats method instead.";
3351	}
3352
3353	goto transient;
3354	}
3355	/ no "default:" in order to produce a compilation warning*
3356	about unhandled enumeration value /*
3357	}
3358
3359	transient:
3360
3361	dict_table_stats_lock(table, RW_X_LATCH);
3362
3363	dict_stats_update_transient(table);
3364
3365	dict_table_stats_unlock(table, RW_X_LATCH);
3366
3367	return(DB_SUCCESS);
3368	}
3369
3370	/*******************************************************************//**
3371	Removes the information for a particular index's stats from the persistent
3372	storage if it exists and if there is data stored for this index.
3373	This function creates its own trx and commits it.
3374	A note from Marko why we cannot edit user and sys_ tables in one trx:*
3375	marko: The problem is that ibuf merges should be disabled while we are
3376	rolling back dict transactions.
3377	marko: If ibuf merges are not disabled, we need to scan the .ibd files.*
3378	But we shouldn't open .ibd files before we have rolled back dict*
3379	transactions and opened the SYS_ records for the .ibd files.
3380	@return DB_SUCCESS or error code /*
3381	dberr_t
3382	dict_stats_drop_index(
3383	/==================/
3384	const char* db_and_table,/!< in: db and table, e.g. 'db/table' /
3385	const char* iname, /!< in: index name /
3386	char* errstr, /!< out: error message if != DB_SUCCESS*
3387	is returned /*
3388	ulint errstr_sz)/!< in: size of the errstr buffer /
3389	{
3390	char db_utf8[MAX_DB_UTF8_LEN];
3391	char table_utf8[MAX_TABLE_UTF8_LEN];
3392	pars_info_t* pinfo;
3393	dberr_t ret;
3394
3395	ut_ad(!mutex_own(&dict_sys->mutex));
3396
3397	/ skip indexes whose table names do not contain a database name*
3398	e.g. if we are dropping an index from SYS_TABLES /*
3399	if (strchr(db_and_table, `'/'`) == NULL) {
3400
3401	return(DB_SUCCESS);
3402	}
3403
3404	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3405	table_utf8, sizeof(table_utf8));
3406
3407	pinfo = pars_info_create();
3408
3409	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3410
3411	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3412
3413	pars_info_add_str_literal(pinfo, "index_name", iname);
3414
3415	rw_lock_x_lock(dict_operation_lock);
3416	mutex_enter(&dict_sys->mutex);
3417
3418	ret = dict_stats_exec_sql(
3419	pinfo,
3420	"PROCEDURE DROP_INDEX_STATS () IS\n"
3421	"BEGIN\n"
3422	"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3423	"database_name = :database_name AND\n"
3424	"table_name = :table_name AND\n"
3425	"index_name = :index_name;\n"
3426	"END;\n", NULL);
3427
3428	mutex_exit(&dict_sys->mutex);
3429	rw_lock_x_unlock(dict_operation_lock);
3430
3431	if (ret == DB_STATS_DO_NOT_EXIST) {
3432	ret = DB_SUCCESS;
3433	}
3434
3435	if (ret != DB_SUCCESS) {
3436	snprintf(errstr, errstr_sz,
3437	"Unable to delete statistics for index %s"
3438	" from %s%s: %s. They can be deleted later using"
3439	" DELETE FROM %s WHERE"
3440	" database_name = '%s' AND"
3441	" table_name = '%s' AND"
3442	" index_name = '%s';",
3443	iname,
3444	INDEX_STATS_NAME_PRINT,
3445	(ret == DB_LOCK_WAIT_TIMEOUT
3446	? " because the rows are locked"
3447	: ""),
3448	ut_strerr(ret),
3449	INDEX_STATS_NAME_PRINT,
3450	db_utf8,
3451	table_utf8,
3452	iname);
3453
3454	ut_print_timestamp(stderr);
3455	fprintf(stderr, " InnoDB: %s\n", errstr);
3456	}
3457
3458	return(ret);
3459	}
3460
3461	/*******************************************************************//**
3462	Executes
3463	DELETE FROM mysql.innodb_table_stats
3464	WHERE database_name = '...' AND table_name = '...';
3465	Creates its own transaction and commits it.
3466	@return DB_SUCCESS or error code /*
3467	UNIV_INLINE
3468	dberr_t
3469	dict_stats_delete_from_table_stats(
3470	/===============================/
3471	const char* database_name, /!< in: database name, e.g. 'db' /
3472	const char* table_name) /!< in: table name, e.g. 'table' /
3473	{
3474	pars_info_t* pinfo;
3475	dberr_t ret;
3476
3477	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3478	ut_ad(mutex_own(&dict_sys->mutex));
3479
3480	pinfo = pars_info_create();
3481
3482	pars_info_add_str_literal(pinfo, "database_name", database_name);
3483	pars_info_add_str_literal(pinfo, "table_name", table_name);
3484
3485	ret = dict_stats_exec_sql(
3486	pinfo,
3487	"PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3488	"BEGIN\n"
3489	"DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3490	"database_name = :database_name AND\n"
3491	"table_name = :table_name;\n"
3492	"END;\n", NULL);
3493
3494	return(ret);
3495	}
3496
3497	/*******************************************************************//**
3498	Executes
3499	DELETE FROM mysql.innodb_index_stats
3500	WHERE database_name = '...' AND table_name = '...';
3501	Creates its own transaction and commits it.
3502	@return DB_SUCCESS or error code /*
3503	UNIV_INLINE
3504	dberr_t
3505	dict_stats_delete_from_index_stats(
3506	/===============================/
3507	const char* database_name, /!< in: database name, e.g. 'db' /
3508	const char* table_name) /!< in: table name, e.g. 'table' /
3509	{
3510	pars_info_t* pinfo;
3511	dberr_t ret;
3512
3513	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3514	ut_ad(mutex_own(&dict_sys->mutex));
3515
3516	pinfo = pars_info_create();
3517
3518	pars_info_add_str_literal(pinfo, "database_name", database_name);
3519	pars_info_add_str_literal(pinfo, "table_name", table_name);
3520
3521	ret = dict_stats_exec_sql(
3522	pinfo,
3523	"PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3524	"BEGIN\n"
3525	"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3526	"database_name = :database_name AND\n"
3527	"table_name = :table_name;\n"
3528	"END;\n", NULL);
3529
3530	return(ret);
3531	}
3532
3533	/*******************************************************************//**
3534	Removes the statistics for a table and all of its indexes from the
3535	persistent statistics storage if it exists and if there is data stored for
3536	the table. This function creates its own transaction and commits it.
3537	@return DB_SUCCESS or error code /*
3538	dberr_t
3539	dict_stats_drop_table(
3540	/==================/
3541	const char* db_and_table, /!< in: db and table, e.g. 'db/table' /
3542	char* errstr, /!< out: error message*
3543	if != DB_SUCCESS is returned /*
3544	ulint errstr_sz) /!< in: size of errstr buffer /
3545	{
3546	char db_utf8[MAX_DB_UTF8_LEN];
3547	char table_utf8[MAX_TABLE_UTF8_LEN];
3548	dberr_t ret;
3549
3550	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3551	ut_ad(mutex_own(&dict_sys->mutex));
3552
3553	/ skip tables that do not contain a database name*
3554	e.g. if we are dropping SYS_TABLES /*
3555	if (strchr(db_and_table, `'/'`) == NULL) {
3556
3557	return(DB_SUCCESS);
3558	}
3559
3560	/ skip innodb_table_stats and innodb_index_stats themselves /
3561	if (strcmp(db_and_table, TABLE_STATS_NAME) == `0`
3562	\|\| strcmp(db_and_table, INDEX_STATS_NAME) == `0`) {
3563
3564	return(DB_SUCCESS);
3565	}
3566
3567	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3568	table_utf8, sizeof(table_utf8));
3569
3570	ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3571
3572	if (ret == DB_SUCCESS) {
3573	ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3574	}
3575
3576	if (ret == DB_STATS_DO_NOT_EXIST) {
3577	ret = DB_SUCCESS;
3578	}
3579
3580	if (ret != DB_SUCCESS) {
3581
3582	snprintf(errstr, errstr_sz,
3583	"Unable to delete statistics for table %s.%s: %s."
3584	" They can be deleted later using"
3585
3586	" DELETE FROM %s WHERE"
3587	" database_name = '%s' AND"
3588	" table_name = '%s';"
3589
3590	" DELETE FROM %s WHERE"
3591	" database_name = '%s' AND"
3592	" table_name = '%s';",
3593
3594	db_utf8, table_utf8,
3595	ut_strerr(ret),
3596
3597	INDEX_STATS_NAME_PRINT,
3598	db_utf8, table_utf8,
3599
3600	TABLE_STATS_NAME_PRINT,
3601	db_utf8, table_utf8);
3602	}
3603
3604	return(ret);
3605	}
3606
3607	/*******************************************************************//**
3608	Executes
3609	UPDATE mysql.innodb_table_stats SET
3610	database_name = '...', table_name = '...'
3611	WHERE database_name = '...' AND table_name = '...';
3612	Creates its own transaction and commits it.
3613	@return DB_SUCCESS or error code /*
3614	UNIV_INLINE
3615	dberr_t
3616	dict_stats_rename_table_in_table_stats(
3617	/===================================/
3618	const char* old_dbname_utf8,/!< in: database name, e.g. 'olddb' /
3619	const char* old_tablename_utf8,/!< in: table name, e.g. 'oldtable' /
3620	const char* new_dbname_utf8,/!< in: database name, e.g. 'newdb' /
3621	const char* new_tablename_utf8)/!< in: table name, e.g. 'newtable' /
3622	{
3623	pars_info_t* pinfo;
3624	dberr_t ret;
3625
3626	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3627	ut_ad(mutex_own(&dict_sys->mutex));
3628
3629	pinfo = pars_info_create();
3630
3631	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3632	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3633	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3634	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3635
3636	ret = dict_stats_exec_sql(
3637	pinfo,
3638	"PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
3639	"BEGIN\n"
3640	"UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3641	"database_name = :new_dbname_utf8,\n"
3642	"table_name = :new_tablename_utf8\n"
3643	"WHERE\n"
3644	"database_name = :old_dbname_utf8 AND\n"
3645	"table_name = :old_tablename_utf8;\n"
3646	"END;\n", NULL);
3647
3648	return(ret);
3649	}
3650
3651	/*******************************************************************//**
3652	Executes
3653	UPDATE mysql.innodb_index_stats SET
3654	database_name = '...', table_name = '...'
3655	WHERE database_name = '...' AND table_name = '...';
3656	Creates its own transaction and commits it.
3657	@return DB_SUCCESS or error code /*
3658	UNIV_INLINE
3659	dberr_t
3660	dict_stats_rename_table_in_index_stats(
3661	/===================================/
3662	const char* old_dbname_utf8,/!< in: database name, e.g. 'olddb' /
3663	const char* old_tablename_utf8,/!< in: table name, e.g. 'oldtable' /
3664	const char* new_dbname_utf8,/!< in: database name, e.g. 'newdb' /
3665	const char* new_tablename_utf8)/!< in: table name, e.g. 'newtable' /
3666	{
3667	pars_info_t* pinfo;
3668	dberr_t ret;
3669
3670	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3671	ut_ad(mutex_own(&dict_sys->mutex));
3672
3673	pinfo = pars_info_create();
3674
3675	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3676	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3677	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3678	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3679
3680	ret = dict_stats_exec_sql(
3681	pinfo,
3682	"PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
3683	"BEGIN\n"
3684	"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3685	"database_name = :new_dbname_utf8,\n"
3686	"table_name = :new_tablename_utf8\n"
3687	"WHERE\n"
3688	"database_name = :old_dbname_utf8 AND\n"
3689	"table_name = :old_tablename_utf8;\n"
3690	"END;\n", NULL);
3691
3692	return(ret);
3693	}
3694
3695	/*******************************************************************//**
3696	Renames a table in InnoDB persistent stats storage.
3697	This function creates its own transaction and commits it.
3698	@return DB_SUCCESS or error code /*
3699	dberr_t
3700	dict_stats_rename_table(
3701	/====================/
3702	const char* old_name, /!< in: old name, e.g. 'db/table' /
3703	const char* new_name, /!< in: new name, e.g. 'db/table' /
3704	char* errstr, /!< out: error string if != DB_SUCCESS*
3705	is returned /*
3706	size_t errstr_sz) /!< in: errstr size /
3707	{
3708	char old_db_utf8[MAX_DB_UTF8_LEN];
3709	char new_db_utf8[MAX_DB_UTF8_LEN];
3710	char old_table_utf8[MAX_TABLE_UTF8_LEN];
3711	char new_table_utf8[MAX_TABLE_UTF8_LEN];
3712	dberr_t ret;
3713
3714	ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X));
3715	ut_ad(!mutex_own(&dict_sys->mutex));
3716
3717	/ skip innodb_table_stats and innodb_index_stats themselves /
3718	if (strcmp(old_name, TABLE_STATS_NAME) == `0`
3719	\|\| strcmp(old_name, INDEX_STATS_NAME) == `0`
3720	\|\| strcmp(new_name, TABLE_STATS_NAME) == `0`
3721	\|\| strcmp(new_name, INDEX_STATS_NAME) == `0`) {
3722
3723	return(DB_SUCCESS);
3724	}
3725
3726	dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
3727	old_table_utf8, sizeof(old_table_utf8));
3728
3729	dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
3730	new_table_utf8, sizeof(new_table_utf8));
3731
3732	rw_lock_x_lock(dict_operation_lock);
3733	mutex_enter(&dict_sys->mutex);
3734
3735	ulint n_attempts = `0`;
3736	do {
3737	n_attempts++;
3738
3739	ret = dict_stats_rename_table_in_table_stats(
3740	old_db_utf8, old_table_utf8,
3741	new_db_utf8, new_table_utf8);
3742
3743	if (ret == DB_DUPLICATE_KEY) {
3744	dict_stats_delete_from_table_stats(
3745	new_db_utf8, new_table_utf8);
3746	}
3747
3748	if (ret == DB_STATS_DO_NOT_EXIST) {
3749	ret = DB_SUCCESS;
3750	}
3751
3752	if (ret != DB_SUCCESS) {
3753	mutex_exit(&dict_sys->mutex);
3754	rw_lock_x_unlock(dict_operation_lock);
3755	os_thread_sleep(`200000` / 0.2 sec /);
3756	rw_lock_x_lock(dict_operation_lock);
3757	mutex_enter(&dict_sys->mutex);
3758	}
3759	} while ((ret == DB_DEADLOCK
3760	\|\| ret == DB_DUPLICATE_KEY
3761	\|\| ret == DB_LOCK_WAIT_TIMEOUT)
3762	&& n_attempts < `5`);
3763
3764	if (ret != DB_SUCCESS) {
3765	snprintf(errstr, errstr_sz,
3766	"Unable to rename statistics from"
3767	" %s.%s to %s.%s in %s: %s."
3768	" They can be renamed later using"
3769
3770	" UPDATE %s SET"
3771	" database_name = '%s',"
3772	" table_name = '%s'"
3773	" WHERE"
3774	" database_name = '%s' AND"
3775	" table_name = '%s';",
3776
3777	old_db_utf8, old_table_utf8,
3778	new_db_utf8, new_table_utf8,
3779	TABLE_STATS_NAME_PRINT,
3780	ut_strerr(ret),
3781
3782	TABLE_STATS_NAME_PRINT,
3783	new_db_utf8, new_table_utf8,
3784	old_db_utf8, old_table_utf8);
3785	mutex_exit(&dict_sys->mutex);
3786	rw_lock_x_unlock(dict_operation_lock);
3787	return(ret);
3788	}
3789	/ else /
3790
3791	n_attempts = `0`;
3792	do {
3793	n_attempts++;
3794
3795	ret = dict_stats_rename_table_in_index_stats(
3796	old_db_utf8, old_table_utf8,
3797	new_db_utf8, new_table_utf8);
3798
3799	if (ret == DB_DUPLICATE_KEY) {
3800	dict_stats_delete_from_index_stats(
3801	new_db_utf8, new_table_utf8);
3802	}
3803
3804	if (ret == DB_STATS_DO_NOT_EXIST) {
3805	ret = DB_SUCCESS;
3806	}
3807
3808	if (ret != DB_SUCCESS) {
3809	mutex_exit(&dict_sys->mutex);
3810	rw_lock_x_unlock(dict_operation_lock);
3811	os_thread_sleep(`200000` / 0.2 sec /);
3812	rw_lock_x_lock(dict_operation_lock);
3813	mutex_enter(&dict_sys->mutex);
3814	}
3815	} while ((ret == DB_DEADLOCK
3816	\|\| ret == DB_DUPLICATE_KEY
3817	\|\| ret == DB_LOCK_WAIT_TIMEOUT)
3818	&& n_attempts < `5`);
3819
3820	mutex_exit(&dict_sys->mutex);
3821	rw_lock_x_unlock(dict_operation_lock);
3822
3823	if (ret != DB_SUCCESS) {
3824	snprintf(errstr, errstr_sz,
3825	"Unable to rename statistics from"
3826	" %s.%s to %s.%s in %s: %s."
3827	" They can be renamed later using"
3828
3829	" UPDATE %s SET"
3830	" database_name = '%s',"
3831	" table_name = '%s'"
3832	" WHERE"
3833	" database_name = '%s' AND"
3834	" table_name = '%s';",
3835
3836	old_db_utf8, old_table_utf8,
3837	new_db_utf8, new_table_utf8,
3838	INDEX_STATS_NAME_PRINT,
3839	ut_strerr(ret),
3840
3841	INDEX_STATS_NAME_PRINT,
3842	new_db_utf8, new_table_utf8,
3843	old_db_utf8, old_table_utf8);
3844	}
3845
3846	return(ret);
3847	}
3848
#ifdef MYSQL_RENAME_INDEX
/*******************************************************************//**
Renames an index in InnoDB persistent stats storage.
The function acquires dict_operation_lock and dict_sys->mutex itself and
releases both before returning.
@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
if the persistent stats do not exist. */
dberr_t
dict_stats_rename_index(
/*====================*/
	const dict_table_t*	table,		/*!< in: table whose index
						is renamed */
	const char*		old_index_name,	/*!< in: old index name */
	const char*		new_index_name)	/*!< in: new index name */
{
	rw_lock_x_lock(dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	/* Bail out early when the stats storage fails the check. */
	if (!dict_stats_persistent_storage_check(true)) {
		mutex_exit(&dict_sys->mutex);
		rw_lock_x_unlock(dict_operation_lock);
		return(DB_STATS_DO_NOT_EXIST);
	}

	char	db_buf[MAX_DB_UTF8_LEN];
	char	table_buf[MAX_TABLE_UTF8_LEN];

	dict_fs2utf8(table->name.m_name, db_buf, sizeof(db_buf),
		     table_buf, sizeof(table_buf));

	pars_info_t*	info = pars_info_create();

	pars_info_add_str_literal(info, "dbname_utf8", db_buf);
	pars_info_add_str_literal(info, "tablename_utf8", table_buf);
	pars_info_add_str_literal(info, "new_index_name", new_index_name);
	pars_info_add_str_literal(info, "old_index_name", old_index_name);

	const dberr_t	err = dict_stats_exec_sql(
		info,
		"PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
		"BEGIN\n"
		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
		"index_name = :new_index_name\n"
		"WHERE\n"
		"database_name = :dbname_utf8 AND\n"
		"table_name = :tablename_utf8 AND\n"
		"index_name = :old_index_name;\n"
		"END;\n", NULL);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(dict_operation_lock);

	return(err);
}
#endif /* MYSQL_RENAME_INDEX */
3907
/* tests @{ */
3909	#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
3910
/* The following unit tests test some of the functions in this file
individually, such testing cannot be performed by the mysql-test framework
via SQL. */
3914
3915	/ test_dict_table_schema_check() @{ /
3916	void
3917	test_dict_table_schema_check()
3918	{
3919	/*
3920	CREATE TABLE tcheck (
3921	c01 VARCHAR(123),
3922	c02 INT,
3923	c03 INT NOT NULL,
3924	c04 INT UNSIGNED,
3925	c05 BIGINT,
3926	c06 BIGINT UNSIGNED NOT NULL,
3927	c07 TIMESTAMP
3928	) ENGINE=INNODB;
3929	*/
3930	/ definition for the table 'test/tcheck' /
3931	dict_col_meta_t columns[] = {
3932	{"c01", DATA_VARCHAR, `0`, `123`},
3933	{"c02", DATA_INT, `0`, `4`},
3934	{"c03", DATA_INT, DATA_NOT_NULL, `4`},
3935	{"c04", DATA_INT, DATA_UNSIGNED, `4`},
3936	{"c05", DATA_INT, `0`, `8`},
3937	{"c06", DATA_INT, DATA_NOT_NULL \| DATA_UNSIGNED, `8`},
3938	{"c07", DATA_INT, `0`, `4`},
3939	{"c_extra", DATA_INT, `0`, `4`}
3940	};
3941	dict_table_schema_t schema = {
3942	"test/tcheck",
3943	`0` / will be set individually for each test below /,
3944	columns
3945	};
3946	char errstr[`512`];
3947
3948	snprintf(errstr, sizeof(errstr), "Table not found");
3949
3950	/ prevent any data dictionary modifications while we are checking*
3951	the tables' structure /*
3952
3953	mutex_enter(&dict_sys->mutex);
3954
3955	/ check that a valid table is reported as valid /
3956	schema.n_cols = `7`;
3957	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3958	== DB_SUCCESS) {
3959	printf("OK: test.tcheck ok\n");
3960	} else {
3961	printf("ERROR: %s\n", errstr);
3962	printf("ERROR: test.tcheck not present or corrupted\n");
3963	goto test_dict_table_schema_check_end;
3964	}
3965
3966	/ check columns with wrong length /
3967	schema.columns[`1`].len = `8`;
3968	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3969	!= DB_SUCCESS) {
3970	printf("OK: test.tcheck.c02 has different length and is"
3971	" reported as corrupted\n");
3972	} else {
3973	printf("OK: test.tcheck.c02 has different length but is"
3974	" reported as ok\n");
3975	goto test_dict_table_schema_check_end;
3976	}
3977	schema.columns[`1`].len = `4`;
3978
3979	/ request that c02 is NOT NULL while actually it does not have*
3980	this flag set /*
3981	schema.columns[`1`].prtype_mask \|= DATA_NOT_NULL;
3982	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3983	!= DB_SUCCESS) {
3984	printf("OK: test.tcheck.c02 does not have NOT NULL while"
3985	" it should and is reported as corrupted\n");
3986	} else {
3987	printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
3988	" it should and is not reported as corrupted\n");
3989	goto test_dict_table_schema_check_end;
3990	}
3991	schema.columns[`1`].prtype_mask &= ~DATA_NOT_NULL;
3992
3993	/ check a table that contains some extra columns /
3994	schema.n_cols = `6`;
3995	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3996	== DB_SUCCESS) {
3997	printf("ERROR: test.tcheck has more columns but is not"
3998	" reported as corrupted\n");
3999	goto test_dict_table_schema_check_end;
4000	} else {
4001	printf("OK: test.tcheck has more columns and is"
4002	" reported as corrupted\n");
4003	}
4004
4005	/ check a table that has some columns missing /
4006	schema.n_cols = `8`;
4007	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4008	!= DB_SUCCESS) {
4009	printf("OK: test.tcheck has missing columns and is"
4010	" reported as corrupted\n");
4011	} else {
4012	printf("ERROR: test.tcheck has missing columns but is"
4013	" reported as ok\n");
4014	goto test_dict_table_schema_check_end;
4015	}
4016
4017	/ check non-existent table /
4018	schema.table_name = "test/tcheck_nonexistent";
4019	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4020	!= DB_SUCCESS) {
4021	printf("OK: test.tcheck_nonexistent is not present\n");
4022	} else {
4023	printf("ERROR: test.tcheck_nonexistent is present!?\n");
4024	goto test_dict_table_schema_check_end;
4025	}
4026
4027	test_dict_table_schema_check_end:
4028
4029	mutex_exit(&dict_sys->mutex);
4030	}
/* @} */
4032
/* save/fetch aux macros @{ */

/* Name of the dummy table crafted by test_dict_stats_save() and
test_dict_stats_fetch_from_ps(); both build it as
TEST_DATABASE_NAME "/" TEST_TABLE_NAME. */
#define TEST_DATABASE_NAME		"foobardb"
#define TEST_TABLE_NAME			"test_dict_stats"

/* Table-level statistics written by the save test and expected back by
the fetch test. */
#define TEST_N_ROWS			111
#define TEST_CLUSTERED_INDEX_SIZE	222
#define TEST_SUM_OF_OTHER_INDEX_SIZES	333

/* First dummy index: one column, hence a single n_diff/sample_size pair. */
#define TEST_IDX1_NAME			"tidx1"
#define TEST_IDX1_COL1_NAME		"tidx1_col1"
#define TEST_IDX1_INDEX_SIZE		123
#define TEST_IDX1_N_LEAF_PAGES		234
#define TEST_IDX1_N_DIFF1		50
#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE	500

/* Second dummy index: four columns, hence one n_diff/sample_size pair
for each of the four column prefixes. */
#define TEST_IDX2_NAME			"tidx2"
#define TEST_IDX2_COL1_NAME		"tidx2_col1"
#define TEST_IDX2_COL2_NAME		"tidx2_col2"
#define TEST_IDX2_COL3_NAME		"tidx2_col3"
#define TEST_IDX2_COL4_NAME		"tidx2_col4"
#define TEST_IDX2_INDEX_SIZE		321
#define TEST_IDX2_N_LEAF_PAGES		432
#define TEST_IDX2_N_DIFF1		60
#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE	600
#define TEST_IDX2_N_DIFF2		61
#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE	610
#define TEST_IDX2_N_DIFF3		62
#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE	620
#define TEST_IDX2_N_DIFF4		63
#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE	630
/* @} */
4064
4065	/ test_dict_stats_save() @{ /
4066	void
4067	test_dict_stats_save()
4068	{
4069	dict_table_t table;
4070	dict_index_t index1;
4071	dict_field_t index1_fields[`1`];
4072	ib_uint64_t index1_stat_n_diff_key_vals[`1`];
4073	ib_uint64_t index1_stat_n_sample_sizes[`1`];
4074	dict_index_t index2;
4075	dict_field_t index2_fields[`4`];
4076	ib_uint64_t index2_stat_n_diff_key_vals[`4`];
4077	ib_uint64_t index2_stat_n_sample_sizes[`4`];
4078	dberr_t ret;
4079
4080	/ craft a dummy dict_table_t /
4081	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4082	table.stat_n_rows = TEST_N_ROWS;
4083	table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
4084	table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
4085	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
4086	UT_LIST_ADD_LAST(table.indexes, &index1);
4087	UT_LIST_ADD_LAST(table.indexes, &index2);
4088	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4089	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4090
4091	index1.name = TEST_IDX1_NAME;
4092	index1.table = &table;
4093	index1.cached = `1`;
4094	index1.n_uniq = `1`;
4095	index1.fields = index1_fields;
4096	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4097	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4098	index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
4099	index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
4100	index1_fields[`0`].name = TEST_IDX1_COL1_NAME;
4101	index1_stat_n_diff_key_vals[`0`] = TEST_IDX1_N_DIFF1;
4102	index1_stat_n_sample_sizes[`0`] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
4103
4104	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4105	index2.name = TEST_IDX2_NAME;
4106	index2.table = &table;
4107	index2.cached = `1`;
4108	index2.n_uniq = `4`;
4109	index2.fields = index2_fields;
4110	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4111	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4112	index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
4113	index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
4114	index2_fields[`0`].name = TEST_IDX2_COL1_NAME;
4115	index2_fields[`1`].name = TEST_IDX2_COL2_NAME;
4116	index2_fields[`2`].name = TEST_IDX2_COL3_NAME;
4117	index2_fields[`3`].name = TEST_IDX2_COL4_NAME;
4118	index2_stat_n_diff_key_vals[`0`] = TEST_IDX2_N_DIFF1;
4119	index2_stat_n_diff_key_vals[`1`] = TEST_IDX2_N_DIFF2;
4120	index2_stat_n_diff_key_vals[`2`] = TEST_IDX2_N_DIFF3;
4121	index2_stat_n_diff_key_vals[`3`] = TEST_IDX2_N_DIFF4;
4122	index2_stat_n_sample_sizes[`0`] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
4123	index2_stat_n_sample_sizes[`1`] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
4124	index2_stat_n_sample_sizes[`2`] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
4125	index2_stat_n_sample_sizes[`3`] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
4126
4127	ret = dict_stats_save(&table, NULL);
4128
4129	ut_a(ret == DB_SUCCESS);
4130
4131	printf("\nOK: stats saved successfully, now go ahead and read"
4132	" what's inside %s and %s:\n\n",
4133	TABLE_STATS_NAME_PRINT,
4134	INDEX_STATS_NAME_PRINT);
4135
4136	printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
4137	"FROM %s\n"
4138	"WHERE\n"
4139	"database_name = '%s' AND\n"
4140	"table_name = '%s' AND\n"
4141	"n_rows = %d AND\n"
4142	"clustered_index_size = %d AND\n"
4143	"sum_of_other_index_sizes = %d;\n"
4144	"\n",
4145	TABLE_STATS_NAME_PRINT,
4146	TEST_DATABASE_NAME,
4147	TEST_TABLE_NAME,
4148	TEST_N_ROWS,
4149	TEST_CLUSTERED_INDEX_SIZE,
4150	TEST_SUM_OF_OTHER_INDEX_SIZES);
4151
4152	printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
4153	"FROM %s\n"
4154	"WHERE\n"
4155	"database_name = '%s' AND\n"
4156	"table_name = '%s' AND\n"
4157	"index_name = '%s' AND\n"
4158	"(\n"
4159	" (stat_name = 'size' AND stat_value = %d AND"
4160	" sample_size IS NULL) OR\n"
4161	" (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4162	" sample_size IS NULL) OR\n"
4163	" (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4164	" sample_size = '%d' AND stat_description = '%s')\n"
4165	");\n"
4166	"\n",
4167	INDEX_STATS_NAME_PRINT,
4168	TEST_DATABASE_NAME,
4169	TEST_TABLE_NAME,
4170	TEST_IDX1_NAME,
4171	TEST_IDX1_INDEX_SIZE,
4172	TEST_IDX1_N_LEAF_PAGES,
4173	TEST_IDX1_N_DIFF1,
4174	TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
4175	TEST_IDX1_COL1_NAME);
4176
4177	printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
4178	"FROM %s\n"
4179	"WHERE\n"
4180	"database_name = '%s' AND\n"
4181	"table_name = '%s' AND\n"
4182	"index_name = '%s' AND\n"
4183	"(\n"
4184	" (stat_name = 'size' AND stat_value = %d AND"
4185	" sample_size IS NULL) OR\n"
4186	" (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4187	" sample_size IS NULL) OR\n"
4188	" (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4189	" sample_size = '%d' AND stat_description = '%s') OR\n"
4190	" (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
4191	" sample_size = '%d' AND stat_description = '%s,%s') OR\n"
4192	" (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
4193	" sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
4194	" (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
4195	" sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
4196	");\n"
4197	"\n",
4198	INDEX_STATS_NAME_PRINT,
4199	TEST_DATABASE_NAME,
4200	TEST_TABLE_NAME,
4201	TEST_IDX2_NAME,
4202	TEST_IDX2_INDEX_SIZE,
4203	TEST_IDX2_N_LEAF_PAGES,
4204	TEST_IDX2_N_DIFF1,
4205	TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
4206	TEST_IDX2_N_DIFF2,
4207	TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
4208	TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
4209	TEST_IDX2_N_DIFF3,
4210	TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
4211	TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4212	TEST_IDX2_N_DIFF4,
4213	TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
4214	TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4215	TEST_IDX2_COL4_NAME);
4216	}
4217	/ @} /
4218
4219	/ test_dict_stats_fetch_from_ps() @{ /
4220	void
4221	test_dict_stats_fetch_from_ps()
4222	{
4223	dict_table_t table;
4224	dict_index_t index1;
4225	ib_uint64_t index1_stat_n_diff_key_vals[`1`];
4226	ib_uint64_t index1_stat_n_sample_sizes[`1`];
4227	dict_index_t index2;
4228	ib_uint64_t index2_stat_n_diff_key_vals[`4`];
4229	ib_uint64_t index2_stat_n_sample_sizes[`4`];
4230	dberr_t ret;
4231
4232	/ craft a dummy dict_table_t /
4233	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4234	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
4235	UT_LIST_ADD_LAST(table.indexes, &index1);
4236	UT_LIST_ADD_LAST(table.indexes, &index2);
4237	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4238
4239	index1.name = TEST_IDX1_NAME;
4240	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4241	index1.cached = `1`;
4242	index1.n_uniq = `1`;
4243	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4244	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4245
4246	index2.name = TEST_IDX2_NAME;
4247	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4248	index2.cached = `1`;
4249	index2.n_uniq = `4`;
4250	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4251	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4252
4253	ret = dict_stats_fetch_from_ps(&table);
4254
4255	ut_a(ret == DB_SUCCESS);
4256
4257	ut_a(table.stat_n_rows == TEST_N_ROWS);
4258	ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
4259	ut_a(table.stat_sum_of_other_index_sizes
4260	== TEST_SUM_OF_OTHER_INDEX_SIZES);
4261
4262	ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
4263	ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
4264	ut_a(index1_stat_n_diff_key_vals[`0`] == TEST_IDX1_N_DIFF1);
4265	ut_a(index1_stat_n_sample_sizes[`0`] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
4266
4267	ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
4268	ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
4269	ut_a(index2_stat_n_diff_key_vals[`0`] == TEST_IDX2_N_DIFF1);
4270	ut_a(index2_stat_n_sample_sizes[`0`] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
4271	ut_a(index2_stat_n_diff_key_vals[`1`] == TEST_IDX2_N_DIFF2);
4272	ut_a(index2_stat_n_sample_sizes[`1`] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
4273	ut_a(index2_stat_n_diff_key_vals[`2`] == TEST_IDX2_N_DIFF3);
4274	ut_a(index2_stat_n_sample_sizes[`2`] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
4275	ut_a(index2_stat_n_diff_key_vals[`3`] == TEST_IDX2_N_DIFF4);
4276	ut_a(index2_stat_n_sample_sizes[`3`] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
4277
4278	printf("OK: fetch successful\n");
4279	}
4280	/ @} /
4281
4282	/ test_dict_stats_all() @{ /
4283	void
4284	test_dict_stats_all()
4285	{
4286	test_dict_table_schema_check();
4287
4288	test_dict_stats_save();
4289
4290	test_dict_stats_fetch_from_ps();
4291	}
4292	/ @} /
4293
4294	#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
/* @} */
4296

Browse the source code of MariaDB/storage/innobase/dict/dict0stats.cc