1/* Copyright (C) 2009 MySQL AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16/**
17 @file
18
19 @brief
  functions to update persistent statistical tables and to read from them
21
22 @defgroup Query_Optimizer Query Optimizer
23 @{
24*/
25
26#include "mariadb.h"
27#include "sql_base.h"
28#include "key.h"
29#include "sql_statistics.h"
30#include "opt_range.h"
31#include "uniques.h"
32#include "my_atomic.h"
33#include "sql_show.h"
34
35/*
36 The system variable 'use_stat_tables' can take one of the
37 following values:
38 "never", "complementary", "preferably".
  If the value of the variable 'use_stat_tables' is set to
  "never" then any statistical data from the persistent statistical tables
  is ignored by the optimizer.
42 If the value of the variable 'use_stat_tables' is set to
43 "complementary" then a particular statistical characteristic is used
44 by the optimizer only if the database engine does not provide similar
  statistics. For example, 'nulls_ratio' for table columns currently
  is not provided by any engine. So the optimizer uses this statistical data
  from the statistical tables. At the same time it does not use
  'avg_frequency' for any index prefix from the statistical tables since
  a similar statistical characteristic 'records_per_key' can be
  requested from the database engine.
  If the value of the variable 'use_stat_tables' is set to
  "preferably" the optimizer uses a particular statistical characteristic
  provided by the database engine only if it can't be found in the
  statistical tables.
54 If an ANALYZE command is executed then it results in collecting
55 statistical data for the tables specified by the command and storing
56 the collected statistics in the persistent statistical tables only
57 when the value of the variable 'use_stat_tables' is not
58 equal to "never".
59*/
60
/*
  Currently there are only 3 persistent statistical tables.
  This constant sizes stat_table_name[] and the TABLE_LIST arrays that are
  built from it.
*/
static const uint STATISTICS_TABLES= 3;

/*
  The names of the statistical tables in this array must correspond to the
  definitions of the tables in the file ../scripts/mysql_system_tables.sql.
  Element i of this array names the table described by element i of the
  TABLE_LIST array filled by init_table_list_for_stat_tables()
  (presumably in the order TABLE_STAT, COLUMN_STAT, INDEX_STAT - confirm
  against the declaration of these constants).
*/
static const LEX_CSTRING stat_table_name[STATISTICS_TABLES]=
{
  { STRING_WITH_LEN("table_stats") },
  { STRING_WITH_LEN("column_stats") },
  { STRING_WITH_LEN("index_stats") }
};
74
75
76/**
77 @details
78 The function builds a list of TABLE_LIST elements for system statistical
79 tables using array of TABLE_LIST passed as a parameter.
80 The lock type of each element is set to TL_READ if for_write = FALSE,
81 otherwise it is set to TL_WRITE.
82*/
83
84static
85inline void init_table_list_for_stat_tables(TABLE_LIST *tables, bool for_write)
86{
87 uint i;
88
89 memset((char *) &tables[0], 0, sizeof(TABLE_LIST) * STATISTICS_TABLES);
90
91 for (i= 0; i < STATISTICS_TABLES; i++)
92 {
93 tables[i].db= MYSQL_SCHEMA_NAME;
94 tables[i].table_name= stat_table_name[i];
95 tables[i].alias= stat_table_name[i];
96 tables[i].lock_type= for_write ? TL_WRITE : TL_READ;
97 if (i < STATISTICS_TABLES - 1)
98 tables[i].next_global= tables[i].next_local=
99 tables[i].next_name_resolution_table= &tables[i+1];
100 if (i != 0)
101 tables[i].prev_global= &tables[i-1].next_global;
102 }
103}
104
105
106/**
107 @details
108 The function builds a TABLE_LIST containing only one element 'tbl' for
109 the statistical table called 'stat_tab_name'.
110 The lock type of the element is set to TL_READ if for_write = FALSE,
111 otherwise it is set to TL_WRITE.
112*/
113
114static inline
115void init_table_list_for_single_stat_table(TABLE_LIST *tbl,
116 const LEX_CSTRING *stat_tab_name,
117 bool for_write)
118{
119 memset((char *) tbl, 0, sizeof(TABLE_LIST));
120
121 tbl->db= MYSQL_SCHEMA_NAME;
122 tbl->table_name= *stat_tab_name;
123 tbl->alias= *stat_tab_name;
124 tbl->lock_type= for_write ? TL_WRITE : TL_READ;
125}
126
127
128static Table_check_intact_log_error stat_table_intact;
129
130static const
131TABLE_FIELD_TYPE table_stat_fields[TABLE_STAT_N_FIELDS] =
132{
133 {
134 { STRING_WITH_LEN("db_name") },
135 { STRING_WITH_LEN("varchar(64)") },
136 { STRING_WITH_LEN("utf8") }
137 },
138 {
139 { STRING_WITH_LEN("table_name") },
140 { STRING_WITH_LEN("varchar(64)") },
141 { STRING_WITH_LEN("utf8") }
142 },
143 {
144 { STRING_WITH_LEN("cardinality") },
145 { STRING_WITH_LEN("bigint(21)") },
146 { NULL, 0 }
147 },
148};
149static const uint table_stat_pk_col[]= {0,1};
150static const TABLE_FIELD_DEF
151table_stat_def= {TABLE_STAT_N_FIELDS, table_stat_fields, 2, table_stat_pk_col };
152
153static const
154TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] =
155{
156 {
157 { STRING_WITH_LEN("db_name") },
158 { STRING_WITH_LEN("varchar(64)") },
159 { STRING_WITH_LEN("utf8") }
160 },
161 {
162 { STRING_WITH_LEN("table_name") },
163 { STRING_WITH_LEN("varchar(64)") },
164 { STRING_WITH_LEN("utf8") }
165 },
166 {
167 { STRING_WITH_LEN("column_name") },
168 { STRING_WITH_LEN("varchar(64)") },
169 { STRING_WITH_LEN("utf8") }
170 },
171 {
172 { STRING_WITH_LEN("min_value") },
173 { STRING_WITH_LEN("varbinary(255)") },
174 { NULL, 0 }
175 },
176 {
177 { STRING_WITH_LEN("max_value") },
178 { STRING_WITH_LEN("varbinary(255)") },
179 { NULL, 0 }
180 },
181 {
182 { STRING_WITH_LEN("nulls_ratio") },
183 { STRING_WITH_LEN("decimal(12,4)") },
184 { NULL, 0 }
185 },
186 {
187 { STRING_WITH_LEN("avg_length") },
188 { STRING_WITH_LEN("decimal(12,4)") },
189 { NULL, 0 }
190 },
191 {
192 { STRING_WITH_LEN("avg_frequency") },
193 { STRING_WITH_LEN("decimal(12,4)") },
194 { NULL, 0 }
195 },
196 {
197 { STRING_WITH_LEN("hist_size") },
198 { STRING_WITH_LEN("tinyint(3)") },
199 { NULL, 0 }
200 },
201 {
202 { STRING_WITH_LEN("hist_type") },
203 { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB')") },
204 { STRING_WITH_LEN("utf8") }
205 },
206 {
207 { STRING_WITH_LEN("histogram") },
208 { STRING_WITH_LEN("varbinary(255)") },
209 { NULL, 0 }
210 }
211};
212static const uint column_stat_pk_col[]= {0,1,2};
213static const TABLE_FIELD_DEF
214column_stat_def= {COLUMN_STAT_N_FIELDS, column_stat_fields, 3, column_stat_pk_col};
215
216static const
217TABLE_FIELD_TYPE index_stat_fields[INDEX_STAT_N_FIELDS] =
218{
219 {
220 { STRING_WITH_LEN("db_name") },
221 { STRING_WITH_LEN("varchar(64)") },
222 { STRING_WITH_LEN("utf8") }
223 },
224 {
225 { STRING_WITH_LEN("table_name") },
226 { STRING_WITH_LEN("varchar(64)") },
227 { STRING_WITH_LEN("utf8") }
228 },
229 {
230 { STRING_WITH_LEN("index") },
231 { STRING_WITH_LEN("varchar(64)") },
232 { STRING_WITH_LEN("utf8") }
233 },
234 {
235 { STRING_WITH_LEN("prefix_arity") },
236 { STRING_WITH_LEN("int(11)") },
237 { NULL, 0 }
238 },
239 {
240 { STRING_WITH_LEN("avg_frequency") },
241 { STRING_WITH_LEN("decimal(12,4)") },
242 { NULL, 0 }
243 }
244};
245static const uint index_stat_pk_col[]= {0,1,2,3};
246static const TABLE_FIELD_DEF
247index_stat_def= {INDEX_STAT_N_FIELDS, index_stat_fields, 4, index_stat_pk_col};
248
249
250/**
251 @brief
252 Open all statistical tables and lock them
253*/
254
255static
256inline int open_stat_tables(THD *thd, TABLE_LIST *tables,
257 Open_tables_backup *backup,
258 bool for_write)
259{
260 int rc;
261
262 Dummy_error_handler deh; // suppress errors
263 thd->push_internal_handler(&deh);
264 init_table_list_for_stat_tables(tables, for_write);
265 init_mdl_requests(tables);
266 rc= open_system_tables_for_read(thd, tables, backup);
267 thd->pop_internal_handler();
268
269
270 /* If the number of tables changes, we should revise the check below. */
271 DBUG_ASSERT(STATISTICS_TABLES == 3);
272
273 if (!rc &&
274 (stat_table_intact.check(tables[TABLE_STAT].table, &table_stat_def) ||
275 stat_table_intact.check(tables[COLUMN_STAT].table, &column_stat_def) ||
276 stat_table_intact.check(tables[INDEX_STAT].table, &index_stat_def)))
277 {
278 close_system_tables(thd, backup);
279 rc= 1;
280 }
281
282 return rc;
283}
284
285
286/**
287 @brief
288 Open a statistical table and lock it
289*/
290static
291inline int open_single_stat_table(THD *thd, TABLE_LIST *table,
292 const LEX_CSTRING *stat_tab_name,
293 Open_tables_backup *backup,
294 bool for_write)
295{
296 init_table_list_for_single_stat_table(table, stat_tab_name, for_write);
297 init_mdl_requests(table);
298 return open_system_tables_for_read(thd, table, backup);
299}
300
301
/*
  The class Column_statistics_collected is a helper class used to collect
  statistics on a table column. The class is derived directly from
  the class Column_statistics, and, in addition to the fields of the
  latter, it contains the fields to accumulate the results of aggregation
  for the number of nulls in the column and for the size of the column
  values. There is also a container for distinct column values used
  to calculate the average number of records per distinct column value.

  Only the declarations of the methods are given in the class body;
  their definitions are provided separately.
*/

class Column_statistics_collected :public Column_statistics
{

private:
  Field *column;  /* The column to collect statistics on */
  ha_rows nulls;  /* To accumulate the number of nulls in the column */
  ulonglong column_total_length; /* To accumulate the size of column values */
  Count_distinct_field *count_distinct; /* The container for distinct
                                           column values */

  bool is_single_pk_col; /* TRUE <-> the only column of the primary key */

public:

  /*
    NOTE(review): judging by the names and signatures, init() prepares the
    collection for the column 'table_field', add() accounts for the row
    number 'rowno', finish() completes the collection for 'rows' rows and
    cleanup() releases what was acquired - confirm against the definitions.
  */
  inline void init(THD *thd, Field * table_field);
  inline bool add(ha_rows rowno);
  inline void finish(ha_rows rows);
  inline void cleanup();
};
331
332
333/**
334 Stat_table is the base class for classes Table_stat, Column_stat and
335 Index_stat. The methods of these classes allow us to read statistical
336 data from statistical tables, write collected statistical data into
337 statistical tables and update statistical data in these tables
338 as well as update access fields belonging to the primary key and
339 delete records by prefixes of the primary key.
  Objects of the classes Table_stat, Column_stat and Index_stat are used
  for reading/writing statistics from/into persistent tables table_stats,
  column_stats and index_stats correspondingly. These tables are stored in
  the system database 'mysql'.
344
345 Statistics is read and written always for a given database table t. When
346 an object of any of these classes is created a pointer to the TABLE
347 structure for this database table is passed as a parameter to the constructor
348 of the object. The other parameter is a pointer to the TABLE structure for
349 the corresponding statistical table st. So construction of an object to
350 read/write statistical data on table t from/into statistical table st
351 requires both table t and st to be opened.
  In some cases the TABLE structure for table t may be undefined. Then
  the objects of the classes Table_stat, Column_stat and Index_stat are
  created by the alternative constructor that requires only the name
  of the table t and the name of the database it belongs to. Currently the
  alternative constructors are used only in the cases when some records
  belonging to the table are to be deleted, or its keys are to be updated.
358
359 Reading/writing statistical data from/into a statistical table is always
360 performed by a key. At the moment there is only one key defined for each
361 statistical table and this key is primary.
362 The primary key for the table table_stats is built as (db_name, table_name).
363 The primary key for the table column_stats is built as (db_name, table_name,
364 column_name).
365 The primary key for the table index_stats is built as (db_name, table_name,
366 index_name, prefix_arity).
367
  Reading statistical data from a statistical table is performed by the
  following pattern. First a table dependent method sets the values of
  the fields that comprise the lookup key. Then an implementation of the
  method get_stat_values() declared in Stat_table as a pure virtual method
  finds the row from the statistical table by the set key. If the row is
  found the values of statistical fields are read from this row and are
  distributed in the internal structures.
375
376 Let's assume the statistical data is read for table t from database db.
377
378 When statistical data is searched in the table table_stats first
379 Table_stat::set_key_fields() should set the fields of db_name and
380 table_name. Then get_stat_values looks for a row by the set key value,
381 and, if the row is found, reads the value from the column
382 table_stats.cardinality into the field read_stat.cardinality of the TABLE
383 structure for table t and sets the value of read_stat.cardinality_is_null
384 from this structure to FALSE. If the value of the 'cardinality' column
385 in the row is null or if no row is found read_stat.cardinality_is_null
386 is set to TRUE.
387
388 When statistical data is searched in the table column_stats first
389 Column_stat::set_key_fields() should set the fields of db_name, table_name
390 and column_name with column_name taken out of the only parameter f of the
391 Field* type passed to this method. After this get_stat_values looks
392 for a row by the set key value. If the row is found the values of statistical
393 data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency,
394 hist_size, hist_type, histogram are read into internal structures. Values
395 of nulls_ratio, avg_length, avg_frequency, hist_size, hist_type, histogram
396 are read into the corresponding fields of the read_stat structure from
397 the Field object f, while values from min_value and max_value are copied
398 into the min_value and max_value record buffers attached to the TABLE
399 structure for table t.
  If the value of a statistical column in the found row is null, then the
  corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off.
  Otherwise the flag is set on. If no row is found for the column then all
  flags in f->read_stat.column_stat_nulls are set off.
404
405 When statistical data is searched in the table index_stats first
406 Index_stat::set_key_fields() has to be called to set the fields of db_name,
407 table_name, index_name and prefix_arity. The value of index_name is extracted
408 from the first parameter key_info of the KEY* type passed to the method.
409 This parameter specifies the index of interest idx. The second parameter
410 passed to the method specifies the arity k of the index prefix for which
  statistical data is to be read. E.g. if the index idx consists of 3
  components (p1,p2,p3) the table index_stats usually will contain 3 rows for
  this index: the first - for the prefix (p1), the second - for the prefix
  (p1,p2), and the third - for the prefix (p1,p2,p3). After the key fields
  have been set, a call of get_stat_values looks for a row by the set key
  value. If the row is found and the value of the avg_frequency column is not
  null then this value is assigned to key_info->read_stat.avg_frequency[k].
  Otherwise 0 is assigned to this element.
419
  The method Stat_table::update_stat is used to write statistical data
  collected in the internal structures into a statistical table st.
  It is assumed that before any invocation of this method a call of the
  function st.set_key_fields has set the values of the primary key fields
  that serve to locate the row from the statistical table st where
  the collected statistical data from internal structures are to be written
  to. The statistical data is written from the counterparts of the
  statistical fields of internal structures into which it would be read
  by the functions get_stat_values. The counterpart fields are used
  only when statistics is collected.
  When updating/inserting a row in the statistical table st the method
  Stat_table::update_stat calls the implementation of the pure virtual
  method store_stat_fields to transfer statistical data from the fields
  of internal structures to the fields of record buffer used for updates
  of the statistical table st.
435*/
436
437class Stat_table
438{
439
440private:
441
442 /* Handler used for the retrieval of the statistical table stat_table */
443 handler *stat_file;
444
445 uint stat_key_length; /* Length of the key to access stat_table */
446 uchar *record[2]; /* Record buffers used to access/update stat_table */
447 uint stat_key_idx; /* The number of the key to access stat_table */
448
449 /* This is a helper function used only by the Stat_table constructors */
450 void common_init_stat_table()
451 {
452 stat_file= stat_table->file;
453 /* Currently any statistical table has only one key */
454 stat_key_idx= 0;
455 stat_key_info= &stat_table->key_info[stat_key_idx];
456 stat_key_length= stat_key_info->key_length;
457 record[0]= stat_table->record[0];
458 record[1]= stat_table->record[1];
459 }
460
461protected:
462
463 /* Statistical table to read statistics from or to update/delete */
464 TABLE *stat_table;
465 KEY *stat_key_info; /* Structure for the index to access stat_table */
466
467 /* Table for which statistical data is read / updated */
468 TABLE *table;
469 TABLE_SHARE *table_share; /* Table share for 'table */
470 const LEX_CSTRING *db_name; /* Name of the database containing 'table' */
471 const LEX_CSTRING *table_name; /* Name of the table 'table' */
472
473 void store_record_for_update()
474 {
475 store_record(stat_table, record[1]);
476 }
477
478 void store_record_for_lookup()
479 {
480 DBUG_ASSERT(record[0] == stat_table->record[0]);
481 }
482
483 bool update_record()
484 {
485 int err;
486 if ((err= stat_file->ha_update_row(record[1], record[0])) &&
487 err != HA_ERR_RECORD_IS_THE_SAME)
488 return TRUE;
489 /* Make change permanent and avoid 'table is marked as crashed' errors */
490 stat_file->extra(HA_EXTRA_FLUSH);
491 return FALSE;
492 }
493
494public:
495
496
497 /**
498 @details
499 This constructor has to be called by any constructor of the derived
500 classes. The constructor 'tunes' the private and protected members of
501 the constructed object to the statistical table 'stat_table' with the
502 statistical data of our interest and to the table 'tab' for which this
503 statistics has been collected.
504 */
505
506 Stat_table(TABLE *stat, TABLE *tab)
507 :stat_table(stat), table(tab)
508 {
509 table_share= tab->s;
510 common_init_stat_table();
511 db_name= &table_share->db;
512 table_name= &table_share->table_name;
513 }
514
515
516 /**
517 @details
518 This constructor has to be called by any constructor of the derived
519 classes. The constructor 'tunes' the private and protected members of
520 the constructed object to the statistical table 'stat_table' with the
521 statistical data of our interest and to the table t for which this
522 statistics has been collected. The table t is uniquely specified
523 by the database name 'db' and the table name 'tab'.
524 */
525
526 Stat_table(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab)
527 :stat_table(stat), table_share(NULL),db_name(db), table_name(tab)
528 {
529 common_init_stat_table();
530 }
531
532
533 virtual ~Stat_table() {}
534
535 /**
536 @brief
537 Store the given values of fields for database name and table name
538
539 @details
540 This is a purely virtual method.
541 The implementation for any derived class shall store the given
542 values of the database name and table name in the corresponding
543 fields of stat_table.
544
545 @note
546 The method is called by the update_table_name_key_parts function.
547 */
548
549 virtual void change_full_table_name(const LEX_CSTRING *db, const LEX_CSTRING *tab)= 0;
550
551
552 /**
553 @brief
554 Store statistical data into fields of the statistical table
555
556 @details
557 This is a purely virtual method.
558 The implementation for any derived class shall put the appropriate
559 statistical data into the corresponding fields of stat_table.
560
561 @note
562 The method is called by the update_stat function.
563 */
564
565 virtual void store_stat_fields()= 0;
566
567
568 /**
569 @brief
570 Read statistical data from fields of the statistical table
571
572 @details
573 This is a purely virtual method.
574 The implementation for any derived read shall read the appropriate
575 statistical data from the corresponding fields of stat_table.
576 */
577
578 virtual void get_stat_values()= 0;
579
580
581 /**
582 @brief
583 Find a record in the statistical table by a primary key
584
585 @details
586 The function looks for a record in stat_table by its primary key.
587 It assumes that the key fields have been already stored in the record
588 buffer of stat_table.
589
590 @retval
591 FALSE the record is not found
592 @retval
593 TRUE the record is found
594 */
595
596 bool find_stat()
597 {
598 uchar key[MAX_KEY_LENGTH];
599 key_copy(key, record[0], stat_key_info, stat_key_length);
600 return !stat_file->ha_index_read_idx_map(record[0], stat_key_idx, key,
601 HA_WHOLE_KEY, HA_READ_KEY_EXACT);
602 }
603
604
605 /**
606 @brief
607 Find a record in the statistical table by a key prefix value
608
609 @details
610 The function looks for a record in stat_table by the key value consisting
611 of 'prefix_parts' major components for the primary index.
612 It assumes that the key prefix fields have been already stored in the record
613 buffer of stat_table.
614
615 @retval
616 FALSE the record is not found
617 @retval
618 TRUE the record is found
619 */
620
621 bool find_next_stat_for_prefix(uint prefix_parts)
622 {
623 uchar key[MAX_KEY_LENGTH];
624 uint prefix_key_length= 0;
625 for (uint i= 0; i < prefix_parts; i++)
626 prefix_key_length+= stat_key_info->key_part[i].store_length;
627 key_copy(key, record[0], stat_key_info, prefix_key_length);
628 key_part_map prefix_map= (key_part_map) ((1 << prefix_parts) - 1);
629 return !stat_file->ha_index_read_idx_map(record[0], stat_key_idx, key,
630 prefix_map, HA_READ_KEY_EXACT);
631 }
632
633
634 /**
635 @brief
636 Update/insert a record in the statistical table with new statistics
637
638 @details
639 The function first looks for a record by its primary key in the statistical
640 table stat_table. If the record is found the function updates statistical
641 fields of the records. The data for these fields are taken from internal
642 structures containing info on the table 'table'. If the record is not
643 found the function inserts a new record with the primary key set to the
644 search key and the statistical data taken from the internal structures.
645 The function assumes that the key fields have been already stored in
646 the record buffer of stat_table.
647
648 @retval
649 FALSE success with the update/insert of the record
650 @retval
651 TRUE failure with the update/insert of the record
652
653 @note
654 The function calls the virtual method store_stat_fields to populate the
655 statistical fields of the updated/inserted row with new statistics.
656 */
657
658 bool update_stat()
659 {
660 if (find_stat())
661 {
662 bool res;
663 store_record_for_update();
664 store_stat_fields();
665 res= update_record();
666 DBUG_ASSERT(res == 0);
667 return res;
668 }
669 else
670 {
671 int err;
672 store_stat_fields();
673 if ((err= stat_file->ha_write_row(record[0])))
674 {
675 DBUG_ASSERT(0);
676 return TRUE;
677 }
678 /* Make change permanent and avoid 'table is marked as crashed' errors */
679 stat_file->extra(HA_EXTRA_FLUSH);
680 }
681 return FALSE;
682 }
683
684
685 /**
686 @brief
687 Update the table name fields in the current record of stat_table
688
689 @details
690 The function updates the fields containing database name and table name
691 for the last found record in the statistical table stat_table.
692 The corresponding names for update is taken from the parameters
693 db and tab.
694
695 @retval
696 FALSE success with the update of the record
697 @retval
698 TRUE failure with the update of the record
699
700 @note
701 The function calls the virtual method change_full_table_name
702 to store the new names in the record buffer used for updates.
703 */
704
705 bool update_table_name_key_parts(const LEX_CSTRING *db, const LEX_CSTRING *tab)
706 {
707 store_record_for_update();
708 change_full_table_name(db, tab);
709 bool rc= update_record();
710 store_record_for_lookup();
711 return rc;
712 }
713
714
715 /**
716 @brief
717 Delete the current record of the statistical table stat_table
718
719 @details
720 The function deletes the last found record from the statistical
721 table stat_table.
722
723 @retval
724 FALSE success with the deletion of the record
725 @retval
726 TRUE failure with the deletion of the record
727 */
728
729 bool delete_stat()
730 {
731 int err;
732 if ((err= stat_file->ha_delete_row(record[0])))
733 return TRUE;
734 /* Make change permanent and avoid 'table is marked as crashed' errors */
735 stat_file->extra(HA_EXTRA_FLUSH);
736 return FALSE;
737 }
738
739 friend class Stat_table_write_iter;
740};
741
742
743/*
744 An object of the class Table_stat is created to read statistical
745 data on tables from the statistical table table_stats, to update
746 table_stats with such statistical data, or to update columns
747 of the primary key, or to delete the record by its primary key or
748 its prefix.
749 Rows from the statistical table are read and updated always by
750 primary key.
751*/
752
753class Table_stat: public Stat_table
754{
755
756private:
757
758 Field *db_name_field; /* Field for the column table_stats.db_name */
759 Field *table_name_field; /* Field for the column table_stats.table_name */
760
761 void common_init_table_stat()
762 {
763 db_name_field= stat_table->field[TABLE_STAT_DB_NAME];
764 table_name_field= stat_table->field[TABLE_STAT_TABLE_NAME];
765 }
766
767 void change_full_table_name(const LEX_CSTRING *db, const LEX_CSTRING *tab)
768 {
769 db_name_field->store(db->str, db->length, system_charset_info);
770 table_name_field->store(tab->str, tab->length, system_charset_info);
771 }
772
773public:
774
775 /**
776 @details
777 The constructor 'tunes' the private and protected members of the
778 constructed object for the statistical table table_stats to read/update
779 statistics on table 'tab'. The TABLE structure for the table table_stat
780 must be passed as a value for the parameter 'stat'.
781 */
782
783 Table_stat(TABLE *stat, TABLE *tab) :Stat_table(stat, tab)
784 {
785 common_init_table_stat();
786 }
787
788
789 /**
790 @details
791 The constructor 'tunes' the private and protected members of the
792 object constructed for the statistical table table_stat for
793 the future updates/deletes of the record concerning the table 'tab'
794 from the database 'db'.
795 */
796
797 Table_stat(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab)
798 :Stat_table(stat, db, tab)
799 {
800 common_init_table_stat();
801 }
802
803
804 /**
805 @brief
806 Set the key fields for the statistical table table_stat
807
808 @details
809 The function sets the values of the fields db_name and table_name
810 in the record buffer for the statistical table table_stat.
811 These fields comprise the primary key for the table.
812
813 @note
814 The function is supposed to be called before any use of the
815 method find_stat for an object of the Table_stat class.
816 */
817
818 void set_key_fields()
819 {
820 db_name_field->store(db_name->str, db_name->length, system_charset_info);
821 table_name_field->store(table_name->str, table_name->length,
822 system_charset_info);
823 }
824
825
826 /**
827 @brief
828 Store statistical data into statistical fields of table_stat
829
830 @details
831 This implementation of a purely virtual method sets the value of the
832 column 'cardinality' of the statistical table table_stat according to
833 the value of the flag write_stat.cardinality_is_null and the value of
834 the field write_stat.cardinality' from the TABLE structure for 'table'.
835 */
836
837 void store_stat_fields()
838 {
839 Field *stat_field= stat_table->field[TABLE_STAT_CARDINALITY];
840 if (table->collected_stats->cardinality_is_null)
841 stat_field->set_null();
842 else
843 {
844 stat_field->set_notnull();
845 stat_field->store(table->collected_stats->cardinality,true);
846 }
847 }
848
849
850 /**
851 @brief
852 Read statistical data from statistical fields of table_stat
853
854 @details
855 This implementation of a purely virtual method first looks for a record
856 the statistical table table_stat by its primary key set the record
857 buffer with the help of Table_stat::set_key_fields. Then, if the row is
858 found the function reads the value of the column 'cardinality' of the table
859 table_stat and sets the value of the flag read_stat.cardinality_is_null
860 and the value of the field read_stat.cardinality' from the TABLE structure
861 for 'table' accordingly.
862 */
863
864 void get_stat_values()
865 {
866 Table_statistics *read_stats= table_share->stats_cb.table_stats;
867 read_stats->cardinality_is_null= TRUE;
868 read_stats->cardinality= 0;
869 if (find_stat())
870 {
871 Field *stat_field= stat_table->field[TABLE_STAT_CARDINALITY];
872 if (!stat_field->is_null())
873 {
874 read_stats->cardinality_is_null= FALSE;
875 read_stats->cardinality= stat_field->val_int();
876 }
877 }
878 }
879
880};
881
882
883/*
884 An object of the class Column_stat is created to read statistical data
885 on table columns from the statistical table column_stats, to update
886 column_stats with such statistical data, or to update columns
887 of the primary key, or to delete the record by its primary key or
888 its prefix.
889 Rows from the statistical table are read and updated always by
890 primary key.
891*/
892
893class Column_stat: public Stat_table
894{
895
896private:
897
898 Field *db_name_field; /* Field for the column column_stats.db_name */
899 Field *table_name_field; /* Field for the column column_stats.table_name */
900 Field *column_name_field; /* Field for the column column_stats.column_name */
901
902 Field *table_field; /* Field from 'table' to read /update statistics on */
903
904 void common_init_column_stat_table()
905 {
906 db_name_field= stat_table->field[COLUMN_STAT_DB_NAME];
907 table_name_field= stat_table->field[COLUMN_STAT_TABLE_NAME];
908 column_name_field= stat_table->field[COLUMN_STAT_COLUMN_NAME];
909 }
910
911 void change_full_table_name(const LEX_CSTRING *db, const LEX_CSTRING *tab)
912 {
913 db_name_field->store(db->str, db->length, system_charset_info);
914 table_name_field->store(tab->str, tab->length, system_charset_info);
915 }
916
917public:
918
919 /**
920 @details
921 The constructor 'tunes' the private and protected members of the
922 constructed object for the statistical table column_stats to read/update
923 statistics on fields of the table 'tab'. The TABLE structure for the table
924 column_stats must be passed as a value for the parameter 'stat'.
925 */
926
927 Column_stat(TABLE *stat, TABLE *tab) :Stat_table(stat, tab)
928 {
929 common_init_column_stat_table();
930 }
931
932
933 /**
934 @details
935 The constructor 'tunes' the private and protected members of the
936 object constructed for the statistical table column_stats for
937 the future updates/deletes of the record concerning the table 'tab'
938 from the database 'db'.
939 */
940
941 Column_stat(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab)
942 :Stat_table(stat, db, tab)
943 {
944 common_init_column_stat_table();
945 }
946
947 /**
948 @brief
949 Set table name fields for the statistical table column_stats
950
951 @details
952 The function stores the values of the fields db_name and table_name
953 of the statistical table column_stats in the record buffer.
954 */
955
956 void set_full_table_name()
957 {
958 db_name_field->store(db_name->str, db_name->length, system_charset_info);
959 table_name_field->store(table_name->str, table_name->length,
960 system_charset_info);
961 }
962
963
964 /**
965 @brief
966 Set the key fields for the statistical table column_stats
967
968 @param
969 col Field for the 'table' column to read/update statistics on
970
971 @details
972 The function stores the values of the fields db_name, table_name and
973 column_name in the record buffer for the statistical table column_stats.
974 These fields comprise the primary key for the table.
975 It also sets table_field to the passed parameter.
976
977 @note
978 The function is supposed to be called before any use of the
979 method find_stat for an object of the Column_stat class.
980 */
981
982 void set_key_fields(Field *col)
983 {
984 set_full_table_name();
985 column_name_field->store(col->field_name.str, col->field_name.length,
986 system_charset_info);
987 table_field= col;
988 }
989
990
991 /**
992 @brief
993 Update the table name fields in the current record of stat_table
994
995 @details
996 The function updates the primary key fields containing database name,
997 table name, and column name for the last found record in the statistical
998 table column_stats.
999
1000 @retval
1001 FALSE success with the update of the record
1002 @retval
1003 TRUE failure with the update of the record
1004 */
1005
1006 bool update_column_key_part(const char *col)
1007 {
1008 store_record_for_update();
1009 set_full_table_name();
1010 column_name_field->store(col, strlen(col), system_charset_info);
1011 bool rc= update_record();
1012 store_record_for_lookup();
1013 return rc;
1014 }
1015
1016
1017 /**
1018 @brief
1019 Store statistical data into statistical fields of column_stats
1020
1021 @details
1022 This implementation of a purely virtual method sets the value of the
1023 columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length',
1024 'avg_frequency', 'hist_size', 'hist_type' and 'histogram' of the
1025 stistical table columns_stat according to the contents of the bitmap
1026 write_stat.column_stat_nulls and the values of the fields min_value,
1027 max_value, nulls_ratio, avg_length, avg_frequency, hist_size, hist_type
1028 and histogram of the structure write_stat from the Field structure
1029 for the field 'table_field'.
1030 The value of the k-th column in the table columns_stat is set to NULL
1031 if the k-th bit in the bitmap 'column_stat_nulls' is set to 1.
1032
1033 @note
1034 A value from the field min_value/max_value is always converted
1035 into a varbinary string. If the length of the column 'min_value'/'max_value'
1036 is less than the length of the string the string is trimmed to fit the
1037 length of the column.
1038 */
1039
1040 void store_stat_fields()
1041 {
1042 char buff[MAX_FIELD_WIDTH];
1043 String val(buff, sizeof(buff), &my_charset_bin);
1044
1045 for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++)
1046 {
1047 Field *stat_field= stat_table->field[i];
1048 if (table_field->collected_stats->is_null(i))
1049 stat_field->set_null();
1050 else
1051 {
1052 stat_field->set_notnull();
1053 switch (i) {
1054 case COLUMN_STAT_MIN_VALUE:
1055 if (table_field->type() == MYSQL_TYPE_BIT)
1056 stat_field->store(table_field->collected_stats->min_value->val_int(),true);
1057 else
1058 {
1059 table_field->collected_stats->min_value->val_str(&val);
1060 stat_field->store(val.ptr(), val.length(), &my_charset_bin);
1061 }
1062 break;
1063 case COLUMN_STAT_MAX_VALUE:
1064 if (table_field->type() == MYSQL_TYPE_BIT)
1065 stat_field->store(table_field->collected_stats->max_value->val_int(),true);
1066 else
1067 {
1068 table_field->collected_stats->max_value->val_str(&val);
1069 stat_field->store(val.ptr(), val.length(), &my_charset_bin);
1070 }
1071 break;
1072 case COLUMN_STAT_NULLS_RATIO:
1073 stat_field->store(table_field->collected_stats->get_nulls_ratio());
1074 break;
1075 case COLUMN_STAT_AVG_LENGTH:
1076 stat_field->store(table_field->collected_stats->get_avg_length());
1077 break;
1078 case COLUMN_STAT_AVG_FREQUENCY:
1079 stat_field->store(table_field->collected_stats->get_avg_frequency());
1080 break;
1081 case COLUMN_STAT_HIST_SIZE:
1082 stat_field->store(table_field->collected_stats->histogram.get_size());
1083 break;
1084 case COLUMN_STAT_HIST_TYPE:
1085 stat_field->store(table_field->collected_stats->histogram.get_type() +
1086 1);
1087 break;
1088 case COLUMN_STAT_HISTOGRAM:
1089 const char * col_histogram=
1090 (const char *) (table_field->collected_stats->histogram.get_values());
1091 stat_field->store(col_histogram,
1092 table_field->collected_stats->histogram.get_size(),
1093 &my_charset_bin);
1094 break;
1095 }
1096 }
1097 }
1098 }
1099
1100
1101 /**
1102 @brief
1103 Read statistical data from statistical fields of column_stats
1104
1105 @details
1106 This implementation of a purely virtual method first looks for a record
1107 in the statistical table column_stats by its primary key set in the record
1108 buffer with the help of Column_stat::set_key_fields. Then, if the row is
1109 found, the function reads the values of the columns 'min_value',
1110 'max_value', 'nulls_ratio', 'avg_length', 'avg_frequency', 'hist_size' and
1111 'hist_type" of the table column_stat and sets accordingly the value of
1112 the bitmap read_stat.column_stat_nulls' and the values of the fields
1113 min_value, max_value, nulls_ratio, avg_length, avg_frequency, hist_size and
1114 hist_type of the structure read_stat from the Field structure for the field
1115 'table_field'.
1116 */
1117
1118 void get_stat_values()
1119 {
1120 table_field->read_stats->set_all_nulls();
1121
1122 if (table_field->read_stats->min_value)
1123 table_field->read_stats->min_value->set_null();
1124 if (table_field->read_stats->max_value)
1125 table_field->read_stats->max_value->set_null();
1126
1127 if (find_stat())
1128 {
1129 char buff[MAX_FIELD_WIDTH];
1130 String val(buff, sizeof(buff), &my_charset_bin);
1131
1132 for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_TYPE; i++)
1133 {
1134 Field *stat_field= stat_table->field[i];
1135
1136 if (!stat_field->is_null() &&
1137 (i > COLUMN_STAT_MAX_VALUE ||
1138 (i == COLUMN_STAT_MIN_VALUE &&
1139 table_field->read_stats->min_value) ||
1140 (i == COLUMN_STAT_MAX_VALUE &&
1141 table_field->read_stats->max_value)))
1142 {
1143 table_field->read_stats->set_not_null(i);
1144
1145 switch (i) {
1146 case COLUMN_STAT_MIN_VALUE:
1147 table_field->read_stats->min_value->set_notnull();
1148 stat_field->val_str(&val);
1149 table_field->read_stats->min_value->store(val.ptr(), val.length(),
1150 &my_charset_bin);
1151 break;
1152 case COLUMN_STAT_MAX_VALUE:
1153 table_field->read_stats->max_value->set_notnull();
1154 stat_field->val_str(&val);
1155 table_field->read_stats->max_value->store(val.ptr(), val.length(),
1156 &my_charset_bin);
1157 break;
1158 case COLUMN_STAT_NULLS_RATIO:
1159 table_field->read_stats->set_nulls_ratio(stat_field->val_real());
1160 break;
1161 case COLUMN_STAT_AVG_LENGTH:
1162 table_field->read_stats->set_avg_length(stat_field->val_real());
1163 break;
1164 case COLUMN_STAT_AVG_FREQUENCY:
1165 table_field->read_stats->set_avg_frequency(stat_field->val_real());
1166 break;
1167 case COLUMN_STAT_HIST_SIZE:
1168 table_field->read_stats->histogram.set_size(stat_field->val_int());
1169 break;
1170 case COLUMN_STAT_HIST_TYPE:
1171 Histogram_type hist_type= (Histogram_type) (stat_field->val_int() -
1172 1);
1173 table_field->read_stats->histogram.set_type(hist_type);
1174 break;
1175 }
1176 }
1177 }
1178 }
1179 }
1180
1181
1182 /**
1183 @brief
1184 Read histogram from of column_stats
1185
1186 @details
1187 This method first looks for a record in the statistical table column_stats
1188 by its primary key set the record buffer with the help of
1189 Column_stat::set_key_fields. Then, if the row is found, the function reads
1190 the value of the column 'histogram' of the table column_stat and sets
1191 accordingly the corresponding bit in the bitmap read_stat.column_stat_nulls.
1192 The method assumes that the value of histogram size and the pointer to
1193 the histogram location has been already set in the fields size and values
1194 of read_stats->histogram.
1195 */
1196
1197 void get_histogram_value()
1198 {
1199 if (find_stat())
1200 {
1201 char buff[MAX_FIELD_WIDTH];
1202 String val(buff, sizeof(buff), &my_charset_bin);
1203 uint fldno= COLUMN_STAT_HISTOGRAM;
1204 Field *stat_field= stat_table->field[fldno];
1205 table_field->read_stats->set_not_null(fldno);
1206 stat_field->val_str(&val);
1207 memcpy(table_field->read_stats->histogram.get_values(),
1208 val.ptr(), table_field->read_stats->histogram.get_size());
1209 }
1210 }
1211
1212};
1213
1214
/*
  An object of the class Index_stat is created to read statistical
  data on index prefixes from the statistical table index_stats, to update
  index_stats with such statistical data, or to update columns
  of the primary key, or to delete the record by its primary key or
  its prefix.
  Rows from the statistical table are read and updated always by
  primary key.
*/
1224
1225class Index_stat: public Stat_table
1226{
1227
1228private:
1229
1230 Field *db_name_field; /* Field for the column index_stats.db_name */
1231 Field *table_name_field; /* Field for the column index_stats.table_name */
1232 Field *index_name_field; /* Field for the column index_stats.table_name */
1233 Field *prefix_arity_field; /* Field for the column index_stats.prefix_arity */
1234
1235 KEY *table_key_info; /* Info on the index to read/update statistics on */
1236 uint prefix_arity; /* Number of components of the index prefix of interest */
1237
1238 void common_init_index_stat_table()
1239 {
1240 db_name_field= stat_table->field[INDEX_STAT_DB_NAME];
1241 table_name_field= stat_table->field[INDEX_STAT_TABLE_NAME];
1242 index_name_field= stat_table->field[INDEX_STAT_INDEX_NAME];
1243 prefix_arity_field= stat_table->field[INDEX_STAT_PREFIX_ARITY];
1244 }
1245
1246 void change_full_table_name(const LEX_CSTRING *db, const LEX_CSTRING *tab)
1247 {
1248 db_name_field->store(db->str, db->length, system_charset_info);
1249 table_name_field->store(tab->str, tab->length, system_charset_info);
1250 }
1251
1252public:
1253
1254
1255 /**
1256 @details
1257 The constructor 'tunes' the private and protected members of the
1258 constructed object for the statistical table index_stats to read/update
1259 statistics on prefixes of different indexes of the table 'tab'.
1260 The TABLE structure for the table index_stats must be passed as a value
1261 for the parameter 'stat'.
1262 */
1263
1264 Index_stat(TABLE *stat, TABLE*tab) :Stat_table(stat, tab)
1265 {
1266 common_init_index_stat_table();
1267 }
1268
1269
1270 /**
1271 @details
1272 The constructor 'tunes' the private and protected members of the
1273 object constructed for the statistical table index_stats for
1274 the future updates/deletes of the record concerning the table 'tab'
1275 from the database 'db'.
1276 */
1277
1278 Index_stat(TABLE *stat, const LEX_CSTRING *db, const LEX_CSTRING *tab)
1279 :Stat_table(stat, db, tab)
1280 {
1281 common_init_index_stat_table();
1282 }
1283
1284
1285 /**
1286 @brief
1287 Set table name fields for the statistical table index_stats
1288
1289 @details
1290 The function stores the values of the fields db_name and table_name
1291 of the statistical table index_stats in the record buffer.
1292 */
1293
1294 void set_full_table_name()
1295 {
1296 db_name_field->store(db_name->str, db_name->length, system_charset_info);
1297 table_name_field->store(table_name->str, table_name->length,
1298 system_charset_info);
1299 }
1300
1301 /**
1302 @brief
1303 Set the key fields of index_stats used to access records for index prefixes
1304
1305 @param
1306 index_info Info for the index of 'table' to read/update statistics on
1307
1308 @details
1309 The function sets the values of the fields db_name, table_name and
1310 index_name in the record buffer for the statistical table index_stats.
1311 It also sets table_key_info to the passed parameter.
1312
1313 @note
1314 The function is supposed to be called before any use of the method
1315 find_next_stat_for_prefix for an object of the Index_stat class.
1316 */
1317
1318 void set_index_prefix_key_fields(KEY *index_info)
1319 {
1320 set_full_table_name();
1321 const char *index_name= index_info->name.str;
1322 index_name_field->store(index_name, index_info->name.length,
1323 system_charset_info);
1324 table_key_info= index_info;
1325 }
1326
1327
1328 /**
1329 @brief
1330 Set the key fields for the statistical table index_stats
1331
1332 @param
1333 index_info Info for the index of 'table' to read/update statistics on
1334 @param
1335 index_prefix_arity Number of components in the index prefix of interest
1336
1337 @details
1338 The function sets the values of the fields db_name, table_name and
1339 index_name, prefix_arity in the record buffer for the statistical
1340 table index_stats. These fields comprise the primary key for the table.
1341
1342 @note
1343 The function is supposed to be called before any use of the
1344 method find_stat for an object of the Index_stat class.
1345 */
1346
1347 void set_key_fields(KEY *index_info, uint index_prefix_arity)
1348 {
1349 set_index_prefix_key_fields(index_info);
1350 prefix_arity= index_prefix_arity;
1351 prefix_arity_field->store(index_prefix_arity, TRUE);
1352 }
1353
1354
1355 /**
1356 @brief
1357 Store statistical data into statistical fields of table index_stats
1358
1359 @details
1360 This implementation of a purely virtual method sets the value of the
1361 column 'avg_frequency' of the statistical table index_stats according to
1362 the value of write_stat.avg_frequency[Index_stat::prefix_arity]
1363 from the KEY_INFO structure 'table_key_info'.
1364 If the value of write_stat. avg_frequency[Index_stat::prefix_arity] is
1365 equal to 0, the value of the column is set to NULL.
1366 */
1367
1368 void store_stat_fields()
1369 {
1370 Field *stat_field= stat_table->field[INDEX_STAT_AVG_FREQUENCY];
1371 double avg_frequency=
1372 table_key_info->collected_stats->get_avg_frequency(prefix_arity-1);
1373 if (avg_frequency == 0)
1374 stat_field->set_null();
1375 else
1376 {
1377 stat_field->set_notnull();
1378 stat_field->store(avg_frequency);
1379 }
1380 }
1381
1382
1383 /**
1384 @brief
1385 Read statistical data from statistical fields of index_stats
1386
1387 @details
1388 This implementation of a purely virtual method first looks for a record the
1389 statistical table index_stats by its primary key set the record buffer with
1390 the help of Index_stat::set_key_fields. If the row is found the function
1391 reads the value of the column 'avg_freguency' of the table index_stat and
1392 sets the value of read_stat.avg_frequency[Index_stat::prefix_arity]
1393 from the KEY_INFO structure 'table_key_info' accordingly. If the value of
1394 the column is NULL, read_stat.avg_frequency[Index_stat::prefix_arity] is
1395 set to 0. Otherwise, read_stat.avg_frequency[Index_stat::prefix_arity] is
1396 set to the value of the column.
1397 */
1398
1399 void get_stat_values()
1400 {
1401 double avg_frequency= 0;
1402 if(find_stat())
1403 {
1404 Field *stat_field= stat_table->field[INDEX_STAT_AVG_FREQUENCY];
1405 if (!stat_field->is_null())
1406 avg_frequency= stat_field->val_real();
1407 }
1408 table_key_info->read_stats->set_avg_frequency(prefix_arity-1, avg_frequency);
1409 }
1410
1411};
1412
1413
/*
  An iterator that enumerates rows of a statistical table and allows the
  rows to be modified while they are being read.

  Used by RENAME TABLE handling to assign the new dbname.tablename to the
  statistics rows.
*/
1421class Stat_table_write_iter
1422{
1423 Stat_table *owner;
1424 IO_CACHE io_cache;
1425 uchar *rowid_buf;
1426 uint rowid_size;
1427
1428public:
1429 Stat_table_write_iter(Stat_table *stat_table_arg)
1430 : owner(stat_table_arg), rowid_buf(NULL),
1431 rowid_size(owner->stat_file->ref_length)
1432 {
1433 my_b_clear(&io_cache);
1434 }
1435
1436 /*
1437 Initialize the iterator. It will return rows with n_keyparts matching the
1438 curernt values.
1439
1440 @return false - OK
1441 true - Error
1442 */
1443 bool init(uint n_keyparts)
1444 {
1445 if (!(rowid_buf= (uchar*)my_malloc(rowid_size, MYF(0))))
1446 return true;
1447
1448 if (open_cached_file(&io_cache, mysql_tmpdir, TEMP_PREFIX,
1449 1024, MYF(MY_WME)))
1450 return true;
1451
1452 handler *h= owner->stat_file;
1453 uchar key[MAX_KEY_LENGTH];
1454 uint prefix_len= 0;
1455 for (uint i= 0; i < n_keyparts; i++)
1456 prefix_len += owner->stat_key_info->key_part[i].store_length;
1457
1458 key_copy(key, owner->record[0], owner->stat_key_info,
1459 prefix_len);
1460 key_part_map prefix_map= (key_part_map) ((1 << n_keyparts) - 1);
1461 h->ha_index_init(owner->stat_key_idx, false);
1462 int res= h->ha_index_read_map(owner->record[0], key, prefix_map,
1463 HA_READ_KEY_EXACT);
1464 if (res)
1465 {
1466 reinit_io_cache(&io_cache, READ_CACHE, 0L, 0, 0);
1467 /* "Key not found" is not considered an error */
1468 return (res == HA_ERR_KEY_NOT_FOUND)? false: true;
1469 }
1470
1471 do {
1472 h->position(owner->record[0]);
1473 my_b_write(&io_cache, h->ref, rowid_size);
1474
1475 } while (!h->ha_index_next_same(owner->record[0], key, prefix_len));
1476
1477 /* Prepare for reading */
1478 reinit_io_cache(&io_cache, READ_CACHE, 0L, 0, 0);
1479 h->ha_index_or_rnd_end();
1480 if (h->ha_rnd_init(false))
1481 return true;
1482
1483 return false;
1484 }
1485
1486 /*
1487 Read the next row.
1488
1489 @return
1490 false OK
1491 true No more rows or error.
1492 */
1493 bool get_next_row()
1494 {
1495 if (!my_b_inited(&io_cache) || my_b_read(&io_cache, rowid_buf, rowid_size))
1496 return true; /* No more data */
1497
1498 handler *h= owner->stat_file;
1499 /*
1500 We should normally be able to find the row that we have rowid for. If we
1501 don't, let's consider this an error.
1502 */
1503 int res= h->ha_rnd_pos(owner->record[0], rowid_buf);
1504
1505 return (res==0)? false : true;
1506 }
1507
1508 void cleanup()
1509 {
1510 if (rowid_buf)
1511 my_free(rowid_buf);
1512 rowid_buf= NULL;
1513 owner->stat_file->ha_index_or_rnd_end();
1514 close_cached_file(&io_cache);
1515 my_b_clear(&io_cache);
1516 }
1517
1518 ~Stat_table_write_iter()
1519 {
1520 cleanup();
1521 }
1522};
1523
1524/*
1525 Histogram_builder is a helper class that is used to build histograms
1526 for columns
1527*/
1528
1529class Histogram_builder
1530{
1531 Field *column; /* table field for which the histogram is built */
1532 uint col_length; /* size of this field */
1533 ha_rows records; /* number of records the histogram is built for */
1534 Field *min_value; /* pointer to the minimal value for the field */
1535 Field *max_value; /* pointer to the maximal value for the field */
1536 Histogram *histogram; /* the histogram location */
1537 uint hist_width; /* the number of points in the histogram */
1538 double bucket_capacity; /* number of rows in a bucket of the histogram */
1539 uint curr_bucket; /* number of the current bucket to be built */
1540 ulonglong count; /* number of values retrieved */
1541 ulonglong count_distinct; /* number of distinct values retrieved */
1542
1543public:
1544 Histogram_builder(Field *col, uint col_len, ha_rows rows)
1545 : column(col), col_length(col_len), records(rows)
1546 {
1547 Column_statistics *col_stats= col->collected_stats;
1548 min_value= col_stats->min_value;
1549 max_value= col_stats->max_value;
1550 histogram= &col_stats->histogram;
1551 hist_width= histogram->get_width();
1552 bucket_capacity= (double) records / (hist_width + 1);
1553 curr_bucket= 0;
1554 count= 0;
1555 count_distinct= 0;
1556 }
1557
1558 ulonglong get_count_distinct() { return count_distinct; }
1559
1560 int next(void *elem, element_count elem_cnt)
1561 {
1562 count_distinct++;
1563 count+= elem_cnt;
1564 if (curr_bucket == hist_width)
1565 return 0;
1566 if (count > bucket_capacity * (curr_bucket + 1))
1567 {
1568 column->store_field_value((uchar *) elem, col_length);
1569 histogram->set_value(curr_bucket,
1570 column->pos_in_interval(min_value, max_value));
1571 curr_bucket++;
1572 while (curr_bucket != hist_width &&
1573 count > bucket_capacity * (curr_bucket + 1))
1574 {
1575 histogram->set_prev_value(curr_bucket);
1576 curr_bucket++;
1577 }
1578 }
1579 return 0;
1580 }
1581};
1582
1583
1584C_MODE_START
1585
1586int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
1587{
1588 Histogram_builder *hist_builder= (Histogram_builder *) arg;
1589 return hist_builder->next(elem, elem_cnt);
1590}
1591
1592C_MODE_END
1593
1594
1595/*
1596 The class Count_distinct_field is a helper class used to calculate
1597 the number of distinct values for a column. The class employs the
1598 Unique class for this purpose.
1599 The class Count_distinct_field is used only by the function
1600 collect_statistics_for_table to calculate the values for
1601 column avg_frequency of the statistical table column_stats.
1602*/
1603
1604class Count_distinct_field: public Sql_alloc
1605{
1606protected:
1607
1608 /* Field for which the number of distinct values is to be find out */
1609 Field *table_field;
1610 Unique *tree; /* The helper object to contain distinct values */
1611 uint tree_key_length; /* The length of the keys for the elements of 'tree */
1612
1613public:
1614
1615 Count_distinct_field() {}
1616
1617 /**
1618 @param
1619 field Field for which the number of distinct values is
1620 to be find out
1621 @param
1622 max_heap_table_size The limit for the memory used by the RB tree container
1623 of the constructed Unique object 'tree'
1624
1625 @details
1626 The constructor sets the values of 'table_field' and 'tree_key_length',
1627 and then calls the 'new' operation to create a Unique object for 'tree'.
1628 The type of 'field' and the value max_heap_table_size of determine the set
1629 of the parameters to be passed to the constructor of the Unique object.
1630 */
1631
1632 Count_distinct_field(Field *field, size_t max_heap_table_size)
1633 {
1634 table_field= field;
1635 tree_key_length= field->pack_length();
1636
1637 tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
1638 tree_key_length, max_heap_table_size, 1);
1639 }
1640
1641 virtual ~Count_distinct_field()
1642 {
1643 delete tree;
1644 tree= NULL;
1645 }
1646
1647 /*
1648 @brief
1649 Check whether the Unique object tree has been successfully created
1650 */
1651 bool exists()
1652 {
1653 return (tree != NULL);
1654 }
1655
1656 /*
1657 @brief
1658 Add the value of 'field' to the container of the Unique object 'tree'
1659 */
1660 virtual bool add()
1661 {
1662 return tree->unique_add(table_field->ptr);
1663 }
1664
1665 /*
1666 @brief
1667 Calculate the number of elements accumulated in the container of 'tree'
1668 */
1669 ulonglong get_value()
1670 {
1671 ulonglong count;
1672 if (tree->elements == 0)
1673 return (ulonglong) tree->elements_in_tree();
1674 count= 0;
1675 tree->walk(table_field->table, count_distinct_walk, (void*) &count);
1676 return count;
1677 }
1678
1679 /*
1680 @brief
1681 Build the histogram for the elements accumulated in the container of 'tree'
1682 */
1683 ulonglong get_value_with_histogram(ha_rows rows)
1684 {
1685 Histogram_builder hist_builder(table_field, tree_key_length, rows);
1686 tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder);
1687 return hist_builder.get_count_distinct();
1688 }
1689
1690 /*
1691 @brief
1692 Get the size of the histogram in bytes built for table_field
1693 */
1694 uint get_hist_size()
1695 {
1696 return table_field->collected_stats->histogram.get_size();
1697 }
1698
1699 /*
1700 @brief
1701 Get the pointer to the histogram built for table_field
1702 */
1703 uchar *get_histogram()
1704 {
1705 return table_field->collected_stats->histogram.get_values();
1706 }
1707
1708};
1709
1710
1711static
1712int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2)
1713{
1714 ulonglong *val1= (ulonglong *) key1;
1715 ulonglong *val2= (ulonglong *) key2;
1716 return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1;
1717}
1718
1719
1720/*
1721 The class Count_distinct_field_bit is derived from the class
1722 Count_distinct_field to be used only for fields of the MYSQL_TYPE_BIT type.
1723 The class provides a different implementation for the method add
1724*/
1725
1726class Count_distinct_field_bit: public Count_distinct_field
1727{
1728public:
1729
1730 Count_distinct_field_bit(Field *field, size_t max_heap_table_size)
1731 {
1732 table_field= field;
1733 tree_key_length= sizeof(ulonglong);
1734
1735 tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
1736 (void*) &tree_key_length,
1737 tree_key_length, max_heap_table_size, 1);
1738 }
1739
1740 bool add()
1741 {
1742 longlong val= table_field->val_int();
1743 return tree->unique_add(&val);
1744 }
1745};
1746
1747
1748/*
1749 The class Index_prefix_calc is a helper class used to calculate the values
1750 for the column 'avg_frequency' of the statistical table index_stats.
1751 For any table t from the database db and any k-component prefix of the
1752 index i for this table the row from index_stats with the primary key
1753 (db,t,i,k) must contain in the column 'avg_frequency' either NULL or
1754 the number that is the ratio of N and V, where N is the number of index
1755 entries without NULL values in the first k components of the index i,
1756 and V is the number of distinct tuples composed of the first k components
1757 encountered among these index entries.
1758 Currently the objects of this class are used only by the function
1759 collect_statistics_for_index.
1760*/
1761
1762class Index_prefix_calc: public Sql_alloc
1763{
1764
1765private:
1766
1767 /* Table containing index specified by index_info */
1768 TABLE *index_table;
1769 /* Info for the index i for whose prefix 'avg_frequency' is calculated */
1770 KEY *index_info;
1771 /* The maximum number of the components in the prefixes of interest */
1772 uint prefixes;
1773 bool empty;
1774
1775 /* This structure is created for every k components of the index i */
1776 class Prefix_calc_state
1777 {
1778 public:
1779 /*
1780 The number of the scanned index entries without nulls
1781 in the first k components
1782 */
1783 ulonglong entry_count;
1784 /*
1785 The number if the scanned index entries without nulls with
1786 the last encountered k-component prefix
1787 */
1788 ulonglong prefix_count;
1789 /* The values of the last encountered k-component prefix */
1790 Cached_item *last_prefix;
1791 };
1792
1793 /*
1794 Array of structures used to calculate 'avg_frequency' for different
1795 prefixes of the index i
1796 */
1797 Prefix_calc_state *calc_state;
1798
1799public:
1800
1801 bool is_single_comp_pk;
1802 bool is_partial_fields_present;
1803
1804 Index_prefix_calc(THD *thd, TABLE *table, KEY *key_info)
1805 : index_table(table), index_info(key_info)
1806 {
1807 uint i;
1808 Prefix_calc_state *state;
1809 uint key_parts= table->actual_n_key_parts(key_info);
1810 empty= TRUE;
1811 prefixes= 0;
1812 LINT_INIT_STRUCT(calc_state);
1813
1814 is_partial_fields_present= is_single_comp_pk= FALSE;
1815 uint pk= table->s->primary_key;
1816 if ((uint) (table->key_info - key_info) == pk &&
1817 table->key_info[pk].user_defined_key_parts == 1)
1818 {
1819 prefixes= 1;
1820 is_single_comp_pk= TRUE;
1821 return;
1822 }
1823
1824 if ((calc_state=
1825 (Prefix_calc_state *) thd->alloc(sizeof(Prefix_calc_state)*key_parts)))
1826 {
1827 uint keyno= (uint)(key_info-table->key_info);
1828 for (i= 0, state= calc_state; i < key_parts; i++, state++)
1829 {
1830 /*
1831 Do not consider prefixes containing a component that is only part
1832 of the field. This limitation is set to avoid fetching data when
1833 calculating the values of 'avg_frequency' for prefixes.
1834 */
1835 if (!key_info->key_part[i].field->part_of_key.is_set(keyno))
1836 {
1837 is_partial_fields_present= TRUE;
1838 break;
1839 }
1840
1841 if (!(state->last_prefix=
1842 new (thd->mem_root) Cached_item_field(thd,
1843 key_info->key_part[i].field)))
1844 break;
1845 state->entry_count= state->prefix_count= 0;
1846 prefixes++;
1847 }
1848 }
1849 }
1850
1851
1852 /**
1853 @breif
1854 Change the elements of calc_state after reading the next index entry
1855
1856 @details
1857 This function is to be called at the index scan each time the next
1858 index entry has been read into the record buffer.
1859 For each of the index prefixes the function checks whether nulls
1860 are encountered in any of the k components of the prefix.
1861 If this is not the case the value of calc_state[k-1].entry_count
1862 is incremented by 1. Then the function checks whether the value of
1863 any of these k components has changed. If so, the value of
1864 calc_state[k-1].prefix_count is incremented by 1.
1865 */
1866
1867 void add()
1868 {
1869 uint i;
1870 Prefix_calc_state *state;
1871 uint first_changed= prefixes;
1872 for (i= prefixes, state= calc_state+prefixes-1; i; i--, state--)
1873 {
1874 if (state->last_prefix->cmp())
1875 first_changed= i-1;
1876 }
1877 if (empty)
1878 {
1879 first_changed= 0;
1880 empty= FALSE;
1881 }
1882 for (i= 0, state= calc_state; i < prefixes; i++, state++)
1883 {
1884 if (state->last_prefix->null_value)
1885 break;
1886 if (i >= first_changed)
1887 state->prefix_count++;
1888 state->entry_count++;
1889 }
1890 }
1891
1892 /**
1893 @brief
1894 Calculate the values of avg_frequency for all prefixes of an index
1895
1896 @details
1897 This function is to be called after the index scan to count the number
1898 of distinct index prefixes has been done. The function calculates
1899 the value of avg_frequency for the index prefix with k components
1900 as calc_state[k-1].entry_count/calc_state[k-1].prefix_count.
1901 If calc_state[k-1].prefix_count happens to be 0, the value of
1902 avg_frequency[k-1] is set to 0, i.e. is considered as unknown.
1903 */
1904
1905 void get_avg_frequency()
1906 {
1907 uint i;
1908 Prefix_calc_state *state;
1909
1910 if (is_single_comp_pk)
1911 {
1912 index_info->collected_stats->set_avg_frequency(0, 1.0);
1913 return;
1914 }
1915
1916 for (i= 0, state= calc_state; i < prefixes; i++, state++)
1917 {
1918 if (i < prefixes)
1919 {
1920 double val= state->prefix_count == 0 ?
1921 0 : (double) state->entry_count / state->prefix_count;
1922 index_info->collected_stats->set_avg_frequency(i, val);
1923 }
1924 }
1925 }
1926};
1927
1928
1929/**
1930 @brief
1931 Create fields for min/max values to collect column statistics
1932
1933 @param
1934 table Table the fields are created for
1935
1936 @details
  The function first allocates record buffers to store min/max values
  for 'table's fields. Then for each table field f it creates Field structures
  that point to these buffers rather than to the record buffer as the
  Field object for f does. The pointers to the created fields are placed
  in the collected_stats structure of the Field object for f.
  The function allocates the buffers for min/max values in the table
  memory.
1944
1945 @note
1946 The buffers allocated when min/max values are used to read statistics
1947 from the persistent statistical tables differ from those buffers that
1948 are used when statistics on min/max values for column is collected
1949 as they are allocated in different mem_roots.
1950 The same is true for the fields created for min/max values.
1951*/
1952
1953static
1954void create_min_max_statistical_fields_for_table(TABLE *table)
1955{
1956 uint rec_buff_length= table->s->rec_buff_length;
1957
1958 if ((table->collected_stats->min_max_record_buffers=
1959 (uchar *) alloc_root(&table->mem_root, 2*rec_buff_length)))
1960 {
1961 uchar *record= table->collected_stats->min_max_record_buffers;
1962 memset(record, 0, 2*rec_buff_length);
1963
1964 for (uint i=0; i < 2; i++, record+= rec_buff_length)
1965 {
1966 for (Field **field_ptr= table->field; *field_ptr; field_ptr++)
1967 {
1968 Field *fld;
1969 Field *table_field= *field_ptr;
1970 my_ptrdiff_t diff= record-table->record[0];
1971 if (!bitmap_is_set(table->read_set, table_field->field_index))
1972 continue;
1973 if (!(fld= table_field->clone(&table->mem_root, table, diff, TRUE)))
1974 continue;
1975 if (i == 0)
1976 table_field->collected_stats->min_value= fld;
1977 else
1978 table_field->collected_stats->max_value= fld;
1979 }
1980 }
1981 }
1982}
1983
1984
1985/**
1986 @brief
1987 Create fields for min/max values to read column statistics
1988
1989 @param
1990 thd Thread handler
  @param
  table_share  Table share the fields are created for
1995
1996 @details
1997 The function first allocates record buffers to store min/max values
1998 for 'table_share's fields. Then for each field f it creates Field structures
1999 that points to these buffers rather that to the record buffer as the
2000 Field object for f does. The pointers of the created fields are placed
2001 in the read_stats structure of the Field object for f.
2002 The function allocates the buffers for min/max values in the table share
2003 memory.
  The function takes no lock itself: it returns immediately if the buffers
  already exist, and its caller alloc_statistics_for_table_share() is
  responsible for serializing concurrent calls.
2006
2007 @note
2008 The buffers allocated when min/max values are used to collect statistics
2009 from the persistent statistical tables differ from those buffers that
2010 are used when statistics on min/max values for column is read as they
2011 are allocated in different mem_roots.
2012 The same is true for the fields created for min/max values.
2013*/
2014
2015static
2016void create_min_max_statistical_fields_for_table_share(THD *thd,
2017 TABLE_SHARE *table_share)
2018{
2019 TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb;
2020 Table_statistics *stats= stats_cb->table_stats;
2021
2022 if (stats->min_max_record_buffers)
2023 return;
2024
2025 uint rec_buff_length= table_share->rec_buff_length;
2026
2027 if ((stats->min_max_record_buffers=
2028 (uchar *) alloc_root(&stats_cb->mem_root, 2*rec_buff_length)))
2029 {
2030 uchar *record= stats->min_max_record_buffers;
2031 memset(record, 0, 2*rec_buff_length);
2032
2033 for (uint i=0; i < 2; i++, record+= rec_buff_length)
2034 {
2035 for (Field **field_ptr= table_share->field; *field_ptr; field_ptr++)
2036 {
2037 Field *fld;
2038 Field *table_field= *field_ptr;
2039 my_ptrdiff_t diff= record - table_share->default_values;
2040 if (!(fld= table_field->clone(&stats_cb->mem_root, diff)))
2041 continue;
2042 if (i == 0)
2043 table_field->read_stats->min_value= fld;
2044 else
2045 table_field->read_stats->max_value= fld;
2046 }
2047 }
2048 }
2049
2050}
2051
2052
2053/**
2054 @brief
2055 Allocate memory for the table's statistical data to be collected
2056
2057 @param
2058 table Table for which the memory for statistical data is allocated
2059
2060 @note
2061 The function allocates the memory for the statistical data on 'table' with
2062 the intention to collect the data there. The memory is allocated for
2063 the statistics on the table, on the table's columns, and on the table's
2064 indexes. The memory is allocated in the table's mem_root.
2065
2066 @retval
2067 0 If the memory for all statistical data has been successfully allocated
2068 @retval
2069 1 Otherwise
2070
2071 @note
2072 Each thread allocates its own memory to collect statistics on the table
2073 It allows us, for example, to collect statistics on the different indexes
2074 of the same table in parallel.
2075*/
2076
2077int alloc_statistics_for_table(THD* thd, TABLE *table)
2078{
2079 Field **field_ptr;
2080 uint fields;
2081
2082 DBUG_ENTER("alloc_statistics_for_table");
2083
2084
2085 Table_statistics *table_stats=
2086 (Table_statistics *) alloc_root(&table->mem_root,
2087 sizeof(Table_statistics));
2088
2089 fields= table->s->fields ;
2090 Column_statistics_collected *column_stats=
2091 (Column_statistics_collected *) alloc_root(&table->mem_root,
2092 sizeof(Column_statistics_collected) *
2093 (fields+1));
2094
2095 uint keys= table->s->keys;
2096 Index_statistics *index_stats=
2097 (Index_statistics *) alloc_root(&table->mem_root,
2098 sizeof(Index_statistics) * keys);
2099
2100 uint key_parts= table->s->ext_key_parts;
2101 ulong *idx_avg_frequency= (ulong*) alloc_root(&table->mem_root,
2102 sizeof(ulong) * key_parts);
2103
2104 uint columns= 0;
2105 for (field_ptr= table->field; *field_ptr; field_ptr++)
2106 {
2107 if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
2108 columns++;
2109 }
2110 uint hist_size= thd->variables.histogram_size;
2111 Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type);
2112 uchar *histogram= NULL;
2113 if (hist_size > 0)
2114 histogram= (uchar *) alloc_root(&table->mem_root, hist_size * columns);
2115
2116 if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency ||
2117 (hist_size && !histogram))
2118 DBUG_RETURN(1);
2119
2120 table->collected_stats= table_stats;
2121 table_stats->column_stats= column_stats;
2122 table_stats->index_stats= index_stats;
2123 table_stats->idx_avg_frequency= idx_avg_frequency;
2124 table_stats->histograms= histogram;
2125
2126 memset(column_stats, 0, sizeof(Column_statistics) * (fields+1));
2127
2128 for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++)
2129 {
2130 (*field_ptr)->collected_stats= column_stats;
2131 (*field_ptr)->collected_stats->max_value= NULL;
2132 (*field_ptr)->collected_stats->min_value= NULL;
2133 if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
2134 {
2135 column_stats->histogram.set_size(hist_size);
2136 column_stats->histogram.set_type(hist_type);
2137 column_stats->histogram.set_values(histogram);
2138 histogram+= hist_size;
2139 }
2140 }
2141
2142 memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts);
2143
2144 KEY *key_info, *end;
2145 for (key_info= table->key_info, end= key_info + table->s->keys;
2146 key_info < end;
2147 key_info++, index_stats++)
2148 {
2149 key_info->collected_stats= index_stats;
2150 key_info->collected_stats->init_avg_frequency(idx_avg_frequency);
2151 idx_avg_frequency+= key_info->ext_key_parts;
2152 }
2153
2154 create_min_max_statistical_fields_for_table(table);
2155
2156 DBUG_RETURN(0);
2157}
2158
2159
2160/**
2161 @brief
2162 Check whether any persistent statistics for the processed command is needed
2163
2164 @param
2165 thd The thread handle
2166
2167 @details
  The function checks whether any persistent statistics need to be read
  for the command being processed.
2170
2171 @retval
2172 TRUE statistics is needed to be read
2173 @retval
2174 FALSE Otherwise
2175*/
2176
2177static
2178inline bool statistics_for_command_is_needed(THD *thd)
2179{
2180 if (thd->bootstrap || thd->variables.use_stat_tables == NEVER)
2181 return FALSE;
2182
2183 switch(thd->lex->sql_command) {
2184 case SQLCOM_SELECT:
2185 case SQLCOM_INSERT:
2186 case SQLCOM_INSERT_SELECT:
2187 case SQLCOM_UPDATE:
2188 case SQLCOM_UPDATE_MULTI:
2189 case SQLCOM_DELETE:
2190 case SQLCOM_DELETE_MULTI:
2191 case SQLCOM_REPLACE:
2192 case SQLCOM_REPLACE_SELECT:
2193 break;
2194 default:
2195 return FALSE;
2196 }
2197
2198 return TRUE;
2199}
2200
2201
2202/**
2203 @brief
2204 Allocate memory for the statistical data used by a table share
2205
2206 @param
2207 thd Thread handler
2208 @param
2209 table_share Table share for which the memory for statistical data is allocated
2210 @param
2211 is_safe TRUE <-> at any time only one thread can perform the function
2212
2213 @note
2214 The function allocates the memory for the statistical data on a table in the
2215 table's share memory with the intention to read the statistics there from
2216 the system persistent statistical tables mysql.table_stat, mysql.column_stats,
2217 mysql.index_stats. The memory is allocated for the statistics on the table,
2218 on the tables's columns, and on the table's indexes. The memory is allocated
2219 in the table_share's mem_root.
2220 If the parameter is_safe is TRUE then it is guaranteed that at any given time
2221 only one thread is executed the code of the function.
2222
2223 @retval
2224 0 If the memory for all statistical data has been successfully allocated
2225 @retval
2226 1 Otherwise
2227
2228 @note
2229 The situation when more than one thread try to allocate memory for
2230 statistical data is rare. It happens under the following scenario:
2231 1. One thread executes a query over table t with the system variable
2232 'use_stat_tables' set to 'never'.
2233 2. After this the second thread sets 'use_stat_tables' to 'preferably'
2234 and executes a query over table t.
2235 3. Simultaneously the third thread sets 'use_stat_tables' to 'preferably'
2236 and executes a query over table t.
2237 Here the second and the third threads try to allocate the memory for
2238 statistical data at the same time. The precautions are taken to
2239 guarantee the correctness of the allocation.
2240
2241 @note
2242 Currently the function always is called with the parameter is_safe set
2243 to FALSE.
2244*/
2245
2246int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share,
2247 bool is_safe)
2248{
2249
2250 Field **field_ptr;
2251 KEY *key_info, *end;
2252 TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb;
2253
2254 DBUG_ENTER("alloc_statistics_for_table_share");
2255
2256 DEBUG_SYNC(thd, "statistics_mem_alloc_start1");
2257 DEBUG_SYNC(thd, "statistics_mem_alloc_start2");
2258
2259 if (!statistics_for_command_is_needed(thd))
2260 DBUG_RETURN(1);
2261
2262 if (!is_safe)
2263 mysql_mutex_lock(&table_share->LOCK_share);
2264
2265 if (stats_cb->stats_can_be_read)
2266 {
2267 if (!is_safe)
2268 mysql_mutex_unlock(&table_share->LOCK_share);
2269 DBUG_RETURN(0);
2270 }
2271
2272 Table_statistics *table_stats= stats_cb->table_stats;
2273 if (!table_stats)
2274 {
2275 table_stats= (Table_statistics *) alloc_root(&stats_cb->mem_root,
2276 sizeof(Table_statistics));
2277 if (!table_stats)
2278 {
2279 if (!is_safe)
2280 mysql_mutex_unlock(&table_share->LOCK_share);
2281 DBUG_RETURN(1);
2282 }
2283 memset(table_stats, 0, sizeof(Table_statistics));
2284 stats_cb->table_stats= table_stats;
2285 }
2286
2287 uint fields= table_share->fields;
2288 Column_statistics *column_stats= table_stats->column_stats;
2289 if (!column_stats)
2290 {
2291 column_stats= (Column_statistics *) alloc_root(&stats_cb->mem_root,
2292 sizeof(Column_statistics) *
2293 (fields+1));
2294 if (column_stats)
2295 {
2296 memset(column_stats, 0, sizeof(Column_statistics) * (fields+1));
2297 table_stats->column_stats= column_stats;
2298 for (field_ptr= table_share->field;
2299 *field_ptr;
2300 field_ptr++, column_stats++)
2301 {
2302 (*field_ptr)->read_stats= column_stats;
2303 (*field_ptr)->read_stats->min_value= NULL;
2304 (*field_ptr)->read_stats->max_value= NULL;
2305 }
2306 create_min_max_statistical_fields_for_table_share(thd, table_share);
2307 }
2308 }
2309
2310 uint keys= table_share->keys;
2311 Index_statistics *index_stats= table_stats->index_stats;
2312 if (!index_stats)
2313 {
2314 index_stats= (Index_statistics *) alloc_root(&stats_cb->mem_root,
2315 sizeof(Index_statistics) *
2316 keys);
2317 if (index_stats)
2318 {
2319 table_stats->index_stats= index_stats;
2320 for (key_info= table_share->key_info, end= key_info + keys;
2321 key_info < end;
2322 key_info++, index_stats++)
2323 {
2324 key_info->read_stats= index_stats;
2325 }
2326 }
2327 }
2328
2329 uint key_parts= table_share->ext_key_parts;
2330 ulong *idx_avg_frequency= table_stats->idx_avg_frequency;
2331 if (!idx_avg_frequency)
2332 {
2333 idx_avg_frequency= (ulong*) alloc_root(&stats_cb->mem_root,
2334 sizeof(ulong) * key_parts);
2335 if (idx_avg_frequency)
2336 {
2337 memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts);
2338 table_stats->idx_avg_frequency= idx_avg_frequency;
2339 for (key_info= table_share->key_info, end= key_info + keys;
2340 key_info < end;
2341 key_info++)
2342 {
2343 key_info->read_stats->init_avg_frequency(idx_avg_frequency);
2344 idx_avg_frequency+= key_info->ext_key_parts;
2345 }
2346 }
2347 }
2348
2349 if (column_stats && index_stats && idx_avg_frequency)
2350 stats_cb->stats_can_be_read= TRUE;
2351
2352 if (!is_safe)
2353 mysql_mutex_unlock(&table_share->LOCK_share);
2354
2355 DBUG_RETURN(0);
2356}
2357
2358
2359/**
2360 @brief
2361 Allocate memory for the histogram used by a table share
2362
2363 @param
2364 thd Thread handler
2365 @param
2366 table_share Table share for which the memory for histogram data is allocated
2367 @param
2368 is_safe TRUE <-> at any time only one thread can perform the function
2369
2370 @note
2371 The function allocates the memory for the histogram built for a table in the
2372 table's share memory with the intention to read the data there from the
2373 system persistent statistical table mysql.column_stats,
2374 The memory is allocated in the table_share's mem_root.
2375 If the parameter is_safe is TRUE then it is guaranteed that at any given time
2376 only one thread is executed the code of the function.
2377
2378 @retval
2379 0 If the memory for all statistical data has been successfully allocated
2380 @retval
2381 1 Otherwise
2382
2383 @note
2384 Currently the function always is called with the parameter is_safe set
2385 to FALSE.
2386*/
2387
2388static
2389int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share,
2390 bool is_safe)
2391{
2392 TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb;
2393
2394 DBUG_ENTER("alloc_histograms_for_table_share");
2395
2396 if (!is_safe)
2397 mysql_mutex_lock(&table_share->LOCK_share);
2398
2399 if (stats_cb->histograms_can_be_read)
2400 {
2401 if (!is_safe)
2402 mysql_mutex_unlock(&table_share->LOCK_share);
2403 DBUG_RETURN(0);
2404 }
2405
2406 Table_statistics *table_stats= stats_cb->table_stats;
2407 ulong total_hist_size= table_stats->total_hist_size;
2408
2409 if (total_hist_size && !table_stats->histograms)
2410 {
2411 uchar *histograms= (uchar *) alloc_root(&stats_cb->mem_root,
2412 total_hist_size);
2413 if (!histograms)
2414 {
2415 if (!is_safe)
2416 mysql_mutex_unlock(&table_share->LOCK_share);
2417 DBUG_RETURN(1);
2418 }
2419 memset(histograms, 0, total_hist_size);
2420 table_stats->histograms= histograms;
2421 stats_cb->histograms_can_be_read= TRUE;
2422 }
2423
2424 if (!is_safe)
2425 mysql_mutex_unlock(&table_share->LOCK_share);
2426
2427 DBUG_RETURN(0);
2428
2429}
2430
2431/**
2432 @brief
2433 Initialize the aggregation fields to collect statistics on a column
2434
2435 @param
2436 thd Thread handler
2437 @param
2438 table_field Column to collect statistics for
2439*/
2440
2441inline
2442void Column_statistics_collected::init(THD *thd, Field *table_field)
2443{
2444 size_t max_heap_table_size= (size_t)thd->variables.max_heap_table_size;
2445 TABLE *table= table_field->table;
2446 uint pk= table->s->primary_key;
2447
2448 is_single_pk_col= FALSE;
2449
2450 if (pk != MAX_KEY && table->key_info[pk].user_defined_key_parts == 1 &&
2451 table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1)
2452 is_single_pk_col= TRUE;
2453
2454 column= table_field;
2455
2456 set_all_nulls();
2457
2458 nulls= 0;
2459 column_total_length= 0;
2460 if (is_single_pk_col)
2461 count_distinct= NULL;
2462 if (table_field->flags & BLOB_FLAG)
2463 count_distinct= NULL;
2464 else
2465 {
2466 count_distinct=
2467 table_field->type() == MYSQL_TYPE_BIT ?
2468 new Count_distinct_field_bit(table_field, max_heap_table_size) :
2469 new Count_distinct_field(table_field, max_heap_table_size);
2470 }
2471 if (count_distinct && !count_distinct->exists())
2472 count_distinct= NULL;
2473}
2474
2475
2476/**
2477 @brief
2478 Perform aggregation for a row when collecting statistics on a column
2479
2480 @param
2481 rowno The order number of the row
2482*/
2483
2484inline
2485bool Column_statistics_collected::add(ha_rows rowno)
2486{
2487
2488 bool err= 0;
2489 if (column->is_null())
2490 nulls++;
2491 else
2492 {
2493 column_total_length+= column->value_length();
2494 if (min_value && column->update_min(min_value, rowno == nulls))
2495 set_not_null(COLUMN_STAT_MIN_VALUE);
2496 if (max_value && column->update_max(max_value, rowno == nulls))
2497 set_not_null(COLUMN_STAT_MAX_VALUE);
2498 if (count_distinct)
2499 err= count_distinct->add();
2500 }
2501 return err;
2502}
2503
2504
2505/**
2506 @brief
2507 Get the results of aggregation when collecting the statistics on a column
2508
2509 @param
2510 rows The total number of rows in the table
2511*/
2512
2513inline
2514void Column_statistics_collected::finish(ha_rows rows)
2515{
2516 double val;
2517
2518 if (rows)
2519 {
2520 val= (double) nulls / rows;
2521 set_nulls_ratio(val);
2522 set_not_null(COLUMN_STAT_NULLS_RATIO);
2523 }
2524 if (rows - nulls)
2525 {
2526 val= (double) column_total_length / (rows - nulls);
2527 set_avg_length(val);
2528 set_not_null(COLUMN_STAT_AVG_LENGTH);
2529 }
2530 if (count_distinct)
2531 {
2532 ulonglong distincts;
2533 uint hist_size= count_distinct->get_hist_size();
2534 if (hist_size == 0)
2535 distincts= count_distinct->get_value();
2536 else
2537 distincts= count_distinct->get_value_with_histogram(rows - nulls);
2538 if (distincts)
2539 {
2540 val= (double) (rows - nulls) / distincts;
2541 set_avg_frequency(val);
2542 set_not_null(COLUMN_STAT_AVG_FREQUENCY);
2543 }
2544 else
2545 hist_size= 0;
2546 histogram.set_size(hist_size);
2547 set_not_null(COLUMN_STAT_HIST_SIZE);
2548 if (hist_size && distincts)
2549 {
2550 set_not_null(COLUMN_STAT_HIST_TYPE);
2551 histogram.set_values(count_distinct->get_histogram());
2552 set_not_null(COLUMN_STAT_HISTOGRAM);
2553 }
2554 delete count_distinct;
2555 count_distinct= NULL;
2556 }
2557 else if (is_single_pk_col)
2558 {
2559 val= 1.0;
2560 set_avg_frequency(val);
2561 set_not_null(COLUMN_STAT_AVG_FREQUENCY);
2562 }
2563}
2564
2565
2566/**
2567 @brief
2568 Clean up auxiliary structures used for aggregation
2569*/
2570
2571inline
2572void Column_statistics_collected::cleanup()
2573{
2574 if (count_distinct)
2575 {
2576 delete count_distinct;
2577 count_distinct= NULL;
2578 }
2579}
2580
2581
2582/**
2583 @brief
2584 Collect statistical data on an index
2585
2586 @param
2587 table The table the index belongs to
2588 index The number of this index in the table
2589
2590 @details
2591 The function collects the value of 'avg_frequency' for the prefixes
2592 on an index from 'table'. The index is specified by its number.
  If the scan is successful the calculated statistics is saved, through
  set_avg_frequency(), in the collected_stats structure of the KEY object
  for the index. The statistics for the prefix with k components is saved
  in the element number k-1.
2597
2598 @retval
2599 0 If the statistics has been successfully collected
2600 @retval
2601 1 Otherwise
2602
2603 @note
2604 The function collects statistics for the index prefixes for one index
2605 scan during which no data is fetched from the table records. That's why
2606 statistical data for prefixes that contain part of a field is not
2607 collected.
2608 The function employs an object of the helper class Index_prefix_calc to
2609 count for each index prefix the number of index entries without nulls and
2610 the number of distinct entries among them.
2611
2612*/
2613
2614static
2615int collect_statistics_for_index(THD *thd, TABLE *table, uint index)
2616{
2617 int rc= 0;
2618 KEY *key_info= &table->key_info[index];
2619 ha_rows rows= 0;
2620
2621 DBUG_ENTER("collect_statistics_for_index");
2622
2623 /* No statistics for FULLTEXT indexes. */
2624 if (key_info->flags & (HA_FULLTEXT|HA_SPATIAL))
2625 DBUG_RETURN(rc);
2626
2627 Index_prefix_calc index_prefix_calc(thd, table, key_info);
2628
2629 DEBUG_SYNC(table->in_use, "statistics_collection_start1");
2630 DEBUG_SYNC(table->in_use, "statistics_collection_start2");
2631
2632 if (index_prefix_calc.is_single_comp_pk)
2633 {
2634 index_prefix_calc.get_avg_frequency();
2635 DBUG_RETURN(rc);
2636 }
2637
2638 /*
2639 Request "only index read" in case of absence of fields which are
2640 partially in the index to avoid problems with partitioning (for example)
2641 which want to get whole field value.
2642 */
2643 if (!index_prefix_calc.is_partial_fields_present)
2644 table->file->ha_start_keyread(index);
2645 table->file->ha_index_init(index, TRUE);
2646 rc= table->file->ha_index_first(table->record[0]);
2647 while (rc != HA_ERR_END_OF_FILE)
2648 {
2649 if (thd->killed)
2650 break;
2651
2652 if (rc)
2653 break;
2654 rows++;
2655 index_prefix_calc.add();
2656 rc= table->file->ha_index_next(table->record[0]);
2657 }
2658 table->file->ha_end_keyread();
2659 table->file->ha_index_end();
2660
2661 rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1;
2662
2663 if (!rc)
2664 index_prefix_calc.get_avg_frequency();
2665
2666 DBUG_RETURN(rc);
2667}
2668
2669
2670/**
2671 @brief
2672 Collect statistical data for a table
2673
2674 @param
2675 thd The thread handle
2676 @param
2677 table The table to collect statistics on
2678
2679 @details
2680 The function collects data for various statistical characteristics on
2681 the table 'table'. These data is saved in the internal fields that could
2682 be reached from 'table'. The data is prepared to be saved in the persistent
2683 statistical table by the function update_statistics_for_table.
2684 The collected statistical values are not placed in the same fields that
2685 keep the statistical data used by the optimizer. Therefore, at any time,
2686 there is no collision between the statistics being collected and the one
2687 used by the optimizer to look for optimal query execution plans for other
2688 clients.
2689
2690 @retval
2691 0 If the statistics has been successfully collected
2692 @retval
2693 1 Otherwise
2694
2695 @note
2696 The function first collects statistical data for statistical characteristics
2697 to be saved in the statistical tables table_stat and column_stats. To do this
2698 it performs a full table scan of 'table'. At this scan the function collects
2699 statistics on each column of the table and count the total number of the
2700 scanned rows. To calculate the value of 'avg_frequency' for a column the
2701 function constructs an object of the helper class Count_distinct_field
2702 (or its derivation). Currently this class cannot count the number of
2703 distinct values for blob columns. So the value of 'avg_frequency' for
2704 blob columns is always null.
2705 After the full table scan the function calls collect_statistics_for_index
2706 for each table index. The latter performs full index scan for each index.
2707
2708 @note
2709 Currently the statistical data is collected indiscriminately for all
2710 columns/indexes of 'table', for all statistical characteristics.
2711 TODO. Collect only specified statistical characteristics for specified
2712 columns/indexes.
2713
2714 @note
2715 Currently the process of collecting statistical data is not optimized.
2716 For example, 'avg_frequency' for a column could be copied from the
2717 'avg_frequency' collected for an index if this column is used as the
  first component of the index. Min and max values for this column could
2719 be extracted from the index as well.
2720*/
2721
2722int collect_statistics_for_table(THD *thd, TABLE *table)
2723{
2724 int rc;
2725 Field **field_ptr;
2726 Field *table_field;
2727 ha_rows rows= 0;
2728 handler *file=table->file;
2729
2730 DBUG_ENTER("collect_statistics_for_table");
2731
2732 table->collected_stats->cardinality_is_null= TRUE;
2733 table->collected_stats->cardinality= 0;
2734
2735 for (field_ptr= table->field; *field_ptr; field_ptr++)
2736 {
2737 table_field= *field_ptr;
2738 if (!bitmap_is_set(table->read_set, table_field->field_index))
2739 continue;
2740 table_field->collected_stats->init(thd, table_field);
2741 }
2742
2743 restore_record(table, s->default_values);
2744
2745 /* Perform a full table scan to collect statistics on 'table's columns */
2746 if (!(rc= file->ha_rnd_init(TRUE)))
2747 {
2748 DEBUG_SYNC(table->in_use, "statistics_collection_start");
2749
2750 while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
2751 {
2752 if (thd->killed)
2753 break;
2754
2755 if (rc)
2756 break;
2757
2758 for (field_ptr= table->field; *field_ptr; field_ptr++)
2759 {
2760 table_field= *field_ptr;
2761 if (!bitmap_is_set(table->read_set, table_field->field_index))
2762 continue;
2763 if ((rc= table_field->collected_stats->add(rows)))
2764 break;
2765 }
2766 if (rc)
2767 break;
2768 rows++;
2769 }
2770 file->ha_rnd_end();
2771 }
2772 rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1;
2773
2774 /*
2775 Calculate values for all statistical characteristics on columns and
2776 and for each field f of 'table' save them in the write_stat structure
2777 from the Field object for f.
2778 */
2779 if (!rc)
2780 {
2781 table->collected_stats->cardinality_is_null= FALSE;
2782 table->collected_stats->cardinality= rows;
2783 }
2784
2785 bitmap_clear_all(table->write_set);
2786 for (field_ptr= table->field; *field_ptr; field_ptr++)
2787 {
2788 table_field= *field_ptr;
2789 if (!bitmap_is_set(table->read_set, table_field->field_index))
2790 continue;
2791 bitmap_set_bit(table->write_set, table_field->field_index);
2792 if (!rc)
2793 table_field->collected_stats->finish(rows);
2794 else
2795 table_field->collected_stats->cleanup();
2796 }
2797 bitmap_clear_all(table->write_set);
2798
2799 if (!rc)
2800 {
2801 uint key;
2802 key_map::Iterator it(table->keys_in_use_for_query);
2803
2804 MY_BITMAP *save_read_set= table->read_set;
2805 table->read_set= &table->tmp_set;
2806 bitmap_set_all(table->read_set);
2807
2808 /* Collect statistics for indexes */
2809 while ((key= it++) != key_map::Iterator::BITMAP_END)
2810 {
2811 if ((rc= collect_statistics_for_index(thd, table, key)))
2812 break;
2813 }
2814
2815 table->read_set= save_read_set;
2816 }
2817
2818 DBUG_RETURN(rc);
2819}
2820
2821
2822/**
2823 @brief
2824 Update statistics for a table in the persistent statistical tables
2825
2826 @param
2827 thd The thread handle
2828 @param
2829 table The table to collect statistics on
2830
2831 @details
2832 For each statistical table st the function looks for the rows from this
2833 table that contain statistical data on 'table'. If rows with given
2834 statistical characteristics exist they are updated with the new statistical
2835 values taken from internal structures for 'table'. Otherwise new rows
2836 with these statistical characteristics are added into st.
2837 It is assumed that values stored in the statistical tables are found and
2838 saved by the function collect_statistics_for_table.
2839
2840 @retval
2841 0 If all statistical tables has been successfully updated
2842 @retval
2843 1 Otherwise
2844
2845 @note
2846 The function is called when executing the ANALYZE actions for 'table'.
2847 The function first unlocks the opened table the statistics on which has
2848 been collected, but does not closes it, so all collected statistical data
2849 remains in internal structures for 'table'. Then the function opens the
2850 statistical tables and writes the statistical data for 'table'into them.
2851 It is not allowed just to open statistical tables for writing when some
2852 other tables are locked for reading.
2853 After the statistical tables have been opened they are updated one by one
2854 with the new statistics on 'table'. Objects of the helper classes
2855 Table_stat, Column_stat and Index_stat are employed for this.
2856 After having been updated the statistical system tables are closed.
2857*/
2858
2859int update_statistics_for_table(THD *thd, TABLE *table)
2860{
2861 TABLE_LIST tables[STATISTICS_TABLES];
2862 Open_tables_backup open_tables_backup;
2863 uint i;
2864 int err;
2865 enum_binlog_format save_binlog_format;
2866 int rc= 0;
2867 TABLE *stat_table;
2868
2869 DBUG_ENTER("update_statistics_for_table");
2870
2871 DEBUG_SYNC(thd, "statistics_update_start");
2872
2873 if (open_stat_tables(thd, tables, &open_tables_backup, TRUE))
2874 DBUG_RETURN(rc);
2875
2876 save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
2877
2878 /* Update the statistical table table_stats */
2879 stat_table= tables[TABLE_STAT].table;
2880 Table_stat table_stat(stat_table, table);
2881 restore_record(stat_table, s->default_values);
2882 table_stat.set_key_fields();
2883 err= table_stat.update_stat();
2884 if (err)
2885 rc= 1;
2886
2887 /* Update the statistical table colum_stats */
2888 stat_table= tables[COLUMN_STAT].table;
2889 Column_stat column_stat(stat_table, table);
2890 for (Field **field_ptr= table->field; *field_ptr; field_ptr++)
2891 {
2892 Field *table_field= *field_ptr;
2893 if (!bitmap_is_set(table->read_set, table_field->field_index))
2894 continue;
2895 restore_record(stat_table, s->default_values);
2896 column_stat.set_key_fields(table_field);
2897 err= column_stat.update_stat();
2898 if (err && !rc)
2899 rc= 1;
2900 }
2901
2902 /* Update the statistical table index_stats */
2903 stat_table= tables[INDEX_STAT].table;
2904 uint key;
2905 key_map::Iterator it(table->keys_in_use_for_query);
2906 Index_stat index_stat(stat_table, table);
2907
2908 while ((key= it++) != key_map::Iterator::BITMAP_END)
2909 {
2910 KEY *key_info= table->key_info+key;
2911 uint key_parts= table->actual_n_key_parts(key_info);
2912 for (i= 0; i < key_parts; i++)
2913 {
2914 restore_record(stat_table, s->default_values);
2915 index_stat.set_key_fields(key_info, i+1);
2916 err= index_stat.update_stat();
2917 if (err && !rc)
2918 rc= 1;
2919 }
2920 }
2921
2922 thd->restore_stmt_binlog_format(save_binlog_format);
2923
2924 close_system_tables(thd, &open_tables_backup);
2925
2926 DBUG_RETURN(rc);
2927}
2928
2929
2930/**
2931 @brief
2932 Read statistics for a table from the persistent statistical tables
2933
2934 @param
2935 thd The thread handle
2936 @param
2937 table The table to read statistics on
2938 @param
2939 stat_tables The array of TABLE_LIST objects for statistical tables
2940
2941 @details
2942 For each statistical table the function looks for the rows from this
2943 table that contain statistical data on 'table'. If such rows is found
2944 the data from statistical columns of it is read into the appropriate
2945 fields of internal structures for 'table'. Later at the query processing
2946 this data are supposed to be used by the optimizer.
2947 The parameter stat_tables should point to an array of TABLE_LIST
2948 objects for all statistical tables linked into a list. All statistical
2949 tables are supposed to be opened.
2950 The function is called by read_statistics_for_tables_if_needed().
2951
2952 @retval
2953 0 If data has been successfully read for the table
2954 @retval
2955 1 Otherwise
2956
2957 @note
  Objects of the helper classes Table_stat, Column_stat and Index_stat
  are employed to read statistical data from the statistical tables.
2961*/
2962
2963static
2964int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
2965{
2966 uint i;
2967 TABLE *stat_table;
2968 Field *table_field;
2969 Field **field_ptr;
2970 KEY *key_info, *key_info_end;
2971 TABLE_SHARE *table_share= table->s;
2972 Table_statistics *read_stats= table_share->stats_cb.table_stats;
2973
2974 DBUG_ENTER("read_statistics_for_table");
2975
2976 /* Read statistics from the statistical table table_stats */
2977 stat_table= stat_tables[TABLE_STAT].table;
2978 Table_stat table_stat(stat_table, table);
2979 table_stat.set_key_fields();
2980 table_stat.get_stat_values();
2981
2982 /* Read statistics from the statistical table column_stats */
2983 stat_table= stat_tables[COLUMN_STAT].table;
2984 ulong total_hist_size= 0;
2985 Column_stat column_stat(stat_table, table);
2986 for (field_ptr= table_share->field; *field_ptr; field_ptr++)
2987 {
2988 table_field= *field_ptr;
2989 column_stat.set_key_fields(table_field);
2990 column_stat.get_stat_values();
2991 total_hist_size+= table_field->read_stats->histogram.get_size();
2992 }
2993 read_stats->total_hist_size= total_hist_size;
2994
2995 /* Read statistics from the statistical table index_stats */
2996 stat_table= stat_tables[INDEX_STAT].table;
2997 Index_stat index_stat(stat_table, table);
2998 for (key_info= table_share->key_info,
2999 key_info_end= key_info + table_share->keys;
3000 key_info < key_info_end; key_info++)
3001 {
3002 uint key_parts= key_info->ext_key_parts;
3003 for (i= 0; i < key_parts; i++)
3004 {
3005 index_stat.set_key_fields(key_info, i+1);
3006 index_stat.get_stat_values();
3007 }
3008
3009 key_part_map ext_key_part_map= key_info->ext_key_part_map;
3010 if (key_info->user_defined_key_parts != key_info->ext_key_parts &&
3011 key_info->read_stats->get_avg_frequency(key_info->user_defined_key_parts) == 0)
3012 {
3013 KEY *pk_key_info= table_share->key_info + table_share->primary_key;
3014 uint k= key_info->user_defined_key_parts;
3015 uint pk_parts= pk_key_info->user_defined_key_parts;
3016 ha_rows n_rows= read_stats->cardinality;
3017 double k_dist= n_rows / key_info->read_stats->get_avg_frequency(k-1);
3018 uint m= 0;
3019 for (uint j= 0; j < pk_parts; j++)
3020 {
3021 if (!(ext_key_part_map & 1 << j))
3022 {
3023 for (uint l= k; l < k + m; l++)
3024 {
3025 double avg_frequency=
3026 pk_key_info->read_stats->get_avg_frequency(j-1);
3027 set_if_smaller(avg_frequency, 1);
3028 double val= pk_key_info->read_stats->get_avg_frequency(j) /
3029 avg_frequency;
3030 key_info->read_stats->set_avg_frequency (l, val);
3031 }
3032 }
3033 else
3034 {
3035 double avg_frequency= pk_key_info->read_stats->get_avg_frequency(j);
3036 key_info->read_stats->set_avg_frequency(k + m, avg_frequency);
3037 m++;
3038 }
3039 }
3040 for (uint l= k; l < k + m; l++)
3041 {
3042 double avg_frequency= key_info->read_stats->get_avg_frequency(l);
3043 if (avg_frequency == 0 || read_stats->cardinality_is_null)
3044 avg_frequency= 1;
3045 else if (avg_frequency > 1)
3046 {
3047 avg_frequency/= k_dist;
3048 set_if_bigger(avg_frequency, 1);
3049 }
3050 key_info->read_stats->set_avg_frequency(l, avg_frequency);
3051 }
3052 }
3053 }
3054
3055 table->stats_is_read= TRUE;
3056
3057 DBUG_RETURN(0);
3058}
3059
3060
3061/**
3062 @brief
3063 Check whether any statistics is to be read for tables from a table list
3064
3065 @param
3066 thd The thread handle
3067 @param
3068 tables The tables list for whose tables the check is to be done
3069
3070 @details
3071 The function checks whether for any of the tables opened and locked for
3072 a statement statistics from statistical tables is needed to be read.
3073
3074 @retval
3075 TRUE statistics for any of the tables is needed to be read
3076 @retval
3077 FALSE Otherwise
3078*/
3079
3080static
3081bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables)
3082{
3083 if (!tables)
3084 return FALSE;
3085
3086 if (!statistics_for_command_is_needed(thd))
3087 return FALSE;
3088
3089 /*
3090 Do not read statistics for any query that explicity involves
3091 statistical tables, failure to to do so we may end up
3092 in a deadlock.
3093 */
3094
3095 for (TABLE_LIST *tl= tables; tl; tl= tl->next_global)
3096 {
3097 if (!tl->is_view_or_derived() && tl->table)
3098 {
3099 TABLE_SHARE *table_share= tl->table->s;
3100 if (table_share &&
3101 table_share->table_category != TABLE_CATEGORY_USER
3102 && is_stat_table(&tl->db, &tl->alias))
3103 return FALSE;
3104 }
3105 }
3106
3107 for (TABLE_LIST *tl= tables; tl; tl= tl->next_global)
3108 {
3109 if (!tl->is_view_or_derived() && tl->table)
3110 {
3111 TABLE_SHARE *table_share= tl->table->s;
3112 if (table_share &&
3113 table_share->stats_cb.stats_can_be_read &&
3114 (!table_share->stats_cb.stats_is_read ||
3115 (!table_share->stats_cb.histograms_are_read &&
3116 thd->variables.optimizer_use_condition_selectivity > 3)))
3117 return TRUE;
3118 if (table_share->stats_cb.stats_is_read)
3119 tl->table->stats_is_read= TRUE;
3120 if (table_share->stats_cb.histograms_are_read)
3121 tl->table->histograms_are_read= TRUE;
3122 }
3123 }
3124
3125 return FALSE;
3126}
3127
3128
3129/**
3130 @brief
3131 Read histogram for a table from the persistent statistical tables
3132
3133 @param
3134 thd The thread handle
3135 @param
3136 table The table to read histograms for
3137 @param
3138 stat_tables The array of TABLE_LIST objects for statistical tables
3139
  @details
  For the statistical table column_stats the function looks for the rows
  of this table that contain statistical data on 'table'. If such rows
  are found, the histograms from them are read into the memory allocated
  for histograms of 'table'. Later, during query processing, these
  histograms are supposed to be used by the optimizer.
3146 The parameter stat_tables should point to an array of TABLE_LIST
3147 objects for all statistical tables linked into a list. All statistical
3148 tables are supposed to be opened.
3149 The function is called by read_statistics_for_tables_if_needed().
3150
3151 @retval
3152 0 If data has been successfully read for the table
3153 @retval
3154 1 Otherwise
3155
  @note
  Objects of the helper class Column_stat are employed to read histograms
  from the statistical table column_stats.
3159*/
3160
3161static
3162int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
3163{
3164 TABLE_SHARE *table_share= table->s;
3165
3166 DBUG_ENTER("read_histograms_for_table");
3167
3168 if (!table_share->stats_cb.histograms_can_be_read)
3169 {
3170 (void) alloc_histograms_for_table_share(thd, table_share, FALSE);
3171 }
3172 if (table_share->stats_cb.histograms_can_be_read &&
3173 !table_share->stats_cb.histograms_are_read)
3174 {
3175 Field **field_ptr;
3176 uchar *histogram= table_share->stats_cb.table_stats->histograms;
3177 TABLE *stat_table= stat_tables[COLUMN_STAT].table;
3178 Column_stat column_stat(stat_table, table);
3179 for (field_ptr= table_share->field; *field_ptr; field_ptr++)
3180 {
3181 Field *table_field= *field_ptr;
3182 uint hist_size= table_field->read_stats->histogram.get_size();
3183 if (hist_size)
3184 {
3185 column_stat.set_key_fields(table_field);
3186 table_field->read_stats->histogram.set_values(histogram);
3187 column_stat.get_histogram_value();
3188 histogram+= hist_size;
3189 }
3190 }
3191 }
3192
3193 DBUG_RETURN(0);
3194}
3195
3196/**
3197 @brief
3198 Read statistics for tables from a table list if it is needed
3199
3200 @param
3201 thd The thread handle
3202 @param
3203 tables The tables list for whose tables to read statistics
3204
3205 @details
3206 The function first checks whether for any of the tables opened and locked
3207 for a statement statistics from statistical tables is needed to be read.
3208 Then, if so, it opens system statistical tables for read and reads
3209 the statistical data from them for those tables from the list for which it
3210 makes sense. Then the function closes system statistical tables.
3211
3212 @retval
3213 0 Statistics for tables was successfully read
3214 @retval
3215 1 Otherwise
3216*/
3217
3218int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables)
3219{
3220 TABLE_LIST stat_tables[STATISTICS_TABLES];
3221 Open_tables_backup open_tables_backup;
3222
3223 DBUG_ENTER("read_statistics_for_tables_if_needed");
3224
3225 DEBUG_SYNC(thd, "statistics_read_start");
3226
3227 if (!statistics_for_tables_is_needed(thd, tables))
3228 DBUG_RETURN(0);
3229
3230 if (open_stat_tables(thd, stat_tables, &open_tables_backup, FALSE))
3231 {
3232 thd->clear_error();
3233 DBUG_RETURN(1);
3234 }
3235
3236 for (TABLE_LIST *tl= tables; tl; tl= tl->next_global)
3237 {
3238 if (!tl->is_view_or_derived() && tl->table)
3239 {
3240 TABLE_SHARE *table_share= tl->table->s;
3241 if (table_share &&
3242 table_share->stats_cb.stats_can_be_read &&
3243 !table_share->stats_cb.stats_is_read)
3244 {
3245 (void) read_statistics_for_table(thd, tl->table, stat_tables);
3246 table_share->stats_cb.stats_is_read= TRUE;
3247 }
3248 if (table_share->stats_cb.stats_is_read)
3249 tl->table->stats_is_read= TRUE;
3250 if (thd->variables.optimizer_use_condition_selectivity > 3 &&
3251 table_share && !table_share->stats_cb.histograms_are_read)
3252 {
3253 (void) read_histograms_for_table(thd, tl->table, stat_tables);
3254 table_share->stats_cb.histograms_are_read= TRUE;
3255 }
3256 if (table_share->stats_cb.stats_is_read)
3257 tl->table->histograms_are_read= TRUE;
3258 }
3259 }
3260
3261 close_system_tables(thd, &open_tables_backup);
3262
3263 DBUG_RETURN(0);
3264}
3265
3266
3267/**
3268 @brief
3269 Delete statistics on a table from all statistical tables
3270
3271 @param
3272 thd The thread handle
3273 @param
3274 db The name of the database the table belongs to
3275 @param
3276 tab The name of the table whose statistics is to be deleted
3277
  @details
  The function deletes statistics on the table called 'tab' of the database
  'db' from all statistical tables: table_stats, column_stats, index_stats.
3281
3282 @retval
3283 0 If all deletions are successful
3284 @retval
3285 1 Otherwise
3286
3287 @note
3288 The function is called when executing the statement DROP TABLE 'tab'.
3289*/
3290
3291int delete_statistics_for_table(THD *thd, LEX_CSTRING *db, LEX_CSTRING *tab)
3292{
3293 int err;
3294 enum_binlog_format save_binlog_format;
3295 TABLE *stat_table;
3296 TABLE_LIST tables[STATISTICS_TABLES];
3297 Open_tables_backup open_tables_backup;
3298 int rc= 0;
3299
3300 DBUG_ENTER("delete_statistics_for_table");
3301
3302 if (open_stat_tables(thd, tables, &open_tables_backup, TRUE))
3303 DBUG_RETURN(rc);
3304
3305 save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
3306
3307 /* Delete statistics on table from the statistical table index_stats */
3308 stat_table= tables[INDEX_STAT].table;
3309 Index_stat index_stat(stat_table, db, tab);
3310 index_stat.set_full_table_name();
3311 while (index_stat.find_next_stat_for_prefix(2))
3312 {
3313 err= index_stat.delete_stat();
3314 if (err & !rc)
3315 rc= 1;
3316 }
3317
3318 /* Delete statistics on table from the statistical table column_stats */
3319 stat_table= tables[COLUMN_STAT].table;
3320 Column_stat column_stat(stat_table, db, tab);
3321 column_stat.set_full_table_name();
3322 while (column_stat.find_next_stat_for_prefix(2))
3323 {
3324 err= column_stat.delete_stat();
3325 if (err & !rc)
3326 rc= 1;
3327 }
3328
3329 /* Delete statistics on table from the statistical table table_stats */
3330 stat_table= tables[TABLE_STAT].table;
3331 Table_stat table_stat(stat_table, db, tab);
3332 table_stat.set_key_fields();
3333 if (table_stat.find_stat())
3334 {
3335 err= table_stat.delete_stat();
3336 if (err & !rc)
3337 rc= 1;
3338 }
3339
3340 err= del_global_table_stat(thd, db, tab);
3341 if (err & !rc)
3342 rc= 1;
3343
3344 thd->restore_stmt_binlog_format(save_binlog_format);
3345
3346 close_system_tables(thd, &open_tables_backup);
3347
3348 DBUG_RETURN(rc);
3349}
3350
3351
3352/**
3353 @brief
3354 Delete statistics on a column of the specified table
3355
3356 @param
3357 thd The thread handle
3358 @param
3359 tab The table the column belongs to
3360 @param
3361 col The field of the column whose statistics is to be deleted
3362
  @details
  The function deletes statistics on the column 'col' belonging to the table
  'tab' from the statistical table column_stats.
3366
3367 @retval
3368 0 If the deletion is successful
3369 @retval
3370 1 Otherwise
3371
3372 @note
3373 The function is called when dropping a table column or when changing
3374 the definition of this column.
3375*/
3376
3377int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col)
3378{
3379 int err;
3380 enum_binlog_format save_binlog_format;
3381 TABLE *stat_table;
3382 TABLE_LIST tables;
3383 Open_tables_backup open_tables_backup;
3384 int rc= 0;
3385
3386 DBUG_ENTER("delete_statistics_for_column");
3387
3388 if (open_single_stat_table(thd, &tables, &stat_table_name[1],
3389 &open_tables_backup, TRUE))
3390 {
3391 thd->clear_error();
3392 DBUG_RETURN(rc);
3393 }
3394
3395 save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
3396
3397 stat_table= tables.table;
3398 Column_stat column_stat(stat_table, tab);
3399 column_stat.set_key_fields(col);
3400 if (column_stat.find_stat())
3401 {
3402 err= column_stat.delete_stat();
3403 if (err)
3404 rc= 1;
3405 }
3406
3407 thd->restore_stmt_binlog_format(save_binlog_format);
3408
3409 close_system_tables(thd, &open_tables_backup);
3410
3411 DBUG_RETURN(rc);
3412}
3413
3414
3415/**
3416 @brief
3417 Delete statistics on an index of the specified table
3418
3419 @param
3420 thd The thread handle
3421 @param
3422 tab The table the index belongs to
3423 @param
3424 key_info The descriptor of the index whose statistics is to be deleted
3425 @param
3426 ext_prefixes_only Delete statistics only on the index prefixes extended by
3427 the components of the primary key
3428
  @details
  The function deletes statistics on the index specified by 'key_info'
  defined on the table 'tab' from the statistical table index_stats.
3432
3433 @retval
3434 0 If the deletion is successful
3435 @retval
3436 1 Otherwise
3437
3438 @note
3439 The function is called when dropping an index, or dropping/changing the
3440 definition of a column used in the definition of the index.
3441*/
3442
3443int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
3444 bool ext_prefixes_only)
3445{
3446 int err;
3447 enum_binlog_format save_binlog_format;
3448 TABLE *stat_table;
3449 TABLE_LIST tables;
3450 Open_tables_backup open_tables_backup;
3451 int rc= 0;
3452
3453 DBUG_ENTER("delete_statistics_for_index");
3454
3455 if (open_single_stat_table(thd, &tables, &stat_table_name[2],
3456 &open_tables_backup, TRUE))
3457 {
3458 thd->clear_error();
3459 DBUG_RETURN(rc);
3460 }
3461
3462 save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
3463
3464 stat_table= tables.table;
3465 Index_stat index_stat(stat_table, tab);
3466 if (!ext_prefixes_only)
3467 {
3468 index_stat.set_index_prefix_key_fields(key_info);
3469 while (index_stat.find_next_stat_for_prefix(3))
3470 {
3471 err= index_stat.delete_stat();
3472 if (err && !rc)
3473 rc= 1;
3474 }
3475 }
3476 else
3477 {
3478 for (uint i= key_info->user_defined_key_parts; i < key_info->ext_key_parts; i++)
3479 {
3480 index_stat.set_key_fields(key_info, i+1);
3481 if (index_stat.find_next_stat_for_prefix(4))
3482 {
3483 err= index_stat.delete_stat();
3484 if (err && !rc)
3485 rc= 1;
3486 }
3487 }
3488 }
3489
3490 err= del_global_index_stat(thd, tab, key_info);
3491 if (err && !rc)
3492 rc= 1;
3493
3494 thd->restore_stmt_binlog_format(save_binlog_format);
3495
3496 close_system_tables(thd, &open_tables_backup);
3497
3498 DBUG_RETURN(rc);
3499}
3500
3501
3502/**
3503 @brief
3504 Rename a table in all statistical tables
3505
  @param
  thd         The thread handle
  @param
  db          The name of the database the table belongs to
  @param
  tab         The name of the table to be renamed in statistical tables
  @param
  new_db      The new name of the database for the table
  @param
  new_tab     The new name of the table

  @details
  The function replaces the name of the table 'tab' from the database 'db'
  with 'new_tab' of the database 'new_db' in all statistical tables:
  table_stats, column_stats, index_stats.
3519
3520 @retval
3521 0 If all updates of the table name are successful
3522 @retval
3523 1 Otherwise
3524
3525 @note
3526 The function is called when executing any statement that renames a table
3527*/
3528
3529int rename_table_in_stat_tables(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *tab,
3530 const LEX_CSTRING *new_db, const LEX_CSTRING *new_tab)
3531{
3532 int err;
3533 enum_binlog_format save_binlog_format;
3534 TABLE *stat_table;
3535 TABLE_LIST tables[STATISTICS_TABLES];
3536 Open_tables_backup open_tables_backup;
3537 int rc= 0;
3538 DBUG_ENTER("rename_table_in_stat_tables");
3539
3540 if (open_stat_tables(thd, tables, &open_tables_backup, TRUE))
3541 DBUG_RETURN(0); // not an error
3542
3543 save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
3544
3545 /* Rename table in the statistical table index_stats */
3546 stat_table= tables[INDEX_STAT].table;
3547 Index_stat index_stat(stat_table, db, tab);
3548 index_stat.set_full_table_name();
3549
3550 Stat_table_write_iter index_iter(&index_stat);
3551 if (index_iter.init(2))
3552 rc= 1;
3553 while (!index_iter.get_next_row())
3554 {
3555 err= index_stat.update_table_name_key_parts(new_db, new_tab);
3556 if (err & !rc)
3557 rc= 1;
3558 index_stat.set_full_table_name();
3559 }
3560 index_iter.cleanup();
3561
3562 /* Rename table in the statistical table column_stats */
3563 stat_table= tables[COLUMN_STAT].table;
3564 Column_stat column_stat(stat_table, db, tab);
3565 column_stat.set_full_table_name();
3566 Stat_table_write_iter column_iter(&column_stat);
3567 if (column_iter.init(2))
3568 rc= 1;
3569 while (!column_iter.get_next_row())
3570 {
3571 err= column_stat.update_table_name_key_parts(new_db, new_tab);
3572 if (err & !rc)
3573 rc= 1;
3574 column_stat.set_full_table_name();
3575 }
3576 column_iter.cleanup();
3577
3578 /* Rename table in the statistical table table_stats */
3579 stat_table= tables[TABLE_STAT].table;
3580 Table_stat table_stat(stat_table, db, tab);
3581 table_stat.set_key_fields();
3582 if (table_stat.find_stat())
3583 {
3584 err= table_stat.update_table_name_key_parts(new_db, new_tab);
3585 if (err & !rc)
3586 rc= 1;
3587 }
3588
3589 thd->restore_stmt_binlog_format(save_binlog_format);
3590
3591 close_system_tables(thd, &open_tables_backup);
3592
3593 DBUG_RETURN(rc);
3594}
3595
3596
3597/**
3598 @brief
3599 Rename a column in the statistical table column_stats
3600
3601 @param
3602 thd The thread handle
3603 @param
3604 tab The table the column belongs to
3605 @param
3606 col The column to be renamed
3607 @param
3608 new_name The new column name
3609
3610 @details
3611 The function replaces the name of the column 'col' belonging to the table
3612 'tab' for 'new_name' in the statistical table column_stats.
3613
  @retval
  0         If the update of the column name is successful
  @retval
  1         Otherwise
3618
3619 @note
3620 The function is called when executing any statement that renames a column,
3621 but does not change the column definition.
3622*/
3623
3624int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
3625 const char *new_name)
3626{
3627 int err;
3628 enum_binlog_format save_binlog_format;
3629 TABLE *stat_table;
3630 TABLE_LIST tables;
3631 Open_tables_backup open_tables_backup;
3632 int rc= 0;
3633
3634 DBUG_ENTER("rename_column_in_stat_tables");
3635
3636 if (tab->s->tmp_table != NO_TMP_TABLE)
3637 DBUG_RETURN(0);
3638
3639 if (open_single_stat_table(thd, &tables, &stat_table_name[1],
3640 &open_tables_backup, TRUE))
3641 {
3642 thd->clear_error();
3643 DBUG_RETURN(rc);
3644 }
3645
3646 save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
3647
3648 /* Rename column in the statistical table table_stat */
3649 stat_table= tables.table;
3650 Column_stat column_stat(stat_table, tab);
3651 column_stat.set_key_fields(col);
3652 if (column_stat.find_stat())
3653 {
3654 err= column_stat.update_column_key_part(new_name);
3655 if (err & !rc)
3656 rc= 1;
3657 }
3658
3659 thd->restore_stmt_binlog_format(save_binlog_format);
3660
3661 close_system_tables(thd, &open_tables_backup);
3662
3663 DBUG_RETURN(rc);
3664}
3665
3666
3667/**
3668 @brief
3669 Set statistics for a table that will be used by the optimizer
3670
3671 @param
3672 thd The thread handle
3673 @param
3674 table The table to set statistics for
3675
3676 @details
3677 Depending on the value of thd->variables.use_stat_tables
3678 the function performs the settings for the table that will control
3679 from where the statistical data used by the optimizer will be taken.
3680*/
3681
3682void set_statistics_for_table(THD *thd, TABLE *table)
3683{
3684 TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb;
3685 Table_statistics *read_stats= stats_cb->table_stats;
3686 Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
3687 table->used_stat_records=
3688 (use_stat_table_mode <= COMPLEMENTARY ||
3689 !table->stats_is_read || read_stats->cardinality_is_null) ?
3690 table->file->stats.records : read_stats->cardinality;
3691 KEY *key_info, *key_info_end;
3692 for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
3693 key_info < key_info_end; key_info++)
3694 {
3695 key_info->is_statistics_from_stat_tables=
3696 (use_stat_table_mode > COMPLEMENTARY &&
3697 table->stats_is_read &&
3698 key_info->read_stats->avg_frequency_is_inited() &&
3699 key_info->read_stats->get_avg_frequency(0) > 0.5);
3700 }
3701}
3702
3703
3704/**
3705 @brief
3706 Get the average frequency for a column
3707
3708 @param
3709 field The column whose average frequency is required
3710
3711 @retval
3712 The required average frequency
3713*/
3714
3715double get_column_avg_frequency(Field * field)
3716{
3717 double res;
3718 TABLE *table= field->table;
3719
3720 /*
3721 Statistics is shared by table instances and is accessed through
3722 the table share. If table->s->field is not set for 'table', then
3723 no column statistics is available for the table .
3724 */
3725 if (!table->s->field)
3726 {
3727 res= (double)table->stat_records();
3728 return res;
3729 }
3730
3731 Column_statistics *col_stats= field->read_stats;
3732
3733 if (!col_stats)
3734 res= (double)table->stat_records();
3735 else
3736 res= col_stats->get_avg_frequency();
3737 return res;
3738}
3739
3740
3741/**
3742 @brief
3743 Estimate the number of rows in a column range using data from stat tables
3744
3745 @param
3746 field The column whose range cardinality is to be estimated
3747 @param
3748 min_endp The left end of the range whose cardinality is required
3749 @param
3750 max_endp The right end of the range whose cardinality is required
3751 @param
3752 range_flag The range flags
3753
3754 @details
3755 The function gets an estimate of the number of rows in a column range
3756 using the statistical data from the table column_stats.
3757
3758 @retval
3759 - The required estimate of the rows in the column range
3760 - If there is some kind of error, this function should return DBL_MAX (and
3761 not HA_POS_ERROR as that is an integer constant).
3762
3763*/
3764
3765double get_column_range_cardinality(Field *field,
3766 key_range *min_endp,
3767 key_range *max_endp,
3768 uint range_flag)
3769{
3770 double res;
3771 TABLE *table= field->table;
3772 Column_statistics *col_stats= field->read_stats;
3773 double tab_records= (double)table->stat_records();
3774
3775 if (!col_stats)
3776 return tab_records;
3777 /*
3778 Use statistics for a table only when we have actually read
3779 the statistics from the stat tables. For example due to
3780 chances of getting a deadlock we disable reading statistics for
3781 a table.
3782 */
3783
3784 if (!table->stats_is_read)
3785 return tab_records;
3786
3787 double col_nulls= tab_records * col_stats->get_nulls_ratio();
3788
3789 double col_non_nulls= tab_records - col_nulls;
3790
3791 bool nulls_incl= field->null_ptr && min_endp && min_endp->key[0] &&
3792 !(range_flag & NEAR_MIN);
3793
3794 if (col_non_nulls < 1)
3795 {
3796 if (nulls_incl)
3797 res= col_nulls;
3798 else
3799 res= 0;
3800 }
3801 else if (min_endp && max_endp && min_endp->length == max_endp->length &&
3802 !memcmp(min_endp->key, max_endp->key, min_endp->length))
3803 {
3804 if (nulls_incl)
3805 {
3806 /* This is null single point range */
3807 res= col_nulls;
3808 }
3809 else
3810 {
3811 double avg_frequency= col_stats->get_avg_frequency();
3812 res= avg_frequency;
3813 if (avg_frequency > 1.0 + 0.000001 &&
3814 col_stats->min_max_values_are_provided())
3815 {
3816 Histogram *hist= &col_stats->histogram;
3817 if (hist->is_available())
3818 {
3819 store_key_image_to_rec(field, (uchar *) min_endp->key,
3820 field->key_length());
3821 double pos= field->pos_in_interval(col_stats->min_value,
3822 col_stats->max_value);
3823 res= col_non_nulls *
3824 hist->point_selectivity(pos,
3825 avg_frequency / col_non_nulls);
3826 }
3827 }
3828 else if (avg_frequency == 0.0)
3829 {
3830 /* This actually means there is no statistics data */
3831 res= tab_records;
3832 }
3833 }
3834 }
3835 else
3836 {
3837 if (col_stats->min_max_values_are_provided())
3838 {
3839 double sel, min_mp_pos, max_mp_pos;
3840
3841 if (min_endp && !(field->null_ptr && min_endp->key[0]))
3842 {
3843 store_key_image_to_rec(field, (uchar *) min_endp->key,
3844 field->key_length());
3845 min_mp_pos= field->pos_in_interval(col_stats->min_value,
3846 col_stats->max_value);
3847 }
3848 else
3849 min_mp_pos= 0.0;
3850 if (max_endp)
3851 {
3852 store_key_image_to_rec(field, (uchar *) max_endp->key,
3853 field->key_length());
3854 max_mp_pos= field->pos_in_interval(col_stats->min_value,
3855 col_stats->max_value);
3856 }
3857 else
3858 max_mp_pos= 1.0;
3859
3860 Histogram *hist= &col_stats->histogram;
3861 if (!hist->is_available())
3862 sel= (max_mp_pos - min_mp_pos);
3863 else
3864 sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
3865 res= col_non_nulls * sel;
3866 set_if_bigger(res, col_stats->get_avg_frequency());
3867 }
3868 else
3869 res= col_non_nulls;
3870 if (nulls_incl)
3871 res+= col_nulls;
3872 }
3873 return res;
3874}
3875
3876
3877
3878/*
3879 Estimate selectivity of "col=const" using a histogram
3880
3881 @param pos Position of the "const" between column's min_value and
3882 max_value. This is a number in [0..1] range.
  @param avg_sel  Average selectivity of condition "col=const" in this table.
                  It is calculated as (#non_null_values / #distinct_values).
3885
3886 @return
3887 Expected condition selectivity (a number between 0 and 1)
3888
3889 @notes
3890 [re_zero_length_buckets] If a bucket with zero value-length is in the
3891 middle of the histogram, we will not have min==max. Example: suppose,
3892 pos_value=0x12, and the histogram is:
3893
3894 #n #n+1 #n+2
3895 ... 0x10 0x12 0x12 0x14 ...
3896 |
3897 +------------- bucket with zero value-length
3898
3899 Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
3900
3901 The problem happens at the histogram ends. if pos_value=0, and the
3902 histogram is:
3903
3904 0x00 0x10 ...
3905
3906 then min=0, max=0. This means pos_value is contained within bucket #0,
3907 but on the other hand, histogram data says that the bucket has only one
3908 value.
3909*/
3910
3911double Histogram::point_selectivity(double pos, double avg_sel)
3912{
3913 double sel;
3914 /* Find the bucket that contains the value 'pos'. */
3915 uint min= find_bucket(pos, TRUE);
3916 uint pos_value= (uint) (pos * prec_factor());
3917
3918 /* Find how many buckets this value occupies */
3919 uint max= min;
3920 while (max + 1 < get_width() && get_value(max + 1) == pos_value)
3921 max++;
3922
3923 /*
3924 A special case: we're looking at a single bucket, and that bucket has
3925 zero value-length. Use the multi-bucket formula (attempt to use
3926 single-bucket formula will cause divison by zero).
3927
3928 For more details see [re_zero_length_buckets] above.
3929 */
3930 if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
3931 max++;
3932
3933 if (max > min)
3934 {
3935 /*
3936 The value occupies multiple buckets. Use start_bucket ... end_bucket as
3937 selectivity.
3938 */
3939 double bucket_sel= 1.0/(get_width() + 1);
3940 sel= bucket_sel * (max - min + 1);
3941 }
3942 else
3943 {
3944 /*
3945 The value 'pos' fits within one single histogram bucket.
3946
3947 Histogram buckets have the same numbers of rows, but they cover
3948 different ranges of values.
3949
3950 We assume that values are uniformly distributed across the [0..1] value
3951 range.
3952 */
3953
3954 /*
3955 If all buckets covered value ranges of the same size, the width of
3956 value range would be:
3957 */
3958 double avg_bucket_width= 1.0 / (get_width() + 1);
3959
3960 /*
3961 Let's see what is the width of value range that our bucket is covering.
3962 (min==max currently. they are kept in the formula just in case we
3963 will want to extend it to handle multi-bucket case)
3964 */
3965 double inv_prec_factor= (double) 1.0 / prec_factor();
3966 double current_bucket_width=
3967 (max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
3968 (min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
3969
3970 DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
3971
3972 /*
3973 So:
3974 - each bucket has the same #rows
3975 - values are unformly distributed across the [min_value,max_value] domain.
3976
3977 If a bucket has value range that's N times bigger then average, than
3978 each value will have to have N times fewer rows than average.
3979 */
3980 sel= avg_sel * avg_bucket_width / current_bucket_width;
3981
3982 /*
3983 (Q: if we just follow this proportion we may end up in a situation
3984 where number of different values we expect to find in this bucket
3985 exceeds the number of rows that this histogram has in a bucket. Are
3986 we ok with this or we would want to have certain caps?)
3987 */
3988 }
3989 return sel;
3990}
3991
3992/*
3993 Check whether the table is one of the persistent statistical tables.
3994*/
3995bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table)
3996{
3997 DBUG_ASSERT(db->str && table->str);
3998
3999 if (!my_strcasecmp(table_alias_charset, db->str, MYSQL_SCHEMA_NAME.str))
4000 {
4001 for (uint i= 0; i < STATISTICS_TABLES; i ++)
4002 {
4003 if (!my_strcasecmp(table_alias_charset, table->str, stat_table_name[i].str))
4004 return true;
4005 }
4006 }
4007 return false;
4008}
4009