1/* Copyright 2006-2008 MySQL AB, 2008 Sun Microsystems, Inc.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16#ifndef SQL_STATISTICS_H
17#define SQL_STATISTICS_H
18
19typedef
20enum enum_use_stat_tables_mode
21{
22 NEVER,
23 COMPLEMENTARY,
24 PEFERABLY,
25} Use_stat_tables_mode;
26
27typedef
28enum enum_histogram_type
29{
30 SINGLE_PREC_HB,
31 DOUBLE_PREC_HB
32} Histogram_type;
33
34enum enum_stat_tables
35{
36 TABLE_STAT,
37 COLUMN_STAT,
38 INDEX_STAT,
39};
40
41
42/*
43 These enumeration types comprise the dictionary of three
44 statistical tables table_stat, column_stat and index_stat
45 as they defined in ../scripts/mysql_system_tables.sql.
46
47 It would be nice if the declarations of these types were
48 generated automatically by the table definitions.
49*/
50
51enum enum_table_stat_col
52{
53 TABLE_STAT_DB_NAME,
54 TABLE_STAT_TABLE_NAME,
55 TABLE_STAT_CARDINALITY,
56 TABLE_STAT_N_FIELDS
57};
58
59enum enum_column_stat_col
60{
61 COLUMN_STAT_DB_NAME,
62 COLUMN_STAT_TABLE_NAME,
63 COLUMN_STAT_COLUMN_NAME,
64 COLUMN_STAT_MIN_VALUE,
65 COLUMN_STAT_MAX_VALUE,
66 COLUMN_STAT_NULLS_RATIO,
67 COLUMN_STAT_AVG_LENGTH,
68 COLUMN_STAT_AVG_FREQUENCY,
69 COLUMN_STAT_HIST_SIZE,
70 COLUMN_STAT_HIST_TYPE,
71 COLUMN_STAT_HISTOGRAM,
72 COLUMN_STAT_N_FIELDS
73};
74
75enum enum_index_stat_col
76{
77 INDEX_STAT_DB_NAME,
78 INDEX_STAT_TABLE_NAME,
79 INDEX_STAT_INDEX_NAME,
80 INDEX_STAT_PREFIX_ARITY,
81 INDEX_STAT_AVG_FREQUENCY,
82 INDEX_STAT_N_FIELDS
83};
84
85inline
86Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
87{
88 return (Use_stat_tables_mode) (thd->variables.use_stat_tables);
89}
90
91int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
92int collect_statistics_for_table(THD *thd, TABLE *table);
93int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
94 bool is_safe);
95int alloc_statistics_for_table(THD *thd, TABLE *table);
96int update_statistics_for_table(THD *thd, TABLE *table);
97int delete_statistics_for_table(THD *thd, LEX_CSTRING *db, LEX_CSTRING *tab);
98int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
99int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
100 bool ext_prefixes_only);
101int rename_table_in_stat_tables(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *tab,
102 const LEX_CSTRING *new_db, const LEX_CSTRING *new_tab);
103int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
104 const char *new_name);
105void set_statistics_for_table(THD *thd, TABLE *table);
106
107double get_column_avg_frequency(Field * field);
108
109double get_column_range_cardinality(Field *field,
110 key_range *min_endp,
111 key_range *max_endp,
112 uint range_flag);
113bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
114
115class Histogram
116{
117
118private:
119 Histogram_type type;
120 uint8 size; /* Size of values array, in bytes */
121 uchar *values;
122
123 uint prec_factor()
124 {
125 switch (type) {
126 case SINGLE_PREC_HB:
127 return ((uint) (1 << 8) - 1);
128 case DOUBLE_PREC_HB:
129 return ((uint) (1 << 16) - 1);
130 }
131 return 1;
132 }
133
134public:
135 uint get_width()
136 {
137 switch (type) {
138 case SINGLE_PREC_HB:
139 return size;
140 case DOUBLE_PREC_HB:
141 return size / 2;
142 }
143 return 0;
144 }
145
146private:
147 uint get_value(uint i)
148 {
149 DBUG_ASSERT(i < get_width());
150 switch (type) {
151 case SINGLE_PREC_HB:
152 return (uint) (((uint8 *) values)[i]);
153 case DOUBLE_PREC_HB:
154 return (uint) uint2korr(values + i * 2);
155 }
156 return 0;
157 }
158
159 /* Find the bucket which value 'pos' falls into. */
160 uint find_bucket(double pos, bool first)
161 {
162 uint val= (uint) (pos * prec_factor());
163 int lp= 0;
164 int rp= get_width() - 1;
165 int d= get_width() / 2;
166 uint i= lp + d;
167 for ( ; d; d= (rp - lp) / 2, i= lp + d)
168 {
169 if (val == get_value(i))
170 break;
171 if (val < get_value(i))
172 rp= i;
173 else if (val > get_value(i + 1))
174 lp= i + 1;
175 else
176 break;
177 }
178
179 if (val > get_value(i) && i < (get_width() - 1))
180 i++;
181
182 if (val == get_value(i))
183 {
184 if (first)
185 {
186 while(i && val == get_value(i - 1))
187 i--;
188 }
189 else
190 {
191 while(i + 1 < get_width() && val == get_value(i + 1))
192 i++;
193 }
194 }
195 return i;
196 }
197
198public:
199
200 uint get_size() { return (uint) size; }
201
202 Histogram_type get_type() { return type; }
203
204 uchar *get_values() { return (uchar *) values; }
205
206 void set_size (ulonglong sz) { size= (uint8) sz; }
207
208 void set_type (Histogram_type t) { type= t; }
209
210 void set_values (uchar *vals) { values= (uchar *) vals; }
211
212 bool is_available() { return get_size() > 0 && get_values(); }
213
214 void set_value(uint i, double val)
215 {
216 switch (type) {
217 case SINGLE_PREC_HB:
218 ((uint8 *) values)[i]= (uint8) (val * prec_factor());
219 return;
220 case DOUBLE_PREC_HB:
221 int2store(values + i * 2, val * prec_factor());
222 return;
223 }
224 }
225
226 void set_prev_value(uint i)
227 {
228 switch (type) {
229 case SINGLE_PREC_HB:
230 ((uint8 *) values)[i]= ((uint8 *) values)[i-1];
231 return;
232 case DOUBLE_PREC_HB:
233 int2store(values + i * 2, uint2korr(values + i * 2 - 2));
234 return;
235 }
236 }
237
238 double range_selectivity(double min_pos, double max_pos)
239 {
240 double sel;
241 double bucket_sel= 1.0/(get_width() + 1);
242 uint min= find_bucket(min_pos, TRUE);
243 uint max= find_bucket(max_pos, FALSE);
244 sel= bucket_sel * (max - min + 1);
245 return sel;
246 }
247
248 /*
249 Estimate selectivity of "col=const" using a histogram
250 */
251 double point_selectivity(double pos, double avg_sel);
252};
253
254
255class Columns_statistics;
256class Index_statistics;
257
258/* Statistical data on a table */
259
260class Table_statistics
261{
262
263public:
264 my_bool cardinality_is_null; /* TRUE if the cardinality is unknown */
265 ha_rows cardinality; /* Number of rows in the table */
266 uchar *min_max_record_buffers; /* Record buffers for min/max values */
267 Column_statistics *column_stats; /* Array of statistical data for columns */
268 Index_statistics *index_stats; /* Array of statistical data for indexes */
269 ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
270 ulong total_hist_size; /* Total size of all histograms */
271 uchar *histograms; /* Sequence of histograms */
272};
273
274
275/*
276 Statistical data on a column
277
278 Note: objects of this class may be "empty", where they have almost all fields
279 as zeros, for example, get_avg_frequency() will return 0.
280
281 objects are allocated in alloc_statistics_for_table[_share].
282*/
283
284class Column_statistics
285{
286
287private:
288 static const uint Scale_factor_nulls_ratio= 100000;
289 static const uint Scale_factor_avg_length= 100000;
290 static const uint Scale_factor_avg_frequency= 100000;
291
292public:
293 /*
294 Bitmap indicating what statistical characteristics
295 are available for the column
296 */
297 uint32 column_stat_nulls;
298
299 /* For the below two, see comments in get_column_range_cardinality() */
300 /* Minimum value for the column */
301 Field *min_value;
302 /* Maximum value for the column */
303 Field *max_value;
304
305private:
306
307 /*
308 The ratio Z/N multiplied by the scale factor Scale_factor_nulls_ratio,
309 where
310 N is the total number of rows,
311 Z is the number of nulls in the column
312 */
313 ulong nulls_ratio;
314
315 /*
316 Average number of bytes occupied by the representation of a
317 value of the column in memory buffers such as join buffer
318 multiplied by the scale factor Scale_factor_avg_length.
319 CHAR values are stripped of trailing spaces.
320 Flexible values are stripped of their length prefixes.
321 */
322 ulong avg_length;
323
324 /*
325 The ratio N/D multiplied by the scale factor Scale_factor_avg_frequency,
326 where
327 N is the number of rows with not null value in the column,
328 D the number of distinct values among them
329 */
330 ulong avg_frequency;
331
332public:
333
334 Histogram histogram;
335
336 void set_all_nulls()
337 {
338 column_stat_nulls=
339 ((1 << (COLUMN_STAT_HISTOGRAM-COLUMN_STAT_COLUMN_NAME))-1) <<
340 (COLUMN_STAT_COLUMN_NAME+1);
341 }
342
343 void set_not_null(uint stat_field_no)
344 {
345 column_stat_nulls&= ~(1 << stat_field_no);
346 }
347
348 bool is_null(uint stat_field_no)
349 {
350 return MY_TEST(column_stat_nulls & (1 << stat_field_no));
351 }
352
353 double get_nulls_ratio()
354 {
355 return (double) nulls_ratio / Scale_factor_nulls_ratio;
356 }
357
358 double get_avg_length()
359 {
360 return (double) avg_length / Scale_factor_avg_length;
361 }
362
363 double get_avg_frequency()
364 {
365 return (double) avg_frequency / Scale_factor_avg_frequency;
366 }
367
368 void set_nulls_ratio (double val)
369 {
370 nulls_ratio= (ulong) (val * Scale_factor_nulls_ratio);
371 }
372
373 void set_avg_length (double val)
374 {
375 avg_length= (ulong) (val * Scale_factor_avg_length);
376 }
377
378 void set_avg_frequency (double val)
379 {
380 avg_frequency= (ulong) (val * Scale_factor_avg_frequency);
381 }
382
383 bool min_max_values_are_provided()
384 {
385 return !is_null(COLUMN_STAT_MIN_VALUE) &&
386 !is_null(COLUMN_STAT_MIN_VALUE);
387 }
388};
389
390
391/* Statistical data on an index prefixes */
392
393class Index_statistics
394{
395
396private:
397 static const uint Scale_factor_avg_frequency= 100000;
398 /*
399 The k-th element of this array contains the ratio N/D
400 multiplied by the scale factor Scale_factor_avg_frequency,
401 where N is the number of index entries without nulls
402 in the first k components, and D is the number of distinct
403 k-component prefixes among them
404 */
405 ulong *avg_frequency;
406
407public:
408
409 void init_avg_frequency(ulong *ptr) { avg_frequency= ptr; }
410
411 bool avg_frequency_is_inited() { return avg_frequency != NULL; }
412
413 double get_avg_frequency(uint i)
414 {
415 return (double) avg_frequency[i] / Scale_factor_avg_frequency;
416 }
417
418 void set_avg_frequency(uint i, double val)
419 {
420 avg_frequency[i]= (ulong) (val * Scale_factor_avg_frequency);
421 }
422
423};
424
425#endif /* SQL_STATISTICS_H */
426